From 4b60f6d4f1254a7c18d5f30e88a8fd629ba4e633 Mon Sep 17 00:00:00 2001 From: binqi zhang Date: Thu, 12 Aug 2021 17:02:38 +0800 Subject: [PATCH] gpu: update r32p1 gpu driver [2/14] PD#SWPL-56978 Problem: mali driver is not official for AndroidS Solution: update to r32p1 beta Verify: full verification tests on ohm Change-Id: Id6cd1282e9f8f0743602224c763808700e56e9f9 Signed-off-by: binqi zhang --- 0001-update-r32p1-gpu-driver.patch | 94611 ++++++++++++++++ .../devicetree/bindings/arm/mali-midgard.txt | 226 + .../bindings/arm/memory_group_manager.txt | 49 + .../arm/protected_memory_allocator.txt | 69 + .../devicetree/bindings/power/mali-opp.txt | 202 + .../Documentation/dma-buf-test-exporter.txt | 46 + bifrost/r25p0/kernel/Mconfig | 27 + bifrost/r25p0/kernel/build.bp | 102 + .../drivers/base/dma_buf_lock/src/Kbuild | 0 .../drivers/base/dma_buf_lock/src/Makefile | 0 .../base/dma_buf_lock/src/dma_buf_lock.c | 0 .../base/dma_buf_lock/src/dma_buf_lock.h | 0 .../drivers/base/dma_buf_test_exporter/Kbuild | 0 .../base/dma_buf_test_exporter/Kconfig | 0 .../base/dma_buf_test_exporter/Makefile | 0 .../base/dma_buf_test_exporter/build.bp | 0 .../dma-buf-test-exporter.c | 0 .../drivers/base/memory_group_manager/Kbuild | 0 .../drivers/base/memory_group_manager/Kconfig | 0 .../base/memory_group_manager/Makefile | 0 .../base/memory_group_manager/build.bp | 0 .../memory_group_manager.c | 0 .../base/protected_memory_allocator/Kbuild | 0 .../base/protected_memory_allocator/Kconfig | 0 .../base/protected_memory_allocator/Makefile | 0 .../base/protected_memory_allocator/build.bp | 0 .../protected_memory_allocator.c | 0 bifrost/r25p0/kernel/drivers/gpu/arm/Kbuild | 23 + bifrost/r25p0/kernel/drivers/gpu/arm/Kconfig | 25 + .../kernel/drivers/gpu/arm/midgard/Kbuild | 216 + .../kernel/drivers/gpu/arm/midgard/Kconfig | 309 + .../kernel/drivers/gpu/arm/midgard/Makefile | 38 + .../drivers/gpu/arm/midgard/Makefile.kbase | 0 .../kernel/drivers/gpu/arm/midgard/Mconfig | 278 + 
.../drivers/gpu/arm/midgard/arbiter/Kbuild | 24 + .../arm/midgard/arbiter/mali_kbase_arbif.c | 175 + .../arm/midgard/arbiter/mali_kbase_arbif.h | 133 + .../midgard/arbiter/mali_kbase_arbiter_defs.h | 95 + .../arbiter/mali_kbase_arbiter_interface.h | 181 + .../midgard/arbiter/mali_kbase_arbiter_pm.c | 645 + .../midgard/arbiter/mali_kbase_arbiter_pm.h | 159 + .../gpu/arm/midgard/backend/gpu/Kbuild | 65 + .../backend/gpu/mali_kbase_backend_config.h | 31 + .../gpu/mali_kbase_cache_policy_backend.c | 34 + .../gpu/mali_kbase_cache_policy_backend.h | 39 + .../gpu/mali_kbase_debug_job_fault_backend.c | 164 + .../midgard/backend/gpu/mali_kbase_devfreq.c | 731 + .../midgard/backend/gpu/mali_kbase_devfreq.h | 46 + .../backend/gpu/mali_kbase_device_hw.c | 0 .../backend/gpu/mali_kbase_device_internal.h | 127 + .../backend/gpu/mali_kbase_gpuprops_backend.c | 146 + .../backend/gpu/mali_kbase_instr_backend.c | 411 + .../backend/gpu/mali_kbase_instr_defs.h | 60 + .../backend/gpu/mali_kbase_instr_internal.h | 44 + .../backend/gpu/mali_kbase_irq_internal.h | 44 + .../backend/gpu/mali_kbase_irq_linux.c | 499 + .../midgard/backend/gpu/mali_kbase_jm_as.c | 243 + .../midgard/backend/gpu/mali_kbase_jm_defs.h | 111 + .../midgard/backend/gpu/mali_kbase_jm_hw.c | 1421 + .../backend/gpu/mali_kbase_jm_internal.h | 177 + .../midgard/backend/gpu/mali_kbase_jm_rb.c | 1656 + .../midgard/backend/gpu/mali_kbase_jm_rb.h | 83 + .../backend/gpu/mali_kbase_js_backend.c | 351 + .../backend/gpu/mali_kbase_js_internal.h | 74 + .../backend/gpu/mali_kbase_l2_mmu_config.c | 122 + .../backend/gpu/mali_kbase_l2_mmu_config.h | 55 + .../backend/gpu/mali_kbase_pm_always_on.c | 67 + .../backend/gpu/mali_kbase_pm_always_on.h | 81 + .../backend/gpu/mali_kbase_pm_backend.c | 747 + .../midgard/backend/gpu/mali_kbase_pm_ca.c | 112 + .../midgard/backend/gpu/mali_kbase_pm_ca.h | 89 + .../backend/gpu/mali_kbase_pm_ca_devfreq.h | 60 + .../backend/gpu/mali_kbase_pm_coarse_demand.c | 66 + 
.../backend/gpu/mali_kbase_pm_coarse_demand.h | 69 + .../midgard/backend/gpu/mali_kbase_pm_defs.h | 517 + .../backend/gpu/mali_kbase_pm_driver.c | 2226 + .../backend/gpu/mali_kbase_pm_internal.h | 710 + .../backend/gpu/mali_kbase_pm_l2_states.h | 38 + .../backend/gpu/mali_kbase_pm_metrics.c | 318 + .../backend/gpu/mali_kbase_pm_policy.c | 249 + .../backend/gpu/mali_kbase_pm_policy.h | 106 + .../backend/gpu/mali_kbase_pm_shader_states.h | 43 + .../arm/midgard/backend/gpu/mali_kbase_time.c | 70 + .../kernel/drivers/gpu/arm/midgard/build.bp | 183 + .../context/backend/mali_kbase_context_jm.c | 213 + .../arm/midgard/context/mali_kbase_context.c | 210 + .../arm/midgard/context/mali_kbase_context.h | 157 + .../context/mali_kbase_context_internal.h | 60 + .../mali_kbase_debug_ktrace_codes_jm.h | 170 + .../backend/mali_kbase_debug_ktrace_defs_jm.h | 75 + .../backend/mali_kbase_debug_ktrace_jm.c | 113 + .../backend/mali_kbase_debug_ktrace_jm.h | 362 + .../mali_kbase_debug_linux_ktrace_jm.h | 150 + .../midgard/debug/mali_kbase_debug_ktrace.c | 342 + .../midgard/debug/mali_kbase_debug_ktrace.h | 219 + .../debug/mali_kbase_debug_ktrace_codes.h | 158 + .../debug/mali_kbase_debug_ktrace_defs.h | 152 + .../debug/mali_kbase_debug_ktrace_internal.h | 89 + .../debug/mali_kbase_debug_linux_ktrace.h | 99 + .../device/backend/mali_kbase_device_jm.c | 260 + .../arm/midgard/device/mali_kbase_device.c | 429 + .../arm/midgard/device/mali_kbase_device.h | 71 + .../device/mali_kbase_device_internal.h | 78 + .../gpu/backend/mali_kbase_gpu_fault_jm.c | 181 + .../gpu/backend/mali_kbase_gpu_regmap_jm.h | 0 .../gpu/arm/midgard/gpu/mali_kbase_gpu.c | 41 + .../gpu/arm/midgard/gpu/mali_kbase_gpu.h | 0 .../midgard/gpu/mali_kbase_gpu_coherency.h | 0 .../arm/midgard/gpu/mali_kbase_gpu_fault.h | 59 + .../gpu/arm/midgard/gpu/mali_kbase_gpu_id.h | 0 .../arm/midgard/gpu/mali_kbase_gpu_regmap.h | 437 + .../kernel/drivers/gpu/arm/midgard/ipa/Kbuild | 28 + .../gpu/arm/midgard/ipa/mali_kbase_ipa.c | 675 + 
.../gpu/arm/midgard/ipa/mali_kbase_ipa.h | 253 + .../arm/midgard/ipa/mali_kbase_ipa_debugfs.c | 322 + .../arm/midgard/ipa/mali_kbase_ipa_debugfs.h | 68 + .../arm/midgard/ipa/mali_kbase_ipa_simple.c | 351 + .../arm/midgard/ipa/mali_kbase_ipa_simple.h | 45 + .../ipa/mali_kbase_ipa_vinstr_common.c | 0 .../ipa/mali_kbase_ipa_vinstr_common.h | 0 .../midgard/ipa/mali_kbase_ipa_vinstr_g7x.c | 0 .../gpu/arm/midgard/jm/mali_base_jm_kernel.h | 0 .../gpu/arm/midgard/jm/mali_kbase_jm_defs.h | 818 + .../gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h | 0 .../gpu/arm/midgard/jm/mali_kbase_jm_js.h | 892 + .../gpu/arm/midgard/jm/mali_kbase_js_defs.h | 409 + .../arm/midgard/mali_base_hwconfig_features.h | 486 + .../arm/midgard/mali_base_hwconfig_issues.h | 635 + .../gpu/arm/midgard/mali_base_kernel.h | 0 .../gpu/arm/midgard/mali_base_mem_priv.h | 0 .../drivers/gpu/arm/midgard/mali_kbase.h | 628 + .../arm/midgard/mali_kbase_as_fault_debugfs.c | 113 + .../arm/midgard/mali_kbase_as_fault_debugfs.h | 50 + .../drivers/gpu/arm/midgard/mali_kbase_bits.h | 41 + .../gpu/arm/midgard/mali_kbase_cache_policy.c | 67 + .../gpu/arm/midgard/mali_kbase_cache_policy.h | 50 + .../gpu/arm/midgard/mali_kbase_config.c | 48 + .../gpu/arm/midgard/mali_kbase_config.h | 311 + .../arm/midgard/mali_kbase_config_defaults.h | 213 + .../gpu/arm/midgard/mali_kbase_core_linux.c | 4548 + .../arm/midgard/mali_kbase_cs_experimental.h | 54 + .../gpu/arm/midgard/mali_kbase_ctx_sched.c | 344 + .../gpu/arm/midgard/mali_kbase_ctx_sched.h | 209 + .../gpu/arm/midgard/mali_kbase_debug.c | 44 + .../gpu/arm/midgard/mali_kbase_debug.h | 169 + .../arm/midgard/mali_kbase_debug_job_fault.c | 566 + .../arm/midgard/mali_kbase_debug_job_fault.h | 116 + .../arm/midgard/mali_kbase_debug_mem_view.c | 313 + .../arm/midgard/mali_kbase_debug_mem_view.h | 40 + .../arm/midgard/mali_kbase_debugfs_helper.c | 183 + .../arm/midgard/mali_kbase_debugfs_helper.h | 141 + .../drivers/gpu/arm/midgard/mali_kbase_defs.h | 1681 + 
.../arm/midgard/mali_kbase_disjoint_events.c | 81 + .../gpu/arm/midgard/mali_kbase_dma_fence.c | 456 + .../gpu/arm/midgard/mali_kbase_dma_fence.h | 136 + .../gpu/arm/midgard/mali_kbase_dummy_job_wa.c | 442 + .../gpu/arm/midgard/mali_kbase_dummy_job_wa.h | 45 + .../gpu/arm/midgard/mali_kbase_event.c | 264 + .../gpu/arm/midgard/mali_kbase_fence.c | 214 + .../gpu/arm/midgard/mali_kbase_fence.h | 282 + .../gpu/arm/midgard/mali_kbase_fence_defs.h | 68 + .../gpu/arm/midgard/mali_kbase_gator.h | 53 + .../midgard/mali_kbase_gpu_memory_debugfs.c | 110 + .../midgard/mali_kbase_gpu_memory_debugfs.h | 42 + .../gpu/arm/midgard/mali_kbase_gpuprops.c | 636 + .../gpu/arm/midgard/mali_kbase_gpuprops.h | 135 + .../arm/midgard/mali_kbase_gpuprops_types.h | 98 + .../drivers/gpu/arm/midgard/mali_kbase_gwt.c | 269 + .../drivers/gpu/arm/midgard/mali_kbase_gwt.h | 55 + .../drivers/gpu/arm/midgard/mali_kbase_hw.c | 425 + .../drivers/gpu/arm/midgard/mali_kbase_hw.h | 70 + .../arm/midgard/mali_kbase_hwaccess_backend.h | 45 + .../arm/midgard/mali_kbase_hwaccess_defs.h | 51 + .../midgard/mali_kbase_hwaccess_gpuprops.h | 87 + .../arm/midgard/mali_kbase_hwaccess_instr.h | 151 + .../gpu/arm/midgard/mali_kbase_hwaccess_jm.h | 302 + .../gpu/arm/midgard/mali_kbase_hwaccess_pm.h | 229 + .../arm/midgard/mali_kbase_hwaccess_time.h | 42 + .../gpu/arm/midgard/mali_kbase_hwcnt.c | 800 + .../midgard/mali_kbase_hwcnt_accumulator.h | 146 + .../arm/midgard/mali_kbase_hwcnt_backend.h | 217 + .../midgard/mali_kbase_hwcnt_backend_gpu.c | 0 .../midgard/mali_kbase_hwcnt_backend_gpu.h | 0 .../arm/midgard/mali_kbase_hwcnt_context.h | 119 + .../gpu/arm/midgard/mali_kbase_hwcnt_gpu.c | 773 + .../gpu/arm/midgard/mali_kbase_hwcnt_gpu.h | 251 + .../gpu/arm/midgard/mali_kbase_hwcnt_legacy.c | 152 + .../gpu/arm/midgard/mali_kbase_hwcnt_legacy.h | 94 + .../gpu/arm/midgard/mali_kbase_hwcnt_reader.h | 0 .../gpu/arm/midgard/mali_kbase_hwcnt_types.c | 538 + .../gpu/arm/midgard/mali_kbase_hwcnt_types.h | 1087 + 
.../midgard/mali_kbase_hwcnt_virtualizer.c | 790 + .../midgard/mali_kbase_hwcnt_virtualizer.h | 145 + .../gpu/arm/midgard/mali_kbase_ioctl.h | 0 .../drivers/gpu/arm/midgard/mali_kbase_jd.c | 1804 + .../gpu/arm/midgard/mali_kbase_jd_debugfs.c | 244 + .../gpu/arm/midgard/mali_kbase_jd_debugfs.h | 45 + .../drivers/gpu/arm/midgard/mali_kbase_jm.c | 151 + .../drivers/gpu/arm/midgard/mali_kbase_jm.h | 115 + .../drivers/gpu/arm/midgard/mali_kbase_js.c | 3760 + .../drivers/gpu/arm/midgard/mali_kbase_js.h | 40 + .../gpu/arm/midgard/mali_kbase_js_ctx_attr.c | 283 + .../gpu/arm/midgard/mali_kbase_js_ctx_attr.h | 155 + .../gpu/arm/midgard/mali_kbase_linux.h | 48 + .../drivers/gpu/arm/midgard/mali_kbase_mem.c | 4517 + .../drivers/gpu/arm/midgard/mali_kbase_mem.h | 1771 + .../gpu/arm/midgard/mali_kbase_mem_linux.c | 3030 + .../gpu/arm/midgard/mali_kbase_mem_linux.h | 464 + .../gpu/arm/midgard/mali_kbase_mem_lowlevel.h | 166 + .../gpu/arm/midgard/mali_kbase_mem_pool.c | 856 + .../arm/midgard/mali_kbase_mem_pool_debugfs.c | 191 + .../arm/midgard/mali_kbase_mem_pool_debugfs.h | 123 + .../arm/midgard/mali_kbase_mem_pool_group.c | 115 + .../arm/midgard/mali_kbase_mem_pool_group.h | 92 + .../midgard/mali_kbase_mem_profile_debugfs.c | 138 + .../midgard/mali_kbase_mem_profile_debugfs.h | 64 + .../mali_kbase_mem_profile_debugfs_buf_size.h | 39 + .../arm/midgard/mali_kbase_mipe_gen_header.h | 217 + .../gpu/arm/midgard/mali_kbase_mipe_proto.h | 127 + .../gpu/arm/midgard/mali_kbase_native_mgm.c | 153 + .../gpu/arm/midgard/mali_kbase_native_mgm.h | 39 + .../arm/midgard/mali_kbase_platform_fake.c | 124 + .../drivers/gpu/arm/midgard/mali_kbase_pm.c | 278 + .../drivers/gpu/arm/midgard/mali_kbase_pm.h | 244 + .../midgard/mali_kbase_regs_history_debugfs.c | 136 + .../midgard/mali_kbase_regs_history_debugfs.h | 55 + .../gpu/arm/midgard/mali_kbase_reset_gpu.h | 139 + .../drivers/gpu/arm/midgard/mali_kbase_smc.c | 91 + .../drivers/gpu/arm/midgard/mali_kbase_smc.h | 72 + 
.../gpu/arm/midgard/mali_kbase_softjobs.c | 1785 + .../gpu/arm/midgard/mali_kbase_strings.c | 28 + .../gpu/arm/midgard/mali_kbase_strings.h | 24 + .../drivers/gpu/arm/midgard/mali_kbase_sync.h | 223 + .../gpu/arm/midgard/mali_kbase_sync_android.c | 542 + .../gpu/arm/midgard/mali_kbase_sync_common.c | 49 + .../gpu/arm/midgard/mali_kbase_sync_file.c | 366 + .../gpu/arm/midgard/mali_kbase_utility.h | 55 + .../gpu/arm/midgard/mali_kbase_vinstr.c | 996 + .../gpu/arm/midgard/mali_kbase_vinstr.h | 91 + .../gpu/arm/midgard/mali_linux_trace.h | 532 + .../drivers/gpu/arm/midgard/mali_malisw.h | 109 + .../kernel/drivers/gpu/arm/midgard/mali_uk.h | 0 .../midgard/mmu/backend/mali_kbase_mmu_jm.c | 424 + .../gpu/arm/midgard/mmu/mali_kbase_mmu.c | 2205 + .../gpu/arm/midgard/mmu/mali_kbase_mmu.h | 118 + .../gpu/arm/midgard/mmu/mali_kbase_mmu_hw.h | 107 + .../midgard/mmu/mali_kbase_mmu_hw_direct.c | 272 + .../arm/midgard/mmu/mali_kbase_mmu_internal.h | 63 + .../midgard/mmu/mali_kbase_mmu_mode_aarch64.c | 200 + .../midgard/mmu/mali_kbase_mmu_mode_lpae.c | 0 .../drivers/gpu/arm/midgard/platform/Kconfig | 30 + .../arm/midgard/platform/devicetree/Kbuild | 39 + .../midgard/platform/devicetree/mali_clock.c | 741 + .../midgard/platform/devicetree/mali_clock.h | 54 + .../devicetree/mali_kbase_config_devicetree.c | 84 + .../devicetree/mali_kbase_config_platform.h | 94 + .../devicetree/mali_kbase_runtime_pm.c | 313 + .../platform/devicetree/mali_platform.h | 25 + .../platform/devicetree/mali_scaling.h | 154 + .../midgard/platform/devicetree/meson_main2.c | 126 + .../midgard/platform/devicetree/meson_main2.h | 48 + .../arm/midgard/platform/devicetree/mpgpu.c | 326 + .../midgard/platform/devicetree/platform_gx.c | 262 + .../arm/midgard/platform/devicetree/scaling.c | 606 + .../gpu/arm/midgard/platform/vexpress/Kbuild | 24 + .../vexpress/mali_kbase_config_platform.h | 39 + .../vexpress/mali_kbase_config_vexpress.c | 69 + .../midgard/platform/vexpress_1xv7_a57/Kbuild | 24 + 
.../mali_kbase_config_platform.h | 39 + .../mali_kbase_config_vexpress.c | 65 + .../platform/vexpress_6xvirtex7_10mhz/Kbuild | 25 + .../mali_kbase_config_platform.h | 39 + .../mali_kbase_config_vexpress.c | 67 + .../gpu/arm/midgard/protected_mode_switcher.h | 69 + .../drivers/gpu/arm/midgard/tests/Kbuild | 23 + .../drivers/gpu/arm/midgard/tests/Kconfig | 23 + .../drivers/gpu/arm/midgard/tests/Mconfig | 38 + .../midgard/tests/include/kutf/kutf_helpers.h | 77 + .../tests/include/kutf/kutf_helpers_user.h | 179 + .../arm/midgard/tests/include/kutf/kutf_mem.h | 73 + .../tests/include/kutf/kutf_resultset.h | 181 + .../midgard/tests/include/kutf/kutf_suite.h | 569 + .../midgard/tests/include/kutf/kutf_utils.h | 60 + .../drivers/gpu/arm/midgard/tests/kutf/Kbuild | 26 + .../gpu/arm/midgard/tests/kutf/Kconfig | 0 .../gpu/arm/midgard/tests/kutf/Makefile | 0 .../gpu/arm/midgard/tests/kutf/build.bp | 36 + .../gpu/arm/midgard/tests/kutf/kutf_helpers.c | 129 + .../midgard/tests/kutf/kutf_helpers_user.c | 468 + .../gpu/arm/midgard/tests/kutf/kutf_mem.c | 108 + .../arm/midgard/tests/kutf/kutf_resultset.c | 164 + .../gpu/arm/midgard/tests/kutf/kutf_suite.c | 1219 + .../gpu/arm/midgard/tests/kutf/kutf_utils.c | 76 + .../midgard/tests/mali_kutf_irq_test/Kbuild | 26 + .../midgard/tests/mali_kutf_irq_test/Kconfig | 0 .../midgard/tests/mali_kutf_irq_test/Makefile | 0 .../midgard/tests/mali_kutf_irq_test/build.bp | 35 + .../mali_kutf_irq_test_main.c | 278 + .../arm/midgard/thirdparty/mali_kbase_mmap.c | 366 + .../tl/backend/mali_kbase_timeline_jm.c | 97 + .../gpu/arm/midgard/tl/mali_kbase_timeline.c | 274 + .../gpu/arm/midgard/tl/mali_kbase_timeline.h | 121 + .../arm/midgard/tl/mali_kbase_timeline_io.c | 329 + .../arm/midgard/tl/mali_kbase_timeline_priv.h | 66 + .../arm/midgard/tl/mali_kbase_tl_serialize.h | 125 + .../gpu/arm/midgard/tl/mali_kbase_tlstream.c | 298 + .../gpu/arm/midgard/tl/mali_kbase_tlstream.h | 166 + .../arm/midgard/tl/mali_kbase_tracepoints.c | 3194 + 
.../arm/midgard/tl/mali_kbase_tracepoints.h | 2381 + .../include/linux/dma-buf-test-exporter.h | 83 + .../include/linux/memory_group_manager.h | 198 + .../linux/protected_memory_allocator.h | 110 + .../include/linux/protected_mode_switcher.h | 69 + bifrost/r25p0/kernel/license.txt | 198 + ...rusted_firmware_gpu_coherency_toggle.patch | 117 + bifrost/{r25p0 => r32p1} | 0 .../ABI/testing/sysfs-device-mali | 293 + .../devicetree/bindings/arm/mali-midgard.txt | 19 +- .../bindings/arm/memory_group_manager.txt | 7 +- .../bindings/arm/priority_control_manager.txt | 48 + .../arm/protected_memory_allocator.txt | 7 +- .../devicetree/bindings/power/mali-opp.txt | 9 +- .../Documentation/dma-buf-test-exporter.txt | 10 +- dvalin/kernel/Mconfig | 31 +- dvalin/kernel/build.bp | 65 +- dvalin/kernel/drivers/base/arm/Kbuild | 34 + dvalin/kernel/drivers/base/arm/Kconfig | 64 + dvalin/kernel/drivers/base/arm/Makefile | 98 + dvalin/kernel/drivers/base/arm/Mconfig | 64 + .../drivers/base/arm/dma_buf_lock/src/Kbuild | 23 + .../base/arm/dma_buf_lock/src/Makefile | 39 + .../base/arm/dma_buf_lock/src/dma_buf_lock.c | 898 + .../base/arm/dma_buf_lock/src/dma_buf_lock.h | 46 + .../base/arm/dma_buf_test_exporter/Kbuild | 23 + .../base/arm/dma_buf_test_exporter/build.bp | 36 + .../dma-buf-test-exporter.c | 824 + .../base/arm/memory_group_manager/Kbuild | 23 + .../base/arm/memory_group_manager/build.bp | 36 + .../memory_group_manager.c | 491 + .../arm/protected_memory_allocator/Kbuild | 23 + .../arm/protected_memory_allocator/build.bp | 36 + .../protected_memory_allocator.c | 551 + dvalin/kernel/drivers/gpu/arm/Kbuild | 8 +- dvalin/kernel/drivers/gpu/arm/Kconfig | 8 +- dvalin/kernel/drivers/gpu/arm/Makefile | 21 + dvalin/kernel/drivers/gpu/arm/midgard/Kbuild | 367 +- dvalin/kernel/drivers/gpu/arm/midgard/Kconfig | 357 +- .../kernel/drivers/gpu/arm/midgard/Makefile | 201 +- dvalin/kernel/drivers/gpu/arm/midgard/Mconfig | 288 +- .../drivers/gpu/arm/midgard/arbiter/Kbuild | 11 +- 
.../arm/midgard/arbiter/mali_kbase_arbif.c | 209 +- .../arm/midgard/arbiter/mali_kbase_arbif.h | 44 +- .../midgard/arbiter/mali_kbase_arbiter_defs.h | 34 +- .../arbiter/mali_kbase_arbiter_interface.h | 70 +- .../midgard/arbiter/mali_kbase_arbiter_pm.c | 614 +- .../midgard/arbiter/mali_kbase_arbiter_pm.h | 87 +- .../gpu/arm/midgard/backend/gpu/Kbuild | 70 +- .../backend/gpu/mali_kbase_backend_config.h | 7 +- .../gpu/mali_kbase_cache_policy_backend.c | 9 +- .../gpu/mali_kbase_cache_policy_backend.h | 10 +- .../gpu/mali_kbase_clk_rate_trace_mgr.c | 325 + .../gpu/mali_kbase_clk_rate_trace_mgr.h | 154 + .../gpu/mali_kbase_debug_job_fault_backend.c | 11 +- .../midgard/backend/gpu/mali_kbase_devfreq.c | 207 +- .../midgard/backend/gpu/mali_kbase_devfreq.h | 23 +- .../backend/gpu/mali_kbase_device_internal.h | 0 .../backend/gpu/mali_kbase_gpuprops_backend.c | 75 +- .../backend/gpu/mali_kbase_instr_backend.c | 130 +- .../backend/gpu/mali_kbase_instr_defs.h | 18 +- .../backend/gpu/mali_kbase_instr_internal.h | 9 +- .../backend/gpu/mali_kbase_irq_internal.h | 7 +- .../backend/gpu/mali_kbase_irq_linux.c | 37 +- .../midgard/backend/gpu/mali_kbase_jm_as.c | 14 +- .../midgard/backend/gpu/mali_kbase_jm_defs.h | 20 +- .../midgard/backend/gpu/mali_kbase_jm_hw.c | 236 +- .../backend/gpu/mali_kbase_jm_internal.h | 17 +- .../midgard/backend/gpu/mali_kbase_jm_rb.c | 145 +- .../midgard/backend/gpu/mali_kbase_jm_rb.h | 8 +- .../backend/gpu/mali_kbase_js_backend.c | 61 +- .../backend/gpu/mali_kbase_js_internal.h | 8 +- .../backend/gpu/mali_kbase_l2_mmu_config.c | 47 +- .../backend/gpu/mali_kbase_l2_mmu_config.h | 25 +- .../backend/gpu/mali_kbase_pm_always_on.c | 13 +- .../backend/gpu/mali_kbase_pm_always_on.h | 9 +- .../backend/gpu/mali_kbase_pm_backend.c | 243 +- .../midgard/backend/gpu/mali_kbase_pm_ca.c | 44 +- .../midgard/backend/gpu/mali_kbase_pm_ca.h | 7 +- .../backend/gpu/mali_kbase_pm_ca_devfreq.h | 7 +- .../backend/gpu/mali_kbase_pm_coarse_demand.c | 13 +- 
.../backend/gpu/mali_kbase_pm_coarse_demand.h | 9 +- .../midgard/backend/gpu/mali_kbase_pm_defs.h | 244 +- .../backend/gpu/mali_kbase_pm_driver.c | 907 +- .../backend/gpu/mali_kbase_pm_internal.h | 143 +- .../backend/gpu/mali_kbase_pm_l2_states.h | 20 +- .../backend/gpu/mali_kbase_pm_mcu_states.h | 63 + .../backend/gpu/mali_kbase_pm_metrics.c | 271 +- .../backend/gpu/mali_kbase_pm_policy.c | 204 +- .../backend/gpu/mali_kbase_pm_policy.h | 7 +- .../backend/gpu/mali_kbase_pm_shader_states.h | 44 +- .../arm/midgard/backend/gpu/mali_kbase_time.c | 57 +- .../kernel/drivers/gpu/arm/midgard/build.bp | 203 +- .../drivers/gpu/arm/midgard/context/Kbuild | 27 + .../context/backend/mali_kbase_context_csf.c | 201 + .../context/backend/mali_kbase_context_jm.c | 138 +- .../arm/midgard/context/mali_kbase_context.c | 205 +- .../arm/midgard/context/mali_kbase_context.h | 35 +- .../context/mali_kbase_context_internal.h | 18 +- .../kernel/drivers/gpu/arm/midgard/csf/Kbuild | 47 + .../gpu/arm/midgard/csf/ipa_control/Kbuild | 22 + .../ipa_control/mali_kbase_csf_ipa_control.c | 925 + .../ipa_control/mali_kbase_csf_ipa_control.h | 244 + .../gpu/arm/midgard/csf/mali_kbase_csf.c | 3069 + .../gpu/arm/midgard/csf/mali_kbase_csf.h | 564 + .../csf/mali_kbase_csf_cpu_queue_debugfs.c | 191 + .../csf/mali_kbase_csf_cpu_queue_debugfs.h | 90 + .../midgard/csf/mali_kbase_csf_csg_debugfs.c | 591 + .../midgard/csf/mali_kbase_csf_csg_debugfs.h | 47 + .../gpu/arm/midgard/csf/mali_kbase_csf_defs.h | 1254 + .../arm/midgard/csf/mali_kbase_csf_firmware.c | 2337 + .../arm/midgard/csf/mali_kbase_csf_firmware.h | 811 + .../midgard/csf/mali_kbase_csf_firmware_cfg.c | 327 + .../midgard/csf/mali_kbase_csf_firmware_cfg.h | 74 + .../csf/mali_kbase_csf_firmware_no_mali.c | 1389 + .../csf/mali_kbase_csf_heap_context_alloc.c | 195 + .../csf/mali_kbase_csf_heap_context_alloc.h | 75 + .../gpu/arm/midgard/csf/mali_kbase_csf_kcpu.c | 2258 + .../gpu/arm/midgard/csf/mali_kbase_csf_kcpu.h | 356 + 
.../midgard/csf/mali_kbase_csf_kcpu_debugfs.c | 197 + .../midgard/csf/mali_kbase_csf_kcpu_debugfs.h | 37 + .../csf/mali_kbase_csf_protected_memory.c | 119 + .../csf/mali_kbase_csf_protected_memory.h | 71 + .../midgard/csf/mali_kbase_csf_reset_gpu.c | 629 + .../midgard/csf/mali_kbase_csf_scheduler.c | 5063 + .../midgard/csf/mali_kbase_csf_scheduler.h | 494 + .../midgard/csf/mali_kbase_csf_tiler_heap.c | 611 + .../midgard/csf/mali_kbase_csf_tiler_heap.h | 115 + .../csf/mali_kbase_csf_tiler_heap_debugfs.c | 106 + .../csf/mali_kbase_csf_tiler_heap_debugfs.h | 37 + .../csf/mali_kbase_csf_tiler_heap_def.h | 114 + .../arm/midgard/csf/mali_kbase_csf_timeout.c | 178 + .../arm/midgard/csf/mali_kbase_csf_timeout.h | 66 + .../midgard/csf/mali_kbase_csf_tl_reader.c | 534 + .../midgard/csf/mali_kbase_csf_tl_reader.h | 185 + .../midgard/csf/mali_kbase_csf_trace_buffer.c | 688 + .../midgard/csf/mali_kbase_csf_trace_buffer.h | 182 + .../drivers/gpu/arm/midgard/debug/Kbuild | 27 + .../mali_kbase_debug_ktrace_codes_csf.h | 278 + .../mali_kbase_debug_ktrace_codes_jm.h | 10 +- .../backend/mali_kbase_debug_ktrace_csf.c | 193 + .../backend/mali_kbase_debug_ktrace_csf.h | 203 + .../mali_kbase_debug_ktrace_defs_csf.h | 116 + .../backend/mali_kbase_debug_ktrace_defs_jm.h | 100 +- .../backend/mali_kbase_debug_ktrace_jm.c | 50 +- .../backend/mali_kbase_debug_ktrace_jm.h | 118 +- .../mali_kbase_debug_linux_ktrace_csf.h | 241 + .../mali_kbase_debug_linux_ktrace_jm.h | 52 +- .../midgard/debug/mali_kbase_debug_ktrace.c | 55 +- .../midgard/debug/mali_kbase_debug_ktrace.h | 20 +- .../debug/mali_kbase_debug_ktrace_codes.h | 24 +- .../debug/mali_kbase_debug_ktrace_defs.h | 83 +- .../debug/mali_kbase_debug_ktrace_internal.h | 7 +- .../debug/mali_kbase_debug_linux_ktrace.h | 40 +- .../drivers/gpu/arm/midgard/device/Kbuild | 33 + .../device/backend/mali_kbase_device_csf.c | 464 + .../device/backend/mali_kbase_device_hw_csf.c | 163 + .../device/backend/mali_kbase_device_hw_jm.c | 98 + 
.../device/backend/mali_kbase_device_jm.c | 181 +- .../arm/midgard/device/mali_kbase_device.c | 233 +- .../arm/midgard/device/mali_kbase_device.h | 126 +- .../arm/midgard/device/mali_kbase_device_hw.c | 182 + .../device/mali_kbase_device_internal.h | 24 +- .../kernel/drivers/gpu/arm/midgard/gpu/Kbuild | 27 + .../gpu/backend/mali_kbase_gpu_fault_csf.c | 104 + .../gpu/backend/mali_kbase_gpu_fault_jm.c | 13 +- .../gpu/arm/midgard/gpu/mali_kbase_gpu.c | 8 +- .../arm/midgard/gpu/mali_kbase_gpu_fault.h | 23 +- .../arm/midgard/gpu/mali_kbase_gpu_regmap.h | 416 +- .../kernel/drivers/gpu/arm/midgard/ipa/Kbuild | 25 +- .../mali_kbase_ipa_counter_common_csf.c | 457 + .../mali_kbase_ipa_counter_common_csf.h | 159 + .../mali_kbase_ipa_counter_common_jm.c | 354 + .../mali_kbase_ipa_counter_common_jm.h | 231 + .../ipa/backend/mali_kbase_ipa_counter_csf.c | 171 + .../ipa/backend/mali_kbase_ipa_counter_jm.c | 555 + .../gpu/arm/midgard/ipa/mali_kbase_ipa.c | 287 +- .../gpu/arm/midgard/ipa/mali_kbase_ipa.h | 90 +- .../arm/midgard/ipa/mali_kbase_ipa_debugfs.c | 12 +- .../arm/midgard/ipa/mali_kbase_ipa_debugfs.h | 12 +- .../arm/midgard/ipa/mali_kbase_ipa_simple.c | 53 +- .../arm/midgard/ipa/mali_kbase_ipa_simple.h | 7 +- .../gpu/arm/midgard/jm/mali_kbase_jm_defs.h | 76 +- .../gpu/arm/midgard/jm/mali_kbase_jm_js.h | 89 +- .../gpu/arm/midgard/jm/mali_kbase_js_defs.h | 411 +- .../arm/midgard/mali_base_hwconfig_features.h | 62 +- .../arm/midgard/mali_base_hwconfig_issues.h | 62 +- .../drivers/gpu/arm/midgard/mali_kbase.h | 165 +- .../arm/midgard/mali_kbase_as_fault_debugfs.c | 11 +- .../arm/midgard/mali_kbase_as_fault_debugfs.h | 9 +- .../drivers/gpu/arm/midgard/mali_kbase_bits.h | 16 +- .../gpu/arm/midgard/mali_kbase_cache_policy.c | 12 +- .../gpu/arm/midgard/mali_kbase_cache_policy.h | 11 +- .../drivers/gpu/arm/midgard/mali_kbase_caps.h | 61 + .../gpu/arm/midgard/mali_kbase_ccswe.c | 100 + .../gpu/arm/midgard/mali_kbase_ccswe.h | 96 + .../gpu/arm/midgard/mali_kbase_config.c | 68 +- 
.../gpu/arm/midgard/mali_kbase_config.h | 294 +- .../arm/midgard/mali_kbase_config_defaults.h | 26 +- .../gpu/arm/midgard/mali_kbase_core_linux.c | 1930 +- .../arm/midgard/mali_kbase_cs_experimental.h | 20 +- .../gpu/arm/midgard/mali_kbase_ctx_sched.c | 84 +- .../gpu/arm/midgard/mali_kbase_ctx_sched.h | 44 +- .../gpu/arm/midgard/mali_kbase_debug.c | 9 +- .../gpu/arm/midgard/mali_kbase_debug.h | 79 +- .../arm/midgard/mali_kbase_debug_job_fault.c | 36 +- .../arm/midgard/mali_kbase_debug_job_fault.h | 10 +- .../arm/midgard/mali_kbase_debug_mem_view.c | 26 +- .../arm/midgard/mali_kbase_debug_mem_view.h | 7 +- .../arm/midgard/mali_kbase_debugfs_helper.c | 104 +- .../arm/midgard/mali_kbase_debugfs_helper.h | 53 +- .../drivers/gpu/arm/midgard/mali_kbase_defs.h | 573 +- .../arm/midgard/mali_kbase_disjoint_events.c | 7 +- .../gpu/arm/midgard/mali_kbase_dma_fence.c | 59 +- .../gpu/arm/midgard/mali_kbase_dma_fence.h | 26 +- .../gpu/arm/midgard/mali_kbase_dummy_job_wa.c | 15 +- .../gpu/arm/midgard/mali_kbase_dummy_job_wa.h | 36 +- .../gpu/arm/midgard/mali_kbase_dvfs_debugfs.c | 98 + .../gpu/arm/midgard/mali_kbase_dvfs_debugfs.h | 35 + .../gpu/arm/midgard/mali_kbase_event.c | 30 +- .../gpu/arm/midgard/mali_kbase_fence.c | 73 +- .../gpu/arm/midgard/mali_kbase_fence.h | 17 +- .../gpu/arm/midgard/mali_kbase_fence_defs.h | 15 +- .../gpu/arm/midgard/mali_kbase_fence_ops.c | 83 + .../gpu/arm/midgard/mali_kbase_gator.h | 7 +- .../midgard/mali_kbase_gpu_memory_debugfs.c | 22 +- .../midgard/mali_kbase_gpu_memory_debugfs.h | 26 +- .../gpu/arm/midgard/mali_kbase_gpuprops.c | 315 +- .../gpu/arm/midgard/mali_kbase_gpuprops.h | 67 +- .../arm/midgard/mali_kbase_gpuprops_types.h | 89 +- .../drivers/gpu/arm/midgard/mali_kbase_gwt.c | 11 +- .../drivers/gpu/arm/midgard/mali_kbase_gwt.h | 9 +- .../drivers/gpu/arm/midgard/mali_kbase_hw.c | 230 +- .../drivers/gpu/arm/midgard/mali_kbase_hw.h | 23 +- .../arm/midgard/mali_kbase_hwaccess_backend.h | 8 +- .../arm/midgard/mali_kbase_hwaccess_defs.h | 15 
+- .../midgard/mali_kbase_hwaccess_gpuprops.h | 39 +- .../arm/midgard/mali_kbase_hwaccess_instr.h | 22 +- .../gpu/arm/midgard/mali_kbase_hwaccess_jm.h | 12 +- .../gpu/arm/midgard/mali_kbase_hwaccess_pm.h | 75 +- .../arm/midgard/mali_kbase_hwaccess_time.h | 30 +- .../gpu/arm/midgard/mali_kbase_hwcnt.c | 103 +- .../midgard/mali_kbase_hwcnt_accumulator.h | 7 +- .../arm/midgard/mali_kbase_hwcnt_backend.h | 102 +- .../midgard/mali_kbase_hwcnt_backend_csf.c | 1864 + .../midgard/mali_kbase_hwcnt_backend_csf.h | 162 + .../midgard/mali_kbase_hwcnt_backend_csf_if.h | 311 + .../mali_kbase_hwcnt_backend_csf_if_fw.c | 787 + .../mali_kbase_hwcnt_backend_csf_if_fw.h | 50 + .../arm/midgard/mali_kbase_hwcnt_backend_jm.c | 793 + .../arm/midgard/mali_kbase_hwcnt_backend_jm.h | 60 + .../arm/midgard/mali_kbase_hwcnt_context.h | 46 +- .../gpu/arm/midgard/mali_kbase_hwcnt_gpu.c | 760 +- .../gpu/arm/midgard/mali_kbase_hwcnt_gpu.h | 314 +- .../gpu/arm/midgard/mali_kbase_hwcnt_legacy.c | 11 +- .../gpu/arm/midgard/mali_kbase_hwcnt_legacy.h | 7 +- .../gpu/arm/midgard/mali_kbase_hwcnt_types.c | 129 +- .../gpu/arm/midgard/mali_kbase_hwcnt_types.h | 125 +- .../midgard/mali_kbase_hwcnt_virtualizer.c | 26 +- .../midgard/mali_kbase_hwcnt_virtualizer.h | 23 +- .../drivers/gpu/arm/midgard/mali_kbase_jd.c | 331 +- .../gpu/arm/midgard/mali_kbase_jd_debugfs.c | 56 +- .../gpu/arm/midgard/mali_kbase_jd_debugfs.h | 12 +- .../drivers/gpu/arm/midgard/mali_kbase_jm.c | 16 +- .../drivers/gpu/arm/midgard/mali_kbase_jm.h | 12 +- .../drivers/gpu/arm/midgard/mali_kbase_js.c | 422 +- .../drivers/gpu/arm/midgard/mali_kbase_js.h | 12 +- .../gpu/arm/midgard/mali_kbase_js_ctx_attr.c | 39 +- .../gpu/arm/midgard/mali_kbase_js_ctx_attr.h | 47 +- .../gpu/arm/midgard/mali_kbase_kinstr_jm.c | 894 + .../gpu/arm/midgard/mali_kbase_kinstr_jm.h | 275 + .../gpu/arm/midgard/mali_kbase_linux.h | 12 +- .../drivers/gpu/arm/midgard/mali_kbase_mem.c | 1160 +- .../drivers/gpu/arm/midgard/mali_kbase_mem.h | 518 +- 
.../gpu/arm/midgard/mali_kbase_mem_linux.c | 683 +- .../gpu/arm/midgard/mali_kbase_mem_linux.h | 36 +- .../gpu/arm/midgard/mali_kbase_mem_lowlevel.h | 13 +- .../gpu/arm/midgard/mali_kbase_mem_pool.c | 44 +- .../arm/midgard/mali_kbase_mem_pool_debugfs.c | 7 +- .../arm/midgard/mali_kbase_mem_pool_debugfs.h | 7 +- .../arm/midgard/mali_kbase_mem_pool_group.c | 7 +- .../arm/midgard/mali_kbase_mem_pool_group.h | 7 +- .../midgard/mali_kbase_mem_profile_debugfs.c | 26 +- .../midgard/mali_kbase_mem_profile_debugfs.h | 23 +- .../mali_kbase_mem_profile_debugfs_buf_size.h | 8 +- .../arm/midgard/mali_kbase_mipe_gen_header.h | 40 +- .../gpu/arm/midgard/mali_kbase_mipe_proto.h | 7 +- .../gpu/arm/midgard/mali_kbase_native_mgm.c | 7 +- .../gpu/arm/midgard/mali_kbase_native_mgm.h | 9 +- .../arm/midgard/mali_kbase_platform_fake.c | 15 +- .../drivers/gpu/arm/midgard/mali_kbase_pm.c | 55 +- .../drivers/gpu/arm/midgard/mali_kbase_pm.h | 34 +- .../midgard/mali_kbase_regs_history_debugfs.c | 138 +- .../midgard/mali_kbase_regs_history_debugfs.h | 40 +- .../gpu/arm/midgard/mali_kbase_reset_gpu.h | 162 +- .../drivers/gpu/arm/midgard/mali_kbase_smc.c | 9 +- .../drivers/gpu/arm/midgard/mali_kbase_smc.h | 11 +- .../gpu/arm/midgard/mali_kbase_softjobs.c | 167 +- .../gpu/arm/midgard/mali_kbase_strings.c | 10 +- .../gpu/arm/midgard/mali_kbase_strings.h | 7 +- .../drivers/gpu/arm/midgard/mali_kbase_sync.h | 28 +- .../gpu/arm/midgard/mali_kbase_sync_android.c | 67 +- .../gpu/arm/midgard/mali_kbase_sync_common.c | 11 +- .../gpu/arm/midgard/mali_kbase_sync_file.c | 36 +- .../arm/midgard/mali_kbase_trace_gpu_mem.c | 221 + .../arm/midgard/mali_kbase_trace_gpu_mem.h | 100 + .../gpu/arm/midgard/mali_kbase_utility.h | 11 +- .../gpu/arm/midgard/mali_kbase_vinstr.c | 311 +- .../gpu/arm/midgard/mali_kbase_vinstr.h | 7 +- .../gpu/arm/midgard/mali_linux_trace.h | 47 +- .../drivers/gpu/arm/midgard/mali_malisw.h | 60 +- .../midgard/mali_power_gpu_frequency_trace.c | 26 + 
.../midgard/mali_power_gpu_frequency_trace.h | 68 + .../kernel/drivers/gpu/arm/midgard/mmu/Kbuild | 30 + .../midgard/mmu/backend/mali_kbase_mmu_csf.c | 565 + .../midgard/mmu/backend/mali_kbase_mmu_jm.c | 90 +- .../gpu/arm/midgard/mmu/mali_kbase_mmu.c | 220 +- .../gpu/arm/midgard/mmu/mali_kbase_mmu.h | 45 +- .../gpu/arm/midgard/mmu/mali_kbase_mmu_hw.h | 12 +- .../midgard/mmu/mali_kbase_mmu_hw_direct.c | 67 +- .../arm/midgard/mmu/mali_kbase_mmu_internal.h | 49 +- .../midgard/mmu/mali_kbase_mmu_mode_aarch64.c | 25 +- .../drivers/gpu/arm/midgard/platform/Kconfig | 10 +- .../arm/midgard/platform/devicetree/Kbuild | 25 +- .../devicetree/mali_kbase_clk_rate_trace.c | 105 + .../devicetree/mali_kbase_config_devicetree.c | 28 +- .../devicetree/mali_kbase_config_platform.c | 43 + .../devicetree/mali_kbase_config_platform.h | 54 +- .../devicetree/mali_kbase_runtime_pm.c | 26 +- .../gpu/arm/midgard/platform/vexpress/Kbuild | 11 +- .../vexpress/mali_kbase_config_platform.h | 7 +- .../vexpress/mali_kbase_config_vexpress.c | 22 +- .../midgard/platform/vexpress_1xv7_a57/Kbuild | 11 +- .../mali_kbase_config_platform.h | 7 +- .../mali_kbase_config_vexpress.c | 20 +- .../platform/vexpress_6xvirtex7_10mhz/Kbuild | 13 +- .../mali_kbase_config_platform.h | 7 +- .../mali_kbase_config_vexpress.c | 22 +- .../gpu/arm/midgard/protected_mode_switcher.h | 31 +- .../drivers/gpu/arm/midgard/tests/Kbuild | 17 +- .../drivers/gpu/arm/midgard/tests/Kconfig | 46 +- .../drivers/gpu/arm/midgard/tests/Mconfig | 81 +- .../drivers/gpu/arm/midgard/tests/build.bp | 40 + .../midgard/tests/include/kutf/kutf_helpers.h | 15 +- .../tests/include/kutf/kutf_helpers_user.h | 25 +- .../arm/midgard/tests/include/kutf/kutf_mem.h | 7 +- .../tests/include/kutf/kutf_resultset.h | 7 +- .../midgard/tests/include/kutf/kutf_suite.h | 29 +- .../midgard/tests/include/kutf/kutf_utils.h | 7 +- .../drivers/gpu/arm/midgard/tests/kutf/Kbuild | 21 +- .../gpu/arm/midgard/tests/kutf/build.bp | 24 +- 
.../gpu/arm/midgard/tests/kutf/kutf_helpers.c | 13 +- .../midgard/tests/kutf/kutf_helpers_user.c | 28 +- .../gpu/arm/midgard/tests/kutf/kutf_mem.c | 7 +- .../arm/midgard/tests/kutf/kutf_resultset.c | 7 +- .../gpu/arm/midgard/tests/kutf/kutf_suite.c | 23 +- .../gpu/arm/midgard/tests/kutf/kutf_utils.c | 7 +- .../mali_kutf_clk_rate_trace/kernel/Kbuild | 25 + .../mali_kutf_clk_rate_trace/kernel/build.bp | 43 + .../kernel/mali_kutf_clk_rate_trace_test.c | 957 + .../mali_kutf_clk_rate_trace_test.h | 151 + .../midgard/tests/mali_kutf_irq_test/Kbuild | 13 +- .../midgard/tests/mali_kutf_irq_test/build.bp | 21 +- .../mali_kutf_irq_test_main.c | 15 +- .../drivers/gpu/arm/midgard/thirdparty/Kbuild | 21 + .../arm/midgard/thirdparty/mali_kbase_mmap.c | 77 +- .../kernel/drivers/gpu/arm/midgard/tl/Kbuild | 32 + .../tl/backend/mali_kbase_timeline_csf.c | 171 + .../tl/backend/mali_kbase_timeline_jm.c | 23 +- .../gpu/arm/midgard/tl/mali_kbase_timeline.c | 142 +- .../gpu/arm/midgard/tl/mali_kbase_timeline.h | 57 +- .../arm/midgard/tl/mali_kbase_timeline_io.c | 177 +- .../arm/midgard/tl/mali_kbase_timeline_priv.h | 26 +- .../arm/midgard/tl/mali_kbase_tl_serialize.h | 7 +- .../gpu/arm/midgard/tl/mali_kbase_tlstream.c | 43 +- .../gpu/arm/midgard/tl/mali_kbase_tlstream.h | 22 +- .../arm/midgard/tl/mali_kbase_tracepoints.c | 585 +- .../arm/midgard/tl/mali_kbase_tracepoints.h | 1159 +- .../include/linux/dma-buf-test-exporter.h | 8 +- .../include/linux/memory_group_manager.h | 7 +- .../include/linux/priority_control_manager.h | 77 + .../linux/protected_memory_allocator.h | 7 +- .../include/linux/protected_mode_switcher.h | 7 +- .../arm/midgard/csf/mali_base_csf_kernel.h | 765 + .../csf/mali_gpu_csf_control_registers.h | 32 + .../arm/midgard/csf/mali_gpu_csf_registers.h | 1488 + .../arm/midgard/csf/mali_kbase_csf_ioctl.h | 433 + .../gpu/backend/mali_kbase_gpu_regmap_csf.h | 335 + .../gpu/backend/mali_kbase_gpu_regmap_jm.h | 287 + .../midgard/gpu/mali_kbase_gpu_coherency.h | 30 + 
.../gpu/arm/midgard/gpu/mali_kbase_gpu_id.h | 122 + .../arm/midgard/gpu/mali_kbase_gpu_regmap.h | 434 + .../gpu/arm/midgard/jm/mali_base_jm_kernel.h | 1200 + .../gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h | 223 + .../uapi/gpu/arm/midgard/mali_base_kernel.h | 826 + .../uapi/gpu/arm/midgard/mali_base_mem_priv.h | 58 + .../gpu/arm/midgard/mali_kbase_hwcnt_reader.h | 105 + .../uapi/gpu/arm/midgard/mali_kbase_ioctl.h | 836 + .../arm/midgard/mali_kbase_kinstr_jm_reader.h | 69 + .../include/uapi/gpu/arm/midgard/mali_uk.h | 70 + 708 files changed, 247179 insertions(+), 7855 deletions(-) create mode 100644 0001-update-r32p1-gpu-driver.patch create mode 100644 bifrost/r25p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt create mode 100644 bifrost/r25p0/kernel/Documentation/devicetree/bindings/arm/memory_group_manager.txt create mode 100644 bifrost/r25p0/kernel/Documentation/devicetree/bindings/arm/protected_memory_allocator.txt create mode 100644 bifrost/r25p0/kernel/Documentation/devicetree/bindings/power/mali-opp.txt create mode 100644 bifrost/r25p0/kernel/Documentation/dma-buf-test-exporter.txt create mode 100644 bifrost/r25p0/kernel/Mconfig create mode 100644 bifrost/r25p0/kernel/build.bp rename {dvalin => bifrost/r25p0}/kernel/drivers/base/dma_buf_lock/src/Kbuild (100%) rename {dvalin => bifrost/r25p0}/kernel/drivers/base/dma_buf_lock/src/Makefile (100%) rename {dvalin => bifrost/r25p0}/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c (100%) rename {dvalin => bifrost/r25p0}/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h (100%) rename {dvalin => bifrost/r25p0}/kernel/drivers/base/dma_buf_test_exporter/Kbuild (100%) rename {dvalin => bifrost/r25p0}/kernel/drivers/base/dma_buf_test_exporter/Kconfig (100%) rename {dvalin => bifrost/r25p0}/kernel/drivers/base/dma_buf_test_exporter/Makefile (100%) rename {dvalin => bifrost/r25p0}/kernel/drivers/base/dma_buf_test_exporter/build.bp (100%) rename {dvalin => 
bifrost/r25p0}/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c (100%) rename {dvalin => bifrost/r25p0}/kernel/drivers/base/memory_group_manager/Kbuild (100%) rename {dvalin => bifrost/r25p0}/kernel/drivers/base/memory_group_manager/Kconfig (100%) rename {dvalin => bifrost/r25p0}/kernel/drivers/base/memory_group_manager/Makefile (100%) rename {dvalin => bifrost/r25p0}/kernel/drivers/base/memory_group_manager/build.bp (100%) rename {dvalin => bifrost/r25p0}/kernel/drivers/base/memory_group_manager/memory_group_manager.c (100%) rename {dvalin => bifrost/r25p0}/kernel/drivers/base/protected_memory_allocator/Kbuild (100%) rename {dvalin => bifrost/r25p0}/kernel/drivers/base/protected_memory_allocator/Kconfig (100%) rename {dvalin => bifrost/r25p0}/kernel/drivers/base/protected_memory_allocator/Makefile (100%) rename {dvalin => bifrost/r25p0}/kernel/drivers/base/protected_memory_allocator/build.bp (100%) rename {dvalin => bifrost/r25p0}/kernel/drivers/base/protected_memory_allocator/protected_memory_allocator.c (100%) create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/Kbuild create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/Kconfig create mode 100755 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/Kbuild create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/Kconfig create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/Makefile rename {dvalin => bifrost/r25p0}/kernel/drivers/gpu/arm/midgard/Makefile.kbase (100%) create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/Mconfig create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/Kbuild create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbif.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbif.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_defs.h create mode 100644 
bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_interface.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_pm.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_pm.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h rename {dvalin => bifrost/r25p0}/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c (100%) create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c create mode 100644 
bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h create mode 100755 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h create mode 100644 
bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_l2_states.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_shader_states.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/build.bp create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/context/backend/mali_kbase_context_jm.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context_internal.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_codes_jm.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_defs_jm.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_jm.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_jm.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_linux_ktrace_jm.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_codes.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_defs.h create 
mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_internal.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_linux_ktrace.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_jm.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device_internal.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_fault_jm.c rename {dvalin => bifrost/r25p0}/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h (100%) create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu.c rename {dvalin => bifrost/r25p0}/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu.h (100%) rename {dvalin => bifrost/r25p0}/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h (100%) create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_fault.h rename {dvalin => bifrost/r25p0}/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h (100%) create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/Kbuild create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.h rename {dvalin => 
bifrost/r25p0}/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c (100%) rename {dvalin => bifrost/r25p0}/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h (100%) rename {dvalin => bifrost/r25p0}/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c (100%) rename {dvalin => bifrost/r25p0}/kernel/drivers/gpu/arm/midgard/jm/mali_base_jm_kernel.h (100%) create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_defs.h rename {dvalin => bifrost/r25p0}/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h (100%) create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_js.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_js_defs.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h rename {dvalin => bifrost/r25p0}/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h (100%) rename {dvalin => bifrost/r25p0}/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h (100%) create mode 100755 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_bits.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c 
create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cs_experimental.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h create mode 100755 
bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_accumulator.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend.h rename {dvalin => bifrost/r25p0}/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.c (100%) rename {dvalin => bifrost/r25p0}/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.h (100%) create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_context.h create mode 100644 
bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.h rename {dvalin => bifrost/r25p0}/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h (100%) create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.h rename {dvalin => bifrost/r25p0}/kernel/drivers/gpu/arm/midgard/mali_kbase_ioctl.h (100%) create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c create mode 100644 
bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_gen_header.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_proto.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_reset_gpu.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h create mode 100644 
bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_android.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_common.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_file.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h rename {dvalin => bifrost/r25p0}/kernel/drivers/gpu/arm/midgard/mali_uk.h (100%) create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/backend/mali_kbase_mmu_jm.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_hw.h create mode 100755 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_hw_direct.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_internal.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_mode_aarch64.c rename {dvalin => bifrost/r25p0}/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_mode_lpae.c (100%) create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild 
create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild create mode 100644 
bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/Kbuild create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/Kconfig create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/Mconfig create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers_user.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Kbuild rename {dvalin => bifrost/r25p0}/kernel/drivers/gpu/arm/midgard/tests/kutf/Kconfig (100%) rename {dvalin => bifrost/r25p0}/kernel/drivers/gpu/arm/midgard/tests/kutf/Makefile (100%) create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/build.bp create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers_user.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c create mode 100644 
bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild rename {dvalin => bifrost/r25p0}/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig (100%) rename {dvalin => bifrost/r25p0}/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile (100%) create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/backend/mali_kbase_timeline_jm.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline_io.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline_priv.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tl_serialize.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tlstream.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tlstream.h create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tracepoints.c create mode 100644 bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tracepoints.h create mode 100644 bifrost/r25p0/kernel/include/linux/dma-buf-test-exporter.h create mode 100644 bifrost/r25p0/kernel/include/linux/memory_group_manager.h create mode 100644 bifrost/r25p0/kernel/include/linux/protected_memory_allocator.h create mode 100644 bifrost/r25p0/kernel/include/linux/protected_mode_switcher.h create mode 100644 bifrost/r25p0/kernel/license.txt create mode 100644 
bifrost/r25p0/kernel/patches/trusted_firmware_gpu_coherency_toggle.patch rename bifrost/{r25p0 => r32p1} (100%) create mode 100644 dvalin/kernel/Documentation/ABI/testing/sysfs-device-mali create mode 100644 dvalin/kernel/Documentation/devicetree/bindings/arm/priority_control_manager.txt create mode 100644 dvalin/kernel/drivers/base/arm/Kbuild create mode 100644 dvalin/kernel/drivers/base/arm/Kconfig create mode 100644 dvalin/kernel/drivers/base/arm/Makefile create mode 100644 dvalin/kernel/drivers/base/arm/Mconfig create mode 100644 dvalin/kernel/drivers/base/arm/dma_buf_lock/src/Kbuild create mode 100644 dvalin/kernel/drivers/base/arm/dma_buf_lock/src/Makefile create mode 100644 dvalin/kernel/drivers/base/arm/dma_buf_lock/src/dma_buf_lock.c create mode 100644 dvalin/kernel/drivers/base/arm/dma_buf_lock/src/dma_buf_lock.h create mode 100644 dvalin/kernel/drivers/base/arm/dma_buf_test_exporter/Kbuild create mode 100644 dvalin/kernel/drivers/base/arm/dma_buf_test_exporter/build.bp create mode 100644 dvalin/kernel/drivers/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.c create mode 100644 dvalin/kernel/drivers/base/arm/memory_group_manager/Kbuild create mode 100644 dvalin/kernel/drivers/base/arm/memory_group_manager/build.bp create mode 100644 dvalin/kernel/drivers/base/arm/memory_group_manager/memory_group_manager.c create mode 100644 dvalin/kernel/drivers/base/arm/protected_memory_allocator/Kbuild create mode 100644 dvalin/kernel/drivers/base/arm/protected_memory_allocator/build.bp create mode 100644 dvalin/kernel/drivers/base/arm/protected_memory_allocator/protected_memory_allocator.c create mode 100644 dvalin/kernel/drivers/gpu/arm/Makefile create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_clk_rate_trace_mgr.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_clk_rate_trace_mgr.h mode change 100644 => 100755 dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h create mode 
100644 dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_mcu_states.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/context/Kbuild create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/context/backend/mali_kbase_context_csf.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/Kbuild create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/ipa_control/Kbuild create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/ipa_control/mali_kbase_csf_ipa_control.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/ipa_control/mali_kbase_csf_ipa_control.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_cpu_queue_debugfs.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_cpu_queue_debugfs.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_csg_debugfs.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_csg_debugfs.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_defs.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware_cfg.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware_cfg.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware_no_mali.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_heap_context_alloc.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_heap_context_alloc.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu.c create mode 100644 
dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu_debugfs.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu_debugfs.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_protected_memory.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_protected_memory.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_reset_gpu.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_scheduler.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_scheduler.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap_debugfs.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap_debugfs.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap_def.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_timeout.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_timeout.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tl_reader.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tl_reader.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_trace_buffer.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_trace_buffer.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/debug/Kbuild create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_codes_csf.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_csf.c create mode 100644 
dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_csf.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_defs_csf.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_linux_ktrace_csf.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/device/Kbuild create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_csf.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_hw_csf.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_hw_jm.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device_hw.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/gpu/Kbuild create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_fault_csf.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_csf.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_csf.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_jm.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_jm.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_csf.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_jm.c mode change 100644 => 100755 dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_caps.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ccswe.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ccswe.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dvfs_debugfs.c create mode 100644 
dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dvfs_debugfs.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_ops.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf_if.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf_if_fw.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf_if_fw.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_jm.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_jm.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_kinstr_jm.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_kinstr_jm.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_gpu_mem.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_gpu_mem.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_power_gpu_frequency_trace.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_power_gpu_frequency_trace.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mmu/Kbuild create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mmu/backend/mali_kbase_mmu_csf.c mode change 100644 => 100755 dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_clk_rate_trace.c mode change 100644 => 100755 dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.c mode change 100644 => 100755 dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h mode change 100644 => 
100755 dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/tests/build.bp create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/kernel/Kbuild create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/kernel/build.bp create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/thirdparty/Kbuild create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/tl/Kbuild create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/tl/backend/mali_kbase_timeline_csf.c create mode 100644 dvalin/kernel/include/linux/priority_control_manager.h create mode 100644 dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h create mode 100644 dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_control_registers.h create mode 100644 dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h create mode 100644 dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h create mode 100644 dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h create mode 100644 dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h create mode 100644 dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h create mode 100644 dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h create mode 100644 dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h create mode 100644 dvalin/kernel/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h create mode 100644 dvalin/kernel/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h create mode 100644 
dvalin/kernel/include/uapi/gpu/arm/midgard/mali_base_kernel.h create mode 100644 dvalin/kernel/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h create mode 100644 dvalin/kernel/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h create mode 100644 dvalin/kernel/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h create mode 100644 dvalin/kernel/include/uapi/gpu/arm/midgard/mali_kbase_kinstr_jm_reader.h create mode 100644 dvalin/kernel/include/uapi/gpu/arm/midgard/mali_uk.h diff --git a/0001-update-r32p1-gpu-driver.patch b/0001-update-r32p1-gpu-driver.patch new file mode 100644 index 0000000..ca10415 --- /dev/null +++ b/0001-update-r32p1-gpu-driver.patch @@ -0,0 +1,94611 @@ +From 39f791b03c5e534884f13ecfb9f86306fe7324dd Mon Sep 17 00:00:00 2001 +From: binqi zhang +Date: Thu, 12 Aug 2021 16:34:29 +0800 +Subject: [PATCH] update r32p1 gpu driver + +Change-Id: Ic8672b57c42e98ed97c65b48925280fbb38de0ee +--- + .../ABI/testing/sysfs-device-mali | 293 + + .../devicetree/bindings/arm/mali-midgard.txt | 19 +- + .../bindings/arm/memory_group_manager.txt | 7 +- + .../bindings/arm/priority_control_manager.txt | 48 + + .../arm/protected_memory_allocator.txt | 7 +- + .../devicetree/bindings/power/mali-opp.txt | 9 +- + .../Documentation/dma-buf-test-exporter.txt | 10 +- + dvalin/kernel/Mconfig | 31 +- + dvalin/kernel/build.bp | 65 +- + dvalin/kernel/drivers/base/arm/Kbuild | 34 + + dvalin/kernel/drivers/base/arm/Kconfig | 64 + + dvalin/kernel/drivers/base/arm/Makefile | 98 + + dvalin/kernel/drivers/base/arm/Mconfig | 64 + + .../base/{ => arm}/dma_buf_lock/src/Kbuild | 9 +- + .../base/{ => arm}/dma_buf_lock/src/Makefile | 17 +- + .../{ => arm}/dma_buf_lock/src/dma_buf_lock.c | 132 +- + .../{ => arm}/dma_buf_lock/src/dma_buf_lock.h | 7 +- + .../base/arm/dma_buf_test_exporter/Kbuild | 23 + + .../base/arm/dma_buf_test_exporter/build.bp | 36 + + .../dma-buf-test-exporter.c | 106 +- + .../memory_group_manager}/Kbuild | 11 +- + .../base/arm/memory_group_manager/build.bp | 36 + + 
.../memory_group_manager.c | 14 +- + .../arm/protected_memory_allocator/Kbuild | 23 + + .../arm/protected_memory_allocator/build.bp | 36 + + .../protected_memory_allocator.c | 551 ++ + .../base/dma_buf_test_exporter/Kconfig | 26 - + .../base/dma_buf_test_exporter/Makefile | 36 - + .../base/dma_buf_test_exporter/build.bp | 26 - + .../base/memory_group_manager/Makefile | 35 - + .../base/memory_group_manager/build.bp | 22 - + .../base/protected_memory_allocator/Makefile | 35 - + .../base/protected_memory_allocator/build.bp | 26 - + .../protected_memory_allocator.c | 308 - + dvalin/kernel/drivers/gpu/arm/Kbuild | 8 +- + dvalin/kernel/drivers/gpu/arm/Kconfig | 8 +- + .../Kbuild => gpu/arm/Makefile} | 9 +- + dvalin/kernel/drivers/gpu/arm/midgard/Kbuild | 367 +- + dvalin/kernel/drivers/gpu/arm/midgard/Kconfig | 357 +- + .../kernel/drivers/gpu/arm/midgard/Makefile | 201 +- + dvalin/kernel/drivers/gpu/arm/midgard/Mconfig | 288 +- + .../drivers/gpu/arm/midgard/arbiter/Kbuild | 11 +- + .../arm/midgard/arbiter/mali_kbase_arbif.c | 209 +- + .../arm/midgard/arbiter/mali_kbase_arbif.h | 44 +- + .../midgard/arbiter/mali_kbase_arbiter_defs.h | 34 +- + .../arbiter/mali_kbase_arbiter_interface.h | 70 +- + .../midgard/arbiter/mali_kbase_arbiter_pm.c | 614 +- + .../midgard/arbiter/mali_kbase_arbiter_pm.h | 87 +- + .../gpu/arm/midgard/backend/gpu/Kbuild | 70 +- + .../backend/gpu/mali_kbase_backend_config.h | 7 +- + .../gpu/mali_kbase_cache_policy_backend.c | 9 +- + .../gpu/mali_kbase_cache_policy_backend.h | 10 +- + .../gpu/mali_kbase_clk_rate_trace_mgr.c | 325 ++ + .../gpu/mali_kbase_clk_rate_trace_mgr.h | 154 + + .../gpu/mali_kbase_debug_job_fault_backend.c | 11 +- + .../midgard/backend/gpu/mali_kbase_devfreq.c | 207 +- + .../midgard/backend/gpu/mali_kbase_devfreq.h | 23 +- + .../backend/gpu/mali_kbase_device_hw.c | 388 -- + .../backend/gpu/mali_kbase_device_internal.h | 127 - + .../backend/gpu/mali_kbase_gpuprops_backend.c | 75 +- + .../backend/gpu/mali_kbase_instr_backend.c | 130 +- 
+ .../backend/gpu/mali_kbase_instr_defs.h | 18 +- + .../backend/gpu/mali_kbase_instr_internal.h | 9 +- + .../backend/gpu/mali_kbase_irq_internal.h | 7 +- + .../backend/gpu/mali_kbase_irq_linux.c | 37 +- + .../midgard/backend/gpu/mali_kbase_jm_as.c | 14 +- + .../midgard/backend/gpu/mali_kbase_jm_defs.h | 20 +- + .../midgard/backend/gpu/mali_kbase_jm_hw.c | 236 +- + .../backend/gpu/mali_kbase_jm_internal.h | 17 +- + .../midgard/backend/gpu/mali_kbase_jm_rb.c | 145 +- + .../midgard/backend/gpu/mali_kbase_jm_rb.h | 8 +- + .../backend/gpu/mali_kbase_js_backend.c | 61 +- + .../backend/gpu/mali_kbase_js_internal.h | 8 +- + .../backend/gpu/mali_kbase_l2_mmu_config.c | 47 +- + .../backend/gpu/mali_kbase_l2_mmu_config.h | 25 +- + .../backend/gpu/mali_kbase_pm_always_on.c | 13 +- + .../backend/gpu/mali_kbase_pm_always_on.h | 9 +- + .../backend/gpu/mali_kbase_pm_backend.c | 243 +- + .../midgard/backend/gpu/mali_kbase_pm_ca.c | 44 +- + .../midgard/backend/gpu/mali_kbase_pm_ca.h | 7 +- + .../backend/gpu/mali_kbase_pm_ca_devfreq.h | 7 +- + .../backend/gpu/mali_kbase_pm_coarse_demand.c | 13 +- + .../backend/gpu/mali_kbase_pm_coarse_demand.h | 9 +- + .../midgard/backend/gpu/mali_kbase_pm_defs.h | 244 +- + .../backend/gpu/mali_kbase_pm_driver.c | 907 ++- + .../backend/gpu/mali_kbase_pm_internal.h | 143 +- + .../backend/gpu/mali_kbase_pm_l2_states.h | 20 +- + .../backend/gpu/mali_kbase_pm_mcu_states.h | 63 + + .../backend/gpu/mali_kbase_pm_metrics.c | 271 +- + .../backend/gpu/mali_kbase_pm_policy.c | 204 +- + .../backend/gpu/mali_kbase_pm_policy.h | 7 +- + .../backend/gpu/mali_kbase_pm_shader_states.h | 44 +- + .../arm/midgard/backend/gpu/mali_kbase_time.c | 57 +- + .../kernel/drivers/gpu/arm/midgard/build.bp | 203 +- + .../arm/midgard/context/Kbuild} | 19 +- + .../context/backend/mali_kbase_context_csf.c | 201 + + .../context/backend/mali_kbase_context_jm.c | 138 +- + .../arm/midgard/context/mali_kbase_context.c | 205 +- + .../arm/midgard/context/mali_kbase_context.h | 35 +- + 
.../context/mali_kbase_context_internal.h | 18 +- + .../kernel/drivers/gpu/arm/midgard/csf/Kbuild | 47 + + .../ipa_control/Kbuild} | 11 +- + .../ipa_control/mali_kbase_csf_ipa_control.c | 925 +++ + .../ipa_control/mali_kbase_csf_ipa_control.h | 244 + + .../gpu/arm/midgard/csf/mali_kbase_csf.c | 3069 ++++++++++ + .../gpu/arm/midgard/csf/mali_kbase_csf.h | 564 ++ + .../csf/mali_kbase_csf_cpu_queue_debugfs.c | 191 + + .../csf/mali_kbase_csf_cpu_queue_debugfs.h | 90 + + .../midgard/csf/mali_kbase_csf_csg_debugfs.c | 591 ++ + .../midgard/csf/mali_kbase_csf_csg_debugfs.h | 47 + + .../gpu/arm/midgard/csf/mali_kbase_csf_defs.h | 1254 ++++ + .../arm/midgard/csf/mali_kbase_csf_firmware.c | 2337 ++++++++ + .../arm/midgard/csf/mali_kbase_csf_firmware.h | 811 +++ + .../midgard/csf/mali_kbase_csf_firmware_cfg.c | 327 ++ + .../midgard/csf/mali_kbase_csf_firmware_cfg.h | 74 + + .../csf/mali_kbase_csf_firmware_no_mali.c | 1389 +++++ + .../csf/mali_kbase_csf_heap_context_alloc.c | 195 + + .../csf/mali_kbase_csf_heap_context_alloc.h | 75 + + .../gpu/arm/midgard/csf/mali_kbase_csf_kcpu.c | 2258 ++++++++ + .../gpu/arm/midgard/csf/mali_kbase_csf_kcpu.h | 356 ++ + .../midgard/csf/mali_kbase_csf_kcpu_debugfs.c | 197 + + .../midgard/csf/mali_kbase_csf_kcpu_debugfs.h | 37 + + .../csf/mali_kbase_csf_protected_memory.c | 119 + + .../csf/mali_kbase_csf_protected_memory.h | 71 + + .../midgard/csf/mali_kbase_csf_reset_gpu.c | 629 ++ + .../midgard/csf/mali_kbase_csf_scheduler.c | 5063 +++++++++++++++++ + .../midgard/csf/mali_kbase_csf_scheduler.h | 494 ++ + .../midgard/csf/mali_kbase_csf_tiler_heap.c | 611 ++ + .../midgard/csf/mali_kbase_csf_tiler_heap.h | 115 + + .../csf/mali_kbase_csf_tiler_heap_debugfs.c | 106 + + .../csf/mali_kbase_csf_tiler_heap_debugfs.h | 37 + + .../csf/mali_kbase_csf_tiler_heap_def.h | 114 + + .../arm/midgard/csf/mali_kbase_csf_timeout.c | 178 + + .../arm/midgard/csf/mali_kbase_csf_timeout.h | 66 + + .../midgard/csf/mali_kbase_csf_tl_reader.c | 534 ++ + 
.../midgard/csf/mali_kbase_csf_tl_reader.h | 185 + + .../midgard/csf/mali_kbase_csf_trace_buffer.c | 688 +++ + .../midgard/csf/mali_kbase_csf_trace_buffer.h | 182 + + .../drivers/gpu/arm/midgard/debug/Kbuild | 27 + + .../mali_kbase_debug_ktrace_codes_csf.h | 278 + + .../mali_kbase_debug_ktrace_codes_jm.h | 10 +- + .../backend/mali_kbase_debug_ktrace_csf.c | 193 + + .../backend/mali_kbase_debug_ktrace_csf.h | 203 + + .../mali_kbase_debug_ktrace_defs_csf.h | 116 + + .../backend/mali_kbase_debug_ktrace_defs_jm.h | 100 +- + .../backend/mali_kbase_debug_ktrace_jm.c | 50 +- + .../backend/mali_kbase_debug_ktrace_jm.h | 118 +- + .../mali_kbase_debug_linux_ktrace_csf.h | 241 + + .../mali_kbase_debug_linux_ktrace_jm.h | 52 +- + .../midgard/debug/mali_kbase_debug_ktrace.c | 55 +- + .../midgard/debug/mali_kbase_debug_ktrace.h | 20 +- + .../debug/mali_kbase_debug_ktrace_codes.h | 24 +- + .../debug/mali_kbase_debug_ktrace_defs.h | 83 +- + .../debug/mali_kbase_debug_ktrace_internal.h | 7 +- + .../debug/mali_kbase_debug_linux_ktrace.h | 40 +- + .../{tests/kutf/Makefile => device/Kbuild} | 30 +- + .../device/backend/mali_kbase_device_csf.c | 464 ++ + .../device/backend/mali_kbase_device_hw_csf.c | 163 + + .../device/backend/mali_kbase_device_hw_jm.c | 98 + + .../device/backend/mali_kbase_device_jm.c | 181 +- + .../arm/midgard/device/mali_kbase_device.c | 233 +- + .../arm/midgard/device/mali_kbase_device.h | 126 +- + .../arm/midgard/device/mali_kbase_device_hw.c | 182 + + .../device/mali_kbase_device_internal.h | 24 +- + .../{tests/kutf/Kconfig => gpu/Kbuild} | 19 +- + .../gpu/backend/mali_kbase_gpu_fault_csf.c | 104 + + .../gpu/backend/mali_kbase_gpu_fault_jm.c | 13 +- + .../gpu/arm/midgard/gpu/mali_kbase_gpu.c | 8 +- + .../arm/midgard/gpu/mali_kbase_gpu_fault.h | 23 +- + .../arm/midgard/gpu/mali_kbase_gpu_regmap.h | 416 +- + .../kernel/drivers/gpu/arm/midgard/ipa/Kbuild | 25 +- + .../mali_kbase_ipa_counter_common_csf.c | 457 ++ + .../mali_kbase_ipa_counter_common_csf.h | 159 + + 
.../mali_kbase_ipa_counter_common_jm.c} | 20 +- + .../mali_kbase_ipa_counter_common_jm.h} | 40 +- + .../ipa/backend/mali_kbase_ipa_counter_csf.c | 171 + + .../mali_kbase_ipa_counter_jm.c} | 113 +- + .../gpu/arm/midgard/ipa/mali_kbase_ipa.c | 287 +- + .../gpu/arm/midgard/ipa/mali_kbase_ipa.h | 90 +- + .../arm/midgard/ipa/mali_kbase_ipa_debugfs.c | 12 +- + .../arm/midgard/ipa/mali_kbase_ipa_debugfs.h | 12 +- + .../arm/midgard/ipa/mali_kbase_ipa_simple.c | 53 +- + .../arm/midgard/ipa/mali_kbase_ipa_simple.h | 7 +- + .../gpu/arm/midgard/jm/mali_kbase_jm_defs.h | 76 +- + .../gpu/arm/midgard/jm/mali_kbase_jm_js.h | 89 +- + .../gpu/arm/midgard/jm/mali_kbase_js_defs.h | 411 +- + .../arm/midgard/mali_base_hwconfig_features.h | 62 +- + .../arm/midgard/mali_base_hwconfig_issues.h | 62 +- + .../drivers/gpu/arm/midgard/mali_kbase.h | 165 +- + .../arm/midgard/mali_kbase_as_fault_debugfs.c | 11 +- + .../arm/midgard/mali_kbase_as_fault_debugfs.h | 9 +- + .../drivers/gpu/arm/midgard/mali_kbase_bits.h | 16 +- + .../gpu/arm/midgard/mali_kbase_cache_policy.c | 12 +- + .../gpu/arm/midgard/mali_kbase_cache_policy.h | 11 +- + .../drivers/gpu/arm/midgard/mali_kbase_caps.h | 61 + + .../gpu/arm/midgard/mali_kbase_ccswe.c | 100 + + .../gpu/arm/midgard/mali_kbase_ccswe.h | 96 + + .../gpu/arm/midgard/mali_kbase_config.c | 68 +- + .../gpu/arm/midgard/mali_kbase_config.h | 294 +- + .../arm/midgard/mali_kbase_config_defaults.h | 26 +- + .../gpu/arm/midgard/mali_kbase_core_linux.c | 1969 +++++-- + .../arm/midgard/mali_kbase_cs_experimental.h | 20 +- + .../gpu/arm/midgard/mali_kbase_ctx_sched.c | 84 +- + .../gpu/arm/midgard/mali_kbase_ctx_sched.h | 44 +- + .../gpu/arm/midgard/mali_kbase_debug.c | 9 +- + .../gpu/arm/midgard/mali_kbase_debug.h | 79 +- + .../arm/midgard/mali_kbase_debug_job_fault.c | 36 +- + .../arm/midgard/mali_kbase_debug_job_fault.h | 10 +- + .../arm/midgard/mali_kbase_debug_mem_view.c | 26 +- + .../arm/midgard/mali_kbase_debug_mem_view.h | 7 +- + 
.../arm/midgard/mali_kbase_debugfs_helper.c | 104 +- + .../arm/midgard/mali_kbase_debugfs_helper.h | 53 +- + .../drivers/gpu/arm/midgard/mali_kbase_defs.h | 573 +- + .../arm/midgard/mali_kbase_disjoint_events.c | 7 +- + .../gpu/arm/midgard/mali_kbase_dma_fence.c | 59 +- + .../gpu/arm/midgard/mali_kbase_dma_fence.h | 26 +- + .../gpu/arm/midgard/mali_kbase_dummy_job_wa.c | 15 +- + .../gpu/arm/midgard/mali_kbase_dummy_job_wa.h | 36 +- + .../gpu/arm/midgard/mali_kbase_dvfs_debugfs.c | 98 + + .../gpu/arm/midgard/mali_kbase_dvfs_debugfs.h | 35 + + .../gpu/arm/midgard/mali_kbase_event.c | 30 +- + .../gpu/arm/midgard/mali_kbase_fence.c | 73 +- + .../gpu/arm/midgard/mali_kbase_fence.h | 17 +- + .../gpu/arm/midgard/mali_kbase_fence_defs.h | 15 +- + .../gpu/arm/midgard/mali_kbase_fence_ops.c | 83 + + .../gpu/arm/midgard/mali_kbase_gator.h | 7 +- + .../midgard/mali_kbase_gpu_memory_debugfs.c | 22 +- + .../midgard/mali_kbase_gpu_memory_debugfs.h | 26 +- + .../gpu/arm/midgard/mali_kbase_gpuprops.c | 315 +- + .../gpu/arm/midgard/mali_kbase_gpuprops.h | 67 +- + .../arm/midgard/mali_kbase_gpuprops_types.h | 89 +- + .../drivers/gpu/arm/midgard/mali_kbase_gwt.c | 11 +- + .../drivers/gpu/arm/midgard/mali_kbase_gwt.h | 9 +- + .../drivers/gpu/arm/midgard/mali_kbase_hw.c | 230 +- + .../drivers/gpu/arm/midgard/mali_kbase_hw.h | 23 +- + .../arm/midgard/mali_kbase_hwaccess_backend.h | 8 +- + .../arm/midgard/mali_kbase_hwaccess_defs.h | 15 +- + .../midgard/mali_kbase_hwaccess_gpuprops.h | 39 +- + .../arm/midgard/mali_kbase_hwaccess_instr.h | 22 +- + .../gpu/arm/midgard/mali_kbase_hwaccess_jm.h | 12 +- + .../gpu/arm/midgard/mali_kbase_hwaccess_pm.h | 75 +- + .../arm/midgard/mali_kbase_hwaccess_time.h | 30 +- + .../gpu/arm/midgard/mali_kbase_hwcnt.c | 103 +- + .../midgard/mali_kbase_hwcnt_accumulator.h | 7 +- + .../arm/midgard/mali_kbase_hwcnt_backend.h | 102 +- + .../midgard/mali_kbase_hwcnt_backend_csf.c | 1864 ++++++ + .../midgard/mali_kbase_hwcnt_backend_csf.h | 162 + + 
.../midgard/mali_kbase_hwcnt_backend_csf_if.h | 311 + + .../mali_kbase_hwcnt_backend_csf_if_fw.c | 787 +++ + .../mali_kbase_hwcnt_backend_csf_if_fw.h | 50 + + .../midgard/mali_kbase_hwcnt_backend_gpu.c | 510 -- + .../arm/midgard/mali_kbase_hwcnt_backend_jm.c | 793 +++ + ...nd_gpu.h => mali_kbase_hwcnt_backend_jm.h} | 23 +- + .../arm/midgard/mali_kbase_hwcnt_context.h | 46 +- + .../gpu/arm/midgard/mali_kbase_hwcnt_gpu.c | 760 +-- + .../gpu/arm/midgard/mali_kbase_hwcnt_gpu.h | 314 +- + .../gpu/arm/midgard/mali_kbase_hwcnt_legacy.c | 11 +- + .../gpu/arm/midgard/mali_kbase_hwcnt_legacy.h | 7 +- + .../gpu/arm/midgard/mali_kbase_hwcnt_types.c | 129 +- + .../gpu/arm/midgard/mali_kbase_hwcnt_types.h | 125 +- + .../midgard/mali_kbase_hwcnt_virtualizer.c | 26 +- + .../midgard/mali_kbase_hwcnt_virtualizer.h | 23 +- + .../drivers/gpu/arm/midgard/mali_kbase_jd.c | 331 +- + .../gpu/arm/midgard/mali_kbase_jd_debugfs.c | 56 +- + .../gpu/arm/midgard/mali_kbase_jd_debugfs.h | 12 +- + .../drivers/gpu/arm/midgard/mali_kbase_jm.c | 16 +- + .../drivers/gpu/arm/midgard/mali_kbase_jm.h | 12 +- + .../drivers/gpu/arm/midgard/mali_kbase_js.c | 422 +- + .../drivers/gpu/arm/midgard/mali_kbase_js.h | 12 +- + .../gpu/arm/midgard/mali_kbase_js_ctx_attr.c | 39 +- + .../gpu/arm/midgard/mali_kbase_js_ctx_attr.h | 47 +- + .../gpu/arm/midgard/mali_kbase_kinstr_jm.c | 894 +++ + .../gpu/arm/midgard/mali_kbase_kinstr_jm.h | 275 + + .../gpu/arm/midgard/mali_kbase_linux.h | 12 +- + .../drivers/gpu/arm/midgard/mali_kbase_mem.c | 1160 ++-- + .../drivers/gpu/arm/midgard/mali_kbase_mem.h | 518 +- + .../gpu/arm/midgard/mali_kbase_mem_linux.c | 683 ++- + .../gpu/arm/midgard/mali_kbase_mem_linux.h | 36 +- + .../gpu/arm/midgard/mali_kbase_mem_lowlevel.h | 13 +- + .../gpu/arm/midgard/mali_kbase_mem_pool.c | 44 +- + .../arm/midgard/mali_kbase_mem_pool_debugfs.c | 7 +- + .../arm/midgard/mali_kbase_mem_pool_debugfs.h | 7 +- + .../arm/midgard/mali_kbase_mem_pool_group.c | 7 +- + 
.../arm/midgard/mali_kbase_mem_pool_group.h | 7 +- + .../midgard/mali_kbase_mem_profile_debugfs.c | 26 +- + .../midgard/mali_kbase_mem_profile_debugfs.h | 23 +- + .../mali_kbase_mem_profile_debugfs_buf_size.h | 8 +- + .../arm/midgard/mali_kbase_mipe_gen_header.h | 40 +- + .../gpu/arm/midgard/mali_kbase_mipe_proto.h | 7 +- + .../gpu/arm/midgard/mali_kbase_native_mgm.c | 7 +- + .../gpu/arm/midgard/mali_kbase_native_mgm.h | 9 +- + .../arm/midgard/mali_kbase_platform_fake.c | 15 +- + .../drivers/gpu/arm/midgard/mali_kbase_pm.c | 55 +- + .../drivers/gpu/arm/midgard/mali_kbase_pm.h | 34 +- + .../midgard/mali_kbase_regs_history_debugfs.c | 138 +- + .../midgard/mali_kbase_regs_history_debugfs.h | 40 +- + .../gpu/arm/midgard/mali_kbase_reset_gpu.h | 162 +- + .../drivers/gpu/arm/midgard/mali_kbase_smc.c | 9 +- + .../drivers/gpu/arm/midgard/mali_kbase_smc.h | 11 +- + .../gpu/arm/midgard/mali_kbase_softjobs.c | 167 +- + .../gpu/arm/midgard/mali_kbase_strings.c | 10 +- + .../gpu/arm/midgard/mali_kbase_strings.h | 7 +- + .../drivers/gpu/arm/midgard/mali_kbase_sync.h | 28 +- + .../gpu/arm/midgard/mali_kbase_sync_android.c | 67 +- + .../gpu/arm/midgard/mali_kbase_sync_common.c | 11 +- + .../gpu/arm/midgard/mali_kbase_sync_file.c | 36 +- + .../arm/midgard/mali_kbase_trace_gpu_mem.c | 221 + + .../arm/midgard/mali_kbase_trace_gpu_mem.h | 100 + + .../gpu/arm/midgard/mali_kbase_utility.h | 11 +- + .../gpu/arm/midgard/mali_kbase_vinstr.c | 311 +- + .../gpu/arm/midgard/mali_kbase_vinstr.h | 7 +- + .../gpu/arm/midgard/mali_linux_trace.h | 47 +- + .../drivers/gpu/arm/midgard/mali_malisw.h | 60 +- + ...gpu.h => mali_power_gpu_frequency_trace.c} | 21 +- + .../midgard/mali_power_gpu_frequency_trace.h | 68 + + .../Kconfig => gpu/arm/midgard/mmu/Kbuild} | 22 +- + .../midgard/mmu/backend/mali_kbase_mmu_csf.c | 565 ++ + .../midgard/mmu/backend/mali_kbase_mmu_jm.c | 90 +- + .../gpu/arm/midgard/mmu/mali_kbase_mmu.c | 220 +- + .../gpu/arm/midgard/mmu/mali_kbase_mmu.h | 45 +- + 
.../gpu/arm/midgard/mmu/mali_kbase_mmu_hw.h | 12 +- + .../midgard/mmu/mali_kbase_mmu_hw_direct.c | 67 +- + .../arm/midgard/mmu/mali_kbase_mmu_internal.h | 49 +- + .../midgard/mmu/mali_kbase_mmu_mode_aarch64.c | 25 +- + .../midgard/mmu/mali_kbase_mmu_mode_lpae.c | 215 - + .../drivers/gpu/arm/midgard/platform/Kconfig | 10 +- + .../arm/midgard/platform/devicetree/Kbuild | 7 +- + .../midgard/platform/devicetree/Kbuild.rej | 17 + + .../devicetree/mali_kbase_clk_rate_trace.c | 105 + + .../devicetree/mali_kbase_config_devicetree.c | 39 +- + .../devicetree/mali_kbase_config_platform.c | 43 + + .../devicetree/mali_kbase_config_platform.h | 54 +- + .../mali_kbase_config_platform.h.rej | 42 + + .../devicetree/mali_kbase_runtime_pm.c | 26 +- + .../gpu/arm/midgard/platform/vexpress/Kbuild | 11 +- + .../vexpress/mali_kbase_config_platform.h | 7 +- + .../vexpress/mali_kbase_config_vexpress.c | 22 +- + .../midgard/platform/vexpress_1xv7_a57/Kbuild | 11 +- + .../mali_kbase_config_platform.h | 7 +- + .../mali_kbase_config_vexpress.c | 20 +- + .../platform/vexpress_6xvirtex7_10mhz/Kbuild | 13 +- + .../mali_kbase_config_platform.h | 7 +- + .../mali_kbase_config_vexpress.c | 22 +- + .../gpu/arm/midgard/protected_mode_switcher.h | 31 +- + .../drivers/gpu/arm/midgard/tests/Kbuild | 17 +- + .../drivers/gpu/arm/midgard/tests/Kconfig | 46 +- + .../drivers/gpu/arm/midgard/tests/Mconfig | 81 +- + .../drivers/gpu/arm/midgard/tests/build.bp | 40 + + .../midgard/tests/include/kutf/kutf_helpers.h | 15 +- + .../tests/include/kutf/kutf_helpers_user.h | 25 +- + .../arm/midgard/tests/include/kutf/kutf_mem.h | 7 +- + .../tests/include/kutf/kutf_resultset.h | 7 +- + .../midgard/tests/include/kutf/kutf_suite.h | 29 +- + .../midgard/tests/include/kutf/kutf_utils.h | 7 +- + .../drivers/gpu/arm/midgard/tests/kutf/Kbuild | 21 +- + .../gpu/arm/midgard/tests/kutf/build.bp | 24 +- + .../gpu/arm/midgard/tests/kutf/kutf_helpers.c | 13 +- + .../midgard/tests/kutf/kutf_helpers_user.c | 28 +- + 
.../gpu/arm/midgard/tests/kutf/kutf_mem.c | 7 +- + .../arm/midgard/tests/kutf/kutf_resultset.c | 7 +- + .../gpu/arm/midgard/tests/kutf/kutf_suite.c | 23 +- + .../gpu/arm/midgard/tests/kutf/kutf_utils.c | 7 +- + .../mali_kutf_clk_rate_trace/kernel/Kbuild | 25 + + .../mali_kutf_clk_rate_trace/kernel/build.bp | 43 + + .../kernel/mali_kutf_clk_rate_trace_test.c | 957 ++++ + .../mali_kutf_clk_rate_trace_test.h | 151 + + .../midgard/tests/mali_kutf_irq_test/Kbuild | 13 +- + .../midgard/tests/mali_kutf_irq_test/Kconfig | 29 - + .../midgard/tests/mali_kutf_irq_test/Makefile | 51 - + .../midgard/tests/mali_kutf_irq_test/build.bp | 21 +- + .../mali_kutf_irq_test_main.c | 15 +- + .../arm/midgard/thirdparty}/Kbuild | 9 +- + .../arm/midgard/thirdparty/mali_kbase_mmap.c | 77 +- + .../kernel/drivers/gpu/arm/midgard/tl/Kbuild | 32 + + .../tl/backend/mali_kbase_timeline_csf.c | 171 + + .../tl/backend/mali_kbase_timeline_jm.c | 23 +- + .../gpu/arm/midgard/tl/mali_kbase_timeline.c | 142 +- + .../gpu/arm/midgard/tl/mali_kbase_timeline.h | 57 +- + .../arm/midgard/tl/mali_kbase_timeline_io.c | 177 +- + .../arm/midgard/tl/mali_kbase_timeline_priv.h | 26 +- + .../arm/midgard/tl/mali_kbase_tl_serialize.h | 7 +- + .../gpu/arm/midgard/tl/mali_kbase_tlstream.c | 43 +- + .../gpu/arm/midgard/tl/mali_kbase_tlstream.h | 22 +- + .../arm/midgard/tl/mali_kbase_tracepoints.c | 585 +- + .../arm/midgard/tl/mali_kbase_tracepoints.h | 1159 +++- + .../include/linux/dma-buf-test-exporter.h | 8 +- + .../include/linux/memory_group_manager.h | 7 +- + .../include/linux/priority_control_manager.h | 77 + + .../linux/protected_memory_allocator.h | 7 +- + .../include/linux/protected_mode_switcher.h | 7 +- + .../arm/midgard/csf/mali_base_csf_kernel.h | 765 +++ + .../csf/mali_gpu_csf_control_registers.h | 32 + + .../arm/midgard/csf/mali_gpu_csf_registers.h | 1488 +++++ + .../arm/midgard/csf/mali_kbase_csf_ioctl.h | 433 ++ + .../gpu/backend/mali_kbase_gpu_regmap_csf.h | 335 ++ + 
.../gpu/backend/mali_kbase_gpu_regmap_jm.h | 47 +- + .../midgard/gpu/mali_kbase_gpu_coherency.h | 13 +- + .../gpu/arm/midgard/gpu/mali_kbase_gpu_id.h | 64 +- + .../arm/midgard/gpu/mali_kbase_gpu_regmap.h | 434 ++ + .../gpu/arm/midgard/jm/mali_base_jm_kernel.h | 285 +- + .../gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h | 107 +- + .../uapi}/gpu/arm/midgard/mali_base_kernel.h | 461 +- + .../gpu/arm/midgard/mali_base_mem_priv.h | 23 +- + .../gpu/arm/midgard/mali_kbase_hwcnt_reader.h | 76 +- + .../uapi}/gpu/arm/midgard/mali_kbase_ioctl.h | 212 +- + .../arm/midgard/mali_kbase_kinstr_jm_reader.h | 69 + + .../uapi}/gpu/arm/midgard/mali_uk.h | 46 +- + 407 files changed, 58537 insertions(+), 10409 deletions(-) + create mode 100644 dvalin/kernel/Documentation/ABI/testing/sysfs-device-mali + create mode 100644 dvalin/kernel/Documentation/devicetree/bindings/arm/priority_control_manager.txt + create mode 100644 dvalin/kernel/drivers/base/arm/Kbuild + create mode 100644 dvalin/kernel/drivers/base/arm/Kconfig + create mode 100644 dvalin/kernel/drivers/base/arm/Makefile + create mode 100644 dvalin/kernel/drivers/base/arm/Mconfig + rename dvalin/kernel/drivers/base/{ => arm}/dma_buf_lock/src/Kbuild (78%) + rename dvalin/kernel/drivers/base/{ => arm}/dma_buf_lock/src/Makefile (71%) + rename dvalin/kernel/drivers/base/{ => arm}/dma_buf_lock/src/dma_buf_lock.c (90%) + rename dvalin/kernel/drivers/base/{ => arm}/dma_buf_lock/src/dma_buf_lock.h (88%) + create mode 100644 dvalin/kernel/drivers/base/arm/dma_buf_test_exporter/Kbuild + create mode 100644 dvalin/kernel/drivers/base/arm/dma_buf_test_exporter/build.bp + rename dvalin/kernel/drivers/base/{ => arm}/dma_buf_test_exporter/dma-buf-test-exporter.c (89%) + rename dvalin/kernel/drivers/base/{dma_buf_test_exporter => arm/memory_group_manager}/Kbuild (74%) + create mode 100644 dvalin/kernel/drivers/base/arm/memory_group_manager/build.bp + rename dvalin/kernel/drivers/base/{ => arm}/memory_group_manager/memory_group_manager.c (98%) + create 
mode 100644 dvalin/kernel/drivers/base/arm/protected_memory_allocator/Kbuild + create mode 100644 dvalin/kernel/drivers/base/arm/protected_memory_allocator/build.bp + create mode 100644 dvalin/kernel/drivers/base/arm/protected_memory_allocator/protected_memory_allocator.c + delete mode 100644 dvalin/kernel/drivers/base/dma_buf_test_exporter/Kconfig + delete mode 100644 dvalin/kernel/drivers/base/dma_buf_test_exporter/Makefile + delete mode 100644 dvalin/kernel/drivers/base/dma_buf_test_exporter/build.bp + delete mode 100644 dvalin/kernel/drivers/base/memory_group_manager/Makefile + delete mode 100644 dvalin/kernel/drivers/base/memory_group_manager/build.bp + delete mode 100644 dvalin/kernel/drivers/base/protected_memory_allocator/Makefile + delete mode 100644 dvalin/kernel/drivers/base/protected_memory_allocator/build.bp + delete mode 100644 dvalin/kernel/drivers/base/protected_memory_allocator/protected_memory_allocator.c + rename dvalin/kernel/drivers/{base/protected_memory_allocator/Kbuild => gpu/arm/Makefile} (77%) + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_clk_rate_trace_mgr.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_clk_rate_trace_mgr.h + delete mode 100755 dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c + delete mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_mcu_states.h + rename dvalin/kernel/drivers/{base/memory_group_manager/Kconfig => gpu/arm/midgard/context/Kbuild} (63%) + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/context/backend/mali_kbase_context_csf.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/Kbuild + rename dvalin/kernel/drivers/gpu/arm/midgard/{Makefile.kbase => csf/ipa_control/Kbuild} (75%) + create mode 100644 
dvalin/kernel/drivers/gpu/arm/midgard/csf/ipa_control/mali_kbase_csf_ipa_control.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/ipa_control/mali_kbase_csf_ipa_control.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_cpu_queue_debugfs.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_cpu_queue_debugfs.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_csg_debugfs.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_csg_debugfs.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_defs.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware_cfg.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware_cfg.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware_no_mali.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_heap_context_alloc.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_heap_context_alloc.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu_debugfs.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu_debugfs.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_protected_memory.c + create mode 100644 
dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_protected_memory.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_reset_gpu.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_scheduler.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_scheduler.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap_debugfs.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap_debugfs.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap_def.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_timeout.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_timeout.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tl_reader.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tl_reader.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_trace_buffer.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_trace_buffer.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/debug/Kbuild + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_codes_csf.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_csf.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_csf.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_defs_csf.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_linux_ktrace_csf.h + rename 
dvalin/kernel/drivers/gpu/arm/midgard/{tests/kutf/Makefile => device/Kbuild} (56%) + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_csf.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_hw_csf.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_hw_jm.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device_hw.c + rename dvalin/kernel/drivers/gpu/arm/midgard/{tests/kutf/Kconfig => gpu/Kbuild} (65%) + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_fault_csf.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_csf.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_csf.h + rename dvalin/kernel/drivers/gpu/arm/midgard/ipa/{mali_kbase_ipa_vinstr_common.c => backend/mali_kbase_ipa_counter_common_jm.c} (95%) + rename dvalin/kernel/drivers/gpu/arm/midgard/ipa/{mali_kbase_ipa_vinstr_common.h => backend/mali_kbase_ipa_counter_common_jm.h} (85%) + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_csf.c + rename dvalin/kernel/drivers/gpu/arm/midgard/ipa/{mali_kbase_ipa_vinstr_g7x.c => backend/mali_kbase_ipa_counter_jm.c} (83%) + mode change 100644 => 100755 dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_caps.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ccswe.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ccswe.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dvfs_debugfs.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dvfs_debugfs.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_ops.c + create mode 100644 
dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf_if.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf_if_fw.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf_if_fw.h + delete mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_jm.c + rename dvalin/kernel/drivers/gpu/arm/midgard/{mali_kbase_hwcnt_backend_gpu.h => mali_kbase_hwcnt_backend_jm.h} (75%) + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_kinstr_jm.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_kinstr_jm.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_gpu_mem.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_gpu_mem.h + rename dvalin/kernel/drivers/gpu/arm/midgard/{gpu/mali_kbase_gpu.h => mali_power_gpu_frequency_trace.c} (67%) + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mali_power_gpu_frequency_trace.h + rename dvalin/kernel/drivers/{base/protected_memory_allocator/Kconfig => gpu/arm/midgard/mmu/Kbuild} (61%) + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mmu/backend/mali_kbase_mmu_csf.c + delete mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_mode_lpae.c + mode change 100644 => 100755 dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild.rej + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_clk_rate_trace.c + mode change 100644 => 100755 dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c + create mode 
100644 dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.c + mode change 100644 => 100755 dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h.rej + mode change 100644 => 100755 dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/tests/build.bp + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/kernel/Kbuild + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/kernel/build.bp + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h + delete mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig + delete mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile + rename dvalin/kernel/drivers/{base/memory_group_manager => gpu/arm/midgard/thirdparty}/Kbuild (82%) + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/tl/Kbuild + create mode 100644 dvalin/kernel/drivers/gpu/arm/midgard/tl/backend/mali_kbase_timeline_csf.c + create mode 100644 dvalin/kernel/include/linux/priority_control_manager.h + create mode 100644 dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h + create mode 100644 dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_control_registers.h + create mode 100644 dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h + create mode 100644 dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h + create mode 100644 dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h + rename 
dvalin/kernel/{drivers => include/uapi}/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h (89%) + rename dvalin/kernel/{drivers => include/uapi}/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h (75%) + rename dvalin/kernel/{drivers => include/uapi}/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h (74%) + create mode 100644 dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h + rename dvalin/kernel/{drivers => include/uapi}/gpu/arm/midgard/jm/mali_base_jm_kernel.h (75%) + rename dvalin/kernel/{drivers => include/uapi}/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h (57%) + rename dvalin/kernel/{drivers => include/uapi}/gpu/arm/midgard/mali_base_kernel.h (72%) + rename dvalin/kernel/{drivers => include/uapi}/gpu/arm/midgard/mali_base_mem_priv.h (80%) + rename dvalin/kernel/{drivers => include/uapi}/gpu/arm/midgard/mali_kbase_hwcnt_reader.h (52%) + rename dvalin/kernel/{drivers => include/uapi}/gpu/arm/midgard/mali_kbase_ioctl.h (83%) + create mode 100644 dvalin/kernel/include/uapi/gpu/arm/midgard/mali_kbase_kinstr_jm_reader.h + rename dvalin/kernel/{drivers => include/uapi}/gpu/arm/midgard/mali_uk.h (69%) + +diff --git a/dvalin/kernel/Documentation/ABI/testing/sysfs-device-mali b/dvalin/kernel/Documentation/ABI/testing/sysfs-device-mali +new file mode 100644 +index 0000000..99f8ae5 +--- /dev/null ++++ b/dvalin/kernel/Documentation/ABI/testing/sysfs-device-mali +@@ -0,0 +1,293 @@ ++/* ++ * ++ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU licence. ++ * ++ * A copy of the licence is included with the program, and can also be obtained ++ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA.
++ * ++ */ ++ ++What: /sys/class/misc/mali%u/device/core_mask ++Description: ++ This attribute is used to restrict the number of shader cores ++ available in this instance, which is useful for debugging purposes. ++ Reading this attribute provides the mask of all cores available. ++ Writing to it will set the current core mask. It does not ++ allow disabling all the cores present in this instance. ++ ++What: /sys/class/misc/mali%u/device/debug_command ++Description: ++ This attribute is used to issue debug commands that are supported ++ by the driver. On reading it provides the list of debug commands ++ that are supported, and writing back one of those commands will ++ enable that debug option. ++ ++What: /sys/class/misc/mali%u/device/dvfs_period ++Description: ++ This is used to set the DVFS sampling period to be used by the ++ driver. On reading it provides the current DVFS sampling period; ++ on writing a value we set the DVFS sampling period. ++ ++What: /sys/class/misc/mali%u/device/dummy_job_wa_info ++Description: ++ This attribute is available only with platform device that ++ supports a Job Manager based GPU that requires a GPU workaround ++ to execute the dummy fragment job on all shader cores to ++ work around a hang issue. ++ ++ It's a read-only attribute and on reading gives details on the ++ options used with the dummy workaround. ++ ++What: /sys/class/misc/mali%u/device/fw_timeout ++Description: ++ This attribute is available only with mali platform ++ device-driver that supports a CSF GPU. This attribute is ++ used to set the duration value in milliseconds for the ++ waiting timeout used for a GPU status change request being ++ acknowledged by the FW. ++ ++What: /sys/class/misc/mali%u/device/gpuinfo ++Description: ++ This attribute provides a description of the present Mali GPU. ++ It's a read-only attribute that provides details like GPU family, the ++ number of cores, the hardware version and the raw product id.
++ ++What: /sys/class/misc/mali%u/device/idle_hysteresis_time ++Description: ++ This attribute is available only with mali platform ++ device-driver that supports a CSF GPU. This attribute is ++ used to set the duration value in milliseconds for ++ configuring the hysteresis field used for GPU idle detection. ++ ++What: /sys/class/misc/mali%u/device/js_ctx_scheduling_mode ++Description: ++ This attribute is available only with platform device that ++ supports a Job Manager based GPU. This attribute is used to set ++ context scheduling priority for a job slot. ++ ++ On reading it provides the currently set job slot context ++ priority. ++ ++ Writing 0 to this attribute sets it to the mode where ++ higher priority atoms will be scheduled first, regardless of ++ the context they belong to. Newly-runnable higher priority atoms ++ can preempt lower priority atoms currently running on the GPU, ++ even if they belong to a different context. ++ ++ Writing 1 to this attribute sets it to the mode where the ++ highest-priority atom will be chosen from each context in turn ++ using a round-robin algorithm, so priority only has an effect ++ within the context an atom belongs to. Newly-runnable higher ++ priority atoms can preempt the lower priority atoms currently ++ running on the GPU, but only if they belong to the same context. ++ ++What: /sys/class/misc/mali%u/device/js_scheduling_period ++Description: ++ This attribute is available only with platform device that ++ supports a Job Manager based GPU. Used to set the job scheduler ++ tick period in nano-seconds. The Job Scheduler determines the ++ jobs that are run on the GPU, and for how long. The Job Scheduler ++ makes decisions at a regular time interval determined by the value ++ in js_scheduling_period. ++ ++What: /sys/class/misc/mali%u/device/js_softstop_always ++Description: ++ This attribute is available only with platform device that ++ supports a Job Manager based GPU.
Soft-stops are disabled when ++ only a single context is present; this attribute is used to ++ enable soft-stop when only a single context is present, which can be ++ used for debug and unit-testing purposes. ++ ++What: /sys/class/misc/mali%u/device/js_timeouts ++Description: ++ This attribute is available only with platform device that ++ supports a Job Manager based GPU. It is used to set the soft stop ++ and hard stop times for the job scheduler. ++ ++ Writing value 0 causes no change, and writing -1 restores the ++ default timeout. ++ ++ The format used to set js_timeouts is ++ "<soft stop ms> <soft stop ms cl> <hard stop ms ss> ++ <hard stop ms cl> <hard stop ms dumping> <reset ms ss> ++ <reset ms cl> <reset ms dumping>" ++ ++ ++What: /sys/class/misc/mali%u/device/lp_mem_pool_max_size ++Description: ++ This attribute is used to set the maximum number of large pages ++ for memory pools that the driver can contain. Large pages are of ++ size 2MB. On read it displays the max size of all memory ++ pools, and it can be used to modify each individual pool as well. ++ ++What: /sys/class/misc/mali%u/device/lp_mem_pool_size ++Description: ++ This attribute is used to set the number of large memory pages ++ which should be populated; changing this value may cause ++ existing pages to be removed from the pool, or new pages to be ++ created and then added to the pool. On read it will provide ++ pool size for all available pools and we can modify an individual ++ pool. ++ ++What: /sys/class/misc/mali%u/device/mem_pool_max_size ++Description: ++ This attribute is used to set the maximum number of small pages ++ for memory pools that the driver can contain. Here small pages ++ are of size 4KB. On read it will display the max size for all ++ available pools and allows us to set the max size of ++ individual pools. ++ ++What: /sys/class/misc/mali%u/device/mem_pool_size ++Description: ++ This attribute is used to set the number of small memory pages ++ which should be populated; changing this value may cause ++ existing pages to be removed from the pool, or new pages to ++ be created and then added to the pool.
On read it will provide ++ pool size for all available pools and we can modify an individual ++ pool. ++ ++What: /sys/class/misc/mali%u/device/device/mempool/ctx_default_max_size ++Description: ++ This attribute is used to set the maximum memory pool size for ++ all the memory pools so that the maximum amount of free memory ++ that each pool can hold is identical. ++ ++What: /sys/class/misc/mali%u/device/device/mempool/lp_max_size ++Description: ++ This attribute is used to set the maximum number of large pages ++ for all memory pools that the driver can contain. ++ Large pages are of size 2MB. ++ ++What: /sys/class/misc/mali%u/device/device/mempool/max_size ++Description: ++ This attribute is used to set the maximum number of small pages ++ for all the memory pools that the driver can contain. ++ Here small pages are of size 4KB. ++ ++What: /sys/class/misc/mali%u/device/pm_poweroff ++Description: ++ This attribute contains the current values, represented as the ++ following space-separated integers: ++ • PM_GPU_POWEROFF_TICK_NS. ++ • PM_POWEROFF_TICK_SHADER. ++ • PM_POWEROFF_TICK_GPU. ++ ++ Example: ++ echo 100000 4 4 > /sys/class/misc/mali0/device/pm_poweroff ++ ++ Sets the following new values: 100,000ns tick, four ticks ++ for shader power down, and four ticks for GPU power down. ++ ++What: /sys/class/misc/mali%u/device/power_policy ++Description: ++ This attribute is used to find the current power policy being ++ used. Reading will list the power policies available, and the ++ one enclosed in square brackets is the one currently selected. ++ ++ Example: ++ cat /sys/class/misc/mali0/device/power_policy ++ [demand] coarse_demand always_on ++ ++ To switch to a different policy at runtime write the valid entry ++ name back to the attribute.
++ ++ Example: ++ echo "coarse_demand" > /sys/class/misc/mali0/device/power_policy ++ ++What: /sys/class/misc/mali%u/device/progress_timeout ++Description: ++ This attribute is available only with mali platform ++ device-driver that supports a CSF GPU. This attribute ++ is used to set the progress timeout value and read the current ++ progress timeout value. ++ ++ Progress timeout value is the maximum number of GPU cycles ++ without forward progress to allow to elapse before terminating a ++ GPU command queue group. ++ ++What: /sys/class/misc/mali%u/device/reset_timeout ++Description: ++ This attribute is used to set the number of milliseconds to ++ wait for the soft stop to complete for the GPU jobs before ++ proceeding with the GPU reset. ++ ++What: /sys/class/misc/mali%u/device/soft_job_timeout ++Description: ++ This attribute is available only with platform device that ++ supports a Job Manager based GPU. It is used to set the timeout ++ value for waiting for any soft event to complete. ++ ++What: /sys/class/misc/mali%u/device/scheduling/serialize_jobs ++Description: ++ This attribute is available only with platform device that ++ supports a Job Manager based GPU. ++ ++ Various options available under this are: ++ • none - for disabling serialization. ++ • intra-slot - Serialize atoms within a slot, only one ++ atom per job slot. ++ • inter-slot - Serialize atoms between slots, only one ++ job slot running at any time. ++ • full - it is a combination of both inter and intra slot, ++ so only one atom and one job slot running ++ at any time. ++ • full-reset - full serialization and reset the GPU after ++ each atom completion. ++ ++ These options are useful for debugging and investigating ++ failures and GPU hangs to narrow down atoms that could cause ++ trouble. ++ ++What: /sys/class/misc/mali%u/device/firmware_config/Compute iterator count/* ++Description: ++ This attribute is available only with mali platform ++ device-driver that supports a CSF GPU.
It's a read-only attribute ++ which indicates the maximum number of Compute iterators ++ supported by the GPU. ++ ++What: /sys/class/misc/mali%u/device/firmware_config/CSHWIF count/* ++Description: ++ This attribute is available only with mali platform ++ device-driver that supports a CSF GPU. It's a read-only ++ attribute which indicates the maximum number of CSHWIFs ++ supported by the GPU. ++ ++What: /sys/class/misc/mali%u/device/firmware_config/Fragment iterator count/* ++Description: ++ This attribute is available only with mali platform ++ device-driver that supports a CSF GPU. It's a read-only ++ attribute which indicates the maximum number of ++ Fragment iterators supported by the GPU. ++ ++What: /sys/class/misc/mali%u/device/firmware_config/Scoreboard set count/* ++Description: ++ This attribute is available only with mali platform ++ device-driver that supports a CSF GPU. It's a read-only ++ attribute which indicates the maximum number of ++ Scoreboard sets supported by the GPU. ++ ++What: /sys/class/misc/mali%u/device/firmware_config/Tiler iterator count/* ++Description: ++ This attribute is available only with mali platform ++ device-driver that supports a CSF GPU. It's a read-only ++ attribute which indicates the maximum number of Tiler iterators ++ supported by the GPU. ++ ++What: /sys/class/misc/mali%u/device/firmware_config/Log verbosity/* ++Description: ++ This attribute is available only with mali platform ++ device-driver that supports a CSF GPU. ++ ++ Used to enable firmware logs. The valid logging levels ++ are indicated by the 'min' and 'max' attribute ++ values, which are read-only. ++ ++ Log level can be set using the 'cur' read/write attribute; ++ we can use a valid log level value from the min and max range ++ and set a valid desired log level for firmware logs.
+diff --git a/dvalin/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt b/dvalin/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt +index dd8f733..a74d569 100644 +--- a/dvalin/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt ++++ b/dvalin/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2013-2020 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved. + # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,8 +16,6 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. + # +-# SPDX-License-Identifier: GPL-2.0 +-# + # + + * ARM Mali Midgard / Bifrost devices +@@ -46,12 +45,12 @@ Documentation/devicetree/bindings/regulator/regulator.txt for details. + This is optional. + - operating-points-v2 : Refer to Documentation/devicetree/bindings/power/mali-opp.txt + for details. +-- quirks_jm : Used to write to the JM_CONFIG register or equivalent. ++- quirks_gpu : Used to write to the JM_CONFIG or CSF_CONFIG register. + Should be used with care. Options passed here are used to override + certain default behavior. Note: This will override 'idvs-group-size' + field in devicetree and module param 'corestack_driver_control', +- therefore if 'quirks_jm' is used then 'idvs-group-size' and +- 'corestack_driver_control' value should be incorporated into 'quirks_jm'. 
++ therefore if 'quirks_gpu' is used then 'idvs-group-size' and ++ 'corestack_driver_control' value should be incorporated into 'quirks_gpu'. + - quirks_sc : Used to write to the SHADER_CONFIG register. + Should be used with care. Options passed here are used to override + certain default behavior. +@@ -64,8 +63,8 @@ for details. + - power_model : Sets the power model parameters. Defined power models include: + "mali-simple-power-model", "mali-g51-power-model", "mali-g52-power-model", + "mali-g52_r1-power-model", "mali-g71-power-model", "mali-g72-power-model", +- "mali-g76-power-model", "mali-g77-power-model", "mali-tnax-power-model" +- and "mali-tbex-power-model". ++ "mali-g76-power-model", "mali-g77-power-model", "mali-tnax-power-model", ++ "mali-tbex-power-model" and "mali-tbax-power-model". + - mali-simple-power-model: this model derives the GPU power usage based + on the GPU voltage scaled by the system temperature. Note: it was + designed for the Juno platform, and may not be suitable for others. +@@ -98,6 +97,8 @@ for details. + are used at different points so care should be taken to configure + both power models in the device tree (specifically dynamic-coefficient, + static-coefficient and scale) to best match the platform. ++- power_policy : Sets the GPU power policy at probe time. Available options are ++ "coarse_demand" and "always_on". If not set, then "coarse_demand" is used. + - system-coherency : Sets the coherency protocol to be used for coherent + accesses made from the GPU. + If not set then no coherency is used. 
+diff --git a/dvalin/kernel/Documentation/devicetree/bindings/arm/memory_group_manager.txt b/dvalin/kernel/Documentation/devicetree/bindings/arm/memory_group_manager.txt +index fda8f00..634973f 100644 +--- a/dvalin/kernel/Documentation/devicetree/bindings/arm/memory_group_manager.txt ++++ b/dvalin/kernel/Documentation/devicetree/bindings/arm/memory_group_manager.txt +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2019 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,8 +16,6 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. + # +-# SPDX-License-Identifier: GPL-2.0 +-# + # + + * Arm memory group manager for Mali GPU device drivers +diff --git a/dvalin/kernel/Documentation/devicetree/bindings/arm/priority_control_manager.txt b/dvalin/kernel/Documentation/devicetree/bindings/arm/priority_control_manager.txt +new file mode 100644 +index 0000000..c7dd14f +--- /dev/null ++++ b/dvalin/kernel/Documentation/devicetree/bindings/arm/priority_control_manager.txt +@@ -0,0 +1,48 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. 
++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# ++ ++* Arm priority control manager for Mali GPU device drivers ++ ++Required properties: ++ ++- compatible: Must be "arm,priority-control-manager" ++ ++An example node: ++ ++ gpu_priority_control_manager: priority-control-manager { ++ compatible = "arm,priority-control-manager"; ++ }; ++ ++It must be referenced by the GPU as well, see priority-control-manager: ++ ++ gpu: gpu@0x6e000000 { ++ compatible = "arm,mali-midgard"; ++ reg = <0x0 0x6e000000 0x0 0x200000>; ++ interrupts = <0 168 4>, <0 168 4>, <0 168 4>; ++ interrupt-names = "JOB", "MMU", "GPU"; ++ clocks = <&scpi_dvfs 2>; ++ clock-names = "clk_mali"; ++ system-coherency = <31>; ++ priority-control-manager = <&gpu_priority_control_manager>; ++ operating-points = < ++ /* KHz uV */ ++ 50000 820000 ++ >; ++ }; +diff --git a/dvalin/kernel/Documentation/devicetree/bindings/arm/protected_memory_allocator.txt b/dvalin/kernel/Documentation/devicetree/bindings/arm/protected_memory_allocator.txt +index f054348..89a3cc7 100644 +--- a/dvalin/kernel/Documentation/devicetree/bindings/arm/protected_memory_allocator.txt ++++ b/dvalin/kernel/Documentation/devicetree/bindings/arm/protected_memory_allocator.txt +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2019 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
+ # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,8 +16,6 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. + # +-# SPDX-License-Identifier: GPL-2.0 +-# + # + + * Arm protected memory allocator for Mali GPU device drivers +diff --git a/dvalin/kernel/Documentation/devicetree/bindings/power/mali-opp.txt b/dvalin/kernel/Documentation/devicetree/bindings/power/mali-opp.txt +index 49ed773..b9c0743 100644 +--- a/dvalin/kernel/Documentation/devicetree/bindings/power/mali-opp.txt ++++ b/dvalin/kernel/Documentation/devicetree/bindings/power/mali-opp.txt +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2017, 2019 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2017, 2019-2021 ARM Limited. All rights reserved. + # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,8 +16,6 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. + # +-# SPDX-License-Identifier: GPL-2.0 +-# + # + + * ARM Mali Midgard OPP +@@ -54,7 +53,7 @@ Optional properties: + + - opp-core-count: Number of cores to use for this OPP. 
If this is present then + the driver will build a core mask using the available core mask provided by +- the GPU hardware. ++ the GPU hardware. An opp-core-count value of 0 is not permitted. + + If neither this nor opp-core-mask are present then all shader cores will be + used for this OPP. +diff --git a/dvalin/kernel/Documentation/dma-buf-test-exporter.txt b/dvalin/kernel/Documentation/dma-buf-test-exporter.txt +index 8d8cbc9..b01020c 100644 +--- a/dvalin/kernel/Documentation/dma-buf-test-exporter.txt ++++ b/dvalin/kernel/Documentation/dma-buf-test-exporter.txt +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2012-2013, 2020-2021 ARM Limited. All rights reserved. + # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,10 +16,7 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. + # +-# SPDX-License-Identifier: GPL-2.0 + # +-# +- + + ===================== + dma-buf-test-exporter +@@ -42,5 +40,3 @@ It supports being compiled as a module both in-tree and out-of-tree. + + See include/linux/dma-buf-test-exporter.h for the ioctl interface. + See Documentation/dma-buf-sharing.txt for details on dma_buf. +- +- +diff --git a/dvalin/kernel/Mconfig b/dvalin/kernel/Mconfig +index e451591..217715c 100644 +--- a/dvalin/kernel/Mconfig ++++ b/dvalin/kernel/Mconfig +@@ -1,27 +1,26 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# +-# (C) COPYRIGHT 2017-2020 ARM Limited. All rights reserved. 
++# (C) COPYRIGHT 2017-2021 ARM Limited. All rights reserved. + # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. + # +-# A copy of the licence is included with the program, and can also be obtained +-# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +-# Boston, MA 02110-1301, USA. ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. + # + # + +-source "kernel/drivers/gpu/arm/midgard/Mconfig" +-source "kernel/drivers/gpu/arm/midgard/arbitration/Mconfig" ++menu "Kernel menu" + +-config DMA_BUF_SYNC_IOCTL_SUPPORTED +- bool "Kernel DMA buffers support DMA_BUF_IOCTL_SYNC" +- depends on BACKEND_KERNEL +- default y ++source "kernel/drivers/base/arm/Mconfig" ++source "kernel/drivers/gpu/arm/midgard/Mconfig" + +-config BUILD_CSF_ONLY_MODULE +- bool "Build CSF GPU specific kernel modules" +- depends on BUILD_KERNEL_MODULES && GPU_HAS_CSF +- default y ++endmenu +diff --git a/dvalin/kernel/build.bp b/dvalin/kernel/build.bp +index 2bc725f..c97da2c 100644 +--- a/dvalin/kernel/build.bp ++++ b/dvalin/kernel/build.bp +@@ -1,15 +1,21 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2016-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2016-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * +- * A copy of the licence is included with the program, and can also be obtained +- * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +- * Boston, MA 02110-1301, USA. ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +@@ -25,6 +31,7 @@ bob_install_group { + + bob_defaults { + name: "kernel_defaults", ++ build_by_default: false, + enabled: false, + exclude_srcs: [ + "**/*.mod.c", +@@ -33,6 +40,7 @@ bob_defaults { + "include", + ], + build_kernel_modules: { ++ build_by_default: true, + enabled: true, + kernel_dir: "{{.kernel_dir}}", + kernel_cross_compile: "{{.kernel_compiler}}", +@@ -42,6 +50,8 @@ bob_defaults { + kernel_ld: "{{.kernel_ld}}", + }, + install_group: "IG_kernel_modules", ++ add_to_alias: ["kernel"], ++ owner: "{{.android_module_owner}}", + cflags: [ + "-Wall", + ], +@@ -54,49 +64,12 @@ bob_defaults { + "optional", + ], + }, +- kbuild_options: [ +- // Start of CS experimental features definitions. +- // If there is nothing below, definition should be added as follows: +- // "MALI_EXPERIMENTAL_FEATURE={{.experimental_feature}}" +- // experimental_feature above comes from Mconfig in +- // /product/base/ +- // However, in Mconfig, experimental_feature should be looked up (for +- // similar explanation to this one) as ALLCAPS, i.e. 
+- // EXPERIMENTAL_FEATURE. +- // +- // IMPORTANT: MALI_CS_EXPERIMENTAL should NEVER be defined below as it +- // is an umbrella feature that would be open for inappropriate use +- // (catch-all for experimental CS code without separating it into +- // different features). +- "MALI_JIT_PRESSURE_LIMIT={{.jit_pressure_limit}}", +- "MALI_INCREMENTAL_RENDERING={{.incremental_rendering}}", +- ], +-} +- +-bob_defaults { +- name: "kutf_includes", +- local_include_dirs: [ +- "drivers/gpu/arm/midgard/tests/include", +- ], +-} +- +-bob_defaults { +- name: "kernel_test_includes", +- defaults: ["kutf_includes"], +- local_include_dirs: [ +- "drivers/gpu/arm", +- "drivers/gpu/arm/midgard", +- "drivers/gpu/arm/midgard/backend/gpu", +- "drivers/gpu/arm/midgard/debug", +- "drivers/gpu/arm/midgard/debug/backend", +- ], + } + ++// Alias for all kernel modules. `kernel_defaults` uses `add_to_alias` to ++// ensure any modules using that are included in this alias; any ++// bob_kernel_modules not using those defaults must explicitly use ++// `add_to_alias` or be listed here. + bob_alias { + name: "kernel", +- srcs: [ +- "dma-buf-test-exporter", +- "memory_group_manager", +- "mali_kbase", +- ], + } +diff --git a/dvalin/kernel/drivers/base/arm/Kbuild b/dvalin/kernel/drivers/base/arm/Kbuild +new file mode 100644 +index 0000000..b0fbf93 +--- /dev/null ++++ b/dvalin/kernel/drivers/base/arm/Kbuild +@@ -0,0 +1,34 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# ++ ++# ++# ccflags ++# ++ccflags-y += -I$(src)/../../../include ++ ++subdir-ccflags-y += $(ccflags-y) ++ ++# ++# Kernel modules ++# ++obj-$(CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER) += dma_buf_test_exporter/ ++obj-$(CONFIG_MALI_MEMORY_GROUP_MANAGER) += memory_group_manager/ ++obj-$(CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR) += protected_memory_allocator/ ++ +diff --git a/dvalin/kernel/drivers/base/arm/Kconfig b/dvalin/kernel/drivers/base/arm/Kconfig +new file mode 100644 +index 0000000..75d5434 +--- /dev/null ++++ b/dvalin/kernel/drivers/base/arm/Kconfig +@@ -0,0 +1,64 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# ++ ++menuconfig MALI_BASE_MODULES ++ bool "Mali Base extra modules" ++ default n ++ help ++ Enable this option to build support for the Arm Mali base modules. ++ Those modules provide extra features or debug interfaces and ++ are optional for the use of the Mali GPU modules.
++ ++config DMA_SHARED_BUFFER_TEST_EXPORTER ++ bool "Build dma-buf framework test exporter module" ++ depends on MALI_BASE_MODULES && DMA_SHARED_BUFFER ++ default y ++ help ++ This option will build the dma-buf framework test exporter module. ++ Usable to help test importers. ++ ++ Modules: ++ - dma-buf-test-exporter.ko ++ ++config MALI_MEMORY_GROUP_MANAGER ++ bool "Build Mali Memory Group Manager module" ++ depends on MALI_BASE_MODULES ++ default y ++ help ++ This option will build the memory group manager module. ++ This is an example implementation for allocation and release of pages ++ for memory pools managed by Mali GPU device drivers. ++ ++ Modules: ++ - memory_group_manager.ko ++ ++config MALI_PROTECTED_MEMORY_ALLOCATOR ++ bool "Build Mali Protected Memory Allocator module" ++ depends on MALI_BASE_MODULES && MALI_CSF_SUPPORT ++ default y ++ help ++ This option will build the protected memory allocator module. ++ This is an example implementation for allocation and release of pages ++ of secure memory intended to be used by the firmware ++ of Mali GPU device drivers. ++ ++ Modules: ++ - protected_memory_allocator.ko ++ +diff --git a/dvalin/kernel/drivers/base/arm/Makefile b/dvalin/kernel/drivers/base/arm/Makefile +new file mode 100644 +index 0000000..0bd6ab5 +--- /dev/null ++++ b/dvalin/kernel/drivers/base/arm/Makefile +@@ -0,0 +1,98 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. 
++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# ++ ++# ++# Paths ++# ++KERNEL_SRC ?= /lib/modules/$(shell uname -r)/build ++KDIR ?= $(KERNEL_SRC) ++ ++ifeq ($(KDIR),) ++ $(error Must specify KDIR to point to the kernel to target)) ++endif ++ ++vars := ++# ++# Default configuration values ++# ++CONFIG_MALI_BASE_MODULES ?= n ++ ++ifeq ($(CONFIG_MALI_BASE_MODULES),y) ++ CONFIG_MALI_CSF_SUPPORT ?= n ++ ++ ifneq ($(CONFIG_DMA_SHARED_BUFFER),n) ++ CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER ?= y ++ else ++ # Prevent misuse when CONFIG_DMA_SHARED_BUFFER=n ++ CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER = n ++ endif ++ ++ CONFIG_MALI_MEMORY_GROUP_MANAGER ?= y ++ ++ ifneq ($(CONFIG_MALI_CSF_SUPPORT), n) ++ CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR ?= y ++ endif ++ ++else ++ # Prevent misuse when CONFIG_MALI_BASE_MODULES=n ++ CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER = n ++ CONFIG_MALI_MEMORY_GROUP_MANAGER = n ++ CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR = n ++ ++endif ++ ++CONFIGS := \ ++ CONFIG_MALI_BASE_MODULES \ ++ CONFIG_MALI_CSF_SUPPORT \ ++ CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER \ ++ CONFIG_MALI_MEMORY_GROUP_MANAGER \ ++ CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR ++ ++ ++# ++# MAKE_ARGS to pass the custom CONFIGs on out-of-tree build ++# ++# Generate the list of CONFIGs and values. ++# $(value config) is the name of the CONFIG option. ++# $(value $(value config)) is its value (y, m). ++# When the CONFIG is not set to y or m, it defaults to n. ++MAKE_ARGS := $(foreach config,$(CONFIGS), \ ++ $(if $(filter y m,$(value $(value config))), \ ++ $(value config)=$(value $(value config)), \ ++ $(value config)=n)) ++ ++# ++# EXTRA_CFLAGS to define the custom CONFIGs on out-of-tree build ++# ++# Generate the list of CONFIGs defines with values from CONFIGS. ++# $(value config) is the name of the CONFIG option. 
++# When set to y or m, the CONFIG gets defined to 1. ++EXTRA_CFLAGS := $(foreach config,$(CONFIGS), \ ++ $(if $(filter y m,$(value $(value config))), \ ++ -D$(value config)=1)) ++ ++all: ++ $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules ++ ++modules_install: ++ $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) modules_install ++ ++clean: ++ $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) clean +diff --git a/dvalin/kernel/drivers/base/arm/Mconfig b/dvalin/kernel/drivers/base/arm/Mconfig +new file mode 100644 +index 0000000..d21a72e +--- /dev/null ++++ b/dvalin/kernel/drivers/base/arm/Mconfig +@@ -0,0 +1,64 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# ++ ++menuconfig MALI_BASE_MODULES ++ bool "Mali Base extra modules" ++ default y if BACKEND_KERNEL ++ help ++ Enable this option to build support for the Arm Mali base modules. ++ Those modules provide extra features or debug interfaces and ++ are optional for the use of the Mali GPU modules.
++ ++config DMA_SHARED_BUFFER_TEST_EXPORTER ++ bool "Build dma-buf framework test exporter module" ++ depends on MALI_BASE_MODULES ++ default y ++ help ++ This option will build the dma-buf framework test exporter module. ++ Usable to help test importers. ++ ++ Modules: ++ - dma-buf-test-exporter.ko ++ ++config MALI_MEMORY_GROUP_MANAGER ++ bool "Build Mali Memory Group Manager module" ++ depends on MALI_BASE_MODULES ++ default y ++ help ++ This option will build the memory group manager module. ++ This is an example implementation for allocation and release of pages ++ for memory pools managed by Mali GPU device drivers. ++ ++ Modules: ++ - memory_group_manager.ko ++ ++config MALI_PROTECTED_MEMORY_ALLOCATOR ++ bool "Build Mali Protected Memory Allocator module" ++ depends on MALI_BASE_MODULES && GPU_HAS_CSF ++ default y ++ help ++ This option will build the protected memory allocator module. ++ This is an example implementation for allocation and release of pages ++ of secure memory intended to be used by the firmware ++ of Mali GPU device drivers. ++ ++ Modules: ++ - protected_memory_allocator.ko ++ +diff --git a/dvalin/kernel/drivers/base/dma_buf_lock/src/Kbuild b/dvalin/kernel/drivers/base/arm/dma_buf_lock/src/Kbuild +similarity index 78% +rename from dvalin/kernel/drivers/base/dma_buf_lock/src/Kbuild +rename to dvalin/kernel/drivers/base/arm/dma_buf_lock/src/Kbuild +index ddf1bb5..c7ae332 100644 +--- a/dvalin/kernel/drivers/base/dma_buf_lock/src/Kbuild ++++ b/dvalin/kernel/drivers/base/arm/dma_buf_lock/src/Kbuild +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2012 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2012, 2020-2021 ARM Limited. All rights reserved. 
+ # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,10 +16,8 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. + # +-# SPDX-License-Identifier: GPL-2.0 +-# + # + +-ifneq ($(CONFIG_DMA_SHARED_BUFFER),) ++ifeq ($(CONFIG_DMA_SHARED_BUFFER), y) + obj-m := dma_buf_lock.o + endif +diff --git a/dvalin/kernel/drivers/base/dma_buf_lock/src/Makefile b/dvalin/kernel/drivers/base/arm/dma_buf_lock/src/Makefile +similarity index 71% +rename from dvalin/kernel/drivers/base/dma_buf_lock/src/Makefile +rename to dvalin/kernel/drivers/base/arm/dma_buf_lock/src/Makefile +index 3b10406..451d2d7 100644 +--- a/dvalin/kernel/drivers/base/dma_buf_lock/src/Makefile ++++ b/dvalin/kernel/drivers/base/arm/dma_buf_lock/src/Makefile +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2012 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2012, 2020-2021 ARM Limited. All rights reserved. + # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,8 +16,6 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. 
+ # +-# SPDX-License-Identifier: GPL-2.0 +-# + # + + # linux build system bootstrap for out-of-tree module +@@ -24,15 +23,17 @@ + # default to building for the host + ARCH ?= $(shell uname -m) + +-ifeq ($(KDIR),) +-$(error Must specify KDIR to point to the kernel to target)) +-endif ++# Handle Android Common Kernel source naming ++KERNEL_SRC ?= /lib/modules/$(shell uname -r)/build ++KDIR ?= $(KERNEL_SRC) + + all: dma_buf_lock + + dma_buf_lock: +- $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include" ++ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../../include" + + clean: + $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean + ++modules_install: ++ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) modules_install +diff --git a/dvalin/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c b/dvalin/kernel/drivers/base/arm/dma_buf_lock/src/dma_buf_lock.c +similarity index 90% +rename from dvalin/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c +rename to dvalin/kernel/drivers/base/arm/dma_buf_lock/src/dma_buf_lock.c +index 529ce71..f5ab1ed 100644 +--- a/dvalin/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c ++++ b/dvalin/kernel/drivers/base/arm/dma_buf_lock/src/dma_buf_lock.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2012-2013, 2017-2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2014, 2017-2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include +@@ -29,7 +28,11 @@ + #include + #include + #include ++#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + #include ++#else ++#include ++#endif + #include + #include + #include +@@ -37,7 +40,7 @@ + #include + #include + +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + + #include + +@@ -60,7 +63,7 @@ + + #include + +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)) ++#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) + #define dma_fence_get_status(a) (dma_fence_is_signaled(a) ? \ + (a)->status ?: 1 \ + : 0) +@@ -101,7 +104,7 @@ static struct file_operations dma_buf_lock_fops = + + typedef struct dma_buf_lock_resource + { +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence fence; + #else + struct dma_fence fence; +@@ -127,7 +130,7 @@ typedef struct dma_buf_lock_resource + * @node: List head for linking this callback to the lock resource + */ + struct dma_buf_lock_fence_cb { +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence_cb fence_cb; + struct fence *fence; + #else +@@ -151,7 +154,7 @@ static void dma_buf_lock_dounlock(struct kref *ref); + static DEFINE_SPINLOCK(dma_buf_lock_fence_lock); + + static const char * +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + dma_buf_lock_fence_get_driver_name(struct fence *fence) + #else + dma_buf_lock_fence_get_driver_name(struct dma_fence *fence) +@@ -161,7 +164,7 @@ dma_buf_lock_fence_get_driver_name(struct dma_fence *fence) + } + + 
static const char * +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + dma_buf_lock_fence_get_timeline_name(struct fence *fence) + #else + dma_buf_lock_fence_get_timeline_name(struct dma_fence *fence) +@@ -171,7 +174,7 @@ dma_buf_lock_fence_get_timeline_name(struct dma_fence *fence) + } + + static bool +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + dma_buf_lock_fence_enable_signaling(struct fence *fence) + #else + dma_buf_lock_fence_enable_signaling(struct dma_fence *fence) +@@ -180,7 +183,7 @@ dma_buf_lock_fence_enable_signaling(struct dma_fence *fence) + return true; + } + +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + const struct fence_ops dma_buf_lock_fence_ops = { + .wait = fence_default_wait, + #else +@@ -235,7 +238,7 @@ dma_buf_lock_fence_work(struct work_struct *pwork) + } + + static void +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + dma_buf_lock_fence_callback(struct fence *fence, struct fence_cb *cb) + #else + dma_buf_lock_fence_callback(struct dma_fence *fence, struct dma_fence_cb *cb) +@@ -256,14 +259,13 @@ dma_buf_lock_fence_callback(struct dma_fence *fence, struct dma_fence_cb *cb) + atomic_set(&resource->locked, 1); + wake_up(&resource->wait); + +- if (resource->exclusive) { ++ if (resource->exclusive) + /* Warn if the work was already queued */ + WARN_ON(!schedule_work(&resource->work)); +- } + } + } + +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + static int + dma_buf_lock_fence_add_callback(dma_buf_lock_resource *resource, + struct fence *fence, +@@ -317,12 +319,19 @@ dma_buf_lock_fence_add_callback(dma_buf_lock_resource *resource, + return err; + } + ++#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + static int + 
dma_buf_lock_add_fence_reservation_callback(dma_buf_lock_resource *resource, + struct reservation_object *resv, + bool exclusive) ++#else ++static int ++dma_buf_lock_add_fence_reservation_callback(dma_buf_lock_resource *resource, ++ struct dma_resv *resv, ++ bool exclusive) ++#endif + { +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *excl_fence = NULL; + struct fence **shared_fences = NULL; + #else +@@ -332,7 +341,12 @@ dma_buf_lock_add_fence_reservation_callback(dma_buf_lock_resource *resource, + unsigned int shared_count = 0; + int err, i; + +- err = reservation_object_get_fences_rcu(resv, ++#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) ++ err = reservation_object_get_fences_rcu( ++#else ++ err = dma_resv_get_fences_rcu( ++#endif ++ resv, + &excl_fence, + &shared_count, + &shared_fences); +@@ -393,7 +407,11 @@ static int + dma_buf_lock_acquire_fence_reservation(dma_buf_lock_resource *resource, + struct ww_acquire_ctx *ctx) + { ++#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + struct reservation_object *content_resv = NULL; ++#else ++ struct dma_resv *content_resv = NULL; ++#endif + unsigned int content_resv_idx = 0; + unsigned int r; + int err = 0; +@@ -472,21 +490,16 @@ static unsigned int dma_buf_lock_handle_poll(struct file *file, + #if DMA_BUF_LOCK_DEBUG + printk("dma_buf_lock_handle_poll\n"); + #endif +- if (1 == atomic_read(&resource->locked)) +- { ++ if (atomic_read(&resource->locked) == 1) { + /* Resources have been locked */ + ret = POLLIN | POLLRDNORM; + if (resource->exclusive) +- { + ret |= POLLOUT | POLLWRNORM; +- } + } + else + { +- if (!poll_does_not_wait(wait)) +- { ++ if (!poll_does_not_wait(wait)) + poll_wait(file, &resource->wait, wait); +- } + } + #if DMA_BUF_LOCK_DEBUG + printk("dma_buf_lock_handle_poll : return %i\n", ret); +@@ -525,29 +538,19 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request) + int i; + int ret; + +- if (NULL == 
request->list_of_dma_buf_fds) +- { ++ if (request->list_of_dma_buf_fds == NULL) + return -EINVAL; +- } + if (request->count <= 0) +- { + return -EINVAL; +- } + if (request->count > DMA_BUF_LOCK_BUF_MAX) +- { + return -EINVAL; +- } + if (request->exclusive != DMA_BUF_LOCK_NONEXCLUSIVE && + request->exclusive != DMA_BUF_LOCK_EXCLUSIVE) +- { + return -EINVAL; +- } + + resource = kzalloc(sizeof(dma_buf_lock_resource), GFP_KERNEL); +- if (NULL == resource) +- { ++ if (resource == NULL) + return -ENOMEM; +- } + + atomic_set(&resource->locked, 0); + kref_init(&resource->refcount); +@@ -559,8 +562,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request) + size = request->count * sizeof(int); + resource->list_of_dma_buf_fds = kmalloc(size, GFP_KERNEL); + +- if (NULL == resource->list_of_dma_buf_fds) +- { ++ if (resource->list_of_dma_buf_fds == NULL) { + kfree(resource); + return -ENOMEM; + } +@@ -569,8 +571,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request) + size = sizeof(struct dma_buf *) * request->count; + resource->dma_bufs = kmalloc(size, GFP_KERNEL); + +- if (NULL == resource->dma_bufs) +- { ++ if (resource->dma_bufs == NULL) { + kfree(resource->list_of_dma_buf_fds); + kfree(resource); + return -ENOMEM; +@@ -578,8 +579,9 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request) + + /* Copy requested list of dma_buf_fds from user space */ + size = request->count * sizeof(int); +- if (0 != copy_from_user(resource->list_of_dma_buf_fds, (void __user *)request->list_of_dma_buf_fds, size)) +- { ++ if (copy_from_user(resource->list_of_dma_buf_fds, ++ (void __user *)request->list_of_dma_buf_fds, ++ size) != 0) { + kfree(resource->list_of_dma_buf_fds); + kfree(resource->dma_bufs); + kfree(resource); +@@ -587,9 +589,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request) + } + #if DMA_BUF_LOCK_DEBUG + for (i = 0; i < request->count; i++) +- { + printk("dma_buf %i = %X\n", i, resource->list_of_dma_buf_fds[i]); +- } + #endif + 
+ /* Initialize the fence associated with dma_buf_lock resource */ +@@ -620,8 +620,7 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request) + } + + /*Check the reservation object associated with dma_buf */ +- if (NULL == resource->dma_bufs[i]->resv) +- { ++ if (resource->dma_bufs[i]->resv == NULL) { + mutex_lock(&dma_buf_lock_mutex); + kref_put(&resource->refcount, dma_buf_lock_dounlock); + mutex_unlock(&dma_buf_lock_mutex); +@@ -680,10 +679,18 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request) + kref_get(&resource->refcount); + + for (i = 0; i < request->count; i++) { ++#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + struct reservation_object *resv = resource->dma_bufs[i]->resv; +- ++#else ++ struct dma_resv *resv = resource->dma_bufs[i]->resv; ++#endif + if (!test_bit(i, &resource->exclusive)) { ++ ++#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + ret = reservation_object_reserve_shared(resv); ++#else ++ ret = dma_resv_reserve_shared(resv, 0); ++#endif + if (ret) { + #if DMA_BUF_LOCK_DEBUG + printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d reserving space for shared fence.\n", ret); +@@ -701,7 +708,11 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request) + break; + } + ++#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + reservation_object_add_shared_fence(resv, &resource->fence); ++#else ++ dma_resv_add_shared_fence(resv, &resource->fence); ++#endif + } else { + ret = dma_buf_lock_add_fence_reservation_callback(resource, + resv, +@@ -713,7 +724,11 @@ static int dma_buf_lock_dolock(dma_buf_lock_k_request *request) + break; + } + ++#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + reservation_object_add_excl_fence(resv, &resource->fence); ++#else ++ dma_resv_add_excl_fence(resv, &resource->fence); ++#endif + } + } + +@@ -783,27 +798,21 @@ static int __init dma_buf_lock_init(void) + #endif + err = alloc_chrdev_region(&dma_buf_lock_dev, 0, 1, dma_buf_lock_dev_name); + +- if (0 == err) +- { ++ if (err == 0) { + 
cdev_init(&dma_buf_lock_cdev, &dma_buf_lock_fops); + + err = cdev_add(&dma_buf_lock_cdev, dma_buf_lock_dev, 1); + +- if (0 == err) +- { ++ if (err == 0) { + dma_buf_lock_class = class_create(THIS_MODULE, dma_buf_lock_dev_name); + if (IS_ERR(dma_buf_lock_class)) +- { + err = PTR_ERR(dma_buf_lock_class); +- } + else + { + struct device *mdev; + mdev = device_create(dma_buf_lock_class, NULL, dma_buf_lock_dev, NULL, dma_buf_lock_dev_name); + if (!IS_ERR(mdev)) +- { + return 0; +- } + + err = PTR_ERR(mdev); + class_destroy(dma_buf_lock_class); +@@ -836,7 +845,7 @@ static void __exit dma_buf_lock_exit(void) + } + else + { +- dma_buf_lock_resource *resource = list_entry(dma_buf_lock_resource_list.next, ++ dma_buf_lock_resource *resource = list_entry(dma_buf_lock_resource_list.next, + dma_buf_lock_resource, link); + kref_put(&resource->refcount, dma_buf_lock_dounlock); + mutex_unlock(&dma_buf_lock_mutex); +@@ -862,26 +871,17 @@ static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned i + int size = _IOC_SIZE(cmd); + + if (_IOC_TYPE(cmd) != DMA_BUF_LOCK_IOC_MAGIC) +- { + return -ENOTTY; +- +- } + if ((_IOC_NR(cmd) < DMA_BUF_LOCK_IOC_MINNR) || (_IOC_NR(cmd) > DMA_BUF_LOCK_IOC_MAXNR)) +- { + return -ENOTTY; +- } + + switch (cmd) + { + case DMA_BUF_LOCK_FUNC_LOCK_ASYNC: + if (size != sizeof(dma_buf_lock_k_request)) +- { + return -ENOTTY; +- } + if (copy_from_user(&request, (void __user *)arg, size)) +- { + return -EFAULT; +- } + #if DMA_BUF_LOCK_DEBUG + printk("DMA_BUF_LOCK_FUNC_LOCK_ASYNC - %i\n", request.count); + #endif +diff --git a/dvalin/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h b/dvalin/kernel/drivers/base/arm/dma_buf_lock/src/dma_buf_lock.h +similarity index 88% +rename from dvalin/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h +rename to dvalin/kernel/drivers/base/arm/dma_buf_lock/src/dma_buf_lock.h +index f2ae575..104af1f 100644 +--- a/dvalin/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h ++++ 
b/dvalin/kernel/drivers/base/arm/dma_buf_lock/src/dma_buf_lock.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2012 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _DMA_BUF_LOCK_H +diff --git a/dvalin/kernel/drivers/base/arm/dma_buf_test_exporter/Kbuild b/dvalin/kernel/drivers/base/arm/dma_buf_test_exporter/Kbuild +new file mode 100644 +index 0000000..0e20cb4 +--- /dev/null ++++ b/dvalin/kernel/drivers/base/arm/dma_buf_test_exporter/Kbuild +@@ -0,0 +1,23 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2012, 2020-2021 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. 
++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# ++ ++ifeq ($(CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER), y) ++obj-m += dma-buf-test-exporter.o ++endif +diff --git a/dvalin/kernel/drivers/base/arm/dma_buf_test_exporter/build.bp b/dvalin/kernel/drivers/base/arm/dma_buf_test_exporter/build.bp +new file mode 100644 +index 0000000..a49fb81 +--- /dev/null ++++ b/dvalin/kernel/drivers/base/arm/dma_buf_test_exporter/build.bp +@@ -0,0 +1,36 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ ++ ++bob_kernel_module { ++ name: "dma-buf-test-exporter", ++ defaults: [ ++ "kernel_defaults" ++ ], ++ srcs: [ ++ "Kbuild", ++ "dma-buf-test-exporter.c", ++ ], ++ enabled: false, ++ dma_shared_buffer_test_exporter: { ++ kbuild_options: ["CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER=y"], ++ enabled: true, ++ }, ++} +diff --git a/dvalin/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c b/dvalin/kernel/drivers/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.c +similarity index 89% +rename from dvalin/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c +rename to dvalin/kernel/drivers/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.c +index 3eb34c0..ccf73cc 100644 +--- a/dvalin/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c ++++ b/dvalin/kernel/drivers/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include +@@ -31,18 +30,16 @@ + #include + #include + #include +-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) ++#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE) + #include + #endif + #include +-#endif + + /* Maximum size allowed in a single DMA_BUF_TE_ALLOC call */ + #define DMA_BUF_TE_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */ + + /* Since kernel version 5.0 CONFIG_ARCH_NO_SG_CHAIN replaced CONFIG_ARCH_HAS_SG_CHAIN */ +-#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 0, 0) ++#if KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE + #if (!defined(ARCH_HAS_SG_CHAIN) && !defined(CONFIG_ARCH_HAS_SG_CHAIN)) + #define NO_SG_CHAIN + #endif +@@ -77,7 +74,7 @@ struct dma_buf_te_attachment { + + static struct miscdevice te_device; + +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0)) ++#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE) + static int dma_buf_te_attach(struct dma_buf *buf, struct device *dev, struct dma_buf_attachment *attachment) + #else + static int dma_buf_te_attach(struct dma_buf *buf, struct dma_buf_attachment *attachment) +@@ -206,30 +203,29 @@ static void dma_buf_te_release(struct dma_buf *buf) + /* no need for locking */ + + if (alloc->contiguous) { +-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) ++#if (KERNEL_VERSION(4, 8, 0) <= LINUX_VERSION_CODE) + dma_free_attrs(te_device.this_device, + alloc->nr_pages * PAGE_SIZE, + alloc->contig_cpu_addr, + alloc->contig_dma_addr, + DMA_ATTR_WRITE_COMBINE); +- +-#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) ++#else + DEFINE_DMA_ATTRS(attrs); + + dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + dma_free_attrs(te_device.this_device, + alloc->nr_pages * PAGE_SIZE, + alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs); +-#else +- dma_free_writecombine(te_device.this_device, +- alloc->nr_pages * PAGE_SIZE, +- alloc->contig_cpu_addr, alloc->contig_dma_addr); + #endif + } else { + for 
(i = 0; i < alloc->nr_pages; i++) + __free_page(alloc->pages[i]); + } ++#if (KERNEL_VERSION(4, 12, 0) <= LINUX_VERSION_CODE) ++ kvfree(alloc->pages); ++#else + kfree(alloc->pages); ++#endif + kfree(alloc); + } + +@@ -316,7 +312,7 @@ static void dma_buf_te_mmap_close(struct vm_area_struct *vma) + mutex_unlock(&dma_buf->lock); + } + +-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) ++#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE + static int dma_buf_te_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) + #elif KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE + static int dma_buf_te_mmap_fault(struct vm_fault *vmf) +@@ -328,7 +324,7 @@ static vm_fault_t dma_buf_te_mmap_fault(struct vm_fault *vmf) + struct dma_buf *dmabuf; + struct page *pageptr; + +-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) ++#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE + dmabuf = vma->vm_private_data; + #else + dmabuf = vmf->vma->vm_private_data; +@@ -362,11 +358,7 @@ static int dma_buf_te_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) + if (alloc->fail_mmap) + return -ENOMEM; + +-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; +-#else +- vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTEXPAND; +-#endif + vma->vm_ops = &dma_buf_te_vm_ops; + vma->vm_private_data = dmabuf; + +@@ -378,7 +370,7 @@ static int dma_buf_te_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) + return 0; + } + +-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0) ++#if KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE + static void *dma_buf_te_kmap_atomic(struct dma_buf *buf, unsigned long page_num) + { + /* IGNORE */ +@@ -419,19 +411,19 @@ static struct dma_buf_ops dma_buf_te_ops = { + .mmap = dma_buf_te_mmap, + .begin_cpu_access = dma_buf_te_begin_cpu_access, + .end_cpu_access = dma_buf_te_end_cpu_access, +-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0) ++#if KERNEL_VERSION(4, 12, 0) > LINUX_VERSION_CODE + .kmap = dma_buf_te_kmap, + 
.kunmap = dma_buf_te_kunmap, + + /* nop handlers for mandatory functions we ignore */ + .kmap_atomic = dma_buf_te_kmap_atomic + #else +-#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 6, 0) ++#if KERNEL_VERSION(5, 6, 0) > LINUX_VERSION_CODE + .map = dma_buf_te_kmap, + .unmap = dma_buf_te_kunmap, + #endif + +-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0) ++#if KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE + /* nop handlers for mandatory functions we ignore */ + .map_atomic = dma_buf_te_kmap_atomic + #endif +@@ -481,7 +473,8 @@ static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf, + /* Whilst it is possible to allocate larger buffer, we won't be able to + * map it during actual usage (mmap() still succeeds). We fail here so + * userspace code can deal with it early than having driver failure +- * later on. */ ++ * later on. ++ */ + if (max_nr_pages > SG_MAX_SINGLE_ALLOC) + max_nr_pages = SG_MAX_SINGLE_ALLOC; + #endif /* NO_SG_CHAIN */ +@@ -493,7 +486,7 @@ static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf, + } + + alloc = kzalloc(sizeof(struct dma_buf_te_alloc), GFP_KERNEL); +- if (NULL == alloc) { ++ if (alloc == NULL) { + dev_err(te_device.this_device, "%s: couldn't alloc object", __func__); + goto no_alloc_object; + } +@@ -501,7 +494,12 @@ static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf, + alloc->nr_pages = alloc_req.size; + alloc->contiguous = contiguous; + ++#if (KERNEL_VERSION(4, 12, 0) <= LINUX_VERSION_CODE) ++ alloc->pages = kvzalloc(sizeof(struct page *) * alloc->nr_pages, GFP_KERNEL); ++#else + alloc->pages = kzalloc(sizeof(struct page *) * alloc->nr_pages, GFP_KERNEL); ++#endif ++ + if (!alloc->pages) { + dev_err(te_device.this_device, + "%s: couldn't alloc %zu page structures", +@@ -512,14 +510,13 @@ static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf, + if (contiguous) { + dma_addr_t dma_aux; + +-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 
0)) ++#if (KERNEL_VERSION(4, 8, 0) <= LINUX_VERSION_CODE) + alloc->contig_cpu_addr = dma_alloc_attrs(te_device.this_device, + alloc->nr_pages * PAGE_SIZE, + &alloc->contig_dma_addr, + GFP_KERNEL | __GFP_ZERO, + DMA_ATTR_WRITE_COMBINE); +- +-#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) ++#else + DEFINE_DMA_ATTRS(attrs); + + dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); +@@ -527,11 +524,6 @@ static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf, + alloc->nr_pages * PAGE_SIZE, + &alloc->contig_dma_addr, + GFP_KERNEL | __GFP_ZERO, &attrs); +-#else +- alloc->contig_cpu_addr = dma_alloc_writecombine(te_device.this_device, +- alloc->nr_pages * PAGE_SIZE, +- &alloc->contig_dma_addr, +- GFP_KERNEL | __GFP_ZERO); + #endif + if (!alloc->contig_cpu_addr) { + dev_err(te_device.this_device, "%s: couldn't alloc contiguous buffer %zu pages", +@@ -546,7 +538,7 @@ static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf, + } else { + for (i = 0; i < alloc->nr_pages; i++) { + alloc->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); +- if (NULL == alloc->pages[i]) { ++ if (alloc->pages[i] == NULL) { + dev_err(te_device.this_device, "%s: couldn't alloc page", __func__); + goto no_page; + } +@@ -554,13 +546,10 @@ static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf, + } + + /* alloc ready, let's export it */ +-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)) + { + struct dma_buf_export_info export_info = { + .exp_name = "dma_buf_te", +-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0)) + .owner = THIS_MODULE, +-#endif + .ops = &dma_buf_te_ops, + .size = alloc->nr_pages << PAGE_SHIFT, + .flags = O_CLOEXEC | O_RDWR, +@@ -569,13 +558,6 @@ static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf, + + dma_buf = dma_buf_export(&export_info); + } +-#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)) +- dma_buf = dma_buf_export(alloc, &dma_buf_te_ops, +- alloc->nr_pages << PAGE_SHIFT, 
O_CLOEXEC|O_RDWR, NULL); +-#else +- dma_buf = dma_buf_export(alloc, &dma_buf_te_ops, +- alloc->nr_pages << PAGE_SHIFT, O_CLOEXEC|O_RDWR); +-#endif + + if (IS_ERR_OR_NULL(dma_buf)) { + dev_err(te_device.this_device, "%s: couldn't export dma_buf", __func__); +@@ -598,32 +580,30 @@ no_export: + /* i still valid */ + no_page: + if (contiguous) { +- +-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)) ++#if (KERNEL_VERSION(4, 8, 0) <= LINUX_VERSION_CODE) + dma_free_attrs(te_device.this_device, + alloc->nr_pages * PAGE_SIZE, + alloc->contig_cpu_addr, + alloc->contig_dma_addr, + DMA_ATTR_WRITE_COMBINE); +- +-#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) ++#else + DEFINE_DMA_ATTRS(attrs); + + dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + dma_free_attrs(te_device.this_device, + alloc->nr_pages * PAGE_SIZE, + alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs); +-#else +- dma_free_writecombine(te_device.this_device, +- alloc->nr_pages * PAGE_SIZE, +- alloc->contig_cpu_addr, alloc->contig_dma_addr); + #endif + } else { + while (i-- > 0) + __free_page(alloc->pages[i]); + } + free_page_struct: ++#if (KERNEL_VERSION(4, 12, 0) <= LINUX_VERSION_CODE) ++ kvfree(alloc->pages); ++#else + kfree(alloc->pages); ++#endif + free_alloc_object: + kfree(alloc); + no_alloc_object: +@@ -727,17 +707,17 @@ static u32 dma_te_buf_fill(struct dma_buf *dma_buf, unsigned int value) + } + + ret = dma_buf_begin_cpu_access(dma_buf, +-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) +- 0, dma_buf->size, ++#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE ++ 0, dma_buf->size, + #endif +- DMA_BIDIRECTIONAL); ++ DMA_BIDIRECTIONAL); + if (ret) + goto no_cpu_access; + + for_each_sg(sgt->sgl, sg, sgt->nents, count) { + for (i = 0; i < sg_dma_len(sg); i = i + PAGE_SIZE) { + void *addr = NULL; +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) ++#if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE + addr = dma_buf_te_kmap(dma_buf, i >> PAGE_SHIFT); + #else + addr = dma_buf_kmap(dma_buf, i >> 
PAGE_SHIFT); +@@ -747,7 +727,7 @@ static u32 dma_te_buf_fill(struct dma_buf *dma_buf, unsigned int value) + goto no_kmap; + } + memset(addr, value, PAGE_SIZE); +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) ++#if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE + dma_buf_te_kunmap(dma_buf, i >> PAGE_SHIFT, addr); + #else + dma_buf_kunmap(dma_buf, i >> PAGE_SHIFT, addr); +@@ -758,10 +738,10 @@ static u32 dma_te_buf_fill(struct dma_buf *dma_buf, unsigned int value) + + no_kmap: + dma_buf_end_cpu_access(dma_buf, +-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) +- 0, dma_buf->size, ++#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE ++ 0, dma_buf->size, + #endif +- DMA_BIDIRECTIONAL); ++ DMA_BIDIRECTIONAL); + no_cpu_access: + dma_buf_unmap_attachment(attachment, sgt, DMA_BIDIRECTIONAL); + no_import: +diff --git a/dvalin/kernel/drivers/base/dma_buf_test_exporter/Kbuild b/dvalin/kernel/drivers/base/arm/memory_group_manager/Kbuild +similarity index 74% +rename from dvalin/kernel/drivers/base/dma_buf_test_exporter/Kbuild +rename to dvalin/kernel/drivers/base/arm/memory_group_manager/Kbuild +index c382b79..99ce311 100644 +--- a/dvalin/kernel/drivers/base/dma_buf_test_exporter/Kbuild ++++ b/dvalin/kernel/drivers/base/arm/memory_group_manager/Kbuild +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2012 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. 
+ # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,10 +16,8 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. + # +-# SPDX-License-Identifier: GPL-2.0 +-# + # + +-ifneq ($(CONFIG_DMA_SHARED_BUFFER),) +-obj-$(CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER) += dma-buf-test-exporter.o ++ifeq ($(CONFIG_MALI_MEMORY_GROUP_MANAGER), y) ++obj-m := memory_group_manager.o + endif +diff --git a/dvalin/kernel/drivers/base/arm/memory_group_manager/build.bp b/dvalin/kernel/drivers/base/arm/memory_group_manager/build.bp +new file mode 100644 +index 0000000..23db183 +--- /dev/null ++++ b/dvalin/kernel/drivers/base/arm/memory_group_manager/build.bp +@@ -0,0 +1,36 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ ++ ++bob_kernel_module { ++ name: "memory_group_manager", ++ defaults: [ ++ "kernel_defaults" ++ ], ++ srcs: [ ++ "Kbuild", ++ "memory_group_manager.c", ++ ], ++ enabled: false, ++ mali_memory_group_manager: { ++ kbuild_options: ["CONFIG_MALI_MEMORY_GROUP_MANAGER=y"], ++ enabled: true, ++ }, ++} +diff --git a/dvalin/kernel/drivers/base/memory_group_manager/memory_group_manager.c b/dvalin/kernel/drivers/base/arm/memory_group_manager/memory_group_manager.c +similarity index 98% +rename from dvalin/kernel/drivers/base/memory_group_manager/memory_group_manager.c +rename to dvalin/kernel/drivers/base/arm/memory_group_manager/memory_group_manager.c +index 44f848a..a70fe70 100644 +--- a/dvalin/kernel/drivers/base/memory_group_manager/memory_group_manager.c ++++ b/dvalin/kernel/drivers/base/arm/memory_group_manager/memory_group_manager.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include +@@ -26,7 +25,7 @@ + #include + #include + #include +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + #include + #endif + #include +@@ -92,12 +91,12 @@ struct mgm_group { + struct mgm_groups { + struct mgm_group groups[MEMORY_GROUP_MANAGER_NR_GROUPS]; + struct device *dev; +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + struct dentry *mgm_debugfs_root; + #endif + }; + +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + + static int mgm_size_get(void *data, u64 *val) + { +@@ -475,7 +474,6 @@ static struct platform_driver memory_group_manager_driver = { + .remove = memory_group_manager_remove, + .driver = { + .name = "physical-memory-group-manager", +- .owner = THIS_MODULE, + .of_match_table = of_match_ptr(memory_group_manager_dt_ids), + /* + * Prevent the mgm_dev from being unbound and freed, as other's +diff --git a/dvalin/kernel/drivers/base/arm/protected_memory_allocator/Kbuild b/dvalin/kernel/drivers/base/arm/protected_memory_allocator/Kbuild +new file mode 100644 +index 0000000..25295a9 +--- /dev/null ++++ b/dvalin/kernel/drivers/base/arm/protected_memory_allocator/Kbuild +@@ -0,0 +1,23 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. 
++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# ++ ++ifeq ($(CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR), y) ++obj-m := protected_memory_allocator.o ++endif +diff --git a/dvalin/kernel/drivers/base/arm/protected_memory_allocator/build.bp b/dvalin/kernel/drivers/base/arm/protected_memory_allocator/build.bp +new file mode 100644 +index 0000000..4c56154 +--- /dev/null ++++ b/dvalin/kernel/drivers/base/arm/protected_memory_allocator/build.bp +@@ -0,0 +1,36 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ ++ ++bob_kernel_module { ++ name: "protected_memory_allocator", ++ defaults: [ ++ "kernel_defaults" ++ ], ++ srcs: [ ++ "Kbuild", ++ "protected_memory_allocator.c", ++ ], ++ enabled: false, ++ mali_protected_memory_allocator: { ++ kbuild_options: ["CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR=y"], ++ enabled: true, ++ }, ++} +diff --git a/dvalin/kernel/drivers/base/arm/protected_memory_allocator/protected_memory_allocator.c b/dvalin/kernel/drivers/base/arm/protected_memory_allocator/protected_memory_allocator.c +new file mode 100644 +index 0000000..6684210 +--- /dev/null ++++ b/dvalin/kernel/drivers/base/arm/protected_memory_allocator/protected_memory_allocator.c +@@ -0,0 +1,551 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* Size of a bitfield element in bytes */ ++#define BITFIELD_ELEM_SIZE sizeof(u64) ++ ++/* We can track whether or not 64 pages are currently allocated in a u64 */ ++#define PAGES_PER_BITFIELD_ELEM (BITFIELD_ELEM_SIZE * BITS_PER_BYTE) ++ ++/* Order 6 (ie, 64) corresponds to the number of pages held in a bitfield */ ++#define ORDER_OF_PAGES_PER_BITFIELD_ELEM 6 ++ ++/** ++ * struct simple_pma_device - Simple implementation of a protected memory ++ * allocator device ++ * ++ * @pma_dev: Protected memory allocator device pointer ++ * @dev: Device pointer ++ * @allocated_pages_bitfield_arr: Status of all the physical memory pages within the ++ * protected memory region, one bit per page ++ * @rmem_base: Base address of the reserved memory region ++ * @rmem_size: Size of the reserved memory region, in pages ++ * @num_free_pages: Number of free pages in the memory region ++ * @rmem_lock: Lock to serialize the allocation and freeing of ++ * physical pages from the protected memory region ++ */ ++struct simple_pma_device { ++ struct protected_memory_allocator_device pma_dev; ++ struct device *dev; ++ u64 *allocated_pages_bitfield_arr; ++ phys_addr_t rmem_base; ++ size_t rmem_size; ++ size_t num_free_pages; ++ spinlock_t rmem_lock; ++}; ++ ++/** ++ * Number of elements in array 'allocated_pages_bitfield_arr'. If the number of ++ * pages required does not divide exactly by PAGES_PER_BITFIELD_ELEM, adds an ++ * extra element for the remainder. ++ */ ++#define ALLOC_PAGES_BITFIELD_ARR_SIZE(num_pages) \ ++ ((PAGES_PER_BITFIELD_ELEM * (0 != (num_pages % PAGES_PER_BITFIELD_ELEM)) + \ ++ num_pages) / PAGES_PER_BITFIELD_ELEM) ++ ++/** ++ * Allocate a power-of-two number of pages, N, where ++ * 0 <= N <= ORDER_OF_PAGES_PER_BITFIELD_ELEM - 1. ie, Up to 32 pages.
The routine ++ * fills-in a pma structure and sets the appropriate bits in the allocated-pages ++ * bitfield array but assumes the caller has already determined that these are ++ * already clear. ++ * ++ * This routine always works within only a single allocated-pages bitfield element. ++ * It can be thought of as the 'small-granularity' allocator. ++ */ ++static void small_granularity_alloc(struct simple_pma_device *const epma_dev, ++ size_t alloc_bitfield_idx, size_t start_bit, ++ size_t order, ++ struct protected_memory_allocation *pma) ++{ ++ size_t i; ++ size_t page_idx; ++ u64 *bitfield; ++ size_t alloc_pages_bitfield_size; ++ ++ if (WARN_ON(!epma_dev) || ++ WARN_ON(!pma)) ++ return; ++ ++ WARN(epma_dev->rmem_size == 0, "%s: rmem_size is 0", __func__); ++ alloc_pages_bitfield_size = ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size); ++ ++ WARN(alloc_bitfield_idx >= alloc_pages_bitfield_size, ++ "%s: idx>bf_size: %zu %zu", __FUNCTION__, ++ alloc_bitfield_idx, alloc_pages_bitfield_size); ++ ++ WARN((start_bit + (1 << order)) > PAGES_PER_BITFIELD_ELEM, ++ "%s: start=%zu order=%zu ppbe=%zu", ++ __FUNCTION__, start_bit, order, PAGES_PER_BITFIELD_ELEM); ++ ++ bitfield = &epma_dev->allocated_pages_bitfield_arr[alloc_bitfield_idx]; ++ ++ for (i = 0; i < (1 << order); i++) { ++ /* Check the pages represented by this bit are actually free */ ++ WARN (*bitfield & (1ULL << (start_bit + i)), ++ "in %s: page not free: %zu %zu %.16llx %zu\n", ++ __FUNCTION__, i, order, *bitfield, alloc_pages_bitfield_size); ++ ++ /* Mark the pages as now allocated */ ++ *bitfield |= (1ULL << (start_bit + i)); ++ } ++ ++ /* Compute the page index */ ++ page_idx = (alloc_bitfield_idx * PAGES_PER_BITFIELD_ELEM) + start_bit; ++ ++ /* Fill-in the allocation struct for the caller */ ++ pma->pa = epma_dev->rmem_base + (page_idx << PAGE_SHIFT); ++ pma->order = order; ++} ++ ++/** ++ * Allocate a power-of-two number of pages, N, where ++ * N >= ORDER_OF_PAGES_PER_BITFIELD_ELEM. 
ie, 64 pages or more. The routine fills-in ++ * a pma structure and sets the appropriate bits in the allocated-pages bitfield array ++ * but assumes the caller has already determined that these are already clear. ++ * ++ * Unlike small_granularity_alloc, this routine can work with multiple 64-page groups, ++ * ie multiple elements from the allocated-pages bitfield array. However, it always ++ * works with complete sets of these 64-page groups. It can therefore be thought of ++ * as the 'large-granularity' allocator. ++ */ ++static void large_granularity_alloc(struct simple_pma_device *const epma_dev, ++ size_t start_alloc_bitfield_idx, ++ size_t order, ++ struct protected_memory_allocation *pma) ++{ ++ size_t i; ++ size_t num_pages_to_alloc = (size_t)1 << order; ++ size_t num_bitfield_elements_needed = num_pages_to_alloc / PAGES_PER_BITFIELD_ELEM; ++ size_t start_page_idx = start_alloc_bitfield_idx * PAGES_PER_BITFIELD_ELEM; ++ ++ if (WARN_ON(!epma_dev) || ++ WARN_ON(!pma)) ++ return; ++ ++ /* ++ * Are there enough bitfield array elements (groups of 64 pages) ++ * between the start element and the end of the bitfield array ++ * to fulfill the request?
++ */ ++ WARN((start_alloc_bitfield_idx + order) >= ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size), ++ "%s: start=%zu order=%zu ms=%zu", ++ __FUNCTION__, start_alloc_bitfield_idx, order, epma_dev->rmem_size); ++ ++ for (i = 0; i < num_bitfield_elements_needed; i++) { ++ u64 *bitfield = &epma_dev->allocated_pages_bitfield_arr[start_alloc_bitfield_idx + i]; ++ ++ /* We expect all pages that relate to this bitfield element to be free */ ++ WARN((*bitfield != 0), ++ "in %s: pages not free: i=%zu o=%zu bf=%.16llx\n", ++ __FUNCTION__, i, order, *bitfield); ++ ++ /* Mark all the pages for this element as not free */ ++ *bitfield = ~0ULL; ++ } ++ ++ /* Fill-in the allocation struct for the caller */ ++ pma->pa = epma_dev->rmem_base + (start_page_idx << PAGE_SHIFT); ++ pma->order = order; ++} ++ ++static struct protected_memory_allocation *simple_pma_alloc_page( ++ struct protected_memory_allocator_device *pma_dev, unsigned int order) ++{ ++ struct simple_pma_device *const epma_dev = ++ container_of(pma_dev, struct simple_pma_device, pma_dev); ++ struct protected_memory_allocation *pma; ++ size_t num_pages_to_alloc; ++ ++ u64 *bitfields = epma_dev->allocated_pages_bitfield_arr; ++ size_t i; ++ size_t bit; ++ size_t count; ++ ++ dev_dbg(epma_dev->dev, "%s(pma_dev=%px, order=%u\n", ++ __func__, (void *)pma_dev, order); ++ ++ /* This is an example function that follows an extremely simple logic ++ * and is very likely to fail to allocate memory if put under stress. ++ * ++ * The simple_pma_device maintains an array of u64s, with one bit used ++ * to track the status of each page. ++ * ++ * In order to create a memory allocation, the allocator looks for an ++ * adjacent group of cleared bits. This does leave the algorithm open ++ * to fragmentation issues, but is deemed sufficient for now. ++ * If successful, the allocator shall mark all the pages as allocated ++ * and increment the offset accordingly. 
++ * ++ * Allocations of 64 pages or more (order 6) can be allocated only with ++ * 64-page alignment, in order to keep the algorithm as simple as ++ * possible. ie, starting from bit 0 of any 64-bit page-allocation ++ * bitfield. For this, the large-granularity allocator is utilised. ++ * ++ * Allocations of lower-order can only be allocated entirely within the ++ * same group of 64 pages, with the small-granularity allocator (ie ++ * always from the same 64-bit page-allocation bitfield) - again, to ++ * keep things as simple as possible, but flexible to meet ++ * current needs. ++ */ ++ ++ num_pages_to_alloc = (size_t)1 << order; ++ ++ pma = devm_kzalloc(epma_dev->dev, sizeof(*pma), GFP_KERNEL); ++ if (!pma) { ++ dev_err(epma_dev->dev, "Failed to alloc pma struct"); ++ return NULL; ++ } ++ ++ spin_lock(&epma_dev->rmem_lock); ++ ++ if (epma_dev->num_free_pages < num_pages_to_alloc) { ++ dev_err(epma_dev->dev, "not enough free pages\n"); ++ devm_kfree(epma_dev->dev, pma); ++ spin_unlock(&epma_dev->rmem_lock); ++ return NULL; ++ } ++ ++ /* ++ * For order 0-5 (ie, 1 to 32 pages) we always allocate within the same set of 64 pages ++ * Currently, most allocations will be very small (1 page), so the more likely path ++ * here is order < ORDER_OF_PAGES_PER_BITFIELD_ELEM.
++ */ ++ if (likely(order < ORDER_OF_PAGES_PER_BITFIELD_ELEM)) { ++ size_t alloc_pages_bitmap_size = ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size); ++ ++ for (i = 0; i < alloc_pages_bitmap_size; i++) { ++ count = 0; ++ ++ for (bit = 0; bit < PAGES_PER_BITFIELD_ELEM; bit++) { ++ if (0 == (bitfields[i] & (1ULL << bit))) { ++ if ((count + 1) >= num_pages_to_alloc) { ++ /* ++ * We've found enough free, consecutive pages with which to ++ * make an allocation ++ */ ++ small_granularity_alloc( ++ epma_dev, i, ++ bit - count, order, ++ pma); ++ ++ epma_dev->num_free_pages -= ++ num_pages_to_alloc; ++ ++ spin_unlock( ++ &epma_dev->rmem_lock); ++ return pma; ++ } ++ ++ /* So far so good, but we need more clear bits (free pages) yet */ ++ count++; ++ } else { ++ /* ++ * We found an allocated page, so nothing we've seen so far can be used. ++ * Keep looking. ++ */ ++ count = 0; ++ } ++ } ++ } ++ } else { ++ /** ++ * For allocations of order ORDER_OF_PAGES_PER_BITFIELD_ELEM and above (>= 64 pages), we know ++ * we'll only get allocations for whole groups of 64 pages, which hugely simplifies the task. ++ */ ++ size_t alloc_pages_bitmap_size = ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size); ++ ++ /* How many 64-bit bitfield elements will be needed for the allocation? */ ++ size_t num_bitfield_elements_needed = num_pages_to_alloc / PAGES_PER_BITFIELD_ELEM; ++ ++ count = 0; ++ ++ for (i = 0; i < alloc_pages_bitmap_size; i++) { ++ /* Are all the pages free for the i'th u64 bitfield element?
*/ ++ if (bitfields[i] == 0) { ++ count += PAGES_PER_BITFIELD_ELEM; ++ ++ if (count >= (1 << order)) { ++ size_t start_idx = (i + 1) - num_bitfield_elements_needed; ++ ++ large_granularity_alloc(epma_dev, ++ start_idx, ++ order, pma); ++ ++ epma_dev->num_free_pages -= 1 << order; ++ spin_unlock(&epma_dev->rmem_lock); ++ return pma; ++ } ++ } ++ else ++ { ++ count = 0; ++ } ++ } ++ } ++ ++ spin_unlock(&epma_dev->rmem_lock); ++ devm_kfree(epma_dev->dev, pma); ++ ++ dev_err(epma_dev->dev, "not enough contiguous pages (need %zu), total free pages left %zu\n", ++ num_pages_to_alloc, epma_dev->num_free_pages); ++ return NULL; ++} ++ ++static phys_addr_t simple_pma_get_phys_addr( ++ struct protected_memory_allocator_device *pma_dev, ++ struct protected_memory_allocation *pma) ++{ ++ struct simple_pma_device *const epma_dev = ++ container_of(pma_dev, struct simple_pma_device, pma_dev); ++ ++ dev_dbg(epma_dev->dev, "%s(pma_dev=%px, pma=%px, pa=%llx\n", ++ __func__, (void *)pma_dev, (void *)pma, ++ (unsigned long long)pma->pa); ++ ++ return pma->pa; ++} ++ ++static void simple_pma_free_page( ++ struct protected_memory_allocator_device *pma_dev, ++ struct protected_memory_allocation *pma) ++{ ++ struct simple_pma_device *const epma_dev = ++ container_of(pma_dev, struct simple_pma_device, pma_dev); ++ size_t num_pages_in_allocation; ++ size_t offset; ++ size_t i; ++ size_t bitfield_idx; ++ size_t bitfield_start_bit; ++ size_t page_num; ++ u64 *bitfield; ++ size_t alloc_pages_bitmap_size; ++ size_t num_bitfield_elems_used_by_alloc; ++ ++ WARN_ON(pma == NULL); ++ ++ dev_dbg(epma_dev->dev, "%s(pma_dev=%px, pma=%px, pa=%llx\n", ++ __func__, (void *)pma_dev, (void *)pma, ++ (unsigned long long)pma->pa); ++ ++ WARN_ON(pma->pa < epma_dev->rmem_base); ++ ++ /* This is an example function that follows an extremely simple logic ++ * and is vulnerable to abuse. 
++ */ ++ offset = (pma->pa - epma_dev->rmem_base); ++ num_pages_in_allocation = (size_t)1 << pma->order; ++ ++ /* The number of bitfield elements used by the allocation */ ++ num_bitfield_elems_used_by_alloc = num_pages_in_allocation / PAGES_PER_BITFIELD_ELEM; ++ ++ /* The page number of the first page of the allocation, relative to rmem_base */ ++ page_num = offset >> PAGE_SHIFT; ++ ++ /* Which u64 bitfield refers to this page? */ ++ bitfield_idx = page_num / PAGES_PER_BITFIELD_ELEM; ++ ++ alloc_pages_bitmap_size = ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size); ++ ++ /* Is the allocation within expected bounds? */ ++ WARN_ON((bitfield_idx + num_bitfield_elems_used_by_alloc) >= alloc_pages_bitmap_size); ++ ++ spin_lock(&epma_dev->rmem_lock); ++ ++ if (pma->order < ORDER_OF_PAGES_PER_BITFIELD_ELEM) { ++ bitfield = &epma_dev->allocated_pages_bitfield_arr[bitfield_idx]; ++ ++ /* Which bit within that u64 bitfield is the lsb covering this allocation? */ ++ bitfield_start_bit = page_num % PAGES_PER_BITFIELD_ELEM; ++ ++ /* Clear the bits for the pages we're now freeing */ ++ *bitfield &= ~(((1ULL << num_pages_in_allocation) - 1) << bitfield_start_bit); ++ } ++ else { ++ WARN(page_num % PAGES_PER_BITFIELD_ELEM, ++ "%s: Expecting allocs of order >= %d to be %zu-page aligned\n", ++ __FUNCTION__, ORDER_OF_PAGES_PER_BITFIELD_ELEM, PAGES_PER_BITFIELD_ELEM); ++ ++ for (i = 0; i < num_bitfield_elems_used_by_alloc; i++) { ++ bitfield = &epma_dev->allocated_pages_bitfield_arr[bitfield_idx + i]; ++ ++ /* We expect all bits to be set (all pages allocated) */ ++ WARN((*bitfield != ~0), ++ "%s: alloc being freed is not fully allocated: of=%zu np=%zu bf=%.16llx\n", ++ __FUNCTION__, offset, num_pages_in_allocation, *bitfield); ++ ++ /* ++ * Now clear all the bits in the bitfield element to mark all the pages ++ * it refers to as free. 
++ */ ++ *bitfield = 0ULL; ++ } ++ } ++ ++ epma_dev->num_free_pages += num_pages_in_allocation; ++ spin_unlock(&epma_dev->rmem_lock); ++ devm_kfree(epma_dev->dev, pma); ++} ++ ++static int protected_memory_allocator_probe(struct platform_device *pdev) ++{ ++ struct simple_pma_device *epma_dev; ++ struct device_node *np; ++ phys_addr_t rmem_base; ++ size_t rmem_size; ++ size_t alloc_bitmap_pages_arr_size; ++#if (KERNEL_VERSION(4, 15, 0) <= LINUX_VERSION_CODE) ++ struct reserved_mem *rmem; ++#endif ++ ++ np = pdev->dev.of_node; ++ ++ if (!np) { ++ dev_err(&pdev->dev, "device node pointer not set\n"); ++ return -ENODEV; ++ } ++ ++ np = of_parse_phandle(np, "memory-region", 0); ++ if (!np) { ++ dev_err(&pdev->dev, "memory-region node not set\n"); ++ return -ENODEV; ++ } ++ ++#if (KERNEL_VERSION(4, 15, 0) <= LINUX_VERSION_CODE) ++ rmem = of_reserved_mem_lookup(np); ++ if (rmem) { ++ rmem_base = rmem->base; ++ rmem_size = rmem->size >> PAGE_SHIFT; ++ } else ++#endif ++ { ++ of_node_put(np); ++ dev_err(&pdev->dev, "could not read reserved memory-region\n"); ++ return -ENODEV; ++ } ++ ++ of_node_put(np); ++ epma_dev = devm_kzalloc(&pdev->dev, sizeof(*epma_dev), GFP_KERNEL); ++ if (!epma_dev) ++ return -ENOMEM; ++ ++ epma_dev->pma_dev.ops.pma_alloc_page = simple_pma_alloc_page; ++ epma_dev->pma_dev.ops.pma_get_phys_addr = simple_pma_get_phys_addr; ++ epma_dev->pma_dev.ops.pma_free_page = simple_pma_free_page; ++ epma_dev->pma_dev.owner = THIS_MODULE; ++ epma_dev->dev = &pdev->dev; ++ epma_dev->rmem_base = rmem_base; ++ epma_dev->rmem_size = rmem_size; ++ epma_dev->num_free_pages = rmem_size; ++ spin_lock_init(&epma_dev->rmem_lock); ++ ++ alloc_bitmap_pages_arr_size = ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size); ++ ++ epma_dev->allocated_pages_bitfield_arr = devm_kzalloc(&pdev->dev, ++ alloc_bitmap_pages_arr_size * BITFIELD_ELEM_SIZE, GFP_KERNEL); ++ ++ if (!epma_dev->allocated_pages_bitfield_arr) { ++ dev_err(&pdev->dev, "failed to allocate resources\n"); ++ 
devm_kfree(&pdev->dev, epma_dev); ++ return -ENOMEM; ++ } ++ ++ if (epma_dev->rmem_size % PAGES_PER_BITFIELD_ELEM) { ++ size_t extra_pages = ++ alloc_bitmap_pages_arr_size * PAGES_PER_BITFIELD_ELEM - ++ epma_dev->rmem_size; ++ size_t last_bitfield_index = alloc_bitmap_pages_arr_size - 1; ++ ++ /* Mark the extra pages (that lie outside the reserved range) as ++ * always in use. ++ */ ++ epma_dev->allocated_pages_bitfield_arr[last_bitfield_index] = ++ ((1ULL << extra_pages) - 1) << ++ (PAGES_PER_BITFIELD_ELEM - extra_pages); ++ } ++ ++ platform_set_drvdata(pdev, &epma_dev->pma_dev); ++ dev_info(&pdev->dev, ++ "Protected memory allocator probed successfully\n"); ++ dev_info(&pdev->dev, "Protected memory region: base=%llx num pages=%zu\n", ++ (unsigned long long)rmem_base, rmem_size); ++ ++ return 0; ++} ++ ++static int protected_memory_allocator_remove(struct platform_device *pdev) ++{ ++ struct protected_memory_allocator_device *pma_dev = ++ platform_get_drvdata(pdev); ++ struct simple_pma_device *epma_dev; ++ struct device *dev; ++ ++ if (!pma_dev) ++ return -EINVAL; ++ ++ epma_dev = container_of(pma_dev, struct simple_pma_device, pma_dev); ++ dev = epma_dev->dev; ++ ++ if (epma_dev->num_free_pages < epma_dev->rmem_size) { ++ dev_warn(&pdev->dev, "Leaking %zu pages of protected memory\n", ++ epma_dev->rmem_size - epma_dev->num_free_pages); ++ } ++ ++ platform_set_drvdata(pdev, NULL); ++ devm_kfree(dev, epma_dev->allocated_pages_bitfield_arr); ++ devm_kfree(dev, epma_dev); ++ ++ dev_info(&pdev->dev, ++ "Protected memory allocator removed successfully\n"); ++ ++ return 0; ++} ++ ++static const struct of_device_id protected_memory_allocator_dt_ids[] = { ++ { .compatible = "arm,protected-memory-allocator" }, ++ { /* sentinel */ } ++}; ++MODULE_DEVICE_TABLE(of, protected_memory_allocator_dt_ids); ++ ++static struct platform_driver protected_memory_allocator_driver = { ++ .probe = protected_memory_allocator_probe, ++ .remove = protected_memory_allocator_remove, ++ .driver 
= { ++ .name = "simple_protected_memory_allocator", ++ .of_match_table = of_match_ptr(protected_memory_allocator_dt_ids), ++ } ++}; ++ ++module_platform_driver(protected_memory_allocator_driver); ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("ARM Ltd."); ++MODULE_VERSION("1.0"); +diff --git a/dvalin/kernel/drivers/base/dma_buf_test_exporter/Kconfig b/dvalin/kernel/drivers/base/dma_buf_test_exporter/Kconfig +deleted file mode 100644 +index 66ca1bc..0000000 +--- a/dvalin/kernel/drivers/base/dma_buf_test_exporter/Kconfig ++++ /dev/null +@@ -1,26 +0,0 @@ +-# +-# (C) COPYRIGHT 2012 ARM Limited. All rights reserved. +-# +-# This program is free software and is provided to you under the terms of the +-# GNU General Public License version 2 as published by the Free Software +-# Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. +-# +-# This program is distributed in the hope that it will be useful, +-# but WITHOUT ANY WARRANTY; without even the implied warranty of +-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-# GNU General Public License for more details. +-# +-# You should have received a copy of the GNU General Public License +-# along with this program; if not, you can access it online at +-# http://www.gnu.org/licenses/gpl-2.0.html. +-# +-# SPDX-License-Identifier: GPL-2.0 +-# +-# +- +-config DMA_SHARED_BUFFER_TEST_EXPORTER +- tristate "Test exporter for the dma-buf framework" +- depends on DMA_SHARED_BUFFER +- help +- This option enables the test exporter usable to help test importerts. +diff --git a/dvalin/kernel/drivers/base/dma_buf_test_exporter/Makefile b/dvalin/kernel/drivers/base/dma_buf_test_exporter/Makefile +deleted file mode 100644 +index 528582c..0000000 +--- a/dvalin/kernel/drivers/base/dma_buf_test_exporter/Makefile ++++ /dev/null +@@ -1,36 +0,0 @@ +-# +-# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved. 
+-# +-# This program is free software and is provided to you under the terms of the +-# GNU General Public License version 2 as published by the Free Software +-# Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. +-# +-# This program is distributed in the hope that it will be useful, +-# but WITHOUT ANY WARRANTY; without even the implied warranty of +-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-# GNU General Public License for more details. +-# +-# You should have received a copy of the GNU General Public License +-# along with this program; if not, you can access it online at +-# http://www.gnu.org/licenses/gpl-2.0.html. +-# +-# SPDX-License-Identifier: GPL-2.0 +-# +-# +- +-# linux build system bootstrap for out-of-tree module +- +-# default to building for the host +-ARCH ?= $(shell uname -m) +- +-ifeq ($(KDIR),) +-$(error Must specify KDIR to point to the kernel to target)) +-endif +- +-all: +- $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER=m +- +-clean: +- $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean +- +diff --git a/dvalin/kernel/drivers/base/dma_buf_test_exporter/build.bp b/dvalin/kernel/drivers/base/dma_buf_test_exporter/build.bp +deleted file mode 100644 +index 7b0bd5d..0000000 +--- a/dvalin/kernel/drivers/base/dma_buf_test_exporter/build.bp ++++ /dev/null +@@ -1,26 +0,0 @@ +-/* +- * +- * (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved. +- * +- * This program is free software and is provided to you under the terms of the +- * GNU General Public License version 2 as published by the Free Software +- * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. +- * +- * A copy of the licence is included with the program, and can also be obtained +- * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +- * Boston, MA 02110-1301, USA. 
+- * +- */ +- +-bob_kernel_module { +- name: "dma-buf-test-exporter", +- srcs: [ +- "Kbuild", +- "dma-buf-test-exporter.c", +- ], +- kbuild_options: [ +- "CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER=m", +- ], +- defaults: ["kernel_defaults"], +-} +diff --git a/dvalin/kernel/drivers/base/memory_group_manager/Makefile b/dvalin/kernel/drivers/base/memory_group_manager/Makefile +deleted file mode 100644 +index a5bceae..0000000 +--- a/dvalin/kernel/drivers/base/memory_group_manager/Makefile ++++ /dev/null +@@ -1,35 +0,0 @@ +-# +-# (C) COPYRIGHT 2019 ARM Limited. All rights reserved. +-# +-# This program is free software and is provided to you under the terms of the +-# GNU General Public License version 2 as published by the Free Software +-# Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. +-# +-# This program is distributed in the hope that it will be useful, +-# but WITHOUT ANY WARRANTY; without even the implied warranty of +-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-# GNU General Public License for more details. +-# +-# You should have received a copy of the GNU General Public License +-# along with this program; if not, you can access it online at +-# http://www.gnu.org/licenses/gpl-2.0.html. 
+-# +-# SPDX-License-Identifier: GPL-2.0 +-# +-# +- +-# linux build system bootstrap for out-of-tree module +- +-# default to building for the host +-ARCH ?= $(shell uname -m) +- +-ifeq ($(KDIR),) +-$(error Must specify KDIR to point to the kernel to target)) +-endif +- +-all: +- $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" modules CONFIG_MALI_MEMORY_GROUP_MANAGER=m +- +-clean: +- $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean +diff --git a/dvalin/kernel/drivers/base/memory_group_manager/build.bp b/dvalin/kernel/drivers/base/memory_group_manager/build.bp +deleted file mode 100644 +index 04dbfd3..0000000 +--- a/dvalin/kernel/drivers/base/memory_group_manager/build.bp ++++ /dev/null +@@ -1,22 +0,0 @@ +-/* +- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. +- * +- * This program is free software and is provided to you under the terms of the +- * GNU General Public License version 2 as published by the Free Software +- * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. +- * +- * A copy of the licence is included with the program, and can also be obtained +- * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +- * Boston, MA 02110-1301, USA. +- */ +- +-bob_kernel_module { +- name: "memory_group_manager", +- srcs: [ +- "Kbuild", +- "memory_group_manager.c", +- ], +- kbuild_options: ["CONFIG_MALI_MEMORY_GROUP_MANAGER=m"], +- defaults: ["kernel_defaults"], +-} +diff --git a/dvalin/kernel/drivers/base/protected_memory_allocator/Makefile b/dvalin/kernel/drivers/base/protected_memory_allocator/Makefile +deleted file mode 100644 +index 17b2600..0000000 +--- a/dvalin/kernel/drivers/base/protected_memory_allocator/Makefile ++++ /dev/null +@@ -1,35 +0,0 @@ +-# +-# (C) COPYRIGHT 2019 ARM Limited. All rights reserved. 
+-# +-# This program is free software and is provided to you under the terms of the +-# GNU General Public License version 2 as published by the Free Software +-# Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. +-# +-# This program is distributed in the hope that it will be useful, +-# but WITHOUT ANY WARRANTY; without even the implied warranty of +-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-# GNU General Public License for more details. +-# +-# You should have received a copy of the GNU General Public License +-# along with this program; if not, you can access it online at +-# http://www.gnu.org/licenses/gpl-2.0.html. +-# +-# SPDX-License-Identifier: GPL-2.0 +-# +-# +- +-# linux build system bootstrap for out-of-tree module +- +-# default to building for the host +-ARCH ?= $(shell uname -m) +- +-ifeq ($(KDIR),) +-$(error Must specify KDIR to point to the kernel to target)) +-endif +- +-all: +- $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" modules CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR=m +- +-clean: +- $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean +diff --git a/dvalin/kernel/drivers/base/protected_memory_allocator/build.bp b/dvalin/kernel/drivers/base/protected_memory_allocator/build.bp +deleted file mode 100644 +index 165b17b..0000000 +--- a/dvalin/kernel/drivers/base/protected_memory_allocator/build.bp ++++ /dev/null +@@ -1,26 +0,0 @@ +-/* +- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. +- * +- * This program is free software and is provided to you under the terms of the +- * GNU General Public License version 2 as published by the Free Software +- * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. +- * +- * A copy of the licence is included with the program, and can also be obtained +- * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +- * Boston, MA 02110-1301, USA. 
+- */ +- +-bob_kernel_module { +- name: "protected_memory_allocator", +- srcs: [ +- "Kbuild", +- "protected_memory_allocator.c", +- ], +- kbuild_options: ["CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR=m"], +- defaults: ["kernel_defaults"], +- enabled: false, +- build_csf_only_module: { +- enabled: true, +- }, +-} +diff --git a/dvalin/kernel/drivers/base/protected_memory_allocator/protected_memory_allocator.c b/dvalin/kernel/drivers/base/protected_memory_allocator/protected_memory_allocator.c +deleted file mode 100644 +index bb0b1dd..0000000 +--- a/dvalin/kernel/drivers/base/protected_memory_allocator/protected_memory_allocator.c ++++ /dev/null +@@ -1,308 +0,0 @@ +-/* +- * +- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. +- * +- * This program is free software and is provided to you under the terms of the +- * GNU General Public License version 2 as published by the Free Software +- * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. +- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, you can access it online at +- * http://www.gnu.org/licenses/gpl-2.0.html. 
+- * +- * SPDX-License-Identifier: GPL-2.0 +- * +- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-/** +- * struct simple_pma_device - Simple implementation of a protected memory +- * allocator device +- * +- * @pma_dev: Protected memory allocator device pointer +- * @dev: Device pointer +- * @alloc_pages: Status of all the physical memory pages within the +- * protected memory region; true for allocated pages +- * @rmem_base: Base address of the reserved memory region +- * @rmem_size: Size of the reserved memory region, in pages +- * @free_pa_offset: Offset of the lowest physical address within the protected +- * memory region that is currently associated with a free page +- * @num_free_pages: Number of free pages in the memory region +- */ +-struct simple_pma_device { +- struct protected_memory_allocator_device pma_dev; +- struct device *dev; +- bool *alloc_pages; +- phys_addr_t rmem_base; +- size_t rmem_size; +- size_t free_pa_offset; +- size_t num_free_pages; +-}; +- +-static struct protected_memory_allocation *simple_pma_alloc_page( +- struct protected_memory_allocator_device *pma_dev, unsigned int order) +-{ +- struct simple_pma_device *const epma_dev = +- container_of(pma_dev, struct simple_pma_device, pma_dev); +- struct protected_memory_allocation *pma; +- size_t num_pages; +- size_t i; +- +- dev_dbg(epma_dev->dev, "%s(pma_dev=%px, order=%u\n", +- __func__, (void *)pma_dev, order); +- +- /* This is an example function that follows an extremely simple logic +- * and is very likely to fail to allocate memory if put under stress. +- * +- * The simple_pma_device maintains an array of booleans to track +- * the status of every page and an offset to the free page to use +- * for the next allocation. The offset starts from 0 and can only grow, +- * and be reset when the end of the memory region is reached. 
+- * +- * In order to create a memory allocation, the allocator simply looks +- * at the offset and verifies whether there are enough free pages +- * after it to accommodate the allocation request. If successful, +- * the allocator shall mark all the pages as allocated and increment +- * the offset accordingly. +- * +- * The allocator does not look for any other free pages inside the +- * memory region, even if plenty of free memory is available. +- * Free memory pages are counted and the offset is ignored if the +- * memory region is fully allocated. +- */ +- +- /* The only candidate for allocation is the sub-region starting +- * from the free_pa_offset. Verify that enough contiguous pages +- * are available and that they are all free. +- */ +- num_pages = (size_t)1 << order; +- +- if (epma_dev->num_free_pages < num_pages) +- dev_err(epma_dev->dev, "not enough free pages\n"); +- +- if (epma_dev->free_pa_offset + num_pages > epma_dev->rmem_size) { +- dev_err(epma_dev->dev, "not enough contiguous pages\n"); +- return NULL; +- } +- +- for (i = 0; i < num_pages; i++) +- if (epma_dev->alloc_pages[epma_dev->free_pa_offset + i]) +- break; +- +- if (i < num_pages) { +- dev_err(epma_dev->dev, "free pages are not contiguous\n"); +- return NULL; +- } +- +- /* Memory allocation is successful. Mark pages as allocated. +- * Update the free_pa_offset if free pages are still available: +- * increment the free_pa_offset accordingly, and then making sure +- * that it points at the next free page, potentially wrapping over +- * the end of the memory region. 
+- */ +- pma = devm_kzalloc(epma_dev->dev, sizeof(*pma), GFP_KERNEL); +- if (!pma) +- return NULL; +- +- pma->pa = epma_dev->rmem_base + (epma_dev->free_pa_offset << PAGE_SHIFT); +- pma->order = order; +- +- for (i = 0; i < num_pages; i++) +- epma_dev->alloc_pages[epma_dev->free_pa_offset + i] = true; +- +- epma_dev->num_free_pages -= num_pages; +- +- if (epma_dev->num_free_pages) { +- epma_dev->free_pa_offset += num_pages; +- i = 0; +- while (epma_dev->alloc_pages[epma_dev->free_pa_offset + i]) { +- epma_dev->free_pa_offset++; +- if (epma_dev->free_pa_offset > epma_dev->rmem_size) +- epma_dev->free_pa_offset = 0; +- } +- } +- +- return pma; +-} +- +-static phys_addr_t simple_pma_get_phys_addr( +- struct protected_memory_allocator_device *pma_dev, +- struct protected_memory_allocation *pma) +-{ +- struct simple_pma_device *const epma_dev = +- container_of(pma_dev, struct simple_pma_device, pma_dev); +- +- dev_dbg(epma_dev->dev, "%s(pma_dev=%px, pma=%px, pa=%llx\n", +- __func__, (void *)pma_dev, (void *)pma, pma->pa); +- +- return pma->pa; +-} +- +-static void simple_pma_free_page( +- struct protected_memory_allocator_device *pma_dev, +- struct protected_memory_allocation *pma) +-{ +- struct simple_pma_device *const epma_dev = +- container_of(pma_dev, struct simple_pma_device, pma_dev); +- size_t num_pages; +- size_t offset; +- size_t i; +- +- dev_dbg(epma_dev->dev, "%s(pma_dev=%px, pma=%px, pa=%llx\n", +- __func__, (void *)pma_dev, (void *)pma, pma->pa); +- +- /* This is an example function that follows an extremely simple logic +- * and is vulnerable to abuse. For instance, double frees won't be +- * detected. +- * +- * If memory is full, must update the free_pa_offset that is currently +- * pointing at an allocated page. +- * +- * Increase the number of free pages and mark them as free. 
+- */ +- offset = (pma->pa - epma_dev->rmem_base) >> PAGE_SHIFT; +- num_pages = (size_t)1 << pma->order; +- +- if (epma_dev->num_free_pages == 0) +- epma_dev->free_pa_offset = offset; +- +- epma_dev->num_free_pages += num_pages; +- for (i = 0; i < num_pages; i++) +- epma_dev->alloc_pages[offset + i] = false; +- +- devm_kfree(epma_dev->dev, pma); +-} +- +-static int protected_memory_allocator_probe(struct platform_device *pdev) +-{ +- struct simple_pma_device *epma_dev; +- struct device_node *np; +- phys_addr_t rmem_base; +- size_t rmem_size; +-#if (KERNEL_VERSION(4, 15, 0) <= LINUX_VERSION_CODE) +- struct reserved_mem *rmem; +-#endif +- +- np = pdev->dev.of_node; +- +- if (!np) { +- dev_err(&pdev->dev, "device node pointer not set\n"); +- return -ENODEV; +- } +- +- np = of_parse_phandle(np, "memory-region", 0); +- if (!np) { +- dev_err(&pdev->dev, "memory-region node not set\n"); +- return -ENODEV; +- } +- +-#if (KERNEL_VERSION(4, 15, 0) <= LINUX_VERSION_CODE) +- rmem = of_reserved_mem_lookup(np); +- if (rmem) { +- rmem_base = rmem->base; +- rmem_size = rmem->size >> PAGE_SHIFT; +- } else +-#endif +- { +- of_node_put(np); +- dev_err(&pdev->dev, "could not read reserved memory-region\n"); +- return -ENODEV; +- } +- +- of_node_put(np); +- epma_dev = devm_kzalloc(&pdev->dev, sizeof(*epma_dev), GFP_KERNEL); +- if (!epma_dev) +- return -ENOMEM; +- +- epma_dev->pma_dev.ops.pma_alloc_page = simple_pma_alloc_page; +- epma_dev->pma_dev.ops.pma_get_phys_addr = simple_pma_get_phys_addr; +- epma_dev->pma_dev.ops.pma_free_page = simple_pma_free_page; +- epma_dev->pma_dev.owner = THIS_MODULE; +- epma_dev->dev = &pdev->dev; +- epma_dev->rmem_base = rmem_base; +- epma_dev->rmem_size = rmem_size; +- epma_dev->free_pa_offset = 0; +- epma_dev->num_free_pages = rmem_size; +- +- epma_dev->alloc_pages = devm_kzalloc(&pdev->dev, +- sizeof(bool) * epma_dev->rmem_size, GFP_KERNEL); +- +- if (!epma_dev->alloc_pages) { +- dev_err(&pdev->dev, "failed to allocate resources\n"); +- 
devm_kfree(&pdev->dev, epma_dev); +- return -ENOMEM; +- } +- +- platform_set_drvdata(pdev, &epma_dev->pma_dev); +- dev_info(&pdev->dev, +- "Protected memory allocator probed successfully\n"); +- dev_info(&pdev->dev, "Protected memory region: base=%llx num pages=%zu\n", +- rmem_base, rmem_size); +- +- return 0; +-} +- +-static int protected_memory_allocator_remove(struct platform_device *pdev) +-{ +- struct protected_memory_allocator_device *pma_dev = +- platform_get_drvdata(pdev); +- struct simple_pma_device *epma_dev; +- struct device *dev; +- +- if (!pma_dev) +- return -EINVAL; +- +- epma_dev = container_of(pma_dev, struct simple_pma_device, pma_dev); +- dev = epma_dev->dev; +- +- if (epma_dev->num_free_pages < epma_dev->rmem_size) { +- dev_warn(&pdev->dev, "Leaking %zu pages of protected memory\n", +- epma_dev->rmem_size - epma_dev->num_free_pages); +- } +- +- platform_set_drvdata(pdev, NULL); +- devm_kfree(dev, epma_dev->alloc_pages); +- devm_kfree(dev, epma_dev); +- +- dev_info(&pdev->dev, +- "Protected memory allocator removed successfully\n"); +- +- return 0; +-} +- +-static const struct of_device_id protected_memory_allocator_dt_ids[] = { +- { .compatible = "arm,protected-memory-allocator" }, +- { /* sentinel */ } +-}; +-MODULE_DEVICE_TABLE(of, protected_memory_allocator_dt_ids); +- +-static struct platform_driver protected_memory_allocator_driver = { +- .probe = protected_memory_allocator_probe, +- .remove = protected_memory_allocator_remove, +- .driver = { +- .name = "simple_protected_memory_allocator", +- .owner = THIS_MODULE, +- .of_match_table = of_match_ptr(protected_memory_allocator_dt_ids), +- } +-}; +- +-module_platform_driver(protected_memory_allocator_driver); +- +-MODULE_LICENSE("GPL"); +-MODULE_AUTHOR("ARM Ltd."); +-MODULE_VERSION("1.0"); +diff --git a/dvalin/kernel/drivers/gpu/arm/Kbuild b/dvalin/kernel/drivers/gpu/arm/Kbuild +index 1a6fa3c..52ea5fb 100644 +--- a/dvalin/kernel/drivers/gpu/arm/Kbuild ++++ b/dvalin/kernel/drivers/gpu/arm/Kbuild 
+@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2012 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2012, 2020-2021 ARM Limited. All rights reserved. + # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,9 +16,6 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. + # +-# SPDX-License-Identifier: GPL-2.0 + # +-# +- + + obj-$(CONFIG_MALI_MIDGARD) += midgard/ +diff --git a/dvalin/kernel/drivers/gpu/arm/Kconfig b/dvalin/kernel/drivers/gpu/arm/Kconfig +index 693b86f..2da8c98 100644 +--- a/dvalin/kernel/drivers/gpu/arm/Kconfig ++++ b/dvalin/kernel/drivers/gpu/arm/Kconfig +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2012 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2012, 2021 ARM Limited. All rights reserved. + # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,10 +16,7 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. 
+ # +-# SPDX-License-Identifier: GPL-2.0 + # +-# +- + + menu "ARM GPU Configuration" + source "drivers/gpu/arm/midgard/Kconfig" +diff --git a/dvalin/kernel/drivers/base/protected_memory_allocator/Kbuild b/dvalin/kernel/drivers/gpu/arm/Makefile +similarity index 77% +rename from dvalin/kernel/drivers/base/protected_memory_allocator/Kbuild +rename to dvalin/kernel/drivers/gpu/arm/Makefile +index 241aeb9..ea9ecc7 100644 +--- a/dvalin/kernel/drivers/base/protected_memory_allocator/Kbuild ++++ b/dvalin/kernel/drivers/gpu/arm/Makefile +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2019 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,8 +16,6 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. + # +-# SPDX-License-Identifier: GPL-2.0 +-# + # + +-obj-$(CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR) := protected_memory_allocator.o +\ No newline at end of file ++include midgard/Makefile +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/Kbuild +index fa52548..d3c4ee1 100755 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/Kbuild ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/Kbuild +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved. 
+ # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,202 +16,240 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. + # +-# SPDX-License-Identifier: GPL-2.0 + # ++ ++# make $(src) as absolute path if it is not already, by prefixing $(srctree) ++# This is to prevent any build issue due to wrong path. ++src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src)) ++ + # ++# Prevent misuse when Kernel configurations are not present by default ++# in out-of-tree builds ++# ++ifneq ($(CONFIG_ANDROID),n) ++ifeq ($(CONFIG_GPU_TRACEPOINTS),n) ++ $(error CONFIG_GPU_TRACEPOINTS must be set in Kernel configuration) ++endif ++endif + ++ifeq ($(CONFIG_DMA_SHARED_BUFFER),n) ++ $(error CONFIG_DMA_SHARED_BUFFER must be set in Kernel configuration) ++endif + +-# Driver version string which is returned to userspace via an ioctl +-MALI_RELEASE_NAME ?= "r25p0-01rel0" ++ifeq ($(CONFIG_PM_DEVFREQ),n) ++ $(error CONFIG_PM_DEVFREQ must be set in Kernel configuration) ++endif + +-# Paths required for build +-# make $(src) as absolute path if it isn't already, by prefixing $(srctree) +-src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src)) +-KBASE_PATH = $(src) +-KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy +-UMP_PATH = $(src)/../../../base ++ifeq ($(CONFIG_DEVFREQ_THERMAL),n) ++ $(error CONFIG_DEVFREQ_THERMAL must be set in Kernel configuration) ++endif ++ ++ifeq ($(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND),n) ++ $(error CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND must be set in Kernel configuration) ++endif ++ ++ifeq 
($(CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS), y) ++ ifneq ($(CONFIG_DEBUG_FS), y) ++ $(error CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS depends on CONFIG_DEBUG_FS to be set in Kernel configuration) ++ endif ++endif ++ ++ifeq ($(CONFIG_MALI_FENCE_DEBUG), y) ++ ifneq ($(CONFIG_SYNC), y) ++ ifneq ($(CONFIG_SYNC_FILE), y) ++ $(error CONFIG_MALI_FENCE_DEBUG depends on CONFIG_SYNC || CONFIG_SYNC_FILE to be set in Kernel configuration) ++ endif ++ endif ++endif + ++# ++# Configurations ++# ++ ++# Driver version string which is returned to userspace via an ioctl ++MALI_RELEASE_NAME ?= '"r32p1-01bet0"' + # Set up defaults if not defined by build system +-MALI_CUSTOMER_RELEASE ?= 1 +-MALI_USE_CSF ?= 0 +-MALI_UNIT_TEST ?= 0 +-MALI_KERNEL_TEST_API ?= 0 ++ifeq ($(CONFIG_MALI_DEBUG), y) ++ MALI_UNIT_TEST = 1 ++ MALI_CUSTOMER_RELEASE ?= 0 ++else ++ MALI_UNIT_TEST ?= 0 ++ MALI_CUSTOMER_RELEASE ?= 1 ++endif + MALI_COVERAGE ?= 0 ++ + CONFIG_MALI_PLATFORM_NAME ?= "devicetree" ++ ++# Kconfig passes in the name with quotes for in-tree builds - remove them. ++MALI_PLATFORM_DIR := $(shell echo $(CONFIG_MALI_PLATFORM_NAME)) ++ ++ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) ++ MALI_JIT_PRESSURE_LIMIT_BASE = 0 ++ MALI_USE_CSF = 1 ++else ++ MALI_JIT_PRESSURE_LIMIT_BASE ?= 1 ++ MALI_USE_CSF ?= 0 ++endif ++ ++ifneq ($(CONFIG_MALI_KUTF), n) ++ MALI_KERNEL_TEST_API ?= 1 ++else ++ MALI_KERNEL_TEST_API ?= 0 ++endif ++ + # Experimental features (corresponding -D definition should be appended to +-# DEFINES below, e.g. for MALI_EXPERIMENTAL_FEATURE, ++# ccflags-y below, e.g. 
for MALI_EXPERIMENTAL_FEATURE, + # -DMALI_EXPERIMENTAL_FEATURE=$(MALI_EXPERIMENTAL_FEATURE) should be appended) + # + # Experimental features must default to disabled, e.g.: + # MALI_EXPERIMENTAL_FEATURE ?= 0 +-MALI_JIT_PRESSURE_LIMIT ?= 0 + MALI_INCREMENTAL_RENDERING ?= 0 + +-# Set up our defines, which will be passed to gcc +-DEFINES = \ +- -DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \ +- -DMALI_USE_CSF=$(MALI_USE_CSF) \ +- -DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \ +- -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ +- -DMALI_COVERAGE=$(MALI_COVERAGE) \ +- -DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \ +- -DMALI_JIT_PRESSURE_LIMIT=$(MALI_JIT_PRESSURE_LIMIT) \ +- -DMALI_INCREMENTAL_RENDERING=$(MALI_INCREMENTAL_RENDERING) ++# ++# ccflags ++# ++ccflags-y = \ ++ -DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \ ++ -DMALI_USE_CSF=$(MALI_USE_CSF) \ ++ -DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \ ++ -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ ++ -DMALI_COVERAGE=$(MALI_COVERAGE) \ ++ -DMALI_RELEASE_NAME=$(MALI_RELEASE_NAME) \ ++ -DMALI_JIT_PRESSURE_LIMIT_BASE=$(MALI_JIT_PRESSURE_LIMIT_BASE) \ ++ -DMALI_INCREMENTAL_RENDERING=$(MALI_INCREMENTAL_RENDERING) \ ++ -DMALI_KBASE_BUILD \ ++ -DMALI_PLATFORM_DIR=$(MALI_PLATFORM_DIR) ++ + + ifeq ($(KBUILD_EXTMOD),) + # in-tree +-DEFINES +=-DMALI_KBASE_PLATFORM_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME) ++ ccflags-y +=-DMALI_KBASE_PLATFORM_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME) + else + # out-of-tree +-DEFINES +=-DMALI_KBASE_PLATFORM_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME) +-endif +- +-DEFINES += -I$(srctree)/drivers/staging/android +-#meson graphics start +-ldflags-y += --strip-debug +-#meson graphics end +- +-DEFINES += -DMALI_KBASE_BUILD +- +-# Use our defines when compiling +-ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux +-subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux 
+- +-SRC := \ +- context/mali_kbase_context.c \ +- debug/mali_kbase_debug_ktrace.c \ +- device/mali_kbase_device.c \ +- mali_kbase_cache_policy.c \ +- mali_kbase_mem.c \ +- mali_kbase_mem_pool_group.c \ +- mali_kbase_native_mgm.c \ +- mali_kbase_ctx_sched.c \ +- mali_kbase_jm.c \ +- mali_kbase_gpuprops.c \ +- mali_kbase_pm.c \ +- mali_kbase_config.c \ +- mali_kbase_vinstr.c \ +- mali_kbase_hwcnt.c \ +- mali_kbase_hwcnt_backend_gpu.c \ +- mali_kbase_hwcnt_gpu.c \ +- mali_kbase_hwcnt_legacy.c \ +- mali_kbase_hwcnt_types.c \ +- mali_kbase_hwcnt_virtualizer.c \ +- mali_kbase_softjobs.c \ +- mali_kbase_hw.c \ +- mali_kbase_debug.c \ +- mali_kbase_gpu_memory_debugfs.c \ +- mali_kbase_mem_linux.c \ +- mali_kbase_core_linux.c \ +- mali_kbase_mem_profile_debugfs.c \ +- mmu/mali_kbase_mmu.c \ +- mmu/mali_kbase_mmu_hw_direct.c \ +- mmu/mali_kbase_mmu_mode_lpae.c \ +- mmu/mali_kbase_mmu_mode_aarch64.c \ +- mali_kbase_disjoint_events.c \ +- mali_kbase_debug_mem_view.c \ +- mali_kbase_smc.c \ +- mali_kbase_mem_pool.c \ +- mali_kbase_mem_pool_debugfs.c \ +- mali_kbase_debugfs_helper.c \ +- mali_kbase_strings.c \ +- mali_kbase_as_fault_debugfs.c \ +- mali_kbase_regs_history_debugfs.c \ +- thirdparty/mali_kbase_mmap.c \ +- tl/mali_kbase_timeline.c \ +- tl/mali_kbase_timeline_io.c \ +- tl/mali_kbase_tlstream.c \ +- tl/mali_kbase_tracepoints.c \ +- gpu/mali_kbase_gpu.c +- +-ifeq ($(MALI_USE_CSF),1) +- SRC += \ +- debug/backend/mali_kbase_debug_ktrace_csf.c \ +- device/backend/mali_kbase_device_csf.c \ +- gpu/backend/mali_kbase_gpu_fault_csf.c \ +- tl/backend/mali_kbase_timeline_csf.c \ +- mmu/backend/mali_kbase_mmu_csf.c \ +- context/backend/mali_kbase_context_csf.c +-else +- SRC += \ +- mali_kbase_dummy_job_wa.c \ +- mali_kbase_debug_job_fault.c \ +- mali_kbase_event.c \ +- mali_kbase_jd.c \ +- mali_kbase_jd_debugfs.c \ +- mali_kbase_js.c \ +- mali_kbase_js_ctx_attr.c \ +- debug/backend/mali_kbase_debug_ktrace_jm.c \ +- device/backend/mali_kbase_device_jm.c \ +- 
gpu/backend/mali_kbase_gpu_fault_jm.c \ +- tl/backend/mali_kbase_timeline_jm.c \ +- mmu/backend/mali_kbase_mmu_jm.c \ +- context/backend/mali_kbase_context_jm.c ++ ccflags-y +=-DMALI_KBASE_PLATFORM_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME) + endif + +-ifeq ($(CONFIG_MALI_CINSTR_GWT),y) +- SRC += mali_kbase_gwt.c +-endif ++ccflags-y += \ ++ -I$(srctree)/include/linux \ ++ -I$(srctree)/drivers/staging/android \ ++ -I$(src) \ ++ -I$(src)/platform/$(MALI_PLATFORM_DIR) \ ++ -I$(src)/../../../base \ ++ -I$(src)/../../../../include + +-ifeq ($(MALI_UNIT_TEST),1) +- SRC += tl/mali_kbase_timeline_test.c +-endif ++subdir-ccflags-y += $(ccflags-y) + +-ifeq ($(MALI_CUSTOMER_RELEASE),0) +- SRC += mali_kbase_regs_dump_debugfs.c +-endif ++# ++# Kernel Modules ++# ++obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o ++obj-$(CONFIG_MALI_ARBITRATION) += arbitration/ ++obj-$(CONFIG_MALI_KUTF) += tests/ ++ ++mali_kbase-y := \ ++ mali_kbase_cache_policy.o \ ++ mali_kbase_ccswe.o \ ++ mali_kbase_mem.o \ ++ mali_kbase_mem_pool_group.o \ ++ mali_kbase_native_mgm.o \ ++ mali_kbase_ctx_sched.o \ ++ mali_kbase_gpuprops.o \ ++ mali_kbase_pm.o \ ++ mali_kbase_config.o \ ++ mali_kbase_vinstr.o \ ++ mali_kbase_hwcnt.o \ ++ mali_kbase_hwcnt_gpu.o \ ++ mali_kbase_hwcnt_legacy.o \ ++ mali_kbase_hwcnt_types.o \ ++ mali_kbase_hwcnt_virtualizer.o \ ++ mali_kbase_softjobs.o \ ++ mali_kbase_hw.o \ ++ mali_kbase_debug.o \ ++ mali_kbase_gpu_memory_debugfs.o \ ++ mali_kbase_mem_linux.o \ ++ mali_kbase_core_linux.o \ ++ mali_kbase_mem_profile_debugfs.o \ ++ mali_kbase_disjoint_events.o \ ++ mali_kbase_debug_mem_view.o \ ++ mali_kbase_smc.o \ ++ mali_kbase_mem_pool.o \ ++ mali_kbase_mem_pool_debugfs.o \ ++ mali_kbase_debugfs_helper.o \ ++ mali_kbase_strings.o \ ++ mali_kbase_as_fault_debugfs.o \ ++ mali_kbase_regs_history_debugfs.o \ ++ mali_kbase_dvfs_debugfs.o \ ++ mali_power_gpu_frequency_trace.o \ ++ mali_kbase_trace_gpu_mem.o ++ ++mali_kbase-$(CONFIG_MALI_CINSTR_GWT) += mali_kbase_gwt.o + 
++mali_kbase-$(CONFIG_SYNC) += \ ++ mali_kbase_sync_android.o \ ++ mali_kbase_sync_common.o + +-ccflags-y += -I$(KBASE_PATH) -I$(KBASE_PATH)/debug \ +- -I$(KBASE_PATH)/debug/backend ++mali_kbase-$(CONFIG_SYNC_FILE) += \ ++ mali_kbase_fence_ops.o \ ++ mali_kbase_sync_file.o \ ++ mali_kbase_sync_common.o ++ ++ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) ++ mali_kbase-y += \ ++ mali_kbase_hwcnt_backend_csf.o \ ++ mali_kbase_hwcnt_backend_csf_if_fw.o ++else ++ mali_kbase-y += \ ++ mali_kbase_jm.o \ ++ mali_kbase_hwcnt_backend_jm.o \ ++ mali_kbase_dummy_job_wa.o \ ++ mali_kbase_debug_job_fault.o \ ++ mali_kbase_event.o \ ++ mali_kbase_jd.o \ ++ mali_kbase_jd_debugfs.o \ ++ mali_kbase_js.o \ ++ mali_kbase_js_ctx_attr.o \ ++ mali_kbase_kinstr_jm.o ++ ++ mali_kbase-$(CONFIG_MALI_DMA_FENCE) += \ ++ mali_kbase_fence_ops.o \ ++ mali_kbase_dma_fence.o \ ++ mali_kbase_fence.o ++ ++ mali_kbase-$(CONFIG_SYNC_FILE) += \ ++ mali_kbase_fence_ops.o \ ++ mali_kbase_fence.o ++endif + +-# Tell the Linux build system from which .o file to create the kernel module +-obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o + +-# Tell the Linux build system to enable building of our .c files +-mali_kbase-y := $(SRC:.c=.o) ++INCLUDE_SUBDIR = \ ++ $(src)/context/Kbuild \ ++ $(src)/debug/Kbuild \ ++ $(src)/device/Kbuild \ ++ $(src)/backend/gpu/Kbuild \ ++ $(src)/mmu/Kbuild \ ++ $(src)/tl/Kbuild \ ++ $(src)/gpu/Kbuild \ ++ $(src)/thirdparty/Kbuild \ ++ $(src)/platform/$(MALI_PLATFORM_DIR)/Kbuild + +-# Kconfig passes in the name with quotes for in-tree builds - remove them. 
+-platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_NAME)) +-MALI_PLATFORM_DIR := platform/$(platform_name) +-ccflags-y += -I$(src)/$(MALI_PLATFORM_DIR) +-include $(src)/$(MALI_PLATFORM_DIR)/Kbuild ++ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) ++ INCLUDE_SUBDIR += $(src)/csf/Kbuild ++endif + +-ifeq ($(CONFIG_MALI_DEVFREQ),y) +- ifeq ($(CONFIG_DEVFREQ_THERMAL),y) +- include $(src)/ipa/Kbuild +- endif ++ifeq ($(CONFIG_MALI_ARBITER_SUPPORT),y) ++ INCLUDE_SUBDIR += $(src)/arbiter/Kbuild + endif + +-ifeq ($(MALI_USE_CSF),1) +- include $(src)/csf/Kbuild +-else +-# empty ++ifeq ($(CONFIG_MALI_DEVFREQ),y) ++ ifeq ($(CONFIG_DEVFREQ_THERMAL),y) ++ INCLUDE_SUBDIR += $(src)/ipa/Kbuild ++ endif + endif + +-ifeq ($(CONFIG_MALI_ARBITER_SUPPORT),y) +- include $(src)/arbiter/Kbuild ++ifeq ($(KBUILD_EXTMOD),) ++# in-tree ++ -include $(INCLUDE_SUBDIR) + else +-# empty ++# out-of-tree ++ include $(INCLUDE_SUBDIR) + endif +- +-mali_kbase-$(CONFIG_MALI_DMA_FENCE) += \ +- mali_kbase_dma_fence.o \ +- mali_kbase_fence.o +-mali_kbase-$(CONFIG_SYNC) += \ +- mali_kbase_sync_android.o \ +- mali_kbase_sync_common.o +-mali_kbase-$(CONFIG_SYNC_FILE) += \ +- mali_kbase_sync_file.o \ +- mali_kbase_sync_common.o \ +- mali_kbase_fence.o +- +-include $(src)/backend/gpu/Kbuild +-mali_kbase-y += $(BACKEND:.c=.o) +- +- +-ccflags-y += -I$(src)/backend/gpu +-subdir-ccflags-y += -I$(src)/backend/gpu +- +-# For kutf and mali_kutf_irq_latency_test +-obj-$(CONFIG_MALI_KUTF) += tests/ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/Kconfig b/dvalin/kernel/drivers/gpu/arm/midgard/Kconfig +index ca59dbb..5541383 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/Kconfig ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/Kconfig +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved. 
+ # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,15 +16,14 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. + # +-# SPDX-License-Identifier: GPL-2.0 + # +-# +- + + menuconfig MALI_MIDGARD + tristate "Mali Midgard series support" + select GPU_TRACEPOINTS if ANDROID + select DMA_SHARED_BUFFER ++ select PM_DEVFREQ ++ select DEVFREQ_THERMAL + default n + help + Enable this option to build support for a ARM Mali Midgard GPU. +@@ -31,13 +31,43 @@ menuconfig MALI_MIDGARD + To compile this driver as a module, choose M here: + this will generate a single module, called mali_kbase. + +-config MALI_GATOR_SUPPORT +- bool "Enable Streamline tracing support" ++if MALI_MIDGARD ++ ++config MALI_PLATFORM_NAME + depends on MALI_MIDGARD ++ string "Platform name" ++ default "devicetree" ++ help ++ Enter the name of the desired platform configuration directory to ++ include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must ++ exist. ++ ++config MALI_REAL_HW ++ depends on MALI_MIDGARD ++ def_bool !MALI_NO_MALI ++ ++menu "Platform specific options" ++source "drivers/gpu/arm/midgard/platform/Kconfig" ++endmenu ++ ++config MALI_CSF_SUPPORT ++ bool "Enable Mali CSF based GPU support" ++ depends on MALI_MIDGARD=m ++ default n ++ help ++ Enables support for CSF based GPUs. ++ ++config MALI_DEVFREQ ++ bool "Enable devfreq support for Mali" ++ depends on MALI_MIDGARD && PM_DEVFREQ ++ select DEVFREQ_GOV_SIMPLE_ONDEMAND + default y + help +- Enables kbase tracing used by the Arm Streamline Performance Analyzer.
+- The tracepoints are used to derive GPU activity charts in Streamline. ++ Support devfreq for Mali. ++ ++ Using the devfreq framework and, by default, the simple on-demand ++ governor, the frequency of Mali will be dynamically selected from the ++ available OPPs. + + config MALI_MIDGARD_DVFS + bool "Enable legacy DVFS" +@@ -46,28 +76,25 @@ config MALI_MIDGARD_DVFS + help + Choose this option to enable legacy DVFS in the Mali Midgard DDK. + ++config MALI_GATOR_SUPPORT ++ bool "Enable Streamline tracing support" ++ depends on MALI_MIDGARD ++ default y ++ help ++ Enables kbase tracing used by the Arm Streamline Performance Analyzer. ++ The tracepoints are used to derive GPU activity charts in Streamline. ++ + config MALI_MIDGARD_ENABLE_TRACE + bool "Enable kbase tracing" + depends on MALI_MIDGARD + default y if MALI_DEBUG + default n + help +- Enables tracing in kbase. Trace log available through ++ Enables tracing in kbase. Trace log available through + the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled + +-config MALI_DEVFREQ +- bool "devfreq support for Mali" +- depends on MALI_MIDGARD && PM_DEVFREQ +- default y +- help +- Support devfreq for Mali. +- +- Using the devfreq framework and, by default, the simpleondemand +- governor, the frequency of Mali will be dynamically selected from the +- available OPPs. +- + config MALI_DMA_FENCE +- bool "DMA_BUF fence support for Mali" ++ bool "Enable DMA_BUF fence support for Mali" + depends on MALI_MIDGARD + default n + help +@@ -76,18 +103,9 @@ config MALI_DMA_FENCE + This option should only be enabled if the Linux Kernel has built in + support for DMA_BUF fences. + +-config MALI_PLATFORM_NAME +- depends on MALI_MIDGARD +- string "Platform name" +- default "devicetree" +- help +- Enter the name of the desired platform configuration directory to +- include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must +- exist. 
+- + config MALI_ARBITER_SUPPORT + bool "Enable arbiter support for Mali" +- depends on MALI_MIDGARD ++ depends on MALI_MIDGARD && !MALI_CSF_SUPPORT + default n + help + Enable support for the arbiter interface in the driver. +@@ -96,18 +114,64 @@ config MALI_ARBITER_SUPPORT + + If unsure, say N. + +-# MALI_EXPERT configuration options ++config MALI_DMA_BUF_MAP_ON_DEMAND ++ bool "Enable map imported dma-bufs on demand" ++ depends on MALI_MIDGARD ++ default n ++ help ++ This option causes kbase to set up the GPU mapping of imported ++ dma-buf when needed to run atoms. This is the legacy behavior. ++ ++ This is intended for testing and the option will get removed in the ++ future. ++ ++config MALI_DMA_BUF_LEGACY_COMPAT ++ bool "Enable legacy compatibility cache flush on dma-buf map" ++ depends on MALI_MIDGARD && !MALI_DMA_BUF_MAP_ON_DEMAND ++ default n ++ help ++ This option enables compatibility with legacy dma-buf mapping ++ behavior, when the dma-buf is mapped on import, by adding cache ++ maintenance where MALI_DMA_BUF_MAP_ON_DEMAND would do the mapping, ++ including a cache flush. ++ ++ This option might work-around issues related to missing cache ++ flushes in other drivers. This only has an effect for clients using ++ UK 11.18 or older. For later UK versions it is not possible. + menuconfig MALI_EXPERT + depends on MALI_MIDGARD + bool "Enable Expert Settings" + default n + help +- Enabling this option and modifying the default settings may produce a driver with performance or +- other limitations. ++ Enabling this option and modifying the default settings may produce ++ a driver with performance or other limitations. ++ ++if MALI_EXPERT ++ ++config MALI_2MB_ALLOC ++ bool "Attempt to allocate 2MB pages" ++ depends on MALI_MIDGARD && MALI_EXPERT ++ default n ++ help ++ Rather than allocating all GPU memory page-by-page, attempt to ++ allocate 2MB pages from the kernel. This reduces TLB pressure and ++ helps to prevent memory fragmentation.
++ ++ If in doubt, say N ++ ++config MALI_MEMORY_FULLY_BACKED ++ bool "Enable memory fully physically-backed" ++ depends on MALI_MIDGARD && MALI_EXPERT ++ default n ++ help ++ This option enables full physical backing of all virtual ++ memory allocations in the kernel. Notice that this build ++ option only affects allocations of grow-on-GPU-page-fault ++ memory. + + config MALI_CORESTACK +- bool "Support controlling power to the GPU core stack" ++ bool "Enable support of GPU core stack power control" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help +@@ -119,15 +183,48 @@ config MALI_CORESTACK + + If unsure, say N. + ++comment "Platform options" ++ depends on MALI_MIDGARD && MALI_EXPERT ++ ++config MALI_NO_MALI ++ bool "Enable No Mali" ++ depends on MALI_MIDGARD && MALI_EXPERT ++ default n ++ help ++ This can be used to test the driver in a simulated environment ++ whereby the hardware is not physically present. If the hardware is physically ++ present it will not be used. This can be used to test the majority of the ++ driver without needing actual hardware or for software benchmarking. ++ All calls to the simulated hardware will complete immediately as if the hardware ++ completed the task. ++ ++config MALI_ERROR_INJECT ++ bool "Enable No Mali error injection" ++ depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI ++ default n ++ help ++ Enables insertion of errors to test module failure and recovery mechanisms. ++ ++config MALI_GEM5_BUILD ++ bool "Enable build of Mali kernel driver for GEM5" ++ depends on MALI_MIDGARD && MALI_EXPERT ++ default n ++ help ++ This option is to do a Mali GEM5 build. ++ If unsure, say N. ++ ++comment "Debug options" ++ depends on MALI_MIDGARD && MALI_EXPERT ++ + config MALI_DEBUG +- bool "Debug build" ++ bool "Enable debug build" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Select this option for increased checking and reporting of errors. 
+ + config MALI_FENCE_DEBUG +- bool "Debug sync fence usage" ++ bool "Enable debug sync fence usage" + depends on MALI_MIDGARD && MALI_EXPERT && (SYNC || SYNC_FILE) + default y if MALI_DEBUG + help +@@ -143,28 +240,6 @@ config MALI_FENCE_DEBUG + The timeout can be changed at runtime through the js_soft_timeout + device attribute, where the timeout is specified in milliseconds. + +-config MALI_NO_MALI +- bool "No Mali" +- depends on MALI_MIDGARD && MALI_EXPERT +- default n +- help +- This can be used to test the driver in a simulated environment +- whereby the hardware is not physically present. If the hardware is physically +- present it will not be used. This can be used to test the majority of the +- driver without needing actual hardware or for software benchmarking. +- All calls to the simulated hardware will complete immediately as if the hardware +- completed the task. +- +-config MALI_REAL_HW +- def_bool !MALI_NO_MALI +- +-config MALI_ERROR_INJECT +- bool "Error injection" +- depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI +- default n +- help +- Enables insertion of errors to test module failure and recovery mechanisms. +- + config MALI_SYSTEM_TRACE + bool "Enable system event tracing support" + depends on MALI_MIDGARD && MALI_EXPERT +@@ -176,63 +251,93 @@ config MALI_SYSTEM_TRACE + minimal overhead when not in use. Enable only if you know what + you are doing. + +-config MALI_2MB_ALLOC +- bool "Attempt to allocate 2MB pages" ++comment "Instrumentation options" + depends on MALI_MIDGARD && MALI_EXPERT +- default n +- help +- Rather than allocating all GPU memory page-by-page, attempt to +- allocate 2MB pages from the kernel. This reduces TLB pressure and +- helps to prevent memory fragmentation. 
+ +- If in doubt, say N ++choice ++ prompt "Select Performance counters set" ++ default MALI_PRFCNT_SET_PRIMARY ++ depends on MALI_MIDGARD && MALI_EXPERT + +-config MALI_PWRSOFT_765 +- bool "PWRSOFT-765 ticket" ++config MALI_PRFCNT_SET_PRIMARY ++ bool "Primary" + depends on MALI_MIDGARD && MALI_EXPERT +- default n + help +- PWRSOFT-765 fixes devfreq cooling devices issues. The fix was merged +- in kernel v4.10, however if backported into the kernel then this +- option must be manually selected. ++ Select this option to use primary set of performance counters. + +- If using kernel >= v4.10 then say N, otherwise if devfreq cooling +- changes have been backported say Y to avoid compilation errors. ++config MALI_PRFCNT_SET_SECONDARY ++ bool "Secondary" ++ depends on MALI_MIDGARD && MALI_EXPERT ++ help ++ Select this option to use secondary set of performance counters. Kernel ++ features that depend on an access to the primary set of counters may ++ become unavailable. Enabling this option will prevent power management ++ from working optimally and may cause instrumentation tools to return ++ bogus results. + +-config MALI_MEMORY_FULLY_BACKED +- bool "Memory fully physically-backed" ++ If unsure, use MALI_PRFCNT_SET_PRIMARY. ++ ++config MALI_PRFCNT_SET_TERTIARY ++ bool "Tertiary" + depends on MALI_MIDGARD && MALI_EXPERT ++ help ++ Select this option to use tertiary set of performance counters. Kernel ++ features that depend on an access to the primary set of counters may ++ become unavailable. Enabling this option will prevent power management ++ from working optimally and may cause instrumentation tools to return ++ bogus results. ++ ++ If unsure, use MALI_PRFCNT_SET_PRIMARY. 
++ ++endchoice ++ ++config MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS ++ bool "Enable runtime selection of performance counters set via debugfs" ++ depends on MALI_MIDGARD && MALI_EXPERT && DEBUG_FS + default n + help +- This option enables full physical backing of all virtual +- memory allocations in the kernel. Notice that this build +- option only affects allocations of grow-on-GPU-page-fault +- memory. ++ Select this option to make the secondary set of performance counters ++ available at runtime via debugfs. Kernel features that depend on an ++ access to the primary set of counters may become unavailable. + +-config MALI_DMA_BUF_MAP_ON_DEMAND +- bool "Map imported dma-bufs on demand" +- depends on MALI_MIDGARD ++ If no runtime debugfs option is set, the build time counter set ++ choice will be used. ++ ++ This feature is unsupported and unstable, and may break at any time. ++ Enabling this option will prevent power management from working ++ optimally and may cause instrumentation tools to return bogus results. ++ ++ No validation is done on the debugfs input. Invalid input could cause ++ performance counter errors. Valid inputs are the values accepted by ++ the SET_SELECT bits of the PRFCNT_CONFIG register as defined in the ++ architecture specification. ++ ++ If unsure, say N. ++ ++config MALI_JOB_DUMP ++ bool "Enable system level support needed for job dumping" ++ depends on MALI_MIDGARD && MALI_EXPERT + default n + help +- This option caused kbase to set up the GPU mapping of imported +- dma-buf when needed to run atoms. This is the legacy behaviour. ++ Choose this option to enable system level support needed for ++ job dumping. This is typically used for instrumentation but has ++ minimal overhead when not in use. Enable only if you know what ++ you are doing. + +- This is intended for testing and the option will get removed in the +- future. 
++comment "Workarounds" ++ depends on MALI_MIDGARD && MALI_EXPERT + +-config MALI_DMA_BUF_LEGACY_COMPAT +- bool "Enable legacy compatibility cache flush on dma-buf map" +- depends on MALI_MIDGARD && !MALI_DMA_BUF_MAP_ON_DEMAND ++config MALI_PWRSOFT_765 ++ bool "Enable workaround for PWRSOFT-765" ++ depends on MALI_MIDGARD && MALI_EXPERT + default n + help +- This option enables compatibility with legacy dma-buf mapping +- behavior, then the dma-buf is mapped on import, by adding cache +- maintenance where MALI_DMA_BUF_MAP_ON_DEMAND would do the mapping, +- including a cache flush. ++ PWRSOFT-765 fixes devfreq cooling devices issues. The fix was merged ++ in kernel v4.10, however if backported into the kernel then this ++ option must be manually selected. + +- This option might work-around issues related to missing cache +- flushes in other drivers. This only has an effect for clients using +- UK 11.18 or older. For later UK versions it is not possible. ++ If using kernel >= v4.10 then say N, otherwise if devfreq cooling ++ changes have been backported say Y to avoid compilation errors. + + config MALI_HW_ERRATA_1485982_NOT_AFFECTED + bool "Disable workaround for BASE_HW_ISSUE_GPU2017_1336" +@@ -252,58 +357,22 @@ config MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE + default n + help + This option uses an alternative workaround for GPU2017-1336. Lowering +- the GPU clock to a, platform specific, known good frequeuncy before ++ the GPU clock to a, platform specific, known good frequency before + powering down the L2 cache. The clock can be specified in the device + tree using the property, opp-mali-errata-1485982. Otherwise the + slowest clock will be selected. + +-config MALI_GEM5_BUILD +- bool "Enable build of Mali kernel driver for GEM5" +- depends on MALI_MIDGARD +- default n +- help +- This option is to do a Mali GEM5 build. +- If unsure, say N. +- +-# Instrumentation options. 
++endif + +-config MALI_JOB_DUMP +- bool "Enable system level support needed for job dumping" +- depends on MALI_MIDGARD && MALI_EXPERT +- default n +- help +- Choose this option to enable system level support needed for +- job dumping. This is typically used for instrumentation but has +- minimal overhead when not in use. Enable only if you know what +- you are doing. +- +-config MALI_PRFCNT_SET_SECONDARY +- bool "Use secondary set of performance counters" +- depends on MALI_MIDGARD && MALI_EXPERT +- default n +- help +- Select this option to use secondary set of performance counters. Kernel +- features that depend on an access to the primary set of counters may +- become unavailable. Enabling this option will prevent power management +- from working optimally and may cause instrumentation tools to return +- bogus results. +- +- If unsure, say N. +- +-config MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS +- bool "Use secondary set of performance counters" +- depends on MALI_MIDGARD && MALI_EXPERT && !MALI_PRFCNT_SET_SECONDARY && DEBUG_FS ++config MALI_ARBITRATION ++ bool "Enable Virtualization reference code" ++ depends on MALI_MIDGARD + default n + help +- Select this option to make the secondary set of performance counters +- available at runtime via debugfs. Kernel features that depend on an +- access to the primary set of counters may become unavailable. +- +- This feature is unsupported and unstable, and may break at any time. +- Enabling this option will prevent power management from working +- optimally and may cause instrumentation tools to return bogus results. +- ++ Enables the build of several reference modules used in the reference ++ virtualization setup for Mali. + If unsure, say N.
+ +-source "drivers/gpu/arm/midgard/platform/Kconfig" + source "drivers/gpu/arm/midgard/tests/Kconfig" ++ ++endif +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/Makefile b/dvalin/kernel/drivers/gpu/arm/midgard/Makefile +index 53a1209..4384e80 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/Makefile ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/Makefile +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,24 +16,200 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. + # +-# SPDX-License-Identifier: GPL-2.0 + # ++ ++KERNEL_SRC ?= /lib/modules/$(shell uname -r)/build ++KDIR ?= $(KERNEL_SRC) ++ ++ifeq ($(KDIR),) ++ $(error Must specify KDIR to point to the kernel to target)) ++endif ++ ++# ++# Default configuration values ++# ++# Dependency resolution is done through statements as Kconfig ++# is not supported for out-of-tree builds. 
+ # + ++CONFIG_MALI_MIDGARD ?= m ++ifeq ($(CONFIG_MALI_MIDGARD),m) ++ CONFIG_MALI_GATOR_SUPPORT ?= y ++ CONFIG_MALI_ARBITRATION ?= n ++ CONFIG_MALI_PARTITION_MANAGER ?= n ++ ++ ifneq ($(CONFIG_MALI_NO_MALI),y) ++ # Prevent misuse when CONFIG_MALI_NO_MALI=y ++ CONFIG_MALI_REAL_HW ?= y ++ endif ++ ++ ifeq ($(CONFIG_MALI_MIDGARD_DVFS),y) ++ # Prevent misuse when CONFIG_MALI_MIDGARD_DVFS=y ++ CONFIG_MALI_DEVFREQ ?= n ++ else ++ CONFIG_MALI_DEVFREQ ?= y ++ endif ++ ++ ifeq ($(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND), y) ++ # Prevent misuse when CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND=y ++ CONFIG_MALI_DMA_BUF_LEGACY_COMPAT = n ++ endif ++ ++ ifeq ($(CONFIG_BSP_HAS_HYPERVISOR),y) ++ ifneq ($(CONFIG_MALI_ARBITRATION), n) ++ CONFIG_MALI_XEN ?= m ++ endif ++ endif ++ ++ # ++ # Expert/Debug/Test released configurations ++ # ++ ifeq ($(CONFIG_MALI_EXPERT), y) ++ ifeq ($(CONFIG_MALI_NO_MALI), y) ++ CONFIG_MALI_REAL_HW = n ++ else ++ # Prevent misuse when CONFIG_MALI_NO_MALI=n ++ CONFIG_MALI_REAL_HW = y ++ CONFIG_MALI_ERROR_INJECT = n ++ endif ++ ++ ifeq ($(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED), y) ++ # Prevent misuse when CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y ++ CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n ++ endif + +-KDIR ?= /lib/modules/$(shell uname -r)/build ++ ifeq ($(CONFIG_MALI_DEBUG), y) ++ CONFIG_MALI_MIDGARD_ENABLE_TRACE ?= y ++ CONFIG_MALI_SYSTEM_TRACE ?= y + +-BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../.. 
+-KBASE_PATH_RELATIVE = $(CURDIR) ++ ifeq ($(CONFIG_SYNC), y) ++ CONFIG_MALI_FENCE_DEBUG ?= y ++ else ++ ifeq ($(CONFIG_SYNC_FILE), y) ++ CONFIG_MALI_FENCE_DEBUG ?= y ++ else ++ CONFIG_MALI_FENCE_DEBUG = n ++ endif ++ endif ++ else ++ # Prevent misuse when CONFIG_MALI_DEBUG=n ++ CONFIG_MALI_MIDGARD_ENABLE_TRACE = n ++ CONFIG_MALI_SYSTEM_TRACE = n ++ CONFIG_MALI_FENCE_DEBUG = n ++ endif ++ else ++ # Prevent misuse when CONFIG_MALI_EXPERT=n ++ CONFIG_MALI_CORESTACK = n ++ CONFIG_MALI_2MB_ALLOC = n ++ CONFIG_MALI_PWRSOFT_765 = n ++ CONFIG_MALI_MEMORY_FULLY_BACKED = n ++ CONFIG_MALI_JOB_DUMP = n ++ CONFIG_MALI_NO_MALI = n ++ CONFIG_MALI_REAL_HW = y ++ CONFIG_MALI_ERROR_INJECT = n ++ CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED = n ++ CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n ++ CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS = n ++ CONFIG_MALI_DEBUG = n ++ CONFIG_MALI_MIDGARD_ENABLE_TRACE = n ++ CONFIG_MALI_SYSTEM_TRACE = n ++ CONFIG_MALI_FENCE_DEBUG = n ++ endif + +-ifeq ($(CONFIG_MALI_BUSLOG),y) +-#Add bus logger symbols +-EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers ++ ifeq ($(CONFIG_MALI_DEBUG), y) ++ CONFIG_MALI_KUTF ?= y ++ ifeq ($(CONFIG_MALI_KUTF), y) ++ CONFIG_MALI_KUTF_IRQ_TEST ?= y ++ CONFIG_MALI_KUTF_CLK_RATE_TRACE ?= y ++ else ++ # Prevent misuse when CONFIG_MALI_KUTF=n ++ CONFIG_MALI_KUTF_IRQ_TEST = n ++ CONFIG_MALI_KUTF_CLK_RATE_TRACE = n ++ endif ++ else ++ # Prevent misuse when CONFIG_MALI_DEBUG=n ++ CONFIG_MALI_KUTF = n ++ CONFIG_MALI_KUTF_IRQ_TEST = n ++ CONFIG_MALI_KUTF_CLK_RATE_TRACE = n ++ endif ++else ++ # Prevent misuse when CONFIG_MALI_MIDGARD=n ++ CONFIG_MALI_ARBITRATION = n ++ CONFIG_MALI_KUTF = n ++ CONFIG_MALI_KUTF_IRQ_TEST = n ++ CONFIG_MALI_KUTF_CLK_RATE_TRACE = n + endif + +-# we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions ++# All Mali CONFIG should be listed here ++CONFIGS := \ ++ CONFIG_MALI_MIDGARD \ ++ CONFIG_MALI_CSF_SUPPORT \ ++ 
CONFIG_MALI_GATOR_SUPPORT \ ++ CONFIG_MALI_DMA_FENCE \ ++ CONFIG_MALI_ARBITER_SUPPORT \ ++ CONFIG_MALI_ARBITRATION \ ++ CONFIG_MALI_PARTITION_MANAGER \ ++ CONFIG_MALI_REAL_HW \ ++ CONFIG_MALI_GEM5_BUILD \ ++ CONFIG_MALI_DEVFREQ \ ++ CONFIG_MALI_MIDGARD_DVFS \ ++ CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND \ ++ CONFIG_MALI_DMA_BUF_LEGACY_COMPAT \ ++ CONFIG_MALI_EXPERT \ ++ CONFIG_MALI_CORESTACK \ ++ CONFIG_MALI_2MB_ALLOC \ ++ CONFIG_MALI_PWRSOFT_765 \ ++ CONFIG_MALI_MEMORY_FULLY_BACKED \ ++ CONFIG_MALI_JOB_DUMP \ ++ CONFIG_MALI_NO_MALI \ ++ CONFIG_MALI_ERROR_INJECT \ ++ CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED \ ++ CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE \ ++ CONFIG_MALI_PRFCNT_SET_PRIMARY \ ++ CONFIG_MALI_PRFCNT_SET_SECONDARY \ ++ CONFIG_MALI_PRFCNT_SET_TERTIARY \ ++ CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS \ ++ CONFIG_MALI_DEBUG \ ++ CONFIG_MALI_MIDGARD_ENABLE_TRACE \ ++ CONFIG_MALI_SYSTEM_TRACE \ ++ CONFIG_MALI_FENCE_DEBUG \ ++ CONFIG_MALI_KUTF \ ++ CONFIG_MALI_KUTF_IRQ_TEST \ ++ CONFIG_MALI_KUTF_CLK_RATE_TRACE \ ++ CONFIG_MALI_XEN ++ ++ ++# ++# MAKE_ARGS to pass the custom CONFIGs on out-of-tree build ++# ++# Generate the list of CONFIGs and values. ++# $(value config) is the name of the CONFIG option. ++# $(value $(value config)) is its value (y, m). ++# When the CONFIG is not set to y or m, it defaults to n. ++MAKE_ARGS := $(foreach config,$(CONFIGS), \ ++ $(if $(filter y m,$(value $(value config))), \ ++ $(value config)=$(value $(value config)), \ ++ $(value config)=n)) ++ ++# ++# EXTRA_CFLAGS to define the custom CONFIGs on out-of-tree build ++# ++# Generate the list of CONFIGs defines with values from CONFIGS. ++# $(value config) is the name of the CONFIG option. ++# When set to y or m, the CONFIG gets defined to 1. 
++EXTRA_CFLAGS := $(foreach config,$(CONFIGS), \ ++ $(if $(filter y m,$(value $(value config))), \ ++ -D$(value config)=1)) ++ ++# ++# KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions ++# ++ + all: +- $(MAKE) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include -I$(CURDIR)/../../../../tests/include $(SCONS_CFLAGS)" $(SCONS_CONFIGS) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules ++ $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules ++ ++modules_install: ++ $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) modules_install + + clean: +- $(MAKE) -C $(KDIR) M=$(CURDIR) clean ++ $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) clean +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/Mconfig b/dvalin/kernel/drivers/gpu/arm/midgard/Mconfig +index b137793..d71a113 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/Mconfig ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/Mconfig +@@ -1,17 +1,22 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved. + # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # +-# A copy of the licence is included with the program, and can also be obtained +-# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +-# Boston, MA 02110-1301, USA. ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. 
++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. + # + # +- + + menuconfig MALI_MIDGARD + bool "Mali Midgard series support" +@@ -22,13 +27,44 @@ menuconfig MALI_MIDGARD + To compile this driver as a module, choose M here: + this will generate a single module, called mali_kbase. + +-config MALI_GATOR_SUPPORT +- bool "Enable Streamline tracing support" +- depends on MALI_MIDGARD && !BACKEND_USER ++config MALI_PLATFORM_NAME ++ depends on MALI_MIDGARD ++ string "Platform name" ++ default "hisilicon" if PLATFORM_HIKEY960 ++ default "hisilicon" if PLATFORM_HIKEY970 ++ default "devicetree" ++ help ++ Enter the name of the desired platform configuration directory to ++ include in the build. 'platform/$(MALI_PLATFORM_NAME)/Makefile' must ++ exist. ++ ++ When PLATFORM_CUSTOM is set, this needs to be set manually to ++ pick up the desired platform files. ++ ++config MALI_REAL_HW ++ bool ++ depends on MALI_MIDGARD + default y ++ default n if NO_MALI ++ ++config MALI_CSF_SUPPORT ++ bool "Enable Mali CSF based GPU support" ++ depends on MALI_MIDGARD ++ default y if GPU_HAS_CSF + help +- Enables kbase tracing used by the Arm Streamline Performance Analyzer. +- The tracepoints are used to derive GPU activity charts in Streamline. ++ Enables support for CSF based GPUs. ++ ++config MALI_DEVFREQ ++ bool "Enable devfreq support for Mali" ++ depends on MALI_MIDGARD ++ default y if PLATFORM_JUNO ++ default y if PLATFORM_CUSTOM ++ help ++ Support devfreq for Mali. ++ ++ Using the devfreq framework and, by default, the simple on-demand ++ governor, the frequency of Mali will be dynamically selected from the ++ available OPPs. + + config MALI_MIDGARD_DVFS + bool "Enable legacy DVFS" +@@ -37,29 +73,25 @@ config MALI_MIDGARD_DVFS + help + Choose this option to enable legacy DVFS in the Mali Midgard DDK. 
+ ++config MALI_GATOR_SUPPORT ++ bool "Enable Streamline tracing support" ++ depends on MALI_MIDGARD && !BACKEND_USER ++ default y ++ help ++ Enables kbase tracing used by the Arm Streamline Performance Analyzer. ++ The tracepoints are used to derive GPU activity charts in Streamline. ++ + config MALI_MIDGARD_ENABLE_TRACE + bool "Enable kbase tracing" + depends on MALI_MIDGARD + default y if MALI_DEBUG + default n + help +- Enables tracing in kbase. Trace log available through ++ Enables tracing in kbase. Trace log available through + the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled + +-config MALI_DEVFREQ +- bool "devfreq support for Mali" +- depends on MALI_MIDGARD +- default y if PLATFORM_JUNO +- default y if PLATFORM_CUSTOM +- help +- Support devfreq for Mali. +- +- Using the devfreq framework and, by default, the simpleondemand +- governor, the frequency of Mali will be dynamically selected from the +- available OPPs. +- + config MALI_DMA_FENCE +- bool "DMA_BUF fence support for Mali" ++ bool "Enable DMA_BUF fence support for Mali" + depends on MALI_MIDGARD + default n + help +@@ -68,23 +100,9 @@ config MALI_DMA_FENCE + This option should only be enabled if the Linux Kernel has built in + support for DMA_BUF fences. + +-config MALI_PLATFORM_NAME +- depends on MALI_MIDGARD +- string "Platform name" +- default "hisilicon" if PLATFORM_HIKEY960 +- default "hisilicon" if PLATFORM_HIKEY970 +- default "devicetree" +- help +- Enter the name of the desired platform configuration directory to +- include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must +- exist. +- +- When PLATFORM_CUSTOM is set, this needs to be set manually to +- pick up the desired platform files. +- + config MALI_ARBITER_SUPPORT + bool "Enable arbiter support for Mali" +- depends on MALI_MIDGARD ++ depends on MALI_MIDGARD && !MALI_CSF_SUPPORT + default n + help + Enable support for the arbiter interface in the driver. 
+@@ -93,62 +111,89 @@ config MALI_ARBITER_SUPPORT + + If unsure, say N. + +-# MALI_EXPERT configuration options ++config DMA_BUF_SYNC_IOCTL_SUPPORTED ++ bool "Enable Kernel DMA buffers support DMA_BUF_IOCTL_SYNC" ++ depends on MALI_MIDGARD && BACKEND_KERNEL ++ default y ++ ++config MALI_DMA_BUF_MAP_ON_DEMAND ++ bool "Enable map imported dma-bufs on demand" ++ depends on MALI_MIDGARD ++ default n ++ default y if !DMA_BUF_SYNC_IOCTL_SUPPORTED ++ help ++ This option caused kbase to set up the GPU mapping of imported ++ dma-buf when needed to run atoms. This is the legacy behavior. ++ ++ This is intended for testing and the option will get removed in the ++ future. ++ ++config MALI_DMA_BUF_LEGACY_COMPAT ++ bool "Enable legacy compatibility cache flush on dma-buf map" ++ depends on MALI_MIDGARD && !MALI_DMA_BUF_MAP_ON_DEMAND ++ default n ++ help ++ This option enables compatibility with legacy dma-buf mapping ++ behavior, then the dma-buf is mapped on import, by adding cache ++ maintenance where MALI_DMA_BUF_MAP_ON_DEMAND would do the mapping, ++ including a cache flush. ++ ++ This option might work-around issues related to missing cache ++ flushes in other drivers. This only has an effect for clients using ++ UK 11.18 or older. For later UK versions it is not possible. + + menuconfig MALI_EXPERT + depends on MALI_MIDGARD + bool "Enable Expert Settings" + default y + help +- Enabling this option and modifying the default settings may produce a driver with performance or +- other limitations. ++ Enabling this option and modifying the default settings may produce ++ a driver with performance or other limitations. 
+ +-config MALI_CORESTACK +- bool "Support controlling power to the GPU core stack" ++config MALI_2MB_ALLOC ++ bool "Attempt to allocate 2MB pages" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help +- Enabling this feature on supported GPUs will let the driver powering +- on/off the GPU core stack independently without involving the Power +- Domain Controller. This should only be enabled on platforms which +- integration of the PDC to the Mali GPU is known to be problematic. +- This feature is currently only supported on t-Six and t-HEx GPUs. ++ Rather than allocating all GPU memory page-by-page, attempt to ++ allocate 2MB pages from the kernel. This reduces TLB pressure and ++ helps to prevent memory fragmentation. + +- If unsure, say N. ++ If in doubt, say N + +-config MALI_DEBUG +- bool "Debug build" ++config MALI_MEMORY_FULLY_BACKED ++ bool "Enable memory fully physically-backed" + depends on MALI_MIDGARD && MALI_EXPERT +- default y if DEBUG + default n + help +- Select this option for increased checking and reporting of errors. ++ This option enables full physical backing of all virtual ++ memory allocations in the kernel. Notice that this build ++ option only affects allocations of grow-on-GPU-page-fault ++ memory. + +-config MALI_FENCE_DEBUG +- bool "Debug sync fence usage" ++config MALI_CORESTACK ++ bool "Enable support of GPU core stack power control" + depends on MALI_MIDGARD && MALI_EXPERT +- default y if MALI_DEBUG ++ default n + help +- Select this option to enable additional checking and reporting on the +- use of sync fences in the Mali driver. +- +- This will add a 3s timeout to all sync fence waits in the Mali +- driver, so that when work for Mali has been waiting on a sync fence +- for a long time a debug message will be printed, detailing what fence +- is causing the block, and which dependent Mali atoms are blocked as a +- result of this. 
++ Enabling this feature on supported GPUs will let the driver powering ++ on/off the GPU core stack independently without involving the Power ++ Domain Controller. This should only be enabled on platforms which ++ integration of the PDC to the Mali GPU is known to be problematic. ++ This feature is currently only supported on t-Six and t-HEx GPUs. + +- The timeout can be changed at runtime through the js_soft_timeout +- device attribute, where the timeout is specified in milliseconds. ++ If unsure, say N. + + choice + prompt "Error injection level" ++ depends on MALI_MIDGARD && MALI_EXPERT + default MALI_ERROR_INJECT_NONE + help + Enables insertion of errors to test module failure and recovery mechanisms. + + config MALI_ERROR_INJECT_NONE + bool "disabled" ++ depends on MALI_MIDGARD && MALI_EXPERT + help + Error injection is disabled. + +@@ -168,14 +213,49 @@ endchoice + + config MALI_ERROR_INJECT_ON + string ++ depends on MALI_MIDGARD && MALI_EXPERT + default "0" if MALI_ERROR_INJECT_NONE + default "1" if MALI_ERROR_INJECT_TRACK_LIST + default "2" if MALI_ERROR_INJECT_RANDOM + + config MALI_ERROR_INJECT + bool ++ depends on MALI_MIDGARD && MALI_EXPERT + default y if !MALI_ERROR_INJECT_NONE + ++config MALI_GEM5_BUILD ++ bool "Enable build of Mali kernel driver for GEM5" ++ depends on MALI_MIDGARD && MALI_EXPERT ++ default n ++ help ++ This option is to do a Mali GEM5 build. ++ If unsure, say N. ++ ++config MALI_DEBUG ++ bool "Enable debug build" ++ depends on MALI_MIDGARD && MALI_EXPERT ++ default y if DEBUG ++ default n ++ help ++ Select this option for increased checking and reporting of errors. ++ ++config MALI_FENCE_DEBUG ++ bool "Enable debug sync fence usage" ++ depends on MALI_MIDGARD && MALI_EXPERT ++ default y if MALI_DEBUG ++ help ++ Select this option to enable additional checking and reporting on the ++ use of sync fences in the Mali driver. 
++ ++ This will add a 3s timeout to all sync fence waits in the Mali ++ driver, so that when work for Mali has been waiting on a sync fence ++ for a long time a debug message will be printed, detailing what fence ++ is causing the block, and which dependent Mali atoms are blocked as a ++ result of this. ++ ++ The timeout can be changed at runtime through the js_soft_timeout ++ device attribute, where the timeout is specified in milliseconds. ++ + config MALI_SYSTEM_TRACE + bool "Enable system event tracing support" + depends on MALI_MIDGARD && MALI_EXPERT +@@ -187,56 +267,35 @@ config MALI_SYSTEM_TRACE + minimal overhead when not in use. Enable only if you know what + you are doing. + +-config MALI_2MB_ALLOC +- bool "Attempt to allocate 2MB pages" +- depends on MALI_MIDGARD && MALI_EXPERT +- default n +- help +- Rather than allocating all GPU memory page-by-page, attempt to +- allocate 2MB pages from the kernel. This reduces TLB pressure and +- helps to prevent memory fragmentation. ++# Instrumentation options. + +- If in doubt, say N ++# config MALI_PRFCNT_SET_PRIMARY exists in the Kernel Kconfig but is configured using CINSTR_PRIMARY_HWC in Mconfig. ++# config MALI_PRFCNT_SET_SECONDARY exists in the Kernel Kconfig but is configured using CINSTR_SECONDARY_HWC in Mconfig. ++# config MALI_PRFCNT_SET_TERTIARY exists in the Kernel Kconfig but is configured using CINSTR_TERTIARY_HWC in Mconfig. ++# config MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS exists in the Kernel Kconfig but is configured using CINSTR_HWC_SET_SELECT_VIA_DEBUG_FS in Mconfig. + +-config MALI_PWRSOFT_765 +- bool "PWRSOFT-765 ticket" ++config MALI_JOB_DUMP ++ bool "Enable system level support needed for job dumping" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help +- PWRSOFT-765 fixes devfreq cooling devices issues. However, they are +- not merged in mainline kernel yet. So this define helps to guard those +- parts of the code. 
+- +-config MALI_MEMORY_FULLY_BACKED +- bool "Memory fully physically-backed" +- default n +- help +- This option enables full backing of all virtual memory allocations +- for the kernel. This only affects grow-on-GPU-page-fault memory. ++ Choose this option to enable system level support needed for ++ job dumping. This is typically used for instrumentation but has ++ minimal overhead when not in use. Enable only if you know what ++ you are doing. + +-config MALI_DMA_BUF_MAP_ON_DEMAND +- bool "Map imported dma-bufs on demand" +- depends on MALI_MIDGARD ++config MALI_PWRSOFT_765 ++ bool "Enable workaround for PWRSOFT-765" ++ depends on MALI_MIDGARD && MALI_EXPERT + default n +- default y if !DMA_BUF_SYNC_IOCTL_SUPPORTED + help +- This option caused kbase to set up the GPU mapping of imported +- dma-buf when needed to run atoms. This is the legacy behaviour. ++ PWRSOFT-765 fixes devfreq cooling devices issues. The fix was merged ++ in kernel v4.10, however if backported into the kernel then this ++ option must be manually selected. + +-config MALI_DMA_BUF_LEGACY_COMPAT +- bool "Enable legacy compatibility cache flush on dma-buf map" +- depends on MALI_MIDGARD && !MALI_DMA_BUF_MAP_ON_DEMAND +- default n +- help +- This option enables compatibility with legacy dma-buf mapping +- behavior, then the dma-buf is mapped on import, by adding cache +- maintenance where MALI_DMA_BUF_MAP_ON_DEMAND would do the mapping, +- including a cache flush. ++ If using kernel >= v4.10 then say N, otherwise if devfreq cooling ++ changes have been backported say Y to avoid compilation errors. + +-config MALI_REAL_HW +- bool +- default y +- default n if NO_MALI + + config MALI_HW_ERRATA_1485982_NOT_AFFECTED + bool "Disable workaround for BASE_HW_ISSUE_GPU2017_1336" +@@ -262,17 +321,6 @@ config MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE + tree using the property, opp-mali-errata-1485982. Otherwise the + slowest clock will be selected. 
+ +-config MALI_GEM5_BUILD +- bool "Enable build of Mali kernel driver for GEM5" +- depends on MALI_MIDGARD +- default n +- help +- This option is to do a Mali GEM5 build. +- If unsure, say N. +- +-# Instrumentation options. +- +-# config MALI_JOB_DUMP exists in the Kernel Kconfig but is configured using CINSTR_JOB_DUMP in Mconfig. +-# config MALI_PRFCNT_SET_SECONDARY exists in the Kernel Kconfig but is configured using CINSTR_SECONDARY_HWC in Mconfig. + ++source "kernel/drivers/gpu/arm/midgard/arbitration/Mconfig" + source "kernel/drivers/gpu/arm/midgard/tests/Mconfig" +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/Kbuild +index 98e47be..5203281 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/Kbuild ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/Kbuild +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,10 +16,8 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. 
+ # +-# SPDX-License-Identifier: GPL-2.0 +-# + # + + mali_kbase-y += \ +- arbiter/mali_kbase_arbif.o \ +- arbiter/mali_kbase_arbiter_pm.o ++ arbiter/mali_kbase_arbif.o \ ++ arbiter/mali_kbase_arbiter_pm.o +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbif.c b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbif.c +index d193cb9..64e11ce 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbif.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbif.c +@@ -1,13 +1,12 @@ +-// SPDX-License-Identifier: GPL-2.0 +- ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -18,13 +17,10 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /** +- * @file mali_kbase_arbif.c +- * Mali arbiter interface APIs to share GPU between Virtual Machines ++ * DOC: Mali arbiter interface APIs to share GPU between Virtual Machines + */ + + #include +@@ -34,32 +30,155 @@ + #include + #include "mali_kbase_arbiter_interface.h" + ++/* Arbiter interface version against which was implemented this module */ ++#define MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION 5 ++#if MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION != \ ++ MALI_KBASE_ARBITER_INTERFACE_VERSION ++#error "Unsupported Mali Arbiter interface version." 
++#endif ++ ++static void on_max_config(struct device *dev, uint32_t max_l2_slices, ++ uint32_t max_core_mask) ++{ ++ struct kbase_device *kbdev; ++ ++ if (!dev) { ++ pr_err("%s(): dev is NULL", __func__); ++ return; ++ } ++ ++ kbdev = dev_get_drvdata(dev); ++ if (!kbdev) { ++ dev_err(dev, "%s(): kbdev is NULL", __func__); ++ return; ++ } ++ ++ if (!max_l2_slices || !max_core_mask) { ++ dev_dbg(dev, ++ "%s(): max_config ignored as one of the fields is zero", ++ __func__); ++ return; ++ } ++ ++ /* set the max config info in the kbase device */ ++ kbase_arbiter_set_max_config(kbdev, max_l2_slices, max_core_mask); ++} ++ ++/** ++ * on_update_freq() - Updates GPU clock frequency ++ * @dev: arbiter interface device handle ++ * @freq: GPU clock frequency value reported from arbiter ++ * ++ * call back function to update GPU clock frequency with ++ * new value from arbiter ++ */ ++static void on_update_freq(struct device *dev, uint32_t freq) ++{ ++ struct kbase_device *kbdev; ++ ++ if (!dev) { ++ pr_err("%s(): dev is NULL", __func__); ++ return; ++ } ++ ++ kbdev = dev_get_drvdata(dev); ++ if (!kbdev) { ++ dev_err(dev, "%s(): kbdev is NULL", __func__); ++ return; ++ } ++ ++ kbase_arbiter_pm_update_gpu_freq(&kbdev->arb.arb_freq, freq); ++} ++ ++/** ++ * on_gpu_stop() - sends KBASE_VM_GPU_STOP_EVT event on VM stop ++ * @dev: arbiter interface device handle ++ * ++ * call back function to signal a GPU STOP event from arbiter interface ++ */ + static void on_gpu_stop(struct device *dev) + { +- struct kbase_device *kbdev = dev_get_drvdata(dev); ++ struct kbase_device *kbdev; ++ ++ if (!dev) { ++ pr_err("%s(): dev is NULL", __func__); ++ return; ++ } + +- KBASE_TLSTREAM_TL_EVENT_ARB_STOP_REQUESTED(kbdev, kbdev); ++ kbdev = dev_get_drvdata(dev); ++ if (!kbdev) { ++ dev_err(dev, "%s(): kbdev is NULL", __func__); ++ return; ++ } ++ ++ KBASE_TLSTREAM_TL_ARBITER_STOP_REQUESTED(kbdev, kbdev); + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_STOP_EVT); + } + ++/** ++ * on_gpu_granted() 
- sends KBASE_VM_GPU_GRANTED_EVT event on GPU granted ++ * @dev: arbiter interface device handle ++ * ++ * call back function to signal a GPU GRANT event from arbiter interface ++ */ + static void on_gpu_granted(struct device *dev) + { +- struct kbase_device *kbdev = dev_get_drvdata(dev); ++ struct kbase_device *kbdev; ++ ++ if (!dev) { ++ pr_err("%s(): dev is NULL", __func__); ++ return; ++ } ++ ++ kbdev = dev_get_drvdata(dev); ++ if (!kbdev) { ++ dev_err(dev, "%s(): kbdev is NULL", __func__); ++ return; ++ } + +- KBASE_TLSTREAM_TL_EVENT_ARB_GRANTED(kbdev, kbdev); ++ KBASE_TLSTREAM_TL_ARBITER_GRANTED(kbdev, kbdev); + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_GRANTED_EVT); + } + ++/** ++ * on_gpu_lost() - sends KBASE_VM_GPU_LOST_EVT event on GPU granted ++ * @dev: arbiter interface device handle ++ * ++ * call back function to signal a GPU LOST event from arbiter interface ++ */ + static void on_gpu_lost(struct device *dev) + { +- struct kbase_device *kbdev = dev_get_drvdata(dev); ++ struct kbase_device *kbdev; ++ ++ if (!dev) { ++ pr_err("%s(): dev is NULL", __func__); ++ return; ++ } ++ ++ kbdev = dev_get_drvdata(dev); ++ if (!kbdev) { ++ dev_err(dev, "%s(): kbdev is NULL", __func__); ++ return; ++ } + + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_LOST_EVT); + } + ++/** ++ * kbase_arbif_init() - Kbase Arbiter interface initialisation. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Initialise Kbase Arbiter interface and assign callback functions. ++ * ++ * Return: ++ * * 0 - the interface was initialized or was not specified ++ * * in the device tree. ++ * * -EFAULT - the interface was specified but failed to initialize. ++ * * -EPROBE_DEFER - module dependencies are not yet available. 
++ */ + int kbase_arbif_init(struct kbase_device *kbdev) + { +-#ifdef CONFIG_OF ++#if IS_ENABLED(CONFIG_OF) + struct arbiter_if_arb_vm_ops ops; + struct arbiter_if_dev *arb_if; + struct device_node *arbiter_if_node; +@@ -100,17 +219,26 @@ int kbase_arbif_init(struct kbase_device *kbdev) + ops.arb_vm_gpu_stop = on_gpu_stop; + ops.arb_vm_gpu_granted = on_gpu_granted; + ops.arb_vm_gpu_lost = on_gpu_lost; ++ ops.arb_vm_max_config = on_max_config; ++ ops.arb_vm_update_freq = on_update_freq; ++ ++ kbdev->arb.arb_freq.arb_freq = 0; ++ kbdev->arb.arb_freq.freq_updated = false; ++ mutex_init(&kbdev->arb.arb_freq.arb_freq_lock); + + /* register kbase arbiter_if callbacks */ + if (arb_if->vm_ops.vm_arb_register_dev) { + err = arb_if->vm_ops.vm_arb_register_dev(arb_if, + kbdev->dev, &ops); + if (err) { +- dev_err(kbdev->dev, "Arbiter registration failed.\n"); ++ dev_err(&pdev->dev, "Failed to register with arbiter\n"); + module_put(pdev->dev.driver->owner); ++ if (err != -EPROBE_DEFER) ++ err = -EFAULT; + return err; + } + } ++ + #else /* CONFIG_OF */ + dev_dbg(kbdev->dev, "No arbiter without Device Tree support\n"); + kbdev->arb.arb_dev = NULL; +@@ -119,6 +247,12 @@ int kbase_arbif_init(struct kbase_device *kbdev) + return 0; + } + ++/** ++ * kbase_arbif_destroy() - De-init Kbase arbiter interface ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * De-initialise Kbase arbiter interface ++ */ + void kbase_arbif_destroy(struct kbase_device *kbdev) + { + struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; +@@ -133,27 +267,64 @@ void kbase_arbif_destroy(struct kbase_device *kbdev) + kbdev->arb.arb_dev = NULL; + } + ++/** ++ * kbase_arbif_get_max_config() - Request max config info ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * call back function from arb interface to arbiter requesting max config info ++ */ ++void kbase_arbif_get_max_config(struct kbase_device *kbdev) ++{ ++ struct arbiter_if_dev 
*arb_if = kbdev->arb.arb_if; ++ ++ if (arb_if && arb_if->vm_ops.vm_arb_get_max_config) { ++ dev_dbg(kbdev->dev, "%s\n", __func__); ++ arb_if->vm_ops.vm_arb_get_max_config(arb_if); ++ } ++} ++ ++/** ++ * kbase_arbif_gpu_request() - Request GPU from ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * call back function from arb interface to arbiter requesting GPU for VM ++ */ + void kbase_arbif_gpu_request(struct kbase_device *kbdev) + { + struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; + + if (arb_if && arb_if->vm_ops.vm_arb_gpu_request) { + dev_dbg(kbdev->dev, "%s\n", __func__); ++ KBASE_TLSTREAM_TL_ARBITER_REQUESTED(kbdev, kbdev); + arb_if->vm_ops.vm_arb_gpu_request(arb_if); + } + } + ++/** ++ * kbase_arbif_gpu_stopped() - send GPU stopped message to the arbiter ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @gpu_required: GPU request flag ++ * ++ */ + void kbase_arbif_gpu_stopped(struct kbase_device *kbdev, u8 gpu_required) + { + struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; + + if (arb_if && arb_if->vm_ops.vm_arb_gpu_stopped) { + dev_dbg(kbdev->dev, "%s\n", __func__); +- KBASE_TLSTREAM_TL_EVENT_ARB_STOPPED(kbdev, kbdev); ++ KBASE_TLSTREAM_TL_ARBITER_STOPPED(kbdev, kbdev); ++ if (gpu_required) ++ KBASE_TLSTREAM_TL_ARBITER_REQUESTED(kbdev, kbdev); + arb_if->vm_ops.vm_arb_gpu_stopped(arb_if, gpu_required); + } + } + ++/** ++ * kbase_arbif_gpu_active() - Sends a GPU_ACTIVE message to the Arbiter ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Informs the arbiter VM is active ++ */ + void kbase_arbif_gpu_active(struct kbase_device *kbdev) + { + struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; +@@ -164,6 +335,12 @@ void kbase_arbif_gpu_active(struct kbase_device *kbdev) + } + } + ++/** ++ * kbase_arbif_gpu_idle() - Inform the arbiter that the VM has gone idle ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) 
++ * ++ * Informs the arbiter VM is idle ++ */ + void kbase_arbif_gpu_idle(struct kbase_device *kbdev) + { + struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbif.h b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbif.h +index e7e9de7..701ffd4 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbif.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbif.h +@@ -1,28 +1,7 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT ARM Limited. All rights reserved. +- * +- * This program is free software and is provided to you under the terms of the +- * GNU General Public License version 2 as published by the Free Software +- * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. +- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, you can access it online at +- * http://www.gnu.org/licenses/gpl-2.0.html. +- * +- * SPDX-License-Identifier: GPL-2.0 +- * +- *//* SPDX-License-Identifier: GPL-2.0 */ +- +-/* +- * +- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software +@@ -38,12 +17,10 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * + */ + + /** +- * @file +- * Mali arbiter interface APIs to share GPU between Virtual Machines ++ * DOC: Mali arbiter interface APIs to share GPU between Virtual Machines + */ + + #ifndef _MALI_KBASE_ARBIF_H_ +@@ -80,8 +57,11 @@ enum kbase_arbif_evt { + * Initialize the arbiter interface and also determines + * if Arbiter functionality is required. + * +- * Return: 0 if the Arbiter interface was successfully initialized or the +- * Arbiter was not required. ++ * Return: ++ * * 0 - the interface was initialized or was not specified ++ * * in the device tree. ++ * * -EFAULT - the interface was specified but failed to initialize. ++ * * -EPROBE_DEFER - module dependencies are not yet available. + */ + int kbase_arbif_init(struct kbase_device *kbdev); + +@@ -94,6 +74,14 @@ int kbase_arbif_init(struct kbase_device *kbdev); + */ + void kbase_arbif_destroy(struct kbase_device *kbdev); + ++/** ++ * kbase_arbif_get_max_config() - Request max config info ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * call back function from arb interface to arbiter requesting max config info ++ */ ++void kbase_arbif_get_max_config(struct kbase_device *kbdev); ++ + /** + * kbase_arbif_gpu_request() - Send GPU request message to the arbiter + * @kbdev: The kbase device structure for the device (must be a valid pointer) +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_defs.h b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_defs.h +index 1f53cbf..570a82a 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_defs.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_defs.h +@@ -1,28 +1,7 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT ARM Limited. All rights reserved. 
+- * +- * This program is free software and is provided to you under the terms of the +- * GNU General Public License version 2 as published by the Free Software +- * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. +- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, you can access it online at +- * http://www.gnu.org/licenses/gpl-2.0.html. +- * +- * SPDX-License-Identifier: GPL-2.0 +- * +- *//* SPDX-License-Identifier: GPL-2.0 */ +- +-/* +- * +- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software +@@ -38,7 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * + */ + + /** +@@ -66,7 +44,8 @@ + * @vm_resume_work: Work item for vm_arb_wq to resume current work on GPU + * @vm_arb_starting: Work queue resume in progress + * @vm_arb_stopping: Work queue suspend in progress +- * @vm_arb_users_waiting: Count of users waiting for GPU ++ * @interrupts_installed: Flag set when interrupts are installed ++ * @vm_request_timer: Timer to monitor GPU request + */ + struct kbase_arbiter_vm_state { + struct kbase_device *kbdev; +@@ -78,7 +57,8 @@ struct kbase_arbiter_vm_state { + struct work_struct vm_resume_work; + bool vm_arb_starting; + bool vm_arb_stopping; +- int vm_arb_users_waiting; ++ bool interrupts_installed; ++ struct hrtimer vm_request_timer; + }; + + /** +@@ -86,10 +66,12 @@ struct kbase_arbiter_vm_state { + * allocated from the probe method of Mali driver + * @arb_if: Pointer to the arbiter interface device + * @arb_dev: Pointer to the arbiter device ++ * @arb_freq: GPU clock frequency retrieved from arbiter. + */ + struct kbase_arbiter_device { + struct arbiter_if_dev *arb_if; + struct device *arb_dev; ++ struct kbase_arbiter_freq arb_freq; + }; + + #endif /* _MALI_KBASE_ARBITER_DEFS_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_interface.h b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_interface.h +index 5d5d8a7..c0137f7 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_interface.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_interface.h +@@ -1,28 +1,7 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT ARM Limited. All rights reserved. +- * +- * This program is free software and is provided to you under the terms of the +- * GNU General Public License version 2 as published by the Free Software +- * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. 
+- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. +- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, you can access it online at +- * http://www.gnu.org/licenses/gpl-2.0.html. +- * +- * SPDX-License-Identifier: GPL-2.0 +- * +- *//* SPDX-License-Identifier: GPL-2.0 */ +- +-/* +- * +- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software +@@ -38,7 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * + */ + + /** +@@ -50,7 +28,7 @@ + #define _MALI_KBASE_ARBITER_INTERFACE_H_ + + /** +- * @brief Mali arbiter interface version ++ * Mali arbiter interface version + * + * This specifies the current version of the configuration interface. Whenever + * the arbiter interface changes, so that integration effort is required, the +@@ -61,8 +39,15 @@ + * 1 - Added the Mali arbiter configuration interface. + * 2 - Strip out reference code from header + * 3 - Removed DVFS utilization interface (DVFS moved to arbiter side) ++ * 4 - Added max_config support ++ * 5 - Added GPU clock frequency reporting support from arbiter + */ +-#define MALI_KBASE_ARBITER_INTERFACE_VERSION 3 ++#define MALI_KBASE_ARBITER_INTERFACE_VERSION 5 ++ ++/** ++ * NO_FREQ is used in case platform doesn't support reporting frequency ++ */ ++#define NO_FREQ 0 + + struct arbiter_if_dev; + +@@ -108,6 +93,27 @@ struct arbiter_if_arb_vm_ops { + * If successful, will respond with a vm_arb_gpu_stopped message. 
+ */ + void (*arb_vm_gpu_lost)(struct device *dev); ++ ++ /** ++ * arb_vm_max_config() - Send max config info to the VM ++ * @dev: The arbif kernel module device. ++ * @max_l2_slices: The maximum number of L2 slices. ++ * @max_core_mask: The largest core mask. ++ * ++ * Informs KBase the maximum resources that can be allocated to the ++ * partition in use. ++ */ ++ void (*arb_vm_max_config)(struct device *dev, uint32_t max_l2_slices, ++ uint32_t max_core_mask); ++ ++ /** ++ * arb_vm_update_freq() - GPU clock frequency has been updated ++ * @dev: The arbif kernel module device. ++ * @freq: GPU clock frequency value reported from arbiter ++ * ++ * Informs KBase that the GPU clock frequency has been updated. ++ */ ++ void (*arb_vm_update_freq)(struct device *dev, uint32_t freq); + }; + + /** +@@ -126,6 +132,11 @@ struct arbiter_if_vm_arb_ops { + * @dev: The device structure to supply in the callbacks. + * @ops: The callbacks that the device driver supports + * (none are optional). ++ * ++ * Return: ++ * * 0 - successful. ++ * * -EINVAL - invalid argument. ++ * * -EPROBE_DEFER - module dependencies are not yet available. + */ + int (*vm_arb_register_dev)(struct arbiter_if_dev *arbif_dev, + struct device *dev, struct arbiter_if_arb_vm_ops *ops); +@@ -136,6 +147,13 @@ struct arbiter_if_vm_arb_ops { + */ + void (*vm_arb_unregister_dev)(struct arbiter_if_dev *arbif_dev); + ++ /** ++ * vm_arb_gpu_get_max_config() - Request the max config from the ++ * Arbiter. ++ * @arbif_dev: The arbiter interface we want to issue the request. ++ */ ++ void (*vm_arb_get_max_config)(struct arbiter_if_dev *arbif_dev); ++ + /** + * vm_arb_gpu_request() - Ask the arbiter interface for GPU access. + * @arbif_dev: The arbiter interface we want to issue the request. 
+diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_pm.c b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_pm.c +index 6c35e16..5c75686 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_pm.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_pm.c +@@ -1,13 +1,12 @@ +-// SPDX-License-Identifier: GPL-2.0 +- ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -18,27 +17,49 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /** +- * @file mali_kbase_arbiter_pm.c ++ * @file + * Mali arbiter power manager state machine and APIs + */ + + #include + #include + #include +-#include ++#include + #include +-#include ++#include + #include ++#include ++ ++/* A dmesg warning will occur if the GPU is not granted ++ * after the following time (in milliseconds) has elapsed.
++ */ ++#define GPU_REQUEST_TIMEOUT 1000 ++#define KHZ_TO_HZ 1000 ++ ++#define MAX_L2_SLICES_MASK 0xFF ++ ++/* Maximum time in ms, before deferring probe in case ++ * GPU_GRANTED message is not received ++ */ ++static int gpu_req_timeout = 1; ++module_param(gpu_req_timeout, int, 0644); ++MODULE_PARM_DESC(gpu_req_timeout, ++ "On a virtualized platform, if the GPU is not granted within this time(ms) kbase will defer the probe"); + + static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev); + static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld( + struct kbase_device *kbdev); + ++/** ++ * kbase_arbiter_pm_vm_state_str() - Helper function to get string ++ * for kbase VM state.(debug) ++ * @state: kbase VM state ++ * ++ * Return: string representation of Kbase_vm_state ++ */ + static inline const char *kbase_arbiter_pm_vm_state_str( + enum kbase_vm_state state) + { +@@ -73,6 +94,13 @@ static inline const char *kbase_arbiter_pm_vm_state_str( + } + } + ++/** ++ * kbase_arbiter_pm_vm_event_str() - Helper function to get string ++ * for kbase VM event.(debug) ++ * @evt: kbase VM event ++ * ++ * Return: String representation of Kbase_arbif_event ++ */ + static inline const char *kbase_arbiter_pm_vm_event_str( + enum kbase_arbif_evt evt) + { +@@ -99,6 +127,13 @@ static inline const char *kbase_arbiter_pm_vm_event_str( + } + } + ++/** ++ * kbase_arbiter_pm_vm_set_state() - Sets new kbase_arbiter_vm_state ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @new_state: kbase VM new state ++ * ++ * This function sets the new state for the VM ++ */ + static void kbase_arbiter_pm_vm_set_state(struct kbase_device *kbdev, + enum kbase_vm_state new_state) + { +@@ -107,11 +142,22 @@ static void kbase_arbiter_pm_vm_set_state(struct kbase_device *kbdev, + dev_dbg(kbdev->dev, "VM set_state %s -> %s", + kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state), + kbase_arbiter_pm_vm_state_str(new_state)); ++ +
lockdep_assert_held(&arb_vm_state->vm_state_lock); + arb_vm_state->vm_state = new_state; ++ if (new_state != KBASE_VM_STATE_INITIALIZING_WITH_GPU && ++ new_state != KBASE_VM_STATE_INITIALIZING) ++ KBASE_KTRACE_ADD(kbdev, ARB_VM_STATE, NULL, new_state); + wake_up(&arb_vm_state->vm_state_wait); + } + ++/** ++ * kbase_arbiter_pm_suspend_wq() - suspend work queue of the driver. ++ * @data: work queue ++ * ++ * Suspends work queue of the driver, when VM is in SUSPEND_PENDING or ++ * STOPPING_IDLE or STOPPING_ACTIVE state ++ */ + static void kbase_arbiter_pm_suspend_wq(struct work_struct *data) + { + struct kbase_arbiter_vm_state *arb_vm_state = container_of(data, +@@ -136,6 +182,13 @@ static void kbase_arbiter_pm_suspend_wq(struct work_struct *data) + dev_dbg(kbdev->dev, "<%s\n", __func__); + } + ++/** ++ * kbase_arbiter_pm_resume_wq() -Kbase resume work queue. ++ * @data: work item ++ * ++ * Resume work queue of the driver when VM is in STARTING state, ++ * else if its in STOPPING_ACTIVE will request a stop event. ++ */ + static void kbase_arbiter_pm_resume_wq(struct work_struct *data) + { + struct kbase_arbiter_vm_state *arb_vm_state = container_of(data, +@@ -157,9 +210,74 @@ static void kbase_arbiter_pm_resume_wq(struct work_struct *data) + } + arb_vm_state->vm_arb_starting = false; + mutex_unlock(&arb_vm_state->vm_state_lock); ++ KBASE_TLSTREAM_TL_ARBITER_STARTED(kbdev, kbdev); + dev_dbg(kbdev->dev, "<%s\n", __func__); + } + ++/** ++ * request_timer_callback() - Issue warning on request timer expiration ++ * @timer: Request hr timer data ++ * ++ * Called when the Arbiter takes too long to grant the GPU after a ++ * request has been made. Issues a warning in dmesg. 
++ * ++ * Return: Always returns HRTIMER_NORESTART ++ */ ++static enum hrtimer_restart request_timer_callback(struct hrtimer *timer) ++{ ++ struct kbase_arbiter_vm_state *arb_vm_state = container_of(timer, ++ struct kbase_arbiter_vm_state, vm_request_timer); ++ ++ KBASE_DEBUG_ASSERT(arb_vm_state); ++ KBASE_DEBUG_ASSERT(arb_vm_state->kbdev); ++ ++ dev_warn(arb_vm_state->kbdev->dev, ++ "Still waiting for GPU to be granted from Arbiter after %d ms\n", ++ GPU_REQUEST_TIMEOUT); ++ return HRTIMER_NORESTART; ++} ++ ++/** ++ * start_request_timer() - Start a timer after requesting GPU ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Start a timer to track when kbase is waiting for the GPU from the ++ * Arbiter. If the timer expires before GPU is granted, a warning in ++ * dmesg will be issued. ++ */ ++static void start_request_timer(struct kbase_device *kbdev) ++{ ++ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; ++ ++ hrtimer_start(&arb_vm_state->vm_request_timer, ++ HR_TIMER_DELAY_MSEC(GPU_REQUEST_TIMEOUT), ++ HRTIMER_MODE_REL); ++} ++ ++/** ++ * cancel_request_timer() - Stop the request timer ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Stops the request timer once GPU has been granted. Safe to call ++ * even if timer is no longer running. ++ */ ++static void cancel_request_timer(struct kbase_device *kbdev) ++{ ++ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; ++ ++ hrtimer_cancel(&arb_vm_state->vm_request_timer); ++} ++ ++/** ++ * kbase_arbiter_pm_early_init() - Initialize arbiter for VM ++ * Paravirtualized use. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Initialize the arbiter and other required resources during the runtime ++ * and request the GPU for the VM for the first time. 
++ * ++ * Return: 0 if success, or a Linux error code ++ */ + int kbase_arbiter_pm_early_init(struct kbase_device *kbdev) + { + int err; +@@ -179,29 +297,49 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev) + WQ_HIGHPRI); + if (!arb_vm_state->vm_arb_wq) { + dev_err(kbdev->dev, "Failed to allocate vm_arb workqueue\n"); ++ kfree(arb_vm_state); + return -ENOMEM; + } + INIT_WORK(&arb_vm_state->vm_suspend_work, kbase_arbiter_pm_suspend_wq); + INIT_WORK(&arb_vm_state->vm_resume_work, kbase_arbiter_pm_resume_wq); + arb_vm_state->vm_arb_starting = false; +- arb_vm_state->vm_arb_users_waiting = 0; ++ atomic_set(&kbdev->pm.gpu_users_waiting, 0); ++ hrtimer_init(&arb_vm_state->vm_request_timer, CLOCK_MONOTONIC, ++ HRTIMER_MODE_REL); ++ arb_vm_state->vm_request_timer.function = ++ request_timer_callback; + kbdev->pm.arb_vm_state = arb_vm_state; + + err = kbase_arbif_init(kbdev); + if (err) { ++ dev_err(kbdev->dev, "Failed to initialise arbif module\n"); + goto arbif_init_fail; + } ++ + if (kbdev->arb.arb_if) { + kbase_arbif_gpu_request(kbdev); + dev_dbg(kbdev->dev, "Waiting for initial GPU assignment...\n"); +- wait_event(arb_vm_state->vm_state_wait, ++ err = wait_event_timeout(arb_vm_state->vm_state_wait, + arb_vm_state->vm_state == +- KBASE_VM_STATE_INITIALIZING_WITH_GPU); ++ KBASE_VM_STATE_INITIALIZING_WITH_GPU, ++ msecs_to_jiffies(gpu_req_timeout)); ++ ++ if (!err) { ++ dev_dbg(kbdev->dev, ++ "Kbase probe Deferred after waiting %d ms to receive GPU_GRANT\n", ++ gpu_req_timeout); ++ err = -EPROBE_DEFER; ++ goto arbif_eprobe_defer; ++ } ++ + dev_dbg(kbdev->dev, + "Waiting for initial GPU assignment - done\n"); + } + return 0; + ++arbif_eprobe_defer: ++ kbase_arbiter_pm_early_term(kbdev); ++ return err; + arbif_init_fail: + destroy_workqueue(arb_vm_state->vm_arb_wq); + kfree(arb_vm_state); +@@ -209,35 +347,72 @@ arbif_init_fail: + return err; + } + ++/** ++ * kbase_arbiter_pm_early_term() - Shutdown arbiter and free resources ++ * @kbdev: The kbase device 
structure for the device (must be a valid pointer) ++ * ++ * Clean up all the resources ++ */ + void kbase_arbiter_pm_early_term(struct kbase_device *kbdev) + { + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + ++ cancel_request_timer(kbdev); + mutex_lock(&arb_vm_state->vm_state_lock); +- if (arb_vm_state->vm_state > KBASE_VM_STATE_STOPPED_GPU_REQUESTED) ++ if (arb_vm_state->vm_state > KBASE_VM_STATE_STOPPED_GPU_REQUESTED) { ++ kbase_pm_set_gpu_lost(kbdev, false); + kbase_arbif_gpu_stopped(kbdev, false); +- ++ } + mutex_unlock(&arb_vm_state->vm_state_lock); +- kbase_arbif_destroy(kbdev); + destroy_workqueue(arb_vm_state->vm_arb_wq); ++ kbase_arbif_destroy(kbdev); + arb_vm_state->vm_arb_wq = NULL; + kfree(kbdev->pm.arb_vm_state); + kbdev->pm.arb_vm_state = NULL; + } + ++/** ++ * kbase_arbiter_pm_release_interrupts() - Release the GPU interrupts ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Releases interrupts and set the interrupt flag to false ++ */ + void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev) + { + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + mutex_lock(&arb_vm_state->vm_state_lock); +- if (!kbdev->arb.arb_if || +- arb_vm_state->vm_state > +- KBASE_VM_STATE_STOPPED_GPU_REQUESTED) ++ if (arb_vm_state->interrupts_installed == true) { ++ arb_vm_state->interrupts_installed = false; + kbase_release_interrupts(kbdev); ++ } ++ mutex_unlock(&arb_vm_state->vm_state_lock); ++} + ++/** ++ * kbase_arbiter_pm_install_interrupts() - Install the GPU interrupts ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Install interrupts and set the interrupt_install flag to true. 
++ */ ++int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev) ++{ ++ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; ++ int err; ++ ++ mutex_lock(&arb_vm_state->vm_state_lock); ++ arb_vm_state->interrupts_installed = true; ++ err = kbase_install_interrupts(kbdev); + mutex_unlock(&arb_vm_state->vm_state_lock); ++ return err; + } + ++/** ++ * kbase_arbiter_pm_vm_stopped() - Handle stop state for the VM ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Handles a stop state for the VM ++ */ + void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev) + { + bool request_gpu = false; +@@ -245,14 +420,19 @@ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev) + + lockdep_assert_held(&arb_vm_state->vm_state_lock); + +- if (arb_vm_state->vm_arb_users_waiting > 0 && ++ if (atomic_read(&kbdev->pm.gpu_users_waiting) > 0 && + arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_IDLE) + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPING_ACTIVE); + + dev_dbg(kbdev->dev, "%s %s\n", __func__, + kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); +- kbase_release_interrupts(kbdev); ++ ++ if (arb_vm_state->interrupts_installed) { ++ arb_vm_state->interrupts_installed = false; ++ kbase_release_interrupts(kbdev); ++ } ++ + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_STOPPING_ACTIVE: + request_gpu = true; +@@ -271,14 +451,95 @@ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev) + break; + } + ++ kbase_pm_set_gpu_lost(kbdev, false); + kbase_arbif_gpu_stopped(kbdev, request_gpu); ++ if (request_gpu) ++ start_request_timer(kbdev); ++} ++ ++void kbase_arbiter_set_max_config(struct kbase_device *kbdev, ++ uint32_t max_l2_slices, ++ uint32_t max_core_mask) ++{ ++ struct kbase_arbiter_vm_state *arb_vm_state; ++ struct max_config_props max_config; ++ ++ if (!kbdev) ++ return; ++ ++ /* Mask the max_l2_slices as it is stored as 8 bits into kbase */ ++ max_config.l2_slices = 
max_l2_slices & MAX_L2_SLICES_MASK; ++ max_config.core_mask = max_core_mask; ++ arb_vm_state = kbdev->pm.arb_vm_state; ++ ++ mutex_lock(&arb_vm_state->vm_state_lock); ++ /* Just set the max_props in kbase during initialization. */ ++ if (arb_vm_state->vm_state == KBASE_VM_STATE_INITIALIZING) ++ kbase_gpuprops_set_max_config(kbdev, &max_config); ++ else ++ dev_dbg(kbdev->dev, "Unexpected max_config on VM state %s", ++ kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); ++ ++ mutex_unlock(&arb_vm_state->vm_state_lock); ++} ++ ++int kbase_arbiter_pm_gpu_assigned(struct kbase_device *kbdev) ++{ ++ struct kbase_arbiter_vm_state *arb_vm_state; ++ int result = -EINVAL; ++ ++ if (!kbdev) ++ return result; ++ ++ /* First check the GPU_LOST state */ ++ kbase_pm_lock(kbdev); ++ if (kbase_pm_is_gpu_lost(kbdev)) { ++ kbase_pm_unlock(kbdev); ++ return 0; ++ } ++ kbase_pm_unlock(kbdev); ++ ++ /* Then the arbitration state machine */ ++ arb_vm_state = kbdev->pm.arb_vm_state; ++ ++ mutex_lock(&arb_vm_state->vm_state_lock); ++ switch (arb_vm_state->vm_state) { ++ case KBASE_VM_STATE_INITIALIZING: ++ case KBASE_VM_STATE_SUSPENDED: ++ case KBASE_VM_STATE_STOPPED: ++ case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: ++ case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: ++ result = 0; ++ break; ++ default: ++ result = 1; ++ break; ++ } ++ mutex_unlock(&arb_vm_state->vm_state_lock); ++ ++ return result; + } + ++/** ++ * kbase_arbiter_pm_vm_gpu_start() - Handles the start state of the VM ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Handles the start state of the VM ++ */ + static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev) + { + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; ++ bool freq_updated = false; + + lockdep_assert_held(&arb_vm_state->vm_state_lock); ++ mutex_lock(&kbdev->arb.arb_freq.arb_freq_lock); ++ if (kbdev->arb.arb_freq.freq_updated) { ++ kbdev->arb.arb_freq.freq_updated = false; ++ freq_updated = 
true; ++ } ++ mutex_unlock(&kbdev->arb.arb_freq.arb_freq_lock); ++ ++ cancel_request_timer(kbdev); + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_INITIALIZING: + kbase_arbiter_pm_vm_set_state(kbdev, +@@ -286,22 +547,43 @@ static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev) + break; + case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STARTING); ++ arb_vm_state->interrupts_installed = true; + kbase_install_interrupts(kbdev); ++ /* ++ * GPU GRANTED received while in stop can be a result of a ++ * repartitioning. ++ */ ++ kbase_gpuprops_req_curr_config_update(kbdev); ++ /* curr_config will be updated while resuming the PM. */ + queue_work(arb_vm_state->vm_arb_wq, + &arb_vm_state->vm_resume_work); + break; + case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: ++ kbase_pm_set_gpu_lost(kbdev, false); + kbase_arbif_gpu_stopped(kbdev, false); + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPENDED); + break; + default: +- dev_warn(kbdev->dev, +- "GPU_GRANTED when not expected - state %s\n", +- kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); ++ /* ++ * GPU_GRANTED can be received when there is a frequency update ++ * Only show a warning if received in an unexpected state ++ * without a frequency update ++ */ ++ if (!freq_updated) ++ dev_warn(kbdev->dev, ++ "GPU_GRANTED when not expected - state %s\n", ++ kbase_arbiter_pm_vm_state_str( ++ arb_vm_state->vm_state)); + break; + } + } + ++/** ++ * kbase_arbiter_pm_vm_gpu_stop() - Handles the stop state of the VM ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Handles the stop state of the VM ++ */ + static void kbase_arbiter_pm_vm_gpu_stop(struct kbase_device *kbdev) + { + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; +@@ -344,9 +626,16 @@ static void kbase_arbiter_pm_vm_gpu_stop(struct kbase_device *kbdev) + } + } + ++/** ++ * kbase_gpu_lost() - Kbase signals GPU is lost on a
lost event signal ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * On GPU lost event signals GPU_LOST to the arbiter ++ */ + static void kbase_gpu_lost(struct kbase_device *kbdev) + { + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; ++ bool handle_gpu_lost = false; + + lockdep_assert_held(&arb_vm_state->vm_state_lock); + +@@ -357,33 +646,47 @@ static void kbase_gpu_lost(struct kbase_device *kbdev) + dev_warn(kbdev->dev, "GPU lost in state %s", + kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); + kbase_arbiter_pm_vm_gpu_stop(kbdev); +- mutex_unlock(&arb_vm_state->vm_state_lock); +- kbase_pm_handle_gpu_lost(kbdev); +- mutex_lock(&arb_vm_state->vm_state_lock); ++ handle_gpu_lost = true; + break; + case KBASE_VM_STATE_STOPPING_IDLE: + case KBASE_VM_STATE_STOPPING_ACTIVE: + case KBASE_VM_STATE_SUSPEND_PENDING: +- dev_info(kbdev->dev, "GPU lost while stopping"); +- mutex_unlock(&arb_vm_state->vm_state_lock); +- kbase_pm_handle_gpu_lost(kbdev); +- mutex_lock(&arb_vm_state->vm_state_lock); ++ dev_dbg(kbdev->dev, "GPU lost while stopping"); ++ handle_gpu_lost = true; + break; + case KBASE_VM_STATE_SUSPENDED: + case KBASE_VM_STATE_STOPPED: + case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: +- dev_info(kbdev->dev, "GPU lost while already stopped"); ++ dev_dbg(kbdev->dev, "GPU lost while already stopped"); + break; + case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: +- dev_info(kbdev->dev, "GPU lost while waiting to suspend"); ++ dev_dbg(kbdev->dev, "GPU lost while waiting to suspend"); + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPENDED); + break; + default: + break; + } +- ++ if (handle_gpu_lost) { ++ /* Releasing the VM state lock here is safe because ++ * we are guaranteed to be in either STOPPING_IDLE, ++ * STOPPING_ACTIVE or SUSPEND_PENDING at this point.
++ * The only transitions that are valid from here are to ++ * STOPPED, STOPPED_GPU_REQUESTED or SUSPENDED which can ++ * only happen at the completion of the GPU lost handling. ++ */ ++ mutex_unlock(&arb_vm_state->vm_state_lock); ++ kbase_pm_handle_gpu_lost(kbdev); ++ mutex_lock(&arb_vm_state->vm_state_lock); ++ } + } + ++/** ++ * kbase_arbiter_pm_vm_os_suspend_ready_state() - checks if VM is ready ++ * to be moved to suspended state. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Return: True if its ready to be suspended else False. ++ */ + static inline bool kbase_arbiter_pm_vm_os_suspend_ready_state( + struct kbase_device *kbdev) + { +@@ -398,6 +701,14 @@ static inline bool kbase_arbiter_pm_vm_os_suspend_ready_state( + } + } + ++/** ++ * kbase_arbiter_pm_vm_os_prepare_suspend() - Prepare OS to be in suspend state ++ * until it receives the grant message from arbiter ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Prepares OS to be in suspend state until it receives GRANT message ++ * from Arbiter asynchronously. ++ */ + static void kbase_arbiter_pm_vm_os_prepare_suspend(struct kbase_device *kbdev) + { + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; +@@ -463,6 +774,14 @@ static void kbase_arbiter_pm_vm_os_prepare_suspend(struct kbase_device *kbdev) + } + } + ++/** ++ * kbase_arbiter_pm_vm_os_resume() - Resume OS function once it receives ++ * a grant message from arbiter ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Resume OS function once it receives GRANT message ++ * from Arbiter asynchronously. 
++ */ + static void kbase_arbiter_pm_vm_os_resume(struct kbase_device *kbdev) + { + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; +@@ -475,6 +794,7 @@ static void kbase_arbiter_pm_vm_os_resume(struct kbase_device *kbdev) + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPED_GPU_REQUESTED); + kbase_arbif_gpu_request(kbdev); ++ start_request_timer(kbdev); + + /* Release lock and block resume OS function until we have + * asynchronously received the GRANT message from the Arbiter and +@@ -486,6 +806,14 @@ static void kbase_arbiter_pm_vm_os_resume(struct kbase_device *kbdev) + mutex_lock(&arb_vm_state->vm_state_lock); + } + ++/** ++ * kbase_arbiter_pm_vm_event() - Dispatch VM event to the state machine. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @evt: VM event ++ * ++ * The state machine function. Receives events and transitions states ++ * according the event received and the current state ++ */ + void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, + enum kbase_arbif_evt evt) + { +@@ -497,7 +825,9 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, + mutex_lock(&arb_vm_state->vm_state_lock); + dev_dbg(kbdev->dev, "%s %s\n", __func__, + kbase_arbiter_pm_vm_event_str(evt)); +- ++ if (arb_vm_state->vm_state != KBASE_VM_STATE_INITIALIZING_WITH_GPU && ++ arb_vm_state->vm_state != KBASE_VM_STATE_INITIALIZING) ++ KBASE_KTRACE_ADD(kbdev, ARB_VM_EVT, NULL, evt); + switch (evt) { + case KBASE_VM_GPU_GRANTED_EVT: + kbase_arbiter_pm_vm_gpu_start(kbdev); +@@ -506,7 +836,7 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, + kbase_arbiter_pm_vm_gpu_stop(kbdev); + break; + case KBASE_VM_GPU_LOST_EVT: +- dev_info(kbdev->dev, "KBASE_ARBIF_GPU_LOST_EVT!"); ++ dev_dbg(kbdev->dev, "KBASE_ARBIF_GPU_LOST_EVT!"); + kbase_gpu_lost(kbdev); + break; + case KBASE_VM_OS_SUSPEND_EVENT: +@@ -530,8 +860,6 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, + case KBASE_VM_REF_EVENT: + 
switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_STARTING: +- KBASE_TLSTREAM_TL_EVENT_ARB_STARTED(kbdev, kbdev); +- /* FALL THROUGH */ + case KBASE_VM_STATE_IDLE: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_ACTIVE); +@@ -547,15 +875,21 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, + break; + + case KBASE_VM_GPU_INITIALIZED_EVT: +- lockdep_assert_held(&kbdev->pm.lock); +- if (kbdev->pm.active_count > 0) { +- kbase_arbiter_pm_vm_set_state(kbdev, +- KBASE_VM_STATE_ACTIVE); +- kbase_arbif_gpu_active(kbdev); +- } else { +- kbase_arbiter_pm_vm_set_state(kbdev, +- KBASE_VM_STATE_IDLE); +- kbase_arbif_gpu_idle(kbdev); ++ switch (arb_vm_state->vm_state) { ++ case KBASE_VM_STATE_INITIALIZING_WITH_GPU: ++ lockdep_assert_held(&kbdev->pm.lock); ++ if (kbdev->pm.active_count > 0) { ++ kbase_arbiter_pm_vm_set_state(kbdev, ++ KBASE_VM_STATE_ACTIVE); ++ kbase_arbif_gpu_active(kbdev); ++ } else { ++ kbase_arbiter_pm_vm_set_state(kbdev, ++ KBASE_VM_STATE_IDLE); ++ kbase_arbif_gpu_idle(kbdev); ++ } ++ break; ++ default: ++ break; + } + break; + +@@ -566,6 +900,14 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, + mutex_unlock(&arb_vm_state->vm_state_lock); + } + ++KBASE_EXPORT_TEST_API(kbase_arbiter_pm_vm_event); ++ ++/** ++ * kbase_arbiter_pm_vm_wait_gpu_assignment() - VM wait for a GPU assignment. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * VM waits for a GPU assignment. 
++ */ + static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev) + { + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; +@@ -577,6 +919,12 @@ static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev) + dev_dbg(kbdev->dev, "Waiting for GPU assignment - done\n"); + } + ++/** ++ * kbase_arbiter_pm_vm_gpu_assigned_lockheld() - Check if VM holds VM state lock ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Checks if the virtual machine holds VM state lock. ++ */ + static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld( + struct kbase_device *kbdev) + { +@@ -587,11 +935,25 @@ static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld( + arb_vm_state->vm_state == KBASE_VM_STATE_ACTIVE); + } + ++/** ++ * kbase_arbiter_pm_ctx_active_handle_suspend() - Handle suspend operation for ++ * arbitration mode ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @suspend_handler: The handler code for how to handle a suspend ++ * that might occur ++ * ++ * This function handles a suspend event from the driver, ++ * communicating with the arbiter and waiting synchronously for the GPU ++ * to be granted again depending on the VM state. ++ * ++ * Return: 0 on success else 1 suspend handler isn not possible. 
++ */ + int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, + enum kbase_pm_suspend_handler suspend_handler) + { + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; ++ int res = 0; + + if (kbdev->arb.arb_if) { + mutex_lock(&arb_vm_state->vm_state_lock); +@@ -606,30 +968,41 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPED_GPU_REQUESTED); + kbase_arbif_gpu_request(kbdev); ++ start_request_timer(kbdev); + } else if (arb_vm_state->vm_state == + KBASE_VM_STATE_INITIALIZING_WITH_GPU) + break; + + if (suspend_handler != + KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE) { +- if (suspend_handler == +- KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED +- || +- kbdev->pm.active_count > 0) +- break; + +- mutex_unlock(&arb_vm_state->vm_state_lock); +- mutex_unlock(&kbdev->pm.lock); +- mutex_unlock(&js_devdata->runpool_mutex); +- return 1; +- } ++ /* In case of GPU lost, even if ++ * active_count > 0, we no longer have GPU ++ * access ++ */ ++ if (kbase_pm_is_gpu_lost(kbdev)) ++ res = 1; + +- if (arb_vm_state->vm_state == +- KBASE_VM_STATE_INITIALIZING_WITH_GPU) ++ switch (suspend_handler) { ++ case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE: ++ res = 1; ++ break; ++ case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE: ++ if (kbdev->pm.active_count == 0) ++ res = 1; ++ break; ++ case KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED: ++ break; ++ default: ++ WARN(1, "Unknown suspend_handler\n"); ++ res = 1; ++ break; ++ } + break; ++ } + + /* Need to synchronously wait for GPU assignment */ +- arb_vm_state->vm_arb_users_waiting++; ++ atomic_inc(&kbdev->pm.gpu_users_waiting); + mutex_unlock(&arb_vm_state->vm_state_lock); + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&js_devdata->runpool_mutex); +@@ -637,9 +1010,128 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, + 
mutex_lock(&js_devdata->runpool_mutex); + mutex_lock(&kbdev->pm.lock); + mutex_lock(&arb_vm_state->vm_state_lock); +- arb_vm_state->vm_arb_users_waiting--; ++ atomic_dec(&kbdev->pm.gpu_users_waiting); + } + mutex_unlock(&arb_vm_state->vm_state_lock); + } +- return 0; ++ return res; ++} ++ ++/** ++ * kbase_arbiter_pm_update_gpu_freq() - Updates GPU clock frequency received ++ * from arbiter. ++ * @arb_freq - Pointer to struchture holding GPU clock frequenecy data ++ * @freq - New frequency value in KHz ++ */ ++void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq, ++ uint32_t freq) ++{ ++ struct kbase_gpu_clk_notifier_data ndata; ++ ++ mutex_lock(&arb_freq->arb_freq_lock); ++ if (arb_freq->arb_freq != freq) { ++ ndata.new_rate = freq * KHZ_TO_HZ; ++ ndata.old_rate = arb_freq->arb_freq * KHZ_TO_HZ; ++ ndata.gpu_clk_handle = arb_freq; ++ arb_freq->arb_freq = freq; ++ arb_freq->freq_updated = true; ++ if (arb_freq->nb) ++ arb_freq->nb->notifier_call(arb_freq->nb, ++ POST_RATE_CHANGE, &ndata); ++ } ++ ++ mutex_unlock(&arb_freq->arb_freq_lock); ++} ++ ++/** ++ * enumerate_arb_gpu_clk() - Enumerate a GPU clock on the given index ++ * @kbdev - kbase_device pointer ++ * @index - GPU clock index ++ * ++ * Returns pointer to structure holding GPU clock frequency data reported from ++ * arbiter, only index 0 is valid. 
++ */ ++static void *enumerate_arb_gpu_clk(struct kbase_device *kbdev, ++ unsigned int index) ++{ ++ if (index == 0) ++ return &kbdev->arb.arb_freq; ++ return NULL; ++} ++ ++/** ++ * get_arb_gpu_clk_rate() - Get the current rate of GPU clock frequency value ++ * @kbdev - kbase_device pointer ++ * @gpu_clk_handle - Handle unique to the enumerated GPU clock ++ * ++ * Returns the GPU clock frequency value saved when gpu is granted from arbiter ++ */ ++static unsigned long get_arb_gpu_clk_rate(struct kbase_device *kbdev, ++ void *gpu_clk_handle) ++{ ++ uint32_t freq; ++ struct kbase_arbiter_freq *arb_dev_freq = ++ (struct kbase_arbiter_freq *) gpu_clk_handle; ++ ++ mutex_lock(&arb_dev_freq->arb_freq_lock); ++ /* Convert from KHz to Hz */ ++ freq = arb_dev_freq->arb_freq * KHZ_TO_HZ; ++ mutex_unlock(&arb_dev_freq->arb_freq_lock); ++ return freq; ++} ++ ++/** ++ * arb_gpu_clk_notifier_register() - Register a clock rate change notifier. ++ * @kbdev - kbase_device pointer ++ * @gpu_clk_handle - Handle unique to the enumerated GPU clock ++ * @nb - notifier block containing the callback function pointer ++ * ++ * Returns 0 on success, -EBUSY if a notifier block is already registered. ++ * ++ * This function registers a callback function that is invoked whenever the ++ * frequency of the clock corresponding to @gpu_clk_handle changes. 
++ */ ++static int arb_gpu_clk_notifier_register(struct kbase_device *kbdev, ++ void *gpu_clk_handle, struct notifier_block *nb) ++{ ++ int ret = 0; ++ struct kbase_arbiter_freq *arb_dev_freq = ++ (struct kbase_arbiter_freq *)gpu_clk_handle; ++ ++ if (!arb_dev_freq->nb) ++ arb_dev_freq->nb = nb; ++ else ++ ret = -EBUSY; ++ ++ return ret; ++} ++ ++/** ++ * arb_gpu_clk_notifier_unregister() - Unregister clock rate change notifier ++ * @kbdev - kbase_device pointer ++ * @gpu_clk_handle - Handle unique to the enumerated GPU clock ++ * @nb - notifier block containing the callback function pointer ++ * ++ * This function is used to unregister a callback function that ++ * was previously registered to get notified of a frequency change of the ++ * clock corresponding to @gpu_clk_handle. ++ */ ++static void arb_gpu_clk_notifier_unregister(struct kbase_device *kbdev, ++ void *gpu_clk_handle, struct notifier_block *nb) ++{ ++ struct kbase_arbiter_freq *arb_dev_freq = ++ (struct kbase_arbiter_freq *)gpu_clk_handle; ++ if (arb_dev_freq->nb == nb) { ++ arb_dev_freq->nb = NULL; ++ } else { ++ dev_err(kbdev->dev, "%s - notifier did not match\n", ++ __func__); ++ } + } ++ ++struct kbase_clk_rate_trace_op_conf arb_clk_rate_trace_ops = { ++ .get_gpu_clk_rate = get_arb_gpu_clk_rate, ++ .enumerate_gpu_clk = enumerate_arb_gpu_clk, ++ .gpu_clk_notifier_register = arb_gpu_clk_notifier_register, ++ .gpu_clk_notifier_unregister = arb_gpu_clk_notifier_unregister ++}; +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_pm.h b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_pm.h +index 3c49eb1..1f570bb 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_pm.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_pm.h +@@ -1,28 +1,7 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT ARM Limited. All rights reserved. 
+- * +- * This program is free software and is provided to you under the terms of the +- * GNU General Public License version 2 as published by the Free Software +- * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. +- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, you can access it online at +- * http://www.gnu.org/licenses/gpl-2.0.html. +- * +- * SPDX-License-Identifier: GPL-2.0 +- * +- *//* SPDX-License-Identifier: GPL-2.0 */ +- +-/* +- * +- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software +@@ -38,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /** +@@ -116,10 +93,18 @@ void kbase_arbiter_pm_early_term(struct kbase_device *kbdev); + * kbase_arbiter_pm_release_interrupts() - Release the GPU interrupts + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * +- * Releases interrupts if needed (GPU is available) otherwise does nothing ++ * Releases interrupts and set the interrupt flag to false + */ + void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev); + ++/** ++ * kbase_arbiter_pm_install_interrupts() - Install the GPU interrupts ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Install interrupts and set the interrupt_install flag to true. 
++ */ ++int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev); ++ + /** + * kbase_arbiter_pm_vm_event() - Dispatch VM event to the state machine + * @kbdev: The kbase device structure for the device (must be a valid pointer) +@@ -156,4 +141,54 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, + */ + void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev); + ++/** ++ * kbase_arbiter_set_max_config() - Set the max config data in kbase device. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer). ++ * @max_l2_slices: The maximum number of L2 slices. ++ * @max_core_mask: The largest core mask. ++ * ++ * Stores the given maximum L2 slice count and core mask in the kbase device, ++ * per the summary line (the implementation lives outside this header). ++ */ ++void kbase_arbiter_set_max_config(struct kbase_device *kbdev, ++ uint32_t max_l2_slices, ++ uint32_t max_core_mask); ++ ++/** ++ * kbase_arbiter_pm_gpu_assigned() - Determine if this VM has access to the GPU ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Return: 0 if the VM does not have access, 1 if it does, and a negative number ++ * if an error occurred ++ */ ++int kbase_arbiter_pm_gpu_assigned(struct kbase_device *kbdev); ++ ++extern struct kbase_clk_rate_trace_op_conf arb_clk_rate_trace_ops; ++ ++/** ++ * struct kbase_arbiter_freq - Holding the GPU clock frequency data retrieved ++ * from arbiter ++ * @arb_freq: GPU clock frequency value in KHz ++ * @arb_freq_lock: Mutex protecting access to arb_freq value ++ * @nb: Notifier block to receive rate change callbacks ++ * @freq_updated: Flag to indicate whether a frequency change has just been ++ * communicated to avoid "GPU_GRANTED when not expected" warning ++ */ ++struct kbase_arbiter_freq { ++ uint32_t arb_freq; ++ struct mutex arb_freq_lock; ++ struct notifier_block *nb; ++ bool freq_updated; ++}; ++ ++/** ++ * kbase_arbiter_pm_update_gpu_freq() - Update GPU frequency ++ * 
@arb_freq: Pointer to GPU clock frequency data ++ * @freq: The new frequency ++ * ++ * Updates the GPU frequency and triggers any notifications ++ */ ++void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq, ++ uint32_t freq); ++ + #endif /*_MALI_KBASE_ARBITER_PM_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild +index 2449e80..5dbcff3 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,51 +16,34 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. 
+ # +-# SPDX-License-Identifier: GPL-2.0 +-# + # + +-BACKEND += \ +- backend/gpu/mali_kbase_cache_policy_backend.c \ +- backend/gpu/mali_kbase_device_hw.c \ +- backend/gpu/mali_kbase_gpuprops_backend.c \ +- backend/gpu/mali_kbase_irq_linux.c \ +- backend/gpu/mali_kbase_instr_backend.c \ +- backend/gpu/mali_kbase_js_backend.c \ +- backend/gpu/mali_kbase_pm_backend.c \ +- backend/gpu/mali_kbase_pm_driver.c \ +- backend/gpu/mali_kbase_pm_metrics.c \ +- backend/gpu/mali_kbase_pm_ca.c \ +- backend/gpu/mali_kbase_pm_always_on.c \ +- backend/gpu/mali_kbase_pm_coarse_demand.c \ +- backend/gpu/mali_kbase_pm_policy.c \ +- backend/gpu/mali_kbase_time.c \ +- backend/gpu/mali_kbase_l2_mmu_config.c ++mali_kbase-y += \ ++ backend/gpu/mali_kbase_cache_policy_backend.o \ ++ backend/gpu/mali_kbase_gpuprops_backend.o \ ++ backend/gpu/mali_kbase_irq_linux.o \ ++ backend/gpu/mali_kbase_js_backend.o \ ++ backend/gpu/mali_kbase_pm_backend.o \ ++ backend/gpu/mali_kbase_pm_driver.o \ ++ backend/gpu/mali_kbase_pm_metrics.o \ ++ backend/gpu/mali_kbase_pm_ca.o \ ++ backend/gpu/mali_kbase_pm_always_on.o \ ++ backend/gpu/mali_kbase_pm_coarse_demand.o \ ++ backend/gpu/mali_kbase_pm_policy.o \ ++ backend/gpu/mali_kbase_time.o \ ++ backend/gpu/mali_kbase_l2_mmu_config.o \ ++ backend/gpu/mali_kbase_clk_rate_trace_mgr.o + +-ifeq ($(MALI_USE_CSF),1) +-# empty +-else +- BACKEND += \ +- backend/gpu/mali_kbase_jm_as.c \ +- backend/gpu/mali_kbase_debug_job_fault_backend.c \ +- backend/gpu/mali_kbase_jm_hw.c \ +- backend/gpu/mali_kbase_jm_rb.c ++ifeq ($(MALI_USE_CSF),0) ++ mali_kbase-y += \ ++ backend/gpu/mali_kbase_instr_backend.o \ ++ backend/gpu/mali_kbase_jm_as.o \ ++ backend/gpu/mali_kbase_debug_job_fault_backend.o \ ++ backend/gpu/mali_kbase_jm_hw.o \ ++ backend/gpu/mali_kbase_jm_rb.o + endif + +-ifeq ($(MALI_CUSTOMER_RELEASE),0) +-BACKEND += \ +- backend/gpu/mali_kbase_pm_always_on_demand.c +-endif + +-ifeq ($(CONFIG_MALI_DEVFREQ),y) +-BACKEND += \ +- backend/gpu/mali_kbase_devfreq.c +-endif 
++mali_kbase-$(CONFIG_MALI_DEVFREQ) += \ ++ backend/gpu/mali_kbase_devfreq.o + +-ifeq ($(CONFIG_MALI_NO_MALI),y) +- # Dummy model +- BACKEND += backend/gpu/mali_kbase_model_dummy.c +- BACKEND += backend/gpu/mali_kbase_model_linux.c +- # HW error simulation +- BACKEND += backend/gpu/mali_kbase_model_error_generator.c +-endif +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h +index 4a61f96..6924fdb 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c +index 7378bfd..e542ccf 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2015-2016,2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2016, 2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,10 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include "backend/gpu/mali_kbase_cache_policy_backend.h" +-#include ++#include + + void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, + u32 mode) +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h +index f78ada7..278125a 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2015-2016 ARM Limited. 
All rights reserved. ++ * (C) COPYRIGHT 2014-2016, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,16 +17,13 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- + #ifndef _KBASE_CACHE_POLICY_BACKEND_H_ + #define _KBASE_CACHE_POLICY_BACKEND_H_ + + #include "mali_kbase.h" +-#include "mali_base_kernel.h" ++#include + + /** + * kbase_cache_set_coherency_mode() - Sets the system coherency mode +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_clk_rate_trace_mgr.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_clk_rate_trace_mgr.c +new file mode 100644 +index 0000000..6ad0f58 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_clk_rate_trace_mgr.c +@@ -0,0 +1,325 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++/* ++ * Implementation of the GPU clock rate trace manager. ++ */ ++ ++#include ++#include ++#include ++#include ++#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" ++ ++#ifdef CONFIG_TRACE_POWER_GPU_FREQUENCY ++#include ++#else ++#include "mali_power_gpu_frequency_trace.h" ++#endif ++ ++#ifndef CLK_RATE_TRACE_OPS ++#define CLK_RATE_TRACE_OPS (NULL) ++#endif ++ ++/** ++ * get_clk_rate_trace_callbacks() - Returns pointer to clk trace ops. ++ * @kbdev: Pointer to kbase device, used to check if arbitration is enabled ++ * when compiled with arbiter support. ++ * Return: Pointer to clk trace ops if supported or NULL. ++ */ ++static struct kbase_clk_rate_trace_op_conf * ++get_clk_rate_trace_callbacks(struct kbase_device *kbdev __maybe_unused) ++{ ++ /* base case */ ++ struct kbase_clk_rate_trace_op_conf *callbacks = ++ (struct kbase_clk_rate_trace_op_conf *)CLK_RATE_TRACE_OPS; ++#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) ++ const void *arbiter_if_node; ++ ++ if (WARN_ON(!kbdev) || WARN_ON(!kbdev->dev)) ++ return callbacks; ++ ++ arbiter_if_node = ++ of_get_property(kbdev->dev->of_node, "arbiter_if", NULL); ++ /* Arbitration enabled, override the callback pointer.*/ ++ if (arbiter_if_node) ++ callbacks = &arb_clk_rate_trace_ops; ++ else ++ dev_dbg(kbdev->dev, ++ "Arbitration supported but disabled by platform. 
Leaving clk rate callbacks as default.\n"); ++ ++#endif ++ ++ return callbacks; ++} ++ ++static int gpu_clk_rate_change_notifier(struct notifier_block *nb, ++ unsigned long event, void *data) ++{ ++ struct kbase_gpu_clk_notifier_data *ndata = data; ++ struct kbase_clk_data *clk_data = ++ container_of(nb, struct kbase_clk_data, clk_rate_change_nb); ++ struct kbase_clk_rate_trace_manager *clk_rtm = clk_data->clk_rtm; ++ unsigned long flags; ++ ++ if (WARN_ON_ONCE(clk_data->gpu_clk_handle != ndata->gpu_clk_handle)) ++ return NOTIFY_BAD; ++ ++ spin_lock_irqsave(&clk_rtm->lock, flags); ++ if (event == POST_RATE_CHANGE) { ++ if (!clk_rtm->gpu_idle && ++ (clk_data->clock_val != ndata->new_rate)) { ++ kbase_clk_rate_trace_manager_notify_all( ++ clk_rtm, clk_data->index, ndata->new_rate); ++ } ++ ++ clk_data->clock_val = ndata->new_rate; ++ } ++ spin_unlock_irqrestore(&clk_rtm->lock, flags); ++ ++ return NOTIFY_DONE; ++} ++ ++static int gpu_clk_data_init(struct kbase_device *kbdev, ++ void *gpu_clk_handle, unsigned int index) ++{ ++ struct kbase_clk_rate_trace_op_conf *callbacks; ++ struct kbase_clk_data *clk_data; ++ struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; ++ int ret = 0; ++ ++ callbacks = get_clk_rate_trace_callbacks(kbdev); ++ ++ if (WARN_ON(!callbacks) || ++ WARN_ON(!gpu_clk_handle) || ++ WARN_ON(index >= BASE_MAX_NR_CLOCKS_REGULATORS)) ++ return -EINVAL; ++ ++ clk_data = kzalloc(sizeof(*clk_data), GFP_KERNEL); ++ if (!clk_data) { ++ dev_err(kbdev->dev, "Failed to allocate data for clock enumerated at index %u", index); ++ return -ENOMEM; ++ } ++ ++ clk_data->index = (u8)index; ++ clk_data->gpu_clk_handle = gpu_clk_handle; ++ /* Store the initial value of clock */ ++ clk_data->clock_val = ++ callbacks->get_gpu_clk_rate(kbdev, gpu_clk_handle); ++ ++ { ++ /* At the initialization time, GPU is powered off. 
*/ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&clk_rtm->lock, flags); ++ kbase_clk_rate_trace_manager_notify_all( ++ clk_rtm, clk_data->index, 0); ++ spin_unlock_irqrestore(&clk_rtm->lock, flags); ++ } ++ ++ clk_data->clk_rtm = clk_rtm; ++ clk_rtm->clks[index] = clk_data; ++ ++ clk_data->clk_rate_change_nb.notifier_call = ++ gpu_clk_rate_change_notifier; ++ ++ if (callbacks->gpu_clk_notifier_register) ++ ret = callbacks->gpu_clk_notifier_register(kbdev, ++ gpu_clk_handle, &clk_data->clk_rate_change_nb); ++ if (ret) { ++ dev_err(kbdev->dev, "Failed to register notifier for clock enumerated at index %u", index); ++ kfree(clk_data); ++ } ++ ++ return ret; ++} ++ ++int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev) ++{ ++ struct kbase_clk_rate_trace_op_conf *callbacks; ++ struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; ++ unsigned int i; ++ int ret = 0; ++ ++ callbacks = get_clk_rate_trace_callbacks(kbdev); ++ ++ spin_lock_init(&clk_rtm->lock); ++ INIT_LIST_HEAD(&clk_rtm->listeners); ++ ++ /* Return early if no callbacks provided for clock rate tracing */ ++ if (!callbacks) { ++ WRITE_ONCE(clk_rtm->clk_rate_trace_ops, NULL); ++ return 0; ++ } ++ ++ clk_rtm->gpu_idle = true; ++ ++ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { ++ void *gpu_clk_handle = ++ callbacks->enumerate_gpu_clk(kbdev, i); ++ ++ if (!gpu_clk_handle) ++ break; ++ ++ ret = gpu_clk_data_init(kbdev, gpu_clk_handle, i); ++ if (ret) ++ goto error; ++ } ++ ++ /* Activate clock rate trace manager if at least one GPU clock was ++ * enumerated. 
++ */ ++ if (i) { ++ WRITE_ONCE(clk_rtm->clk_rate_trace_ops, callbacks); ++ } else { ++ dev_info(kbdev->dev, "No clock(s) available for rate tracing"); ++ WRITE_ONCE(clk_rtm->clk_rate_trace_ops, NULL); ++ } ++ ++ return 0; ++ ++error: /* clk_rate_trace_ops is only published on success, so unwind via callbacks */ ++ while (i--) { ++ if (callbacks->gpu_clk_notifier_unregister) ++ callbacks->gpu_clk_notifier_unregister(kbdev, clk_rtm->clks[i]->gpu_clk_handle, ++ &clk_rtm->clks[i]->clk_rate_change_nb); ++ kfree(clk_rtm->clks[i]); ++ } ++ ++ return ret; ++} ++ ++void kbase_clk_rate_trace_manager_term(struct kbase_device *kbdev) ++{ ++ struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; ++ unsigned int i; ++ ++ WARN_ON(!list_empty(&clk_rtm->listeners)); ++ ++ if (!clk_rtm->clk_rate_trace_ops) ++ return; ++ ++ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { ++ if (!clk_rtm->clks[i]) ++ break; ++ ++ if (clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister) ++ clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister ++ (kbdev, clk_rtm->clks[i]->gpu_clk_handle, ++ &clk_rtm->clks[i]->clk_rate_change_nb); ++ kfree(clk_rtm->clks[i]); ++ } ++ ++ WRITE_ONCE(clk_rtm->clk_rate_trace_ops, NULL); ++} ++ ++void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev) ++{ ++ struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; ++ unsigned int i; ++ unsigned long flags; ++ ++ if (!clk_rtm->clk_rate_trace_ops) ++ return; ++ ++ spin_lock_irqsave(&clk_rtm->lock, flags); ++ ++ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { ++ struct kbase_clk_data *clk_data = clk_rtm->clks[i]; ++ ++ if (!clk_data) ++ break; ++ ++ if (unlikely(!clk_data->clock_val)) ++ continue; ++ ++ kbase_clk_rate_trace_manager_notify_all( ++ clk_rtm, clk_data->index, clk_data->clock_val); ++ } ++ ++ clk_rtm->gpu_idle = false; ++ spin_unlock_irqrestore(&clk_rtm->lock, flags); ++} ++ ++void kbase_clk_rate_trace_manager_gpu_idle(struct kbase_device *kbdev) ++{ ++ struct 
kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; ++ unsigned int i; ++ unsigned long flags; 
++ ++ if (!clk_rtm->clk_rate_trace_ops) ++ return; ++ ++ spin_lock_irqsave(&clk_rtm->lock, flags); ++ ++ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { ++ struct kbase_clk_data *clk_data = clk_rtm->clks[i]; ++ ++ if (!clk_data) ++ break; ++ ++ if (unlikely(!clk_data->clock_val)) ++ continue; ++ ++ kbase_clk_rate_trace_manager_notify_all( ++ clk_rtm, clk_data->index, 0); ++ } ++ ++ clk_rtm->gpu_idle = true; ++ spin_unlock_irqrestore(&clk_rtm->lock, flags); ++} ++ ++void kbase_clk_rate_trace_manager_notify_all( ++ struct kbase_clk_rate_trace_manager *clk_rtm, ++ u32 clk_index, ++ unsigned long new_rate) ++{ ++ struct kbase_clk_rate_listener *pos; ++ struct kbase_device *kbdev; ++ ++ lockdep_assert_held(&clk_rtm->lock); ++ ++ kbdev = container_of(clk_rtm, struct kbase_device, pm.clk_rtm); ++ ++ dev_dbg(kbdev->dev, "%s - GPU clock %u rate changed to %lu, pid: %d", ++ __func__, clk_index, new_rate, current->pid); ++ ++ /* Raise standard `power/gpu_frequency` ftrace event */ ++ { ++ unsigned long new_rate_khz = new_rate; ++ ++#if BITS_PER_LONG == 64 ++ do_div(new_rate_khz, 1000); ++#elif BITS_PER_LONG == 32 ++ new_rate_khz /= 1000; ++#else ++#error "unsigned long division is not supported for this architecture" ++#endif ++ ++ trace_gpu_frequency(new_rate_khz, clk_index); ++ } ++ ++ /* Notify the listeners. */ ++ list_for_each_entry(pos, &clk_rtm->listeners, node) { ++ pos->notify(pos, clk_index, new_rate); ++ } ++} ++KBASE_EXPORT_TEST_API(kbase_clk_rate_trace_manager_notify_all); +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_clk_rate_trace_mgr.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_clk_rate_trace_mgr.h +new file mode 100644 +index 0000000..f7ec9d1 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_clk_rate_trace_mgr.h +@@ -0,0 +1,154 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#ifndef _KBASE_CLK_RATE_TRACE_MGR_ ++#define _KBASE_CLK_RATE_TRACE_MGR_ ++ ++/* The index of top clock domain in kbase_clk_rate_trace_manager:clks. */ ++#define KBASE_CLOCK_DOMAIN_TOP (0) ++ ++/* The index of shader-cores clock domain in ++ * kbase_clk_rate_trace_manager:clks. ++ */ ++#define KBASE_CLOCK_DOMAIN_SHADER_CORES (1) ++ ++/** ++ * struct kbase_clk_data - Data stored per enumerated GPU clock. ++ * ++ * @clk_rtm: Pointer to clock rate trace manager object. ++ * @gpu_clk_handle: Handle unique to the enumerated GPU clock. ++ * @plat_private: Private data for the platform to store into ++ * @clk_rate_change_nb: notifier block containing the pointer to callback ++ * function that is invoked whenever the rate of ++ * enumerated GPU clock changes. ++ * @clock_val: Current rate of the enumerated GPU clock. ++ * @index: Index at which the GPU clock was enumerated. ++ */ ++struct kbase_clk_data { ++ struct kbase_clk_rate_trace_manager *clk_rtm; ++ void *gpu_clk_handle; ++ void *plat_private; ++ struct notifier_block clk_rate_change_nb; ++ unsigned long clock_val; ++ u8 index; ++}; ++ ++/** ++ * kbase_clk_rate_trace_manager_init - Initialize GPU clock rate trace manager. 
++ * ++ * @kbdev: Device pointer ++ * ++ * Return: 0 if success, or an error code on failure. ++ */ ++int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev); ++ ++/** ++ * kbase_clk_rate_trace_manager_term - Terminate GPU clock rate trace manager. ++ * ++ * @kbdev: Device pointer ++ */ ++void kbase_clk_rate_trace_manager_term(struct kbase_device *kbdev); ++ ++/** ++ * kbase_clk_rate_trace_manager_gpu_active - Inform GPU clock rate trace ++ * manager of GPU becoming active. ++ * ++ * @kbdev: Device pointer ++ */ ++void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev); ++ ++/** ++ * kbase_clk_rate_trace_manager_gpu_idle - Inform GPU clock rate trace ++ * manager of GPU becoming idle. ++ * @kbdev: Device pointer ++ */ ++void kbase_clk_rate_trace_manager_gpu_idle(struct kbase_device *kbdev); ++ ++/** ++ * kbase_clk_rate_trace_manager_subscribe_no_lock() - Add freq change listener. ++ * ++ * @clk_rtm: Clock rate manager instance. ++ * @listener: Listener handle ++ * ++ * kbase_clk_rate_trace_manager:lock must be held by the caller. ++ */ ++static inline void kbase_clk_rate_trace_manager_subscribe_no_lock( ++ struct kbase_clk_rate_trace_manager *clk_rtm, ++ struct kbase_clk_rate_listener *listener) ++{ ++ lockdep_assert_held(&clk_rtm->lock); ++ list_add(&listener->node, &clk_rtm->listeners); ++} ++ ++/** ++ * kbase_clk_rate_trace_manager_subscribe() - Add freq change listener. ++ * ++ * @clk_rtm: Clock rate manager instance. ++ * @listener: Listener handle ++ */ ++static inline void kbase_clk_rate_trace_manager_subscribe( ++ struct kbase_clk_rate_trace_manager *clk_rtm, ++ struct kbase_clk_rate_listener *listener) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&clk_rtm->lock, flags); ++ kbase_clk_rate_trace_manager_subscribe_no_lock( ++ clk_rtm, listener); ++ spin_unlock_irqrestore(&clk_rtm->lock, flags); ++} ++ ++/** ++ * kbase_clk_rate_trace_manager_unsubscribe() - Remove freq change listener. 
++ * ++ * @clk_rtm: Clock rate manager instance. ++ * @listener: Listener handle ++ */ ++static inline void kbase_clk_rate_trace_manager_unsubscribe( ++ struct kbase_clk_rate_trace_manager *clk_rtm, ++ struct kbase_clk_rate_listener *listener) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&clk_rtm->lock, flags); ++ list_del(&listener->node); ++ spin_unlock_irqrestore(&clk_rtm->lock, flags); ++} ++ ++/** ++ * kbase_clk_rate_trace_manager_notify_all() - Notify all clock ++ * rate listeners. ++ * ++ * @clk_rtm: Clock rate manager instance. ++ * @clock_index: Clock index. ++ * @new_rate: New clock frequency (Hz) ++ * ++ * kbase_clk_rate_trace_manager:lock must be locked. ++ * This function is exported to be used by clock rate trace test ++ * portal. ++ */ ++void kbase_clk_rate_trace_manager_notify_all( ++ struct kbase_clk_rate_trace_manager *clk_rtm, ++ u32 clock_index, ++ unsigned long new_rate); ++ ++#endif /* _KBASE_CLK_RATE_TRACE_MGR_ */ ++ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c +index b05844e..e121b41 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2012-2015,2018-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2015, 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,15 +17,13 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include +-#include ++#include + #include "mali_kbase_debug_job_fault.h" + +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + + /*GPU_CONTROL_REG(r)*/ + static int gpu_control_reg_snapshot[] = { +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c +index 2806f05..8c31499 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include +@@ -27,40 +26,53 @@ + #include + #include + #include +-#ifdef CONFIG_DEVFREQ_THERMAL ++#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) + #include + #endif + + #include +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) + #include +-#else /* Linux >= 3.13 */ +-/* In 3.13 the OPP include header file, types, and functions were all +- * renamed. Use the old filename for the include, and define the new names to +- * the old, when an old kernel is detected. +- */ +-#include +-#define dev_pm_opp opp +-#define dev_pm_opp_get_voltage opp_get_voltage +-#define dev_pm_opp_get_opp_count opp_get_opp_count +-#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil +-#define dev_pm_opp_find_freq_floor opp_find_freq_floor +-#endif /* Linux >= 3.13 */ + + /** +- * opp_translate - Translate nominal OPP frequency from devicetree into real +- * frequency and core mask +- * @kbdev: Device pointer +- * @freq: Nominal frequency +- * @core_mask: Pointer to u64 to store core mask to +- * @freqs: Pointer to array of frequencies +- * @volts: Pointer to array of voltages ++ * get_voltage() - Get the voltage value corresponding to the nominal frequency ++ * used by devfreq. ++ * @kbdev: Device pointer ++ * @freq: Nominal frequency in Hz passed by devfreq. ++ * ++ * This function will be called only when the opp table which is compatible with ++ * "operating-points-v2-mali", is not present in the devicetree for GPU device. + * +- * This function will only perform translation if an operating-points-v2-mali +- * table is present in devicetree. If one is not present then it will return an +- * untranslated frequency and all cores enabled. ++ * Return: Voltage value in milli volts, 0 in case of error. 
+ */ +-static void opp_translate(struct kbase_device *kbdev, unsigned long freq, ++static unsigned long get_voltage(struct kbase_device *kbdev, unsigned long freq) ++{ ++ struct dev_pm_opp *opp; ++ unsigned long voltage = 0; ++ ++#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE ++ rcu_read_lock(); ++#endif ++ ++ opp = dev_pm_opp_find_freq_exact(kbdev->dev, freq, true); ++ ++ if (IS_ERR_OR_NULL(opp)) ++ dev_err(kbdev->dev, "Failed to get opp (%ld)\n", PTR_ERR(opp)); ++ else { ++ voltage = dev_pm_opp_get_voltage(opp); ++#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE ++ dev_pm_opp_put(opp); ++#endif ++ } ++ ++#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE ++ rcu_read_unlock(); ++#endif ++ ++ /* dev_pm_opp_get_voltage() reports microvolts; return millivolts */ ++ return voltage / 1000; ++} ++ ++void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq, + u64 *core_mask, unsigned long *freqs, unsigned long *volts) + { + unsigned int i; +@@ -82,12 +94,17 @@ static void opp_translate(struct kbase_device *kbdev, unsigned long freq, + } + + /* If failed to find OPP, return all cores enabled +- * and nominal frequency ++ * and nominal frequency and the corresponding voltage.
+ */ + if (i == kbdev->num_opps) { ++ unsigned long voltage = get_voltage(kbdev, freq); ++ + *core_mask = kbdev->gpu_props.props.raw_props.shader_present; +- for (i = 0; i < kbdev->nr_clocks; i++) ++ ++ for (i = 0; i < kbdev->nr_clocks; i++) { + freqs[i] = freq; ++ volts[i] = voltage; ++ } + } + } + +@@ -104,18 +121,18 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) + + nominal_freq = *target_freq; + +-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) ++#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE + rcu_read_lock(); + #endif + opp = devfreq_recommended_opp(dev, &nominal_freq, flags); +-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) ++#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE + rcu_read_unlock(); + #endif + if (IS_ERR_OR_NULL(opp)) { + dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp)); + return PTR_ERR(opp); + } +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) ++#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE + dev_pm_opp_put(opp); + #endif + +@@ -127,9 +144,10 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) + return 0; + } + +- opp_translate(kbdev, nominal_freq, &core_mask, freqs, volts); ++ kbase_devfreq_opp_translate(kbdev, nominal_freq, &core_mask, ++ freqs, volts); + +-#ifdef CONFIG_REGULATOR ++#if IS_ENABLED(CONFIG_REGULATOR) + /* Regulators and clocks work in pairs: every clock has a regulator, + * and we never expect to have more regulators than clocks. 
+ * +@@ -177,7 +195,7 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) + } + } + +-#ifdef CONFIG_REGULATOR ++#if IS_ENABLED(CONFIG_REGULATOR) + for (i = 0; i < kbdev->nr_clocks; i++) { + if (kbdev->regulators[i] && + kbdev->current_voltages[i] != volts[i] && +@@ -238,6 +256,10 @@ kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat) + stat->current_frequency = kbdev->current_nominal_freq; + stat->private_data = NULL; + ++#if MALI_USE_CSF && defined CONFIG_DEVFREQ_THERMAL ++ kbase_ipa_reset_data(kbdev); ++#endif ++ + return 0; + } + +@@ -249,11 +271,11 @@ static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev, + unsigned long freq; + struct dev_pm_opp *opp; + +-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) ++#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE + rcu_read_lock(); + #endif + count = dev_pm_opp_get_opp_count(kbdev->dev); +-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) ++#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE + rcu_read_unlock(); + #endif + if (count < 0) +@@ -264,20 +286,20 @@ static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev, + if (!dp->freq_table) + return -ENOMEM; + +-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) ++#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE + rcu_read_lock(); + #endif + for (i = 0, freq = ULONG_MAX; i < count; i++, freq--) { + opp = dev_pm_opp_find_freq_floor(kbdev->dev, &freq); + if (IS_ERR(opp)) + break; +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) ++#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE + dev_pm_opp_put(opp); +-#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) */ ++#endif /* KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE */ + + dp->freq_table[i] = freq; + } +-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) ++#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE + rcu_read_unlock(); + #endif + +@@ -309,18 +331,21 @@ static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev) + struct 
devfreq_dev_profile *dp = &kbdev->devfreq_profile; + + kfree(dp->freq_table); ++ dp->freq_table = NULL; + } + + static void kbase_devfreq_term_core_mask_table(struct kbase_device *kbdev) + { + kfree(kbdev->devfreq_table); ++ kbdev->devfreq_table = NULL; + } + + static void kbase_devfreq_exit(struct device *dev) + { + struct kbase_device *kbdev = dev_get_drvdata(dev); + +- kbase_devfreq_term_freq_table(kbdev); ++ if (kbdev) ++ kbase_devfreq_term_freq_table(kbdev); + } + + static void kbasep_devfreq_read_suspend_clock(struct kbase_device *kbdev, +@@ -359,7 +384,7 @@ static void kbasep_devfreq_read_suspend_clock(struct kbase_device *kbdev, + + static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) + { +-#if KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE || !defined(CONFIG_OF) ++#ifndef CONFIG_OF + /* OPP table initialization requires at least the capability to get + * regulators and clocks from the device tree, as well as parsing + * arrays of unsigned integer values. +@@ -392,7 +417,7 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) + u64 core_mask, opp_freq, + real_freqs[BASE_MAX_NR_CLOCKS_REGULATORS]; + int err; +-#ifdef CONFIG_REGULATOR ++#if IS_ENABLED(CONFIG_REGULATOR) + u32 opp_volts[BASE_MAX_NR_CLOCKS_REGULATORS]; + #endif + +@@ -420,7 +445,7 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) + err); + continue; + } +-#ifdef CONFIG_REGULATOR ++#if IS_ENABLED(CONFIG_REGULATOR) + err = of_property_read_u32_array(node, + "opp-microvolt", opp_volts, kbdev->nr_regulators); + if (err < 0) { +@@ -474,7 +499,7 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) + kbdev->devfreq_table[i].real_freqs[j] = + real_freqs[j]; + } +-#ifdef CONFIG_REGULATOR ++#if IS_ENABLED(CONFIG_REGULATOR) + if (kbdev->nr_regulators > 0) { + int j; + +@@ -493,11 +518,9 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) + kbdev->num_opps = i; + + return 0; +-#endif /* 
KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE */ ++#endif /* CONFIG_OF */ + } + +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) +- + static const char *kbase_devfreq_req_type_name(enum kbase_devfreq_work_type type) + { + const char *p; +@@ -554,27 +577,26 @@ static void kbase_devfreq_suspend_resume_worker(struct work_struct *work) + } + } + +-#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */ +- + void kbase_devfreq_enqueue_work(struct kbase_device *kbdev, + enum kbase_devfreq_work_type work_type) + { +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) + unsigned long flags; + + WARN_ON(work_type == DEVFREQ_WORK_NONE); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); +- kbdev->devfreq_queue.req_type = work_type; +- queue_work(kbdev->devfreq_queue.workq, &kbdev->devfreq_queue.work); ++ /* Skip enqueuing a work if workqueue has already been terminated. */ ++ if (likely(kbdev->devfreq_queue.workq)) { ++ kbdev->devfreq_queue.req_type = work_type; ++ queue_work(kbdev->devfreq_queue.workq, ++ &kbdev->devfreq_queue.work); ++ } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + dev_dbg(kbdev->dev, "Enqueuing devfreq req: %s\n", + kbase_devfreq_req_type_name(work_type)); +-#endif + } + + static int kbase_devfreq_work_init(struct kbase_device *kbdev) + { +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) + kbdev->devfreq_queue.req_type = DEVFREQ_WORK_NONE; + kbdev->devfreq_queue.acted_type = DEVFREQ_WORK_RESUME; + +@@ -584,17 +606,23 @@ static int kbase_devfreq_work_init(struct kbase_device *kbdev) + + INIT_WORK(&kbdev->devfreq_queue.work, + kbase_devfreq_suspend_resume_worker); +-#endif + return 0; + } + + static void kbase_devfreq_work_term(struct kbase_device *kbdev) + { +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) +- destroy_workqueue(kbdev->devfreq_queue.workq); +-#endif ++ unsigned long flags; ++ struct workqueue_struct *workq; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ workq = kbdev->devfreq_queue.workq; ++ 
kbdev->devfreq_queue.workq = NULL; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ destroy_workqueue(workq); + } + ++ + int kbase_devfreq_init(struct kbase_device *kbdev) + { + struct devfreq_dev_profile *dp; +@@ -631,19 +659,11 @@ int kbase_devfreq_init(struct kbase_device *kbdev) + /* Record the maximum frequency possible */ + kbdev->gpu_props.props.core_props.gpu_freq_khz_max = + dp->freq_table[0] / 1000; +- }; +- +- err = kbase_devfreq_init_core_mask_table(kbdev); +- if (err) { +- kbase_devfreq_term_freq_table(kbdev); +- return err; + } + +- /* Initialise devfreq suspend/resume workqueue */ +- err = kbase_devfreq_work_init(kbdev); ++ err = kbase_devfreq_init_core_mask_table(kbdev); + if (err) { + kbase_devfreq_term_freq_table(kbdev); +- dev_err(kbdev->dev, "Devfreq initialization failed"); + return err; + } + +@@ -651,13 +671,27 @@ int kbase_devfreq_init(struct kbase_device *kbdev) + "simple_ondemand", NULL); + if (IS_ERR(kbdev->devfreq)) { + err = PTR_ERR(kbdev->devfreq); +- kbase_devfreq_work_term(kbdev); ++ kbdev->devfreq = NULL; ++ kbase_devfreq_term_core_mask_table(kbdev); + kbase_devfreq_term_freq_table(kbdev); ++ dev_err(kbdev->dev, "Fail to add devfreq device(%d)\n", err); ++ return err; ++ } ++ ++ /* Initialize devfreq suspend/resume workqueue */ ++ err = kbase_devfreq_work_init(kbdev); ++ if (err) { ++ if (devfreq_remove_device(kbdev->devfreq)) ++ dev_err(kbdev->dev, "Fail to rm devfreq\n"); ++ kbdev->devfreq = NULL; ++ kbase_devfreq_term_core_mask_table(kbdev); ++ dev_err(kbdev->dev, "Fail to init devfreq workqueue\n"); + return err; + } + + /* devfreq_add_device only copies a few of kbdev->dev's fields, so +- * set drvdata explicitly so IPA models can access kbdev. */ ++ * set drvdata explicitly so IPA models can access kbdev. 
++ */ + dev_set_drvdata(&kbdev->devfreq->dev, kbdev); + + err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq); +@@ -667,11 +701,11 @@ int kbase_devfreq_init(struct kbase_device *kbdev) + goto opp_notifier_failed; + } + +-#ifdef CONFIG_DEVFREQ_THERMAL ++#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) + err = kbase_ipa_init(kbdev); + if (err) { + dev_err(kbdev->dev, "IPA initialization failed\n"); +- goto cooling_failed; ++ goto ipa_init_failed; + } + + kbdev->devfreq_cooling = of_devfreq_cooling_register_power( +@@ -683,23 +717,28 @@ int kbase_devfreq_init(struct kbase_device *kbdev) + dev_err(kbdev->dev, + "Failed to register cooling device (%d)\n", + err); +- goto cooling_failed; ++ goto cooling_reg_failed; + } + #endif + + return 0; + +-#ifdef CONFIG_DEVFREQ_THERMAL +-cooling_failed: ++#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) ++cooling_reg_failed: ++ kbase_ipa_term(kbdev); ++ipa_init_failed: + devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); + #endif /* CONFIG_DEVFREQ_THERMAL */ ++ + opp_notifier_failed: ++ kbase_devfreq_work_term(kbdev); ++ + if (devfreq_remove_device(kbdev->devfreq)) + dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err); +- else +- kbdev->devfreq = NULL; + +- kbase_devfreq_work_term(kbdev); ++ kbdev->devfreq = NULL; ++ ++ kbase_devfreq_term_core_mask_table(kbdev); + + return err; + } +@@ -710,7 +749,7 @@ void kbase_devfreq_term(struct kbase_device *kbdev) + + dev_dbg(kbdev->dev, "Term Mali devfreq\n"); + +-#ifdef CONFIG_DEVFREQ_THERMAL ++#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) + if (kbdev->devfreq_cooling) + devfreq_cooling_unregister(kbdev->devfreq_cooling); + +@@ -719,6 +758,8 @@ void kbase_devfreq_term(struct kbase_device *kbdev) + + devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); + ++ kbase_devfreq_work_term(kbdev); ++ + err = devfreq_remove_device(kbdev->devfreq); + if (err) + dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err); +@@ -726,6 +767,4 @@ void kbase_devfreq_term(struct kbase_device 
*kbdev) + kbdev->devfreq = NULL; + + kbase_devfreq_term_core_mask_table(kbdev); +- +- kbase_devfreq_work_term(kbdev); + } +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h +index 8c976b2..901827e 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014, 2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014, 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _BASE_DEVFREQ_H_ +@@ -43,4 +42,20 @@ void kbase_devfreq_force_freq(struct kbase_device *kbdev, unsigned long freq); + void kbase_devfreq_enqueue_work(struct kbase_device *kbdev, + enum kbase_devfreq_work_type work_type); + ++/** ++ * kbase_devfreq_opp_translate - Translate nominal OPP frequency from devicetree ++ * into real frequency & voltage pair, along with ++ * core mask ++ * @kbdev: Device pointer ++ * @freq: Nominal frequency ++ * @core_mask: Pointer to u64 to store core mask to ++ * @freqs: Pointer to array of frequencies ++ * @volts: Pointer to array of voltages ++ * ++ * This function will only perform translation if an operating-points-v2-mali ++ * table is present in devicetree. If one is not present then it will return an ++ * untranslated frequency (and corresponding voltage) and all cores enabled. ++ */ ++void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq, ++ u64 *core_mask, unsigned long *freqs, unsigned long *volts); + #endif /* _BASE_DEVFREQ_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c +deleted file mode 100755 +index 5943e4e..0000000 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c ++++ /dev/null +@@ -1,388 +0,0 @@ +-/* +- * +- * (C) COPYRIGHT 2014-2016, 2018-2020 ARM Limited. All rights reserved. +- * +- * This program is free software and is provided to you under the terms of the +- * GNU General Public License version 2 as published by the Free Software +- * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +- * GNU General Public License for more details. +- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, you can access it online at +- * http://www.gnu.org/licenses/gpl-2.0.html. +- * +- * SPDX-License-Identifier: GPL-2.0 +- * +- */ +- +- +-/* +- * +- */ +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#if !defined(CONFIG_MALI_NO_MALI) +- +-#ifdef CONFIG_DEBUG_FS +- +-int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size) +-{ +- struct kbase_io_access *old_buf; +- struct kbase_io_access *new_buf; +- unsigned long flags; +- +- if (!new_size) +- goto out_err; /* The new size must not be 0 */ +- +- new_buf = vmalloc(new_size * sizeof(*h->buf)); +- if (!new_buf) +- goto out_err; +- +- spin_lock_irqsave(&h->lock, flags); +- +- old_buf = h->buf; +- +- /* Note: we won't bother with copying the old data over. The dumping +- * logic wouldn't work properly as it relies on 'count' both as a +- * counter and as an index to the buffer which would have changed with +- * the new array. This is a corner case that we don't need to support. 
+- */ +- h->count = 0; +- h->size = new_size; +- h->buf = new_buf; +- +- spin_unlock_irqrestore(&h->lock, flags); +- +- vfree(old_buf); +- +- return 0; +- +-out_err: +- return -1; +-} +- +- +-int kbase_io_history_init(struct kbase_io_history *h, u16 n) +-{ +- h->enabled = false; +- spin_lock_init(&h->lock); +- h->count = 0; +- h->size = 0; +- h->buf = NULL; +- if (kbase_io_history_resize(h, n)) +- return -1; +- +- return 0; +-} +- +- +-void kbase_io_history_term(struct kbase_io_history *h) +-{ +- vfree(h->buf); +- h->buf = NULL; +-} +- +- +-/* kbase_io_history_add - add new entry to the register access history +- * +- * @h: Pointer to the history data structure +- * @addr: Register address +- * @value: The value that is either read from or written to the register +- * @write: 1 if it's a register write, 0 if it's a read +- */ +-static void kbase_io_history_add(struct kbase_io_history *h, +- void __iomem const *addr, u32 value, u8 write) +-{ +- struct kbase_io_access *io; +- unsigned long flags; +- +- spin_lock_irqsave(&h->lock, flags); +- +- io = &h->buf[h->count % h->size]; +- io->addr = (uintptr_t)addr | write; +- io->value = value; +- ++h->count; +- /* If count overflows, move the index by the buffer size so the entire +- * buffer will still be dumped later */ +- if (unlikely(!h->count)) +- h->count = h->size; +- +- spin_unlock_irqrestore(&h->lock, flags); +-} +- +- +-void kbase_io_history_dump(struct kbase_device *kbdev) +-{ +- struct kbase_io_history *const h = &kbdev->io_history; +- u16 i; +- size_t iters; +- unsigned long flags; +- +- if (!unlikely(h->enabled)) +- return; +- +- spin_lock_irqsave(&h->lock, flags); +- +- dev_err(kbdev->dev, "Register IO History:"); +- iters = (h->size > h->count) ? h->count : h->size; +- dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters, +- h->count); +- for (i = 0; i < iters; ++i) { +- struct kbase_io_access *io = +- &h->buf[(h->count - iters + i) % h->size]; +- char const access = (io->addr & 1) ? 
'w' : 'r'; +- +- dev_err(kbdev->dev, "%6i: %c: reg 0x%016lx val %08x\n", i, +- access, (unsigned long)(io->addr & ~0x1), io->value); +- } +- +- spin_unlock_irqrestore(&h->lock, flags); +-} +- +- +-#endif /* CONFIG_DEBUG_FS */ +- +- +-void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value) +-{ +- KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); +- KBASE_DEBUG_ASSERT(kbdev->dev != NULL); +- +- writel(value, kbdev->reg + offset); +- +-#ifdef CONFIG_DEBUG_FS +- if (unlikely(kbdev->io_history.enabled)) +- kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, +- value, 1); +-#endif /* CONFIG_DEBUG_FS */ +- dev_dbg(kbdev->dev, "w: reg %08x val %08x", offset, value); +-} +- +-KBASE_EXPORT_TEST_API(kbase_reg_write); +- +-u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) +-{ +- u32 val; +- KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); +- KBASE_DEBUG_ASSERT(kbdev->dev != NULL); +- +- val = readl(kbdev->reg + offset); +- +-#ifdef CONFIG_DEBUG_FS +- if (unlikely(kbdev->io_history.enabled)) +- kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, +- val, 0); +-#endif /* CONFIG_DEBUG_FS */ +- dev_dbg(kbdev->dev, "r: reg %08x val %08x", offset, val); +- +- return val; +-} +- +-KBASE_EXPORT_TEST_API(kbase_reg_read); +- +-bool kbase_is_gpu_lost(struct kbase_device *kbdev) +-{ +- u32 val; +- +- val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)); +- +- return val == 0; +-} +-#endif /* !defined(CONFIG_MALI_NO_MALI) */ +- +-/** +- * kbase_report_gpu_fault - Report a GPU fault. +- * @kbdev: Kbase device pointer +- * @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS +- * was also set +- * +- * This function is called from the interrupt handler when a GPU fault occurs. +- * It reports the details of the fault using dev_warn(). 
+- */ +-static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple) +-{ +- u32 status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS)); +- u64 address = (u64) kbase_reg_read(kbdev, +- GPU_CONTROL_REG(GPU_FAULTADDRESS_HI)) << 32; +- +- address |= kbase_reg_read(kbdev, +- GPU_CONTROL_REG(GPU_FAULTADDRESS_LO)); +- +- meson_gpu_fault ++; +- dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx", +- status, +- kbase_gpu_exception_name(status & 0xFF), +- address); +- if (multiple) +- dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n"); +-} +- +-static bool kbase_gpu_fault_interrupt(struct kbase_device *kbdev, int multiple) +-{ +- kbase_report_gpu_fault(kbdev, multiple); +- return false; +-} +- +-void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev) +-{ +- u32 irq_mask; +- +- lockdep_assert_held(&kbdev->hwaccess_lock); +- +- if (kbdev->cache_clean_in_progress) { +- /* If this is called while another clean is in progress, we +- * can't rely on the current one to flush any new changes in +- * the cache. Instead, trigger another cache clean immediately +- * after this one finishes. 
+- */ +- kbdev->cache_clean_queued = true; +- return; +- } +- +- /* Enable interrupt */ +- irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); +- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), +- irq_mask | CLEAN_CACHES_COMPLETED); +- +- KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, 0); +- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), +- GPU_COMMAND_CLEAN_INV_CACHES); +- +- kbdev->cache_clean_in_progress = true; +-} +- +-void kbase_gpu_start_cache_clean(struct kbase_device *kbdev) +-{ +- unsigned long flags; +- +- spin_lock_irqsave(&kbdev->hwaccess_lock, flags); +- kbase_gpu_start_cache_clean_nolock(kbdev); +- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +-} +- +-void kbase_gpu_cache_clean_wait_complete(struct kbase_device *kbdev) +-{ +- lockdep_assert_held(&kbdev->hwaccess_lock); +- +- kbdev->cache_clean_queued = false; +- kbdev->cache_clean_in_progress = false; +- wake_up(&kbdev->cache_clean_wait); +-} +- +-static void kbase_clean_caches_done(struct kbase_device *kbdev) +-{ +- u32 irq_mask; +- unsigned long flags; +- +- spin_lock_irqsave(&kbdev->hwaccess_lock, flags); +- +- if (kbdev->cache_clean_queued) { +- kbdev->cache_clean_queued = false; +- +- KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, 0); +- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), +- GPU_COMMAND_CLEAN_INV_CACHES); +- } else { +- /* Disable interrupt */ +- irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); +- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), +- irq_mask & ~CLEAN_CACHES_COMPLETED); +- +- kbase_gpu_cache_clean_wait_complete(kbdev); +- } +- +- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +-} +- +-static inline bool get_cache_clean_flag(struct kbase_device *kbdev) +-{ +- bool cache_clean_in_progress; +- unsigned long flags; +- +- spin_lock_irqsave(&kbdev->hwaccess_lock, flags); +- cache_clean_in_progress = kbdev->cache_clean_in_progress; +- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); 
+- +- return cache_clean_in_progress; +-} +- +-void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev) +-{ +- while (get_cache_clean_flag(kbdev)) { +- wait_event_interruptible(kbdev->cache_clean_wait, +- !kbdev->cache_clean_in_progress); +- } +-} +- +-int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev, +- unsigned int wait_timeout_ms) +-{ +- long remaining = msecs_to_jiffies(wait_timeout_ms); +- +- while (remaining && get_cache_clean_flag(kbdev)) { +- remaining = wait_event_timeout(kbdev->cache_clean_wait, +- !kbdev->cache_clean_in_progress, +- remaining); +- } +- +- return (remaining ? 0 : -ETIMEDOUT); +-} +- +-void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) +-{ +- bool clear_gpu_fault = false; +- +- KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, val); +- if (val & GPU_FAULT) +- clear_gpu_fault = kbase_gpu_fault_interrupt(kbdev, +- val & MULTIPLE_GPU_FAULTS); +- +- if (val & RESET_COMPLETED) +- kbase_pm_reset_done(kbdev); +- +- if (val & PRFCNT_SAMPLE_COMPLETED) +- kbase_instr_hwcnt_sample_done(kbdev); +- +- KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val); +- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val); +- +- /* kbase_pm_check_transitions (called by kbase_pm_power_changed) must +- * be called after the IRQ has been cleared. This is because it might +- * trigger further power transitions and we don't want to miss the +- * interrupt raised to notify us that these further transitions have +- * finished. The same applies to kbase_clean_caches_done() - if another +- * clean was queued, it might trigger another clean, which might +- * generate another interrupt which shouldn't be missed. 
+- */ +- +- if (val & CLEAN_CACHES_COMPLETED) +- kbase_clean_caches_done(kbdev); +- +- if (val & POWER_CHANGED_ALL) { +- kbase_pm_power_changed(kbdev); +- } else if (val & CLEAN_CACHES_COMPLETED) { +- /* If cache line evict messages can be lost when shader cores +- * power down then we need to flush the L2 cache before powering +- * down cores. When the flush completes, the shaders' state +- * machine needs to be re-invoked to proceed with powering down +- * cores. +- */ +- if (kbdev->pm.backend.l2_always_on || +- kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) +- kbase_pm_power_changed(kbdev); +- } +- +- +- KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, val); +-} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h +deleted file mode 100644 +index 2e1d011..0000000 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h ++++ /dev/null +@@ -1,127 +0,0 @@ +-/* +- * +- * (C) COPYRIGHT 2014,2019-2020 ARM Limited. All rights reserved. +- * +- * This program is free software and is provided to you under the terms of the +- * GNU General Public License version 2 as published by the Free Software +- * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. +- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, you can access it online at +- * http://www.gnu.org/licenses/gpl-2.0.html. 
+- * +- * SPDX-License-Identifier: GPL-2.0 +- * +- */ +- +- +- +-/* +- * Backend-specific HW access device APIs +- */ +- +-#ifndef _KBASE_DEVICE_INTERNAL_H_ +-#define _KBASE_DEVICE_INTERNAL_H_ +- +-/** +- * kbase_reg_write - write to GPU register +- * @kbdev: Kbase device pointer +- * @offset: Offset of register +- * @value: Value to write +- * +- * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). +- */ +-void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value); +- +-/** +- * kbase_reg_read - read from GPU register +- * @kbdev: Kbase device pointer +- * @offset: Offset of register +- * +- * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). +- * +- * Return: Value in desired register +- */ +-u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset); +- +-/** +- * kbase_is_gpu_lost() - Has the GPU been lost. +- * @kbdev: Kbase device pointer +- * +- * This function will return true if the GPU has been lost. +- * When this happens register reads will be zero. A zero GPU_ID is +- * invalid so this is used to detect GPU_LOST +- * +- * Return: True if GPU LOST +- */ +-bool kbase_is_gpu_lost(struct kbase_device *kbdev); +- +-/** +- * kbase_gpu_start_cache_clean - Start a cache clean +- * @kbdev: Kbase device +- * +- * Issue a cache clean and invalidate command to hardware. This function will +- * take hwaccess_lock. +- */ +-void kbase_gpu_start_cache_clean(struct kbase_device *kbdev); +- +-/** +- * kbase_gpu_start_cache_clean_nolock - Start a cache clean +- * @kbdev: Kbase device +- * +- * Issue a cache clean and invalidate command to hardware. hwaccess_lock +- * must be held by the caller. +- */ +-void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev); +- +-/** +- * kbase_gpu_wait_cache_clean - Wait for cache cleaning to finish +- * @kbdev: Kbase device +- * +- * This function will take hwaccess_lock, and may sleep. 
+- */ +-void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev); +- +-/** +- * kbase_gpu_wait_cache_clean_timeout - Wait for certain time for cache +- * cleaning to finish +- * @kbdev: Kbase device +- * @wait_timeout_ms: Time, in milli seconds, to wait for cache clean to complete. +- * +- * This function will take hwaccess_lock, and may sleep. This is supposed to be +- * called from paths (like GPU reset) where an indefinite wait for the completion +- * of cache clean operation can cause deadlock, as the operation may never +- * complete. +- * +- * Return: 0 if successful or a negative error code on failure. +- */ +-int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev, +- unsigned int wait_timeout_ms); +- +-/** +- * kbase_gpu_cache_clean_wait_complete - Called after the cache cleaning is +- * finished. Would also be called after +- * the GPU reset. +- * @kbdev: Kbase device +- * +- * Caller must hold the hwaccess_lock. +- */ +-void kbase_gpu_cache_clean_wait_complete(struct kbase_device *kbdev); +- +-/** +- * kbase_gpu_interrupt - GPU interrupt handler +- * @kbdev: Kbase device pointer +- * @val: The value of the GPU IRQ status register which triggered the call +- * +- * This function is called from the interrupt handler when a GPU irq is to be +- * handled. +- */ +-void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val); +- +-#endif /* _KBASE_DEVICE_INTERNAL_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c +index 352afa1..11088db 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c +@@ -1,12 +1,12 @@ +-// SPDX-License-Identifier: GPL-2.0 ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. 
++ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -17,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +@@ -26,7 +24,7 @@ + */ + + #include +-#include ++#include + #include + #include + +@@ -41,8 +39,19 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev, + + registers.l2_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_FEATURES)); ++ registers.core_features = 0; ++#if !MALI_USE_CSF ++ /* TGOx */ + registers.core_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(CORE_FEATURES)); ++#else /* !MALI_USE_CSF */ ++ if (((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) == ++ GPU_ID2_PRODUCT_TGRX) || ++ ((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) == ++ GPU_ID2_PRODUCT_TVAX)) ++ registers.core_features = ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(CORE_FEATURES)); ++#endif /* MALI_USE_CSF */ + registers.tiler_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_FEATURES)); + registers.mem_features = kbase_reg_read(kbdev, +@@ -51,12 +60,20 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev, + GPU_CONTROL_REG(MMU_FEATURES)); + registers.as_present = kbase_reg_read(kbdev, + GPU_CONTROL_REG(AS_PRESENT)); ++#if !MALI_USE_CSF + registers.js_present = kbase_reg_read(kbdev, + GPU_CONTROL_REG(JS_PRESENT)); ++#else /* !MALI_USE_CSF */ ++ registers.js_present = 0; ++#endif /* !MALI_USE_CSF */ + + for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) ++#if !MALI_USE_CSF + registers.js_features[i] = 
kbase_reg_read(kbdev, + GPU_CONTROL_REG(JS_FEATURES_REG(i))); ++#else /* !MALI_USE_CSF */ ++ registers.js_features[i] = 0; ++#endif /* !MALI_USE_CSF */ + + for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) + registers.texture_features[i] = kbase_reg_read(kbdev, +@@ -93,13 +110,49 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev, + registers.stack_present_hi = kbase_reg_read(kbdev, + GPU_CONTROL_REG(STACK_PRESENT_HI)); + +- if (!kbase_is_gpu_lost(kbdev)) { ++ if (registers.gpu_id >= GPU_ID2_PRODUCT_MAKE(11, 8, 5, 2)) { ++ registers.gpu_features_lo = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_FEATURES_LO)); ++ registers.gpu_features_hi = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_FEATURES_HI)); ++ } else { ++ registers.gpu_features_lo = 0; ++ registers.gpu_features_hi = 0; ++ } ++ ++ if (!kbase_is_gpu_removed(kbdev)) { + *regdump = registers; + return 0; + } else + return -EIO; + } + ++int kbase_backend_gpuprops_get_curr_config(struct kbase_device *kbdev, ++ struct kbase_current_config_regdump *curr_config_regdump) ++{ ++ if (WARN_ON(!kbdev) || WARN_ON(!curr_config_regdump)) ++ return -EINVAL; ++ ++ curr_config_regdump->mem_features = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(MEM_FEATURES)); ++ ++ curr_config_regdump->shader_present_lo = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(SHADER_PRESENT_LO)); ++ curr_config_regdump->shader_present_hi = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(SHADER_PRESENT_HI)); ++ ++ curr_config_regdump->l2_present_lo = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(L2_PRESENT_LO)); ++ curr_config_regdump->l2_present_hi = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(L2_PRESENT_HI)); ++ ++ if (WARN_ON(kbase_is_gpu_removed(kbdev))) ++ return -EIO; ++ ++ return 0; ++ ++} ++ + int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, + struct kbase_gpuprops_regdump *regdump) + { +@@ -112,7 +165,7 @@ int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, + coherency_features = kbase_reg_read(kbdev, + 
GPU_CONTROL_REG(COHERENCY_FEATURES)); + +- if (kbase_is_gpu_lost(kbdev)) ++ if (kbase_is_gpu_removed(kbdev)) + return -EIO; + + regdump->coherency_features = coherency_features; +@@ -135,11 +188,15 @@ int kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev, + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) { + u32 l2_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_FEATURES)); ++ u32 l2_config = ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG)); ++ + +- if (kbase_is_gpu_lost(kbdev)) ++ if (kbase_is_gpu_removed(kbdev)) + return -EIO; + + regdump->l2_features = l2_features; ++ regdump->l2_config = l2_config; + } + + return 0; +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c +index 8b320c7..d7edf30 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /* + * GPU backend instrumentation APIs. 
+ */ +@@ -29,7 +26,7 @@ + #include + #include + #include +-#include ++#include + #include + + +@@ -71,12 +68,12 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, + + /* Configure */ + prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT; +-#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS +- if (kbdev->hwcnt.backend.use_secondary_override) ++#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS ++ prfcnt_config |= kbdev->hwcnt.backend.override_counter_set ++ << PRFCNT_CONFIG_SETSELECT_SHIFT; + #else +- if (enable->use_secondary) ++ prfcnt_config |= enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT; + #endif +- prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT; + + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), + prfcnt_config | PRFCNT_CONFIG_MODE_OFF); +@@ -87,7 +84,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, + enable->dump_buffer >> 32); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), +- enable->jm_bm); ++ enable->fe_bm); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), + enable->shader_bm); +@@ -110,7 +107,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, + + err = 0; + +- dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx); ++ dev_dbg(kbdev->dev, "HW counters dumping set-up for context %pK", kctx); + return err; + out_err: + return err; +@@ -170,11 +167,10 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + +- dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", ++ dev_dbg(kbdev->dev, "HW counters dumping disabled for context %pK", + kctx); + + err = 0; +- + out: + return err; + } +@@ -194,7 +190,8 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) + + if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) { + /* HW counters are disabled or another dump is ongoing, or we're +- * 
resetting */ ++ * resetting ++ */ + goto unlock; + } + +@@ -204,7 +201,6 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) + */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING; + +- + /* Reconfigure the dump address */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), + kbdev->hwcnt.addr & 0xFFFFFFFF); +@@ -218,14 +214,13 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_PRFCNT_SAMPLE); + +- dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx); ++ dev_dbg(kbdev->dev, "HW counters dumping done for context %pK", kctx); + + err = 0; + + unlock: + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + +- + return err; + } + KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump); +@@ -254,40 +249,6 @@ bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx, + } + KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete); + +-void kbasep_cache_clean_worker(struct work_struct *data) +-{ +- struct kbase_device *kbdev; +- unsigned long flags, pm_flags; +- +- kbdev = container_of(data, struct kbase_device, +- hwcnt.backend.cache_clean_work); +- +- spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); +- spin_lock_irqsave(&kbdev->hwcnt.lock, flags); +- +- /* Clean and invalidate the caches so we're sure the mmu tables for the +- * dump buffer is valid. 
+- */ +- KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == +- KBASE_INSTR_STATE_REQUEST_CLEAN); +- kbase_gpu_start_cache_clean_nolock(kbdev); +- spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); +- spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); +- +- kbase_gpu_wait_cache_clean(kbdev); +- +- spin_lock_irqsave(&kbdev->hwcnt.lock, flags); +- KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == +- KBASE_INSTR_STATE_REQUEST_CLEAN); +- /* All finished and idle */ +- kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; +- kbdev->hwcnt.backend.triggered = 1; +- wake_up(&kbdev->hwcnt.backend.wait); +- +- spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); +-} +- +- + void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev) + { + unsigned long flags; +@@ -298,20 +259,10 @@ void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev) + kbdev->hwcnt.backend.triggered = 1; + wake_up(&kbdev->hwcnt.backend.wait); + } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) { +- if (kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE) { +- /* All finished and idle */ +- kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; +- kbdev->hwcnt.backend.triggered = 1; +- wake_up(&kbdev->hwcnt.backend.wait); +- } else { +- int ret; +- /* Always clean and invalidate the cache after a successful dump +- */ +- kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN; +- ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq, +- &kbdev->hwcnt.backend.cache_clean_work); +- KBASE_DEBUG_ASSERT(ret); +- } ++ /* All finished and idle */ ++ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; ++ kbdev->hwcnt.backend.triggered = 1; ++ wake_up(&kbdev->hwcnt.backend.wait); + } + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); +@@ -353,7 +304,8 @@ int kbase_instr_hwcnt_clear(struct kbase_context *kctx) + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + /* Check it's the context previously set up and we're not already +- * dumping */ ++ * dumping ++ */ + if 
(kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state != + KBASE_INSTR_STATE_IDLE) + goto out; +@@ -373,39 +325,45 @@ KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear); + + int kbase_instr_backend_init(struct kbase_device *kbdev) + { +- int ret = 0; ++ spin_lock_init(&kbdev->hwcnt.lock); + + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; + + init_waitqueue_head(&kbdev->hwcnt.backend.wait); +- INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work, +- kbasep_cache_clean_worker); +- + + kbdev->hwcnt.backend.triggered = 0; + +-#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS +- kbdev->hwcnt.backend.use_secondary_override = false; ++#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS ++/* Use the build time option for the override default. */ ++#if defined(CONFIG_MALI_PRFCNT_SET_SECONDARY) ++ kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_SECONDARY; ++#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY) ++ kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_TERTIARY; ++#else ++ /* Default to primary */ ++ kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_PRIMARY; + #endif +- +- kbdev->hwcnt.backend.cache_clean_wq = +- alloc_workqueue("Mali cache cleaning workqueue", 0, 1); +- if (NULL == kbdev->hwcnt.backend.cache_clean_wq) +- ret = -EINVAL; +- +- return ret; ++#endif ++ return 0; + } + + void kbase_instr_backend_term(struct kbase_device *kbdev) + { +- destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq); ++ CSTD_UNUSED(kbdev); + } + +-#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS ++#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS + void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev) + { +- debugfs_create_bool("hwcnt_use_secondary", S_IRUGO | S_IWUSR, +- kbdev->mali_debugfs_directory, +- &kbdev->hwcnt.backend.use_secondary_override); ++ /* No validation is done on the debugfs input. Invalid input could cause ++ * performance counter errors. 
This is acceptable since this is a debug ++ only feature and users should know what they are doing. ++ * ++ * Valid inputs are the values accepted by the SET_SELECT bits of the ++ * PRFCNT_CONFIG register as defined in the architecture specification. ++ */ ++ debugfs_create_u8("hwcnt_set_select", S_IRUGO | S_IWUSR, ++ kbdev->mali_debugfs_directory, ++ (u8 *)&kbdev->hwcnt.backend.override_counter_set); + } + #endif +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h +index 9930968..e356348 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014, 2016, 2018, 2019-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014, 2016, 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +@@ -27,6 +26,8 @@ + #ifndef _KBASE_INSTR_DEFS_H_ + #define _KBASE_INSTR_DEFS_H_ + ++#include ++ + /* + * Instrumentation State Machine States + */ +@@ -37,8 +38,6 @@ enum kbase_instr_state { + KBASE_INSTR_STATE_IDLE, + /* Hardware is currently dumping a frame. 
*/ + KBASE_INSTR_STATE_DUMPING, +- /* We've requested a clean to occur on a workqueue */ +- KBASE_INSTR_STATE_REQUEST_CLEAN, + /* An error has occured during DUMPING (page fault). */ + KBASE_INSTR_STATE_FAULT + }; +@@ -47,14 +46,11 @@ enum kbase_instr_state { + struct kbase_instr_backend { + wait_queue_head_t wait; + int triggered; +-#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS +- bool use_secondary_override; ++#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS ++ enum kbase_hwcnt_physical_set override_counter_set; + #endif + + enum kbase_instr_state state; +- struct workqueue_struct *cache_clean_wq; +- struct work_struct cache_clean_work; + }; + + #endif /* _KBASE_INSTR_DEFS_H_ */ +- +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h +index 2254b9f..332cc69 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014, 2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014, 2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /* + * Backend-specific HW access instrumentation APIs + */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h +index ca3c048..2671ce5 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2015, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c +index 8696c6a..a29f7ef 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2014-2016,2018-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2016, 2018-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,17 +17,15 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include +-#include ++#include + #include + + #include + +-#if !defined(CONFIG_MALI_NO_MALI) ++#if !IS_ENABLED(CONFIG_MALI_NO_MALI) + + /* GPU IRQ Tags */ + #define JOB_IRQ_TAG 0 +@@ -72,7 +71,12 @@ static irqreturn_t kbase_job_irq_handler(int irq, void *data) + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + ++#if MALI_USE_CSF ++ /* call the csf interrupt handler */ ++ kbase_csf_interrupt(kbdev, val); ++#else + kbase_job_done(kbdev, val); ++#endif + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + +@@ -210,24 +214,25 @@ int kbase_set_custom_irq_handler(struct kbase_device *kbdev, + int result = 0; + irq_handler_t requested_irq_handler = NULL; + +- KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) && +- (GPU_IRQ_HANDLER >= irq_type)); ++ KBASE_DEBUG_ASSERT((irq_type >= JOB_IRQ_HANDLER) && ++ (irq_type <= GPU_IRQ_HANDLER)); + + /* Release previous handler */ + if (kbdev->irqs[irq_type].irq) + free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type)); + +- requested_irq_handler = (NULL != custom_handler) ? custom_handler : +- kbase_handler_table[irq_type]; ++ requested_irq_handler = (custom_handler != NULL) ? 
++ custom_handler : ++ kbase_handler_table[irq_type]; + +- if (0 != request_irq(kbdev->irqs[irq_type].irq, +- requested_irq_handler, ++ if (request_irq(kbdev->irqs[irq_type].irq, requested_irq_handler, + kbdev->irqs[irq_type].flags | IRQF_SHARED, +- dev_name(kbdev->dev), kbase_tag(kbdev, irq_type))) { ++ dev_name(kbdev->dev), ++ kbase_tag(kbdev, irq_type)) != 0) { + result = -EINVAL; + dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", + kbdev->irqs[irq_type].irq, irq_type); +-#ifdef CONFIG_SPARSE_IRQ ++#if IS_ENABLED(CONFIG_SPARSE_IRQ) + dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); + #endif /* CONFIG_SPARSE_IRQ */ + } +@@ -456,7 +461,7 @@ int kbase_install_interrupts(struct kbase_device *kbdev) + if (err) { + dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", + kbdev->irqs[i].irq, i); +-#ifdef CONFIG_SPARSE_IRQ ++#if IS_ENABLED(CONFIG_SPARSE_IRQ) + dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); + #endif /* CONFIG_SPARSE_IRQ */ + goto release; +@@ -496,4 +501,4 @@ void kbase_synchronize_irqs(struct kbase_device *kbdev) + + KBASE_EXPORT_TEST_API(kbase_synchronize_irqs); + +-#endif /* !defined(CONFIG_MALI_NO_MALI) */ ++#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c +index bb4f548..888aa59 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,11 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- + /* + * Register backend context / address space management + */ +@@ -58,8 +56,10 @@ static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev, + lockdep_assert_held(&js_devdata->runpool_mutex); + lockdep_assert_held(&kbdev->hwaccess_lock); + ++#if !MALI_USE_CSF + /* Attribute handling */ + kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx); ++#endif + + /* Allow it to run jobs */ + kbasep_js_set_submit_allowed(js_devdata, kctx); +@@ -188,8 +188,8 @@ int kbase_backend_find_and_release_free_address_space( + } + + /* Context was retained while locks were dropped, +- * continue looking for free AS */ +- ++ * continue looking for free AS ++ */ + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex); + +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h +index 7cda61a..e29ace7 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014-2016, 2018-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,11 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- + /* + * Register-based HW access backend specific definitions + */ +@@ -78,11 +76,12 @@ struct slot_rb { + * The hwaccess_lock (a spinlock) must be held when accessing this structure + */ + struct kbase_backend_data { ++#if !MALI_USE_CSF + struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS]; +- + struct hrtimer scheduling_timer; + + bool timer_running; ++#endif + bool suspend_timer; + + atomic_t reset_gpu; +@@ -92,13 +91,16 @@ struct kbase_backend_data { + /* kbase_prepare_to_reset_gpu has been called */ + #define KBASE_RESET_GPU_PREPARED 1 + /* kbase_reset_gpu has been called - the reset will now definitely happen +- * within the timeout period */ ++ * within the timeout period ++ */ + #define KBASE_RESET_GPU_COMMITTED 2 + /* The GPU reset process is currently occuring (timeout has expired or +- * kbasep_try_reset_gpu_early was called) */ ++ * kbasep_try_reset_gpu_early was called) ++ */ + #define KBASE_RESET_GPU_HAPPENING 3 + /* Reset the GPU silently, used when resetting the GPU as part of normal +- * behavior (e.g. when exiting protected mode). */ ++ * behavior (e.g. when exiting protected mode). 
++ */ + #define KBASE_RESET_GPU_SILENT 4 + struct workqueue_struct *reset_workq; + struct work_struct reset_work; +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c +index fa6bc83..ae0377f 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +@@ -33,16 +32,20 @@ + #include + #include + #include ++#include + #include +-#include ++#include + #include + #include ++#include + + static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev); ++static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev, ++ const u64 affinity, const u64 limited_core_mask); + + static u64 kbase_job_write_affinity(struct kbase_device *kbdev, + base_jd_core_req core_req, +- int js) ++ int js, const u64 limited_core_mask) + { + u64 affinity; + +@@ -71,14 +74,21 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev, + */ + if (js == 2 && num_core_groups > 1) + affinity &= coherency_info->group[1].core_mask; +- else ++ else if (num_core_groups > 1) + affinity &= coherency_info->group[0].core_mask; ++ else ++ affinity &= kbdev->gpu_props.curr_config.shader_present; + } else { + /* Use all cores */ + affinity = kbdev->pm.backend.shaders_avail & + kbdev->pm.debug_core_mask[js]; + } + ++ if (core_req & BASE_JD_REQ_LIMITED_CORE_MASK) { ++ /* Limiting affinity due to BASE_JD_REQ_LIMITED_CORE_MASK by applying the limited core mask. */ ++ affinity = kbasep_apply_limited_core_mask(kbdev, affinity, limited_core_mask); ++ } ++ + if (unlikely(!affinity)) { + #ifdef CONFIG_MALI_DEBUG + u64 shaders_ready = +@@ -88,6 +98,16 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev, + #endif + + affinity = kbdev->pm.backend.shaders_avail; ++ ++ if (core_req & BASE_JD_REQ_LIMITED_CORE_MASK) { ++ /* Limiting affinity again to make sure it only enables shader cores with backed TLS memory. 
*/ ++ affinity = kbasep_apply_limited_core_mask(kbdev, affinity, limited_core_mask); ++ ++#ifdef CONFIG_MALI_DEBUG ++ /* affinity should never be 0 */ ++ WARN_ON(!affinity); ++#endif ++ } + } + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), +@@ -168,7 +188,7 @@ static u64 select_job_chain(struct kbase_jd_atom *katom) + } + + dev_dbg(kctx->kbdev->dev, +- "Selected job chain 0x%llx for end atom %p in state %d\n", ++ "Selected job chain 0x%llx for end atom %pK in state %d\n", + jc, (void *)katom, (int)rp->state); + + katom->jc = jc; +@@ -192,7 +212,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, + /* Command register must be available */ + KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx)); + +- dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %p\n", ++ dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %pK\n", + jc_head, (void *)katom); + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), +@@ -200,10 +220,12 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), + jc_head >> 32); + +- affinity = kbase_job_write_affinity(kbdev, katom->core_req, js); ++ affinity = kbase_job_write_affinity(kbdev, katom->core_req, js, ++ kctx->limited_core_mask); + + /* start MMU, medium priority, cache clean/flush on end, clean/flush on +- * start */ ++ * start ++ */ + cfg = kctx->as_nr; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION) && +@@ -255,7 +277,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, + katom->start_timestamp = ktime_get(); + + /* GO ! 
*/ +- dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx", ++ dev_dbg(kbdev->dev, "JS: Submitting atom %pK from ctx %pK to js[%d] with head=0x%llx", + katom, kctx, js, jc_head); + + KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js, +@@ -277,7 +299,8 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, + katom, + &kbdev->gpu_props.props.raw_props.js_features[js], + "ctx_nr,atom_nr"); +-#ifdef CONFIG_GPU_TRACEPOINTS ++ kbase_kinstr_jm_atom_hw_submit(katom); ++#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) + if (!kbase_backend_nr_atoms_submitted(kbdev, js)) { + /* If this is the only job on the slot, trace it as starting */ + char js_string[16]; +@@ -328,7 +351,8 @@ static void kbasep_job_slot_update_head_start_timestamp( + /* Only update the timestamp if it's a better estimate + * than what's currently stored. This is because our + * estimate that accounts for the throttle time may be +- * too much of an overestimate */ ++ * too much of an overestimate ++ */ + katom->start_timestamp = end_timestamp; + } + } +@@ -371,9 +395,9 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) + /* treat failed slots as finished slots */ + u32 finished = (done & 0xFFFF) | failed; + +- /* Note: This is inherently unfair, as we always check +- * for lower numbered interrupts before the higher +- * numbered ones.*/ ++ /* Note: This is inherently unfair, as we always check for lower ++ * numbered interrupts before the higher numbered ones. 
++ */ + i = ffs(finished) - 1; + KBASE_DEBUG_ASSERT(i >= 0); + +@@ -385,7 +409,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) + + if (failed & (1u << i)) { + /* read out the job slot status code if the job +- * slot reported failure */ ++ * slot reported failure ++ */ + completion_code = kbase_reg_read(kbdev, + JOB_SLOT_REG(i, JS_STATUS)); + +@@ -399,7 +424,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) + + /* Soft-stopped job - read the value of + * JS_TAIL so that the job chain can +- * be resumed */ ++ * be resumed ++ */ + job_tail = (u64)kbase_reg_read(kbdev, + JOB_SLOT_REG(i, JS_TAIL_LO)) | + ((u64)kbase_reg_read(kbdev, +@@ -408,21 +434,26 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) + } else if (completion_code == + BASE_JD_EVENT_NOT_STARTED) { + /* PRLAM-10673 can cause a TERMINATED +- * job to come back as NOT_STARTED, but +- * the error interrupt helps us detect +- * it */ ++ * job to come back as NOT_STARTED, ++ * but the error interrupt helps us ++ * detect it ++ */ + completion_code = + BASE_JD_EVENT_TERMINATED; + } + + kbase_gpu_irq_evict(kbdev, i, completion_code); + +- /* Some jobs that encounter a BUS FAULT may result in corrupted +- * state causing future jobs to hang. Reset GPU before +- * allowing any other jobs on the slot to continue. */ ++ /* Some jobs that encounter a BUS FAULT may ++ * result in corrupted state causing future ++ * jobs to hang. Reset GPU before allowing ++ * any other jobs on the slot to continue. 
++ */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3076)) { + if (completion_code == BASE_JD_EVENT_JOB_BUS_FAULT) { +- if (kbase_prepare_to_reset_gpu_locked(kbdev)) ++ if (kbase_prepare_to_reset_gpu_locked( ++ kbdev, ++ RESET_FLAGS_NONE)) + kbase_reset_gpu_locked(kbdev); + } + } +@@ -480,7 +511,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) + + if ((rawstat >> (i + 16)) & 1) { + /* There is a failed job that we've +- * missed - add it back to active */ ++ * missed - add it back to active ++ */ + active |= (1u << i); + } + } +@@ -582,7 +614,8 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, + } + + /* We are about to issue a soft stop, so mark the atom as having +- * been soft stopped */ ++ * been soft stopped ++ */ + target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPED; + + /* Mark the point where we issue the soft-stop command */ +@@ -692,12 +725,40 @@ void kbase_backend_jm_kill_running_jobs_from_kctx(struct kbase_context *kctx) + kbase_job_slot_hardstop(kctx, i, NULL); + } + ++/** ++ * kbase_is_existing_atom_submitted_later_than_ready ++ * @ready: sequence number of the ready atom ++ * @existing: sequence number of the existing atom ++ * ++ * Returns true if the existing atom has been submitted later than the ++ * ready atom. It is used to understand if an atom that is ready has been ++ * submitted earlier than the currently running atom, so that the currently ++ * running atom should be preempted to allow the ready atom to run. ++ */ ++static inline bool kbase_is_existing_atom_submitted_later_than_ready(u64 ready, u64 existing) ++{ ++ /* No seq_nr set? */ ++ if (!ready || !existing) ++ return false; ++ ++ /* Efficiently handle the unlikely case of wrapping. ++ * The following code assumes that the delta between the sequence number ++ * of the two atoms is less than INT64_MAX. ++ * In the extremely unlikely case where the delta is higher, the comparison ++ * defaults for no preemption. 
++ * The code also assumes that the conversion from unsigned to signed types ++ * works because the signed integers are 2's complement. ++ */ ++ return (s64)(ready - existing) < 0; ++} ++ + void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, + struct kbase_jd_atom *target_katom) + { + struct kbase_device *kbdev; + int js = target_katom->slot_nr; + int priority = target_katom->sched_priority; ++ int seq_nr = target_katom->seq_nr; + int i; + bool stop_sent = false; + +@@ -719,7 +780,8 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, + (katom->kctx != kctx)) + continue; + +- if (katom->sched_priority > priority) { ++ if ((katom->sched_priority > priority) || ++ (katom->kctx == kctx && kbase_is_existing_atom_submitted_later_than_ready(seq_nr, katom->seq_nr))) { + if (!stop_sent) + KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED( + kbdev, +@@ -749,7 +811,7 @@ static int softstop_start_rp_nolock( + + if (!(katom->core_req & BASE_JD_REQ_START_RENDERPASS)) { + dev_dbg(kctx->kbdev->dev, +- "Atom %p on job slot is not start RP\n", (void *)katom); ++ "Atom %pK on job slot is not start RP\n", (void *)katom); + return -EPERM; + } + +@@ -762,13 +824,13 @@ static int softstop_start_rp_nolock( + rp->state != KBASE_JD_RP_RETRY)) + return -EINVAL; + +- dev_dbg(kctx->kbdev->dev, "OOM in state %d with region %p\n", ++ dev_dbg(kctx->kbdev->dev, "OOM in state %d with region %pK\n", + (int)rp->state, (void *)reg); + + if (WARN_ON(katom != rp->start_katom)) + return -EINVAL; + +- dev_dbg(kctx->kbdev->dev, "Adding region %p to list %p\n", ++ dev_dbg(kctx->kbdev->dev, "Adding region %pK to list %pK\n", + (void *)reg, (void *)&rp->oom_reg_list); + list_move_tail(&reg->link, &rp->oom_reg_list); + dev_dbg(kctx->kbdev->dev, "Added region to list\n"); +@@ -813,9 +875,9 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) + if (timeout != 0) + goto exit; + +- if (kbase_prepare_to_reset_gpu(kbdev)) { ++ if (kbase_prepare_to_reset_gpu(kbdev, 
RESET_FLAGS_NONE)) { + dev_err(kbdev->dev, +- "Issueing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n", ++ "Issuing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n", + ZAP_TIMEOUT); + kbase_reset_gpu(kbdev); + } +@@ -823,7 +885,7 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) + /* Wait for the reset to complete */ + kbase_reset_gpu_wait(kbdev); + exit: +- dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx); ++ dev_dbg(kbdev->dev, "Zap: Finished Context %pK", kctx); + + /* Ensure that the signallers of the waitqs have finished */ + mutex_lock(&kctx->jctx.lock); +@@ -884,7 +946,7 @@ KBASE_EXPORT_TEST_API(kbase_job_slot_term); + void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, + struct kbase_jd_atom *target_katom, u32 sw_flags) + { +- dev_dbg(kbdev->dev, "Soft-stop atom %p with flags 0x%x (s:%d)\n", ++ dev_dbg(kbdev->dev, "Soft-stop atom %pK with flags 0x%x (s:%d)\n", + target_katom, sw_flags, js); + + KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK)); +@@ -988,6 +1050,33 @@ void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, + } + } + ++int kbase_reset_gpu_prevent_and_wait(struct kbase_device *kbdev) ++{ ++ WARN(true, "%s Not implemented for JM GPUs", __func__); ++ return -EINVAL; ++} ++ ++int kbase_reset_gpu_try_prevent(struct kbase_device *kbdev) ++{ ++ WARN(true, "%s Not implemented for JM GPUs", __func__); ++ return -EINVAL; ++} ++ ++void kbase_reset_gpu_allow(struct kbase_device *kbdev) ++{ ++ WARN(true, "%s Not implemented for JM GPUs", __func__); ++} ++ ++void kbase_reset_gpu_assert_prevented(struct kbase_device *kbdev) ++{ ++ WARN(true, "%s Not implemented for JM GPUs", __func__); ++} ++ ++void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev) ++{ ++ WARN(true, "%s Not implemented for JM GPUs", __func__); ++} ++ + static void 
kbase_debug_dump_registers(struct kbase_device *kbdev) + { + int i; +@@ -1054,13 +1143,15 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) + + /* Make sure the timer has completed - this cannot be done from + * interrupt context, so this cannot be done within +- * kbasep_try_reset_gpu_early. */ ++ * kbasep_try_reset_gpu_early. ++ */ + hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer); + + if (kbase_pm_context_active_handle_suspend(kbdev, + KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { + /* This would re-activate the GPU. Since it's already idle, +- * there's no need to reset it */ ++ * there's no need to reset it ++ */ + atomic_set(&kbdev->hwaccess.backend.reset_gpu, + KBASE_RESET_GPU_NOT_PENDING); + kbase_disjoint_state_down(kbdev); +@@ -1081,14 +1172,16 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) + kbdev->irq_reset_flush = true; + + /* Disable IRQ to avoid IRQ handlers to kick in after releasing the +- * spinlock; this also clears any outstanding interrupts */ ++ * spinlock; this also clears any outstanding interrupts ++ */ + kbase_pm_disable_interrupts_nolock(kbdev); + + spin_unlock(&kbdev->mmu_mask_change); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* Ensure that any IRQ handlers have finished +- * Must be done without any locks IRQ handlers will take */ ++ * Must be done without any locks IRQ handlers will take ++ */ + kbase_synchronize_irqs(kbdev); + + /* Flush out any in-flight work items */ +@@ -1099,7 +1192,8 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) { + /* Ensure that L2 is not transitioning when we send the reset +- * command */ ++ * command ++ */ + while (--max_loops && kbase_pm_get_trans_cores(kbdev, + KBASE_PM_CORE_L2)) + ; +@@ -1114,14 +1208,16 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) + /* All slot have been soft-stopped and we've waited + * SOFT_STOP_RESET_TIMEOUT for the 
slots to clear, at this point we + * assume that anything that is still left on the GPU is stuck there and +- * we'll kill it when we reset the GPU */ ++ * we'll kill it when we reset the GPU ++ */ + + if (!silent) + dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", + RESET_TIMEOUT); + + /* Output the state of some interesting registers to help in the +- * debugging of GPU resets */ ++ * debugging of GPU resets ++ */ + if (!silent) + kbase_debug_dump_registers(kbdev); + +@@ -1160,7 +1256,8 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) + kbase_pm_update_cores_state(kbdev); + + /* Synchronously request and wait for those cores, because if +- * instrumentation is enabled it would need them immediately. */ ++ * instrumentation is enabled it would need them immediately. ++ */ + kbase_pm_wait_for_desired_state(kbdev); + + mutex_unlock(&kbdev->pm.lock); +@@ -1237,7 +1334,8 @@ static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev) + + /* Check that the reset has been committed to (i.e. kbase_reset_gpu has + * been called), and that no other thread beat this thread to starting +- * the reset */ ++ * the reset ++ */ + if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, + KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) != + KBASE_RESET_GPU_COMMITTED) { +@@ -1261,6 +1359,7 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev) + /** + * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU + * @kbdev: kbase device ++ * @flags: Bitfield indicating impact of reset (see flag defines) + * + * This function just soft-stops all the slots to ensure that as many jobs as + * possible are saved. +@@ -1271,12 +1370,23 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev) + * false - Another thread is performing a reset, kbase_reset_gpu should + * not be called. 
+ */ +-bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev) ++bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, ++ unsigned int flags) + { + int i; + ++ CSTD_UNUSED(flags); + KBASE_DEBUG_ASSERT(kbdev); + ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ if (kbase_pm_is_gpu_lost(kbdev)) { ++ /* GPU access has been removed, reset will be done by ++ * Arbiter instead ++ */ ++ return false; ++ } ++#endif ++ + if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, + KBASE_RESET_GPU_NOT_PENDING, + KBASE_RESET_GPU_PREPARED) != +@@ -1293,14 +1403,14 @@ bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev) + return true; + } + +-bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev) ++bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags) + { +- unsigned long flags; ++ unsigned long lock_flags; + bool ret; + +- spin_lock_irqsave(&kbdev->hwaccess_lock, flags); +- ret = kbase_prepare_to_reset_gpu_locked(kbdev); +- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, lock_flags); ++ ret = kbase_prepare_to_reset_gpu_locked(kbdev, flags); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, lock_flags); + + return ret; + } +@@ -1321,7 +1431,8 @@ void kbase_reset_gpu(struct kbase_device *kbdev) + KBASE_DEBUG_ASSERT(kbdev); + + /* Note this is an assert/atomic_set because it is a software issue for +- * a race to be occuring here */ ++ * a race to be occurring here ++ */ + KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) == + KBASE_RESET_GPU_PREPARED); + atomic_set(&kbdev->hwaccess.backend.reset_gpu, +@@ -1344,7 +1455,8 @@ void kbase_reset_gpu_locked(struct kbase_device *kbdev) + KBASE_DEBUG_ASSERT(kbdev); + + /* Note this is an assert/atomic_set because it is a software issue for +- * a race to be occuring here */ ++ * a race to be occurring here ++ */ + KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) == + KBASE_RESET_GPU_PREPARED); + 
atomic_set(&kbdev->hwaccess.backend.reset_gpu, +@@ -1419,3 +1531,21 @@ void kbase_reset_gpu_term(struct kbase_device *kbdev) + { + destroy_workqueue(kbdev->hwaccess.backend.reset_workq); + } ++ ++static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev, ++ const u64 affinity, const u64 limited_core_mask) ++{ ++ const u64 result = affinity & limited_core_mask; ++ ++#ifdef CONFIG_MALI_DEBUG ++ dev_dbg(kbdev->dev, ++ "Limiting affinity due to BASE_JD_REQ_LIMITED_CORE_MASK from 0x%lx to 0x%lx (mask is 0x%lx)\n", ++ (unsigned long int)affinity, ++ (unsigned long int)result, ++ (unsigned long int)limited_core_mask); ++#else ++ CSTD_UNUSED(kbdev); ++#endif ++ ++ return result; ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h +index 1419b59..1039e85 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2011-2016, 2018-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2016, 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /* + * Job Manager backend-specific low-level APIs. 
+ */ +@@ -34,7 +31,7 @@ + #include + + #include +-#include ++#include + + /** + * kbase_job_submit_nolock() - Submit a job to a certain job-slot +@@ -62,7 +59,7 @@ void kbase_job_submit_nolock(struct kbase_device *kbdev, + void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code, + u64 job_tail, ktime_t *end_timestamp); + +-#ifdef CONFIG_GPU_TRACEPOINTS ++#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) + static inline char *kbasep_make_job_slot_string(int js, char *js_string, + size_t js_size) + { +@@ -71,11 +68,13 @@ static inline char *kbasep_make_job_slot_string(int js, char *js_string, + } + #endif + ++#if !MALI_USE_CSF + static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, + struct kbase_context *kctx) + { + return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT)); + } ++#endif + + + /** +@@ -94,6 +93,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + int js); + ++#if !MALI_USE_CSF + /** + * kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop + * on the specified atom +@@ -112,6 +112,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, + u32 action, + base_jd_core_req core_reqs, + struct kbase_jd_atom *target_katom); ++#endif /* !MALI_USE_CSF */ + + /** + * kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c +index 4e4ed05..8ee897f 100755 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,11 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- + /* + * Register-based HW access backend specific APIs + */ +@@ -33,16 +31,19 @@ + #include + #include + #include ++#include + #include +-#include ++#include + #include + #include + + /* Return whether the specified ringbuffer is empty. HW access lock must be +- * held */ ++ * held ++ */ + #define SLOT_RB_EMPTY(rb) (rb->write_idx == rb->read_idx) + /* Return number of atoms currently in the specified ringbuffer. HW access lock +- * must be held */ ++ * must be held ++ */ + #define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx) + + static void kbase_gpu_release_atom(struct kbase_device *kbdev, +@@ -253,6 +254,8 @@ static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, + + int kbase_backend_slot_free(struct kbase_device *kbdev, int js) + { ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ + if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) != + KBASE_RESET_GPU_NOT_PENDING) { + /* The GPU is being reset - so prevent submission */ +@@ -278,15 +281,19 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, + break; + + case KBASE_ATOM_GPU_RB_SUBMITTED: ++ kbase_kinstr_jm_atom_hw_release(katom); + /* Inform power management at start/finish of atom so it can + * update its GPU utilisation metrics. Mark atom as not +- * submitted beforehand. */ ++ * submitted beforehand. 
++ */ + katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; + kbase_pm_metrics_update(kbdev, end_timestamp); + ++ /* Inform platform at start/finish of atom */ ++ kbasep_platform_event_atom_complete(katom); ++ + if (katom->core_req & BASE_JD_REQ_PERMON) + kbase_pm_release_gpu_cycle_counter_nolock(kbdev); +- /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + KBASE_TLSTREAM_TL_NRET_ATOM_LPU(kbdev, katom, + &kbdev->gpu_props.props.raw_props.js_features +@@ -296,6 +303,8 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, + &kbdev->gpu_props.props.raw_props.js_features + [katom->slot_nr]); + ++ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ ++ + case KBASE_ATOM_GPU_RB_READY: + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + +@@ -540,7 +549,8 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, + KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev, kbdev); + /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV + * should ensure that we are not already transitiong, and that +- * there are no atoms currently on the GPU. */ ++ * there are no atoms currently on the GPU. ++ */ + WARN_ON(kbdev->protected_mode_transition); + WARN_ON(kbase_gpu_atoms_submitted_any(kbdev)); + /* If hwcnt is disabled, it means we didn't clean up correctly +@@ -566,19 +576,15 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, + + /* We couldn't disable atomically, so kick off a worker */ + if (!kbdev->protected_mode_hwcnt_disabled) { +-#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE +- queue_work(system_wq, +- &kbdev->protected_mode_hwcnt_disable_work); +-#else +- queue_work(system_highpri_wq, ++ kbase_hwcnt_context_queue_work( ++ kbdev->hwcnt_gpu_ctx, + &kbdev->protected_mode_hwcnt_disable_work); +-#endif + return -EAGAIN; + } + +- /* Once reaching this point GPU must be +- * switched to protected mode or hwcnt +- * re-enabled. */ ++ /* Once reaching this point GPU must be switched to protected ++ * mode or hwcnt re-enabled. 
++ */ + + if (kbase_pm_protected_entry_override_enable(kbdev)) + return -EAGAIN; +@@ -618,7 +624,7 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, + KBASE_PM_CORE_L2) || + kbase_pm_get_trans_cores(kbdev, + KBASE_PM_CORE_L2) || +- kbase_is_gpu_lost(kbdev)) { ++ kbase_is_gpu_removed(kbdev)) { + /* + * The L2 is still powered, wait for all + * the users to finish with it before doing +@@ -718,7 +724,8 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, + KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(kbdev, kbdev); + /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV + * should ensure that we are not already transitiong, and that +- * there are no atoms currently on the GPU. */ ++ * there are no atoms currently on the GPU. ++ */ + WARN_ON(kbdev->protected_mode_transition); + WARN_ON(kbase_gpu_atoms_submitted_any(kbdev)); + +@@ -764,8 +771,8 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, + katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; + kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); + /* Only return if head atom or previous atom +- * already removed - as atoms must be returned +- * in order */ ++ * already removed - as atoms must be returned in order ++ */ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); +@@ -811,7 +818,8 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) + lockdep_assert_held(&kbdev->hwaccess_lock); + + #ifdef CONFIG_MALI_ARBITER_SUPPORT +- if (kbase_reset_gpu_is_active(kbdev) || kbase_is_gpu_lost(kbdev)) ++ if (kbase_reset_gpu_is_active(kbdev) || ++ kbase_is_gpu_removed(kbdev)) + #else + if (kbase_reset_gpu_is_active(kbdev)) + #endif +@@ -843,7 +851,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) + break; + + katom[idx]->gpu_rb_state = +- KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV; ++ KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV; + + /* ***TRANSITION TO HIGHER 
STATE*** */ + /* fallthrough */ +@@ -907,12 +915,14 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) + kbase_gpu_mark_atom_for_return(kbdev, + katom[idx]); + /* Set EVENT_DONE so this atom will be +- completed, not unpulled. */ ++ * completed, not unpulled. ++ */ + katom[idx]->event_code = + BASE_JD_EVENT_DONE; + /* Only return if head atom or previous + * atom already removed - as atoms must +- * be returned in order. */ ++ * be returned in order. ++ */ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); +@@ -943,7 +953,8 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) + + if (idx == 1) { + /* Only submit if head atom or previous +- * atom already submitted */ ++ * atom already submitted ++ */ + if ((katom[0]->gpu_rb_state != + KBASE_ATOM_GPU_RB_SUBMITTED && + katom[0]->gpu_rb_state != +@@ -959,7 +970,8 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) + } + + /* If inter-slot serialization in use then don't +- * submit atom if any other slots are in use */ ++ * submit atom if any other slots are in use ++ */ + if ((kbdev->serialize_jobs & + KBASE_SERIALIZE_INTER_SLOT) && + other_slots_busy(kbdev, js)) +@@ -971,31 +983,37 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) + break; + #endif + /* Check if this job needs the cycle counter +- * enabled before submission */ ++ * enabled before submission ++ */ + if (katom[idx]->core_req & BASE_JD_REQ_PERMON) + kbase_pm_request_gpu_cycle_counter_l2_is_on( + kbdev); + + kbase_job_hw_submit(kbdev, katom[idx], js); + katom[idx]->gpu_rb_state = +- KBASE_ATOM_GPU_RB_SUBMITTED; ++ KBASE_ATOM_GPU_RB_SUBMITTED; ++ ++ /* ***TRANSITION TO HIGHER STATE*** */ ++ /* fallthrough */ ++ case KBASE_ATOM_GPU_RB_SUBMITTED: + + /* Inform power management at start/finish of + * atom so it can update its GPU utilisation +- * metrics. */ ++ * metrics. 
++ */ + kbase_pm_metrics_update(kbdev, + &katom[idx]->start_timestamp); + +- /* ***TRANSITION TO HIGHER STATE*** */ +- /* fallthrough */ +- case KBASE_ATOM_GPU_RB_SUBMITTED: +- /* Atom submitted to HW, nothing else to do */ ++ /* Inform platform at start/finish of atom */ ++ kbasep_platform_event_atom_submit(katom[idx]); ++ + break; + + case KBASE_ATOM_GPU_RB_RETURN_TO_JS: + /* Only return if head atom or previous atom + * already removed - as atoms must be returned +- * in order */ ++ * in order ++ */ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); +@@ -1013,7 +1031,7 @@ void kbase_backend_run_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) + { + lockdep_assert_held(&kbdev->hwaccess_lock); +- dev_dbg(kbdev->dev, "Backend running atom %p\n", (void *)katom); ++ dev_dbg(kbdev->dev, "Backend running atom %pK\n", (void *)katom); + + kbase_gpu_enqueue_atom(kbdev, katom); + kbase_backend_slot_update(kbdev); +@@ -1074,7 +1092,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, + struct kbase_context *kctx = katom->kctx; + + dev_dbg(kbdev->dev, +- "Atom %p completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n", ++ "Atom %pK completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n", + (void *)katom, completion_code, job_tail, js); + + lockdep_assert_held(&kbdev->hwaccess_lock); +@@ -1098,7 +1116,8 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, + * BASE_JD_REQ_SKIP_CACHE_END is set, the GPU cache is not + * flushed. To prevent future evictions causing possible memory + * corruption we need to flush the cache manually before any +- * affected memory gets reused. */ ++ * affected memory gets reused. 
++ */ + katom->need_cache_flush_cores_retained = true; + } + +@@ -1181,7 +1200,8 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, + katom_idx1->gpu_rb_state != + KBASE_ATOM_GPU_RB_SUBMITTED) { + /* Can not dequeue this atom yet - will be +- * dequeued when atom at idx0 completes */ ++ * dequeued when atom at idx0 completes ++ */ + katom_idx1->event_code = BASE_JD_EVENT_STOPPED; + kbase_gpu_mark_atom_for_return(kbdev, + katom_idx1); +@@ -1194,7 +1214,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, + if (job_tail != 0 && job_tail != katom->jc) { + /* Some of the job has been executed */ + dev_dbg(kbdev->dev, +- "Update job chain address of atom %p to resume from 0x%llx\n", ++ "Update job chain address of atom %pK to resume from 0x%llx\n", + (void *)katom, job_tail); + + katom->jc = job_tail; +@@ -1214,7 +1234,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, + * - Schedule out the parent context if necessary, and schedule a new + * one in. + */ +-#ifdef CONFIG_GPU_TRACEPOINTS ++#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) + { + /* The atom in the HEAD */ + struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, +@@ -1255,7 +1275,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, + + if (katom) { + dev_dbg(kbdev->dev, +- "Cross-slot dependency %p has become runnable.\n", ++ "Cross-slot dependency %pK has become runnable.\n", + (void *)katom); + + /* Check if there are lower priority jobs to soft stop */ +@@ -1268,7 +1288,8 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, + kbase_pm_update_state(kbdev); + + /* Job completion may have unblocked other atoms. 
Try to update all job +- * slots */ ++ * slots ++ */ + kbase_backend_slot_update(kbdev); + } + +@@ -1319,7 +1340,8 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) + katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; + /* As the atom was not removed, increment the + * index so that we read the correct atom in the +- * next iteration. */ ++ * next iteration. ++ */ + atom_idx++; + continue; + } +@@ -1422,7 +1444,8 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, + katom_idx0_valid = (katom_idx0 == katom); + /* If idx0 is to be removed and idx1 is on the same context, + * then idx1 must also be removed otherwise the atoms might be +- * returned out of order */ ++ * returned out of order ++ */ + if (katom_idx1) + katom_idx1_valid = (katom_idx1 == katom) || + (katom_idx0_valid && +@@ -1469,7 +1492,8 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, + if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, + JS_COMMAND_NEXT)) == 0) { + /* idx0 has already completed - stop +- * idx1 if needed*/ ++ * idx1 if needed ++ */ + if (katom_idx1_valid) { + kbase_gpu_stop_atom(kbdev, js, + katom_idx1, +@@ -1478,7 +1502,8 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, + } + } else { + /* idx1 is in NEXT registers - attempt +- * to remove */ ++ * to remove ++ */ + kbase_reg_write(kbdev, + JOB_SLOT_REG(js, + JS_COMMAND_NEXT), +@@ -1493,7 +1518,8 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, + JS_HEAD_NEXT_HI)) + != 0) { + /* idx1 removed successfully, +- * will be handled in IRQ */ ++ * will be handled in IRQ ++ */ + kbase_gpu_remove_atom(kbdev, + katom_idx1, + action, true); +@@ -1507,7 +1533,8 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, + ret = true; + } else if (katom_idx1_valid) { + /* idx0 has already completed, +- * stop idx1 if needed */ ++ * stop idx1 if needed ++ */ + kbase_gpu_stop_atom(kbdev, js, + katom_idx1, + action); +@@ -1526,7 
+1553,8 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, + * flow was also interrupted, and this function + * might not enter disjoint state e.g. if we + * don't actually do a hard stop on the head +- * atom */ ++ * atom ++ */ + kbase_gpu_stop_atom(kbdev, js, katom_idx0, + action); + ret = true; +@@ -1554,7 +1582,8 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, + ret = true; + } else { + /* idx1 is in NEXT registers - attempt to +- * remove */ ++ * remove ++ */ + kbase_reg_write(kbdev, JOB_SLOT_REG(js, + JS_COMMAND_NEXT), + JS_COMMAND_NOP); +@@ -1564,13 +1593,15 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, + kbase_reg_read(kbdev, JOB_SLOT_REG(js, + JS_HEAD_NEXT_HI)) != 0) { + /* idx1 removed successfully, will be +- * handled in IRQ once idx0 completes */ ++ * handled in IRQ once idx0 completes ++ */ + kbase_gpu_remove_atom(kbdev, katom_idx1, + action, + false); + } else { + /* idx0 has already completed - stop +- * idx1 */ ++ * idx1 ++ */ + kbase_gpu_stop_atom(kbdev, js, + katom_idx1, + action); +@@ -1644,7 +1675,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev) + + if (katom) + dev_info(kbdev->dev, +- " js%d idx%d : katom=%p gpu_rb_state=%d\n", ++ " js%d idx%d : katom=%pK gpu_rb_state=%d\n", + js, idx, katom, katom->gpu_rb_state); + else + dev_info(kbdev->dev, " js%d idx%d : empty\n", +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h +index c3b9f2d..d3ff203 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2018, 2020-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,11 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- + /* + * Register-based HW access backend specific APIs + */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c +index fcc0437..02d7cdb 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,11 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- + /* + * Register-based HW access backend specific job scheduler APIs + */ +@@ -31,13 +29,14 @@ + #include + #include + ++#if !MALI_USE_CSF + /* + * Hold the runpool_mutex for this + */ + static inline bool timer_callback_should_run(struct kbase_device *kbdev) + { + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; +- s8 nr_running_ctxs; ++ int nr_running_ctxs; + + lockdep_assert_held(&kbdev->js_data.runpool_mutex); + +@@ -47,7 +46,8 @@ static inline bool timer_callback_should_run(struct kbase_device *kbdev) + + /* nr_contexts_pullable is updated with the runpool_mutex. However, the + * locking in the caller gives us a barrier that ensures +- * nr_contexts_pullable is up-to-date for reading */ ++ * nr_contexts_pullable is up-to-date for reading ++ */ + nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable); + + #ifdef CONFIG_MALI_DEBUG +@@ -69,10 +69,10 @@ static inline bool timer_callback_should_run(struct kbase_device *kbdev) + * don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE). + */ + { +- s8 nr_compute_ctxs = ++ int nr_compute_ctxs = + kbasep_js_ctx_attr_count_on_runpool(kbdev, + KBASEP_JS_CTX_ATTR_COMPUTE); +- s8 nr_noncompute_ctxs = nr_running_ctxs - ++ int nr_noncompute_ctxs = nr_running_ctxs - + nr_compute_ctxs; + + return (bool) (nr_compute_ctxs >= 2 || +@@ -113,7 +113,8 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) + + if (atom != NULL) { + /* The current version of the model doesn't support +- * Soft-Stop */ ++ * Soft-Stop ++ */ + if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) { + u32 ticks = atom->ticks++; + +@@ -141,7 +142,8 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) + * new soft_stop timeout. This ensures that + * atoms do not miss any of the timeouts due to + * races between this worker and the thread +- * changing the timeouts. */ ++ * changing the timeouts. 
++ */ + if (backend->timeouts_updated && + ticks > soft_stop_ticks) + ticks = atom->ticks = soft_stop_ticks; +@@ -171,10 +173,11 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) + * + * Similarly, if it's about to be + * decreased, the last job from another +- * context has already finished, so it's +- * not too bad that we observe the older +- * value and register a disjoint event +- * when we try soft-stopping */ ++ * context has already finished, so ++ * it's not too bad that we observe the ++ * older value and register a disjoint ++ * event when we try soft-stopping ++ */ + if (js_devdata->nr_user_contexts_running + >= disjoint_threshold) + softstop_flags |= +@@ -252,9 +255,9 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) + } + } + if (reset_needed) { +- dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issueing GPU soft-reset to resolve."); ++ dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). 
Issuing GPU soft-reset to resolve."); + +- if (kbase_prepare_to_reset_gpu_locked(kbdev)) ++ if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) + kbase_reset_gpu_locked(kbdev); + } + /* the timer is re-issued if there is contexts in the run-pool */ +@@ -270,9 +273,11 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) + + return HRTIMER_NORESTART; + } ++#endif /* !MALI_USE_CSF */ + + void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) + { ++#if !MALI_USE_CSF + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + unsigned long flags; +@@ -284,11 +289,12 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + backend->timer_running = false; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +- /* From now on, return value of timer_callback_should_run() will +- * also cause the timer to not requeue itself. Its return value +- * cannot change, because it depends on variables updated with +- * the runpool_mutex held, which the caller of this must also +- * hold */ ++ /* From now on, return value of timer_callback_should_run() ++ * will also cause the timer to not requeue itself. 
Its return ++ * value cannot change, because it depends on variables updated ++ * with the runpool_mutex held, which the caller of this must ++ * also hold ++ */ + hrtimer_cancel(&backend->scheduling_timer); + } + +@@ -303,25 +309,36 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) + + KBASE_KTRACE_ADD_JM(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u, 0u); + } ++#else /* !MALI_USE_CSF */ ++ CSTD_UNUSED(kbdev); ++#endif /* !MALI_USE_CSF */ + } + + int kbase_backend_timer_init(struct kbase_device *kbdev) + { ++#if !MALI_USE_CSF + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + backend->scheduling_timer.function = timer_callback; + backend->timer_running = false; ++#else /* !MALI_USE_CSF */ ++ CSTD_UNUSED(kbdev); ++#endif /* !MALI_USE_CSF */ + + return 0; + } + + void kbase_backend_timer_term(struct kbase_device *kbdev) + { ++#if !MALI_USE_CSF + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + hrtimer_cancel(&backend->scheduling_timer); ++#else /* !MALI_USE_CSF */ ++ CSTD_UNUSED(kbdev); ++#endif /* !MALI_USE_CSF */ + } + + void kbase_backend_timer_suspend(struct kbase_device *kbdev) +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h +index 6576e55..5284288 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2015, 2020-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,11 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- + /* + * Register-based HW access backend specific job scheduler APIs + */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.c +index e67d12b..7131546 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.c +@@ -1,12 +1,12 @@ +-// SPDX-License-Identifier: GPL-2.0 ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -17,14 +17,12 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include + #include + #include +-#include ++#include + #include "mali_kbase_l2_mmu_config.h" + + /** +@@ -56,23 +54,34 @@ struct l2_mmu_config_limit { + /* + * Zero represents no limit + * +- * For LBEX TBEX TTRX and TNAX: ++ * For LBEX TBEX TBAX TTRX and TNAX: + * The value represents the number of outstanding reads (6 bits) or writes (5 bits) + * + * For all other GPUS it is a fraction see: mali_kbase_config_defaults.h + */ + static const struct l2_mmu_config_limit limits[] = { +- /* GPU read write */ +- {GPU_ID2_PRODUCT_LBEX, {0, GENMASK(10, 5), 5}, {0, GENMASK(16, 12), 12} }, +- {GPU_ID2_PRODUCT_TBEX, {0, GENMASK(10, 5), 5}, {0, GENMASK(16, 12), 12} }, +- {GPU_ID2_PRODUCT_TTRX, {0, GENMASK(12, 7), 7}, {0, GENMASK(17, 13), 13} }, +- {GPU_ID2_PRODUCT_TNAX, {0, GENMASK(12, 7), 7}, {0, GENMASK(17, 13), 13} }, +- {GPU_ID2_PRODUCT_TGOX, +- {KBASE_3BIT_AID_32, GENMASK(14, 12), 12}, +- {KBASE_3BIT_AID_32, GENMASK(17, 15), 15} }, +- {GPU_ID2_PRODUCT_TNOX, +- {KBASE_3BIT_AID_32, GENMASK(14, 12), 12}, +- {KBASE_3BIT_AID_32, GENMASK(17, 15), 15} }, ++ /* GPU, read, write */ ++ {GPU_ID2_PRODUCT_LBEX, ++ {0, GENMASK(10, 5), 5}, ++ {0, GENMASK(16, 12), 12} }, ++ {GPU_ID2_PRODUCT_TBEX, ++ {0, GENMASK(10, 5), 5}, ++ {0, GENMASK(16, 12), 12} }, ++ {GPU_ID2_PRODUCT_TBAX, ++ {0, GENMASK(10, 5), 5}, ++ {0, GENMASK(16, 12), 12} }, ++ {GPU_ID2_PRODUCT_TTRX, ++ {0, GENMASK(12, 7), 7}, ++ {0, GENMASK(17, 13), 13} }, ++ {GPU_ID2_PRODUCT_TNAX, ++ {0, GENMASK(12, 7), 7}, ++ {0, GENMASK(17, 13), 13} }, ++ {GPU_ID2_PRODUCT_TGOX, ++ {KBASE_3BIT_AID_32, GENMASK(14, 12), 12}, ++ {KBASE_3BIT_AID_32, GENMASK(17, 15), 15} }, ++ {GPU_ID2_PRODUCT_TNOX, ++ {KBASE_3BIT_AID_32, GENMASK(14, 12), 12}, ++ {KBASE_3BIT_AID_32, GENMASK(17, 15), 15} }, + }; + + int kbase_set_mmu_quirks(struct kbase_device *kbdev) +@@ -100,7 +109,7 @@ int kbase_set_mmu_quirks(struct kbase_device *kbdev) + + mmu_config = kbase_reg_read(kbdev, 
GPU_CONTROL_REG(L2_MMU_CONFIG)); + +- if (kbase_is_gpu_lost(kbdev)) ++ if (kbase_is_gpu_removed(kbdev)) + return -EIO; + + mmu_config &= ~(limit.read.mask | limit.write.mask); +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.h +index 0c779ac..07014ad 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.h +@@ -1,31 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. +- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, you can access it online at +- * http://www.gnu.org/licenses/gpl-2.0.html. +- * +- * SPDX-License-Identifier: GPL-2.0 +- * +- *//* SPDX-License-Identifier: GPL-2.0 */ +-/* +- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. +- * +- * This program is free software and is provided to you under the terms of the +- * GNU General Public License version 2 as published by the Free Software +- * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c +index e33fe0b..077c234 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2010-2015, 2018-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2015, 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /* + * "Always on" power management policy + */ +@@ -61,7 +58,11 @@ const struct kbase_pm_policy kbase_pm_always_on_policy_ops = { + always_on_term, /* term */ + always_on_shaders_needed, /* shaders_needed */ + always_on_get_core_active, /* get_core_active */ ++ NULL, /* handle_event */ + KBASE_PM_POLICY_ID_ALWAYS_ON, /* id */ ++#if MALI_USE_CSF ++ ALWAYS_ON_PM_SCHED_FLAGS, /* pm_sched_flags */ ++#endif + }; + + KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops); +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h +index e7927cf..98d35da 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2011-2015,2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2015, 2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /* + * "Always on" power management policy + */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c +index 576c9f2..cc791df 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c +@@ -1,11 +1,12 @@ +- /* ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* + * +- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,11 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- + /* + * GPU backend implementation of base kernel power management APIs + */ +@@ -30,14 +28,16 @@ + #include + + #include ++#if !MALI_USE_CSF + #include + #include + #include ++#endif /* !MALI_USE_CSF */ + #include + #include + #include + #include +-#include ++#include + + static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data); + static void kbase_pm_hwcnt_disable_worker(struct work_struct *data); +@@ -106,6 +106,11 @@ void kbase_pm_register_access_enable(struct kbase_device *kbdev) + if (callbacks) + callbacks->power_on_callback(kbdev); + ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ if (WARN_ON(kbase_pm_is_gpu_lost(kbdev))) ++ dev_err(kbdev->dev, "Attempting to power on while GPU lost\n"); ++#endif ++ + kbdev->pm.backend.gpu_powered = true; + } + +@@ -139,24 +144,35 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) + + kbdev->pm.backend.ca_cores_enabled = ~0ull; + kbdev->pm.backend.gpu_powered = false; ++ kbdev->pm.backend.gpu_ready = false; + kbdev->pm.suspending = false; + #ifdef CONFIG_MALI_ARBITER_SUPPORT +- kbdev->pm.gpu_lost = false; ++ kbase_pm_set_gpu_lost(kbdev, false); + #endif + #ifdef CONFIG_MALI_DEBUG + kbdev->pm.backend.driver_ready_for_irqs = false; + #endif /* CONFIG_MALI_DEBUG */ + init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait); + ++#if !MALI_USE_CSF + /* Initialise the metrics subsystem */ + ret = kbasep_pm_metrics_init(kbdev); + if (ret) + return ret; ++#else ++ mutex_init(&kbdev->pm.backend.policy_change_lock); ++ kbdev->pm.backend.policy_change_clamp_state_to_off = false; ++ /* Due to dependency on kbase_ipa_control, the metrics subsystem can't ++ * be initialized here. 
++ */ ++ CSTD_UNUSED(ret); ++#endif + + init_waitqueue_head(&kbdev->pm.backend.reset_done_wait); + kbdev->pm.backend.reset_done = false; + + init_waitqueue_head(&kbdev->pm.zero_active_count_wait); ++ init_waitqueue_head(&kbdev->pm.resume_wait); + kbdev->pm.active_count = 0; + + spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock); +@@ -213,7 +229,9 @@ pm_state_machine_fail: + kbase_pm_policy_term(kbdev); + kbase_pm_ca_term(kbdev); + workq_fail: ++#if !MALI_USE_CSF + kbasep_pm_metrics_term(kbdev); ++#endif + return -EINVAL; + } + +@@ -222,7 +240,8 @@ void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume) + lockdep_assert_held(&kbdev->pm.lock); + + /* Turn clocks and interrupts on - no-op if we haven't done a previous +- * kbase_pm_clock_off() */ ++ * kbase_pm_clock_off() ++ */ + kbase_pm_clock_on(kbdev, is_resume); + + if (!is_resume) { +@@ -240,7 +259,8 @@ void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume) + kbase_pm_update_cores_state(kbdev); + + /* NOTE: We don't wait to reach the desired state, since running atoms +- * will wait for that state to be reached anyway */ ++ * will wait for that state to be reached anyway ++ */ + } + + static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) +@@ -251,13 +271,20 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) + struct kbase_pm_backend_data *backend = &pm->backend; + unsigned long flags; + ++#if !MALI_USE_CSF + /* Wait for power transitions to complete. We do this with no locks held + * so that we don't deadlock with any pending workqueues. 
+ */ + kbase_pm_wait_for_desired_state(kbdev); ++#endif + + kbase_pm_lock(kbdev); + ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ if (kbase_pm_is_gpu_lost(kbdev)) ++ backend->poweron_required = false; ++#endif ++ + if (!backend->poweron_required) { + unsigned long flags; + +@@ -278,6 +305,14 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) + kbase_flush_mmu_wqs(kbdev); + kbase_pm_lock(kbdev); + ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ /* poweron_required may have changed while pm lock ++ * was released. ++ */ ++ if (kbase_pm_is_gpu_lost(kbdev)) ++ backend->poweron_required = false; ++#endif ++ + /* Turn off clock now that fault have been handled. We + * dropped locks so poweron_required may have changed - + * power back on if this is the case (effectively only +@@ -296,9 +331,14 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) + if (backend->poweron_required) { + backend->poweron_required = false; + kbdev->pm.backend.l2_desired = true; ++#if MALI_USE_CSF ++ kbdev->pm.backend.mcu_desired = true; ++#endif + kbase_pm_update_state(kbdev); + kbase_pm_update_cores_state_nolock(kbdev); ++#if !MALI_USE_CSF + kbase_backend_slot_update(kbdev); ++#endif /* !MALI_USE_CSF */ + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + +@@ -451,12 +491,22 @@ static void kbase_pm_hwcnt_disable_worker(struct work_struct *data) + */ + backend->hwcnt_disabled = true; + kbase_pm_update_state(kbdev); ++#if !MALI_USE_CSF + kbase_backend_slot_update(kbdev); ++#endif /* !MALI_USE_CSF */ + } else { + /* PM state was updated while we were doing the disable, + * so we need to undo the disable we just performed. 
+ */ ++#if MALI_USE_CSF ++ unsigned long lock_flags; ++ ++ kbase_csf_scheduler_spin_lock(kbdev, &lock_flags); ++#endif + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); ++#if MALI_USE_CSF ++ kbase_csf_scheduler_spin_unlock(kbdev, lock_flags); ++#endif + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +@@ -476,8 +526,12 @@ void kbase_pm_do_poweroff(struct kbase_device *kbdev) + if (kbdev->pm.backend.poweroff_wait_in_progress) + goto unlock_hwaccess; + ++#if MALI_USE_CSF ++ kbdev->pm.backend.mcu_desired = false; ++#else + /* Force all cores off */ + kbdev->pm.backend.shaders_desired = false; ++#endif + kbdev->pm.backend.l2_desired = false; + + kbdev->pm.backend.poweroff_wait_in_progress = true; +@@ -528,20 +582,35 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, + KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); + + /* Power up the GPU, don't enable IRQs as we are not ready to receive +- * them. */ ++ * them ++ */ + ret = kbase_pm_init_hw(kbdev, flags); + if (ret) { + kbase_pm_unlock(kbdev); + return ret; + } +- ++#if MALI_USE_CSF ++ kbdev->pm.debug_core_mask = ++ kbdev->gpu_props.props.raw_props.shader_present; ++ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); ++ /* Set the initial value for 'shaders_avail'. It would be later ++ * modified only from the MCU state machine, when the shader core ++ * allocation enable mask request has completed. So its value would ++ * indicate the mask of cores that are currently being used by FW for ++ * the allocation of endpoints requested by CSGs. 
++ */ ++ kbdev->pm.backend.shaders_avail = kbase_pm_ca_get_core_mask(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); ++#else + kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] = + kbdev->pm.debug_core_mask[1] = + kbdev->pm.debug_core_mask[2] = + kbdev->gpu_props.props.raw_props.shader_present; ++#endif + + /* Pretend the GPU is active to prevent a power policy turning the GPU +- * cores off */ ++ * cores off ++ */ + kbdev->pm.active_count = 1; + + spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, +@@ -553,13 +622,27 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, + irq_flags); + + /* We are ready to receive IRQ's now as power policy is set up, so +- * enable them now. */ ++ * enable them now. ++ */ + #ifdef CONFIG_MALI_DEBUG + kbdev->pm.backend.driver_ready_for_irqs = true; + #endif + kbase_pm_enable_interrupts(kbdev); + ++ WARN_ON(!kbdev->pm.backend.gpu_powered); ++ /* GPU has been powered up (by kbase_pm_init_hw) and interrupts have ++ * been enabled, so GPU is ready for use and PM state machine can be ++ * exercised from this point onwards. 
++ */ ++ kbdev->pm.backend.gpu_ready = true; ++ + /* Turn on the GPU and any cores needed by the policy */ ++#if MALI_USE_CSF ++ /* Turn on the L2 caches, needed for firmware boot */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); ++ kbdev->pm.backend.l2_desired = true; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); ++#endif + kbase_pm_do_poweron(kbdev, false); + kbase_pm_unlock(kbdev); + +@@ -573,6 +656,8 @@ void kbase_hwaccess_pm_halt(struct kbase_device *kbdev) + mutex_lock(&kbdev->pm.lock); + kbase_pm_do_poweroff(kbdev); + mutex_unlock(&kbdev->pm.lock); ++ ++ kbase_pm_wait_for_poweroff_complete(kbdev); + } + + KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt); +@@ -587,10 +672,15 @@ void kbase_hwaccess_pm_term(struct kbase_device *kbdev) + + if (kbdev->pm.backend.hwcnt_disabled) { + unsigned long flags; +- ++#if MALI_USE_CSF ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++#else + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++#endif + } + + /* Free any resources the policy allocated */ +@@ -598,8 +688,16 @@ void kbase_hwaccess_pm_term(struct kbase_device *kbdev) + kbase_pm_policy_term(kbdev); + kbase_pm_ca_term(kbdev); + ++#if !MALI_USE_CSF + /* Shut down the metrics subsystem */ + kbasep_pm_metrics_term(kbdev); ++#else ++ if (WARN_ON(mutex_is_locked(&kbdev->pm.backend.policy_change_lock))) { ++ mutex_lock(&kbdev->pm.backend.policy_change_lock); ++ mutex_unlock(&kbdev->pm.backend.policy_change_lock); ++ } ++ mutex_destroy(&kbdev->pm.backend.policy_change_lock); ++#endif + + destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq); + } +@@ -611,11 +709,24 @@ void kbase_pm_power_changed(struct kbase_device *kbdev) + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + ++#if !MALI_USE_CSF + 
kbase_backend_slot_update(kbdev); ++#endif /* !MALI_USE_CSF */ + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + ++#if MALI_USE_CSF ++void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ lockdep_assert_held(&kbdev->pm.lock); ++ ++ kbdev->pm.debug_core_mask = new_core_mask; ++ kbase_pm_update_dynamic_cores_onoff(kbdev); ++} ++KBASE_EXPORT_TEST_API(kbase_pm_set_debug_core_mask); ++#else + void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, + u64 new_core_mask_js0, u64 new_core_mask_js1, + u64 new_core_mask_js2) +@@ -636,6 +747,7 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, + + kbase_pm_update_dynamic_cores_onoff(kbdev); + } ++#endif /* MALI_USE_CSF */ + + void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev) + { +@@ -651,12 +763,15 @@ void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) + { + /* Force power off the GPU and all cores (regardless of policy), only + * after the PM active count reaches zero (otherwise, we risk turning it +- * off prematurely) */ ++ * off prematurely) ++ */ + kbase_pm_lock(kbdev); + + kbase_pm_do_poweroff(kbdev); + ++#if !MALI_USE_CSF + kbase_backend_timer_suspend(kbdev); ++#endif /* !MALI_USE_CSF */ + + kbase_pm_unlock(kbdev); + +@@ -672,12 +787,19 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) + + kbdev->pm.suspending = false; + #ifdef CONFIG_MALI_ARBITER_SUPPORT +- kbdev->pm.gpu_lost = false; ++ if (kbase_pm_is_gpu_lost(kbdev)) { ++ dev_dbg(kbdev->dev, "%s: GPU lost in progress\n", __func__); ++ kbase_pm_unlock(kbdev); ++ return; ++ } + #endif + kbase_pm_do_poweron(kbdev, true); + ++#if !MALI_USE_CSF + kbase_backend_timer_resume(kbdev); ++#endif /* !MALI_USE_CSF */ + ++ wake_up_all(&kbdev->pm.resume_wait); + kbase_pm_unlock(kbdev); + } + +@@ -685,63 +807,50 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) + void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev) + { + 
unsigned long flags; +- struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + ktime_t end_timestamp = ktime_get(); ++ struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + +- /* Full GPU reset will have been done by hypervisor, so cancel */ +- atomic_set(&kbdev->hwaccess.backend.reset_gpu, +- KBASE_RESET_GPU_NOT_PENDING); +- hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer); +- +- /* GPU is no longer mapped to VM. So no interrupts will be received +- * and Mali registers have been replaced by dummy RAM +- */ +- spin_lock_irqsave(&kbdev->hwaccess_lock, flags); +- spin_lock(&kbdev->mmu_mask_change); +- kbdev->irq_reset_flush = true; +- spin_unlock(&kbdev->mmu_mask_change); +- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +- kbase_synchronize_irqs(kbdev); +- kbase_flush_mmu_wqs(kbdev); +- kbdev->irq_reset_flush = false; ++ if (!kbdev->arb.arb_if) ++ return; + +- /* Clear all jobs running on the GPU */ + mutex_lock(&kbdev->pm.lock); +- kbdev->pm.gpu_lost = true; +- spin_lock_irqsave(&kbdev->hwaccess_lock, flags); +- kbdev->protected_mode = false; +- if (!kbdev->pm.backend.protected_entry_transition_override) +- kbase_backend_reset(kbdev, &end_timestamp); +- kbase_pm_metrics_update(kbdev, NULL); +- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_lock(&arb_vm_state->vm_state_lock); ++ if (kbdev->pm.backend.gpu_powered && ++ !kbase_pm_is_gpu_lost(kbdev)) { ++ kbase_pm_set_gpu_lost(kbdev, true); ++ ++ /* GPU is no longer mapped to VM. 
So no interrupts will ++ * be received and Mali registers have been replaced by ++ * dummy RAM ++ */ ++ WARN(!kbase_is_gpu_removed(kbdev), ++ "GPU is still available after GPU lost event\n"); + +- /* Cancel any pending HWC dumps */ +- spin_lock_irqsave(&kbdev->hwcnt.lock, flags); +- kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; +- kbdev->hwcnt.backend.triggered = 1; +- wake_up(&kbdev->hwcnt.backend.wait); +- spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ /* Full GPU reset will have been done by hypervisor, so ++ * cancel ++ */ ++ atomic_set(&kbdev->hwaccess.backend.reset_gpu, ++ KBASE_RESET_GPU_NOT_PENDING); ++ hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer); ++ kbase_synchronize_irqs(kbdev); + +- /* Wait for all threads keeping GPU active to complete */ +- mutex_unlock(&kbdev->pm.lock); +- wait_event(kbdev->pm.zero_active_count_wait, +- kbdev->pm.active_count == 0); +- mutex_lock(&kbdev->pm.lock); ++ /* Clear all jobs running on the GPU */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->protected_mode = false; ++ kbase_backend_reset(kbdev, &end_timestamp); ++ kbase_pm_metrics_update(kbdev, NULL); ++ kbase_pm_update_state(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + +- /* Update state to GPU off */ +- spin_lock_irqsave(&kbdev->hwaccess_lock, flags); +- kbdev->pm.backend.shaders_desired = false; +- kbdev->pm.backend.l2_desired = false; +- backend->l2_state = KBASE_L2_OFF; +- backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF; +- kbdev->pm.backend.gpu_powered = false; +- backend->poweroff_wait_in_progress = false; +- KBASE_KTRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, 0); +- wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait); +- kbase_gpu_cache_clean_wait_complete(kbdev); +- backend->poweroff_wait_in_progress = false; +- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +- wake_up(&kbdev->pm.backend.poweroff_wait); ++ /* Cancel any pending HWC dumps */ ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); ++ 
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; ++ kbdev->hwcnt.backend.triggered = 1; ++ wake_up(&kbdev->hwcnt.backend.wait); ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); ++ } ++ mutex_unlock(&arb_vm_state->vm_state_lock); + mutex_unlock(&kbdev->pm.lock); + } ++ + #endif /* CONFIG_MALI_ARBITER_SUPPORT */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c +index e7eef26..efc620f 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2013-2018, 2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +@@ -27,9 +26,6 @@ + #include + #include + #include +-#ifdef CONFIG_MALI_NO_MALI +-#include +-#endif + #include + + int kbase_pm_ca_init(struct kbase_device *kbdev) +@@ -59,6 +55,14 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + ++#if MALI_USE_CSF ++ if (!(core_mask & kbdev->pm.debug_core_mask)) { ++ dev_err(kbdev->dev, ++ "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n", ++ core_mask, kbdev->pm.debug_core_mask); ++ goto unlock; ++ } ++#else + if (!(core_mask & kbdev->pm.debug_core_mask_all)) { + dev_err(kbdev->dev, "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n", + core_mask, kbdev->pm.debug_core_mask_all); +@@ -69,6 +73,7 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) + dev_err(kbdev->dev, "Dynamic core scaling not supported as dummy job WA is enabled"); + goto unlock; + } ++#endif /* MALI_USE_CSF */ + + pm_backend->ca_cores_enabled = core_mask; + +@@ -80,21 +85,32 @@ unlock: + dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n", + pm_backend->ca_cores_enabled); + } ++KBASE_EXPORT_TEST_API(kbase_devfreq_set_core_mask); + #endif + + u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev) + { +-#ifdef CONFIG_MALI_DEVFREQ +- struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend; ++#if MALI_USE_CSF ++ u64 debug_core_mask = kbdev->pm.debug_core_mask; ++#else ++ u64 debug_core_mask = kbdev->pm.debug_core_mask_all; + #endif + + lockdep_assert_held(&kbdev->hwaccess_lock); + + #ifdef CONFIG_MALI_DEVFREQ +- return pm_backend->ca_cores_enabled & kbdev->pm.debug_core_mask_all; ++ /* ++ * Although in the init we let the pm_backend->ca_cores_enabled to be ++ * the max config (it uses the base_gpu_props), at this function we need ++ * to limit it to be a subgroup of the curr config, otherwise the ++ * shaders state machine on the PM does not evolve. 
++ */ ++ return kbdev->gpu_props.curr_config.shader_present & ++ kbdev->pm.backend.ca_cores_enabled & ++ debug_core_mask; + #else +- return kbdev->gpu_props.props.raw_props.shader_present & +- kbdev->pm.debug_core_mask_all; ++ return kbdev->gpu_props.curr_config.shader_present & ++ debug_core_mask; + #endif + } + +@@ -104,8 +120,8 @@ u64 kbase_pm_ca_get_instr_core_mask(struct kbase_device *kbdev) + { + lockdep_assert_held(&kbdev->hwaccess_lock); + +-#ifdef CONFIG_MALI_NO_MALI +- return (((1ull) << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1); ++#if MALI_USE_CSF ++ return kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); + #else + return kbdev->pm.backend.pm_shaders_core_mask; + #endif +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h +index 5423e96..8d169c3 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h +index f67ec65..41f3c14 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c +index 9eef44a..f40b753 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2012-2016, 2018-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /* + * "Coarse Demand" power management policy + */ +@@ -60,7 +57,11 @@ const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = { + coarse_demand_term, /* term */ + coarse_demand_shaders_needed, /* shaders_needed */ + coarse_demand_get_core_active, /* get_core_active */ ++ NULL, /* handle_event */ + KBASE_PM_POLICY_ID_COARSE_DEMAND, /* id */ ++#if MALI_USE_CSF ++ COARSE_ON_DEMAND_PM_SCHED_FLAGS, /* pm_sched_flags */ ++#endif + }; + + KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops); +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h +index 304e5d7..5e3f17e 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2012-2015,2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2015, 2018, 2020-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /* + * "Coarse Demand" power management policy + */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h +index f4bcf3e..1a0572b 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +@@ -29,9 +28,6 @@ + + #include "mali_kbase_pm_always_on.h" + #include "mali_kbase_pm_coarse_demand.h" +-#if !MALI_CUSTOMER_RELEASE +-#include "mali_kbase_pm_always_on_demand.h" +-#endif + + /* Forward definition - see mali_kbase.h */ + struct kbase_device; +@@ -62,24 +58,9 @@ enum kbase_pm_core_type { + KBASE_PM_CORE_STACK = STACK_PRESENT_LO + }; + +-/** ++/* + * enum kbase_l2_core_state - The states used for the L2 cache & tiler power + * state machine. +- * +- * @KBASE_L2_OFF: The L2 cache and tiler are off +- * @KBASE_L2_PEND_ON: The L2 cache and tiler are powering on +- * @KBASE_L2_RESTORE_CLOCKS: The GPU clock is restored. Conditionally used. +- * @KBASE_L2_ON_HWCNT_ENABLE: The L2 cache and tiler are on, and hwcnt is being +- * enabled +- * @KBASE_L2_ON: The L2 cache and tiler are on, and hwcnt is enabled +- * @KBASE_L2_ON_HWCNT_DISABLE: The L2 cache and tiler are on, and hwcnt is being +- * disabled +- * @KBASE_L2_SLOW_DOWN_CLOCKS: The GPU clock is set to appropriate or lowest +- * clock. Conditionally used. +- * @KBASE_L2_POWER_DOWN: The L2 cache and tiler are about to be powered off +- * @KBASE_L2_PEND_OFF: The L2 cache and tiler are powering off +- * @KBASE_L2_RESET_WAIT: The GPU is resetting, L2 cache and tiler power state +- * are unknown + */ + enum kbase_l2_core_state { + #define KBASEP_L2_STATE(n) KBASE_L2_ ## n, +@@ -87,45 +68,19 @@ enum kbase_l2_core_state { + #undef KBASEP_L2_STATE + }; + +-/** ++#if MALI_USE_CSF ++/* ++ * enum kbase_mcu_state - The states used for the MCU state machine. ++ */ ++enum kbase_mcu_state { ++#define KBASEP_MCU_STATE(n) KBASE_MCU_ ## n, ++#include "mali_kbase_pm_mcu_states.h" ++#undef KBASEP_MCU_STATE ++}; ++#endif ++ ++/* + * enum kbase_shader_core_state - The states used for the shaders' state machine. 
+- * +- * @KBASE_SHADERS_OFF_CORESTACK_OFF: The shaders and core stacks are off +- * @KBASE_SHADERS_OFF_CORESTACK_PEND_ON: The shaders are off, core stacks have +- * been requested to power on and hwcnt +- * is being disabled +- * @KBASE_SHADERS_PEND_ON_CORESTACK_ON: Core stacks are on, shaders have been +- * requested to power on. Or after doing +- * partial shader on/off, checking whether +- * it's the desired state. +- * @KBASE_SHADERS_ON_CORESTACK_ON: The shaders and core stacks are on, and hwcnt +- * already enabled. +- * @KBASE_SHADERS_ON_CORESTACK_ON_RECHECK: The shaders and core stacks +- * are on, hwcnt disabled, and checks +- * to powering down or re-enabling +- * hwcnt. +- * @KBASE_SHADERS_WAIT_OFF_CORESTACK_ON: The shaders have been requested to +- * power off, but they remain on for the +- * duration of the hysteresis timer +- * @KBASE_SHADERS_WAIT_GPU_IDLE: The shaders partial poweroff needs to reach +- * a state where jobs on the GPU are finished +- * including jobs currently running and in the +- * GPU queue because of GPU2017-861 +- * @KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON: The hysteresis timer has expired +- * @KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON: The core stacks are on and the +- * level 2 cache is being flushed. +- * @KBASE_SHADERS_READY_OFF_CORESTACK_ON: The core stacks are on and the shaders +- * are ready to be powered off. +- * @KBASE_SHADERS_PEND_OFF_CORESTACK_ON: The core stacks are on, and the shaders +- * have been requested to power off +- * @KBASE_SHADERS_OFF_CORESTACK_PEND_OFF: The shaders are off, and the core stacks +- * have been requested to power off +- * @KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF: Shaders and corestacks are +- * off, but the tick timer +- * cancellation is still +- * pending. 
+- * @KBASE_SHADERS_RESET_WAIT: The GPU is resetting, shader and core stack power +- * states are unknown + */ + enum kbase_shader_core_state { + #define KBASEP_SHADER_STATE(n) KBASE_SHADERS_ ## n, +@@ -137,28 +92,40 @@ enum kbase_shader_core_state { + * struct kbasep_pm_metrics - Metrics data collected for use by the power + * management framework. + * +- * @time_busy: number of ns the GPU was busy executing jobs since the +- * @time_period_start timestamp. +- * @time_idle: number of ns since time_period_start the GPU was not executing +- * jobs since the @time_period_start timestamp. +- * @busy_cl: number of ns the GPU was busy executing CL jobs. Note that +- * if two CL jobs were active for 400ns, this value would be updated +- * with 800. +- * @busy_gl: number of ns the GPU was busy executing GL jobs. Note that +- * if two GL jobs were active for 400ns, this value would be updated +- * with 800. ++ * @time_busy: the amount of time the GPU was busy executing jobs since the ++ * @time_period_start timestamp, in units of 256ns. This also includes ++ * time_in_protm, the time spent in protected mode, since it's assumed ++ * the GPU was busy 100% during this period. ++ * @time_idle: the amount of time the GPU was not executing jobs since the ++ * time_period_start timestamp, measured in units of 256ns. ++ * @time_in_protm: The amount of time the GPU has spent in protected mode since ++ * the time_period_start timestamp, measured in units of 256ns. ++ * @busy_cl: the amount of time the GPU was busy executing CL jobs. Note that ++ * if two CL jobs were active for 256ns, this value would be updated ++ * with 2 (2x256ns). ++ * @busy_gl: the amount of time the GPU was busy executing GL jobs. Note that ++ * if two GL jobs were active for 256ns, this value would be updated ++ * with 2 (2x256ns). 
+ */ + struct kbasep_pm_metrics { + u32 time_busy; + u32 time_idle; ++#if MALI_USE_CSF ++ u32 time_in_protm; ++#else + u32 busy_cl[2]; + u32 busy_gl; ++#endif + }; + + /** + * struct kbasep_pm_metrics_state - State required to collect the metrics in + * struct kbasep_pm_metrics + * @time_period_start: time at which busy/idle measurements started ++ * @ipa_control_client: Handle returned on registering DVFS as a ++ * kbase_ipa_control client ++ * @skip_gpu_active_sanity_check: Decide whether to skip GPU_ACTIVE sanity ++ * check in DVFS utilisation calculation + * @gpu_active: true when the GPU is executing jobs. false when + * not. Updated when the job scheduler informs us a job in submitted + * or removed from a GPU slot. +@@ -170,6 +137,7 @@ struct kbasep_pm_metrics { + * @values: The current values of the power management metrics. The + * kbase_pm_get_dvfs_metrics() function is used to compare these + * current values with the saved values from a previous invocation. ++ * @initialized: tracks whether metrics_state has been initialized or not. + * @timer: timer to regularly make DVFS decisions based on the power + * management metrics. 
+ * @timer_active: boolean indicating @timer is running +@@ -178,9 +146,14 @@ struct kbasep_pm_metrics { + */ + struct kbasep_pm_metrics_state { + ktime_t time_period_start; ++#if MALI_USE_CSF ++ void *ipa_control_client; ++ bool skip_gpu_active_sanity_check; ++#else + bool gpu_active; + u32 active_cl_ctx[2]; + u32 active_gl_ctx[3]; ++#endif + spinlock_t lock; + + void *platform_data; +@@ -189,6 +162,7 @@ struct kbasep_pm_metrics_state { + struct kbasep_pm_metrics values; + + #ifdef CONFIG_MALI_MIDGARD_DVFS ++ bool initialized; + struct hrtimer timer; + bool timer_active; + struct kbasep_pm_metrics dvfs_last; +@@ -202,8 +176,12 @@ struct kbasep_pm_metrics_state { + * @work: Work item which cancels the timer + * @timer: Timer for powering off the shader cores + * @configured_interval: Period of GPU poweroff timer +- * @configured_ticks: User-configured number of ticks to wait after the shader +- * power down request is received before turning off the cores ++ * @default_ticks: User-configured number of ticks to wait after the shader ++ * power down request is received before turning off the cores ++ * @configured_ticks: Power-policy configured number of ticks to wait after the ++ * shader power down request is received before turning off ++ * the cores. For simple power policies, this is equivalent ++ * to @default_ticks. + * @remaining_ticks: Number of remaining timer ticks until shaders are powered off + * @cancel_queued: True if the cancellation work item has been queued. This is + * required to ensure that it is not queued twice, e.g. 
after +@@ -217,6 +195,7 @@ struct kbasep_pm_tick_timer_state { + struct hrtimer timer; + + ktime_t configured_interval; ++ unsigned int default_ticks; + unsigned int configured_ticks; + unsigned int remaining_ticks; + +@@ -227,9 +206,6 @@ struct kbasep_pm_tick_timer_state { + union kbase_pm_policy_data { + struct kbasep_pm_policy_always_on always_on; + struct kbasep_pm_policy_coarse_demand coarse_demand; +-#if !MALI_CUSTOMER_RELEASE +- struct kbasep_pm_policy_always_on_demand always_on_demand; +-#endif + }; + + /** +@@ -240,7 +216,8 @@ union kbase_pm_policy_data { + * + * @pm_current_policy: The policy that is currently actively controlling the + * power state. +- * @pm_policy_data: Private data for current PM policy ++ * @pm_policy_data: Private data for current PM policy. This is automatically ++ * zeroed when a policy change occurs. + * @reset_done: Flag when a reset is complete + * @reset_done_wait: Wait queue to wait for changes to @reset_done + * @gpu_cycle_counter_requests: The reference count of active gpu cycle counter +@@ -254,6 +231,11 @@ union kbase_pm_policy_data { + * variable should be protected by: both the hwaccess_lock + * spinlock and the pm.lock mutex for writes; or at least + * one of either lock for reads. ++ * @gpu_ready: Indicates whether the GPU is in a state in which it is ++ * safe to perform PM changes. When false, the PM state ++ * machine needs to wait before making changes to the GPU ++ * power policy, DevFreq or core_mask, so as to avoid these ++ * changing while implicit GPU resets are ongoing. + * @pm_shaders_core_mask: Shader PM state synchronised shaders core mask. It + * holds the cores enabled in a hardware counters dump, + * and may differ from @shaders_avail when under different +@@ -294,6 +276,8 @@ union kbase_pm_policy_data { + * @callback_soft_reset: Optional callback to software reset the GPU. 
See + * &struct kbase_pm_callback_conf + * @ca_cores_enabled: Cores that are currently available ++ * @mcu_state: The current state of the micro-control unit, only applicable ++ * to GPUs that have such a component + * @l2_state: The current state of the L2 cache state machine. See + * &enum kbase_l2_core_state + * @l2_desired: True if the L2 cache should be powered on by the L2 cache state +@@ -303,10 +287,10 @@ union kbase_pm_policy_data { + * @shaders_avail: This is updated by the state machine when it is in a state + * where it can write to the SHADER_PWRON or PWROFF registers + * to have the same set of available cores as specified by +- * @shaders_desired_mask. So it would eventually have the same +- * value as @shaders_desired_mask and would precisely indicate +- * the cores that are currently available. This is internal to +- * shader state machine and should *not* be modified elsewhere. ++ * @shaders_desired_mask. So would precisely indicate the cores ++ * that are currently available. This is internal to shader ++ * state machine of JM GPUs and should *not* be modified ++ * elsewhere. + * @shaders_desired_mask: This is updated by the state machine when it is in + * a state where it can handle changes to the core + * availability (either by DVFS or sysfs). This is +@@ -318,6 +302,16 @@ union kbase_pm_policy_data { + * cores may be different, but there should be transitions in + * progress that will eventually achieve this state (assuming + * that the policy doesn't change its mind in the mean time). ++ * @mcu_desired: True if the micro-control unit should be powered on ++ * @policy_change_clamp_state_to_off: Signaling the backend is in PM policy ++ * change transition, needs the mcu/L2 to be brought back to the ++ * off state and remain in that state until the flag is cleared. ++ * @csf_pm_sched_flags: CSF Dynamic PM control flags in accordance to the ++ * current active PM policy. This field is updated whenever a ++ * new policy is activated. 
++ * @policy_change_lock: Used to serialize the policy change calls. In CSF case, ++ * the change of policy may involve the scheduler to ++ * suspend running CSGs and then reconfigure the MCU. + * @in_reset: True if a GPU is resetting and normal power manager operation is + * suspended + * @partial_shaderoff: True if we want to partial power off shader cores, +@@ -373,6 +367,7 @@ struct kbase_pm_backend_data { + wait_queue_head_t gpu_in_desired_state_wait; + + bool gpu_powered; ++ bool gpu_ready; + + u64 pm_shaders_core_mask; + +@@ -406,10 +401,19 @@ struct kbase_pm_backend_data { + + u64 ca_cores_enabled; + ++#if MALI_USE_CSF ++ enum kbase_mcu_state mcu_state; ++#endif + enum kbase_l2_core_state l2_state; + enum kbase_shader_core_state shaders_state; + u64 shaders_avail; + u64 shaders_desired_mask; ++#if MALI_USE_CSF ++ bool mcu_desired; ++ bool policy_change_clamp_state_to_off; ++ unsigned int csf_pm_sched_flags; ++ struct mutex policy_change_lock; ++#endif + bool l2_desired; + bool l2_always_on; + bool shaders_desired; +@@ -433,6 +437,23 @@ struct kbase_pm_backend_data { + struct work_struct gpu_clock_control_work; + }; + ++#if MALI_USE_CSF ++/* CSF PM flag, signaling that the MCU CORE should be kept on */ ++#define CSF_DYNAMIC_PM_CORE_KEEP_ON (1 << 0) ++/* CSF PM flag, signaling no scheduler suspension on idle groups */ ++#define CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE (1 << 1) ++/* CSF PM flag, signaling no scheduler suspension on no runnable groups */ ++#define CSF_DYNAMIC_PM_SCHED_NO_SUSPEND (1 << 2) ++ ++/* The following flags corresponds to existing defined PM policies */ ++#define ALWAYS_ON_PM_SCHED_FLAGS (CSF_DYNAMIC_PM_CORE_KEEP_ON | \ ++ CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE | \ ++ CSF_DYNAMIC_PM_SCHED_NO_SUSPEND) ++#define COARSE_ON_DEMAND_PM_SCHED_FLAGS (0) ++#if !MALI_CUSTOMER_RELEASE ++#define ALWAYS_ON_DEMAND_PM_SCHED_FLAGS (CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE) ++#endif ++#endif + + /* List of policy IDs */ + enum kbase_pm_policy_id { +@@ -443,6 +464,33 @@ enum 
kbase_pm_policy_id { + KBASE_PM_POLICY_ID_ALWAYS_ON + }; + ++/** ++ * enum kbase_pm_policy_event - PM Policy event ID ++ */ ++enum kbase_pm_policy_event { ++ /** ++ * @KBASE_PM_POLICY_EVENT_IDLE: Indicates that the GPU power state ++ * model has determined that the GPU has gone idle. ++ */ ++ KBASE_PM_POLICY_EVENT_IDLE, ++ /** ++ * @KBASE_PM_POLICY_EVENT_POWER_ON: Indicates that the GPU state model ++ * is preparing to power on the GPU. ++ */ ++ KBASE_PM_POLICY_EVENT_POWER_ON, ++ /** ++ * @KBASE_PM_POLICY_EVENT_TIMER_HIT: Indicates that the GPU became ++ * active while the Shader Tick Timer was holding the GPU in a powered ++ * on state. ++ */ ++ KBASE_PM_POLICY_EVENT_TIMER_HIT, ++ /** ++ * @KBASE_PM_POLICY_EVENT_TIMER_MISS: Indicates that the GPU did not ++ * become active before the Shader Tick Timer timeout occurred. ++ */ ++ KBASE_PM_POLICY_EVENT_TIMER_MISS, ++}; ++ + /** + * struct kbase_pm_policy - Power policy structure. + * +@@ -455,15 +503,22 @@ enum kbase_pm_policy_id { + * @shaders_needed: Function called to find out if shader cores are needed + * @get_core_active: Function called to get the current overall GPU power + * state ++ * @handle_event: Function called when a PM policy event occurs. Should be ++ * set to NULL if the power policy doesn't require any ++ * event notifications. + * @id: Field indicating an ID for this policy. This is not + * necessarily the same as its index in the list returned + * by kbase_pm_list_policies(). + * It is used purely for debugging. ++ * @pm_sched_flags: Policy associated with CSF PM scheduling operational flags. ++ * Pre-defined required flags exist for each of the ++ * ARM released policies, such as 'always_on', 'coarse_demand' ++ * and etc. + */ + struct kbase_pm_policy { + char *name; + +- /** ++ /* + * Function called when the policy is selected + * + * This should initialize the kbdev->pm.pm_policy_data structure. 
It +@@ -477,7 +532,7 @@ struct kbase_pm_policy { + */ + void (*init)(struct kbase_device *kbdev); + +- /** ++ /* + * Function called when the policy is unselected. + * + * @kbdev: The kbase device structure for the device (must be a +@@ -485,7 +540,7 @@ struct kbase_pm_policy { + */ + void (*term)(struct kbase_device *kbdev); + +- /** ++ /* + * Function called to find out if shader cores are needed + * + * This needs to at least satisfy kbdev->pm.backend.shaders_desired, +@@ -498,7 +553,7 @@ struct kbase_pm_policy { + */ + bool (*shaders_needed)(struct kbase_device *kbdev); + +- /** ++ /* + * Function called to get the current overall GPU power state + * + * This function must meet or exceed the requirements for power +@@ -511,7 +566,26 @@ struct kbase_pm_policy { + */ + bool (*get_core_active)(struct kbase_device *kbdev); + ++ /** ++ * Function called when a power event occurs ++ * ++ * @kbdev: The kbase device structure for the device (must be a ++ * valid pointer) ++ * @event: The id of the power event that has occurred ++ */ ++ void (*handle_event)(struct kbase_device *kbdev, ++ enum kbase_pm_policy_event event); ++ + enum kbase_pm_policy_id id; ++ ++#if MALI_USE_CSF ++ /* Policy associated with CSF PM scheduling operational flags. ++ * There are pre-defined required flags exist for each of the ++ * ARM released policies, such as 'always_on', 'coarse_demand' ++ * and etc. 
++ */ ++ unsigned int pm_sched_flags; ++#endif + }; + + #endif /* _KBASE_PM_HWACCESS_DEFS_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c +index 6b821f7..bcada93 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c +@@ -1,12 +1,12 @@ +-// SPDX-License-Identifier: GPL-2.0 ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -17,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +@@ -32,12 +30,18 @@ + #include + #include + #include ++ ++#if MALI_USE_CSF ++#include ++#else + #include ++#endif /* !MALI_USE_CSF */ ++ + #include + #include + #include + #include +-#include ++#include + #include + #include + #include +@@ -45,6 +49,9 @@ + #ifdef CONFIG_MALI_ARBITER_SUPPORT + #include + #endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++#if MALI_USE_CSF ++#include ++#endif + + #include + +@@ -89,6 +96,28 @@ static u64 kbase_pm_get_state( + enum kbase_pm_core_type core_type, + enum kbasep_pm_action action); + ++#if MALI_USE_CSF ++bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ if (unlikely(!kbdev->csf.firmware_inited)) ++ return false; ++ ++ if (kbdev->csf.scheduler.pm_active_count) ++ return true; ++ ++ /* MCU is supposed to be ON, only when scheduler.pm_active_count is ++ * non zero. But for always_on policy, the MCU needs to be kept on, ++ * unless policy changing transition needs it off. 
++ */ ++ ++ return (kbdev->pm.backend.mcu_desired && ++ kbase_pm_no_mcu_core_pwroff(kbdev) && ++ !kbdev->pm.backend.policy_change_clamp_state_to_off); ++} ++#endif ++ + bool kbase_pm_is_l2_desired(struct kbase_device *kbdev) + { + if (kbdev->pm.backend.protected_entry_transition_override) +@@ -102,6 +131,11 @@ bool kbase_pm_is_l2_desired(struct kbase_device *kbdev) + !kbdev->pm.backend.shaders_desired) + return false; + ++#if MALI_USE_CSF ++ if (kbdev->pm.backend.policy_change_clamp_state_to_off) ++ return false; ++#endif ++ + return kbdev->pm.backend.l2_desired; + } + +@@ -210,7 +244,7 @@ static u32 core_type_to_reg(enum kbase_pm_core_type core_type, + return (u32)core_type + (u32)action; + } + +-#ifdef CONFIG_ARM64 ++#if IS_ENABLED(CONFIG_ARM64) + static void mali_cci_flush_l2(struct kbase_device *kbdev) + { + const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED; +@@ -233,7 +267,8 @@ static void mali_cci_flush_l2(struct kbase_device *kbdev) + GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)); + + /* Wait for cache flush to complete before continuing, exit on +- * gpu resets or loop expiry. */ ++ * gpu resets or loop expiry. 
++ */ + while (((raw & mask) == 0) && --loops) { + raw = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)); +@@ -372,9 +407,9 @@ u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, + + switch (type) { + case KBASE_PM_CORE_L2: +- return kbdev->gpu_props.props.raw_props.l2_present; ++ return kbdev->gpu_props.curr_config.l2_present; + case KBASE_PM_CORE_SHADER: +- return kbdev->gpu_props.props.raw_props.shader_present; ++ return kbdev->gpu_props.curr_config.shader_present; + case KBASE_PM_CORE_TILER: + return kbdev->gpu_props.props.raw_props.tiler_present; + case KBASE_PM_CORE_STACK: +@@ -468,14 +503,10 @@ static void kbase_pm_trigger_hwcnt_disable(struct kbase_device *kbdev) + */ + if (kbase_hwcnt_context_disable_atomic(kbdev->hwcnt_gpu_ctx)) { + backend->hwcnt_disabled = true; ++ + } else { +-#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE +- queue_work(system_wq, +- &backend->hwcnt_disable_work); +-#else +- queue_work(system_highpri_wq, +- &backend->hwcnt_disable_work); +-#endif ++ kbase_hwcnt_context_queue_work(kbdev->hwcnt_gpu_ctx, ++ &backend->hwcnt_disable_work); + } + } + +@@ -493,7 +524,8 @@ static void kbase_pm_l2_config_override(struct kbase_device *kbdev) + * Skip if size and hash are not given explicitly, + * which means default values are used. 
+ */ +- if ((kbdev->l2_size_override == 0) && (kbdev->l2_hash_override == 0)) ++ if ((kbdev->l2_size_override == 0) && (kbdev->l2_hash_override == 0) && ++ (!kbdev->l2_hash_values_override)) + return; + + val = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG)); +@@ -504,13 +536,25 @@ static void kbase_pm_l2_config_override(struct kbase_device *kbdev) + } + + if (kbdev->l2_hash_override) { ++ WARN_ON(kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH)); + val &= ~L2_CONFIG_HASH_MASK; + val |= (kbdev->l2_hash_override << L2_CONFIG_HASH_SHIFT); ++ } else if (kbdev->l2_hash_values_override) { ++ int i; ++ ++ WARN_ON(!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH)); ++ val &= ~L2_CONFIG_ASN_HASH_ENABLE_MASK; ++ val |= (0x1 << L2_CONFIG_ASN_HASH_ENABLE_SHIFT); ++ ++ for (i = 0; i < ASN_HASH_COUNT; i++) { ++ dev_dbg(kbdev->dev, "Program 0x%x to ASN_HASH[%d]\n", ++ kbdev->l2_hash_values[i], i); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(ASN_HASH(i)), ++ kbdev->l2_hash_values[i]); ++ } + } + + dev_dbg(kbdev->dev, "Program 0x%x to L2_CONFIG\n", val); +- +- /* Write L2_CONFIG to override */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_CONFIG), val); + } + +@@ -523,6 +567,278 @@ static void kbase_pm_control_gpu_clock(struct kbase_device *kbdev) + queue_work(system_wq, &backend->gpu_clock_control_work); + } + ++#if MALI_USE_CSF ++static const char *kbase_mcu_state_to_string(enum kbase_mcu_state state) ++{ ++ const char *const strings[] = { ++#define KBASEP_MCU_STATE(n) #n, ++#include "mali_kbase_pm_mcu_states.h" ++#undef KBASEP_MCU_STATE ++ }; ++ if (WARN_ON((size_t)state >= ARRAY_SIZE(strings))) ++ return "Bad MCU state"; ++ else ++ return strings[state]; ++} ++ ++static inline bool kbase_pm_handle_mcu_core_attr_update(struct kbase_device *kbdev) ++{ ++ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; ++ bool timer_update; ++ bool core_mask_update; ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ WARN_ON(backend->mcu_state != KBASE_MCU_ON); ++ ++ /* 
This function is only for cases where the MCU is managing the cores; if ++ * the firmware mode is with host control, do nothing here. ++ */ ++ if (unlikely(kbdev->csf.firmware_hctl_core_pwr)) ++ return false; ++ ++ core_mask_update = ++ backend->shaders_avail != backend->shaders_desired_mask; ++ ++ timer_update = kbdev->csf.mcu_core_pwroff_dur_count != ++ kbdev->csf.mcu_core_pwroff_reg_shadow; ++ ++ if (core_mask_update || timer_update) ++ kbase_csf_firmware_update_core_attr(kbdev, timer_update, ++ core_mask_update, backend->shaders_desired_mask); ++ ++ return (core_mask_update || timer_update); ++} ++ ++static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) ++{ ++ struct kbase_pm_backend_data *backend = &kbdev->pm.backend; ++ enum kbase_mcu_state prev_state; ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ /* ++ * Initial load of firmware should have been done to ++ * exercise the MCU state machine. ++ */ ++ if (unlikely(!kbdev->csf.firmware_inited)) { ++ WARN_ON(backend->mcu_state != KBASE_MCU_OFF); ++ return -EIO; ++ } ++ ++ do { ++ u64 shaders_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_SHADER); ++ u64 shaders_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); ++ ++ /* mask off ready from trans in case transitions finished ++ * between the register reads ++ */ ++ shaders_trans &= ~shaders_ready; ++ ++ prev_state = backend->mcu_state; ++ ++ switch (backend->mcu_state) { ++ case KBASE_MCU_OFF: ++ if (kbase_pm_is_mcu_desired(kbdev) && ++ !backend->policy_change_clamp_state_to_off && ++ backend->l2_state == KBASE_L2_ON) { ++ kbase_csf_firmware_trigger_reload(kbdev); ++ backend->mcu_state = KBASE_MCU_PEND_ON_RELOAD; ++ } ++ break; ++ ++ case KBASE_MCU_PEND_ON_RELOAD: ++ if (kbdev->csf.firmware_reloaded) { ++ backend->shaders_desired_mask = ++ kbase_pm_ca_get_core_mask(kbdev); ++ kbase_csf_firmware_global_reinit(kbdev, ++ backend->shaders_desired_mask); ++ backend->mcu_state = ++ KBASE_MCU_ON_GLB_REINIT_PEND; ++ } ++ break; ++ ++ case
KBASE_MCU_ON_GLB_REINIT_PEND: ++ if (kbase_csf_firmware_global_reinit_complete(kbdev)) { ++ backend->shaders_avail = ++ backend->shaders_desired_mask; ++ backend->pm_shaders_core_mask = 0; ++ if (kbdev->csf.firmware_hctl_core_pwr) { ++ kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, ++ backend->shaders_avail, ACTION_PWRON); ++ backend->mcu_state = ++ KBASE_MCU_HCTL_SHADERS_PEND_ON; ++ } else ++ backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; ++ } ++ break; ++ ++ case KBASE_MCU_HCTL_SHADERS_PEND_ON: ++ if (!shaders_trans && ++ shaders_ready == backend->shaders_avail) { ++ /* Cores now stable, notify MCU the stable mask */ ++ kbase_csf_firmware_update_core_attr(kbdev, ++ false, true, shaders_ready); ++ ++ backend->pm_shaders_core_mask = shaders_ready; ++ backend->mcu_state = ++ KBASE_MCU_HCTL_CORES_NOTIFY_PEND; ++ } ++ break; ++ ++ case KBASE_MCU_HCTL_CORES_NOTIFY_PEND: ++ /* Wait for the acknowledgement */ ++ if (kbase_csf_firmware_core_attr_updated(kbdev)) ++ backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; ++ break; ++ ++ case KBASE_MCU_ON_HWCNT_ENABLE: ++ backend->hwcnt_desired = true; ++ if (backend->hwcnt_disabled) { ++ unsigned long flags; ++ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ kbase_hwcnt_context_enable( ++ kbdev->hwcnt_gpu_ctx); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ backend->hwcnt_disabled = false; ++ } ++ backend->mcu_state = KBASE_MCU_ON; ++ break; ++ ++ case KBASE_MCU_ON: ++ backend->shaders_desired_mask = kbase_pm_ca_get_core_mask(kbdev); ++ ++ if (!kbase_pm_is_mcu_desired(kbdev)) ++ backend->mcu_state = KBASE_MCU_ON_HWCNT_DISABLE; ++ else if (kbdev->csf.firmware_hctl_core_pwr) { ++ /* Host control add additional Cores to be active */ ++ if (backend->shaders_desired_mask & ~shaders_ready) { ++ backend->hwcnt_desired = false; ++ if (!backend->hwcnt_disabled) ++ kbase_pm_trigger_hwcnt_disable(kbdev); ++ backend->mcu_state = ++ KBASE_MCU_HCTL_MCU_ON_RECHECK; ++ } ++ } else if (kbase_pm_handle_mcu_core_attr_update(kbdev)) ++ 
kbdev->pm.backend.mcu_state = ++ KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND; ++ break; ++ ++ case KBASE_MCU_HCTL_MCU_ON_RECHECK: ++ backend->shaders_desired_mask = kbase_pm_ca_get_core_mask(kbdev); ++ ++ if (!backend->hwcnt_disabled) { ++ /* Wait for being disabled */ ++ ; ++ } else if (!kbase_pm_is_mcu_desired(kbdev)) { ++ /* Converging to MCU powering down flow */ ++ backend->mcu_state = KBASE_MCU_ON_HWCNT_DISABLE; ++ } else if (backend->shaders_desired_mask & ~shaders_ready) { ++ /* set cores ready but not available to ++ * meet SHADERS_PEND_ON check pass ++ */ ++ backend->shaders_avail = ++ (backend->shaders_desired_mask | shaders_ready); ++ ++ kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, ++ backend->shaders_avail & ~shaders_ready, ++ ACTION_PWRON); ++ backend->mcu_state = ++ KBASE_MCU_HCTL_SHADERS_PEND_ON; ++ } else { ++ backend->mcu_state = ++ KBASE_MCU_HCTL_SHADERS_PEND_ON; ++ } ++ break; ++ ++ case KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND: ++ if (kbase_csf_firmware_core_attr_updated(kbdev)) { ++ backend->shaders_avail = ++ backend->shaders_desired_mask; ++ backend->mcu_state = KBASE_MCU_ON; ++ } ++ break; ++ ++ case KBASE_MCU_ON_HWCNT_DISABLE: ++ if (kbase_pm_is_mcu_desired(kbdev)) { ++ backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; ++ break; ++ } ++ ++ backend->hwcnt_desired = false; ++ if (!backend->hwcnt_disabled) ++ kbase_pm_trigger_hwcnt_disable(kbdev); ++ ++ if (backend->hwcnt_disabled) ++ backend->mcu_state = KBASE_MCU_ON_HALT; ++ break; ++ ++ case KBASE_MCU_ON_HALT: ++ if (!kbase_pm_is_mcu_desired(kbdev)) { ++ kbase_csf_firmware_trigger_mcu_halt(kbdev); ++ backend->mcu_state = KBASE_MCU_ON_PEND_HALT; ++ } else ++ backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; ++ break; ++ ++ case KBASE_MCU_ON_PEND_HALT: ++ if (kbase_csf_firmware_mcu_halted(kbdev)) { ++ if (kbdev->csf.firmware_hctl_core_pwr) ++ backend->mcu_state = ++ KBASE_MCU_HCTL_SHADERS_READY_OFF; ++ else ++ backend->mcu_state = KBASE_MCU_POWER_DOWN; ++ } ++ break; ++ ++ case 
KBASE_MCU_HCTL_SHADERS_READY_OFF: ++ kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, ++ shaders_ready, ACTION_PWROFF); ++ backend->mcu_state = ++ KBASE_MCU_HCTL_SHADERS_PEND_OFF; ++ break; ++ ++ case KBASE_MCU_HCTL_SHADERS_PEND_OFF: ++ if (!shaders_trans && !shaders_ready) { ++ backend->pm_shaders_core_mask = 0; ++ backend->mcu_state = KBASE_MCU_POWER_DOWN; ++ } ++ break; ++ ++ case KBASE_MCU_POWER_DOWN: ++ kbase_csf_firmware_disable_mcu(kbdev); ++ backend->mcu_state = KBASE_MCU_PEND_OFF; ++ break; ++ ++ case KBASE_MCU_PEND_OFF: ++ /* wait synchronously for the MCU to get disabled */ ++ kbase_csf_firmware_disable_mcu_wait(kbdev); ++ backend->mcu_state = KBASE_MCU_OFF; ++ break; ++ ++ case KBASE_MCU_RESET_WAIT: ++ /* Reset complete */ ++ if (!backend->in_reset) ++ backend->mcu_state = KBASE_MCU_OFF; ++ break; ++ ++ default: ++ WARN(1, "Invalid state in mcu_state: %d", ++ backend->mcu_state); ++ } ++ ++ if (backend->mcu_state != prev_state) ++ dev_dbg(kbdev->dev, "MCU state transition: %s to %s\n", ++ kbase_mcu_state_to_string(prev_state), ++ kbase_mcu_state_to_string(backend->mcu_state)); ++ ++ } while (backend->mcu_state != prev_state); ++ ++ return 0; ++} ++#endif ++ + static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state) + { + const char *const strings[] = { +@@ -539,8 +855,10 @@ static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state) + static int kbase_pm_l2_update_state(struct kbase_device *kbdev) + { + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; +- u64 l2_present = kbdev->gpu_props.props.raw_props.l2_present; ++ u64 l2_present = kbdev->gpu_props.curr_config.l2_present; ++#if !MALI_USE_CSF + u64 tiler_present = kbdev->gpu_props.props.raw_props.tiler_present; ++#endif + enum kbase_l2_core_state prev_state; + + lockdep_assert_held(&kbdev->hwaccess_lock); +@@ -551,35 +869,49 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) + KBASE_PM_CORE_L2); + u64 l2_ready = 
kbase_pm_get_ready_cores(kbdev, + KBASE_PM_CORE_L2); ++ ++#if !MALI_USE_CSF + u64 tiler_trans = kbase_pm_get_trans_cores(kbdev, + KBASE_PM_CORE_TILER); + u64 tiler_ready = kbase_pm_get_ready_cores(kbdev, + KBASE_PM_CORE_TILER); ++#endif + + /* + * kbase_pm_get_ready_cores and kbase_pm_get_trans_cores + * are vulnerable to corruption if gpu is lost + */ +- if (kbase_is_gpu_lost(kbdev)) +- return -EIO; ++ if (kbase_is_gpu_removed(kbdev) ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ || kbase_pm_is_gpu_lost(kbdev)) { ++#else ++ ) { ++#endif ++ backend->shaders_state = ++ KBASE_SHADERS_OFF_CORESTACK_OFF; ++ backend->l2_state = KBASE_L2_OFF; ++ dev_dbg(kbdev->dev, "GPU lost has occurred - L2 off\n"); ++ break; ++ } + + /* mask off ready from trans in case transitions finished + * between the register reads + */ + l2_trans &= ~l2_ready; ++#if !MALI_USE_CSF + tiler_trans &= ~tiler_ready; +- ++#endif + prev_state = backend->l2_state; + + switch (backend->l2_state) { + case KBASE_L2_OFF: + if (kbase_pm_is_l2_desired(kbdev)) { + /* +- * Set the desired config for L2 before powering +- * it on ++ * Set the desired config for L2 before ++ * powering it on + */ + kbase_pm_l2_config_override(kbdev); +- ++#if !MALI_USE_CSF + /* L2 is required, power on. Powering on the + * tiler will also power the first L2 cache. + */ +@@ -593,14 +925,30 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) + kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, + l2_present & ~1, + ACTION_PWRON); ++#else ++ /* With CSF firmware, Host driver doesn't need to ++ * handle power management with both shader and tiler cores. ++ * The CSF firmware will power up the cores appropriately. ++ * So only power the l2 cache explicitly. 
++ */ ++ kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, ++ l2_present, ACTION_PWRON); ++#endif + backend->l2_state = KBASE_L2_PEND_ON; + } + break; + + case KBASE_L2_PEND_ON: ++#if !MALI_USE_CSF + if (!l2_trans && l2_ready == l2_present && !tiler_trans + && tiler_ready == tiler_present) { +- KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, tiler_ready); ++ KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, ++ tiler_ready); ++#else ++ if (!l2_trans && l2_ready == l2_present) { ++ KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL, ++ l2_ready); ++#endif + /* + * Ensure snoops are enabled after L2 is powered + * up. Note that kbase keeps track of the snoop +@@ -658,22 +1006,30 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) + break; + + case KBASE_L2_ON_HWCNT_ENABLE: ++#if !MALI_USE_CSF + backend->hwcnt_desired = true; + if (backend->hwcnt_disabled) { + kbase_hwcnt_context_enable( + kbdev->hwcnt_gpu_ctx); + backend->hwcnt_disabled = false; + } ++#endif + backend->l2_state = KBASE_L2_ON; + break; + + case KBASE_L2_ON: + if (!kbase_pm_is_l2_desired(kbdev)) { ++#if !MALI_USE_CSF + /* Do not power off L2 until the shaders and + * core stacks are off. 
+ */ + if (backend->shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) + break; ++#else ++ /* Do not power off L2 until the MCU has been stopped */ ++ if (backend->mcu_state != KBASE_MCU_OFF) ++ break; ++#endif + + /* We need to make sure hardware counters are + * disabled before powering down the L2, to +@@ -690,6 +1046,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) + break; + + case KBASE_L2_ON_HWCNT_DISABLE: ++#if !MALI_USE_CSF + /* If the L2 became desired while we were waiting on the + * worker to do the actual hwcnt disable (which might + * happen if some work was submitted immediately after +@@ -719,6 +1076,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) + if (!backend->hwcnt_disabled) { + kbase_pm_trigger_hwcnt_disable(kbdev); + } ++#endif + + if (backend->hwcnt_disabled) { + if (kbdev->pm.backend.gpu_clock_slow_down_wa) +@@ -769,9 +1127,11 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) + */ + kbase_gpu_start_cache_clean_nolock( + kbdev); +- ++#if !MALI_USE_CSF + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, 0u); +- ++#else ++ KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL, 0u); ++#endif + backend->l2_state = KBASE_L2_PEND_OFF; + break; + +@@ -877,6 +1237,7 @@ static void shader_poweroff_timer_queue_cancel(struct kbase_device *kbdev) + } + } + ++#if !MALI_USE_CSF + static const char *kbase_shader_core_state_to_string( + enum kbase_shader_core_state state) + { +@@ -898,7 +1259,6 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) + &kbdev->pm.backend.shader_tick_timer; + enum kbase_shader_core_state prev_state; + u64 stacks_avail = 0; +- int err = 0; + + lockdep_assert_held(&kbdev->hwaccess_lock); + +@@ -924,8 +1284,15 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) + * kbase_pm_get_ready_cores and kbase_pm_get_trans_cores + * are vulnerable to corruption if gpu is lost + */ +- if (kbase_is_gpu_lost(kbdev)) { +- err = -EIO; 
++ if (kbase_is_gpu_removed(kbdev) ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ || kbase_pm_is_gpu_lost(kbdev)) { ++#else ++ ) { ++#endif ++ backend->shaders_state = ++ KBASE_SHADERS_OFF_CORESTACK_OFF; ++ dev_dbg(kbdev->dev, "GPU lost has occurred - shaders off\n"); + break; + } + +@@ -976,6 +1343,12 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) + kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, + backend->shaders_avail, ACTION_PWRON); + ++ if (backend->pm_current_policy && ++ backend->pm_current_policy->handle_event) ++ backend->pm_current_policy->handle_event( ++ kbdev, ++ KBASE_PM_POLICY_EVENT_POWER_ON); ++ + backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; + } + break; +@@ -986,8 +1359,18 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) + backend->pm_shaders_core_mask = shaders_ready; + backend->hwcnt_desired = true; + if (backend->hwcnt_disabled) { ++#if MALI_USE_CSF ++ unsigned long flags; ++ ++ kbase_csf_scheduler_spin_lock(kbdev, ++ &flags); ++#endif + kbase_hwcnt_context_enable( + kbdev->hwcnt_gpu_ctx); ++#if MALI_USE_CSF ++ kbase_csf_scheduler_spin_unlock(kbdev, ++ flags); ++#endif + backend->hwcnt_disabled = false; + } + +@@ -1018,6 +1401,12 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) + /* Wait for being disabled */ + ; + } else if (!backend->shaders_desired) { ++ if (backend->pm_current_policy && ++ backend->pm_current_policy->handle_event) ++ backend->pm_current_policy->handle_event( ++ kbdev, ++ KBASE_PM_POLICY_EVENT_IDLE); ++ + if (kbdev->pm.backend.protected_transition_override || + #ifdef CONFIG_MALI_ARBITER_SUPPORT + kbase_pm_is_suspending(kbdev) || +@@ -1078,9 +1467,21 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) + } + + if (backend->shaders_desired) { ++ if (backend->pm_current_policy && ++ backend->pm_current_policy->handle_event) ++ backend->pm_current_policy->handle_event( ++ kbdev, ++ KBASE_PM_POLICY_EVENT_TIMER_HIT); ++ + 
stt->remaining_ticks = 0; + backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON_RECHECK; + } else if (stt->remaining_ticks == 0) { ++ if (backend->pm_current_policy && ++ backend->pm_current_policy->handle_event) ++ backend->pm_current_policy->handle_event( ++ kbdev, ++ KBASE_PM_POLICY_EVENT_TIMER_MISS); ++ + backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; + #ifdef CONFIG_MALI_ARBITER_SUPPORT + } else if (kbase_pm_is_suspending(kbdev) || +@@ -1167,8 +1568,18 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) + backend->pm_shaders_core_mask = 0; + backend->hwcnt_desired = true; + if (backend->hwcnt_disabled) { ++#if MALI_USE_CSF ++ unsigned long flags; ++ ++ kbase_csf_scheduler_spin_lock(kbdev, ++ &flags); ++#endif + kbase_hwcnt_context_enable( + kbdev->hwcnt_gpu_ctx); ++#if MALI_USE_CSF ++ kbase_csf_scheduler_spin_unlock(kbdev, ++ flags); ++#endif + backend->hwcnt_disabled = false; + } + backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF; +@@ -1195,8 +1606,9 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) + + } while (backend->shaders_state != prev_state); + +- return err; ++ return 0; + } ++#endif + + static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev) + { +@@ -1211,12 +1623,21 @@ static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev) + kbdev->pm.backend.l2_state != KBASE_L2_OFF) + in_desired_state = false; + ++#if !MALI_USE_CSF + if (kbdev->pm.backend.shaders_desired && + kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON) + in_desired_state = false; + else if (!kbdev->pm.backend.shaders_desired && + kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) + in_desired_state = false; ++#else ++ if (kbase_pm_is_mcu_desired(kbdev) && ++ kbdev->pm.backend.mcu_state != KBASE_MCU_ON) ++ in_desired_state = false; ++ else if (!kbase_pm_is_mcu_desired(kbdev) && ++ kbdev->pm.backend.mcu_state != KBASE_MCU_OFF) ++ 
in_desired_state = false; ++#endif + + return in_desired_state; + } +@@ -1280,17 +1701,22 @@ static void kbase_pm_trace_power_state(struct kbase_device *kbdev) + + void kbase_pm_update_state(struct kbase_device *kbdev) + { ++#if !MALI_USE_CSF + enum kbase_shader_core_state prev_shaders_state = + kbdev->pm.backend.shaders_state; ++#else ++ enum kbase_mcu_state prev_mcu_state = kbdev->pm.backend.mcu_state; ++#endif + + lockdep_assert_held(&kbdev->hwaccess_lock); + +- if (!kbdev->pm.backend.gpu_powered) +- return; /* Do nothing if the GPU is off */ ++ if (!kbdev->pm.backend.gpu_ready) ++ return; /* Do nothing if the GPU is not ready */ + + if (kbase_pm_l2_update_state(kbdev)) + return; + ++#if !MALI_USE_CSF + if (kbase_pm_shaders_update_state(kbdev)) + return; + +@@ -1304,9 +1730,20 @@ void kbase_pm_update_state(struct kbase_device *kbdev) + if (kbase_pm_l2_update_state(kbdev)) + return; + } ++#else ++ if (kbase_pm_mcu_update_state(kbdev)) ++ return; ++ ++ if (prev_mcu_state != KBASE_MCU_OFF && ++ kbdev->pm.backend.mcu_state == KBASE_MCU_OFF) { ++ if (kbase_pm_l2_update_state(kbdev)) ++ return; ++ } ++#endif + + if (kbase_pm_is_in_desired_state_nolock(kbdev)) { +- KBASE_KTRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, kbdev->pm.backend.shaders_avail); ++ KBASE_KTRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, ++ kbdev->pm.backend.shaders_avail); + + kbase_pm_trace_power_state(kbdev); + +@@ -1363,7 +1800,8 @@ int kbase_pm_state_machine_init(struct kbase_device *kbdev) + hrtimer_init(&stt->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + stt->timer.function = shader_tick_timer_callback; + stt->configured_interval = HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS); +- stt->configured_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER; ++ stt->default_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER; ++ stt->configured_ticks = stt->default_ticks; + + return 0; + } +@@ -1382,7 +1820,19 @@ void kbase_pm_reset_start_locked(struct kbase_device *kbdev) + + backend->in_reset = true; + backend->l2_state = 
KBASE_L2_RESET_WAIT; ++#if !MALI_USE_CSF + backend->shaders_state = KBASE_SHADERS_RESET_WAIT; ++#else ++ /* MCU state machine is exercised only after the initial load/boot ++ * of the firmware. ++ */ ++ if (likely(kbdev->csf.firmware_inited)) { ++ backend->mcu_state = KBASE_MCU_RESET_WAIT; ++ kbdev->csf.firmware_reload_needed = true; ++ } else { ++ WARN_ON(backend->mcu_state != KBASE_MCU_OFF); ++ } ++#endif + + /* We're in a reset, so hwcnt will have been synchronously disabled by + * this function's caller as part of the reset process. We therefore +@@ -1422,15 +1872,28 @@ void kbase_pm_reset_complete(struct kbase_device *kbdev) + + /* Timeout for kbase_pm_wait_for_desired_state when wait_event_killable has + * aborted due to a fatal signal. If the time spent waiting has exceeded this +- * threshold then there is most likely a hardware issue. */ +-#define PM_TIMEOUT (5*HZ) /* 5s */ ++ * threshold then there is most likely a hardware issue. ++ */ ++#define PM_TIMEOUT_MS (5000) /* 5s */ + + static void kbase_pm_timed_out(struct kbase_device *kbdev) + { ++ unsigned long flags; ++ + dev_err(kbdev->dev, "Power transition timed out unexpectedly\n"); ++#if !MALI_USE_CSF ++ CSTD_UNUSED(flags); + dev_err(kbdev->dev, "Desired state :\n"); + dev_err(kbdev->dev, "\tShader=%016llx\n", + kbdev->pm.backend.shaders_desired ? 
kbdev->pm.backend.shaders_avail : 0); ++#else ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ dev_err(kbdev->dev, "\tMCU desired = %d\n", ++ kbase_pm_is_mcu_desired(kbdev)); ++ dev_err(kbdev->dev, "\tMCU sw state = %d\n", ++ kbdev->pm.backend.mcu_state); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++#endif + dev_err(kbdev->dev, "Current state :\n"); + dev_err(kbdev->dev, "\tShader=%08x%08x\n", + kbase_reg_read(kbdev, +@@ -1447,6 +1910,10 @@ static void kbase_pm_timed_out(struct kbase_device *kbdev) + GPU_CONTROL_REG(L2_READY_HI)), + kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_READY_LO))); ++#if MALI_USE_CSF ++ dev_err(kbdev->dev, "\tMCU status = %d\n", ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS))); ++#endif + dev_err(kbdev->dev, "Cores transitioning :\n"); + dev_err(kbdev->dev, "\tShader=%08x%08x\n", + kbase_reg_read(kbdev, GPU_CONTROL_REG( +@@ -1465,49 +1932,87 @@ static void kbase_pm_timed_out(struct kbase_device *kbdev) + L2_PWRTRANS_LO))); + + dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); +- if (kbase_prepare_to_reset_gpu(kbdev)) ++ if (kbase_prepare_to_reset_gpu(kbdev, ++ RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu(kbdev); + } + +-void kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev) ++int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev) + { + unsigned long flags; + unsigned long timeout; +- int err; ++ long remaining; ++ int err = 0; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + +- timeout = jiffies + PM_TIMEOUT; ++#if MALI_USE_CSF ++ timeout = kbase_csf_timeout_in_jiffies(PM_TIMEOUT_MS); ++#else ++ timeout = msecs_to_jiffies(PM_TIMEOUT_MS); ++#endif + + /* Wait for cores */ +- err = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait, +- kbase_pm_is_in_desired_state_with_l2_powered(kbdev)); ++#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE ++ remaining = 
wait_event_killable_timeout( ++#else ++ remaining = wait_event_timeout( ++#endif ++ kbdev->pm.backend.gpu_in_desired_state_wait, ++ kbase_pm_is_in_desired_state_with_l2_powered(kbdev), timeout); + +- if (err < 0 && time_after(jiffies, timeout)) ++ if (!remaining) { + kbase_pm_timed_out(kbdev); ++ err = -ETIMEDOUT; ++ } else if (remaining < 0) { ++ dev_info( ++ kbdev->dev, ++ "Wait for desired PM state with L2 powered got interrupted"); ++ err = (int)remaining; ++ } ++ ++ return err; + } + +-void kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) ++int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) + { + unsigned long flags; +- unsigned long timeout; +- int err; ++ long remaining; ++#if MALI_USE_CSF ++ long timeout = kbase_csf_timeout_in_jiffies(PM_TIMEOUT_MS); ++#else ++ long timeout = msecs_to_jiffies(PM_TIMEOUT_MS); ++#endif ++ int err = 0; + + /* Let the state machine latch the most recent desired state. */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + +- timeout = jiffies + PM_TIMEOUT; +- + /* Wait for cores */ +- err = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait, +- kbase_pm_is_in_desired_state(kbdev)); ++#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE ++ remaining = wait_event_killable_timeout( ++ kbdev->pm.backend.gpu_in_desired_state_wait, ++ kbase_pm_is_in_desired_state(kbdev), timeout); ++#else ++ remaining = wait_event_timeout( ++ kbdev->pm.backend.gpu_in_desired_state_wait, ++ kbase_pm_is_in_desired_state(kbdev), timeout); ++#endif + +- if (err < 0 && time_after(jiffies, timeout)) ++ if (!remaining) { + kbase_pm_timed_out(kbdev); ++ err = -ETIMEDOUT; ++ } else if (remaining < 0) { ++ dev_info(kbdev->dev, ++ "Wait for desired PM state got interrupted"); ++ err = (int)remaining; ++ } ++ ++ return err; + } + KBASE_EXPORT_TEST_API(kbase_pm_wait_for_desired_state); + +@@ -1515,7 +2020,7 @@ void 
kbase_pm_enable_interrupts(struct kbase_device *kbdev) + { + unsigned long flags; + +- KBASE_DEBUG_ASSERT(NULL != kbdev); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + /* + * Clear all interrupts, + * and unmask them all. +@@ -1529,14 +2034,19 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev) + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); ++#if MALI_USE_CSF ++ /* Enable only the Page fault bits part */ ++ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFF); ++#else + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF); ++#endif + } + + KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts); + + void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev) + { +- KBASE_DEBUG_ASSERT(NULL != kbdev); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + /* + * Mask all interrupts, + * and clear them all. +@@ -1563,6 +2073,23 @@ void kbase_pm_disable_interrupts(struct kbase_device *kbdev) + + KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts); + ++#if MALI_USE_CSF ++static void update_user_reg_page_mapping(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->pm.lock); ++ ++ if (kbdev->csf.mali_file_inode) { ++ /* This would zap the pte corresponding to the mapping of User ++ * register page for all the Kbase contexts. 
++ */ ++ unmap_mapping_range(kbdev->csf.mali_file_inode->i_mapping, ++ BASEP_MEM_CSF_USER_REG_PAGE_HANDLE, ++ PAGE_SIZE, 1); ++ } ++} ++#endif ++ ++ + /* + * pmu layout: + * 0x0000: PMU TAG (RO) (0xCAFECAFE) +@@ -1574,10 +2101,20 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) + bool reset_required = is_resume; + unsigned long flags; + +- KBASE_DEBUG_ASSERT(NULL != kbdev); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++#if !MALI_USE_CSF + lockdep_assert_held(&kbdev->js_data.runpool_mutex); ++#endif /* !MALI_USE_CSF */ + lockdep_assert_held(&kbdev->pm.lock); + ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ if (WARN_ON(kbase_pm_is_gpu_lost(kbdev))) { ++ dev_err(kbdev->dev, ++ "%s: Cannot power up while GPU lost", __func__); ++ return; ++ } ++#endif ++ + if (kbdev->pm.backend.gpu_powered) { + /* Already turned on */ + if (kbdev->poweroff_pending) +@@ -1602,11 +2139,40 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) + kbdev->pm.backend.gpu_powered = true; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + ++#if MALI_USE_CSF ++ /* GPU has been turned on, can switch to actual register page */ ++ update_user_reg_page_mapping(kbdev); ++#endif ++ + if (reset_required) { + /* GPU state was lost, reset GPU to ensure it is in a +- * consistent state */ ++ * consistent state ++ */ + kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS); + } ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ else { ++ if (kbdev->arb.arb_if) { ++ struct kbase_arbiter_vm_state *arb_vm_state = ++ kbdev->pm.arb_vm_state; ++ ++ /* In the case that the GPU has just been granted by ++ * the Arbiter, a reset will have already been done. ++ * However, it is still necessary to initialize the GPU. ++ */ ++ if (arb_vm_state->vm_arb_starting) ++ kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS | ++ PM_NO_RESET); ++ } ++ } ++ /* ++ * This point means that the GPU transitioned to ON. So there is a chance ++ * that a repartitioning occurred. In this case the current config ++ * should be read again.
++ */ ++ kbase_gpuprops_get_curr_config_props(kbdev, ++ &kbdev->gpu_props.curr_config); ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); +@@ -1628,7 +2194,19 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) + + /* Turn on the L2 caches */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->pm.backend.gpu_ready = true; + kbdev->pm.backend.l2_desired = true; ++#if MALI_USE_CSF ++ if (reset_required) { ++ /* GPU reset was done after the power on, so send the post ++ * reset event instead. This is okay as GPU power off event ++ * is same as pre GPU reset event. ++ */ ++ kbase_ipa_control_handle_gpu_reset_post(kbdev); ++ } else { ++ kbase_ipa_control_handle_gpu_power_on(kbdev); ++ } ++#endif + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } +@@ -1639,7 +2217,7 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev) + { + unsigned long flags; + +- KBASE_DEBUG_ASSERT(NULL != kbdev); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + lockdep_assert_held(&kbdev->pm.lock); + + /* ASSERT that the cores should now be unavailable. No lock needed. */ +@@ -1663,16 +2241,38 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev) + + if (atomic_read(&kbdev->faults_pending)) { + /* Page/bus faults are still being processed. 
The GPU can not +- * be powered off until they have completed */ ++ * be powered off until they have completed ++ */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return false; + } + + kbase_pm_cache_snoop_disable(kbdev); ++#if MALI_USE_CSF ++ kbase_ipa_control_handle_gpu_power_off(kbdev); ++#endif ++ ++ kbdev->pm.backend.gpu_ready = false; + + /* The GPU power may be turned off from this point */ + kbdev->pm.backend.gpu_powered = false; ++ ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ if (kbase_pm_is_gpu_lost(kbdev)) { ++ /* Ensure we unblock any threads that are stuck waiting ++ * for the GPU ++ */ ++ kbase_gpu_cache_clean_wait_complete(kbdev); ++ } ++#endif ++ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++#if MALI_USE_CSF ++ /* GPU is about to be turned off, switch to dummy page */ ++ update_user_reg_page_mapping(kbdev); ++#endif ++ + #ifdef CONFIG_MALI_ARBITER_SUPPORT + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_IDLE_EVENT); + #endif /* CONFIG_MALI_ARBITER_SUPPORT */ +@@ -1720,19 +2320,23 @@ static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer) + struct kbasep_reset_timeout_data *rtdata = + container_of(timer, struct kbasep_reset_timeout_data, timer); + +- rtdata->timed_out = 1; ++ rtdata->timed_out = true; + + /* Set the wait queue to wake up kbase_pm_init_hw even though the reset +- * hasn't completed */ ++ * hasn't completed ++ */ + kbase_pm_reset_done(rtdata->kbdev); + + return HRTIMER_NORESTART; + } + +-static int kbase_set_jm_quirks(struct kbase_device *kbdev, const u32 prod_id) ++static int kbase_set_gpu_quirks(struct kbase_device *kbdev, const u32 prod_id) + { +- u32 hw_quirks_jm = kbase_reg_read(kbdev, +- GPU_CONTROL_REG(JM_CONFIG)); ++#if MALI_USE_CSF ++ kbdev->hw_quirks_gpu = ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(CSF_CONFIG)); ++#else ++ u32 hw_quirks_gpu = kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG)); + + if (GPU_ID2_MODEL_MATCH_VALUE(prod_id) == GPU_ID2_PRODUCT_TMIX) { + /* Only for tMIx */ +@@ 
-1746,38 +2350,38 @@ static int kbase_set_jm_quirks(struct kbase_device *kbdev, const u32 prod_id) + */ + if (coherency_features == + COHERENCY_FEATURE_BIT(COHERENCY_ACE)) { +- hw_quirks_jm |= (COHERENCY_ACE_LITE | +- COHERENCY_ACE) << +- JM_FORCE_COHERENCY_FEATURES_SHIFT; ++ hw_quirks_gpu |= (COHERENCY_ACE_LITE | COHERENCY_ACE) ++ << JM_FORCE_COHERENCY_FEATURES_SHIFT; + } + } + +- if (kbase_is_gpu_lost(kbdev)) ++ if (kbase_is_gpu_removed(kbdev)) + return -EIO; + +- kbdev->hw_quirks_jm = hw_quirks_jm; ++ kbdev->hw_quirks_gpu = hw_quirks_gpu; + ++#endif /* !MALI_USE_CSF */ + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_IDVS_GROUP_SIZE)) { + int default_idvs_group_size = 0xF; +- u32 tmp; ++ u32 group_size = 0; + +- if (of_property_read_u32(kbdev->dev->of_node, +- "idvs-group-size", &tmp)) +- tmp = default_idvs_group_size; ++ if (of_property_read_u32(kbdev->dev->of_node, "idvs-group-size", ++ &group_size)) ++ group_size = default_idvs_group_size; + +- if (tmp > IDVS_GROUP_MAX_SIZE) { ++ if (group_size > IDVS_GROUP_MAX_SIZE) { + dev_err(kbdev->dev, + "idvs-group-size of %d is too large. 
Maximum value is %d", +- tmp, IDVS_GROUP_MAX_SIZE); +- tmp = default_idvs_group_size; ++ group_size, IDVS_GROUP_MAX_SIZE); ++ group_size = default_idvs_group_size; + } + +- kbdev->hw_quirks_jm |= tmp << IDVS_GROUP_SIZE_SHIFT; ++ kbdev->hw_quirks_gpu |= group_size << IDVS_GROUP_SIZE_SHIFT; + } + + #define MANUAL_POWER_CONTROL ((u32)(1 << 8)) + if (corestack_driver_control) +- kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL; ++ kbdev->hw_quirks_gpu |= MANUAL_POWER_CONTROL; + + return 0; + } +@@ -1787,7 +2391,7 @@ static int kbase_set_sc_quirks(struct kbase_device *kbdev, const u32 prod_id) + u32 hw_quirks_sc = kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_CONFIG)); + +- if (kbase_is_gpu_lost(kbdev)) ++ if (kbase_is_gpu_removed(kbdev)) + return -EIO; + + if (prod_id < 0x750 || prod_id == 0x6956) /* T60x, T62x, T72x */ +@@ -1811,7 +2415,7 @@ static int kbase_set_tiler_quirks(struct kbase_device *kbdev) + u32 hw_quirks_tiler = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_CONFIG)); + +- if (kbase_is_gpu_lost(kbdev)) ++ if (kbase_is_gpu_removed(kbdev)) + return -EIO; + + /* Set tiler clock gate override if required */ +@@ -1831,18 +2435,17 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) + GPU_ID_VERSION_PRODUCT_ID_SHIFT; + int error = 0; + +- kbdev->hw_quirks_jm = 0; ++ kbdev->hw_quirks_gpu = 0; + kbdev->hw_quirks_sc = 0; + kbdev->hw_quirks_tiler = 0; + kbdev->hw_quirks_mmu = 0; + +- if (!of_property_read_u32(np, "quirks_jm", +- &kbdev->hw_quirks_jm)) { ++ if (!of_property_read_u32(np, "quirks_gpu", &kbdev->hw_quirks_gpu)) { + dev_info(kbdev->dev, +- "Found quirks_jm = [0x%x] in Devicetree\n", +- kbdev->hw_quirks_jm); ++ "Found quirks_gpu = [0x%x] in Devicetree\n", ++ kbdev->hw_quirks_gpu); + } else { +- error = kbase_set_jm_quirks(kbdev, prod_id); ++ error = kbase_set_gpu_quirks(kbdev, prod_id); + if (error) + return error; + } +@@ -1891,15 +2494,20 @@ static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) + + kbase_reg_write(kbdev, 
GPU_CONTROL_REG(L2_MMU_CONFIG), + kbdev->hw_quirks_mmu); ++#if MALI_USE_CSF ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(CSF_CONFIG), ++ kbdev->hw_quirks_gpu); ++#else + kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG), +- kbdev->hw_quirks_jm); ++ kbdev->hw_quirks_gpu); ++#endif + } + + void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) + { + if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) && + !kbdev->cci_snoop_enabled) { +-#ifdef CONFIG_ARM64 ++#if IS_ENABLED(CONFIG_ARM64) + if (kbdev->snoop_enable_smc != 0) + kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0); + #endif /* CONFIG_ARM64 */ +@@ -1911,7 +2519,7 @@ void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) + void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev) + { + if (kbdev->cci_snoop_enabled) { +-#ifdef CONFIG_ARM64 ++#if IS_ENABLED(CONFIG_ARM64) + if (kbdev->snoop_disable_smc != 0) { + mali_cci_flush_l2(kbdev); + kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0); +@@ -1922,6 +2530,7 @@ void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev) + } + } + ++#if !MALI_USE_CSF + static void reenable_protected_mode_hwcnt(struct kbase_device *kbdev) + { + unsigned long irq_flags; +@@ -1934,6 +2543,7 @@ static void reenable_protected_mode_hwcnt(struct kbase_device *kbdev) + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); + } ++#endif + + static int kbase_pm_do_reset(struct kbase_device *kbdev) + { +@@ -1960,7 +2570,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) + + /* Initialize a structure for tracking the status of the reset */ + rtdata.kbdev = kbdev; +- rtdata.timed_out = 0; ++ rtdata.timed_out = false; + + /* Create a timer to use as a timeout on the reset */ + hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); +@@ -1972,7 +2582,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) + /* Wait for the RESET_COMPLETED interrupt to be raised */ + kbase_pm_wait_for_reset(kbdev); + +- if (rtdata.timed_out 
== 0) { ++ if (!rtdata.timed_out) { + /* GPU has been reset */ + hrtimer_cancel(&rtdata.timer); + destroy_hrtimer_on_stack(&rtdata.timer); +@@ -1980,46 +2590,60 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) + } + + /* No interrupt has been received - check if the RAWSTAT register says +- * the reset has completed */ ++ * the reset has completed ++ */ + if ((kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & +- RESET_COMPLETED) +- || kbase_is_gpu_lost(kbdev)) { ++ RESET_COMPLETED)) { + /* The interrupt is set in the RAWSTAT; this suggests that the +- * interrupts are not getting to the CPU */ ++ * interrupts are not getting to the CPU ++ */ + dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n"); + /* If interrupts aren't working we can't continue. */ + destroy_hrtimer_on_stack(&rtdata.timer); + return -EINVAL; + } + ++ if (kbase_is_gpu_removed(kbdev)) { ++ dev_dbg(kbdev->dev, "GPU has been removed, reset no longer needed.\n"); ++ destroy_hrtimer_on_stack(&rtdata.timer); ++ return -EINVAL; ++ } ++ + /* The GPU doesn't seem to be responding to the reset so try a hard +- * reset */ +- dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n", +- RESET_TIMEOUT); +- KBASE_KTRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, 0); +- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), +- GPU_COMMAND_HARD_RESET); ++ * reset, but only when NOT in arbitration mode. 
++ */ ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ if (!kbdev->arb.arb_if) { ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n", ++ RESET_TIMEOUT); ++ KBASE_KTRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, 0); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_HARD_RESET); + +- /* Restart the timer to wait for the hard reset to complete */ +- rtdata.timed_out = 0; ++ /* Restart the timer to wait for the hard reset to complete */ ++ rtdata.timed_out = false; + +- hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), +- HRTIMER_MODE_REL); ++ hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), ++ HRTIMER_MODE_REL); + +- /* Wait for the RESET_COMPLETED interrupt to be raised */ +- kbase_pm_wait_for_reset(kbdev); ++ /* Wait for the RESET_COMPLETED interrupt to be raised */ ++ kbase_pm_wait_for_reset(kbdev); + +- if (rtdata.timed_out == 0) { +- /* GPU has been reset */ +- hrtimer_cancel(&rtdata.timer); +- destroy_hrtimer_on_stack(&rtdata.timer); +- return 0; +- } ++ if (!rtdata.timed_out) { ++ /* GPU has been reset */ ++ hrtimer_cancel(&rtdata.timer); ++ destroy_hrtimer_on_stack(&rtdata.timer); ++ return 0; ++ } + +- destroy_hrtimer_on_stack(&rtdata.timer); ++ destroy_hrtimer_on_stack(&rtdata.timer); + +- dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n", +- RESET_TIMEOUT); ++ dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n", ++ RESET_TIMEOUT); ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ } ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + + return -EINVAL; + } +@@ -2041,9 +2665,9 @@ int kbase_pm_protected_mode_disable(struct kbase_device *const kbdev) + int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) + { + unsigned long irq_flags; +- int err; ++ int err = 0; + +- KBASE_DEBUG_ASSERT(NULL != kbdev); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + lockdep_assert_held(&kbdev->pm.lock); + 
+ /* Ensure the clock is on before attempting to access the hardware */ +@@ -2055,7 +2679,8 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) + } + + /* Ensure interrupts are off to begin with, this also clears any +- * outstanding interrupts */ ++ * outstanding interrupts ++ */ + kbase_pm_disable_interrupts(kbdev); + /* Ensure cache snoops are disabled before reset. */ + kbase_pm_cache_snoop_disable(kbdev); +@@ -2069,10 +2694,24 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); + + /* Soft reset the GPU */ +- err = kbdev->protected_ops->protected_mode_disable( +- kbdev->protected_dev); ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ if (!(flags & PM_NO_RESET)) ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ err = kbdev->protected_ops->protected_mode_disable( ++ kbdev->protected_dev); + + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); ++#if MALI_USE_CSF ++ if (kbdev->protected_mode) { ++ unsigned long flags; ++ ++ kbase_ipa_control_protm_exited(kbdev); ++ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ kbase_hwcnt_backend_csf_protm_exited(&kbdev->hwcnt_gpu_iface); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ } ++#endif + kbdev->protected_mode = false; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); + +@@ -2093,7 +2732,8 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) + GPU_STATUS_PROTECTED_MODE_ACTIVE); + + /* If cycle counter was in use re-enable it, enable_irqs will only be +- * false when called from kbase_pm_powerup */ ++ * false when called from kbase_pm_powerup ++ */ + if (kbdev->pm.backend.gpu_cycle_counter_requests && + (flags & PM_ENABLE_IRQS)) { + kbase_pm_enable_interrupts(kbdev); +@@ -2116,12 +2756,14 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) + kbase_pm_enable_interrupts(kbdev); + + exit: ++#if !MALI_USE_CSF + if (!kbdev->pm.backend.protected_entry_transition_override) { + /* 
Re-enable GPU hardware counters if we're resetting from + * protected mode. + */ + reenable_protected_mode_hwcnt(kbdev); + } ++#endif + + return err; + } +@@ -2148,12 +2790,21 @@ kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev) + + spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, + flags); +- + ++kbdev->pm.backend.gpu_cycle_counter_requests; + +- if (1 == kbdev->pm.backend.gpu_cycle_counter_requests) ++ if (kbdev->pm.backend.gpu_cycle_counter_requests == 1) + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CYCLE_COUNT_START); ++ else { ++ /* This might happen after GPU reset. ++ * Then counter needs to be kicked. ++ */ ++ if (!(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & ++ GPU_STATUS_CYCLE_COUNT_ACTIVE)) { ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_CYCLE_COUNT_START); ++ } ++ } + + spin_unlock_irqrestore( + &kbdev->pm.backend.gpu_cycle_counter_requests_lock, +@@ -2169,6 +2820,8 @@ void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev) + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < + INT_MAX); + ++ kbase_pm_wait_for_l2_powered(kbdev); ++ + kbase_pm_request_gpu_cycle_counter_do_request(kbdev); + } + +@@ -2203,7 +2856,7 @@ void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev) + + --kbdev->pm.backend.gpu_cycle_counter_requests; + +- if (0 == kbdev->pm.backend.gpu_cycle_counter_requests) ++ if (kbdev->pm.backend.gpu_cycle_counter_requests == 0) + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CYCLE_COUNT_STOP); + +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h +index 95f10e0..70d009e 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h +@@ -1,11 +1,12 @@ ++/* 
SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /* + * Power management API definitions used internally by GPU backend + */ +@@ -31,7 +28,7 @@ + + #include + +-#include "mali_kbase_pm_ca.h" ++#include "backend/gpu/mali_kbase_pm_ca.h" + #include "mali_kbase_pm_policy.h" + + +@@ -205,6 +202,30 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags); + */ + void kbase_pm_reset_done(struct kbase_device *kbdev); + ++#if MALI_USE_CSF ++/** ++ * kbase_pm_wait_for_desired_state - Wait for the desired power state to be ++ * reached ++ * ++ * Wait for the L2 and MCU state machines to reach the states corresponding ++ * to the values of 'kbase_pm_is_l2_desired' and 'kbase_pm_is_mcu_desired'. ++ * ++ * The usual use-case for this is to ensure that all parts of GPU have been ++ * powered up after performing a GPU Reset. ++ * ++ * Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock, ++ * because this function will take that lock itself. ++ * ++ * NOTE: This may not wait until the correct state is reached if there is a ++ * power off in progress and kbase_pm_context_active() was called instead of ++ * kbase_csf_scheduler_pm_active(). 
++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Return: 0 on success, error code on error ++ */ ++int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); ++#else + /** + * kbase_pm_wait_for_desired_state - Wait for the desired power state to be + * reached +@@ -224,15 +245,17 @@ void kbase_pm_reset_done(struct kbase_device *kbdev); + * kbase_pm_wait_for_poweroff_complete() + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Return: 0 on success, error code on error + */ +-void kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); ++int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); ++#endif + + /** + * kbase_pm_wait_for_l2_powered - Wait for the L2 cache to be powered on + * +- * Wait for the L2 to be powered on, and for the L2 and shader state machines to +- * stabilise by reaching the states corresponding to the values of 'l2_desired' +- * and 'shaders_desired'. ++ * Wait for the L2 to be powered on, and for the L2 and the state machines of ++ * its dependent stack components to stabilise. + * + * kbdev->pm.active_count must be non-zero when calling this function. + * +@@ -240,8 +263,10 @@ void kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); + * because this function will take that lock itself. 
+ * + * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * ++ * Return: 0 on success, error code on error + */ +-void kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev); ++int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev); + + /** + * kbase_pm_update_dynamic_cores_onoff - Update the L2 and shader power state +@@ -467,7 +492,8 @@ void kbase_pm_register_access_enable(struct kbase_device *kbdev); + void kbase_pm_register_access_disable(struct kbase_device *kbdev); + + /* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline +- * function */ ++ * function ++ */ + + /** + * kbase_pm_metrics_is_active - Check if the power management metrics +@@ -511,8 +537,22 @@ void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, + + #ifdef CONFIG_MALI_MIDGARD_DVFS + ++#if MALI_USE_CSF ++/** ++ * kbase_platform_dvfs_event - Report utilisation to DVFS code for CSF GPU ++ * ++ * Function provided by platform specific code when DVFS is enabled to allow ++ * the power management metrics system to report utilisation. ++ * ++ * @kbdev: The kbase device structure for the device (must be a ++ * valid pointer) ++ * @utilisation: The current calculated utilisation by the metrics system. ++ * Return: Returns 0 on failure and non zero on success. ++ */ ++int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation); ++#else + /** +- * kbase_platform_dvfs_event - Report utilisation to DVFS code ++ * kbase_platform_dvfs_event - Report utilisation to DVFS code for JM GPU + * + * Function provided by platform specific code when DVFS is enabled to allow + * the power management metrics system to report utilisation. +@@ -525,11 +565,12 @@ void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, + * group. + * Return: Returns 0 on failure and non zero on success. 
+ */ +- + int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, +- u32 util_gl_share, u32 util_cl_share[2]); ++ u32 util_gl_share, u32 util_cl_share[2]); + #endif + ++#endif /* CONFIG_MALI_MIDGARD_DVFS */ ++ + void kbase_pm_power_changed(struct kbase_device *kbdev); + + /** +@@ -683,6 +724,72 @@ extern bool corestack_driver_control; + */ + bool kbase_pm_is_l2_desired(struct kbase_device *kbdev); + ++#if MALI_USE_CSF ++/** ++ * kbase_pm_is_mcu_desired - Check whether MCU is desired ++ * ++ * @kbdev: Device pointer ++ * ++ * This shall be called to check whether MCU needs to be enabled. ++ * ++ * Return: true if MCU needs to be enabled. ++ */ ++bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev); ++ ++/** ++ * kbase_pm_idle_groups_sched_suspendable - Check whether the scheduler can be ++ * suspended to low power state when all ++ * the CSGs are idle ++ * ++ * @kbdev: Device pointer ++ * ++ * Return: true if allowed to enter the suspended state. ++ */ ++static inline ++bool kbase_pm_idle_groups_sched_suspendable(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ return !(kbdev->pm.backend.csf_pm_sched_flags & ++ CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE); ++} ++ ++/** ++ * kbase_pm_no_runnables_sched_suspendable - Check whether the scheduler can be ++ * suspended to low power state when ++ * there are no runnable CSGs. ++ * ++ * @kbdev: Device pointer ++ * ++ * Return: true if allowed to enter the suspended state. ++ */ ++static inline ++bool kbase_pm_no_runnables_sched_suspendable(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ return !(kbdev->pm.backend.csf_pm_sched_flags & ++ CSF_DYNAMIC_PM_SCHED_NO_SUSPEND); ++} ++ ++/** ++ * kbase_pm_no_mcu_core_pwroff - Check whether the PM is required to keep the ++ * MCU core powered in accordance with the active ++ * power management policy ++ * ++ * @kbdev: Device pointer ++ * ++ * Return: true if the MCU is to remain powered. 
++ */ ++static inline bool kbase_pm_no_mcu_core_pwroff(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ return kbdev->pm.backend.csf_pm_sched_flags & ++ CSF_DYNAMIC_PM_CORE_KEEP_ON; ++} ++#endif ++ + /** + * kbase_pm_lock - Lock all necessary mutexes to perform PM actions + * +@@ -692,7 +799,9 @@ bool kbase_pm_is_l2_desired(struct kbase_device *kbdev); + */ + static inline void kbase_pm_lock(struct kbase_device *kbdev) + { ++#if !MALI_USE_CSF + mutex_lock(&kbdev->js_data.runpool_mutex); ++#endif /* !MALI_USE_CSF */ + mutex_lock(&kbdev->pm.lock); + } + +@@ -704,7 +813,9 @@ static inline void kbase_pm_lock(struct kbase_device *kbdev) + static inline void kbase_pm_unlock(struct kbase_device *kbdev) + { + mutex_unlock(&kbdev->pm.lock); ++#if !MALI_USE_CSF + mutex_unlock(&kbdev->js_data.runpool_mutex); ++#endif /* !MALI_USE_CSF */ + } + + #endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_l2_states.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_l2_states.h +index 12cb051..ef72f60 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_l2_states.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_l2_states.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +@@ -25,6 +24,19 @@ + * The function-like macro KBASEP_L2_STATE() must be defined before including + * this header file. This header file can be included multiple times in the + * same compilation unit with different definitions of KBASEP_L2_STATE(). ++ * ++ * @OFF: The L2 cache and tiler are off ++ * @PEND_ON: The L2 cache and tiler are powering on ++ * @RESTORE_CLOCKS: The GPU clock is restored. Conditionally used. ++ * @ON_HWCNT_ENABLE: The L2 cache and tiler are on, and hwcnt is being enabled ++ * @ON: The L2 cache and tiler are on, and hwcnt is enabled ++ * @ON_HWCNT_DISABLE: The L2 cache and tiler are on, and hwcnt is being disabled ++ * @SLOW_DOWN_CLOCKS: The GPU clock is set to appropriate or lowest clock. ++ * Conditionally used. ++ * @POWER_DOWN: The L2 cache and tiler are about to be powered off ++ * @PEND_OFF: The L2 cache and tiler are powering off ++ * @RESET_WAIT: The GPU is resetting, L2 cache and tiler power state are ++ * unknown + */ + KBASEP_L2_STATE(OFF) + KBASEP_L2_STATE(PEND_ON) +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_mcu_states.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_mcu_states.h +new file mode 100644 +index 0000000..4e99928 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_mcu_states.h +@@ -0,0 +1,63 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++/* ++ * Backend-specific Power Manager MCU state definitions. ++ * The function-like macro KBASEP_MCU_STATE() must be defined before including ++ * this header file. This header file can be included multiple times in the ++ * same compilation unit with different definitions of KBASEP_MCU_STATE(). ++ * ++ * @OFF: The MCU is powered off. ++ * @PEND_ON_RELOAD: The warm boot of MCU or cold boot of MCU (with ++ * firmware reloading) is in progress. ++ * @ON_GLB_REINIT_PEND: The MCU is enabled and Global configuration ++ * requests have been sent to the firmware. ++ * @ON_HWCNT_ENABLE: The Global requests have completed and MCU is now ++ * ready for use and hwcnt is being enabled. ++ * @ON: The MCU is active and hwcnt has been enabled. ++ * @ON_CORE_ATTR_UPDATE_PEND: The MCU is active and mask of enabled shader cores ++ * is being updated. ++ * @ON_HWCNT_DISABLE: The MCU is on and hwcnt is being disabled. ++ * @ON_HALT: The MCU is on and hwcnt has been disabled, MCU ++ * halt would be triggered. ++ * @ON_PEND_HALT: MCU halt in progress, confirmation pending. ++ * @POWER_DOWN: MCU halted operations, pending being disabled. ++ * @PEND_OFF: MCU is being disabled, pending on powering off. ++ * @RESET_WAIT: The GPU is resetting, MCU state is unknown. 
++ */ ++KBASEP_MCU_STATE(OFF) ++KBASEP_MCU_STATE(PEND_ON_RELOAD) ++KBASEP_MCU_STATE(ON_GLB_REINIT_PEND) ++KBASEP_MCU_STATE(ON_HWCNT_ENABLE) ++KBASEP_MCU_STATE(ON) ++KBASEP_MCU_STATE(ON_CORE_ATTR_UPDATE_PEND) ++KBASEP_MCU_STATE(ON_HWCNT_DISABLE) ++KBASEP_MCU_STATE(ON_HALT) ++KBASEP_MCU_STATE(ON_PEND_HALT) ++KBASEP_MCU_STATE(POWER_DOWN) ++KBASEP_MCU_STATE(PEND_OFF) ++KBASEP_MCU_STATE(RESET_WAIT) ++/* Additional MCU states with HOST_CONTROL_SHADERS */ ++KBASEP_MCU_STATE(HCTL_SHADERS_PEND_ON) ++KBASEP_MCU_STATE(HCTL_CORES_NOTIFY_PEND) ++KBASEP_MCU_STATE(HCTL_MCU_ON_RECHECK) ++KBASEP_MCU_STATE(HCTL_SHADERS_READY_OFF) ++KBASEP_MCU_STATE(HCTL_SHADERS_PEND_OFF) +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c +index de3babe..69e8dd3 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /* + * Metrics for power management + */ +@@ -29,22 +26,28 @@ + #include + #include + #include ++ ++#if MALI_USE_CSF ++#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" ++#include ++#else + #include ++#endif /* !MALI_USE_CSF */ ++ + #include + #include + +-/* When VSync is being hit aim for utilisation between 70-90% */ +-#define KBASE_PM_VSYNC_MIN_UTILISATION 70 +-#define KBASE_PM_VSYNC_MAX_UTILISATION 90 +-/* Otherwise aim for 10-40% */ +-#define KBASE_PM_NO_VSYNC_MIN_UTILISATION 10 +-#define KBASE_PM_NO_VSYNC_MAX_UTILISATION 40 +- + /* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns + * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly +- * under 11s. Exceeding this will cause overflow */ ++ * under 11s. Exceeding this will cause overflow ++ */ + #define KBASE_PM_TIME_SHIFT 8 + ++#if MALI_USE_CSF ++/* To get the GPU_ACTIVE value in nano seconds unit */ ++#define GPU_ACTIVE_SCALING_FACTOR ((u64)1E9) ++#endif ++ + #ifdef CONFIG_MALI_MIDGARD_DVFS + static enum hrtimer_restart dvfs_callback(struct hrtimer *timer) + { +@@ -71,11 +74,45 @@ static enum hrtimer_restart dvfs_callback(struct hrtimer *timer) + + int kbasep_pm_metrics_init(struct kbase_device *kbdev) + { +- KBASE_DEBUG_ASSERT(kbdev != NULL); ++#if MALI_USE_CSF ++ struct kbase_ipa_control_perf_counter perf_counter; ++ int err; + ++ /* One counter group */ ++ const size_t NUM_PERF_COUNTERS = 1; ++ ++ KBASE_DEBUG_ASSERT(kbdev != NULL); + kbdev->pm.backend.metrics.kbdev = kbdev; ++ kbdev->pm.backend.metrics.time_period_start = ktime_get(); ++ kbdev->pm.backend.metrics.values.time_busy = 0; ++ kbdev->pm.backend.metrics.values.time_idle = 0; ++ kbdev->pm.backend.metrics.values.time_in_protm = 0; ++ ++ perf_counter.scaling_factor = GPU_ACTIVE_SCALING_FACTOR; + ++ /* Normalize values by GPU frequency */ ++ perf_counter.gpu_norm = true; ++ ++ /* We need the GPU_ACTIVE counter, which is in the CSHW 
group */ ++ perf_counter.type = KBASE_IPA_CORE_TYPE_CSHW; ++ ++ /* We need the GPU_ACTIVE counter */ ++ perf_counter.idx = GPU_ACTIVE_CNT_IDX; ++ ++ err = kbase_ipa_control_register( ++ kbdev, &perf_counter, NUM_PERF_COUNTERS, ++ &kbdev->pm.backend.metrics.ipa_control_client); ++ if (err) { ++ dev_err(kbdev->dev, ++ "Failed to register IPA with kbase_ipa_control: err=%d", ++ err); ++ return -1; ++ } ++#else ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ kbdev->pm.backend.metrics.kbdev = kbdev; + kbdev->pm.backend.metrics.time_period_start = ktime_get(); ++ + kbdev->pm.backend.metrics.gpu_active = false; + kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; + kbdev->pm.backend.metrics.active_cl_ctx[1] = 0; +@@ -89,16 +126,25 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev) + kbdev->pm.backend.metrics.values.busy_cl[1] = 0; + kbdev->pm.backend.metrics.values.busy_gl = 0; + ++#endif + spin_lock_init(&kbdev->pm.backend.metrics.lock); + + #ifdef CONFIG_MALI_MIDGARD_DVFS + hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + kbdev->pm.backend.metrics.timer.function = dvfs_callback; +- ++ kbdev->pm.backend.metrics.initialized = true; + kbase_pm_metrics_start(kbdev); + #endif /* CONFIG_MALI_MIDGARD_DVFS */ + ++#if MALI_USE_CSF ++ /* The sanity check on the GPU_ACTIVE performance counter ++ * is skipped for Juno platforms that have timing problems. 
++ */ ++ kbdev->pm.backend.metrics.skip_gpu_active_sanity_check = ++ of_machine_is_compatible("arm,juno"); ++#endif ++ + return 0; + } + KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init); +@@ -115,7 +161,13 @@ void kbasep_pm_metrics_term(struct kbase_device *kbdev) + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); + + hrtimer_cancel(&kbdev->pm.backend.metrics.timer); ++ kbdev->pm.backend.metrics.initialized = false; + #endif /* CONFIG_MALI_MIDGARD_DVFS */ ++ ++#if MALI_USE_CSF ++ kbase_ipa_control_unregister( ++ kbdev, kbdev->pm.backend.metrics.ipa_control_client); ++#endif + } + + KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term); +@@ -123,8 +175,117 @@ KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term); + /* caller needs to hold kbdev->pm.backend.metrics.lock before calling this + * function + */ ++#if MALI_USE_CSF ++#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) ++static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev) ++{ ++ int err; ++ u64 gpu_active_counter; ++ u64 protected_time; ++ ktime_t now; ++ ++ lockdep_assert_held(&kbdev->pm.backend.metrics.lock); ++ ++ /* Query IPA_CONTROL for the latest GPU-active and protected-time ++ * info. ++ */ ++ err = kbase_ipa_control_query( ++ kbdev, kbdev->pm.backend.metrics.ipa_control_client, ++ &gpu_active_counter, 1, &protected_time); ++ ++ /* Read the timestamp after reading the GPU_ACTIVE counter value. ++ * This ensures the time gap between the 2 reads is consistent for ++ * a meaningful comparison between the increment of GPU_ACTIVE and ++ * elapsed time. The lock taken inside kbase_ipa_control_query() ++ * function can cause lot of variation. 
++ */ ++ now = ktime_get(); ++ ++ if (err) { ++ dev_err(kbdev->dev, ++ "Failed to query the increment of GPU_ACTIVE counter: err=%d", ++ err); ++ } else { ++ u64 diff_ns; ++ s64 diff_ns_signed; ++ u32 ns_time; ++ ktime_t diff = ktime_sub( ++ now, kbdev->pm.backend.metrics.time_period_start); ++ ++ diff_ns_signed = ktime_to_ns(diff); ++ ++ if (diff_ns_signed < 0) ++ return; ++ ++ diff_ns = (u64)diff_ns_signed; ++ ++#if !IS_ENABLED(CONFIG_MALI_NO_MALI) ++ /* The GPU_ACTIVE counter shouldn't clock-up more time than has ++ * actually elapsed - but still some margin needs to be given ++ * when doing the comparison. There could be some drift between ++ * the CPU and GPU clock. ++ * ++ * Can do the check only in a real driver build, as an arbitrary ++ * value for GPU_ACTIVE can be fed into dummy model in no_mali ++ * configuration which may not correspond to the real elapsed ++ * time. ++ */ ++ if (!kbdev->pm.backend.metrics.skip_gpu_active_sanity_check) { ++ /* Use a margin value that is approximately 1.5% (1/64) of the time ++ * difference. ++ */ ++ u64 margin_ns = diff_ns >> 6; ++ if (gpu_active_counter > (diff_ns + margin_ns)) { ++ dev_info( ++ kbdev->dev, ++ "GPU activity takes longer than time interval: %llu ns > %llu ns", ++ (unsigned long long)gpu_active_counter, ++ (unsigned long long)diff_ns); ++ } ++ } ++#endif ++ /* Calculate time difference in units of 256ns */ ++ ns_time = (u32)(diff_ns >> KBASE_PM_TIME_SHIFT); ++ ++ /* Add protected_time to gpu_active_counter so that time in ++ * protected mode is included in the apparent GPU active time, ++ * then convert it from units of 1ns to units of 256ns, to ++ * match what JM GPUs use. The assumption is made here that the ++ * GPU is 100% busy while in protected mode, so we should add ++ * this since the GPU can't (and thus won't) update these ++ * counters while it's actually in protected mode. ++ * ++ * Perform the add after dividing each value down, to reduce ++ * the chances of overflows. 
++ */ ++ protected_time >>= KBASE_PM_TIME_SHIFT; ++ gpu_active_counter >>= KBASE_PM_TIME_SHIFT; ++ gpu_active_counter += protected_time; ++ ++ /* Ensure the following equations don't go wrong if gpu_active_counter ++ * is slightly larger than ns_time somehow ++ */ ++ gpu_active_counter = MIN(gpu_active_counter, ns_time); ++ ++ kbdev->pm.backend.metrics.values.time_busy += ++ gpu_active_counter; ++ ++ kbdev->pm.backend.metrics.values.time_idle += ++ ns_time - gpu_active_counter; ++ ++ /* Also make time in protected mode available explicitly, ++ * so users of this data have this info, too. ++ */ ++ kbdev->pm.backend.metrics.values.time_in_protm += ++ protected_time; ++ } ++ ++ kbdev->pm.backend.metrics.time_period_start = now; ++} ++#endif /* defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) */ ++#else + static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev, +- ktime_t now) ++ ktime_t now) + { + ktime_t diff; + +@@ -149,12 +310,13 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev, + if (kbdev->pm.backend.metrics.active_gl_ctx[2]) + kbdev->pm.backend.metrics.values.busy_gl += ns_time; + } else { +- kbdev->pm.backend.metrics.values.time_idle += (u32) (ktime_to_ns(diff) +- >> KBASE_PM_TIME_SHIFT); ++ kbdev->pm.backend.metrics.values.time_idle += ++ (u32)(ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); + } + + kbdev->pm.backend.metrics.time_period_start = now; + } ++#endif /* MALI_USE_CSF */ + + #if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) + void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, +@@ -165,14 +327,23 @@ void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); ++#if MALI_USE_CSF ++ kbase_pm_get_dvfs_utilisation_calc(kbdev); ++#else + kbase_pm_get_dvfs_utilisation_calc(kbdev, ktime_get()); ++#endif + + memset(diff, 0, sizeof(*diff)); + diff->time_busy = cur->time_busy - last->time_busy; 
+ diff->time_idle = cur->time_idle - last->time_idle; ++ ++#if MALI_USE_CSF ++ diff->time_in_protm = cur->time_in_protm - last->time_in_protm; ++#else + diff->busy_cl[0] = cur->busy_cl[0] - last->busy_cl[0]; + diff->busy_cl[1] = cur->busy_cl[1] - last->busy_cl[1]; + diff->busy_gl = cur->busy_gl - last->busy_gl; ++#endif + + *last = *cur; + +@@ -184,26 +355,42 @@ KBASE_EXPORT_TEST_API(kbase_pm_get_dvfs_metrics); + #ifdef CONFIG_MALI_MIDGARD_DVFS + void kbase_pm_get_dvfs_action(struct kbase_device *kbdev) + { +- int utilisation, util_gl_share; +- int util_cl_share[2]; +- int busy; ++ int utilisation; + struct kbasep_pm_metrics *diff; ++#if !MALI_USE_CSF ++ int busy; ++ int util_gl_share; ++ int util_cl_share[2]; ++#endif + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + diff = &kbdev->pm.backend.metrics.dvfs_diff; + +- kbase_pm_get_dvfs_metrics(kbdev, &kbdev->pm.backend.metrics.dvfs_last, diff); ++ kbase_pm_get_dvfs_metrics(kbdev, &kbdev->pm.backend.metrics.dvfs_last, ++ diff); + + utilisation = (100 * diff->time_busy) / + max(diff->time_busy + diff->time_idle, 1u); + ++#if !MALI_USE_CSF + busy = max(diff->busy_gl + diff->busy_cl[0] + diff->busy_cl[1], 1u); ++ + util_gl_share = (100 * diff->busy_gl) / busy; + util_cl_share[0] = (100 * diff->busy_cl[0]) / busy; + util_cl_share[1] = (100 * diff->busy_cl[1]) / busy; + +- kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, util_cl_share); ++ kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, ++ util_cl_share); ++#else ++ /* Note that, at present, we don't pass protected-mode time to the ++ * platform here. It's unlikely to be useful, however, as the platform ++ * probably just cares whether the GPU is busy or not; time in ++ * protected mode is already added to busy-time at this point, though, ++ * so we should be good. 
++ */ ++ kbase_platform_dvfs_event(kbdev, utilisation); ++#endif + } + + bool kbase_pm_metrics_is_active(struct kbase_device *kbdev) +@@ -224,11 +411,20 @@ KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active); + void kbase_pm_metrics_start(struct kbase_device *kbdev) + { + unsigned long flags; ++ bool update = true; ++ ++ if (unlikely(!kbdev->pm.backend.metrics.initialized)) ++ return; + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); +- kbdev->pm.backend.metrics.timer_active = true; ++ if (!kbdev->pm.backend.metrics.timer_active) ++ kbdev->pm.backend.metrics.timer_active = true; ++ else ++ update = false; + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); +- hrtimer_start(&kbdev->pm.backend.metrics.timer, ++ ++ if (update) ++ hrtimer_start(&kbdev->pm.backend.metrics.timer, + HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period), + HRTIMER_MODE_REL); + } +@@ -236,16 +432,26 @@ void kbase_pm_metrics_start(struct kbase_device *kbdev) + void kbase_pm_metrics_stop(struct kbase_device *kbdev) + { + unsigned long flags; ++ bool update = true; ++ ++ if (unlikely(!kbdev->pm.backend.metrics.initialized)) ++ return; + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); +- kbdev->pm.backend.metrics.timer_active = false; ++ if (kbdev->pm.backend.metrics.timer_active) ++ kbdev->pm.backend.metrics.timer_active = false; ++ else ++ update = false; + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); +- hrtimer_cancel(&kbdev->pm.backend.metrics.timer); ++ ++ if (update) ++ hrtimer_cancel(&kbdev->pm.backend.metrics.timer); + } + + + #endif /* CONFIG_MALI_MIDGARD_DVFS */ + ++#if !MALI_USE_CSF + /** + * kbase_pm_metrics_active_calc - Update PM active counts based on currently + * running atoms +@@ -270,7 +476,8 @@ static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev) + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); + + /* Head atom may have just completed, so if it isn't running +- * then try the next atom */ ++ * then 
try the next atom ++ */ + if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) + katom = kbase_gpu_inspect(kbdev, js, 1); + +@@ -309,10 +516,12 @@ void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp) + timestamp = &now; + } + +- /* Track how long CL and/or GL jobs have been busy for */ ++ /* Track how much of time has been spent busy or idle. For JM GPUs, ++ * this also evaluates how long CL and/or GL jobs have been busy for. ++ */ + kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp); + + kbase_pm_metrics_active_calc(kbdev); +- + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); + } ++#endif /* !MALI_USE_CSF */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c +index 17ed21e..cf61ef8 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +@@ -28,27 +27,54 @@ + #include + #include + #include ++#include + +-static const struct kbase_pm_policy *const all_policy_list[] = { +-#ifdef CONFIG_MALI_NO_MALI +- &kbase_pm_always_on_policy_ops, +- &kbase_pm_coarse_demand_policy_ops, +-#if !MALI_CUSTOMER_RELEASE +- &kbase_pm_always_on_demand_policy_ops, ++#if MALI_USE_CSF && defined CONFIG_MALI_DEBUG ++#include + #endif +-#else /* CONFIG_MALI_NO_MALI */ ++ ++#include ++ ++static const struct kbase_pm_policy *const all_policy_list[] = { + &kbase_pm_coarse_demand_policy_ops, +-#if !MALI_CUSTOMER_RELEASE +- &kbase_pm_always_on_demand_policy_ops, +-#endif + &kbase_pm_always_on_policy_ops +-#endif /* CONFIG_MALI_NO_MALI */ + }; + + void kbase_pm_policy_init(struct kbase_device *kbdev) + { +- kbdev->pm.backend.pm_current_policy = all_policy_list[0]; +- kbdev->pm.backend.pm_current_policy->init(kbdev); ++ const struct kbase_pm_policy *default_policy = all_policy_list[0]; ++ struct device_node *np = kbdev->dev->of_node; ++ const char *power_policy_name; ++ unsigned long flags; ++ int i; ++ ++ if (of_property_read_string(np, "power_policy", &power_policy_name) == 0) { ++ for (i = 0; i < ARRAY_SIZE(all_policy_list); i++) ++ if (sysfs_streq(all_policy_list[i]->name, power_policy_name)) { ++ default_policy = all_policy_list[i]; ++ break; ++ } ++ } ++ ++#if MALI_USE_CSF && defined(CONFIG_MALI_DEBUG) ++ /* Use always_on policy if module param fw_debug=1 is ++ * passed, to aid firmware debugging. 
++ */ ++ if (fw_debug) ++ default_policy = &kbase_pm_always_on_policy_ops; ++#endif ++ ++ default_policy->init(kbdev); ++ ++#if MALI_USE_CSF ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->pm.backend.pm_current_policy = default_policy; ++ kbdev->pm.backend.csf_pm_sched_flags = default_policy->pm_sched_flags; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++#else ++ CSTD_UNUSED(flags); ++ kbdev->pm.backend.pm_current_policy = default_policy; ++#endif + } + + void kbase_pm_policy_term(struct kbase_device *kbdev) +@@ -93,13 +119,17 @@ void kbase_pm_update_active(struct kbase_device *kbdev) + pm->backend.invoke_poweroff_wait_wq_when_l2_off = false; + pm->backend.poweroff_wait_in_progress = false; + pm->backend.l2_desired = true; ++#if MALI_USE_CSF ++ pm->backend.mcu_desired = true; ++#endif + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + kbase_pm_do_poweron(kbdev, false); + } + } else { + /* It is an error for the power policy to power off the GPU +- * when there are contexts active */ ++ * when there are contexts active ++ */ + KBASE_DEBUG_ASSERT(pm->active_count == 0); + + pm->backend.poweron_required = false; +@@ -127,6 +157,16 @@ void kbase_pm_update_dynamic_cores_onoff(struct kbase_device *kbdev) + return; + if (kbdev->pm.backend.poweroff_wait_in_progress) + return; ++ ++#if MALI_USE_CSF ++ CSTD_UNUSED(shaders_desired); ++ /* Invoke the MCU state machine to send a request to FW for updating ++ * the mask of shader cores that can be used for allocation of ++ * endpoints requested by CSGs. 
++ */ ++ if (kbase_pm_is_mcu_desired(kbdev)) ++ kbase_pm_update_state(kbdev); ++#else + /* In protected transition, don't allow outside shader core request + * affect transition, return directly + */ +@@ -138,6 +178,7 @@ void kbase_pm_update_dynamic_cores_onoff(struct kbase_device *kbdev) + if (shaders_desired && kbase_pm_is_l2_desired(kbdev)) { + kbase_pm_update_state(kbdev); + } ++#endif + } + + void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) +@@ -153,11 +194,20 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) + + if (kbdev->pm.backend.protected_transition_override) + /* We are trying to change in/out of protected mode - force all +- * cores off so that the L2 powers down */ ++ * cores off so that the L2 powers down ++ */ + shaders_desired = false; + else + shaders_desired = kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev); + ++#if MALI_USE_CSF ++ /* On CSF GPUs, Host driver isn't supposed to do the power management ++ * for shader cores. CSF firmware will power up the cores appropriately ++ * and so from Driver's standpoint 'shaders_desired' flag shall always ++ * remain 0. ++ */ ++ shaders_desired = false; ++#endif + if (kbdev->pm.backend.shaders_desired != shaders_desired) { + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, kbdev->pm.backend.shaders_desired); + +@@ -197,20 +247,106 @@ const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev) + + KBASE_EXPORT_TEST_API(kbase_pm_get_policy); + ++#if MALI_USE_CSF ++static int policy_change_wait_for_L2_off(struct kbase_device *kbdev) ++{ ++#define WAIT_DURATION_MS (3000) ++ long remaining; ++ long timeout = kbase_csf_timeout_in_jiffies(WAIT_DURATION_MS); ++ int err = 0; ++ ++ /* Wait for L2 becoming off, by which the MCU is also implicitly off ++ * since the L2 state machine would only start its power-down ++ * sequence when the MCU is in off state. 
The L2 off is required ++ * as the tiler may need to be power cycled for MCU reconfiguration ++ * for host control of shader cores. ++ */ ++#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE ++ remaining = wait_event_killable_timeout( ++ kbdev->pm.backend.gpu_in_desired_state_wait, ++ kbdev->pm.backend.l2_state == KBASE_L2_OFF, timeout); ++#else ++ remaining = wait_event_timeout( ++ kbdev->pm.backend.gpu_in_desired_state_wait, ++ kbdev->pm.backend.l2_state == KBASE_L2_OFF, timeout); ++#endif ++ ++ if (!remaining) { ++ err = -ETIMEDOUT; ++ } else if (remaining < 0) { ++ dev_info(kbdev->dev, ++ "Wait for L2_off got interrupted"); ++ err = (int)remaining; ++ } ++ ++ dev_dbg(kbdev->dev, "%s: err=%d mcu_state=%d, L2_state=%d\n", __func__, ++ err, kbdev->pm.backend.mcu_state, kbdev->pm.backend.l2_state); ++ ++ return err; ++} ++#endif ++ + void kbase_pm_set_policy(struct kbase_device *kbdev, + const struct kbase_pm_policy *new_policy) + { + const struct kbase_pm_policy *old_policy; + unsigned long flags; ++#if MALI_USE_CSF ++ unsigned int new_policy_csf_pm_sched_flags; ++ bool sched_suspend; ++ bool reset_gpu = false; ++#endif + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(new_policy != NULL); + + KBASE_KTRACE_ADD(kbdev, PM_SET_POLICY, NULL, new_policy->id); + ++#if MALI_USE_CSF ++ /* Serialize calls on kbase_pm_set_policy() */ ++ mutex_lock(&kbdev->pm.backend.policy_change_lock); ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ /* policy_change_clamp_state_to_off, when needed, is set/cleared in ++ * this function, a very limited temporal scope for covering the ++ * change transition. ++ */ ++ WARN_ON(kbdev->pm.backend.policy_change_clamp_state_to_off); ++ new_policy_csf_pm_sched_flags = new_policy->pm_sched_flags; ++ ++ /* Requiring the scheduler PM suspend operation when changes involving ++ * the always_on policy, reflected by the CSF_DYNAMIC_PM_CORE_KEEP_ON ++ * flag bit. 
++ */ ++ sched_suspend = kbdev->csf.firmware_inited && ++ (CSF_DYNAMIC_PM_CORE_KEEP_ON & ++ (new_policy_csf_pm_sched_flags | ++ kbdev->pm.backend.csf_pm_sched_flags)); ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ if (sched_suspend) ++ kbase_csf_scheduler_pm_suspend(kbdev); ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ /* If the current active policy is always_on, one needs to clamp the ++ * MCU/L2 for reaching off-state ++ */ ++ if (sched_suspend) ++ kbdev->pm.backend.policy_change_clamp_state_to_off = ++ CSF_DYNAMIC_PM_CORE_KEEP_ON & kbdev->pm.backend.csf_pm_sched_flags; ++ ++ kbase_pm_update_state(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ if (sched_suspend) ++ reset_gpu = policy_change_wait_for_L2_off(kbdev); ++#endif ++ + /* During a policy change we pretend the GPU is active */ + /* A suspend won't happen here, because we're in a syscall from a +- * userspace thread */ ++ * userspace thread ++ */ + kbase_pm_context_active(kbdev); + + kbase_pm_lock(kbdev); +@@ -225,25 +361,49 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, + if (old_policy->term) + old_policy->term(kbdev); + ++ memset(&kbdev->pm.backend.pm_policy_data, 0, ++ sizeof(union kbase_pm_policy_data)); ++ + KBASE_KTRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, new_policy->id); + if (new_policy->init) + new_policy->init(kbdev); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.pm_current_policy = new_policy; ++#if MALI_USE_CSF ++ kbdev->pm.backend.csf_pm_sched_flags = new_policy_csf_pm_sched_flags; ++ /* New policy in place, release the clamping on mcu/L2 off state */ ++ kbdev->pm.backend.policy_change_clamp_state_to_off = false; ++ kbase_pm_update_state(kbdev); ++#endif + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* If any core power state changes were previously attempted, but + * couldn't be made because the policy was changing (current_policy was +- * NULL), then re-try them here. 
*/ ++ * NULL), then re-try them here. ++ */ + kbase_pm_update_active(kbdev); + kbase_pm_update_cores_state(kbdev); + + kbase_pm_unlock(kbdev); + + /* Now the policy change is finished, we release our fake context active +- * reference */ ++ * reference ++ */ + kbase_pm_context_idle(kbdev); ++ ++#if MALI_USE_CSF ++ /* Reverse the suspension done */ ++ if (reset_gpu) { ++ dev_warn(kbdev->dev, "Resorting to GPU reset for policy change\n"); ++ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) ++ kbase_reset_gpu(kbdev); ++ kbase_reset_gpu_wait(kbdev); ++ } else if (sched_suspend) ++ kbase_csf_scheduler_pm_resume(kbdev); ++ ++ mutex_unlock(&kbdev->pm.backend.policy_change_lock); ++#endif + } + + KBASE_EXPORT_TEST_API(kbase_pm_set_policy); +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h +index f103ef0..e811365 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2010-2015, 2018-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2015, 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_shader_states.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_shader_states.h +index 2bd9e47..8622ef7 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_shader_states.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_shader_states.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2018-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +@@ -26,6 +25,41 @@ + * including this header file. This header file can be included multiple + * times in the same compilation unit with different definitions of + * KBASEP_SHADER_STATE(). ++ * ++ * @OFF_CORESTACK_OFF: The shaders and core stacks are off ++ * @OFF_CORESTACK_PEND_ON: The shaders are off, core stacks have been ++ * requested to power on and hwcnt is being ++ * disabled ++ * @PEND_ON_CORESTACK_ON: Core stacks are on, shaders have been ++ * requested to power on. Or after doing ++ * partial shader on/off, checking whether ++ * it's the desired state. ++ * @ON_CORESTACK_ON: The shaders and core stacks are on, and ++ * hwcnt already enabled. 
++ * @ON_CORESTACK_ON_RECHECK: The shaders and core stacks are on, hwcnt ++ * disabled, and checks to powering down or ++ * re-enabling hwcnt. ++ * @WAIT_OFF_CORESTACK_ON: The shaders have been requested to power ++ * off, but they remain on for the duration ++ * of the hysteresis timer ++ * @WAIT_GPU_IDLE: The shaders partial poweroff needs to ++ * reach a state where jobs on the GPU are ++ * finished including jobs currently running ++ * and in the GPU queue because of ++ * GPU2017-861 ++ * @WAIT_FINISHED_CORESTACK_ON: The hysteresis timer has expired ++ * @L2_FLUSHING_CORESTACK_ON: The core stacks are on and the level 2 ++ * cache is being flushed. ++ * @READY_OFF_CORESTACK_ON: The core stacks are on and the shaders are ++ * ready to be powered off. ++ * @PEND_OFF_CORESTACK_ON: The core stacks are on, and the shaders ++ * have been requested to power off ++ * @OFF_CORESTACK_PEND_OFF: The shaders are off, and the core stacks ++ * have been requested to power off ++ * @OFF_CORESTACK_OFF_TIMER_PEND_OFF: Shaders and corestacks are off, but the ++ * tick timer cancellation is still pending. 
++ * @RESET_WAIT: The GPU is resetting, shader and core ++ * stack power states are unknown + */ + KBASEP_SHADER_STATE(OFF_CORESTACK_OFF) + KBASEP_SHADER_STATE(OFF_CORESTACK_PEND_ON) +@@ -33,7 +67,9 @@ KBASEP_SHADER_STATE(PEND_ON_CORESTACK_ON) + KBASEP_SHADER_STATE(ON_CORESTACK_ON) + KBASEP_SHADER_STATE(ON_CORESTACK_ON_RECHECK) + KBASEP_SHADER_STATE(WAIT_OFF_CORESTACK_ON) ++#if !MALI_USE_CSF + KBASEP_SHADER_STATE(WAIT_GPU_IDLE) ++#endif /* !MALI_USE_CSF */ + KBASEP_SHADER_STATE(WAIT_FINISHED_CORESTACK_ON) + KBASEP_SHADER_STATE(L2_FLUSHING_CORESTACK_ON) + KBASEP_SHADER_STATE(READY_OFF_CORESTACK_ON) +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c +index cb10518..d10e404 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2014-2016,2018-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2016, 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,22 +17,20 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include + #include +-#include ++#include + #include + +-void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, +- u64 *system_time, struct timespec64 *ts) ++void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, ++ u64 *cycle_counter, ++ u64 *system_time, ++ struct timespec64 *ts) + { + u32 hi1, hi2; + +- kbase_pm_request_gpu_cycle_counter(kbdev); +- + if (cycle_counter) { + /* Read hi, lo, hi to ensure a coherent u64 */ + do { +@@ -65,6 +64,46 @@ void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, + #else + ktime_get_raw_ts64(ts); + #endif ++} ++ ++#if !MALI_USE_CSF ++/** ++ * timedwait_cycle_count_active() - Timed wait till CYCLE_COUNT_ACTIVE is active ++ * ++ * @kbdev: Kbase device ++ * ++ * Return: true if CYCLE_COUNT_ACTIVE is active within the timeout. ++ */ ++static bool timedwait_cycle_count_active(struct kbase_device *kbdev) ++{ ++ bool success = false; ++ const unsigned int timeout = 100; ++ const unsigned long remaining = jiffies + msecs_to_jiffies(timeout); + ++ while (time_is_after_jiffies(remaining)) { ++ if ((kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & ++ GPU_STATUS_CYCLE_COUNT_ACTIVE)) { ++ success = true; ++ break; ++ } ++ } ++ return success; ++} ++#endif ++ ++void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, ++ u64 *system_time, struct timespec64 *ts) ++{ ++#if !MALI_USE_CSF ++ kbase_pm_request_gpu_cycle_counter(kbdev); ++ WARN_ONCE(kbdev->pm.backend.l2_state != KBASE_L2_ON, ++ "L2 not powered up"); ++ WARN_ONCE((!timedwait_cycle_count_active(kbdev)), ++ "Timed out on CYCLE_COUNT_ACTIVE"); ++#endif ++ kbase_backend_get_gpu_time_norequest(kbdev, cycle_counter, system_time, ++ ts); ++#if !MALI_USE_CSF + kbase_pm_release_gpu_cycle_counter(kbdev); ++#endif + } +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/build.bp b/dvalin/kernel/drivers/gpu/arm/midgard/build.bp +index 
51aeecd..979e06f 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/build.bp ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/build.bp +@@ -1,15 +1,21 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2017-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2017-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * +- * A copy of the licence is included with the program, and can also be obtained +- * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +- * Boston, MA 02110-1301, USA. ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +@@ -19,11 +25,17 @@ + * both mali_kbase and the test modules. 
*/ + bob_defaults { + name: "mali_kbase_shared_config_defaults", ++ defaults: [ ++ "kernel_defaults", ++ ], + no_mali: { +- kbuild_options: ["CONFIG_MALI_NO_MALI=y"], ++ kbuild_options: [ ++ "CONFIG_MALI_NO_MALI=y", ++ "CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}", ++ ], + }, +- mali_real_hw: { +- kbuild_options: ["CONFIG_MALI_REAL_HW=y"], ++ gpu_has_csf: { ++ kbuild_options: ["CONFIG_MALI_CSF_SUPPORT=y"], + }, + mali_devfreq: { + kbuild_options: ["CONFIG_MALI_DEVFREQ=y"], +@@ -31,8 +43,62 @@ bob_defaults { + mali_midgard_dvfs: { + kbuild_options: ["CONFIG_MALI_MIDGARD_DVFS=y"], + }, ++ mali_gator_support: { ++ kbuild_options: ["CONFIG_MALI_GATOR_SUPPORT=y"], ++ }, ++ mali_midgard_enable_trace: { ++ kbuild_options: ["CONFIG_MALI_MIDGARD_ENABLE_TRACE=y"], ++ }, ++ mali_dma_fence: { ++ kbuild_options: ["CONFIG_MALI_DMA_FENCE=y"], ++ }, ++ mali_arbiter_support: { ++ kbuild_options: ["CONFIG_MALI_ARBITER_SUPPORT=y"], ++ }, ++ mali_dma_buf_map_on_demand: { ++ kbuild_options: ["CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND=y"], ++ }, ++ mali_dma_buf_legacy_compat: { ++ kbuild_options: ["CONFIG_MALI_DMA_BUF_LEGACY_COMPAT=y"], ++ }, ++ mali_2mb_alloc: { ++ kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"], ++ }, ++ mali_memory_fully_backed: { ++ kbuild_options: ["CONFIG_MALI_MEMORY_FULLY_BACKED=y"], ++ }, ++ mali_corestack: { ++ kbuild_options: ["CONFIG_MALI_CORESTACK=y"], ++ }, ++ mali_real_hw: { ++ kbuild_options: ["CONFIG_MALI_REAL_HW=y"], ++ }, ++ mali_error_inject_none: { ++ kbuild_options: ["CONFIG_MALI_ERROR_INJECT_NONE=y"], ++ }, ++ mali_error_inject_track_list: { ++ kbuild_options: ["CONFIG_MALI_ERROR_INJECT_TRACK_LIST=y"], ++ }, ++ mali_error_inject_random: { ++ kbuild_options: ["CONFIG_MALI_ERROR_INJECT_RANDOM=y"], ++ }, ++ mali_error_inject: { ++ kbuild_options: ["CONFIG_MALI_ERROR_INJECT=y"], ++ }, ++ mali_gem5_build: { ++ kbuild_options: ["CONFIG_MALI_GEM5_BUILD=y"], ++ }, + mali_debug: { +- kbuild_options: ["CONFIG_MALI_DEBUG=y"], ++ kbuild_options: [ ++ "CONFIG_MALI_DEBUG=y", 
++ "MALI_KERNEL_TEST_API={{.debug}}", ++ ], ++ }, ++ mali_fence_debug: { ++ kbuild_options: ["CONFIG_MALI_FENCE_DEBUG=y"], ++ }, ++ mali_system_trace: { ++ kbuild_options: ["CONFIG_MALI_SYSTEM_TRACE=y"], + }, + buslog: { + kbuild_options: ["CONFIG_MALI_BUSLOG=y"], +@@ -43,44 +109,60 @@ bob_defaults { + cinstr_gwt: { + kbuild_options: ["CONFIG_MALI_CINSTR_GWT=y"], + }, +- mali_gator_support: { +- kbuild_options: ["CONFIG_MALI_GATOR_SUPPORT=y"], ++ cinstr_primary_hwc: { ++ kbuild_options: ["CONFIG_MALI_PRFCNT_SET_PRIMARY=y"], + }, +- mali_midgard_enable_trace: { +- kbuild_options: ["CONFIG_MALI_MIDGARD_ENABLE_TRACE=y"], ++ cinstr_secondary_hwc: { ++ kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SECONDARY=y"], + }, +- mali_system_trace: { +- kbuild_options: ["CONFIG_MALI_SYSTEM_TRACE=y"], ++ cinstr_tertiary_hwc: { ++ kbuild_options: ["CONFIG_MALI_PRFCNT_SET_TERTIARY=y"], + }, +- mali_pwrsoft_765: { +- kbuild_options: ["CONFIG_MALI_PWRSOFT_765=y"], ++ cinstr_hwc_set_select_via_debug_fs: { ++ kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS=y"], + }, +- mali_memory_fully_backed: { +- kbuild_options: ["CONFIG_MALI_MEMORY_FULLY_BACKED=y"], ++ mali_job_dump: { ++ kbuild_options: ["CONFIG_MALI_JOB_DUMP"], + }, +- mali_dma_buf_map_on_demand: { +- kbuild_options: ["CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND=y"], +- }, +- mali_dma_buf_legacy_compat: { +- kbuild_options: ["CONFIG_MALI_DMA_BUF_LEGACY_COMPAT=y"], ++ mali_pwrsoft_765: { ++ kbuild_options: ["CONFIG_MALI_PWRSOFT_765=y"], + }, +- mali_arbiter_support: { +- kbuild_options: ["CONFIG_MALI_ARBITER_SUPPORT=y"], ++ mali_hw_errata_1485982_not_affected: { ++ kbuild_options: ["CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y"], + }, +- mali_gem5_build: { +- kbuild_options: ["CONFIG_MALI_GEM5_BUILD=y"], ++ mali_hw_errata_1485982_use_clock_alternative: { ++ kbuild_options: ["CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE=y"], + }, + kbuild_options: [ +- "MALI_UNIT_TEST={{.unit_test_code}}", ++ 
"CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}", + "MALI_CUSTOMER_RELEASE={{.release}}", ++ "MALI_UNIT_TEST={{.unit_test_code}}", + "MALI_USE_CSF={{.gpu_has_csf}}", +- "MALI_KERNEL_TEST_API={{.debug}}", ++ "MALI_JIT_PRESSURE_LIMIT_BASE={{.jit_pressure_limit_base}}", ++ ++ // Start of CS experimental features definitions. ++ // If there is nothing below, definition should be added as follows: ++ // "MALI_EXPERIMENTAL_FEATURE={{.experimental_feature}}" ++ // experimental_feature above comes from Mconfig in ++ // /product/base/ ++ // However, in Mconfig, experimental_feature should be looked up (for ++ // similar explanation to this one) as ALLCAPS, i.e. ++ // EXPERIMENTAL_FEATURE. ++ // ++ // IMPORTANT: MALI_CS_EXPERIMENTAL should NEVER be defined below as it ++ // is an umbrella feature that would be open for inappropriate use ++ // (catch-all for experimental CS code without separating it into ++ // different features). ++ "MALI_INCREMENTAL_RENDERING={{.incremental_rendering}}", ++ "GPU_TIMESTAMP_CORRECTION={{.gpu_timestamp_correction}}", + ], +- defaults: ["kernel_defaults"], + } + + bob_kernel_module { + name: "mali_kbase", ++ defaults: [ ++ "mali_kbase_shared_config_defaults", ++ ], + srcs: [ + "*.c", + "*.h", +@@ -90,6 +172,7 @@ bob_kernel_module { + "backend/gpu/Kbuild", + "context/*.c", + "context/*.h", ++ "context/Kbuild", + "ipa/*.c", + "ipa/*.h", + "ipa/Kbuild", +@@ -98,52 +181,23 @@ bob_kernel_module { + "platform/*/*.h", + "platform/*/Kbuild", + "thirdparty/*.c", ++ "thirdparty/Kbuild", + "debug/*.c", + "debug/*.h", ++ "debug/Kbuild", + "device/*.c", + "device/*.h", ++ "device/Kbuild", + "gpu/*.c", + "gpu/*.h", ++ "gpu/Kbuild", + "tl/*.c", + "tl/*.h", ++ "tl/Kbuild", + "mmu/*.c", + "mmu/*.h", ++ "mmu/Kbuild", + ], +- kbuild_options: [ +- "CONFIG_MALI_KUTF=n", +- "CONFIG_MALI_MIDGARD=m", +- "CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}", +- "CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}", +- ], +- buslog: { +- extra_symbols: [ +- "bus_logger", +- ], 
+- }, +- mali_corestack: { +- kbuild_options: ["CONFIG_MALI_CORESTACK=y"], +- }, +- mali_error_inject: { +- kbuild_options: ["CONFIG_MALI_ERROR_INJECT=y"], +- }, +- mali_error_inject_random: { +- kbuild_options: ["CONFIG_MALI_ERROR_INJECT_RANDOM=y"], +- }, +- cinstr_secondary_hwc: { +- kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SECONDARY=y"], +- }, +- cinstr_secondary_hwc_via_debug_fs: { +- kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS=y"], +- }, +- mali_2mb_alloc: { +- kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"], +- }, +- mali_hw_errata_1485982_not_affected: { +- kbuild_options: ["CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y"], +- }, +- mali_hw_errata_1485982_use_clock_alternative: { +- kbuild_options: ["CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE=y"], +- }, + gpu_has_job_manager: { + srcs: [ + "context/backend/*_jm.c", +@@ -155,6 +209,8 @@ bob_kernel_module { + "jm/*.h", + "tl/backend/*_jm.c", + "mmu/backend/*_jm.c", ++ "ipa/backend/*_jm.c", ++ "ipa/backend/*_jm.h", + ], + }, + gpu_has_csf: { +@@ -163,6 +219,9 @@ bob_kernel_module { + "csf/*.c", + "csf/*.h", + "csf/Kbuild", ++ "csf/ipa_control/*.c", ++ "csf/ipa_control/*.h", ++ "csf/ipa_control/Kbuild", + "debug/backend/*_csf.c", + "debug/backend/*_csf.h", + "device/backend/*_csf.c", +@@ -170,6 +229,8 @@ bob_kernel_module { + "gpu/backend/*_csf.h", + "tl/backend/*_csf.c", + "mmu/backend/*_csf.c", ++ "ipa/backend/*_csf.c", ++ "ipa/backend/*_csf.h", + ], + }, + mali_arbiter_support: { +@@ -179,5 +240,13 @@ bob_kernel_module { + "arbiter/Kbuild", + ], + }, +- defaults: ["mali_kbase_shared_config_defaults"], ++ kbuild_options: [ ++ "CONFIG_MALI_MIDGARD=m", ++ "CONFIG_MALI_KUTF=n", ++ ], ++ buslog: { ++ extra_symbols: [ ++ "bus_logger", ++ ], ++ }, + } +diff --git a/dvalin/kernel/drivers/base/memory_group_manager/Kconfig b/dvalin/kernel/drivers/gpu/arm/midgard/context/Kbuild +similarity index 63% +rename from dvalin/kernel/drivers/base/memory_group_manager/Kconfig +rename to 
dvalin/kernel/drivers/gpu/arm/midgard/context/Kbuild +index da464ec..1d9c00a 100644 +--- a/dvalin/kernel/drivers/base/memory_group_manager/Kconfig ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/context/Kbuild +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2019 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2012-2013, 2016-2017, 2020-2021 ARM Limited. All rights reserved. + # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,14 +16,12 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. + # +-# SPDX-License-Identifier: GPL-2.0 +-# + # + ++mali_kbase-y += context/mali_kbase_context.o + +-config MALI_MEMORY_GROUP_MANAGER +- tristate "MALI_MEMORY_GROUP_MANAGER" +- help +- This option enables an example implementation of a memory group manager +- for allocation and release of pages for memory pools managed by Mali GPU +- device drivers. ++ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) ++ mali_kbase-y += context/backend/mali_kbase_context_csf.o ++else ++ mali_kbase-y += context/backend/mali_kbase_context_jm.o ++endif +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/context/backend/mali_kbase_context_csf.c b/dvalin/kernel/drivers/gpu/arm/midgard/context/backend/mali_kbase_context_csf.c +new file mode 100644 +index 0000000..1ce806f +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/context/backend/mali_kbase_context_csf.c +@@ -0,0 +1,201 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++/* ++ * Base kernel context APIs for CSF GPUs ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++#include ++#include ++#include ++#include ++#include ++#include ++ ++void kbase_context_debugfs_init(struct kbase_context *const kctx) ++{ ++ kbase_debug_mem_view_init(kctx); ++ kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx); ++ kbase_jit_debugfs_init(kctx); ++ kbase_csf_queue_group_debugfs_init(kctx); ++ kbase_csf_kcpu_debugfs_init(kctx); ++ kbase_csf_tiler_heap_debugfs_init(kctx); ++ kbase_csf_cpu_queue_debugfs_init(kctx); ++} ++KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init); ++ ++void kbase_context_debugfs_term(struct kbase_context *const kctx) ++{ ++ debugfs_remove_recursive(kctx->kctx_dentry); ++} ++KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term); ++#else ++void kbase_context_debugfs_init(struct kbase_context *const kctx) ++{ ++ CSTD_UNUSED(kctx); ++} ++KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init); ++ ++void kbase_context_debugfs_term(struct kbase_context *const kctx) ++{ ++ CSTD_UNUSED(kctx); ++} ++KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term); ++#endif /* CONFIG_DEBUG_FS */ ++ ++static void kbase_context_free(struct kbase_context *kctx) ++{ ++ 
kbase_timeline_post_kbase_context_destroy(kctx); ++ ++ vfree(kctx); ++} ++ ++static const struct kbase_context_init context_init[] = { ++ { NULL, kbase_context_free, NULL }, ++ { kbase_context_common_init, kbase_context_common_term, ++ "Common context initialization failed" }, ++ { kbase_context_mem_pool_group_init, kbase_context_mem_pool_group_term, ++ "Memory pool group initialization failed" }, ++ { kbase_mem_evictable_init, kbase_mem_evictable_deinit, ++ "Memory evictable initialization failed" }, ++ { kbase_context_mmu_init, kbase_context_mmu_term, ++ "MMU initialization failed" }, ++ { kbase_context_mem_alloc_page, kbase_context_mem_pool_free, ++ "Memory alloc page failed" }, ++ { kbase_region_tracker_init, kbase_region_tracker_term, ++ "Region tracker initialization failed" }, ++ { kbase_sticky_resource_init, kbase_context_sticky_resource_term, ++ "Sticky resource initialization failed" }, ++ { kbase_jit_init, kbase_jit_term, "JIT initialization failed" }, ++ { kbase_csf_ctx_init, kbase_csf_ctx_term, ++ "CSF context initialization failed" }, ++ { kbase_context_add_to_dev_list, kbase_context_remove_from_dev_list, ++ "Adding kctx to device failed" }, ++}; ++ ++static void kbase_context_term_partial( ++ struct kbase_context *kctx, ++ unsigned int i) ++{ ++ while (i-- > 0) { ++ if (context_init[i].term) ++ context_init[i].term(kctx); ++ } ++} ++ ++struct kbase_context *kbase_create_context(struct kbase_device *kbdev, ++ bool is_compat, ++ base_context_create_flags const flags, ++ unsigned long const api_version, ++ struct file *const filp) ++{ ++ struct kbase_context *kctx; ++ unsigned int i = 0; ++ ++ if (WARN_ON(!kbdev)) ++ return NULL; ++ ++ /* Validate flags */ ++ if (WARN_ON(flags != (flags & BASEP_CONTEXT_CREATE_KERNEL_FLAGS))) ++ return NULL; ++ ++ /* zero-inited as lot of code assume it's zero'ed out on create */ ++ kctx = vzalloc(sizeof(*kctx)); ++ if (WARN_ON(!kctx)) ++ return NULL; ++ ++ kctx->kbdev = kbdev; ++ kctx->api_version = api_version; ++ 
kctx->filp = filp; ++ kctx->create_flags = flags; ++ ++ if (is_compat) ++ kbase_ctx_flag_set(kctx, KCTX_COMPAT); ++#if defined(CONFIG_64BIT) ++ else ++ kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA); ++#endif /* defined(CONFIG_64BIT) */ ++ ++ for (i = 0; i < ARRAY_SIZE(context_init); i++) { ++ int err = 0; ++ ++ if (context_init[i].init) ++ err = context_init[i].init(kctx); ++ ++ if (err) { ++ dev_err(kbdev->dev, "%s error = %d\n", ++ context_init[i].err_mes, err); ++ ++ /* kctx should be freed by kbase_context_free(). ++ * Otherwise it will result in memory leak. ++ */ ++ WARN_ON(i == 0); ++ ++ kbase_context_term_partial(kctx, i); ++ return NULL; ++ } ++ } ++ ++ return kctx; ++} ++KBASE_EXPORT_SYMBOL(kbase_create_context); ++ ++void kbase_destroy_context(struct kbase_context *kctx) ++{ ++ struct kbase_device *kbdev; ++ ++ if (WARN_ON(!kctx)) ++ return; ++ ++ kbdev = kctx->kbdev; ++ if (WARN_ON(!kbdev)) ++ return; ++ ++ /* Context termination could happen whilst the system suspend of ++ * the GPU device is ongoing or has completed. It has been seen on ++ * Customer side that a hang could occur if context termination is ++ * not blocked until the resume of GPU device. 
++ */ ++ while (kbase_pm_context_active_handle_suspend( ++ kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { ++ dev_info(kbdev->dev, ++ "Suspend in progress when destroying context"); ++ wait_event(kbdev->pm.resume_wait, ++ !kbase_pm_is_suspending(kbdev)); ++ } ++ ++ kbase_mem_pool_group_mark_dying(&kctx->mem_pools); ++ ++ kbase_context_term_partial(kctx, ARRAY_SIZE(context_init)); ++ ++ kbase_pm_context_idle(kbdev); ++} ++KBASE_EXPORT_SYMBOL(kbase_destroy_context); +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/context/backend/mali_kbase_context_jm.c b/dvalin/kernel/drivers/gpu/arm/midgard/context/backend/mali_kbase_context_jm.c +index 2cd2551..8ce81e7 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/context/backend/mali_kbase_context_jm.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/context/backend/mali_kbase_context_jm.c +@@ -1,12 +1,12 @@ +-// SPDX-License-Identifier: GPL-2.0 ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -17,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +@@ -30,13 +28,13 @@ + #include + #include + #include ++#include + #include + #include + #include + #include +-#include + +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + #include + #include + +@@ -46,14 +44,12 @@ void kbase_context_debugfs_init(struct kbase_context *const kctx) + kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx); + kbase_jit_debugfs_init(kctx); + kbasep_jd_debugfs_ctx_init(kctx); +- kbase_debug_job_fault_context_init(kctx); + } + KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init); + + void kbase_context_debugfs_term(struct kbase_context *const kctx) + { + debugfs_remove_recursive(kctx->kctx_dentry); +- kbase_debug_job_fault_context_term(kctx); + } + KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term); + #else +@@ -70,6 +66,16 @@ void kbase_context_debugfs_term(struct kbase_context *const kctx) + KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term); + #endif /* CONFIG_DEBUG_FS */ + ++static int kbase_context_kbase_kinstr_jm_init(struct kbase_context *kctx) ++{ ++ return kbase_kinstr_jm_init(&kctx->kinstr_jm); ++} ++ ++static void kbase_context_kbase_kinstr_jm_term(struct kbase_context *kctx) ++{ ++ kbase_kinstr_jm_term(kctx->kinstr_jm); ++} ++ + static int kbase_context_kbase_timer_setup(struct kbase_context *kctx) + { + kbase_timer_setup(&kctx->soft_job_timeout, +@@ -98,32 +104,59 @@ static int kbase_context_submit_check(struct kbase_context *kctx) + return 0; + } + ++static void kbase_context_flush_jobs(struct kbase_context *kctx) ++{ ++ kbase_jd_zap_context(kctx); ++ flush_workqueue(kctx->jctx.job_done_wq); ++} ++ ++static void kbase_context_free(struct kbase_context *kctx) ++{ ++ kbase_timeline_post_kbase_context_destroy(kctx); ++ ++ vfree(kctx); ++} ++ + static const struct kbase_context_init context_init[] = { +- {kbase_context_common_init, kbase_context_common_term, NULL}, +- {kbase_context_mem_pool_group_init, kbase_context_mem_pool_group_term, +- "Memory pool goup initialization 
failed"}, +- {kbase_mem_evictable_init, kbase_mem_evictable_deinit, +- "Memory evictable initialization failed"}, +- {kbasep_js_kctx_init, kbasep_js_kctx_term, +- "JS kctx initialization failed"}, +- {kbase_jd_init, kbase_jd_exit, +- "JD initialization failed"}, +- {kbase_event_init, kbase_event_cleanup, +- "Event initialization failed"}, +- {kbase_dma_fence_init, kbase_dma_fence_term, +- "DMA fence initialization failed"}, +- {kbase_context_mmu_init, kbase_context_mmu_term, +- "MMU initialization failed"}, +- {kbase_context_mem_alloc_page, kbase_context_mem_pool_free, +- "Memory alloc page failed"}, +- {kbase_region_tracker_init, kbase_region_tracker_term, +- "Region tracker initialization failed"}, +- {kbase_sticky_resource_init, kbase_context_sticky_resource_term, +- "Sticky resource initialization failed"}, +- {kbase_jit_init, kbase_jit_term, +- "JIT initialization failed"}, +- {kbase_context_kbase_timer_setup, NULL, NULL}, +- {kbase_context_submit_check, NULL, NULL}, ++ { NULL, kbase_context_free, NULL }, ++ { kbase_context_common_init, kbase_context_common_term, ++ "Common context initialization failed" }, ++ { kbase_dma_fence_init, kbase_dma_fence_term, ++ "DMA fence initialization failed" }, ++ { kbase_context_mem_pool_group_init, kbase_context_mem_pool_group_term, ++ "Memory pool group initialization failed" }, ++ { kbase_mem_evictable_init, kbase_mem_evictable_deinit, ++ "Memory evictable initialization failed" }, ++ { kbase_context_mmu_init, kbase_context_mmu_term, ++ "MMU initialization failed" }, ++ { kbase_context_mem_alloc_page, kbase_context_mem_pool_free, ++ "Memory alloc page failed" }, ++ { kbase_region_tracker_init, kbase_region_tracker_term, ++ "Region tracker initialization failed" }, ++ { kbase_sticky_resource_init, kbase_context_sticky_resource_term, ++ "Sticky resource initialization failed" }, ++ { kbase_jit_init, kbase_jit_term, "JIT initialization failed" }, ++ { kbase_context_kbase_kinstr_jm_init, ++ kbase_context_kbase_kinstr_jm_term, 
++ "JM instrumentation initialization failed" }, ++ { kbase_context_kbase_timer_setup, NULL, ++ "Timers initialization failed" }, ++ { kbase_event_init, kbase_event_cleanup, ++ "Event initialization failed" }, ++ { kbasep_js_kctx_init, kbasep_js_kctx_term, ++ "JS kctx initialization failed" }, ++ { kbase_jd_init, kbase_jd_exit, "JD initialization failed" }, ++ { kbase_context_submit_check, NULL, "Enabling job submission failed" }, ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ { kbase_debug_job_fault_context_init, ++ kbase_debug_job_fault_context_term, ++ "Job fault context initialization failed" }, ++#endif ++ { NULL, kbase_context_flush_jobs, NULL }, ++ { kbase_context_add_to_dev_list, kbase_context_remove_from_dev_list, ++ "Adding kctx to device failed" }, ++ { kbasep_platform_context_init, kbasep_platform_context_term, ++ "Platform callback for kctx initialization failed" }, + }; + + static void kbase_context_term_partial( +@@ -167,14 +200,23 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev, + #if defined(CONFIG_64BIT) + else + kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA); +-#endif /* !defined(CONFIG_64BIT) */ ++#endif /* defined(CONFIG_64BIT) */ + + for (i = 0; i < ARRAY_SIZE(context_init); i++) { +- int err = context_init[i].init(kctx); ++ int err = 0; ++ ++ if (context_init[i].init) ++ err = context_init[i].init(kctx); + + if (err) { + dev_err(kbdev->dev, "%s error = %d\n", + context_init[i].err_mes, err); ++ ++ /* kctx should be freed by kbase_context_free(). ++ * Otherwise it will result in memory leak. ++ */ ++ WARN_ON(i == 0); ++ + kbase_context_term_partial(kctx, i); + return NULL; + } +@@ -195,17 +237,27 @@ void kbase_destroy_context(struct kbase_context *kctx) + if (WARN_ON(!kbdev)) + return; + +- /* Ensure the core is powered up for the destroy process +- * A suspend won't happen here, because we're in a syscall +- * from a userspace thread. 
++ /* Context termination could happen whilst the system suspend of ++ * the GPU device is ongoing or has completed. It has been seen on ++ * Customer side that a hang could occur if context termination is ++ * not blocked until the resume of GPU device. + */ +- kbase_pm_context_active(kbdev); ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ atomic_inc(&kbdev->pm.gpu_users_waiting); ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ while (kbase_pm_context_active_handle_suspend( ++ kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { ++ dev_dbg(kbdev->dev, ++ "Suspend in progress when destroying context"); ++ wait_event(kbdev->pm.resume_wait, ++ !kbase_pm_is_suspending(kbdev)); ++ } ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ atomic_dec(&kbdev->pm.gpu_users_waiting); ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + + kbase_mem_pool_group_mark_dying(&kctx->mem_pools); + +- kbase_jd_zap_context(kctx); +- flush_workqueue(kctx->jctx.job_done_wq); +- + kbase_context_term_partial(kctx, ARRAY_SIZE(context_init)); + + kbase_pm_context_idle(kbdev); +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context.c b/dvalin/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context.c +index 93fe431..b2e7025 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context.c +@@ -1,12 +1,12 @@ +-// SPDX-License-Identifier: GPL-2.0 ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -17,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +@@ -28,17 +26,105 @@ + #include + #include + #include +-#include + #include + #include +-#include + #include + #include + #include + ++/** ++ * find_process_node - Used to traverse the process rb_tree to find if ++ * process exists already in process rb_tree. ++ * ++ * @node: Pointer to root node to start search. ++ * @tgid: Thread group PID to search for. ++ * ++ * Return: Pointer to kbase_process if exists otherwise NULL. ++ */ ++static struct kbase_process *find_process_node(struct rb_node *node, pid_t tgid) ++{ ++ struct kbase_process *kprcs = NULL; ++ ++ /* Check if the kctx creation request is from a existing process.*/ ++ while (node) { ++ struct kbase_process *prcs_node = ++ rb_entry(node, struct kbase_process, kprcs_node); ++ if (prcs_node->tgid == tgid) { ++ kprcs = prcs_node; ++ break; ++ } ++ ++ if (tgid < prcs_node->tgid) ++ node = node->rb_left; ++ else ++ node = node->rb_right; ++ } ++ ++ return kprcs; ++} ++ ++/** ++ * kbase_insert_kctx_to_process - Initialise kbase process context. ++ * ++ * @kctx: Pointer to kbase context. ++ * ++ * Here we initialise per process rb_tree managed by kbase_device. ++ * We maintain a rb_tree of each unique process that gets created. ++ * and Each process maintains a list of kbase context. ++ * This setup is currently used by kernel trace functionality ++ * to trace and visualise gpu memory consumption. ++ * ++ * Return: 0 on success and error number on failure. 
++ */ ++static int kbase_insert_kctx_to_process(struct kbase_context *kctx) ++{ ++ struct rb_root *const prcs_root = &kctx->kbdev->process_root; ++ const pid_t tgid = kctx->tgid; ++ struct kbase_process *kprcs = NULL; ++ ++ lockdep_assert_held(&kctx->kbdev->kctx_list_lock); ++ ++ kprcs = find_process_node(prcs_root->rb_node, tgid); ++ ++ /* if the kctx is from new process then create a new kbase_process ++ * and add it to the &kbase_device->rb_tree ++ */ ++ if (!kprcs) { ++ struct rb_node **new = &prcs_root->rb_node, *parent = NULL; ++ ++ kprcs = kzalloc(sizeof(*kprcs), GFP_KERNEL); ++ if (kprcs == NULL) ++ return -ENOMEM; ++ kprcs->tgid = tgid; ++ INIT_LIST_HEAD(&kprcs->kctx_list); ++ kprcs->dma_buf_root = RB_ROOT; ++ kprcs->total_gpu_pages = 0; ++ ++ while (*new) { ++ struct kbase_process *prcs_node; ++ ++ parent = *new; ++ prcs_node = rb_entry(parent, struct kbase_process, ++ kprcs_node); ++ if (tgid < prcs_node->tgid) ++ new = &(*new)->rb_left; ++ else ++ new = &(*new)->rb_right; ++ } ++ rb_link_node(&kprcs->kprcs_node, parent, new); ++ rb_insert_color(&kprcs->kprcs_node, prcs_root); ++ } ++ ++ kctx->kprcs = kprcs; ++ list_add(&kctx->kprcs_link, &kprcs->kctx_list); ++ ++ return 0; ++} ++ + int kbase_context_common_init(struct kbase_context *kctx) + { + const unsigned long cookies_mask = KBASE_COOKIE_MASK; ++ int err = 0; + + /* creating a context is considered a disjoint event */ + kbase_disjoint_event(kctx->kbdev); +@@ -66,30 +152,95 @@ int kbase_context_common_init(struct kbase_context *kctx) + + init_waitqueue_head(&kctx->event_queue); + atomic_set(&kctx->event_count, 0); ++#if !MALI_USE_CSF + atomic_set(&kctx->event_closed, false); +- +- bitmap_copy(kctx->cookies, &cookies_mask, BITS_PER_LONG); +- +-#ifdef CONFIG_GPU_TRACEPOINTS ++#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) + atomic_set(&kctx->jctx.work_id, 0); ++#endif + #endif + ++ bitmap_copy(kctx->cookies, &cookies_mask, BITS_PER_LONG); ++ + kctx->id = atomic_add_return(1, &(kctx->kbdev->ctx_num)) - 1; + + 
mutex_init(&kctx->legacy_hwcnt_lock); + + mutex_lock(&kctx->kbdev->kctx_list_lock); +- list_add(&kctx->kctx_list_link, &kctx->kbdev->kctx_list); + +- KBASE_TLSTREAM_TL_KBASE_NEW_CTX(kctx->kbdev, kctx->id, +- kctx->kbdev->gpu_props.props.raw_props.gpu_id); +- KBASE_TLSTREAM_TL_NEW_CTX(kctx->kbdev, kctx, kctx->id, +- (u32)(kctx->tgid)); ++ err = kbase_insert_kctx_to_process(kctx); ++ if (err) ++ dev_err(kctx->kbdev->dev, ++ "(err:%d) failed to insert kctx to kbase_process\n", err); ++ + mutex_unlock(&kctx->kbdev->kctx_list_lock); + ++ return err; ++} ++ ++int kbase_context_add_to_dev_list(struct kbase_context *kctx) ++{ ++ if (WARN_ON(!kctx)) ++ return -EINVAL; ++ ++ if (WARN_ON(!kctx->kbdev)) ++ return -EINVAL; ++ ++ mutex_lock(&kctx->kbdev->kctx_list_lock); ++ list_add(&kctx->kctx_list_link, &kctx->kbdev->kctx_list); ++ mutex_unlock(&kctx->kbdev->kctx_list_lock); ++ ++ kbase_timeline_post_kbase_context_create(kctx); ++ + return 0; + } + ++void kbase_context_remove_from_dev_list(struct kbase_context *kctx) ++{ ++ if (WARN_ON(!kctx)) ++ return; ++ ++ if (WARN_ON(!kctx->kbdev)) ++ return; ++ ++ kbase_timeline_pre_kbase_context_destroy(kctx); ++ ++ mutex_lock(&kctx->kbdev->kctx_list_lock); ++ list_del_init(&kctx->kctx_list_link); ++ mutex_unlock(&kctx->kbdev->kctx_list_lock); ++} ++ ++/** ++ * kbase_remove_kctx_from_process - remove a terminating context from ++ * the process list. ++ * ++ * @kctx: Pointer to kbase context. ++ * ++ * Remove the tracking of context from the list of contexts maintained under ++ * kbase process and if the list if empty then there no outstanding contexts ++ * we can remove the process node as well. ++ */ ++ ++static void kbase_remove_kctx_from_process(struct kbase_context *kctx) ++{ ++ struct kbase_process *kprcs = kctx->kprcs; ++ ++ lockdep_assert_held(&kctx->kbdev->kctx_list_lock); ++ list_del(&kctx->kprcs_link); ++ ++ /* if there are no outstanding contexts in current process node, ++ * we can remove it from the process rb_tree. 
++ */ ++ if (list_empty(&kprcs->kctx_list)) { ++ rb_erase(&kprcs->kprcs_node, &kctx->kbdev->process_root); ++ /* Add checks, so that the terminating process Should not ++ * hold any gpu_memory. ++ */ ++ WARN_ON(kprcs->total_gpu_pages); ++ WARN_ON(!RB_EMPTY_ROOT(&kprcs->dma_buf_root)); ++ kfree(kprcs); ++ } ++} ++ + void kbase_context_common_term(struct kbase_context *kctx) + { + unsigned long flags; +@@ -109,24 +260,10 @@ void kbase_context_common_term(struct kbase_context *kctx) + WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0); + + mutex_lock(&kctx->kbdev->kctx_list_lock); +- +- KBASE_TLSTREAM_TL_KBASE_DEL_CTX(kctx->kbdev, kctx->id); +- +- KBASE_TLSTREAM_TL_DEL_CTX(kctx->kbdev, kctx); +- list_del(&kctx->kctx_list_link); ++ kbase_remove_kctx_from_process(kctx); + mutex_unlock(&kctx->kbdev->kctx_list_lock); + + KBASE_KTRACE_ADD(kctx->kbdev, CORE_CTX_DESTROY, kctx, 0u); +- +- /* Flush the timeline stream, so the user can see the termination +- * tracepoints being fired. +- * The "if" statement below is for optimization. It is safe to call +- * kbase_timeline_streams_flush when timeline is disabled. 
+- */ +- if (atomic_read(&kctx->kbdev->timeline_flags) != 0) +- kbase_timeline_streams_flush(kctx->kbdev->timeline); +- +- vfree(kctx); + } + + int kbase_context_mem_pool_group_init(struct kbase_context *kctx) +@@ -144,11 +281,9 @@ void kbase_context_mem_pool_group_term(struct kbase_context *kctx) + + int kbase_context_mmu_init(struct kbase_context *kctx) + { +- kbase_mmu_init(kctx->kbdev, +- &kctx->mmu, kctx, ++ return kbase_mmu_init( ++ kctx->kbdev, &kctx->mmu, kctx, + base_context_mmu_group_id_get(kctx->create_flags)); +- +- return 0; + } + + void kbase_context_mmu_term(struct kbase_context *kctx) +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context.h b/dvalin/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context.h +index e4ed894..a0c51c9 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2017, 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,18 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * +- *//* SPDX-License-Identifier: GPL-2.0 */ +-/* +- * +- * (C) COPYRIGHT 2011-2017, 2019 ARM Limited. All rights reserved. 
+- * +- * This program is free software and is provided to you under the terms of the +- * GNU General Public License version 2 as published by the Free Software +- * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. +- * + */ + + #ifndef _KBASE_CONTEXT_H_ +@@ -117,25 +106,7 @@ static inline bool kbase_ctx_flag(struct kbase_context *kctx, + static inline void kbase_ctx_flag_clear(struct kbase_context *kctx, + enum kbase_context_flags flag) + { +-#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE +- /* +- * Earlier kernel versions doesn't have atomic_andnot() or +- * atomic_and(). atomic_clear_mask() was only available on some +- * architectures and removed on arm in v3.13 on arm and arm64. +- * +- * Use a compare-exchange loop to clear the flag on pre 4.3 kernels, +- * when atomic_andnot() becomes available. +- */ +- int old, new; +- +- do { +- old = atomic_read(&kctx->flags); +- new = old & ~flag; +- +- } while (atomic_cmpxchg(&kctx->flags, old, new) != old); +-#else + atomic_andnot(flag, &kctx->flags); +-#endif + } + + /** +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context_internal.h b/dvalin/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context_internal.h +index 818cdbe..1cde739 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context_internal.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context_internal.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,16 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * +- *//* SPDX-License-Identifier: GPL-2.0 */ +-/* +- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. +- * +- * This program is free software and is provided to you under the terms of the +- * GNU General Public License version 2 as published by the Free Software +- * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. + */ + + #include +@@ -58,3 +49,6 @@ int kbase_context_mem_alloc_page(struct kbase_context *kctx); + void kbase_context_mem_pool_free(struct kbase_context *kctx); + + void kbase_context_sticky_resource_term(struct kbase_context *kctx); ++ ++int kbase_context_add_to_dev_list(struct kbase_context *kctx); ++void kbase_context_remove_from_dev_list(struct kbase_context *kctx); +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/csf/Kbuild +new file mode 100644 +index 0000000..765e419 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/Kbuild +@@ -0,0 +1,47 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. 
++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# ++ ++mali_kbase-y += \ ++ csf/mali_kbase_csf_firmware_cfg.o \ ++ csf/mali_kbase_csf_trace_buffer.o \ ++ csf/mali_kbase_csf.o \ ++ csf/mali_kbase_csf_scheduler.o \ ++ csf/mali_kbase_csf_kcpu.o \ ++ csf/mali_kbase_csf_tiler_heap.o \ ++ csf/mali_kbase_csf_timeout.o \ ++ csf/mali_kbase_csf_tl_reader.o \ ++ csf/mali_kbase_csf_heap_context_alloc.o \ ++ csf/mali_kbase_csf_reset_gpu.o \ ++ csf/mali_kbase_csf_csg_debugfs.o \ ++ csf/mali_kbase_csf_kcpu_debugfs.o \ ++ csf/mali_kbase_csf_protected_memory.o \ ++ csf/mali_kbase_csf_tiler_heap_debugfs.o \ ++ csf/mali_kbase_csf_cpu_queue_debugfs.o ++ ++mali_kbase-$(CONFIG_MALI_REAL_HW) += csf/mali_kbase_csf_firmware.o ++ ++ ++ifeq ($(KBUILD_EXTMOD),) ++# in-tree ++ -include $(src)/csf/ipa_control/Kbuild ++else ++# out-of-tree ++ include $(src)/csf/ipa_control/Kbuild ++endif +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/Makefile.kbase b/dvalin/kernel/drivers/gpu/arm/midgard/csf/ipa_control/Kbuild +similarity index 75% +rename from dvalin/kernel/drivers/gpu/arm/midgard/Makefile.kbase +rename to dvalin/kernel/drivers/gpu/arm/midgard/csf/ipa_control/Kbuild +index 6b0f81e..08824b2 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/Makefile.kbase ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/ipa_control/Kbuild +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2010, 2013, 2018 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. 
+ # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,9 +16,7 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. + # +-# SPDX-License-Identifier: GPL-2.0 + # +-# +- +-EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(KBASE_PATH)/platform_$(PLATFORM) + ++mali_kbase-y += \ ++ csf/ipa_control/mali_kbase_csf_ipa_control.o +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/ipa_control/mali_kbase_csf_ipa_control.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/ipa_control/mali_kbase_csf_ipa_control.c +new file mode 100644 +index 0000000..e23d681 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/ipa_control/mali_kbase_csf_ipa_control.c +@@ -0,0 +1,925 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#include ++#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" ++#include "mali_kbase_csf_ipa_control.h" ++ ++/* ++ * Status flags from the STATUS register of the IPA Control interface. 
++ */ ++#define STATUS_COMMAND_ACTIVE ((u32)1 << 0) ++#define STATUS_TIMER_ACTIVE ((u32)1 << 1) ++#define STATUS_AUTO_ACTIVE ((u32)1 << 2) ++#define STATUS_PROTECTED_MODE ((u32)1 << 8) ++#define STATUS_RESET ((u32)1 << 9) ++#define STATUS_TIMER_ENABLED ((u32)1 << 31) ++ ++/* ++ * Commands for the COMMAND register of the IPA Control interface. ++ */ ++#define COMMAND_NOP ((u32)0) ++#define COMMAND_APPLY ((u32)1) ++#define COMMAND_CLEAR ((u32)2) ++#define COMMAND_SAMPLE ((u32)3) ++#define COMMAND_PROTECTED_ACK ((u32)4) ++#define COMMAND_RESET_ACK ((u32)5) ++ ++/** ++ * Default value for the TIMER register of the IPA Control interface, ++ * expressed in milliseconds. ++ * ++ * The chosen value is a trade off between two requirements: the IPA Control ++ * interface should sample counters with a resolution in the order of ++ * milliseconds, while keeping GPU overhead as limited as possible. ++ */ ++#define TIMER_DEFAULT_VALUE_MS ((u32)10) /* 10 milliseconds */ ++ ++/** ++ * Number of timer events per second. ++ */ ++#define TIMER_EVENTS_PER_SECOND ((u32)1000 / TIMER_DEFAULT_VALUE_MS) ++ ++/** ++ * Maximum number of loops polling the GPU before we assume the GPU has hung. ++ */ ++#define IPA_INACTIVE_MAX_LOOPS ((unsigned int)8000000) ++ ++/** ++ * Number of bits used to configure a performance counter in SELECT registers. ++ */ ++#define IPA_CONTROL_SELECT_BITS_PER_CNT ((u64)8) ++ ++/** ++ * Maximum value of a performance counter. ++ */ ++#define MAX_PRFCNT_VALUE (((u64)1 << 48) - 1) ++ ++/** ++ * struct kbase_ipa_control_listener_data - Data for the GPU clock frequency ++ * listener ++ * ++ * @listener: GPU clock frequency listener. ++ * @kbdev: Pointer to kbase device. 
++ */ ++struct kbase_ipa_control_listener_data { ++ struct kbase_clk_rate_listener listener; ++ struct kbase_device *kbdev; ++}; ++ ++static u32 timer_value(u32 gpu_rate) ++{ ++ return gpu_rate / TIMER_EVENTS_PER_SECOND; ++} ++ ++static int wait_status(struct kbase_device *kbdev, u32 flags) ++{ ++ unsigned int max_loops = IPA_INACTIVE_MAX_LOOPS; ++ u32 status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS)); ++ ++ /* ++ * Wait for the STATUS register to indicate that flags have been ++ * cleared, in case a transition is pending. ++ */ ++ while (--max_loops && (status & flags)) ++ status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS)); ++ if (max_loops == 0) { ++ dev_err(kbdev->dev, "IPA_CONTROL STATUS register stuck"); ++ return -EBUSY; ++ } ++ ++ return 0; ++} ++ ++static int apply_select_config(struct kbase_device *kbdev, u64 *select) ++{ ++ int ret; ++ ++ u32 select_cshw_lo = (u32)(select[KBASE_IPA_CORE_TYPE_CSHW] & U32_MAX); ++ u32 select_cshw_hi = ++ (u32)((select[KBASE_IPA_CORE_TYPE_CSHW] >> 32) & U32_MAX); ++ u32 select_memsys_lo = ++ (u32)(select[KBASE_IPA_CORE_TYPE_MEMSYS] & U32_MAX); ++ u32 select_memsys_hi = ++ (u32)((select[KBASE_IPA_CORE_TYPE_MEMSYS] >> 32) & U32_MAX); ++ u32 select_tiler_lo = ++ (u32)(select[KBASE_IPA_CORE_TYPE_TILER] & U32_MAX); ++ u32 select_tiler_hi = ++ (u32)((select[KBASE_IPA_CORE_TYPE_TILER] >> 32) & U32_MAX); ++ u32 select_shader_lo = ++ (u32)(select[KBASE_IPA_CORE_TYPE_SHADER] & U32_MAX); ++ u32 select_shader_hi = ++ (u32)((select[KBASE_IPA_CORE_TYPE_SHADER] >> 32) & U32_MAX); ++ ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_LO), select_cshw_lo); ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_HI), select_cshw_hi); ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_MEMSYS_LO), ++ select_memsys_lo); ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_MEMSYS_HI), ++ select_memsys_hi); ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_TILER_LO), ++ select_tiler_lo); ++ kbase_reg_write(kbdev, 
IPA_CONTROL_REG(SELECT_TILER_HI), ++ select_tiler_hi); ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_SHADER_LO), ++ select_shader_lo); ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_SHADER_HI), ++ select_shader_hi); ++ ++ ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE); ++ ++ if (!ret) ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), COMMAND_APPLY); ++ ++ return ret; ++} ++ ++static u64 read_value_cnt(struct kbase_device *kbdev, u8 type, int select_idx) ++{ ++ u32 value_lo, value_hi; ++ ++ switch (type) { ++ case KBASE_IPA_CORE_TYPE_CSHW: ++ value_lo = kbase_reg_read( ++ kbdev, IPA_CONTROL_REG(VALUE_CSHW_REG_LO(select_idx))); ++ value_hi = kbase_reg_read( ++ kbdev, IPA_CONTROL_REG(VALUE_CSHW_REG_HI(select_idx))); ++ break; ++ case KBASE_IPA_CORE_TYPE_MEMSYS: ++ value_lo = kbase_reg_read( ++ kbdev, ++ IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(select_idx))); ++ value_hi = kbase_reg_read( ++ kbdev, ++ IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI(select_idx))); ++ break; ++ case KBASE_IPA_CORE_TYPE_TILER: ++ value_lo = kbase_reg_read( ++ kbdev, IPA_CONTROL_REG(VALUE_TILER_REG_LO(select_idx))); ++ value_hi = kbase_reg_read( ++ kbdev, IPA_CONTROL_REG(VALUE_TILER_REG_HI(select_idx))); ++ break; ++ case KBASE_IPA_CORE_TYPE_SHADER: ++ value_lo = kbase_reg_read( ++ kbdev, ++ IPA_CONTROL_REG(VALUE_SHADER_REG_LO(select_idx))); ++ value_hi = kbase_reg_read( ++ kbdev, ++ IPA_CONTROL_REG(VALUE_SHADER_REG_HI(select_idx))); ++ break; ++ default: ++ WARN(1, "Unknown core type: %u\n", type); ++ value_lo = value_hi = 0; ++ break; ++ } ++ ++ return (((u64)value_hi << 32) | value_lo); ++} ++ ++static void build_select_config(struct kbase_ipa_control *ipa_ctrl, ++ u64 *select_config) ++{ ++ size_t i; ++ ++ for (i = 0; i < KBASE_IPA_CORE_TYPE_NUM; i++) { ++ size_t j; ++ ++ select_config[i] = 0ULL; ++ ++ for (j = 0; j < KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; j++) { ++ struct kbase_ipa_control_prfcnt_config *prfcnt_config = ++ &ipa_ctrl->blocks[i].select[j]; ++ ++ select_config[i] |= ++ 
((u64)prfcnt_config->idx ++ << (IPA_CONTROL_SELECT_BITS_PER_CNT * j)); ++ } ++ } ++} ++ ++static inline void calc_prfcnt_delta(struct kbase_device *kbdev, ++ struct kbase_ipa_control_prfcnt *prfcnt, ++ bool gpu_ready) ++{ ++ u64 delta_value, raw_value; ++ ++ if (gpu_ready) ++ raw_value = read_value_cnt(kbdev, (u8)prfcnt->type, ++ prfcnt->select_idx); ++ else ++ raw_value = prfcnt->latest_raw_value; ++ ++ if (raw_value < prfcnt->latest_raw_value) { ++ delta_value = (MAX_PRFCNT_VALUE - prfcnt->latest_raw_value) + ++ raw_value; ++ } else { ++ delta_value = raw_value - prfcnt->latest_raw_value; ++ } ++ ++ delta_value *= prfcnt->scaling_factor; ++ ++ if (!WARN_ON_ONCE(kbdev->csf.ipa_control.cur_gpu_rate == 0)) ++ if (prfcnt->gpu_norm) ++ delta_value /= kbdev->csf.ipa_control.cur_gpu_rate; ++ ++ prfcnt->latest_raw_value = raw_value; ++ ++ /* Accumulate the difference */ ++ prfcnt->accumulated_diff += delta_value; ++} ++ ++/** ++ * kbase_ipa_control_rate_change_notify - GPU frequency change callback ++ * ++ * @listener: Clock frequency change listener. ++ * @clk_index: Index of the clock for which the change has occurred. ++ * @clk_rate_hz: Clock frequency(Hz). ++ * ++ * This callback notifies kbase_ipa_control about GPU frequency changes. ++ * Only top-level clock changes are meaningful. GPU frequency updates ++ * affect all performance counters which require GPU normalization ++ * in every session. 
++ */
++static void
++kbase_ipa_control_rate_change_notify(struct kbase_clk_rate_listener *listener,
++				     u32 clk_index, u32 clk_rate_hz)
++{
++	if ((clk_index == KBASE_CLOCK_DOMAIN_TOP) && (clk_rate_hz != 0)) {
++		size_t i;
++		unsigned long flags;
++		struct kbase_ipa_control_listener_data *listener_data =
++			container_of(listener,
++				     struct kbase_ipa_control_listener_data,
++				     listener);
++		struct kbase_device *kbdev = listener_data->kbdev;
++		struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
++
++		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
++
++		if (!kbdev->pm.backend.gpu_ready) {
++			dev_err(kbdev->dev,
++				"%s: GPU frequency cannot change while GPU is off",
++				__func__);
++			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
++			return;
++		}
++
++		/* Interrupts are already disabled and interrupt state is also saved */
++		spin_lock(&ipa_ctrl->lock);
++
++		/* Fold the counters sampled so far into every session while
++		 * cur_gpu_rate still holds the old frequency, which is the
++		 * value calc_prfcnt_delta() normalizes by.
++		 *
++		 * kbase_ipa_control_unregister() clears a slot's active flag
++		 * without moving the remaining sessions, so active sessions
++		 * are not guaranteed to occupy the first num_active_sessions
++		 * slots: visit every slot and skip the inactive ones.
++		 */
++		for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) {
++			size_t j;
++			struct kbase_ipa_control_session *session = &ipa_ctrl->sessions[i];
++
++			if (!session->active)
++				continue;
++
++			for (j = 0; j < session->num_prfcnts; j++) {
++				struct kbase_ipa_control_prfcnt *prfcnt =
++					&session->prfcnts[j];
++
++				/* Only normalized counters depend on the rate */
++				if (prfcnt->gpu_norm)
++					calc_prfcnt_delta(kbdev, prfcnt, true);
++			}
++		}
++
++		ipa_ctrl->cur_gpu_rate = clk_rate_hz;
++
++		/* Update the timer for automatic sampling if active sessions
++		 * are present. Counters have already been manually sampled.
++ */ ++ if (ipa_ctrl->num_active_sessions > 0) { ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), ++ timer_value(ipa_ctrl->cur_gpu_rate)); ++ } ++ ++ spin_unlock(&ipa_ctrl->lock); ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } ++} ++ ++void kbase_ipa_control_init(struct kbase_device *kbdev) ++{ ++ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; ++ struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; ++ struct kbase_ipa_control_listener_data *listener_data; ++ size_t i, j; ++ ++ for (i = 0; i < KBASE_IPA_CORE_TYPE_NUM; i++) { ++ for (j = 0; j < KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; j++) { ++ ipa_ctrl->blocks[i].select[j].idx = 0; ++ ipa_ctrl->blocks[i].select[j].refcount = 0; ++ } ++ ipa_ctrl->blocks[i].num_available_counters = ++ KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; ++ } ++ ++ spin_lock_init(&ipa_ctrl->lock); ++ ipa_ctrl->num_active_sessions = 0; ++ for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) { ++ ipa_ctrl->sessions[i].active = false; ++ } ++ ++ listener_data = kmalloc(sizeof(struct kbase_ipa_control_listener_data), ++ GFP_KERNEL); ++ if (listener_data) { ++ listener_data->listener.notify = ++ kbase_ipa_control_rate_change_notify; ++ listener_data->kbdev = kbdev; ++ ipa_ctrl->rtm_listener_data = listener_data; ++ } ++ ++ spin_lock(&clk_rtm->lock); ++ if (clk_rtm->clks[KBASE_CLOCK_DOMAIN_TOP]) ++ ipa_ctrl->cur_gpu_rate = ++ clk_rtm->clks[KBASE_CLOCK_DOMAIN_TOP]->clock_val; ++ if (listener_data) ++ kbase_clk_rate_trace_manager_subscribe_no_lock( ++ clk_rtm, &listener_data->listener); ++ spin_unlock(&clk_rtm->lock); ++} ++KBASE_EXPORT_TEST_API(kbase_ipa_control_init); ++ ++void kbase_ipa_control_term(struct kbase_device *kbdev) ++{ ++ unsigned long flags; ++ struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; ++ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; ++ struct kbase_ipa_control_listener_data *listener_data = ++ ipa_ctrl->rtm_listener_data; ++ ++ 
WARN_ON(ipa_ctrl->num_active_sessions);
++
++	if (listener_data)
++		kbase_clk_rate_trace_manager_unsubscribe(clk_rtm, &listener_data->listener);
++	kfree(ipa_ctrl->rtm_listener_data);
++
++	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
++	if (kbdev->pm.backend.gpu_powered)
++		kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), 0);
++	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
++}
++KBASE_EXPORT_TEST_API(kbase_ipa_control_term);
++
++/*
++ * Register a client session for the requested performance counters.
++ *
++ * On success 0 is returned and *client receives the session handle.
++ * -EINVAL is returned for invalid arguments, an invalid counter type or
++ * index, or a request for more counters than are available; -EBUSY is
++ * returned when all sessions are in use, when the internal counter state
++ * is inconsistent, or when the hardware does not complete a command.
++ */
++int kbase_ipa_control_register(
++	struct kbase_device *kbdev,
++	const struct kbase_ipa_control_perf_counter *perf_counters,
++	size_t num_counters, void **client)
++{
++	int ret = 0;
++	size_t i, session_idx, req_counters[KBASE_IPA_CORE_TYPE_NUM];
++	bool already_configured[KBASE_IPA_CONTROL_MAX_COUNTERS];
++	bool new_config = false;
++	struct kbase_ipa_control *ipa_ctrl;
++	struct kbase_ipa_control_session *session = NULL;
++	unsigned long flags;
++
++	/* kbdev is validated on its own: the error message for the other
++	 * arguments is emitted through kbdev->dev, which must not be
++	 * dereferenced when kbdev itself is NULL.
++	 */
++	if (WARN_ON(kbdev == NULL))
++		return -EINVAL;
++
++	if (WARN_ON(perf_counters == NULL) || WARN_ON(client == NULL) ||
++	    WARN_ON(num_counters > KBASE_IPA_CONTROL_MAX_COUNTERS)) {
++		dev_err(kbdev->dev, "%s: wrong input arguments", __func__);
++		return -EINVAL;
++	}
++
++	kbase_pm_context_active(kbdev);
++
++	ipa_ctrl = &kbdev->csf.ipa_control;
++	spin_lock_irqsave(&ipa_ctrl->lock, flags);
++
++	if (ipa_ctrl->num_active_sessions == KBASE_IPA_CONTROL_MAX_SESSIONS) {
++		dev_err(kbdev->dev, "%s: too many sessions", __func__);
++		ret = -EBUSY;
++		goto exit;
++	}
++
++	for (i = 0; i < KBASE_IPA_CORE_TYPE_NUM; i++)
++		req_counters[i] = 0;
++
++	/*
++	 * Count how many counters would need to be configured in order to
++	 * satisfy the request. Requested counters which happen to be already
++	 * configured can be skipped.
++ */ ++ for (i = 0; i < num_counters; i++) { ++ size_t j; ++ enum kbase_ipa_core_type type = perf_counters[i].type; ++ u8 idx = perf_counters[i].idx; ++ ++ if ((type >= KBASE_IPA_CORE_TYPE_NUM) || ++ (idx >= KBASE_IPA_CONTROL_CNT_MAX_IDX)) { ++ dev_err(kbdev->dev, ++ "%s: invalid requested type %u and/or index %u", ++ __func__, type, idx); ++ ret = -EINVAL; ++ goto exit; ++ } ++ ++ for (j = 0; j < KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; j++) { ++ struct kbase_ipa_control_prfcnt_config *prfcnt_config = ++ &ipa_ctrl->blocks[type].select[j]; ++ ++ if (prfcnt_config->refcount > 0) { ++ if (prfcnt_config->idx == idx) { ++ already_configured[i] = true; ++ break; ++ } ++ } ++ } ++ ++ if (j == KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS) { ++ already_configured[i] = false; ++ req_counters[type]++; ++ new_config = true; ++ } ++ } ++ ++ for (i = 0; i < KBASE_IPA_CORE_TYPE_NUM; i++) ++ if (req_counters[i] > ++ ipa_ctrl->blocks[i].num_available_counters) { ++ dev_err(kbdev->dev, ++ "%s: more counters (%zu) than available (%zu) have been requested for type %zu", ++ __func__, req_counters[i], ++ ipa_ctrl->blocks[i].num_available_counters, i); ++ ret = -EINVAL; ++ goto exit; ++ } ++ ++ /* ++ * The request has been validated. ++ * Firstly, find an available session and then set up the initial state ++ * of the session and update the configuration of performance counters ++ * in the internal state of kbase_ipa_control. 
++ */ ++ for (session_idx = 0; session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS; ++ session_idx++) { ++ session = &ipa_ctrl->sessions[session_idx]; ++ if (!session->active) ++ break; ++ } ++ ++ if (!session) { ++ dev_err(kbdev->dev, "%s: wrong or corrupt session state", ++ __func__); ++ ret = -EBUSY; ++ goto exit; ++ } ++ ++ for (i = 0; i < num_counters; i++) { ++ struct kbase_ipa_control_prfcnt_config *prfcnt_config; ++ size_t j; ++ u8 type = perf_counters[i].type; ++ u8 idx = perf_counters[i].idx; ++ ++ for (j = 0; j < KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; j++) { ++ prfcnt_config = &ipa_ctrl->blocks[type].select[j]; ++ ++ if (already_configured[i]) { ++ if ((prfcnt_config->refcount > 0) && ++ (prfcnt_config->idx == idx)) { ++ break; ++ } ++ } else { ++ if (prfcnt_config->refcount == 0) ++ break; ++ } ++ } ++ ++ if (WARN_ON((prfcnt_config->refcount > 0 && ++ prfcnt_config->idx != idx) || ++ (j == KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS))) { ++ dev_err(kbdev->dev, ++ "%s: invalid internal state: counter already configured or no counter available to configure", ++ __func__); ++ ret = -EBUSY; ++ goto exit; ++ } ++ ++ if (prfcnt_config->refcount == 0) { ++ prfcnt_config->idx = idx; ++ ipa_ctrl->blocks[type].num_available_counters--; ++ } ++ ++ session->prfcnts[i].accumulated_diff = 0; ++ session->prfcnts[i].type = type; ++ session->prfcnts[i].select_idx = j; ++ session->prfcnts[i].scaling_factor = ++ perf_counters[i].scaling_factor; ++ session->prfcnts[i].gpu_norm = perf_counters[i].gpu_norm; ++ ++ /* Reports to this client for GPU time spent in protected mode ++ * should begin from the point of registration. ++ */ ++ session->last_query_time = ktime_get_ns(); ++ ++ /* Initially, no time has been spent in protected mode */ ++ session->protm_time = 0; ++ ++ prfcnt_config->refcount++; ++ } ++ ++ /* ++ * Apply new configuration, if necessary. ++ * As a temporary solution, make sure that the GPU is on ++ * before applying the new configuration. 
++ */ ++ if (new_config) { ++ u64 select_config[KBASE_IPA_CORE_TYPE_NUM]; ++ ++ build_select_config(ipa_ctrl, select_config); ++ ret = apply_select_config(kbdev, select_config); ++ if (ret) ++ dev_err(kbdev->dev, ++ "%s: failed to apply SELECT configuration", ++ __func__); ++ } ++ ++ if (!ret) { ++ /* Accumulator registers don't contain any sample if the timer ++ * has not been enabled first. Take a sample manually before ++ * enabling the timer. ++ */ ++ if (ipa_ctrl->num_active_sessions == 0) { ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), ++ COMMAND_SAMPLE); ++ ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE); ++ if (!ret) { ++ kbase_reg_write( ++ kbdev, IPA_CONTROL_REG(TIMER), ++ timer_value(ipa_ctrl->cur_gpu_rate)); ++ } else { ++ dev_err(kbdev->dev, ++ "%s: failed to sample new counters", ++ __func__); ++ } ++ } ++ } ++ ++ if (!ret) { ++ session->num_prfcnts = num_counters; ++ session->active = true; ++ ipa_ctrl->num_active_sessions++; ++ *client = session; ++ ++ /* ++ * Read current raw value to initialize the session. ++ * This is necessary to put the first query in condition ++ * to generate a correct value by calculating the difference ++ * from the beginning of the session. 
++		 */
++		for (i = 0; i < session->num_prfcnts; i++) {
++			struct kbase_ipa_control_prfcnt *prfcnt =
++				&session->prfcnts[i];
++			u64 raw_value = read_value_cnt(kbdev, (u8)prfcnt->type,
++						       prfcnt->select_idx);
++			prfcnt->latest_raw_value = raw_value;
++		}
++	}
++
++exit:
++	spin_unlock_irqrestore(&ipa_ctrl->lock, flags);
++	kbase_pm_context_idle(kbdev);
++	return ret;
++}
++KBASE_EXPORT_TEST_API(kbase_ipa_control_register);
++
++/*
++ * Release a client session and the counters it holds.
++ *
++ * Returns 0 on success. -EINVAL is returned for invalid arguments, for a
++ * handle that does not point at one of the session slots, or for a session
++ * that is not active; a failure to apply the new SELECT configuration is
++ * reported through the return value as well.
++ */
++int kbase_ipa_control_unregister(struct kbase_device *kbdev, const void *client)
++{
++	struct kbase_ipa_control *ipa_ctrl;
++	struct kbase_ipa_control_session *session;
++	int ret = 0;
++	size_t i;
++	unsigned long flags;
++	bool new_config = false, valid_session = false;
++
++	/* kbdev is validated on its own: the error message for the other
++	 * argument is emitted through kbdev->dev, which must not be
++	 * dereferenced when kbdev itself is NULL.
++	 */
++	if (WARN_ON(kbdev == NULL))
++		return -EINVAL;
++
++	if (WARN_ON(client == NULL)) {
++		dev_err(kbdev->dev, "%s: wrong input arguments", __func__);
++		return -EINVAL;
++	}
++
++	kbase_pm_context_active(kbdev);
++
++	ipa_ctrl = &kbdev->csf.ipa_control;
++	session = (struct kbase_ipa_control_session *)client;
++
++	spin_lock_irqsave(&ipa_ctrl->lock, flags);
++
++	/* The handle is only trusted if it is the address of a session slot */
++	for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) {
++		if (session == &ipa_ctrl->sessions[i]) {
++			valid_session = true;
++			break;
++		}
++	}
++
++	if (!valid_session) {
++		dev_err(kbdev->dev, "%s: invalid session handle", __func__);
++		ret = -EINVAL;
++		goto exit;
++	}
++
++	if (ipa_ctrl->num_active_sessions == 0) {
++		dev_err(kbdev->dev, "%s: no active sessions found", __func__);
++		ret = -EINVAL;
++		goto exit;
++	}
++
++	if (!session->active) {
++		dev_err(kbdev->dev, "%s: session is already inactive",
++			__func__);
++		ret = -EINVAL;
++		goto exit;
++	}
++
++	/* Drop this session's reference on each counter slot it uses */
++	for (i = 0; i < session->num_prfcnts; i++) {
++		struct kbase_ipa_control_prfcnt_config *prfcnt_config;
++		u8 type = session->prfcnts[i].type;
++		u8 idx = session->prfcnts[i].select_idx;
++
++		prfcnt_config = &ipa_ctrl->blocks[type].select[idx];
++
++		if (!WARN_ON(prfcnt_config->refcount == 0)) {
++			prfcnt_config->refcount--;
++			if (prfcnt_config->refcount == 0) {
++				new_config =
true;
++				ipa_ctrl->blocks[type].num_available_counters++;
++			}
++		}
++	}
++
++	if (new_config) {
++		u64 select_config[KBASE_IPA_CORE_TYPE_NUM];
++
++		build_select_config(ipa_ctrl, select_config);
++		ret = apply_select_config(kbdev, select_config);
++		if (ret)
++			dev_err(kbdev->dev,
++				"%s: failed to apply SELECT configuration",
++				__func__);
++	}
++
++	session->num_prfcnts = 0;
++	session->active = false;
++	ipa_ctrl->num_active_sessions--;
++
++exit:
++	spin_unlock_irqrestore(&ipa_ctrl->lock, flags);
++	kbase_pm_context_idle(kbdev);
++	return ret;
++}
++KBASE_EXPORT_TEST_API(kbase_ipa_control_unregister);
++
++/*
++ * Return and reset the values accumulated by a session's counters.
++ *
++ * values[] receives one entry per counter of the session; any remaining
++ * entries up to num_values are set to 0. When protected_time is not NULL
++ * it receives the protected-mode time accounted to the session since the
++ * previous query. Returns 0 on success, or -EINVAL for invalid arguments
++ * or when num_values is smaller than the number of session counters.
++ */
++int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client,
++			    u64 *values, size_t num_values, u64 *protected_time)
++{
++	struct kbase_ipa_control *ipa_ctrl;
++	struct kbase_ipa_control_session *session;
++	size_t i;
++	unsigned long flags;
++	bool gpu_ready;
++
++	/* kbdev is validated on its own: the error message for the other
++	 * arguments is emitted through kbdev->dev, which must not be
++	 * dereferenced when kbdev itself is NULL.
++	 */
++	if (WARN_ON(kbdev == NULL))
++		return -EINVAL;
++
++	if (WARN_ON(client == NULL) || WARN_ON(values == NULL)) {
++		dev_err(kbdev->dev, "%s: wrong input arguments", __func__);
++		return -EINVAL;
++	}
++
++	ipa_ctrl = &kbdev->csf.ipa_control;
++	session = (struct kbase_ipa_control_session *)client;
++
++	if (WARN_ON(num_values < session->num_prfcnts)) {
++		dev_err(kbdev->dev,
++			"%s: not enough space (%zu) to return all counter values (%zu)",
++			__func__, num_values, session->num_prfcnts);
++		return -EINVAL;
++	}
++
++	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
++	gpu_ready = kbdev->pm.backend.gpu_ready;
++
++	for (i = 0; i < session->num_prfcnts; i++) {
++		struct kbase_ipa_control_prfcnt *prfcnt = &session->prfcnts[i];
++
++		/* Registers are only read when the GPU is ready; otherwise
++		 * the last raw value is reused and the delta is zero.
++		 */
++		calc_prfcnt_delta(kbdev, prfcnt, gpu_ready);
++		/* Return all the accumulated difference */
++		values[i] = prfcnt->accumulated_diff;
++		prfcnt->accumulated_diff = 0;
++	}
++
++	if (protected_time) {
++		u64 time_now = ktime_get_ns();
++
++		/* This is the amount of protected-mode time spent prior to
++		 * the current protm period.
++		 */
++		*protected_time = session->protm_time;
++
++		if (kbdev->protected_mode) {
++			*protected_time +=
++				time_now - MAX(session->last_query_time,
++					       ipa_ctrl->protm_start);
++		}
++		session->last_query_time = time_now;
++		session->protm_time = 0;
++	}
++
++	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
++
++	for (i = session->num_prfcnts; i < num_values; i++)
++		values[i] = 0;
++
++	return 0;
++}
++KBASE_EXPORT_TEST_API(kbase_ipa_control_query);
++
++/*
++ * Capture the counter values of every active session before the GPU is
++ * powered off.
++ *
++ * The periodic TIMER is disabled, a manual SAMPLE command is issued, and
++ * the sampled values are then accumulated into each active session.
++ * Failures to complete either hardware step are logged and do not stop
++ * the accumulation. The caller must hold kbdev->hwaccess_lock.
++ */
++void kbase_ipa_control_handle_gpu_power_off(struct kbase_device *kbdev)
++{
++	struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
++	size_t session_idx;
++	int ret;
++
++	lockdep_assert_held(&kbdev->hwaccess_lock);
++
++	/* GPU should still be ready for use when this function gets called */
++	WARN_ON(!kbdev->pm.backend.gpu_ready);
++
++	/* Interrupts are already disabled and interrupt state is also saved */
++	spin_lock(&ipa_ctrl->lock);
++
++	/* First disable the automatic sampling through TIMER */
++	kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), 0);
++	ret = wait_status(kbdev, STATUS_TIMER_ENABLED);
++	if (ret) {
++		dev_err(kbdev->dev,
++			"Wait for disabling of IPA control timer failed: %d",
++			ret);
++	}
++
++	/* Now issue the manual SAMPLE command */
++	kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), COMMAND_SAMPLE);
++	ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE);
++	if (ret) {
++		dev_err(kbdev->dev,
++			"Wait for the completion of manual sample failed: %d",
++			ret);
++	}
++
++	/* kbase_ipa_control_unregister() clears a slot's active flag without
++	 * moving the remaining sessions, so active sessions are not
++	 * guaranteed to occupy the first num_active_sessions slots: visit
++	 * every slot and skip the inactive ones.
++	 */
++	for (session_idx = 0; session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS;
++	     session_idx++) {
++		struct kbase_ipa_control_session *session =
++			&ipa_ctrl->sessions[session_idx];
++		size_t i;
++
++		if (!session->active)
++			continue;
++
++		for (i = 0; i < session->num_prfcnts; i++) {
++			struct kbase_ipa_control_prfcnt *prfcnt =
++				&session->prfcnts[i];
++
++			calc_prfcnt_delta(kbdev, prfcnt, true);
++		}
++	}
++
++	spin_unlock(&ipa_ctrl->lock);
++}
++
++void kbase_ipa_control_handle_gpu_power_on(struct kbase_device *kbdev)
++{
++	struct kbase_ipa_control *ipa_ctrl =
&kbdev->csf.ipa_control; ++ int ret; ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ /* GPU should have become ready for use when this function gets called */ ++ WARN_ON(!kbdev->pm.backend.gpu_ready); ++ ++ /* Interrupts are already disabled and interrupt state is also saved */ ++ spin_lock(&ipa_ctrl->lock); ++ ++ /* Re-issue the APPLY command, this is actually needed only for CSHW */ ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), COMMAND_APPLY); ++ ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE); ++ if (ret) { ++ dev_err(kbdev->dev, ++ "Wait for the completion of apply command failed: %d", ++ ret); ++ } ++ ++ /* Re-enable the timer for periodic sampling */ ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), ++ timer_value(ipa_ctrl->cur_gpu_rate)); ++ ++ spin_unlock(&ipa_ctrl->lock); ++} ++ ++void kbase_ipa_control_handle_gpu_reset_pre(struct kbase_device *kbdev) ++{ ++ /* A soft reset is treated as a power down */ ++ kbase_ipa_control_handle_gpu_power_off(kbdev); ++} ++KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_reset_pre); ++ ++void kbase_ipa_control_handle_gpu_reset_post(struct kbase_device *kbdev) ++{ ++ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; ++ int ret; ++ u32 status; ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ /* GPU should have become ready for use when this function gets called */ ++ WARN_ON(!kbdev->pm.backend.gpu_ready); ++ ++ /* Interrupts are already disabled and interrupt state is also saved */ ++ spin_lock(&ipa_ctrl->lock); ++ ++ /* Check the status reset bit is set before acknowledging it */ ++ status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS)); ++ if (status & STATUS_RESET) { ++ /* Acknowledge the reset command */ ++ kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), COMMAND_RESET_ACK); ++ ret = wait_status(kbdev, STATUS_RESET); ++ if (ret) { ++ dev_err(kbdev->dev, ++ "Wait for the reset ack command failed: %d", ++ ret); ++ } ++ } ++ ++ spin_unlock(&ipa_ctrl->lock); ++ ++ 
kbase_ipa_control_handle_gpu_power_on(kbdev);
++}
++KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_reset_post);
++
++#if MALI_UNIT_TEST
++void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev,
++					       u32 clk_index, u32 clk_rate_hz)
++{
++	struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
++	struct kbase_ipa_control_listener_data *listener_data =
++		ipa_ctrl->rtm_listener_data;
++
++	kbase_ipa_control_rate_change_notify(&listener_data->listener,
++					     clk_index, clk_rate_hz);
++}
++KBASE_EXPORT_TEST_API(kbase_ipa_control_rate_change_notify_test);
++#endif
++
++void kbase_ipa_control_protm_entered(struct kbase_device *kbdev)
++{
++	struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
++
++	lockdep_assert_held(&kbdev->hwaccess_lock);
++	ipa_ctrl->protm_start = ktime_get_ns();
++}
++
++/*
++ * Account the protected-mode period that has just ended.
++ *
++ * Every active session is credited with the time elapsed since the later
++ * of its last query and the start of the protected-mode period. The
++ * PROTECTED_MODE bit of the IPA_CONTROL STATUS register is then
++ * acknowledged if it is set. The caller must hold kbdev->hwaccess_lock.
++ */
++void kbase_ipa_control_protm_exited(struct kbase_device *kbdev)
++{
++	struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
++	size_t i;
++	u64 time_now = ktime_get_ns();
++	u32 status;
++
++	lockdep_assert_held(&kbdev->hwaccess_lock);
++
++	/* kbase_ipa_control_unregister() clears a slot's active flag without
++	 * moving the remaining sessions, so active sessions are not
++	 * guaranteed to occupy the first num_active_sessions slots: visit
++	 * every slot and skip the inactive ones.
++	 */
++	for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) {
++		struct kbase_ipa_control_session *session =
++			&ipa_ctrl->sessions[i];
++		u64 protm_time;
++
++		if (!session->active)
++			continue;
++
++		protm_time = time_now - MAX(session->last_query_time,
++					    ipa_ctrl->protm_start);
++
++		session->protm_time += protm_time;
++	}
++
++	/* Acknowledge the protected_mode bit in the IPA_CONTROL STATUS
++	 * register
++	 */
++	status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS));
++	if (status & STATUS_PROTECTED_MODE) {
++		int ret;
++
++		/* Acknowledge the protm command */
++		kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND),
++				COMMAND_PROTECTED_ACK);
++		ret = wait_status(kbdev, STATUS_PROTECTED_MODE);
++		if (ret) {
++			dev_err(kbdev->dev,
++				"Wait for the protm ack command failed: %d",
++				ret);
++		}
++	}
++}
++
+diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/ipa_control/mali_kbase_csf_ipa_control.h
b/dvalin/kernel/drivers/gpu/arm/midgard/csf/ipa_control/mali_kbase_csf_ipa_control.h +new file mode 100644 +index 0000000..348a52f +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/ipa_control/mali_kbase_csf_ipa_control.h +@@ -0,0 +1,244 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#ifndef _KBASE_CSF_IPA_CONTROL_H_ ++#define _KBASE_CSF_IPA_CONTROL_H_ ++ ++#include ++ ++/** ++ * Maximum index accepted to configure an IPA Control performance counter. ++ */ ++#define KBASE_IPA_CONTROL_CNT_MAX_IDX ((u8)64 * 3) ++ ++/** ++ * struct kbase_ipa_control_perf_counter - Performance counter description ++ * ++ * @scaling_factor: Scaling factor by which the counter's value shall be ++ * multiplied. A scaling factor of 1 corresponds to units ++ * of 1 second if values are normalised by GPU frequency. ++ * @gpu_norm: Indicating whether counter values shall be normalized by ++ * GPU frequency. If true, returned values represent ++ * an interval of time expressed in seconds (when the scaling ++ * factor is set to 1). ++ * @type: Type of counter block for performance counter. ++ * @idx: Index of the performance counter inside the block. 
++ * It may be dependent on GPU architecture. ++ * It cannot be greater than KBASE_IPA_CONTROL_CNT_MAX_IDX. ++ * ++ * This structure is used by clients of the IPA Control component to describe ++ * a performance counter that they intend to read. The counter is identified ++ * by block and index. In addition to that, the client also specifies how ++ * values shall be represented. Raw values are a number of GPU cycles; ++ * if normalized, they are divided by GPU frequency and become an interval ++ * of time expressed in seconds, since the GPU frequency is given in Hz. ++ * The client may specify a scaling factor to multiply counter values before ++ * they are divided by frequency, in case the unit of time of 1 second is ++ * too low in resolution. For instance: a scaling factor of 1000 implies ++ * that the returned value is a time expressed in milliseconds; a scaling ++ * factor of 1000 * 1000 implies that the returned value is a time expressed ++ * in microseconds. ++ */ ++struct kbase_ipa_control_perf_counter { ++ u64 scaling_factor; ++ bool gpu_norm; ++ enum kbase_ipa_core_type type; ++ u8 idx; ++}; ++ ++/** ++ * kbase_ipa_control_init - Initialize the IPA Control component ++ * ++ * @kbdev: Pointer to Kbase device. ++ */ ++void kbase_ipa_control_init(struct kbase_device *kbdev); ++ ++/** ++ * kbase_ipa_control_term - Terminate the IPA Control component ++ * ++ * @kbdev: Pointer to Kbase device. ++ */ ++void kbase_ipa_control_term(struct kbase_device *kbdev); ++ ++/** ++ * kbase_ipa_control_register - Register a client to the IPA Control component ++ * ++ * @kbdev: Pointer to Kbase device. ++ * @perf_counters: Array of performance counters the client intends to read. ++ * For each counter the client specifies block, index, ++ * scaling factor and whether it must be normalized by GPU ++ * frequency. ++ * @num_counters: Number of performance counters. It cannot exceed the total ++ * number of counters that exist on the IPA Control interface. 
++ * @client: Handle to an opaque structure set by IPA Control if ++ * the registration is successful. This handle identifies ++ * a client's session and shall be provided in its future ++ * queries. ++ * ++ * A client needs to subscribe to the IPA Control component by declaring which ++ * performance counters it intends to read, and specifying a scaling factor ++ * and whether normalization is requested for each performance counter. ++ * The function shall configure the IPA Control interface accordingly and start ++ * a session for the client that made the request. A unique handle is returned ++ * if registration is successful in order to identify the client's session ++ * and be used for future queries. ++ * ++ * Return: 0 on success, negative -errno on error ++ */ ++int kbase_ipa_control_register( ++ struct kbase_device *kbdev, ++ const struct kbase_ipa_control_perf_counter *perf_counters, ++ size_t num_counters, void **client); ++ ++/** ++ * kbase_ipa_control_unregister - Unregister a client from IPA Control ++ * ++ * @kbdev: Pointer to kbase device. ++ * @client: Handle to an opaque structure that identifies the client session ++ * to terminate, as returned by kbase_ipa_control_register. ++ * ++ * Return: 0 on success, negative -errno on error ++ */ ++int kbase_ipa_control_unregister(struct kbase_device *kbdev, ++ const void *client); ++ ++/** ++ * kbase_ipa_control_query - Query performance counters ++ * ++ * @kbdev: Pointer to kbase device. ++ * @client: Handle to an opaque structure that identifies the client ++ * session, as returned by kbase_ipa_control_register. ++ * @values: Array of values queried from performance counters, whose ++ * length depends on the number of counters requested at ++ * the time of registration. Values are scaled and normalized ++ * and represent the difference since the last query. ++ * @num_values: Number of entries in the array of values that has been ++ * passed by the caller. 
It must be at least equal to the ++ * number of performance counters the client registered itself ++ * to read. ++ * @protected_time: Time spent in protected mode since last query, ++ * expressed in nanoseconds. This pointer may be NULL if the ++ * client doesn't want to know about this. ++ * ++ * A client that has already opened a session by registering itself to read ++ * some performance counters may use this function to query the values of ++ * those counters. The values returned are normalized by GPU frequency if ++ * requested and then multiplied by the scaling factor provided at the time ++ * of registration. Values always represent a difference since the last query. ++ * ++ * Performance counters are not updated while the GPU operates in protected ++ * mode. For this reason, returned values may be unreliable if the GPU has ++ * been in protected mode since the last query. The function returns success ++ * in that case, but it also gives a measure of how much time has been spent ++ * in protected mode. ++ * ++ * Return: 0 on success, negative -errno on error ++ */ ++int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client, ++ u64 *values, size_t num_values, ++ u64 *protected_time); ++ ++/** ++ * kbase_ipa_control_handle_gpu_power_on - Handle the GPU power on event ++ * ++ * @kbdev: Pointer to kbase device. ++ * ++ * This function is called after GPU has been powered and is ready for use. ++ * After the GPU power on, IPA Control component needs to ensure that the ++ * counters start incrementing again. ++ */ ++void kbase_ipa_control_handle_gpu_power_on(struct kbase_device *kbdev); ++ ++/** ++ * kbase_ipa_control_handle_gpu_power_off - Handle the GPU power off event ++ * ++ * @kbdev: Pointer to kbase device. ++ * ++ * This function is called just before the GPU is powered off when it is still ++ * ready for use. 
++ * IPA Control component needs to be aware of the GPU power off so that it can ++ * handle the query from Clients appropriately and return meaningful values ++ * to them. ++ */ ++void kbase_ipa_control_handle_gpu_power_off(struct kbase_device *kbdev); ++ ++/** ++ * kbase_ipa_control_handle_gpu_reset_pre - Handle the pre GPU reset event ++ * ++ * @kbdev: Pointer to kbase device. ++ * ++ * This function is called when the GPU is about to be reset. ++ */ ++void kbase_ipa_control_handle_gpu_reset_pre(struct kbase_device *kbdev); ++ ++/** ++ * kbase_ipa_control_handle_gpu_reset_post - Handle the post GPU reset event ++ * ++ * @kbdev: Pointer to kbase device. ++ * ++ * This function is called after the GPU has been reset. ++ */ ++void kbase_ipa_control_handle_gpu_reset_post(struct kbase_device *kbdev); ++ ++#if MALI_UNIT_TEST ++/** ++ * kbase_ipa_control_rate_change_notify_test - Notify GPU rate change ++ * (only for testing) ++ * ++ * @kbdev: Pointer to kbase device. ++ * @clk_index: Index of the clock for which the change has occurred. ++ * @clk_rate_hz: Clock frequency(Hz). ++ * ++ * Notify the IPA Control component about a GPU rate change. ++ */ ++void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev, ++ u32 clk_index, u32 clk_rate_hz); ++#endif /* MALI_UNIT_TEST */ ++ ++/** ++ * kbase_ipa_control_protm_entered - Tell IPA_CONTROL that protected mode ++ * has been entered. ++ * ++ * @kbdev: Pointer to kbase device. ++ * ++ * This function provides a means through which IPA_CONTROL can be informed ++ * that the GPU has entered protected mode. 
Since the GPU cannot access ++ * performance counters while in this mode, this information is useful as ++ * it implies (a) the values of these registers cannot change, so there's no ++ * point trying to read them, and (b) IPA_CONTROL has a means through which ++ * to record the duration of time the GPU is in protected mode, which can ++ * then be forwarded on to clients, who may wish, for example, to assume ++ * that the GPU was busy 100% of the time while in this mode. ++ */ ++void kbase_ipa_control_protm_entered(struct kbase_device *kbdev); ++ ++/** ++ * kbase_ipa_control_protm_exited - Tell IPA_CONTROL that protected mode ++ * has been exited. ++ * ++ * @kbdev: Pointer to kbase device ++ * ++ * This function provides a means through which IPA_CONTROL can be informed ++ * that the GPU has exited from protected mode. ++ */ ++void kbase_ipa_control_protm_exited(struct kbase_device *kbdev); ++ ++#endif /* _KBASE_CSF_IPA_CONTROL_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf.c +new file mode 100644 +index 0000000..d49e343 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf.c +@@ -0,0 +1,3069 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details.
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#include ++#include ++#include ++#include "mali_kbase_csf.h" ++#include "backend/gpu/mali_kbase_pm_internal.h" ++#include ++#include ++#include ++#include ++#include "mali_kbase_csf_tiler_heap.h" ++#include ++#include "mali_kbase_csf_timeout.h" ++#include ++ ++#define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK) ++#define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK) ++#define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1) ++ ++/** ++ * struct kbase_csf_event - CSF event callback. ++ * ++ * This structure belongs to the list of events which is part of a Kbase ++ * context, and describes a callback function with a custom parameter to pass ++ * to it when a CSF event is signalled. ++ * ++ * @link: Link to the rest of the list. ++ * @kctx: Pointer to the Kbase context this event belongs to. ++ * @callback: Callback function to call when a CSF event is signalled. ++ * @param: Parameter to pass to the callback function. 
++ */
++struct kbase_csf_event {
++	struct list_head link;
++	struct kbase_context *kctx;
++	kbase_csf_event_callback *callback;
++	void *param;
++};
++
++/* Priority translation table: BASE_QUEUE_GROUP_PRIORITY_COUNT entries holding
++ * KBASE_QUEUE_GROUP_PRIORITY_* values. Presumably indexed by the
++ * BASE_QUEUE_GROUP_PRIORITY_* value - confirm against the enum definitions.
++ */
++const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT] = {
++	KBASE_QUEUE_GROUP_PRIORITY_HIGH,
++	KBASE_QUEUE_GROUP_PRIORITY_MEDIUM,
++	KBASE_QUEUE_GROUP_PRIORITY_LOW,
++	KBASE_QUEUE_GROUP_PRIORITY_REALTIME
++};
++/* Reverse table: KBASE_QUEUE_GROUP_PRIORITY_COUNT entries holding
++ * BASE_QUEUE_GROUP_PRIORITY_* values. Presumably indexed by the
++ * KBASE_QUEUE_GROUP_PRIORITY_* value - confirm against the enum definitions.
++ */
++const u8 kbasep_csf_relative_to_queue_group_priority[KBASE_QUEUE_GROUP_PRIORITY_COUNT] = {
++	BASE_QUEUE_GROUP_PRIORITY_REALTIME,
++	BASE_QUEUE_GROUP_PRIORITY_HIGH,
++	BASE_QUEUE_GROUP_PRIORITY_MEDIUM,
++	BASE_QUEUE_GROUP_PRIORITY_LOW
++};
++
++/* Release the mmap cookie held by @queue, if any.
++ *
++ * The caller must hold kctx->csf.lock. Nothing is done when the queue has no
++ * valid handle. Otherwise the handle is converted back to its cookie index,
++ * the matching user_pages_info slot is cleared and the cookie's bit is set
++ * again in kctx->csf.cookies (a set bit is what get_user_pages_mmap_handle()
++ * searches for when allocating). If the slot does not refer to @queue a
++ * warning is raised and the slot and bitmap are left untouched. The queue's
++ * handle is invalidated in both cases.
++ */
++static void put_user_pages_mmap_handle(struct kbase_context *kctx,
++			struct kbase_queue *queue)
++{
++	unsigned long cookie_nr;
++
++	lockdep_assert_held(&kctx->csf.lock);
++
++	if (queue->handle == BASEP_MEM_INVALID_HANDLE)
++		return;
++
++	/* Inverse of the relocation done in get_user_pages_mmap_handle() */
++	cookie_nr =
++		PFN_DOWN(queue->handle - BASEP_MEM_CSF_USER_IO_PAGES_HANDLE);
++
++	if (!WARN_ON(kctx->csf.user_pages_info[cookie_nr] != queue)) {
++		/* free up cookie */
++		kctx->csf.user_pages_info[cookie_nr] = NULL;
++		bitmap_set(kctx->csf.cookies, cookie_nr, 1);
++	}
++
++	queue->handle = BASEP_MEM_INVALID_HANDLE;
++}
++
++/* Reserve a cookie, to be returned as a handle to userspace for creating
++ * the CPU mapping of the pair of input/output pages and Hw doorbell page.
++ * Will return 0 in case of success otherwise negative on failure.
++ */
++static int get_user_pages_mmap_handle(struct kbase_context *kctx,
++			struct kbase_queue *queue)
++{
++	unsigned long cookie, cookie_nr;
++
++	lockdep_assert_held(&kctx->csf.lock);
++
++	/* A set bit marks a free cookie, so an empty bitmap means that all
++	 * KBASE_CSF_NUM_USER_IO_PAGES_HANDLE cookies are in use.
++	 */
++	if (bitmap_empty(kctx->csf.cookies,
++				KBASE_CSF_NUM_USER_IO_PAGES_HANDLE)) {
++		dev_err(kctx->kbdev->dev,
++			"No csf cookies available for allocation!");
++		return -ENOMEM;
++	}
++
++	/* allocate a cookie */
++	cookie_nr = find_first_bit(kctx->csf.cookies,
++				KBASE_CSF_NUM_USER_IO_PAGES_HANDLE);
++	/* A free bit whose slot is still occupied means the bitmap and the
++	 * user_pages_info array have gone out of sync.
++	 */
++	if (kctx->csf.user_pages_info[cookie_nr]) {
++		dev_err(kctx->kbdev->dev,
++			"Inconsistent state of csf cookies!");
++		return -EINVAL;
++	}
++	kctx->csf.user_pages_info[cookie_nr] = queue;
++	bitmap_clear(kctx->csf.cookies, cookie_nr, 1);
++
++	/* relocate to correct base */
++	cookie = cookie_nr + PFN_DOWN(BASEP_MEM_CSF_USER_IO_PAGES_HANDLE);
++	cookie <<= PAGE_SHIFT;
++
++	queue->handle = (u64)cookie;
++
++	return 0;
++}
++
++/* Tear down the two-page mapping that starts at reg->start_pfn in the MCU
++ * address space (MCU_AS_NR) and then call kbase_remove_va_region() on @reg
++ * under kbdev->csf.reg_lock. A warning is raised if @reg is flagged
++ * KBASE_REG_FREE. @reg itself is not freed here.
++ */
++static void gpu_munmap_user_io_pages(struct kbase_context *kctx,
++			struct kbase_va_region *reg)
++{
++	size_t num_pages = 2;
++
++	kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu,
++			reg->start_pfn, num_pages, MCU_AS_NR);
++
++	WARN_ON(reg->flags & KBASE_REG_FREE);
++
++	mutex_lock(&kctx->kbdev->csf.reg_lock);
++	kbase_remove_va_region(reg);
++	mutex_unlock(&kctx->kbdev->csf.reg_lock);
++}
++
++/* Zero the insert/extract/active words in a queue's user I/O pages.
++ *
++ * queue->user_io_addr is treated as two consecutive pages: the first is used
++ * as the input page and the one at +PAGE_SIZE as the output page. The CS_*
++ * constants are divided by 4 to index 32-bit words, so they are presumably
++ * byte offsets - confirm against the register definitions.
++ */
++static void init_user_io_pages(struct kbase_queue *queue)
++{
++	u32 *input_addr = (u32 *)(queue->user_io_addr);
++	u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE);
++
++	input_addr[CS_INSERT_LO/4] = 0;
++	input_addr[CS_INSERT_HI/4] = 0;
++
++	input_addr[CS_EXTRACT_INIT_LO/4] = 0;
++	input_addr[CS_EXTRACT_INIT_HI/4] = 0;
++
++	output_addr[CS_EXTRACT_LO/4] = 0;
++	output_addr[CS_EXTRACT_HI/4] = 0;
++
++	output_addr[CS_ACTIVE/4] = 0;
++}
++
++/* Map the input/output pages in the shared interface segment of MCU firmware
++ * address space.
++ */
++static int gpu_mmap_user_io_pages(struct kbase_device *kbdev,
++		struct tagged_addr *phys, struct kbase_va_region *reg)
++{
++	unsigned long mem_flags = KBASE_REG_GPU_RD;
++	const size_t num_pages = 2;
++	int ret;
++
++	/* Kernels up to 4.4.147, and the 4.5.x series, always get the
++	 * non-cacheable attribute; on other kernels the attribute follows the
++	 * system coherency setting.
++	 */
++#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \
++		((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \
++		 (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE)))
++	mem_flags |=
++		KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
++#else
++	if (kbdev->system_coherency == COHERENCY_NONE) {
++		mem_flags |=
++			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
++	} else {
++		mem_flags |= KBASE_REG_SHARE_BOTH |
++			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED);
++	}
++#endif
++
++	/* Place the two-page region; KBASE_REG_FREE is cleared whether or not
++	 * the insertion succeeded.
++	 */
++	mutex_lock(&kbdev->csf.reg_lock);
++	ret = kbase_add_va_region_rbtree(kbdev, reg, 0, num_pages, 1);
++	reg->flags &= ~KBASE_REG_FREE;
++	mutex_unlock(&kbdev->csf.reg_lock);
++
++	if (ret)
++		return ret;
++
++	/* Map input page */
++	ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
++				     reg->start_pfn, &phys[0],
++				     1, mem_flags, MCU_AS_NR,
++				     KBASE_MEM_GROUP_CSF_IO);
++	if (ret)
++		goto bad_insert;
++
++	/* Map output page, it needs rw access */
++	mem_flags |= KBASE_REG_GPU_WR;
++	ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
++				     reg->start_pfn + 1, &phys[1],
++				     1, mem_flags, MCU_AS_NR,
++				     KBASE_MEM_GROUP_CSF_IO);
++	if (ret)
++		goto bad_insert_output_page;
++
++	return 0;
++
++	/* Unwind in reverse order: only the input page is mapped when the
++	 * output page insertion fails, so a single page is torn down.
++	 */
++bad_insert_output_page:
++	kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu,
++				 reg->start_pfn, 1, MCU_AS_NR);
++bad_insert:
++	mutex_lock(&kbdev->csf.reg_lock);
++	kbase_remove_va_region(reg);
++	mutex_unlock(&kbdev->csf.reg_lock);
++
++	return ret;
++}
++
++/* Remove the kernel-side vmap() of a queue's two user I/O pages and subtract
++ * them from the context's permanent_mapped_pages counter, all under the
++ * context's GPU VM lock. A warning is raised if the counter holds fewer than
++ * the two pages being subtracted.
++ */
++static void kernel_unmap_user_io_pages(struct kbase_context *kctx,
++			struct kbase_queue *queue)
++{
++	const size_t num_pages = 2;
++
++	kbase_gpu_vm_lock(kctx);
++
++	vunmap(queue->user_io_addr);
++
++	WARN_ON(num_pages > atomic_read(&kctx->permanent_mapped_pages));
++	atomic_sub(num_pages, &kctx->permanent_mapped_pages);
++
++	kbase_gpu_vm_unlock(kctx);
++}
++
++/* Create a kernel-side vmap() of the two pages in queue->phys and store the
++ * address in queue->user_io_addr, under the context's GPU VM lock.
++ *
++ * Returns 0 on success. Returns -ENOMEM if the two pages do not fit under
++ * KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES or if vmap() fails; the context's
++ * permanent_mapped_pages counter is only increased on success.
++ */
++static int kernel_map_user_io_pages(struct kbase_context *kctx,
++			struct kbase_queue *queue)
++{
++	struct page *page_list[2];
++	pgprot_t cpu_map_prot;
++	int ret = 0;
++	size_t i;
++
++	kbase_gpu_vm_lock(kctx);
++
++	if (ARRAY_SIZE(page_list) > (KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES -
++			 atomic_read(&kctx->permanent_mapped_pages))) {
++		ret = -ENOMEM;
++		goto unlock;
++	}
++
++	/* The pages are mapped to Userspace also, so use the same mapping
++	 * attributes as used inside the CPU page fault handler.
++	 */
++#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \
++		((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \
++		 (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE)))
++	cpu_map_prot = pgprot_device(PAGE_KERNEL);
++#else
++	if (kctx->kbdev->system_coherency == COHERENCY_NONE)
++		cpu_map_prot = pgprot_writecombine(PAGE_KERNEL);
++	else
++		cpu_map_prot = PAGE_KERNEL;
++#endif
++
++	for (i = 0; i < ARRAY_SIZE(page_list); i++)
++		page_list[i] = as_page(queue->phys[i]);
++
++	queue->user_io_addr = vmap(page_list, ARRAY_SIZE(page_list), VM_MAP, cpu_map_prot);
++
++	if (!queue->user_io_addr)
++		ret = -ENOMEM;
++	else
++		atomic_add(ARRAY_SIZE(page_list), &kctx->permanent_mapped_pages);
++
++unlock:
++	kbase_gpu_vm_unlock(kctx);
++	return ret;
++}
++
++static void term_queue_group(struct kbase_queue_group *group);
++static void get_queue(struct kbase_queue *queue);
++static void release_queue(struct kbase_queue *queue);
++
++/**
++ * kbase_csf_free_command_stream_user_pages() - Free the resources allocated
++ *				    for a queue at the time of bind.
++ *
++ * @kctx:	Address of the kbase context within which the queue was created.
++ * @queue:	Pointer to the queue to be unlinked.
++ *
++ * This function will free the pair of physical pages allocated for a GPU
++ * command queue, and also release the hardware doorbell page, that were mapped
++ * into the process address space to enable direct submission of commands to
++ * the hardware. Also releases the reference taken on the queue when the mapping
++ * was created.
++ *
++ * This function will be called only when the mapping is being removed and
++ * so the resources for queue will not get freed up until the mapping is
++ * removed even though userspace could have terminated the queue.
++ * Kernel will ensure that the termination of Kbase context would only be
++ * triggered after the mapping is removed.
++ *
++ * If an explicit or implicit unbind was missed by the userspace then the
++ * mapping will persist. On process exit kernel itself will remove the mapping.
++ */
++static void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx,
++		struct kbase_queue *queue)
++{
++	const size_t num_pages = 2;
++
++	/* Undo the GPU mapping first, then the kernel mapping, and only then
++	 * hand the physical pages back to the CSF I/O memory pool.
++	 */
++	gpu_munmap_user_io_pages(kctx, queue->reg);
++	kernel_unmap_user_io_pages(kctx, queue);
++
++	kbase_mem_pool_free_pages(
++		&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
++		num_pages, queue->phys, true, false);
++
++	kfree(queue->reg);
++	queue->reg = NULL;
++
++	/* If the queue has already been terminated by userspace
++	 * then the ref count for queue object will drop to 0 here.
++	 */
++	release_queue(queue);
++}
++
++/* Allocate and map the pair of user I/O pages for @queue.
++ *
++ * The caller must hold kctx->csf.lock. In order: a two-page region is
++ * allocated from the MCU shared zone, two physical pages are taken from the
++ * context's CSF I/O memory pool, the pages are mapped into the kernel and
++ * zero-initialised, and finally mapped into the MCU address space. On success
++ * the queue is given a doorbell file offset, a second reference is taken on
++ * it and its bind state becomes KBASE_CSF_QUEUE_BOUND.
++ *
++ * Returns 0 on success. Every failure path returns -ENOMEM, whatever error
++ * the failing helper reported, after undoing the steps already completed.
++ */
++int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx,
++			struct kbase_queue *queue)
++{
++	struct kbase_device *kbdev = kctx->kbdev;
++	struct kbase_va_region *reg;
++	const size_t num_pages = 2;
++	int ret;
++
++	lockdep_assert_held(&kctx->csf.lock);
++
++	reg = kbase_alloc_free_region(&kctx->kbdev->csf.shared_reg_rbtree, 0,
++				      num_pages, KBASE_REG_ZONE_MCU_SHARED);
++	if (!reg)
++		return -ENOMEM;
++
++	ret = kbase_mem_pool_alloc_pages(
++				&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
++				num_pages, queue->phys, false);
++
++	/* Anything other than exactly two pages is treated as a failure */
++	if (ret != num_pages)
++		goto phys_alloc_failed;
++
++	ret = kernel_map_user_io_pages(kctx, queue);
++	if (ret)
++		goto kernel_map_failed;
++
++	init_user_io_pages(queue);
++
++	ret = gpu_mmap_user_io_pages(kctx->kbdev, queue->phys, reg);
++	if (ret)
++		goto gpu_mmap_failed;
++
++	queue->reg = reg;
++
++	mutex_lock(&kbdev->csf.reg_lock);
++	/* Restart the running offset from 0 once it exceeds
++	 * U32_MAX - BASEP_QUEUE_NR_MMAP_USER_PAGES + 1.
++	 */
++	if (kbdev->csf.db_file_offsets >
++			(U32_MAX - BASEP_QUEUE_NR_MMAP_USER_PAGES + 1))
++		kbdev->csf.db_file_offsets = 0;
++
++	queue->db_file_offset = kbdev->csf.db_file_offsets;
++	kbdev->csf.db_file_offsets += BASEP_QUEUE_NR_MMAP_USER_PAGES;
++
++	WARN(atomic_read(&queue->refcount) != 1, "Incorrect refcounting for queue object\n");
++	/* This is the second reference taken on the queue object and
++	 * would be dropped only when the IO mapping is removed either
++	 * explicitly by userspace or implicitly by kernel on process exit.
++	 */
++	get_queue(queue);
++	queue->bind_state = KBASE_CSF_QUEUE_BOUND;
++	mutex_unlock(&kbdev->csf.reg_lock);
++
++	return 0;
++
++	/* Error unwinding: each label undoes the step completed just before
++	 * the failing one and falls through to the earlier ones.
++	 */
++gpu_mmap_failed:
++	kernel_unmap_user_io_pages(kctx, queue);
++
++kernel_map_failed:
++	kbase_mem_pool_free_pages(
++		&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
++		num_pages, queue->phys, false, false);
++
++phys_alloc_failed:
++	kfree(reg);
++
++	return -ENOMEM;
++}
++
++/* Look up a queue group by handle. The caller must hold kctx->csf.lock.
++ *
++ * Returns the group stored at index @group_handle of kctx->csf.queue_groups,
++ * or NULL when the handle is not below MAX_QUEUE_GROUP_NUM, the slot is empty,
++ * or (with a warning) the stored group's own handle does not match the index.
++ */
++static struct kbase_queue_group *find_queue_group(struct kbase_context *kctx,
++						   u8 group_handle)
++{
++	uint index = group_handle;
++
++	lockdep_assert_held(&kctx->csf.lock);
++
++	if (index < MAX_QUEUE_GROUP_NUM && kctx->csf.queue_groups[index]) {
++		if (WARN_ON(kctx->csf.queue_groups[index]->handle != index))
++			return NULL;
++		return kctx->csf.queue_groups[index];
++	}
++
++	return NULL;
++}
++
++/* Locked wrapper around find_queue_group(): takes kctx->csf.lock for the
++ * look-up and returns 0 if a group exists for @group_handle, -EINVAL if not.
++ * The result only reflects the state at the time the lock was held.
++ */
++int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx,
++	u8 group_handle)
++{
++	struct kbase_queue_group *group;
++
++	mutex_lock(&kctx->csf.lock);
++	group = find_queue_group(kctx, group_handle);
++	mutex_unlock(&kctx->csf.lock);
++
++	return group ? 0 : -EINVAL;
++}
++
++/* Search the context's queue list for the queue whose base_addr equals
++ * @base_addr. The caller must hold kctx->csf.lock. Returns NULL if there is
++ * no such queue.
++ */
++static struct kbase_queue *find_queue(struct kbase_context *kctx, u64 base_addr)
++{
++	struct kbase_queue *queue;
++
++	lockdep_assert_held(&kctx->csf.lock);
++
++	list_for_each_entry(queue, &kctx->csf.queue_list, link) {
++		if (base_addr == queue->base_addr)
++			return queue;
++	}
++
++	return NULL;
++}
++
++/* Take an additional reference on @queue. The count is only incremented when
++ * it is non-zero; a warning is raised if it had already dropped to zero.
++ */
++static void get_queue(struct kbase_queue *queue)
++{
++	WARN_ON(!atomic_inc_not_zero(&queue->refcount));
++}
++
++/* Drop one reference on @queue and act on it when the count reaches zero.
++ * The caller must hold the csf.lock of the queue's context.
++ */
++static void release_queue(struct kbase_queue *queue)
++{
++	lockdep_assert_held(&queue->kctx->csf.lock);
++
++	WARN_ON(atomic_read(&queue->refcount) <= 0);
++
++	if (atomic_dec_and_test(&queue->refcount)) {
++		/* The queue can't still be on the per context list.
*/
++		WARN_ON(!list_empty(&queue->link));
++		WARN_ON(queue->group);
++		kfree(queue);
++	}
++}
++
++static void oom_event_worker(struct work_struct *data);
++static void fatal_event_worker(struct work_struct *data);
++
++/**
++ * csf_queue_register_internal() - Register a GPU command queue with a context.
++ *
++ * @kctx:   Kbase context the queue is registered with.
++ * @reg:    Basic registration parameters, or NULL when @reg_ex is given.
++ * @reg_ex: Extended registration parameters (basic parameters followed by the
++ *          cs_trace configuration), or NULL when @reg is given.
++ *
++ * Between reg and reg_ex, one and only one must be null.
++ *
++ * The ring buffer must lie entirely inside a valid, allocated region of the
++ * context. When cs_trace is enabled (ex_buffer_size != 0) the trace buffer
++ * must also fit inside a valid region and the offset variable address must
++ * fall inside a valid region. On success the new queue holds one reference,
++ * is added to the context's queue list, records the ring buffer's region in
++ * queue_reg and flags that region KBASE_REG_NO_USER_FREE.
++ *
++ * Return: 0 on success; -EINVAL for bad parameters, an already registered
++ *         buffer address or a buffer that does not fit in its region; -ENOENT
++ *         if an address is not inside a valid region; -ENOMEM if the queue
++ *         object cannot be allocated.
++ */
++static int csf_queue_register_internal(struct kbase_context *kctx,
++			struct kbase_ioctl_cs_queue_register *reg,
++			struct kbase_ioctl_cs_queue_register_ex *reg_ex)
++{
++	struct kbase_queue *queue;
++	int ret = 0;
++	struct kbase_va_region *region;
++	u64 queue_addr;
++	size_t queue_size;
++
++	/* Only one pointer expected, otherwise coding error */
++	if ((reg == NULL && reg_ex == NULL) || (reg && reg_ex)) {
++		dev_err(kctx->kbdev->dev,
++			"Error, one and only one param-ptr expected!");
++		return -EINVAL;
++	}
++
++	/* struct kbase_ioctl_cs_queue_register_ex contains a full
++	 * struct kbase_ioctl_cs_queue_register at the start address. So
++	 * the pointer can be safely cast to pointing to a
++	 * kbase_ioctl_cs_queue_register object.
++	 */
++	if (reg_ex)
++		reg = (struct kbase_ioctl_cs_queue_register *)reg_ex;
++
++	/* Validate the queue priority */
++	if (reg->priority > BASE_QUEUE_MAX_PRIORITY)
++		return -EINVAL;
++
++	queue_addr = reg->buffer_gpu_addr;
++	queue_size = reg->buffer_size >> PAGE_SHIFT;
++
++	mutex_lock(&kctx->csf.lock);
++
++	/* Check if queue is already registered */
++	if (find_queue(kctx, queue_addr) != NULL) {
++		ret = -EINVAL;
++		goto out;
++	}
++
++	/* Check if the queue address is valid */
++	kbase_gpu_vm_lock(kctx);
++	region = kbase_region_tracker_find_region_enclosing_address(kctx,
++								    queue_addr);
++
++	if (kbase_is_region_invalid_or_free(region)) {
++		ret = -ENOENT;
++		goto out_unlock_vm;
++	}
++
++	if (queue_size > (region->nr_pages -
++			  ((queue_addr >> PAGE_SHIFT) - region->start_pfn))) {
++		ret = -EINVAL;
++		goto out_unlock_vm;
++	}
++
++	/* Check address validity on cs_trace buffer etc. Don't care
++	 * if not enabled (i.e. when size is 0).
++	 */
++	if (reg_ex && reg_ex->ex_buffer_size) {
++		int buf_pages = (reg_ex->ex_buffer_size +
++				 (1 << PAGE_SHIFT) - 1) >> PAGE_SHIFT;
++		/* Use a separate pointer for the cs_trace look-ups so that
++		 * 'region' keeps referring to the ring buffer's region: that
++		 * is the region recorded in queue->queue_reg and flagged
++		 * KBASE_REG_NO_USER_FREE below.
++		 */
++		struct kbase_va_region *region_ex;
++
++		region_ex = kbase_region_tracker_find_region_enclosing_address(
++					kctx, reg_ex->ex_buffer_base);
++		if (kbase_is_region_invalid_or_free(region_ex)) {
++			ret = -ENOENT;
++			goto out_unlock_vm;
++		}
++
++		if (buf_pages > (region_ex->nr_pages -
++				 ((reg_ex->ex_buffer_base >> PAGE_SHIFT) -
++				 region_ex->start_pfn))) {
++			ret = -EINVAL;
++			goto out_unlock_vm;
++		}
++
++		region_ex = kbase_region_tracker_find_region_enclosing_address(
++					kctx, reg_ex->ex_offset_var_addr);
++		if (kbase_is_region_invalid_or_free(region_ex)) {
++			ret = -ENOENT;
++			goto out_unlock_vm;
++		}
++	}
++
++	queue = kzalloc(sizeof(struct kbase_queue), GFP_KERNEL);
++
++	if (!queue) {
++		ret = -ENOMEM;
++		goto out_unlock_vm;
++	}
++
++	queue->kctx = kctx;
++	queue->base_addr = queue_addr;
++	queue->queue_reg = region;
++	queue->size = (queue_size << PAGE_SHIFT);
++	queue->csi_index = KBASEP_IF_NR_INVALID;
++	queue->enabled = false;
++
++	queue->priority = reg->priority;
++	/* Initial reference, dropped when the queue is terminated */
++	atomic_set(&queue->refcount, 1);
++
++	queue->group = NULL;
++	queue->bind_state = KBASE_CSF_QUEUE_UNBOUND;
++	queue->handle = BASEP_MEM_INVALID_HANDLE;
++	queue->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
++
++	queue->status_wait = 0;
++	queue->sync_ptr = 0;
++	queue->sync_value = 0;
++
++	queue->sb_status = 0;
++	queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED;
++
++	INIT_LIST_HEAD(&queue->link);
++	INIT_LIST_HEAD(&queue->error.link);
++	INIT_WORK(&queue->oom_event_work, oom_event_worker);
++	INIT_WORK(&queue->fatal_event_work, fatal_event_worker);
++	list_add(&queue->link, &kctx->csf.queue_list);
++
++	/* Pin the ring buffer's region; the flag is cleared again through
++	 * queue->queue_reg when the queue is terminated.
++	 */
++	region->flags |= KBASE_REG_NO_USER_FREE;
++
++	/* Initialize the cs_trace configuration parameters. When buffer_size
++	 * is 0, trace is disabled. Here we only update the fields when
++	 * enabled, otherwise leave them as default zeros.
++	 */
++	if (reg_ex && reg_ex->ex_buffer_size) {
++		u32 cfg = CS_INSTR_CONFIG_EVENT_SIZE_SET(
++					0, reg_ex->ex_event_size);
++		cfg = CS_INSTR_CONFIG_EVENT_STATE_SET(
++					cfg, reg_ex->ex_event_state);
++
++		queue->trace_cfg = cfg;
++		queue->trace_buffer_size = reg_ex->ex_buffer_size;
++		queue->trace_buffer_base = reg_ex->ex_buffer_base;
++		queue->trace_offset_ptr = reg_ex->ex_offset_var_addr;
++	}
++
++out_unlock_vm:
++	kbase_gpu_vm_unlock(kctx);
++out:
++	mutex_unlock(&kctx->csf.lock);
++
++	return ret;
++}
++
++/* Register a queue from the basic parameters; cs_trace stays disabled. */
++int kbase_csf_queue_register(struct kbase_context *kctx,
++			     struct kbase_ioctl_cs_queue_register *reg)
++{
++	return csf_queue_register_internal(kctx, reg, NULL);
++}
++
++/* Register a queue from the extended parameters, which add the cs_trace
++ * configuration.
++ *
++ * Fails with -EINVAL if the firmware interface version is older than 1.1.0.
++ * When cs_trace is enabled (ex_buffer_size != 0) it also fails with -EINVAL
++ * if the event size exceeds the maximum reported in instr_features, if the
++ * buffer size is not a power of two, or if the buffer is smaller than
++ * (1 << ex_event_size) multiplied by the reported offset update rate.
++ * Otherwise returns the result of csf_queue_register_internal().
++ */
++int kbase_csf_queue_register_ex(struct kbase_context *kctx,
++			struct kbase_ioctl_cs_queue_register_ex *reg)
++{
++	struct kbase_csf_global_iface const *const iface =
++						&kctx->kbdev->csf.global_iface;
++	u32 const glb_version = iface->version;
++	u32 instr = iface->instr_features;
++	u8 max_size = GLB_INSTR_FEATURES_EVENT_SIZE_MAX_GET(instr);
++
++	/* If cs_trace_command not supported, the call fails */
++	if (glb_version < kbase_csf_interface_version(1, 1, 0))
++		return -EINVAL;
++
++	/* Validate the cs_trace configuration parameters */
++	if (reg->ex_buffer_size) {
++		u32 min_buf_size;
++
++		/* The event size comes from userspace and is used as a shift
++		 * count below, so reject it before shifting. NOTE(review):
++		 * this relies on the firmware-reported maximum being below
++		 * 32 - confirm against the GLB_INSTR_FEATURES field width.
++		 */
++		if (reg->ex_event_size > max_size)
++			return -EINVAL;
++
++		min_buf_size = (1u << reg->ex_event_size) *
++			GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_GET(instr);
++
++		if ((reg->ex_buffer_size & (reg->ex_buffer_size - 1)) ||
++		    (reg->ex_buffer_size < min_buf_size))
++			return -EINVAL;
++	}
++
++	return csf_queue_register_internal(kctx, NULL, reg);
++}
++
++static void unbind_queue(struct kbase_context *kctx,
++		struct kbase_queue *queue);
++
++void kbase_csf_queue_terminate(struct kbase_context *kctx,
++			      struct kbase_ioctl_cs_queue_terminate *term)
++{
++	struct kbase_device *kbdev = kctx->kbdev;
++	struct kbase_queue *queue;
++	int err;
++	bool reset_prevented = false;
++
++	err = kbase_reset_gpu_prevent_and_wait(kbdev);
++	if (err)
++		dev_warn(
++
kbdev->dev, ++ "Unsuccessful GPU reset detected when terminating queue (buffer_addr=0x%.16llx), attempting to terminate regardless", ++ term->buffer_gpu_addr); ++ else ++ reset_prevented = true; ++ ++ mutex_lock(&kctx->csf.lock); ++ queue = find_queue(kctx, term->buffer_gpu_addr); ++ ++ if (queue) { ++ unsigned long flags; ++ ++ /* As the GPU queue has been terminated by the ++ * user space, undo the actions that were performed when the ++ * queue was registered i.e. remove the queue from the per ++ * context list & release the initial reference. The subsequent ++ * lookups for the queue in find_queue() would fail. ++ */ ++ list_del_init(&queue->link); ++ ++ /* Stop the CSI to which queue was bound */ ++ unbind_queue(kctx, queue); ++ ++ kbase_gpu_vm_lock(kctx); ++ if (!WARN_ON(!queue->queue_reg)) { ++ /* After this the Userspace would be able to free the ++ * memory for GPU queue. In case the Userspace missed ++ * terminating the queue, the cleanup will happen on ++ * context termination where teardown of region tracker ++ * would free up the GPU queue memory. 
++ */ ++ queue->queue_reg->flags &= ~KBASE_REG_NO_USER_FREE; ++ } ++ kbase_gpu_vm_unlock(kctx); ++ ++ spin_lock_irqsave(&kctx->csf.event_lock, flags); ++ dev_dbg(kctx->kbdev->dev, ++ "Remove any pending command queue fatal from context %pK\n", ++ (void *)kctx); ++ list_del_init(&queue->error.link); ++ spin_unlock_irqrestore(&kctx->csf.event_lock, flags); ++ ++ release_queue(queue); ++ } ++ ++ mutex_unlock(&kctx->csf.lock); ++ if (reset_prevented) ++ kbase_reset_gpu_allow(kbdev); ++} ++ ++int kbase_csf_queue_bind(struct kbase_context *kctx, union kbase_ioctl_cs_queue_bind *bind) ++{ ++ struct kbase_queue *queue; ++ struct kbase_queue_group *group; ++ u8 max_streams; ++ int ret = -EINVAL; ++ ++ mutex_lock(&kctx->csf.lock); ++ ++ group = find_queue_group(kctx, bind->in.group_handle); ++ queue = find_queue(kctx, bind->in.buffer_gpu_addr); ++ ++ if (!group || !queue) ++ goto out; ++ ++ /* For the time being, all CSGs have the same number of CSs ++ * so we check CSG 0 for this number ++ */ ++ max_streams = kctx->kbdev->csf.global_iface.groups[0].stream_num; ++ ++ if (bind->in.csi_index >= max_streams) ++ goto out; ++ ++ if (group->run_state == KBASE_CSF_GROUP_TERMINATED) ++ goto out; ++ ++ if (queue->group || group->bound_queues[bind->in.csi_index]) ++ goto out; ++ ++ ret = get_user_pages_mmap_handle(kctx, queue); ++ if (ret) ++ goto out; ++ ++ bind->out.mmap_handle = queue->handle; ++ group->bound_queues[bind->in.csi_index] = queue; ++ queue->group = group; ++ queue->csi_index = bind->in.csi_index; ++ queue->bind_state = KBASE_CSF_QUEUE_BIND_IN_PROGRESS; ++ ++out: ++ mutex_unlock(&kctx->csf.lock); ++ ++ return ret; ++} ++ ++static struct kbase_queue_group *get_bound_queue_group( ++ struct kbase_queue *queue) ++{ ++ struct kbase_context *kctx = queue->kctx; ++ struct kbase_queue_group *group; ++ ++ if (queue->bind_state == KBASE_CSF_QUEUE_UNBOUND) ++ return NULL; ++ ++ if (!queue->group) ++ return NULL; ++ ++ if (queue->csi_index == KBASEP_IF_NR_INVALID) { ++ 
dev_warn(kctx->kbdev->dev, "CS interface index is incorrect\n"); ++ return NULL; ++ } ++ ++ group = queue->group; ++ ++ if (group->bound_queues[queue->csi_index] != queue) { ++ dev_warn(kctx->kbdev->dev, "Incorrect mapping between queues & queue groups\n"); ++ return NULL; ++ } ++ ++ return group; ++} ++ ++void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot) ++{ ++ if (WARN_ON(slot < 0)) ++ return; ++ ++ kbase_csf_ring_csg_slots_doorbell(kbdev, (u32) (1 << slot)); ++} ++ ++void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev, ++ u32 slot_bitmap) ++{ ++ const struct kbase_csf_global_iface *const global_iface = ++ &kbdev->csf.global_iface; ++ const u32 allowed_bitmap = ++ (u32) ((1U << kbdev->csf.global_iface.group_num) - 1); ++ u32 value; ++ ++ if (WARN_ON(slot_bitmap > allowed_bitmap)) ++ return; ++ ++ value = kbase_csf_firmware_global_output(global_iface, GLB_DB_ACK); ++ value ^= slot_bitmap; ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_DB_REQ, value, ++ slot_bitmap); ++ ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++} ++ ++void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev, ++ struct kbase_queue *queue) ++{ ++ mutex_lock(&kbdev->csf.reg_lock); ++ ++ if (queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID) ++ kbase_csf_ring_doorbell(kbdev, queue->doorbell_nr); ++ ++ mutex_unlock(&kbdev->csf.reg_lock); ++} ++ ++void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, ++ int csi_index, int csg_nr, ++ bool ring_csg_doorbell) ++{ ++ struct kbase_csf_cmd_stream_group_info *ginfo; ++ u32 value; ++ ++ if (WARN_ON(csg_nr < 0) || ++ WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num)) ++ return; ++ ++ ginfo = &kbdev->csf.global_iface.groups[csg_nr]; ++ ++ if (WARN_ON(csi_index < 0) || ++ WARN_ON(csi_index >= ginfo->stream_num)) ++ return; ++ ++ value = kbase_csf_firmware_csg_output(ginfo, CSG_DB_ACK); ++ value ^= (1 << csi_index); ++ kbase_csf_firmware_csg_input_mask(ginfo, CSG_DB_REQ, value, ++ 
1 << csi_index); ++ ++ if (likely(ring_csg_doorbell)) ++ kbase_csf_ring_csg_doorbell(kbdev, csg_nr); ++} ++ ++int kbase_csf_queue_kick(struct kbase_context *kctx, ++ struct kbase_ioctl_cs_queue_kick *kick) ++{ ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct kbase_queue_group *group; ++ struct kbase_queue *queue; ++ int err = 0; ++ ++ err = kbase_reset_gpu_prevent_and_wait(kbdev); ++ if (err) { ++ dev_warn( ++ kbdev->dev, ++ "Unsuccessful GPU reset detected when kicking queue (buffer_addr=0x%.16llx)", ++ kick->buffer_gpu_addr); ++ return err; ++ } ++ ++ mutex_lock(&kctx->csf.lock); ++ queue = find_queue(kctx, kick->buffer_gpu_addr); ++ if (!queue) ++ err = -EINVAL; ++ ++ if (!err) { ++ group = get_bound_queue_group(queue); ++ if (!group) { ++ dev_err(kctx->kbdev->dev, "queue not bound\n"); ++ err = -EINVAL; ++ } ++ } ++ ++ if (!err) ++ err = kbase_csf_scheduler_queue_start(queue); ++ mutex_unlock(&kctx->csf.lock); ++ kbase_reset_gpu_allow(kbdev); ++ ++ return err; ++} ++ ++static void unbind_stopped_queue(struct kbase_context *kctx, ++ struct kbase_queue *queue) ++{ ++ lockdep_assert_held(&kctx->csf.lock); ++ ++ if (queue->bind_state != KBASE_CSF_QUEUE_UNBOUND) { ++ unsigned long flags; ++ ++ kbase_csf_scheduler_spin_lock(kctx->kbdev, &flags); ++ bitmap_clear(queue->group->protm_pending_bitmap, ++ queue->csi_index, 1); ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kctx->kbdev, PROTM_PENDING_CLEAR, ++ queue->group, queue, queue->group->protm_pending_bitmap[0]); ++ queue->group->bound_queues[queue->csi_index] = NULL; ++ queue->group = NULL; ++ kbase_csf_scheduler_spin_unlock(kctx->kbdev, flags); ++ ++ put_user_pages_mmap_handle(kctx, queue); ++ queue->bind_state = KBASE_CSF_QUEUE_UNBOUND; ++ } ++} ++/** ++ * unbind_queue() - Remove the linkage between a GPU command queue and the group ++ * to which it was bound or being bound. ++ * ++ * @kctx: Address of the kbase context within which the queue was created. ++ * @queue: Pointer to the queue to be unlinked. 
++ * ++ * This function will also send the stop request to firmware for the CS ++ * if the group to which the GPU command queue was bound is scheduled. ++ * ++ * This function would be called when :- ++ * - queue is being unbound. This would happen when the IO mapping ++ * created on bind is removed explicitly by userspace or the process ++ * is getting exited. ++ * - queue group is being terminated which still has queues bound ++ * to it. This could happen on an explicit terminate request from userspace ++ * or when the kbase context is being terminated. ++ * - queue is being terminated without completing the bind operation. ++ * This could happen if either the queue group is terminated ++ * after the CS_QUEUE_BIND ioctl but before the 2nd part of bind operation ++ * to create the IO mapping is initiated. ++ * - There is a failure in executing the 2nd part of bind operation, inside the ++ * mmap handler, which creates the IO mapping for queue. ++ */ ++ ++static void unbind_queue(struct kbase_context *kctx, struct kbase_queue *queue) ++{ ++ kbase_reset_gpu_assert_failed_or_prevented(kctx->kbdev); ++ lockdep_assert_held(&kctx->csf.lock); ++ ++ if (queue->bind_state != KBASE_CSF_QUEUE_UNBOUND) { ++ if (queue->bind_state == KBASE_CSF_QUEUE_BOUND) ++ kbase_csf_scheduler_queue_stop(queue); ++ ++ unbind_stopped_queue(kctx, queue); ++ } ++} ++ ++void kbase_csf_queue_unbind(struct kbase_queue *queue) ++{ ++ struct kbase_context *kctx = queue->kctx; ++ ++ lockdep_assert_held(&kctx->csf.lock); ++ ++ /* As the process itself is exiting, the termination of queue group can ++ * be done which would be much faster than stopping of individual ++ * queues. This would ensure a faster exit for the process especially ++ * in the case where CSI gets stuck. ++ * The CSI STOP request will wait for the in flight work to drain ++ * whereas CSG TERM request would result in an immediate abort or ++ * cancellation of the pending work. 
++ */ ++ if (current->flags & PF_EXITING) { ++ struct kbase_queue_group *group = get_bound_queue_group(queue); ++ ++ if (group) ++ term_queue_group(group); ++ ++ WARN_ON(queue->bind_state != KBASE_CSF_QUEUE_UNBOUND); ++ } else { ++ unbind_queue(kctx, queue); ++ } ++ ++ /* Free the resources, if allocated for this queue. */ ++ if (queue->reg) ++ kbase_csf_free_command_stream_user_pages(kctx, queue); ++} ++ ++void kbase_csf_queue_unbind_stopped(struct kbase_queue *queue) ++{ ++ struct kbase_context *kctx = queue->kctx; ++ ++ lockdep_assert_held(&kctx->csf.lock); ++ ++ WARN_ON(queue->bind_state == KBASE_CSF_QUEUE_BOUND); ++ unbind_stopped_queue(kctx, queue); ++ ++ /* Free the resources, if allocated for this queue. */ ++ if (queue->reg) ++ kbase_csf_free_command_stream_user_pages(kctx, queue); ++} ++ ++/** ++ * find_free_group_handle() - Find a free handle for a queue group ++ * ++ * @kctx: Address of the kbase context within which the queue group ++ * is to be created. ++ * ++ * Return: a queue group handle on success, or a negative error code on failure. ++ */ ++static int find_free_group_handle(struct kbase_context *const kctx) ++{ ++ /* find the available index in the array of CSGs per this context */ ++ int idx, group_handle = -ENOMEM; ++ ++ lockdep_assert_held(&kctx->csf.lock); ++ ++ for (idx = 0; ++ (idx != MAX_QUEUE_GROUP_NUM) && (group_handle < 0); ++ idx++) { ++ if (!kctx->csf.queue_groups[idx]) ++ group_handle = idx; ++ } ++ ++ return group_handle; ++} ++ ++/** ++ * iface_has_enough_streams() - Check that at least one CSG supports ++ * a given number of CS ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @cs_min: Minimum number of CSs required. ++ * ++ * Return: true if at least one CSG supports the given number ++ * of CSs (or more); otherwise false. 
++ */ ++static bool iface_has_enough_streams(struct kbase_device *const kbdev, ++ u32 const cs_min) ++{ ++ bool has_enough = false; ++ struct kbase_csf_cmd_stream_group_info *const groups = ++ kbdev->csf.global_iface.groups; ++ const u32 group_num = kbdev->csf.global_iface.group_num; ++ u32 i; ++ ++ for (i = 0; (i < group_num) && !has_enough; i++) { ++ if (groups[i].stream_num >= cs_min) ++ has_enough = true; ++ } ++ ++ return has_enough; ++} ++ ++/** ++ * create_normal_suspend_buffer() - Create normal-mode suspend buffer per ++ * queue group ++ * ++ * @kctx: Pointer to kbase context in which the queue group is created ++ * @s_buf: Pointer to suspend buffer that is attached to queue group ++ * ++ * Return: 0 if suspend buffer is successfully allocated and reflected to GPU ++ * MMU page table. Otherwise a negative error code: -ENOMEM if the region object or the page array cannot be allocated, else the error returned by the page allocation, VA region insertion or MMU insertion call. ++ */ ++static int create_normal_suspend_buffer(struct kbase_context *const kctx, ++ struct kbase_normal_suspend_buffer *s_buf) ++{ ++ struct kbase_va_region *reg = NULL; ++ const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR; ++ const size_t nr_pages = ++ PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size); ++ int err = 0; ++ ++ lockdep_assert_held(&kctx->csf.lock); ++ ++ /* Allocate and initialize Region Object */ ++ reg = kbase_alloc_free_region(&kctx->kbdev->csf.shared_reg_rbtree, 0, ++ nr_pages, KBASE_REG_ZONE_MCU_SHARED); ++ ++ if (!reg) ++ return -ENOMEM; ++ ++ s_buf->phy = kcalloc(nr_pages, sizeof(*s_buf->phy), GFP_KERNEL); ++ ++ if (!s_buf->phy) { ++ err = -ENOMEM; ++ goto phy_alloc_failed; ++ } ++ ++ /* Get physical page for a normal suspend buffer */ ++ err = kbase_mem_pool_alloc_pages( ++ &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], ++ nr_pages, &s_buf->phy[0], false); ++ ++ if (err < 0) ++ goto phy_pages_alloc_failed; ++ ++ /* Insert Region Object into rbtree and make virtual address available ++ * to map it to physical page ++ */ ++ mutex_lock(&kctx->kbdev->csf.reg_lock); ++ err = 
kbase_add_va_region_rbtree(kctx->kbdev, reg, 0, nr_pages, 1); ++ reg->flags &= ~KBASE_REG_FREE; ++ mutex_unlock(&kctx->kbdev->csf.reg_lock); ++ ++ if (err) ++ goto add_va_region_failed; ++ ++ /* Update MMU table */ ++ err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, ++ reg->start_pfn, &s_buf->phy[0], ++ nr_pages, mem_flags, ++ MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW); ++ if (err) ++ goto mmu_insert_failed; ++ ++ s_buf->reg = reg; ++ ++ return 0; ++ ++mmu_insert_failed: ++ mutex_lock(&kctx->kbdev->csf.reg_lock); ++ WARN_ON(kbase_remove_va_region(reg)); ++ mutex_unlock(&kctx->kbdev->csf.reg_lock); ++ ++add_va_region_failed: ++ kbase_mem_pool_free_pages( ++ &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages, ++ &s_buf->phy[0], false, false); ++ ++phy_pages_alloc_failed: ++ kfree(s_buf->phy); ++phy_alloc_failed: ++ kfree(reg); ++ ++ return err; ++} ++ ++/** ++ * create_protected_suspend_buffer() - Create protected-mode suspend buffer ++ * per queue group ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @s_buf: Pointer to suspend buffer that is attached to queue group ++ * ++ * Return: 0 if suspend buffer is successfully allocated and reflected to GPU ++ * MMU page table. Otherwise a negative error code: -ENOMEM if the region object, the page array or the protected memory cannot be allocated, else the error returned by the VA region insertion or MMU insertion call. 
++ */ ++static int create_protected_suspend_buffer(struct kbase_device *const kbdev, ++ struct kbase_protected_suspend_buffer *s_buf) ++{ ++ struct kbase_va_region *reg = NULL; ++ struct tagged_addr *phys = NULL; ++ const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR; ++ const size_t nr_pages = ++ PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); ++ int err = 0; ++ ++ /* Allocate and initialize Region Object */ ++ reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0, ++ nr_pages, KBASE_REG_ZONE_MCU_SHARED); ++ ++ if (!reg) ++ return -ENOMEM; ++ ++ phys = kcalloc(nr_pages, sizeof(*phys), GFP_KERNEL); ++ if (!phys) { ++ err = -ENOMEM; ++ goto phy_alloc_failed; ++ } ++ ++ s_buf->pma = kbase_csf_protected_memory_alloc(kbdev, phys, ++ nr_pages); ++ if (s_buf->pma == NULL) { ++ err = -ENOMEM; ++ goto pma_alloc_failed; ++ } ++ ++ /* Insert Region Object into rbtree and make virtual address available ++ * to map it to physical page ++ */ ++ mutex_lock(&kbdev->csf.reg_lock); ++ err = kbase_add_va_region_rbtree(kbdev, reg, 0, nr_pages, 1); ++ reg->flags &= ~KBASE_REG_FREE; ++ mutex_unlock(&kbdev->csf.reg_lock); ++ ++ if (err) ++ goto add_va_region_failed; ++ ++ /* Update MMU table */ ++ err = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, ++ reg->start_pfn, phys, ++ nr_pages, mem_flags, MCU_AS_NR, ++ KBASE_MEM_GROUP_CSF_FW); ++ if (err) ++ goto mmu_insert_failed; ++ ++ s_buf->reg = reg; ++ kfree(phys); ++ return 0; ++ ++mmu_insert_failed: ++ mutex_lock(&kbdev->csf.reg_lock); ++ WARN_ON(kbase_remove_va_region(reg)); ++ mutex_unlock(&kbdev->csf.reg_lock); ++ ++add_va_region_failed: ++ kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages); ++pma_alloc_failed: ++ kfree(phys); ++phy_alloc_failed: ++ kfree(reg); ++ ++ return err; ++} ++ ++static void timer_event_worker(struct work_struct *data); ++static void protm_event_worker(struct work_struct *data); ++static void term_normal_suspend_buffer(struct kbase_context *const kctx, ++ struct 
kbase_normal_suspend_buffer *s_buf); ++ ++/** ++ * create_suspend_buffers() - Setup normal and protected mode ++ * suspend buffers. The protected mode buffer is only created when kctx->kbdev->csf.pma_dev is set; otherwise its region pointer is set to NULL. ++ * ++ * @kctx: Address of the kbase context within which the queue group ++ * is to be created. ++ * @group: Pointer to GPU command queue group data. ++ * ++ * Return: 0 if suspend buffers are successfully allocated. Otherwise -ENOMEM if the normal-mode buffer cannot be created, or the error returned by create_protected_suspend_buffer() (the normal-mode buffer is freed again in that case). ++ */ ++static int create_suspend_buffers(struct kbase_context *const kctx, ++ struct kbase_queue_group * const group) ++{ ++ int err = 0; ++ ++ if (create_normal_suspend_buffer(kctx, &group->normal_suspend_buf)) { ++ dev_err(kctx->kbdev->dev, "Failed to create normal suspend buffer\n"); ++ return -ENOMEM; ++ } ++ ++ if (kctx->kbdev->csf.pma_dev) { ++ err = create_protected_suspend_buffer(kctx->kbdev, ++ &group->protected_suspend_buf); ++ if (err) { ++ term_normal_suspend_buffer(kctx, ++ &group->normal_suspend_buf); ++ dev_err(kctx->kbdev->dev, "Failed to create protected suspend buffer\n"); ++ } ++ } else { ++ group->protected_suspend_buf.reg = NULL; ++ } ++ ++ return err; ++} ++ ++/** ++ * generate_group_uid() - Makes an ID unique to all kernel base devices ++ * and contexts, for a queue group and CSG. ++ * ++ * Return: A unique ID in the form of an unsigned 32-bit integer, or 1 (after raising a warning) if no kbase device is found ++ */ ++static u32 generate_group_uid(void) ++{ ++ /* use first KBase device to store max UID */ ++ struct kbase_device *kbdev = kbase_find_device(-1); ++ u32 uid = 1; ++ ++ if (kbdev) ++ uid = (u32) atomic_inc_return(&kbdev->group_max_uid_in_devices); ++ else ++ WARN(1, "NULL kbase device pointer in group UID generation"); ++ ++ return uid; ++} ++ ++/** ++ * create_queue_group() - Create a queue group ++ * ++ * @kctx: Address of the kbase context within which the queue group ++ * is to be created. ++ * @create: Address of a structure which contains details of the ++ * queue group which is to be created. ++ * ++ * Return: a queue group handle on success, or a negative error code on failure. 
++ */ ++static int create_queue_group(struct kbase_context *const kctx, ++ union kbase_ioctl_cs_queue_group_create *const create) ++{ ++ int group_handle = find_free_group_handle(kctx); ++ ++ if (group_handle < 0) { ++ dev_err(kctx->kbdev->dev, ++ "All queue group handles are already in use\n"); ++ } else { ++ struct kbase_queue_group * const group = ++ kmalloc(sizeof(struct kbase_queue_group), ++ GFP_KERNEL); ++ ++ lockdep_assert_held(&kctx->csf.lock); ++ ++ if (!group) { ++ dev_err(kctx->kbdev->dev, "Failed to allocate a queue\n"); ++ group_handle = -ENOMEM; ++ } else { ++ int err = 0; ++ ++ group->kctx = kctx; ++ group->handle = group_handle; ++ group->csg_nr = KBASEP_CSG_NR_INVALID; ++ ++ group->tiler_mask = create->in.tiler_mask; ++ group->fragment_mask = create->in.fragment_mask; ++ group->compute_mask = create->in.compute_mask; ++ ++ group->tiler_max = create->in.tiler_max; ++ group->fragment_max = create->in.fragment_max; ++ group->compute_max = create->in.compute_max; ++ group->priority = kbase_csf_priority_queue_group_priority_to_relative( ++ kbase_csf_priority_check(kctx->kbdev, create->in.priority)); ++ group->doorbell_nr = KBASEP_USER_DB_NR_INVALID; ++ group->faulted = false; ++ ++ group->group_uid = generate_group_uid(); ++ create->out.group_uid = group->group_uid; ++ ++ INIT_LIST_HEAD(&group->link); ++ INIT_LIST_HEAD(&group->link_to_schedule); ++ INIT_LIST_HEAD(&group->error_fatal.link); ++ INIT_LIST_HEAD(&group->error_timeout.link); ++ INIT_LIST_HEAD(&group->error_tiler_oom.link); ++ INIT_WORK(&group->timer_event_work, timer_event_worker); ++ INIT_WORK(&group->protm_event_work, protm_event_worker); ++ bitmap_zero(group->protm_pending_bitmap, ++ MAX_SUPPORTED_STREAMS_PER_GROUP); ++ ++ group->run_state = KBASE_CSF_GROUP_INACTIVE; ++ err = create_suspend_buffers(kctx, group); ++ ++ if (err < 0) { ++ kfree(group); ++ group_handle = err; ++ } else { ++ int j; ++ ++ kctx->csf.queue_groups[group_handle] = group; ++ for (j = 0; j < 
MAX_SUPPORTED_STREAMS_PER_GROUP; ++ j++) ++ group->bound_queues[j] = NULL; ++ } ++ } ++ } ++ ++ return group_handle; ++} ++ ++int kbase_csf_queue_group_create(struct kbase_context *const kctx, ++ union kbase_ioctl_cs_queue_group_create *const create) ++{ ++ int err = 0; ++ const u32 tiler_count = hweight64(create->in.tiler_mask); ++ const u32 fragment_count = hweight64(create->in.fragment_mask); ++ const u32 compute_count = hweight64(create->in.compute_mask); ++ ++ mutex_lock(&kctx->csf.lock); ++ ++ if ((create->in.tiler_max > tiler_count) || ++ (create->in.fragment_max > fragment_count) || ++ (create->in.compute_max > compute_count)) { ++ dev_err(kctx->kbdev->dev, ++ "Invalid maximum number of endpoints for a queue group\n"); ++ err = -EINVAL; ++ } else if (create->in.priority >= BASE_QUEUE_GROUP_PRIORITY_COUNT) { ++ dev_err(kctx->kbdev->dev, "Invalid queue group priority %u\n", ++ (unsigned int)create->in.priority); ++ err = -EINVAL; ++ } else if (!iface_has_enough_streams(kctx->kbdev, create->in.cs_min)) { ++ dev_err(kctx->kbdev->dev, ++ "No CSG has at least %d CSs\n", ++ create->in.cs_min); ++ err = -EINVAL; ++ } else { ++ /* For the CSG which satisfies the condition for having ++ * the needed number of CSs, check whether it also conforms ++ * with the requirements for at least one of its CSs having ++ * the iterator of the needed type ++ * (note: for CSF v1.0 all CSs in a CSG will have access to ++ * the same iterators) ++ */ ++ const int group_handle = create_queue_group(kctx, create); ++ ++ if (group_handle >= 0) ++ create->out.group_handle = group_handle; ++ else ++ err = group_handle; ++ } ++ ++ mutex_unlock(&kctx->csf.lock); ++ ++ return err; ++} ++ ++/** ++ * term_normal_suspend_buffer() - Free normal-mode suspend buffer of queue group ++ * ++ * @kctx: Pointer to kbase context where queue group belongs to ++ * @s_buf: Pointer to queue group suspend buffer to be freed ++ */ ++static void term_normal_suspend_buffer(struct kbase_context *const kctx, ++ 
struct kbase_normal_suspend_buffer *s_buf) ++{ ++ const size_t nr_pages = ++ PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size); ++ ++ lockdep_assert_held(&kctx->csf.lock); ++ ++ WARN_ON(kbase_mmu_teardown_pages( ++ kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, ++ s_buf->reg->start_pfn, nr_pages, MCU_AS_NR)); ++ ++ WARN_ON(s_buf->reg->flags & KBASE_REG_FREE); ++ ++ mutex_lock(&kctx->kbdev->csf.reg_lock); ++ WARN_ON(kbase_remove_va_region(s_buf->reg)); ++ mutex_unlock(&kctx->kbdev->csf.reg_lock); ++ ++ kbase_mem_pool_free_pages( ++ &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], ++ nr_pages, &s_buf->phy[0], false, false); ++ ++ kfree(s_buf->phy); ++ s_buf->phy = NULL; ++ kfree(s_buf->reg); ++ s_buf->reg = NULL; ++} ++ ++/** ++ * term_protected_suspend_buffer() - Free protected-mode suspend buffer of ++ * queue group ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @s_buf: Pointer to queue group suspend buffer to be freed ++ */ ++static void term_protected_suspend_buffer(struct kbase_device *const kbdev, ++ struct kbase_protected_suspend_buffer *s_buf) ++{ ++ const size_t nr_pages = ++ PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); ++ ++ WARN_ON(kbase_mmu_teardown_pages( ++ kbdev, &kbdev->csf.mcu_mmu, ++ s_buf->reg->start_pfn, nr_pages, MCU_AS_NR)); ++ ++ WARN_ON(s_buf->reg->flags & KBASE_REG_FREE); ++ ++ mutex_lock(&kbdev->csf.reg_lock); ++ WARN_ON(kbase_remove_va_region(s_buf->reg)); ++ mutex_unlock(&kbdev->csf.reg_lock); ++ ++ kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages); ++ s_buf->pma = NULL; ++ kfree(s_buf->reg); ++ s_buf->reg = NULL; ++} ++ ++void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group) ++{ ++ struct kbase_context *kctx = group->kctx; ++ ++ /* Currently each group supports the same number of CS */ ++ u32 max_streams = ++ kctx->kbdev->csf.global_iface.groups[0].stream_num; ++ u32 i; ++ ++ lockdep_assert_held(&kctx->csf.lock); ++ ++ WARN_ON(group->run_state != 
KBASE_CSF_GROUP_INACTIVE && ++ group->run_state != KBASE_CSF_GROUP_FAULT_EVICTED); ++ ++ for (i = 0; i < max_streams; i++) { ++ struct kbase_queue *queue = ++ group->bound_queues[i]; ++ ++ /* The group is already being evicted from the scheduler */ ++ if (queue) ++ unbind_stopped_queue(kctx, queue); ++ } ++ ++ term_normal_suspend_buffer(kctx, &group->normal_suspend_buf); ++ if (kctx->kbdev->csf.pma_dev) ++ term_protected_suspend_buffer(kctx->kbdev, ++ &group->protected_suspend_buf); ++ ++ group->run_state = KBASE_CSF_GROUP_TERMINATED; ++} ++ ++/** ++ * term_queue_group - Terminate a GPU command queue group. ++ * ++ * @group: Pointer to GPU command queue group data. ++ * ++ * Terminates a GPU command queue group. From the userspace perspective the ++ * group will still exist but it can't bind new queues to it. Userspace can ++ * still add work in queues bound to the group but it won't be executed. (This ++ * is because the IO mapping created upon binding such queues is still intact.) ++ */ ++static void term_queue_group(struct kbase_queue_group *group) ++{ ++ struct kbase_context *kctx = group->kctx; ++ ++ kbase_reset_gpu_assert_failed_or_prevented(kctx->kbdev); ++ lockdep_assert_held(&kctx->csf.lock); ++ ++ /* Stop the group and evict it from the scheduler */ ++ kbase_csf_scheduler_group_deschedule(group); ++ ++ if (group->run_state == KBASE_CSF_GROUP_TERMINATED) ++ return; ++ ++ dev_dbg(kctx->kbdev->dev, "group %d terminating", group->handle); ++ ++ kbase_csf_term_descheduled_queue_group(group); ++} ++ ++static void cancel_queue_group_events(struct kbase_queue_group *group) ++{ ++ cancel_work_sync(&group->timer_event_work); ++ cancel_work_sync(&group->protm_event_work); ++} ++ ++void kbase_csf_queue_group_terminate(struct kbase_context *kctx, ++ u8 group_handle) ++{ ++ struct kbase_queue_group *group; ++ int err; ++ bool reset_prevented = false; ++ struct kbase_device *const kbdev = kctx->kbdev; ++ ++ err = kbase_reset_gpu_prevent_and_wait(kbdev); ++ if (err) ++ 
dev_warn( ++ kbdev->dev, ++ "Unsuccessful GPU reset detected when terminating group %d, attempting to terminate regardless", ++ group_handle); ++ else ++ reset_prevented = true; ++ ++ mutex_lock(&kctx->csf.lock); ++ ++ group = find_queue_group(kctx, group_handle); ++ ++ if (group) { ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kctx->csf.event_lock, flags); ++ ++ dev_dbg(kbdev->dev, ++ "Remove any pending group fatal error from context %pK\n", ++ (void *)group->kctx); ++ ++ list_del_init(&group->error_tiler_oom.link); ++ list_del_init(&group->error_timeout.link); ++ list_del_init(&group->error_fatal.link); ++ spin_unlock_irqrestore(&kctx->csf.event_lock, flags); ++ ++ term_queue_group(group); ++ kctx->csf.queue_groups[group_handle] = NULL; ++ } ++ ++ mutex_unlock(&kctx->csf.lock); ++ if (reset_prevented) ++ kbase_reset_gpu_allow(kbdev); ++ ++ if (!group) ++ return; ++ ++ /* Cancel any pending event callbacks. If one is in progress ++ * then this thread waits synchronously for it to complete (which ++ * is why we must unlock the context first). We already ensured ++ * that no more callbacks can be enqueued by terminating the group. 
++ */ ++ cancel_queue_group_events(group); ++ kfree(group); ++} ++ ++int kbase_csf_queue_group_suspend(struct kbase_context *kctx, ++ struct kbase_suspend_copy_buffer *sus_buf, ++ u8 group_handle) ++{ ++ struct kbase_device *const kbdev = kctx->kbdev; ++ int err; ++ struct kbase_queue_group *group; ++ ++ err = kbase_reset_gpu_prevent_and_wait(kbdev); ++ if (err) { ++ dev_warn( ++ kbdev->dev, ++ "Unsuccessful GPU reset detected when suspending group %d", ++ group_handle); ++ return err; ++ } ++ mutex_lock(&kctx->csf.lock); ++ ++ group = find_queue_group(kctx, group_handle); ++ if (group) ++ err = kbase_csf_scheduler_group_copy_suspend_buf(group, ++ sus_buf); ++ else ++ err = -EINVAL; ++ ++ mutex_unlock(&kctx->csf.lock); ++ kbase_reset_gpu_allow(kbdev); ++ ++ return err; ++} ++ ++/** ++ * add_error() - Add an error to the list of errors to report to user space ++ * ++ * @kctx: Address of a base context associated with a GPU address space. ++ * @error: Address of the item to be added to the context's pending error list. ++ * @data: Error data to be returned to userspace. ++ * ++ * Does not wake up the event queue blocking a user thread in kbase_poll. This ++ * is to make it more efficient to add multiple errors. ++ * ++ * The added error must not already be on the context's list of errors waiting ++ * to be reported (e.g. because a previous error concerning the same object has ++ * not yet been reported). 
++ */ ++static void add_error(struct kbase_context *const kctx, ++ struct kbase_csf_notification *const error, ++ struct base_csf_notification const *const data) ++{ ++ unsigned long flags; ++ ++ if (WARN_ON(!kctx)) ++ return; ++ ++ if (WARN_ON(!error)) ++ return; ++ ++ if (WARN_ON(!data)) ++ return; ++ ++ spin_lock_irqsave(&kctx->csf.event_lock, flags); ++ ++ if (!WARN_ON(!list_empty(&error->link))) { ++ error->data = *data; ++ list_add_tail(&error->link, &kctx->csf.error_list); ++ dev_dbg(kctx->kbdev->dev, ++ "Added error %pK of type %d in context %pK\n", ++ (void *)error, data->type, (void *)kctx); ++ } ++ ++ spin_unlock_irqrestore(&kctx->csf.event_lock, flags); ++} ++ ++void kbase_csf_add_group_fatal_error( ++ struct kbase_queue_group *const group, ++ struct base_gpu_queue_group_error const *const err_payload) ++{ ++ struct base_csf_notification error; ++ ++ if (WARN_ON(!group)) ++ return; ++ ++ if (WARN_ON(!err_payload)) ++ return; ++ ++ error = (struct base_csf_notification) { ++ .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, ++ .payload = { ++ .csg_error = { ++ .handle = group->handle, ++ .error = *err_payload ++ } ++ } ++ }; ++ ++ add_error(group->kctx, &group->error_fatal, &error); ++} ++ ++void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev, ++ struct kbase_context *kctx) ++{ ++ struct list_head evicted_groups; ++ struct kbase_queue_group *group; ++ int i; ++ ++ INIT_LIST_HEAD(&evicted_groups); ++ ++ mutex_lock(&kctx->csf.lock); ++ ++ kbase_csf_scheduler_evict_ctx_slots(kbdev, kctx, &evicted_groups); ++ while (!list_empty(&evicted_groups)) { ++ group = list_first_entry(&evicted_groups, ++ struct kbase_queue_group, link); ++ ++ dev_dbg(kbdev->dev, "Context %d_%d active group %d terminated", ++ kctx->tgid, kctx->id, group->handle); ++ kbase_csf_term_descheduled_queue_group(group); ++ list_del_init(&group->link); ++ } ++ ++ /* Acting on the queue groups that are pending to be terminated. 
*/ ++ for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) { ++ group = kctx->csf.queue_groups[i]; ++ if (group && ++ group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED) ++ kbase_csf_term_descheduled_queue_group(group); ++ } ++ ++ mutex_unlock(&kctx->csf.lock); ++} ++ ++int kbase_csf_ctx_init(struct kbase_context *kctx) ++{ ++ struct kbase_device *kbdev = kctx->kbdev; ++ int err = -ENOMEM; ++ ++ INIT_LIST_HEAD(&kctx->csf.event_callback_list); ++ INIT_LIST_HEAD(&kctx->csf.queue_list); ++ INIT_LIST_HEAD(&kctx->csf.link); ++ INIT_LIST_HEAD(&kctx->csf.error_list); ++ ++ spin_lock_init(&kctx->csf.event_lock); ++ kctx->csf.user_reg_vma = NULL; ++ mutex_lock(&kbdev->pm.lock); ++ /* The inode information for /dev/malixx file is not available at the ++ * time of device probe as the inode is created when the device node ++ * is created by udevd (through mknod). ++ */ ++ if (kctx->filp) { ++ if (!kbdev->csf.mali_file_inode) ++ kbdev->csf.mali_file_inode = kctx->filp->f_inode; ++ ++ /* inode is unique for a file */ ++ WARN_ON(kbdev->csf.mali_file_inode != kctx->filp->f_inode); ++ } ++ mutex_unlock(&kbdev->pm.lock); ++ ++ /* Mark all the cookies as 'free' */ ++ bitmap_fill(kctx->csf.cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE); ++ ++ kctx->csf.wq = alloc_workqueue("mali_kbase_csf_wq", ++ WQ_UNBOUND, 1); ++ ++ if (likely(kctx->csf.wq)) { ++ err = kbase_csf_scheduler_context_init(kctx); ++ ++ if (likely(!err)) { ++ err = kbase_csf_kcpu_queue_context_init(kctx); ++ ++ if (likely(!err)) { ++ err = kbase_csf_tiler_heap_context_init(kctx); ++ ++ if (likely(!err)) ++ mutex_init(&kctx->csf.lock); ++ else ++ kbase_csf_kcpu_queue_context_term(kctx); ++ } ++ ++ if (unlikely(err)) ++ kbase_csf_scheduler_context_term(kctx); ++ } ++ ++ if (unlikely(err)) ++ destroy_workqueue(kctx->csf.wq); ++ } ++ ++ return err; ++} ++ ++void kbase_csf_ctx_handle_fault(struct kbase_context *kctx, ++ struct kbase_fault *fault) ++{ ++ int gr; ++ bool reported = false; ++ struct base_gpu_queue_group_error err_payload; ++ 
int err; ++ struct kbase_device *kbdev; ++ ++ if (WARN_ON(!kctx)) ++ return; ++ ++ if (WARN_ON(!fault)) ++ return; ++ ++ kbdev = kctx->kbdev; ++ err = kbase_reset_gpu_try_prevent(kbdev); ++ /* Regardless of whether reset failed or is currently happening, exit ++ * early ++ */ ++ if (err) ++ return; ++ ++ err_payload = (struct base_gpu_queue_group_error) { ++ .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL, ++ .payload = { ++ .fatal_group = { ++ .sideband = fault->addr, ++ .status = fault->status, ++ } ++ } ++ }; ++ ++ mutex_lock(&kctx->csf.lock); ++ ++ for (gr = 0; gr < MAX_QUEUE_GROUP_NUM; gr++) { ++ struct kbase_queue_group *const group = ++ kctx->csf.queue_groups[gr]; ++ ++ if (group && group->run_state != KBASE_CSF_GROUP_TERMINATED) { ++ term_queue_group(group); ++ kbase_csf_add_group_fatal_error(group, &err_payload); ++ reported = true; ++ } ++ } ++ ++ mutex_unlock(&kctx->csf.lock); ++ ++ if (reported) ++ kbase_event_wakeup(kctx); ++ ++ kbase_reset_gpu_allow(kbdev); ++} ++ ++void kbase_csf_ctx_term(struct kbase_context *kctx) ++{ ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct kbase_as *as = NULL; ++ unsigned long flags; ++ u32 i; ++ int err; ++ bool reset_prevented = false; ++ ++ /* As the kbase context is terminating, its debugfs sub-directory would ++ * have been removed already and so would be the debugfs file created ++ * for queue groups & kcpu queues, hence no need to explicitly remove ++ * those debugfs files. 
++ */ ++ kbase_csf_event_wait_remove_all(kctx); ++ ++ /* Wait for a GPU reset if it is happening, prevent it if not happening */ ++ err = kbase_reset_gpu_prevent_and_wait(kbdev); ++ if (err) ++ dev_warn( ++ kbdev->dev, ++ "Unsuccessful GPU reset detected when terminating csf context (%d_%d), attempting to terminate regardless", ++ kctx->tgid, kctx->id); ++ else ++ reset_prevented = true; ++ ++ mutex_lock(&kctx->csf.lock); ++ /* Iterate through the queue groups that were not terminated by ++ * userspace and issue the term request to firmware for them. ++ */ ++ for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) { ++ if (kctx->csf.queue_groups[i]) ++ term_queue_group(kctx->csf.queue_groups[i]); ++ } ++ mutex_unlock(&kctx->csf.lock); ++ ++ if (reset_prevented) ++ kbase_reset_gpu_allow(kbdev); ++ ++ /* Now that all queue groups have been terminated, there can be no ++ * more OoM or timer event interrupts but there can be inflight work ++ * items. Destroying the wq will implicitly flush those work items. ++ */ ++ destroy_workqueue(kctx->csf.wq); ++ ++ /* Wait for the firmware error work item to also finish as it could ++ * be affecting this outgoing context also. ++ */ ++ flush_work(&kctx->kbdev->csf.fw_error_work); ++ ++ /* A work item to handle page_fault/bus_fault/gpu_fault could be ++ * pending for the outgoing context. Flush the workqueue that will ++ * execute that work item. ++ */ ++ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); ++ if (kctx->as_nr != KBASEP_AS_NR_INVALID) ++ as = &kctx->kbdev->as[kctx->as_nr]; ++ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); ++ if (as) ++ flush_workqueue(as->pf_wq); ++ ++ mutex_lock(&kctx->csf.lock); ++ ++ for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) { ++ kfree(kctx->csf.queue_groups[i]); ++ kctx->csf.queue_groups[i] = NULL; ++ } ++ ++ /* Iterate through the queues that were not terminated by ++ * userspace and do the required cleanup for them. 
++ */ ++ while (!list_empty(&kctx->csf.queue_list)) { ++ struct kbase_queue *queue; ++ ++ queue = list_first_entry(&kctx->csf.queue_list, ++ struct kbase_queue, link); ++ ++ /* The reference held when the IO mapping was created on bind ++ * would have been dropped otherwise the termination of Kbase ++ * context itself wouldn't have kicked-in. So there shall be ++ * only one reference left that was taken when queue was ++ * registered. ++ */ ++ if (atomic_read(&queue->refcount) != 1) ++ dev_warn(kctx->kbdev->dev, ++ "Releasing queue with incorrect refcounting!\n"); ++ list_del_init(&queue->link); ++ release_queue(queue); ++ } ++ ++ mutex_unlock(&kctx->csf.lock); ++ ++ kbase_csf_tiler_heap_context_term(kctx); ++ kbase_csf_kcpu_queue_context_term(kctx); ++ kbase_csf_scheduler_context_term(kctx); ++ ++ mutex_destroy(&kctx->csf.lock); ++} ++ ++int kbase_csf_event_wait_add(struct kbase_context *kctx, ++ kbase_csf_event_callback *callback, void *param) ++{ ++ int err = -ENOMEM; ++ struct kbase_csf_event *event = ++ kzalloc(sizeof(struct kbase_csf_event), GFP_KERNEL); ++ ++ if (event) { ++ unsigned long flags; ++ ++ event->kctx = kctx; ++ event->callback = callback; ++ event->param = param; ++ ++ spin_lock_irqsave(&kctx->csf.event_lock, flags); ++ list_add_tail(&event->link, &kctx->csf.event_callback_list); ++ dev_dbg(kctx->kbdev->dev, ++ "Added event handler %pK with param %pK\n", event, ++ event->param); ++ spin_unlock_irqrestore(&kctx->csf.event_lock, flags); ++ ++ err = 0; ++ } ++ ++ return err; ++} ++ ++void kbase_csf_event_wait_remove(struct kbase_context *kctx, ++ kbase_csf_event_callback *callback, void *param) ++{ ++ struct kbase_csf_event *event; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kctx->csf.event_lock, flags); ++ ++ list_for_each_entry(event, &kctx->csf.event_callback_list, link) { ++ if ((event->callback == callback) && (event->param == param)) { ++ list_del(&event->link); ++ dev_dbg(kctx->kbdev->dev, ++ "Removed event handler %pK with param 
%pK\n", ++ event, event->param); ++ kfree(event); ++ break; ++ } ++ } ++ spin_unlock_irqrestore(&kctx->csf.event_lock, flags); ++} ++ ++bool kbase_csf_read_error(struct kbase_context *kctx, ++ struct base_csf_notification *event_data) ++{ ++ bool got_event = true; ++ struct kbase_csf_notification *error_data = NULL; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kctx->csf.event_lock, flags); ++ ++ if (likely(!list_empty(&kctx->csf.error_list))) { ++ error_data = list_first_entry(&kctx->csf.error_list, ++ struct kbase_csf_notification, link); ++ list_del_init(&error_data->link); ++ *event_data = error_data->data; ++ dev_dbg(kctx->kbdev->dev, "Dequeued error %pK in context %pK\n", ++ (void *)error_data, (void *)kctx); ++ } else { ++ got_event = false; ++ } ++ ++ spin_unlock_irqrestore(&kctx->csf.event_lock, flags); ++ ++ return got_event; ++} ++ ++bool kbase_csf_error_pending(struct kbase_context *kctx) ++{ ++ bool event_pended = false; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kctx->csf.event_lock, flags); ++ event_pended = !list_empty(&kctx->csf.error_list); ++ dev_dbg(kctx->kbdev->dev, "%s error is pending in context %pK\n", ++ event_pended ? "An" : "No", (void *)kctx); ++ spin_unlock_irqrestore(&kctx->csf.event_lock, flags); ++ ++ return event_pended; ++} ++ ++void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu) ++{ ++ struct kbase_csf_event *event, *next_event; ++ unsigned long flags; ++ ++ dev_dbg(kctx->kbdev->dev, ++ "Signal event (%s GPU notify) for context %pK\n", ++ notify_gpu ? "with" : "without", (void *)kctx); ++ ++ /* First set the signal count to 1 and wake up the event thread. ++ */ ++ atomic_set(&kctx->event_count, 1); ++ kbase_event_wakeup(kctx); ++ ++ /* Signal the CSF firmware. This is to ensure that pending command ++ * stream synch object wait operations are re-evaluated.
++ * Write to GLB_DOORBELL would suffice as spec says that all pending ++ * synch object wait operations are re-evaluated on a write to any ++ * CS_DOORBELL/GLB_DOORBELL register. ++ */ ++ if (notify_gpu) { ++ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); ++ if (kctx->kbdev->pm.backend.gpu_powered) ++ kbase_csf_ring_doorbell(kctx->kbdev, CSF_KERNEL_DOORBELL_NR); ++ KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT_NOTIFY_GPU, kctx, 0u); ++ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); ++ } ++ ++ /* Now invoke the callbacks registered on backend side. ++ * Allow item removal inside the loop, if requested by the callback. ++ */ ++ spin_lock_irqsave(&kctx->csf.event_lock, flags); ++ ++ list_for_each_entry_safe( ++ event, next_event, &kctx->csf.event_callback_list, link) { ++ enum kbase_csf_event_callback_action action; ++ ++ dev_dbg(kctx->kbdev->dev, ++ "Calling event handler %pK with param %pK\n", ++ (void *)event, event->param); ++ action = event->callback(event->param); ++ if (action == KBASE_CSF_EVENT_CALLBACK_REMOVE) { ++ list_del(&event->link); ++ kfree(event); ++ } ++ } ++ ++ spin_unlock_irqrestore(&kctx->csf.event_lock, flags); ++} ++ ++void kbase_csf_event_wait_remove_all(struct kbase_context *kctx) ++{ ++ struct kbase_csf_event *event, *next_event; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kctx->csf.event_lock, flags); ++ ++ list_for_each_entry_safe( ++ event, next_event, &kctx->csf.event_callback_list, link) { ++ list_del(&event->link); ++ dev_dbg(kctx->kbdev->dev, ++ "Removed event handler %pK with param %pK\n", ++ (void *)event, event->param); ++ kfree(event); ++ } ++ ++ spin_unlock_irqrestore(&kctx->csf.event_lock, flags); ++} ++ ++/** ++ * handle_oom_event - Handle the OoM event generated by the firmware for the ++ * CSI. ++ * ++ * This function will handle the OoM event request from the firmware for the ++ * CS. 
It will retrieve the address of heap context and heap's ++ * statistics (like number of render passes in-flight) from the CS's ++ * kernel output page and pass them to the tiler heap function to allocate a ++ * new chunk. ++ * It will also update the CS's kernel input page with the address ++ * of a new chunk that was allocated. ++ * ++ * @kctx: Pointer to the kbase context in which the tiler heap was initialized. ++ * @stream: Pointer to the structure containing info provided by the firmware ++ * about the CSI. ++ * ++ * Return: 0 if successfully handled the request, otherwise a negative error ++ * code on failure. ++ */ ++static int handle_oom_event(struct kbase_context *const kctx, ++ struct kbase_csf_cmd_stream_info const *const stream) ++{ ++ u64 gpu_heap_va = ++ kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_LO) | ++ ((u64)kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_HI) << 32); ++ const u32 vt_start = ++ kbase_csf_firmware_cs_output(stream, CS_HEAP_VT_START); ++ const u32 vt_end = ++ kbase_csf_firmware_cs_output(stream, CS_HEAP_VT_END); ++ const u32 frag_end = ++ kbase_csf_firmware_cs_output(stream, CS_HEAP_FRAG_END); ++ u32 renderpasses_in_flight; ++ u32 pending_frag_count; ++ u64 new_chunk_ptr; ++ int err; ++ ++ if ((frag_end > vt_end) || (vt_end >= vt_start)) { ++ dev_warn(kctx->kbdev->dev, "Invalid Heap statistics provided by firmware: vt_start %u, vt_end %u, frag_end %u\n", ++ vt_start, vt_end, frag_end); ++ return -EINVAL; ++ } ++ ++ renderpasses_in_flight = vt_start - frag_end; ++ pending_frag_count = vt_end - frag_end; ++ ++ err = kbase_csf_tiler_heap_alloc_new_chunk(kctx, ++ gpu_heap_va, renderpasses_in_flight, pending_frag_count, &new_chunk_ptr); ++ ++ /* It is okay to acknowledge with a NULL chunk (firmware will then wait ++ * for the fragment jobs to complete and release chunks) ++ */ ++ if (err == -EBUSY) ++ new_chunk_ptr = 0; ++ else if (err) ++ return err; ++ ++ kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_START_LO,
++ new_chunk_ptr & 0xFFFFFFFF); ++ kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_START_HI, ++ new_chunk_ptr >> 32); ++ ++ kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_END_LO, ++ new_chunk_ptr & 0xFFFFFFFF); ++ kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_END_HI, ++ new_chunk_ptr >> 32); ++ ++ return 0; ++} ++ ++/** ++ * report_tiler_oom_error - Report a CSG error due to a tiler heap OOM event ++ * ++ * @group: Pointer to the GPU command queue group that encountered the error ++ */ ++static void report_tiler_oom_error(struct kbase_queue_group *group) ++{ ++ struct base_csf_notification const ++ error = { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, ++ .payload = { ++ .csg_error = { ++ .handle = group->handle, ++ .error = { ++ .error_type = ++ BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM, ++ } } } }; ++ ++ add_error(group->kctx, &group->error_tiler_oom, &error); ++ kbase_event_wakeup(group->kctx); ++} ++ ++/** ++ * kbase_queue_oom_event - Handle tiler out-of-memory for a GPU command queue. ++ * ++ * @queue: Pointer to queue for which out-of-memory event was received. ++ * ++ * Called with the CSF locked for the affected GPU virtual address space. ++ * Do not call in interrupt context. ++ * ++ * Handles tiler out-of-memory for a GPU command queue and then clears the ++ * notification to allow the firmware to report out-of-memory again in future. ++ * If the out-of-memory condition was successfully handled then this function ++ * rings the relevant doorbell to notify the firmware; otherwise, it terminates ++ * the GPU command queue group to which the queue is bound. See ++ * term_queue_group() for details. 
++ */ ++static void kbase_queue_oom_event(struct kbase_queue *const queue) ++{ ++ struct kbase_context *const kctx = queue->kctx; ++ struct kbase_device *const kbdev = kctx->kbdev; ++ struct kbase_queue_group *group; ++ int slot_num, err; ++ struct kbase_csf_cmd_stream_group_info const *ginfo; ++ struct kbase_csf_cmd_stream_info const *stream; ++ int csi_index = queue->csi_index; ++ u32 cs_oom_ack, cs_oom_req; ++ ++ lockdep_assert_held(&kctx->csf.lock); ++ ++ group = get_bound_queue_group(queue); ++ if (!group) { ++ dev_warn(kctx->kbdev->dev, "queue not bound\n"); ++ return; ++ } ++ ++ kbase_csf_scheduler_lock(kbdev); ++ ++ slot_num = kbase_csf_scheduler_group_get_slot(group); ++ ++ /* The group could have gone off slot before this work item got ++ * a chance to execute. ++ */ ++ if (slot_num < 0) ++ goto unlock; ++ ++ /* If the bound group is on slot yet the kctx is marked with disabled ++ * on address-space fault, the group is pending to be killed. So skip ++ * the inflight oom operation. ++ */ ++ if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) ++ goto unlock; ++ ++ ginfo = &kbdev->csf.global_iface.groups[slot_num]; ++ stream = &ginfo->streams[csi_index]; ++ cs_oom_ack = kbase_csf_firmware_cs_output(stream, CS_ACK) & ++ CS_ACK_TILER_OOM_MASK; ++ cs_oom_req = kbase_csf_firmware_cs_input_read(stream, CS_REQ) & ++ CS_REQ_TILER_OOM_MASK; ++ ++ /* The group could have already undergone suspend-resume cycle before ++ * this work item got a chance to execute. On CSG resume the CS_ACK ++ * register is set by firmware to reflect the CS_REQ register, which ++ * implies that all events signaled before suspension are implicitly ++ * acknowledged. ++ * A new OoM event is expected to be generated after resume. 
++ */ ++ if (cs_oom_ack == cs_oom_req) ++ goto unlock; ++ ++ err = handle_oom_event(kctx, stream); ++ ++ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_oom_ack, ++ CS_REQ_TILER_OOM_MASK); ++ ++ if (err) { ++ dev_warn( ++ kbdev->dev, ++ "Queue group to be terminated, couldn't handle the OoM event\n"); ++ kbase_csf_scheduler_unlock(kbdev); ++ term_queue_group(group); ++ report_tiler_oom_error(group); ++ return; ++ } ++ ++ kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true); ++unlock: ++ kbase_csf_scheduler_unlock(kbdev); ++} ++ ++/** ++ * oom_event_worker - Tiler out-of-memory handler called from a workqueue. ++ * ++ * @data: Pointer to a work_struct embedded in GPU command queue data. ++ * ++ * Handles a tiler out-of-memory condition for a GPU command queue and then ++ * releases a reference that was added to prevent the queue being destroyed ++ * while this work item was pending on a workqueue. ++ */ ++static void oom_event_worker(struct work_struct *data) ++{ ++ struct kbase_queue *queue = ++ container_of(data, struct kbase_queue, oom_event_work); ++ struct kbase_context *kctx = queue->kctx; ++ struct kbase_device *const kbdev = kctx->kbdev; ++ ++ int err = kbase_reset_gpu_try_prevent(kbdev); ++ ++ mutex_lock(&kctx->csf.lock); ++ /* Skip the OoM handling if a reset failed or is in progress, but always ++ * drop the reference taken when this work item was queued. ++ */ ++ if (!err) ++ kbase_queue_oom_event(queue); ++ release_queue(queue); ++ ++ mutex_unlock(&kctx->csf.lock); ++ ++ if (!err) ++ kbase_reset_gpu_allow(kbdev); ++} ++ ++/** ++ * report_group_timeout_error - Report the timeout error for the group to userspace.
++ * ++ * @group: Pointer to the group for which timeout error occurred ++ */ ++static void report_group_timeout_error(struct kbase_queue_group *const group) ++{ ++ struct base_csf_notification const ++ error = { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, ++ .payload = { ++ .csg_error = { ++ .handle = group->handle, ++ .error = { ++ .error_type = ++ BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT, ++ } } } }; ++ ++ dev_warn(group->kctx->kbdev->dev, ++ "Notify the event notification thread, forward progress timeout (%llu cycles)\n", ++ kbase_csf_timeout_get(group->kctx->kbdev)); ++ ++ add_error(group->kctx, &group->error_timeout, &error); ++ kbase_event_wakeup(group->kctx); ++} ++ ++/** ++ * timer_event_worker - Handle the progress timeout error for the group ++ * ++ * @data: Pointer to a work_struct embedded in GPU command queue group data. ++ * ++ * Terminate the CSG and report the error to userspace ++ */ ++static void timer_event_worker(struct work_struct *data) ++{ ++ struct kbase_queue_group *const group = ++ container_of(data, struct kbase_queue_group, timer_event_work); ++ struct kbase_context *const kctx = group->kctx; ++ bool reset_prevented = false; ++ int err = kbase_reset_gpu_prevent_and_wait(kctx->kbdev); ++ ++ if (err) ++ dev_warn( ++ kctx->kbdev->dev, ++ "Unsuccessful GPU reset detected when terminating group %d on progress timeout, attempting to terminate regardless", ++ group->handle); ++ else ++ reset_prevented = true; ++ ++ mutex_lock(&kctx->csf.lock); ++ ++ term_queue_group(group); ++ report_group_timeout_error(group); ++ ++ mutex_unlock(&kctx->csf.lock); ++ if (reset_prevented) ++ kbase_reset_gpu_allow(kctx->kbdev); ++} ++ ++/** ++ * handle_progress_timer_event - Progress timer timeout event handler. ++ * ++ * @group: Pointer to GPU queue group for which the timeout event is received. ++ * ++ * Enqueue a work item to terminate the group and notify the event notification ++ * thread of progress timeout fault for the GPU command queue group. 
++ */ ++static void handle_progress_timer_event(struct kbase_queue_group *const group) ++{ ++ queue_work(group->kctx->csf.wq, &group->timer_event_work); ++} ++ ++/** ++ * protm_event_worker - Protected mode switch request event handler ++ * called from a workqueue. ++ * ++ * @data: Pointer to a work_struct embedded in GPU command queue group data. ++ * ++ * Request to switch to protected mode. ++ */ ++static void protm_event_worker(struct work_struct *data) ++{ ++ struct kbase_queue_group *const group = ++ container_of(data, struct kbase_queue_group, protm_event_work); ++ ++ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_BEGIN, ++ group, 0u); ++ kbase_csf_scheduler_group_protm_enter(group); ++ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END, ++ group, 0u); ++} ++ ++static void report_queue_fatal_error(struct kbase_queue *const queue, ++ u32 cs_fatal, u64 cs_fatal_info, ++ u8 group_handle) ++{ ++ struct base_csf_notification error = ++ { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, ++ .payload = { ++ .csg_error = { ++ .handle = group_handle, ++ .error = { ++ .error_type = ++ BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL, ++ .payload = { ++ .fatal_queue = { ++ .sideband = ++ cs_fatal_info, ++ .status = cs_fatal, ++ .csi_index = ++ queue->csi_index, ++ } } } } } }; ++ ++ add_error(queue->kctx, &queue->error, &error); ++ kbase_event_wakeup(queue->kctx); ++} ++ ++/** ++ * handle_fault_event - Handler for CS fault. ++ * ++ * @queue: Pointer to queue for which fault event was received. ++ * @stream: Pointer to the structure containing info provided by the ++ * firmware about the CSI. ++ * ++ * Prints meaningful CS fault information. 
++ * ++ */ ++static void ++handle_fault_event(struct kbase_queue *const queue, ++ struct kbase_csf_cmd_stream_info const *const stream) ++{ ++ const u32 cs_fault = kbase_csf_firmware_cs_output(stream, CS_FAULT); ++ const u64 cs_fault_info = ++ kbase_csf_firmware_cs_output(stream, CS_FAULT_INFO_LO) | ++ ((u64)kbase_csf_firmware_cs_output(stream, CS_FAULT_INFO_HI) ++ << 32); ++ const u8 cs_fault_exception_type = ++ CS_FAULT_EXCEPTION_TYPE_GET(cs_fault); ++ const u32 cs_fault_exception_data = ++ CS_FAULT_EXCEPTION_DATA_GET(cs_fault); ++ const u64 cs_fault_info_exception_data = ++ CS_FAULT_INFO_EXCEPTION_DATA_GET(cs_fault_info); ++ struct kbase_device *const kbdev = queue->kctx->kbdev; ++ ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++ ++ dev_warn(kbdev->dev, ++ "Ctx %d_%d Group %d CSG %d CSI: %d\n" ++ "CS_FAULT.EXCEPTION_TYPE: 0x%x (%s)\n" ++ "CS_FAULT.EXCEPTION_DATA: 0x%x\n" ++ "CS_FAULT_INFO.EXCEPTION_DATA: 0x%llx\n", ++ queue->kctx->tgid, queue->kctx->id, queue->group->handle, ++ queue->group->csg_nr, queue->csi_index, ++ cs_fault_exception_type, ++ kbase_gpu_exception_name(cs_fault_exception_type), ++ cs_fault_exception_data, cs_fault_info_exception_data); ++ ++ if (cs_fault_exception_type == ++ CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT) ++ report_queue_fatal_error(queue, GPU_EXCEPTION_TYPE_SW_FAULT_2, ++ 0, queue->group->handle); ++} ++ ++/** ++ * fatal_event_worker - Handle the fatal error for the GPU queue ++ * ++ * @data: Pointer to a work_struct embedded in GPU command queue. ++ * ++ * Terminate the CSG and report the error to userspace. 
++ */ ++static void fatal_event_worker(struct work_struct *const data) ++{ ++ struct kbase_queue *const queue = ++ container_of(data, struct kbase_queue, fatal_event_work); ++ struct kbase_context *const kctx = queue->kctx; ++ struct kbase_device *const kbdev = kctx->kbdev; ++ struct kbase_queue_group *group; ++ u8 group_handle; ++ bool reset_prevented = false; ++ int err = kbase_reset_gpu_prevent_and_wait(kbdev); ++ ++ if (err) ++ dev_warn( ++ kbdev->dev, ++ "Unsuccessful GPU reset detected when terminating group to handle fatal event, attempting to terminate regardless"); ++ else ++ reset_prevented = true; ++ ++ mutex_lock(&kctx->csf.lock); ++ ++ group = get_bound_queue_group(queue); ++ if (!group) { ++ dev_warn(kbdev->dev, "queue not bound when handling fatal event"); ++ goto unlock; ++ } ++ ++ group_handle = group->handle; ++ term_queue_group(group); ++ report_queue_fatal_error(queue, queue->cs_fatal, queue->cs_fatal_info, ++ group_handle); ++ ++unlock: ++ release_queue(queue); ++ mutex_unlock(&kctx->csf.lock); ++ if (reset_prevented) ++ kbase_reset_gpu_allow(kbdev); ++} ++ ++/** ++ * handle_fatal_event - Handler for CS fatal. ++ * ++ * @queue: Pointer to queue for which fatal event was received. ++ * @stream: Pointer to the structure containing info provided by the ++ * firmware about the CSI. ++ * ++ * Prints meaningful CS fatal information. ++ * Enqueue a work item to terminate the group and report the fatal error ++ * to user space. 
++ */ ++static void ++handle_fatal_event(struct kbase_queue *const queue, ++ struct kbase_csf_cmd_stream_info const *const stream) ++{ ++ const u32 cs_fatal = kbase_csf_firmware_cs_output(stream, CS_FATAL); ++ const u64 cs_fatal_info = ++ kbase_csf_firmware_cs_output(stream, CS_FATAL_INFO_LO) | ++ ((u64)kbase_csf_firmware_cs_output(stream, CS_FATAL_INFO_HI) ++ << 32); ++ const u32 cs_fatal_exception_type = ++ CS_FATAL_EXCEPTION_TYPE_GET(cs_fatal); ++ const u32 cs_fatal_exception_data = ++ CS_FATAL_EXCEPTION_DATA_GET(cs_fatal); ++ const u64 cs_fatal_info_exception_data = ++ CS_FATAL_INFO_EXCEPTION_DATA_GET(cs_fatal_info); ++ struct kbase_device *const kbdev = queue->kctx->kbdev; ++ ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++ ++ dev_warn(kbdev->dev, ++ "Ctx %d_%d Group %d CSG %d CSI: %d\n" ++ "CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n" ++ "CS_FATAL.EXCEPTION_DATA: 0x%x\n" ++ "CS_FATAL_INFO.EXCEPTION_DATA: 0x%llx\n", ++ queue->kctx->tgid, queue->kctx->id, queue->group->handle, ++ queue->group->csg_nr, queue->csi_index, ++ cs_fatal_exception_type, ++ kbase_gpu_exception_name(cs_fatal_exception_type), ++ cs_fatal_exception_data, cs_fatal_info_exception_data); ++ ++ if (cs_fatal_exception_type == ++ CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR) { ++ queue_work(system_wq, &kbdev->csf.fw_error_work); ++ } else { ++ get_queue(queue); ++ queue->cs_fatal = cs_fatal; ++ queue->cs_fatal_info = cs_fatal_info; ++ if (!queue_work(queue->kctx->csf.wq, &queue->fatal_event_work)) ++ release_queue(queue); ++ } ++} ++ ++/** ++ * handle_queue_exception_event - Handler for CS fatal/fault exception events. ++ * ++ * @queue: Pointer to queue for which fatal/fault event was received. ++ * @cs_req: Value of the CS_REQ register from the CS's input page. ++ * @cs_ack: Value of the CS_ACK register from the CS's output page. 
++ */ ++static void handle_queue_exception_event(struct kbase_queue *const queue, ++ const u32 cs_req, const u32 cs_ack) ++{ ++ struct kbase_csf_cmd_stream_group_info const *ginfo; ++ struct kbase_csf_cmd_stream_info const *stream; ++ struct kbase_context *const kctx = queue->kctx; ++ struct kbase_device *const kbdev = kctx->kbdev; ++ struct kbase_queue_group *group = queue->group; ++ int csi_index = queue->csi_index; ++ int slot_num = group->csg_nr; ++ ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++ ++ ginfo = &kbdev->csf.global_iface.groups[slot_num]; ++ stream = &ginfo->streams[csi_index]; ++ ++ if ((cs_ack & CS_ACK_FATAL_MASK) != (cs_req & CS_REQ_FATAL_MASK)) { ++ handle_fatal_event(queue, stream); ++ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, ++ CS_REQ_FATAL_MASK); ++ } ++ ++ if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) { ++ handle_fault_event(queue, stream); ++ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, ++ CS_REQ_FAULT_MASK); ++ kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true); ++ } ++} ++ ++/** ++ * process_cs_interrupts - Process interrupts for a CS. ++ * ++ * @group: Pointer to GPU command queue group data. ++ * @ginfo: The CSG interface provided by the firmware. ++ * @irqreq: CSG's IRQ request bitmask (one bit per CS). ++ * @irqack: CSG's IRQ acknowledge bitmask (one bit per CS). ++ * ++ * If the interrupt request bitmask differs from the acknowledge bitmask ++ * then the firmware is notifying the host of an event concerning those ++ * CSs indicated by bits whose value differs. The actions required ++ * are then determined by examining which notification flags differ between ++ * the request and acknowledge registers for the individual CS(s). 
++ */ ++static void process_cs_interrupts(struct kbase_queue_group *const group, ++ struct kbase_csf_cmd_stream_group_info const *const ginfo, ++ u32 const irqreq, u32 const irqack) ++{ ++ struct kbase_device *const kbdev = group->kctx->kbdev; ++ u32 remaining = irqreq ^ irqack; ++ bool protm_pend = false; ++ const bool group_suspending = ++ !kbase_csf_scheduler_group_events_enabled(kbdev, group); ++ ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++ ++ while (remaining != 0) { ++ int const i = ffs(remaining) - 1; ++ struct kbase_queue *const queue = group->bound_queues[i]; ++ ++ remaining &= ~(1 << i); ++ ++ /* The queue pointer can be NULL, but if it isn't NULL then it ++ * cannot disappear since scheduler spinlock is held and before ++ * freeing a bound queue it has to be first unbound which ++ * requires scheduler spinlock. ++ */ ++ if (queue && !WARN_ON(queue->csi_index != i)) { ++ struct kbase_csf_cmd_stream_info const *const stream = ++ &ginfo->streams[i]; ++ u32 const cs_req = kbase_csf_firmware_cs_input_read( ++ stream, CS_REQ); ++ u32 const cs_ack = ++ kbase_csf_firmware_cs_output(stream, CS_ACK); ++ struct workqueue_struct *wq = group->kctx->csf.wq; ++ ++ if ((cs_req & CS_REQ_EXCEPTION_MASK) ^ ++ (cs_ack & CS_ACK_EXCEPTION_MASK)) { ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_FAULT_INTERRUPT, group, queue, cs_req ^ cs_ack); ++ handle_queue_exception_event(queue, cs_req, cs_ack); ++ } ++ ++ /* PROTM_PEND and TILER_OOM can be safely ignored ++ * because they will be raised again if the group ++ * is assigned a CSG slot in future. 
++ */ ++ if (group_suspending) { ++ u32 const cs_req_remain = cs_req & ~CS_REQ_EXCEPTION_MASK; ++ u32 const cs_ack_remain = cs_ack & ~CS_ACK_EXCEPTION_MASK; ++ ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_IGNORED_INTERRUPTS_GROUP_SUSPEND, ++ group, queue, cs_req_remain ^ cs_ack_remain); ++ continue; ++ } ++ ++ if (((cs_req & CS_REQ_TILER_OOM_MASK) ^ ++ (cs_ack & CS_ACK_TILER_OOM_MASK))) { ++ get_queue(queue); ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_TILER_OOM_INTERRUPT, group, queue, ++ cs_req ^ cs_ack); ++ if (WARN_ON(!queue_work(wq, &queue->oom_event_work))) { ++ /* The work item shall not have been ++ * already queued, there can be only ++ * one pending OoM event for a ++ * queue. ++ */ ++ release_queue(queue); ++ } ++ } ++ ++ if ((cs_req & CS_REQ_PROTM_PEND_MASK) ^ ++ (cs_ack & CS_ACK_PROTM_PEND_MASK)) { ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_INTERRUPT, group, queue, ++ cs_req ^ cs_ack); ++ ++ dev_dbg(kbdev->dev, ++ "Protected mode entry request for queue on csi %d bound to group-%d on slot %d", ++ queue->csi_index, group->handle, ++ group->csg_nr); ++ ++ bitmap_set(group->protm_pending_bitmap, i, 1); ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, PROTM_PENDING_SET, group, queue, ++ group->protm_pending_bitmap[0]); ++ protm_pend = true; ++ } ++ } ++ } ++ ++ if (protm_pend) ++ queue_work(group->kctx->csf.wq, &group->protm_event_work); ++} ++ ++/** ++ * process_csg_interrupts - Process interrupts for a CSG. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @csg_nr: CSG number. ++ * ++ * Handles interrupts for a CSG and for CSs within it. ++ * ++ * If the CSG's request register value differs from its acknowledge register ++ * then the firmware is notifying the host of an event concerning the whole ++ * group. The actions required are then determined by examining which ++ * notification flags differ between those two register values. ++ * ++ * See process_cs_interrupts() for details of per-stream interrupt handling. 
++ */ ++static void process_csg_interrupts(struct kbase_device *const kbdev, ++ int const csg_nr) ++{ ++ struct kbase_csf_cmd_stream_group_info *ginfo; ++ struct kbase_queue_group *group = NULL; ++ u32 req, ack, irqreq, irqack; ++ ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++ ++ if (WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num)) ++ return; ++ ++ KBASE_KTRACE_ADD(kbdev, CSG_INTERRUPT_PROCESS, NULL, csg_nr); ++ ++ ginfo = &kbdev->csf.global_iface.groups[csg_nr]; ++ req = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ); ++ ack = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); ++ irqreq = kbase_csf_firmware_csg_output(ginfo, CSG_IRQ_REQ); ++ irqack = kbase_csf_firmware_csg_input_read(ginfo, CSG_IRQ_ACK); ++ ++ /* There may not be any pending CSG/CS interrupts to process */ ++ if ((req == ack) && (irqreq == irqack)) ++ goto out; ++ ++ /* Immediately set IRQ_ACK bits to be same as the IRQ_REQ bits before ++ * examining the CS_ACK & CS_REQ bits. This would ensure that Host ++ * doesn't miss an interrupt for the CS in the race scenario where ++ * whilst Host is servicing an interrupt for the CS, firmware sends ++ * another interrupt for that CS. ++ */ ++ kbase_csf_firmware_csg_input(ginfo, CSG_IRQ_ACK, irqreq); ++ ++ group = kbase_csf_scheduler_get_group_on_slot(kbdev, csg_nr); ++ ++ /* The group pointer can be NULL here if interrupts for the group ++ * (like SYNC_UPDATE, IDLE notification) were delayed and arrived ++ * just after the suspension of group completed. However if not NULL ++ * then the group pointer cannot disappear even if User tries to ++ * terminate the group whilst this loop is running as scheduler ++ * spinlock is held and for freeing a group that is resident on a CSG ++ * slot scheduler spinlock is required.
++ */ ++ if (!group) ++ goto out; ++ ++ if (WARN_ON(kbase_csf_scheduler_group_get_slot_locked(group) != csg_nr)) ++ goto out; ++ ++ if ((req ^ ack) & CSG_REQ_SYNC_UPDATE_MASK) { ++ kbase_csf_firmware_csg_input_mask(ginfo, ++ CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK); ++ ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SYNC_UPDATE_INTERRUPT, group, req ^ ack); ++ kbase_csf_event_signal_cpu_only(group->kctx); ++ } ++ ++ if ((req ^ ack) & CSG_REQ_IDLE_MASK) { ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ ++ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack, ++ CSG_REQ_IDLE_MASK); ++ ++ set_bit(csg_nr, scheduler->csg_slots_idle_mask); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, group, ++ scheduler->csg_slots_idle_mask[0]); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_IDLE_INTERRUPT, group, req ^ ack); ++ dev_dbg(kbdev->dev, "Idle notification received for Group %u on slot %d\n", ++ group->handle, csg_nr); ++ ++ /* Check if the scheduling tick can be advanced */ ++ if (kbase_csf_scheduler_all_csgs_idle(kbdev) && ++ !scheduler->gpu_idle_fw_timer_enabled) { ++ kbase_csf_scheduler_advance_tick_nolock(kbdev); ++ } ++ } ++ ++ if ((req ^ ack) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK) { ++ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack, ++ CSG_REQ_PROGRESS_TIMER_EVENT_MASK); ++ ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_PROGRESS_TIMER_INTERRUPT, ++ group, req ^ ack); ++ dev_info(kbdev->dev, ++ "Timeout notification received for group %u of ctx %d_%d on slot %d\n", ++ group->handle, group->kctx->tgid, group->kctx->id, csg_nr); ++ ++ handle_progress_timer_event(group); ++ } ++ ++ process_cs_interrupts(group, ginfo, irqreq, irqack); ++ ++out: ++ /* group may still be NULL here */ ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_END, group, ++ ((u64)req ^ ack) | (((u64)irqreq ^ irqack) << 32)); ++} ++ ++/** ++ * process_prfcnt_interrupts - Process performance counter interrupts. 
++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @glb_req: Global request register value. ++ * @glb_ack: Global acknowledge register value. ++ * ++ * Handles interrupts issued by the firmware that relate to the performance ++ * counters. For example, on completion of a performance counter sample. It is ++ * expected that the scheduler spinlock is already held on calling this ++ * function. ++ */ ++static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req, ++ u32 glb_ack) ++{ ++ const struct kbase_csf_global_iface *const global_iface = ++ &kbdev->csf.global_iface; ++ ++ lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock); ++ ++ /* Process PRFCNT_SAMPLE interrupt. */ ++ if (kbdev->csf.hwcnt.request_pending && ++ ((glb_req & GLB_REQ_PRFCNT_SAMPLE_MASK) == ++ (glb_ack & GLB_REQ_PRFCNT_SAMPLE_MASK))) { ++ kbdev->csf.hwcnt.request_pending = false; ++ ++ dev_dbg(kbdev->dev, "PRFCNT_SAMPLE done interrupt received."); ++ ++ kbase_hwcnt_backend_csf_on_prfcnt_sample( ++ &kbdev->hwcnt_gpu_iface); ++ } ++ ++ /* Process PRFCNT_ENABLE interrupt. */ ++ if (kbdev->csf.hwcnt.enable_pending && ++ ((glb_req & GLB_REQ_PRFCNT_ENABLE_MASK) == ++ (glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK))) { ++ kbdev->csf.hwcnt.enable_pending = false; ++ ++ dev_dbg(kbdev->dev, ++ "PRFCNT_ENABLE status changed interrupt received."); ++ ++ if (glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK) ++ kbase_hwcnt_backend_csf_on_prfcnt_enable( ++ &kbdev->hwcnt_gpu_iface); ++ else ++ kbase_hwcnt_backend_csf_on_prfcnt_disable( ++ &kbdev->hwcnt_gpu_iface); ++ } ++ ++ /* Process PRFCNT_THRESHOLD interrupt. 
*/ ++ if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_THRESHOLD_MASK) { ++ dev_dbg(kbdev->dev, "PRFCNT_THRESHOLD interrupt received."); ++ ++ kbase_hwcnt_backend_csf_on_prfcnt_threshold( ++ &kbdev->hwcnt_gpu_iface); ++ ++ /* Set the GLB_REQ.PRFCNT_THRESHOLD flag back to ++ * the same value as GLB_ACK.PRFCNT_THRESHOLD ++ * flag in order to enable reporting of another ++ * PRFCNT_THRESHOLD event. ++ */ ++ kbase_csf_firmware_global_input_mask( ++ global_iface, GLB_REQ, glb_ack, ++ GLB_REQ_PRFCNT_THRESHOLD_MASK); ++ } ++ ++ /* Process PRFCNT_OVERFLOW interrupt. */ ++ if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_OVERFLOW_MASK) { ++ dev_dbg(kbdev->dev, "PRFCNT_OVERFLOW interrupt received."); ++ ++ kbase_hwcnt_backend_csf_on_prfcnt_overflow( ++ &kbdev->hwcnt_gpu_iface); ++ ++ /* Set the GLB_REQ.PRFCNT_OVERFLOW flag back to ++ * the same value as GLB_ACK.PRFCNT_OVERFLOW ++ * flag in order to enable reporting of another ++ * PRFCNT_OVERFLOW event. ++ */ ++ kbase_csf_firmware_global_input_mask( ++ global_iface, GLB_REQ, glb_ack, ++ GLB_REQ_PRFCNT_OVERFLOW_MASK); ++ } ++} ++ ++void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) ++{ ++ unsigned long flags; ++ u32 remaining = val; ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT, NULL, val); ++ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val); ++ ++ if (val & JOB_IRQ_GLOBAL_IF) { ++ const struct kbase_csf_global_iface *const global_iface = ++ &kbdev->csf.global_iface; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ ++ kbdev->csf.interrupt_received = true; ++ remaining &= ~JOB_IRQ_GLOBAL_IF; ++ ++ if (!kbdev->csf.firmware_reloaded) ++ kbase_csf_firmware_reload_completed(kbdev); ++ else if (global_iface->output) { ++ u32 glb_req, glb_ack; ++ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ glb_req = kbase_csf_firmware_global_input_read( ++ global_iface, GLB_REQ); ++ glb_ack = kbase_csf_firmware_global_output( ++ global_iface, GLB_ACK); ++ 
KBASE_KTRACE_ADD(kbdev, GLB_REQ_ACQ, NULL, glb_req ^ glb_ack); ++ ++ if ((glb_req ^ glb_ack) & GLB_REQ_PROTM_EXIT_MASK) { ++ dev_dbg(kbdev->dev, "Protected mode exit interrupt received"); ++ kbase_csf_firmware_global_input_mask( ++ global_iface, GLB_REQ, glb_ack, ++ GLB_REQ_PROTM_EXIT_MASK); ++ WARN_ON(!kbase_csf_scheduler_protected_mode_in_use(kbdev)); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_EXIT_PROTM, scheduler->active_protm_grp, 0u); ++ scheduler->active_protm_grp = NULL; ++ kbdev->protected_mode = false; ++ kbase_ipa_control_protm_exited(kbdev); ++ kbase_hwcnt_backend_csf_protm_exited( ++ &kbdev->hwcnt_gpu_iface); ++ } ++ ++ /* Handle IDLE Hysteresis notification event */ ++ if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) { ++ int non_idle_offslot_grps; ++ bool can_suspend_on_idle; ++ dev_dbg(kbdev->dev, "Idle-hysteresis event flagged"); ++ kbase_csf_firmware_global_input_mask( ++ global_iface, GLB_REQ, glb_ack, ++ GLB_REQ_IDLE_EVENT_MASK); ++ ++ non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps); ++ can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev); ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_CAN_IDLE, NULL, ++ ((u64)(u32)non_idle_offslot_grps) | (((u64)can_suspend_on_idle) << 32)); ++ ++ if (!non_idle_offslot_grps) { ++ if (can_suspend_on_idle) ++ queue_work(system_highpri_wq, ++ &scheduler->gpu_idle_work); ++ } else { ++ /* Advance the scheduling tick to get ++ * the non-idle suspended groups loaded ++ * soon. ++ */ ++ kbase_csf_scheduler_advance_tick_nolock( ++ kbdev); ++ } ++ } ++ ++ process_prfcnt_interrupts(kbdev, glb_req, glb_ack); ++ ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ ++ /* Invoke the MCU state machine as a state transition ++ * might have completed. 
++ */ ++ kbase_pm_update_state(kbdev); ++ } ++ ++ if (!remaining) { ++ wake_up_all(&kbdev->csf.event_wait); ++ KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val); ++ return; ++ } ++ } ++ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ while (remaining != 0) { ++ int const csg_nr = ffs(remaining) - 1; ++ ++ process_csg_interrupts(kbdev, csg_nr); ++ remaining &= ~(1 << csg_nr); ++ } ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ ++ wake_up_all(&kbdev->csf.event_wait); ++ KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val); ++} ++ ++void kbase_csf_doorbell_mapping_term(struct kbase_device *kbdev) ++{ ++ if (kbdev->csf.db_filp) { ++ struct page *page = as_page(kbdev->csf.dummy_db_page); ++ ++ kbase_mem_pool_free( ++ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], ++ page, false); ++ ++ fput(kbdev->csf.db_filp); ++ } ++} ++ ++int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev) ++{ ++ struct tagged_addr phys; ++ struct file *filp; ++ int ret; ++ ++ filp = shmem_file_setup("mali csf", MAX_LFS_FILESIZE, VM_NORESERVE); ++ if (IS_ERR(filp)) ++ return PTR_ERR(filp); ++ ++ ret = kbase_mem_pool_alloc_pages( ++ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], ++ 1, &phys, false); ++ ++ if (ret <= 0) { ++ fput(filp); ++ return ret; ++ } ++ ++ kbdev->csf.db_filp = filp; ++ kbdev->csf.dummy_db_page = phys; ++ kbdev->csf.db_file_offsets = 0; ++ ++ return 0; ++} ++ ++void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev) ++{ ++ if (as_phys_addr_t(kbdev->csf.dummy_user_reg_page)) { ++ struct page *page = as_page(kbdev->csf.dummy_user_reg_page); ++ ++ kbase_mem_pool_free( ++ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, ++ false); ++ } ++} ++ ++int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev) ++{ ++ struct tagged_addr phys; ++ struct page *page; ++ u32 *addr; ++ int ret; ++ ++ kbdev->csf.dummy_user_reg_page = as_tagged(0); ++ ++ ret = kbase_mem_pool_alloc_pages( ++ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, 
&phys, ++ false); ++ ++ if (ret <= 0) ++ return ret; ++ ++ page = as_page(phys); ++ addr = kmap_atomic(page); ++ ++ /* Write a special value for the latest flush register inside the ++ * dummy page ++ */ ++ addr[LATEST_FLUSH / sizeof(u32)] = POWER_DOWN_LATEST_FLUSH_VALUE; ++ ++ kbase_sync_single_for_device(kbdev, kbase_dma_addr(page), sizeof(u32), ++ DMA_BIDIRECTIONAL); ++ kunmap_atomic(addr); ++ ++ kbdev->csf.dummy_user_reg_page = phys; ++ ++ return 0; ++} ++ ++u8 kbase_csf_priority_check(struct kbase_device *kbdev, u8 req_priority) ++{ ++ struct priority_control_manager_device *pcm_device = kbdev->pcm_dev; ++ u8 out_priority = req_priority; ++ ++ if (pcm_device) { ++ req_priority = kbase_csf_priority_queue_group_priority_to_relative(req_priority); ++ out_priority = pcm_device->ops.pcm_scheduler_priority_check(pcm_device, current, req_priority); ++ out_priority = kbase_csf_priority_relative_to_queue_group_priority(out_priority); ++ } ++ ++ return out_priority; ++} ++ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf.h +new file mode 100644 +index 0000000..e3bd436 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf.h +@@ -0,0 +1,564 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#ifndef _KBASE_CSF_H_ ++#define _KBASE_CSF_H_ ++ ++#include "mali_kbase_csf_kcpu.h" ++#include "mali_kbase_csf_scheduler.h" ++#include "mali_kbase_csf_firmware.h" ++#include "mali_kbase_csf_protected_memory.h" ++ ++/* Indicate invalid CS h/w interface ++ */ ++#define KBASEP_IF_NR_INVALID ((s8)-1) ++ ++/* Indicate invalid CSG number for a GPU command queue group ++ */ ++#define KBASEP_CSG_NR_INVALID ((s8)-1) ++ ++/* Indicate invalid user doorbell number for a GPU command queue ++ */ ++#define KBASEP_USER_DB_NR_INVALID ((s8)-1) ++ ++#define FIRMWARE_PING_INTERVAL_MS (4000) /* 4 seconds */ ++ ++#define FIRMWARE_IDLE_HYSTERESIS_TIME_MS (10) /* Default 10 milliseconds */ ++ ++/** ++ * enum kbase_csf_event_callback_action - return type for CSF event callbacks. ++ * ++ * @KBASE_CSF_EVENT_CALLBACK_FIRST: Never set explicitly. ++ * It doesn't correspond to any action or type of event callback. ++ * ++ * @KBASE_CSF_EVENT_CALLBACK_KEEP: The callback will remain registered. ++ * ++ * @KBASE_CSF_EVENT_CALLBACK_REMOVE: The callback will be removed ++ * immediately upon return. ++ * ++ * @KBASE_CSF_EVENT_CALLBACK_LAST: Never set explicitly. ++ * It doesn't correspond to any action or type of event callback. ++ */ ++enum kbase_csf_event_callback_action { ++ KBASE_CSF_EVENT_CALLBACK_FIRST = 0, ++ KBASE_CSF_EVENT_CALLBACK_KEEP, ++ KBASE_CSF_EVENT_CALLBACK_REMOVE, ++ KBASE_CSF_EVENT_CALLBACK_LAST, ++}; ++ ++/** ++ * kbase_csf_event_callback - type for callback functions to be ++ * called upon CSF events. ++ * ++ * This is the type of callback functions that can be registered ++ * for CSF events. These function calls shall be triggered by any call ++ * to kbase_csf_event_signal. ++ * ++ * @param: Generic parameter to pass to the callback function.
++ * ++ * Return: KBASE_CSF_EVENT_CALLBACK_KEEP if the callback should remain ++ * registered, or KBASE_CSF_EVENT_CALLBACK_REMOVE if it should be removed. ++ */ ++typedef enum kbase_csf_event_callback_action kbase_csf_event_callback(void *param); ++ ++/** ++ * kbase_csf_event_wait_add - Add a CSF event callback ++ * ++ * This function adds an event callback to the list of CSF event callbacks ++ * belonging to a given Kbase context, to be triggered when a CSF event is ++ * signalled by kbase_csf_event_signal. ++ * ++ * @kctx: The Kbase context the @callback should be registered to. ++ * @callback: The callback function to register. ++ * @param: Custom parameter to be passed to the @callback function. ++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_event_wait_add(struct kbase_context *kctx, ++ kbase_csf_event_callback *callback, void *param); ++ ++/** ++ * kbase_csf_event_wait_remove - Remove a CSF event callback ++ * ++ * This function removes an event callback from the list of CSF event callbacks ++ * belonging to a given Kbase context. ++ * ++ * @kctx: The kbase context the @callback should be removed from. ++ * @callback: The callback function to remove. ++ * @param: Custom parameter that would have been passed to the @p callback ++ * function. ++ */ ++void kbase_csf_event_wait_remove(struct kbase_context *kctx, ++ kbase_csf_event_callback *callback, void *param); ++ ++/** ++ * kbase_csf_event_wait_remove_all - Removes all CSF event callbacks ++ * ++ * This function empties the list of CSF event callbacks belonging to a given ++ * Kbase context. ++ * ++ * @kctx: The kbase context for which CSF event callbacks have to be removed. ++ */ ++void kbase_csf_event_wait_remove_all(struct kbase_context *kctx); ++ ++/** ++ * kbase_csf_read_error - Read CS fatal error ++ * ++ * This function takes the CS fatal error from context's ordered ++ * error_list, copies its contents to @event_data. 
++ * ++ * @kctx: The kbase context to read fatal error from ++ * @event_data: Caller-provided buffer to copy the fatal error to ++ * ++ * Return: true if fatal error is read successfully. ++ */ ++bool kbase_csf_read_error(struct kbase_context *kctx, ++ struct base_csf_notification *event_data); ++ ++/** ++ * kbase_csf_error_pending - Check whether fatal error is pending ++ * ++ * @kctx: The kbase context to check fatal error upon. ++ * ++ * Return: true if fatal error is pending. ++ */ ++bool kbase_csf_error_pending(struct kbase_context *kctx); ++ ++/** ++ * kbase_csf_event_signal - Signal a CSF event ++ * ++ * This function triggers all the CSF event callbacks that are registered to ++ * a given Kbase context, and also signals the event handling thread of ++ * userspace driver waiting for the CSF event. ++ * ++ * @kctx: The kbase context whose CSF event callbacks shall be triggered. ++ * @notify_gpu: Flag to indicate if CSF firmware should be notified of the ++ * signaling of event that happened on the Driver side, either ++ * the signal came from userspace or from kcpu queues. ++ */ ++void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu); ++ ++static inline void kbase_csf_event_signal_notify_gpu(struct kbase_context *kctx) ++{ ++ kbase_csf_event_signal(kctx, true); ++} ++ ++static inline void kbase_csf_event_signal_cpu_only(struct kbase_context *kctx) ++{ ++ kbase_csf_event_signal(kctx, false); ++} ++ ++/** ++ * kbase_csf_ctx_init - Initialize the CSF interface for a GPU address space. ++ * ++ * @kctx: Pointer to the kbase context which is being initialized. ++ * ++ * Return: 0 if successful or a negative error code on failure. ++ */ ++int kbase_csf_ctx_init(struct kbase_context *kctx); ++ ++/** ++ * kbase_csf_ctx_handle_fault - Terminate queue groups & notify fault upon ++ * GPU bus fault, MMU page fault or similar. 
++ * ++ * This function terminates all GPU command queue groups in the context and ++ * notifies the event notification thread of the fault. ++ * ++ * @kctx: Pointer to faulty kbase context. ++ * @fault: Pointer to the fault. ++ */ ++void kbase_csf_ctx_handle_fault(struct kbase_context *kctx, ++ struct kbase_fault *fault); ++ ++/** ++ * kbase_csf_ctx_term - Terminate the CSF interface for a GPU address space. ++ * ++ * This function terminates any remaining CSGs and CSs which weren't destroyed ++ * before context termination. ++ * ++ * @kctx: Pointer to the kbase context which is being terminated. ++ */ ++void kbase_csf_ctx_term(struct kbase_context *kctx); ++ ++/** ++ * kbase_csf_queue_register - Register a GPU command queue. ++ * ++ * @kctx: Pointer to the kbase context within which the ++ * queue is to be registered. ++ * @reg: Pointer to the structure which contains details of the ++ * queue to be registered within the provided ++ * context. ++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_queue_register(struct kbase_context *kctx, ++ struct kbase_ioctl_cs_queue_register *reg); ++ ++/** ++ * kbase_csf_queue_register_ex - Register a GPU command queue with ++ * extended format. ++ * ++ * @kctx: Pointer to the kbase context within which the ++ * queue is to be registered. ++ * @reg: Pointer to the structure which contains details of the ++ * queue to be registered within the provided ++ * context, together with the extended parameter fields ++ * for supporting cs trace command. ++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_queue_register_ex(struct kbase_context *kctx, ++ struct kbase_ioctl_cs_queue_register_ex *reg); ++ ++/** ++ * kbase_csf_queue_terminate - Terminate a GPU command queue. ++ * ++ * @kctx: Pointer to the kbase context within which the ++ * queue is to be terminated. ++ * @term: Pointer to the structure which identifies which ++ * queue is to be terminated. 
++ */ ++void kbase_csf_queue_terminate(struct kbase_context *kctx, ++ struct kbase_ioctl_cs_queue_terminate *term); ++ ++/** ++ * kbase_csf_alloc_command_stream_user_pages - Allocate resources for a ++ * GPU command queue. ++ * ++ * This function allocates a pair of User mode input/output pages for a ++ * GPU command queue and maps them in the shared interface segment of MCU ++ * firmware address space. Also reserves a hardware doorbell page for the queue. ++ * ++ * @kctx: Pointer to the kbase context within which the resources ++ * for the queue are being allocated. ++ * @queue: Pointer to the queue for which to allocate resources. ++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, ++ struct kbase_queue *queue); ++ ++/** ++ * kbase_csf_queue_bind - Bind a GPU command queue to a queue group. ++ * ++ * @kctx: The kbase context. ++ * @bind: Pointer to the union which specifies a queue group and a ++ * queue to be bound to that group. ++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_queue_bind(struct kbase_context *kctx, ++ union kbase_ioctl_cs_queue_bind *bind); ++ ++/** ++ * kbase_csf_queue_unbind - Unbind a GPU command queue from a queue group ++ * to which it has been bound and free ++ * resources allocated for this queue if there ++ * are any. ++ * ++ * @queue: Pointer to queue to be unbound. ++ */ ++void kbase_csf_queue_unbind(struct kbase_queue *queue); ++ ++/** ++ * kbase_csf_queue_unbind_stopped - Unbind a GPU command queue in the case ++ * where it was never started. ++ * @queue: Pointer to queue to be unbound. ++ * ++ * Variant of kbase_csf_queue_unbind() for use on error paths for cleaning up ++ * queues that failed to fully bind. ++ */ ++void kbase_csf_queue_unbind_stopped(struct kbase_queue *queue); ++ ++/** ++ * kbase_csf_queue_kick - Schedule a GPU command queue on the firmware ++ * ++ * @kctx: The kbase context. 
++ * @kick: Pointer to the struct which specifies the queue ++ * that needs to be scheduled. ++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_queue_kick(struct kbase_context *kctx, ++ struct kbase_ioctl_cs_queue_kick *kick); ++ ++/** ++ * kbase_csf_queue_group_handle_is_valid - Find if the given queue group handle ++ * is valid. ++ * ++ * This function is used to determine if the queue group handle is valid. ++ * ++ * @kctx: The kbase context under which the queue group exists. ++ * @group_handle: Handle for the group which uniquely identifies it within ++ * the context with which it was created. ++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx, ++ u8 group_handle); ++ ++/** ++ * kbase_csf_queue_group_create - Create a GPU command queue group. ++ * ++ * @kctx: Pointer to the kbase context within which the ++ * queue group is to be created. ++ * @create: Pointer to the structure which contains details of the ++ * queue group which is to be created within the ++ * provided kbase context. ++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_queue_group_create(struct kbase_context *kctx, ++ union kbase_ioctl_cs_queue_group_create *create); ++ ++/** ++ * kbase_csf_queue_group_terminate - Terminate a GPU command queue group. ++ * ++ * @kctx: Pointer to the kbase context within which the ++ * queue group is to be terminated. ++ * @group_handle: Pointer to the structure which identifies the queue ++ * group which is to be terminated. ++ */ ++void kbase_csf_queue_group_terminate(struct kbase_context *kctx, ++ u8 group_handle); ++ ++/** ++ * kbase_csf_term_descheduled_queue_group - Terminate a GPU command queue ++ * group that is not operational ++ * inside the scheduler. ++ * ++ * @group: Pointer to the structure which identifies the queue ++ * group to be terminated. 
The function assumes that the caller ++ * is sure that the given group is not operational inside the ++ * scheduler. If in doubt, use its alternative: ++ * @ref kbase_csf_queue_group_terminate(). ++ */ ++void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group); ++ ++/** ++ * kbase_csf_queue_group_suspend - Suspend a GPU command queue group ++ * ++ * This function is used to suspend a queue group and copy the suspend buffer. ++ * ++ * @kctx: The kbase context for which the queue group is to be ++ * suspended. ++ * @sus_buf: Pointer to the structure which contains details of the ++ * user buffer and its kernel pinned pages. ++ * @group_handle: Handle for the group which uniquely identifies it within ++ * the context within which it was created. ++ * ++ * Return: 0 on success or negative value if failed to suspend ++ * queue group and copy suspend buffer contents. ++ */ ++int kbase_csf_queue_group_suspend(struct kbase_context *kctx, ++ struct kbase_suspend_copy_buffer *sus_buf, u8 group_handle); ++ ++/** ++ * kbase_csf_add_group_fatal_error - Report a fatal group error to userspace ++ * ++ * @group: GPU command queue group. ++ * @err_payload: Error payload to report. ++ */ ++void kbase_csf_add_group_fatal_error( ++ struct kbase_queue_group *const group, ++ struct base_gpu_queue_group_error const *const err_payload); ++ ++/** ++ * kbase_csf_interrupt - Handle interrupts issued by CSF firmware. ++ * ++ * @kbdev: The kbase device to handle an IRQ for ++ * @val: The value of JOB IRQ status register which triggered the interrupt ++ */ ++void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val); ++ ++/** ++ * kbase_csf_doorbell_mapping_init - Initialize the fields that facilitates ++ * the update of userspace mapping of HW ++ * doorbell page. 
++ * ++ * The function creates a file and allocates a dummy page to facilitate the ++ * update of userspace mapping to point to the dummy page instead of the real ++ * HW doorbell page after the suspend of queue group. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_doorbell_mapping_term - Free the dummy page & close the file used ++ * to update the userspace mapping of HW doorbell page ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ */ ++void kbase_csf_doorbell_mapping_term(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_setup_dummy_user_reg_page - Setup the dummy page that is accessed ++ * instead of the User register page after ++ * the GPU power down. ++ * ++ * The function allocates a dummy page which is used to replace the User ++ * register page in the userspace mapping after the power down of GPU. ++ * On the power up of GPU, the mapping is updated to point to the real ++ * User register page. The mapping is used to allow access to LATEST_FLUSH ++ * register from userspace. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_free_dummy_user_reg_page - Free the dummy page that was ++ * used to replace the User register page ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ */ ++void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_ring_csg_doorbell - ring the doorbell for a CSG interface. ++ * ++ * The function kicks a notification on the CSG interface to firmware. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
++ * @slot: Index of CSG interface for ringing the door-bell. ++ */ ++void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot); ++ ++/** ++ * kbase_csf_ring_csg_slots_doorbell - ring the doorbell for a set of CSG ++ * interfaces. ++ * ++ * The function kicks a notification on a set of CSG interfaces to firmware. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @slot_bitmap: bitmap for the given slots, slot-0 on bit-0, etc. ++ */ ++void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev, ++ u32 slot_bitmap); ++ ++/** ++ * kbase_csf_ring_cs_kernel_doorbell - ring the kernel doorbell for a CSI ++ * assigned to a GPU queue ++ * ++ * The function sends a doorbell interrupt notification to the firmware for ++ * a CSI assigned to a GPU queue. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @csi_index: ID of the CSI assigned to the GPU queue. ++ * @csg_nr: Index of the CSG slot assigned to the queue ++ * group to which the GPU queue is bound. ++ * @ring_csg_doorbell: Flag to indicate if the CSG doorbell needs to be rung ++ * after updating the CSG_DB_REQ. So if this flag is false ++ * the doorbell interrupt will not be sent to FW. ++ * The flag is supposed be false only when the input page ++ * for bound GPU queues is programmed at the time of ++ * starting/resuming the group on a CSG slot. ++ */ ++void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, ++ int csi_index, int csg_nr, ++ bool ring_csg_doorbell); ++ ++/** ++ * kbase_csf_ring_cs_user_doorbell - ring the user doorbell allocated for a ++ * queue. ++ * ++ * The function kicks a notification to the firmware on the doorbell assigned ++ * to the queue. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @queue: Pointer to the queue for ringing the door-bell. 
++ */ ++void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev, ++ struct kbase_queue *queue); ++ ++/** ++ * kbase_csf_active_queue_groups_reset - Reset the state of all active GPU ++ * command queue groups associated with the context. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @kctx: The kbase context. ++ * ++ * This function will iterate through all the active/scheduled GPU command ++ * queue groups associated with the context, deschedule and mark them as ++ * terminated (which will then lead to unbinding of all the queues bound to ++ * them) and also no more work would be allowed to execute for them. ++ * ++ * This is similar to the action taken in response to an unexpected OoM event. ++ */ ++void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev, ++ struct kbase_context *kctx); ++ ++/** ++ * kbase_csf_priority_check - Check the priority requested ++ * ++ * @kbdev: Device pointer ++ * @req_priority: Requested priority ++ * ++ * This will determine whether the requested priority can be satisfied. ++ * ++ * Return: The same or lower priority than requested. ++ */ ++u8 kbase_csf_priority_check(struct kbase_device *kbdev, u8 req_priority); ++ ++extern const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT]; ++extern const u8 kbasep_csf_relative_to_queue_group_priority[KBASE_QUEUE_GROUP_PRIORITY_COUNT]; ++ ++/** ++ * kbase_csf_priority_relative_to_queue_group_priority - Convert relative to base priority ++ * ++ * @priority: kbase relative priority ++ * ++ * This will convert the monotonically increasing relative priority to the ++ * fixed base priority list. ++ * ++ * Return: base_queue_group_priority priority.
++ */ ++static inline u8 kbase_csf_priority_relative_to_queue_group_priority(u8 priority) ++{ ++ if (priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT) ++ priority = KBASE_QUEUE_GROUP_PRIORITY_LOW; ++ return kbasep_csf_relative_to_queue_group_priority[priority]; ++} ++ ++/** ++ * kbase_csf_priority_queue_group_priority_to_relative - Convert base priority to relative ++ * ++ * @priority: base_queue_group_priority priority ++ * ++ * This will convert the fixed base priority list to monotonically increasing relative priority. ++ * ++ * Return: kbase relative priority. ++ */ ++static inline u8 kbase_csf_priority_queue_group_priority_to_relative(u8 priority) ++{ ++ /* Apply low priority in case of invalid priority */ ++ if (priority >= BASE_QUEUE_GROUP_PRIORITY_COUNT) ++ priority = BASE_QUEUE_GROUP_PRIORITY_LOW; ++ return kbasep_csf_queue_group_priority_to_relative[priority]; ++} ++ ++ ++#endif /* _KBASE_CSF_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_cpu_queue_debugfs.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_cpu_queue_debugfs.c +new file mode 100644 +index 0000000..26637bf +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_cpu_queue_debugfs.c +@@ -0,0 +1,191 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details.
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#include "mali_kbase_csf_cpu_queue_debugfs.h" ++#include ++#include ++ ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ ++bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx, ++ struct base_csf_notification *req) ++{ ++ if (atomic_cmpxchg(&kctx->csf.cpu_queue.dump_req_status, ++ BASE_CSF_CPU_QUEUE_DUMP_ISSUED, ++ BASE_CSF_CPU_QUEUE_DUMP_PENDING) != ++ BASE_CSF_CPU_QUEUE_DUMP_ISSUED) { ++ return false; ++ } ++ ++ req->type = BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP; ++ return true; ++} ++ ++/** ++ * kbasep_csf_cpu_queue_debugfs_show() - Print cpu queue information for per context ++ * ++ * @file: The seq_file for printing to ++ * @data: The debugfs dentry private data, a pointer to kbase_context ++ * ++ * Return: Negative error code or 0 on success. ++ */ ++static int kbasep_csf_cpu_queue_debugfs_show(struct seq_file *file, void *data) ++{ ++ struct kbase_context *kctx = file->private; ++ ++ mutex_lock(&kctx->csf.lock); ++ if (atomic_read(&kctx->csf.cpu_queue.dump_req_status) != ++ BASE_CSF_CPU_QUEUE_DUMP_COMPLETE) { ++ seq_printf(file, "Dump request already started! 
(try again)\n"); ++ mutex_unlock(&kctx->csf.lock); ++ return -EBUSY; ++ } ++ ++ atomic_set(&kctx->csf.cpu_queue.dump_req_status, BASE_CSF_CPU_QUEUE_DUMP_ISSUED); ++ init_completion(&kctx->csf.cpu_queue.dump_cmp); ++ kbase_event_wakeup(kctx); ++ mutex_unlock(&kctx->csf.lock); ++ ++ seq_printf(file, "CPU Queues table (version:v%u):\n", MALI_CSF_CPU_QUEUE_DEBUGFS_VERSION); ++ ++ wait_for_completion_timeout(&kctx->csf.cpu_queue.dump_cmp, ++ msecs_to_jiffies(3000)); ++ ++ mutex_lock(&kctx->csf.lock); ++ if (kctx->csf.cpu_queue.buffer) { ++ WARN_ON(atomic_read(&kctx->csf.cpu_queue.dump_req_status) != ++ BASE_CSF_CPU_QUEUE_DUMP_PENDING); ++ ++ seq_printf(file, "%s\n", kctx->csf.cpu_queue.buffer); ++ ++ kfree(kctx->csf.cpu_queue.buffer); ++ kctx->csf.cpu_queue.buffer = NULL; ++ kctx->csf.cpu_queue.buffer_size = 0; ++ } ++ else ++ seq_printf(file, "Dump error! (time out)\n"); ++ ++ atomic_set(&kctx->csf.cpu_queue.dump_req_status, ++ BASE_CSF_CPU_QUEUE_DUMP_COMPLETE); ++ ++ mutex_unlock(&kctx->csf.lock); ++ return 0; ++} ++ ++static int kbasep_csf_cpu_queue_debugfs_open(struct inode *in, struct file *file) ++{ ++ return single_open(file, kbasep_csf_cpu_queue_debugfs_show, in->i_private); ++} ++ ++static const struct file_operations kbasep_csf_cpu_queue_debugfs_fops = { ++ .open = kbasep_csf_cpu_queue_debugfs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++void kbase_csf_cpu_queue_debugfs_init(struct kbase_context *kctx) ++{ ++ struct dentry *file; ++ ++ if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) ++ return; ++ ++ file = debugfs_create_file("cpu_queue", 0444, kctx->kctx_dentry, ++ kctx, &kbasep_csf_cpu_queue_debugfs_fops); ++ ++ if (IS_ERR_OR_NULL(file)) { ++ dev_warn(kctx->kbdev->dev, ++ "Unable to create cpu queue debugfs entry"); ++ } ++ ++ kctx->csf.cpu_queue.buffer = NULL; ++ kctx->csf.cpu_queue.buffer_size = 0; ++ atomic_set(&kctx->csf.cpu_queue.dump_req_status, ++ BASE_CSF_CPU_QUEUE_DUMP_COMPLETE); ++} ++ ++int 
kbase_csf_cpu_queue_dump(struct kbase_context *kctx, ++ u64 buffer, size_t buf_size) ++{ ++ int err = 0; ++ ++ size_t alloc_size = buf_size; ++ char *dump_buffer; ++ ++ if (!buffer || !alloc_size) ++ goto done; ++ ++ alloc_size = (alloc_size + PAGE_SIZE) & ~(PAGE_SIZE - 1); ++ dump_buffer = kzalloc(alloc_size, GFP_KERNEL); ++ if (ZERO_OR_NULL_PTR(dump_buffer)) { ++ err = -ENOMEM; ++ goto done; ++ } ++ ++ WARN_ON(kctx->csf.cpu_queue.buffer != NULL); ++ ++ err = copy_from_user(dump_buffer, ++ u64_to_user_ptr(buffer), ++ buf_size); ++ if (err) { ++ kfree(dump_buffer); ++ err = -EFAULT; ++ goto done; ++ } ++ ++ mutex_lock(&kctx->csf.lock); ++ ++ kfree(kctx->csf.cpu_queue.buffer); ++ ++ if (atomic_read(&kctx->csf.cpu_queue.dump_req_status) == ++ BASE_CSF_CPU_QUEUE_DUMP_PENDING) { ++ kctx->csf.cpu_queue.buffer = dump_buffer; ++ kctx->csf.cpu_queue.buffer_size = buf_size; ++ complete_all(&kctx->csf.cpu_queue.dump_cmp); ++ } else { ++ kfree(dump_buffer); ++ } ++ ++ mutex_unlock(&kctx->csf.lock); ++done: ++ return err; ++} ++#else ++/* ++ * Stub functions for when debugfs is disabled ++ */ ++void kbase_csf_cpu_queue_debugfs_init(struct kbase_context *kctx) ++{ ++} ++ ++bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx, ++ struct base_csf_notification *req) ++{ ++ return false; ++} ++ ++int kbase_csf_cpu_queue_dump(struct kbase_context *kctx, ++ u64 buffer, size_t buf_size) ++{ ++ return 0; ++} ++#endif /* CONFIG_DEBUG_FS */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_cpu_queue_debugfs.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_cpu_queue_debugfs.h +new file mode 100644 +index 0000000..435a993 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_cpu_queue_debugfs.h +@@ -0,0 +1,90 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#ifndef _KBASE_CSF_CPU_QUEUE_DEBUGFS_H_ ++#define _KBASE_CSF_CPU_QUEUE_DEBUGFS_H_ ++ ++#include ++#include ++ ++#include "mali_kbase.h" ++ ++/* Forward declaration */ ++struct base_csf_notification; ++ ++#define MALI_CSF_CPU_QUEUE_DEBUGFS_VERSION 0 ++ ++/* CPU queue dump status */ ++/* Dumping is done or no dumping is in progress. */ ++#define BASE_CSF_CPU_QUEUE_DUMP_COMPLETE 0 ++/* Dumping request is pending. */ ++#define BASE_CSF_CPU_QUEUE_DUMP_PENDING 1 ++/* Dumping request is issued to Userspace */ ++#define BASE_CSF_CPU_QUEUE_DUMP_ISSUED 2 ++ ++ ++/** ++ * kbase_csf_cpu_queue_debugfs_init() - Create a debugfs entry for per context cpu queue(s) ++ * ++ * @kctx: The kbase_context for which to create the debugfs entry ++ */ ++void kbase_csf_cpu_queue_debugfs_init(struct kbase_context *kctx); ++ ++/** ++ * kbase_csf_cpu_queue_read_dump_req - Read cpu queue dump request event ++ * ++ * @kctx: The kbase_context which cpu queue dumpped belongs to ++ * @req: Notification with cpu queue dump request. ++ * ++ * Return: true if needs CPU queue dump, or false otherwise. 
++ */ ++bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx, ++ struct base_csf_notification *req); ++ ++/** ++ * kbase_csf_cpu_queue_dump_needed - Check the requirement for cpu queue dump ++ * ++ * @kctx: The kbase_context which cpu queue dumpped belongs to ++ * ++ * Return: true if it needs cpu queue dump, or false otherwise. ++ */ ++static inline bool kbase_csf_cpu_queue_dump_needed(struct kbase_context *kctx) ++{ ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ return (atomic_read(&kctx->csf.cpu_queue.dump_req_status) == ++ BASE_CSF_CPU_QUEUE_DUMP_ISSUED); ++#else ++ return false; ++#endif ++} ++ ++/** ++ * kbase_csf_cpu_queue_dump - dump buffer containing cpu queue information to debugfs ++ * ++ * @kctx: The kbase_context which cpu queue dumpped belongs to ++ * @buffer: Buffer containing the cpu queue information. ++ * @buf_size: Buffer size. ++ * ++ * Return: Return 0 for dump successfully, or error code. ++ */ ++int kbase_csf_cpu_queue_dump(struct kbase_context *kctx, ++ u64 buffer, size_t buf_size); ++#endif /* _KBASE_CSF_CPU_QUEUE_DEBUGFS_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_csg_debugfs.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_csg_debugfs.c +new file mode 100644 +index 0000000..14deb98 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_csg_debugfs.c +@@ -0,0 +1,591 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#include "mali_kbase_csf_csg_debugfs.h" ++#include ++#include ++#include ++#include ++ ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++#include "mali_kbase_csf_tl_reader.h" ++ ++/** ++ * blocked_reason_to_string() - Convert blocking reason id to a string ++ * ++ * @reason_id: blocked_reason ++ * ++ * Return: Suitable string ++ */ ++static const char *blocked_reason_to_string(u32 reason_id) ++{ ++ /* possible blocking reasons of a cs */ ++ static const char *const cs_blocked_reason[] = { ++ [CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED] = "UNBLOCKED", ++ [CS_STATUS_BLOCKED_REASON_REASON_WAIT] = "WAIT", ++ [CS_STATUS_BLOCKED_REASON_REASON_PROGRESS_WAIT] = ++ "PROGRESS_WAIT", ++ [CS_STATUS_BLOCKED_REASON_REASON_SYNC_WAIT] = "SYNC_WAIT", ++ [CS_STATUS_BLOCKED_REASON_REASON_DEFERRED] = "DEFERRED", ++ [CS_STATUS_BLOCKED_REASON_REASON_RESOURCE] = "RESOURCE", ++ [CS_STATUS_BLOCKED_REASON_REASON_FLUSH] = "FLUSH" ++ }; ++ ++ if (WARN_ON(reason_id >= ARRAY_SIZE(cs_blocked_reason))) ++ return "UNKNOWN_BLOCKED_REASON_ID"; ++ ++ return cs_blocked_reason[reason_id]; ++} ++ ++static void kbasep_csf_scheduler_dump_active_queue_cs_status_wait( ++ struct seq_file *file, u32 wait_status, u32 wait_sync_value, ++ u64 wait_sync_live_value, u64 wait_sync_pointer, u32 sb_status, ++ u32 blocked_reason) ++{ ++#define WAITING "Waiting" ++#define NOT_WAITING "Not waiting" ++ ++ seq_printf(file, "SB_MASK: %d\n", ++ CS_STATUS_WAIT_SB_MASK_GET(wait_status)); ++ seq_printf(file, "PROGRESS_WAIT: %s\n", ++ CS_STATUS_WAIT_PROGRESS_WAIT_GET(wait_status) ? ++ WAITING : NOT_WAITING); ++ seq_printf(file, "PROTM_PEND: %s\n", ++ CS_STATUS_WAIT_PROTM_PEND_GET(wait_status) ? 
++ WAITING : NOT_WAITING); ++ seq_printf(file, "SYNC_WAIT: %s\n", ++ CS_STATUS_WAIT_SYNC_WAIT_GET(wait_status) ? ++ WAITING : NOT_WAITING); ++ seq_printf(file, "WAIT_CONDITION: %s\n", ++ CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(wait_status) ? ++ "greater than" : "less or equal"); ++ seq_printf(file, "SYNC_POINTER: 0x%llx\n", wait_sync_pointer); ++ seq_printf(file, "SYNC_VALUE: %d\n", wait_sync_value); ++ seq_printf(file, "SYNC_LIVE_VALUE: 0x%016llx\n", wait_sync_live_value); ++ seq_printf(file, "SB_STATUS: %u\n", ++ CS_STATUS_SCOREBOARDS_NONZERO_GET(sb_status)); ++ seq_printf(file, "BLOCKED_REASON: %s\n", ++ blocked_reason_to_string(CS_STATUS_BLOCKED_REASON_REASON_GET( ++ blocked_reason))); ++} ++ ++static void kbasep_csf_scheduler_dump_active_cs_trace(struct seq_file *file, ++ struct kbase_csf_cmd_stream_info const *const stream) ++{ ++ u32 val = kbase_csf_firmware_cs_input_read(stream, ++ CS_INSTR_BUFFER_BASE_LO); ++ u64 addr = ((u64)kbase_csf_firmware_cs_input_read(stream, ++ CS_INSTR_BUFFER_BASE_HI) << 32) | val; ++ val = kbase_csf_firmware_cs_input_read(stream, ++ CS_INSTR_BUFFER_SIZE); ++ ++ seq_printf(file, "CS_TRACE_BUF_ADDR: 0x%16llx, SIZE: %u\n", addr, val); ++ ++ /* Write offset variable address (pointer) */ ++ val = kbase_csf_firmware_cs_input_read(stream, ++ CS_INSTR_BUFFER_OFFSET_POINTER_LO); ++ addr = ((u64)kbase_csf_firmware_cs_input_read(stream, ++ CS_INSTR_BUFFER_OFFSET_POINTER_HI) << 32) | val; ++ seq_printf(file, "CS_TRACE_BUF_OFFSET_PTR: 0x%16llx\n", addr); ++ ++ /* EVENT_SIZE and EVENT_STATEs */ ++ val = kbase_csf_firmware_cs_input_read(stream, CS_INSTR_CONFIG); ++ seq_printf(file, "TRACE_EVENT_SIZE: 0x%x, TRACE_EVENT_STAES 0x%x\n", ++ CS_INSTR_CONFIG_EVENT_SIZE_GET(val), ++ CS_INSTR_CONFIG_EVENT_STATE_GET(val)); ++} ++ ++/** ++ * kbasep_csf_scheduler_dump_active_queue() - Print GPU command queue ++ * debug information ++ * ++ * @file: seq_file for printing to ++ * @queue: Address of a GPU command queue to examine ++ */ ++static void 
kbasep_csf_scheduler_dump_active_queue(struct seq_file *file, ++ struct kbase_queue *queue) ++{ ++ u32 *addr; ++ u64 cs_extract; ++ u64 cs_insert; ++ u32 cs_active; ++ u64 wait_sync_pointer; ++ u32 wait_status, wait_sync_value; ++ u32 sb_status; ++ u32 blocked_reason; ++ struct kbase_vmap_struct *mapping; ++ u64 *evt; ++ u64 wait_sync_live_value; ++ ++ if (!queue) ++ return; ++ ++ if (WARN_ON(queue->csi_index == KBASEP_IF_NR_INVALID || ++ !queue->group)) ++ return; ++ ++ /* Ring the doorbell to have firmware update CS_EXTRACT */ ++ kbase_csf_ring_cs_user_doorbell(queue->kctx->kbdev, queue); ++ msleep(100); ++ ++ addr = (u32 *)queue->user_io_addr; ++ cs_insert = addr[CS_INSERT_LO/4] | ((u64)addr[CS_INSERT_HI/4] << 32); ++ ++ addr = (u32 *)(queue->user_io_addr + PAGE_SIZE); ++ cs_extract = addr[CS_EXTRACT_LO/4] | ((u64)addr[CS_EXTRACT_HI/4] << 32); ++ cs_active = addr[CS_ACTIVE/4]; ++ ++#define KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO \ ++ "Bind Idx, Ringbuf addr, Prio, Insert offset, Extract offset, Active, Doorbell\n" ++ ++ seq_printf(file, KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO "%8d, %16llx, %4u, %16llx, %16llx, %6u, %8d\n", ++ queue->csi_index, queue->base_addr, queue->priority, ++ cs_insert, cs_extract, cs_active, queue->doorbell_nr); ++ ++ /* Print status information for blocked group waiting for sync object. For on-slot queues, ++ * if cs_trace is enabled, dump the interface's cs_trace configuration. 
++ */ ++ if (kbase_csf_scheduler_group_get_slot(queue->group) < 0) { ++ if (CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) { ++ wait_status = queue->status_wait; ++ wait_sync_value = queue->sync_value; ++ wait_sync_pointer = queue->sync_ptr; ++ sb_status = queue->sb_status; ++ blocked_reason = queue->blocked_reason; ++ ++ evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx, wait_sync_pointer, &mapping); ++ if (evt) { ++ wait_sync_live_value = evt[0]; ++ kbase_phy_alloc_mapping_put(queue->kctx, mapping); ++ } else { ++ wait_sync_live_value = U64_MAX; ++ } ++ ++ kbasep_csf_scheduler_dump_active_queue_cs_status_wait( ++ file, wait_status, wait_sync_value, ++ wait_sync_live_value, wait_sync_pointer, ++ sb_status, blocked_reason); ++ } ++ } else { ++ struct kbase_device const *const kbdev = ++ queue->group->kctx->kbdev; ++ struct kbase_csf_cmd_stream_group_info const *const ginfo = ++ &kbdev->csf.global_iface.groups[queue->group->csg_nr]; ++ struct kbase_csf_cmd_stream_info const *const stream = ++ &ginfo->streams[queue->csi_index]; ++ u64 cmd_ptr; ++ u32 req_res; ++ ++ if (WARN_ON(!stream)) ++ return; ++ ++ cmd_ptr = kbase_csf_firmware_cs_output(stream, ++ CS_STATUS_CMD_PTR_LO); ++ cmd_ptr |= (u64)kbase_csf_firmware_cs_output(stream, ++ CS_STATUS_CMD_PTR_HI) << 32; ++ req_res = kbase_csf_firmware_cs_output(stream, ++ CS_STATUS_REQ_RESOURCE); ++ ++ seq_printf(file, "CMD_PTR: 0x%llx\n", cmd_ptr); ++ seq_printf(file, "REQ_RESOURCE [COMPUTE]: %d\n", ++ CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_GET(req_res)); ++ seq_printf(file, "REQ_RESOURCE [FRAGMENT]: %d\n", ++ CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_GET(req_res)); ++ seq_printf(file, "REQ_RESOURCE [TILER]: %d\n", ++ CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_GET(req_res)); ++ seq_printf(file, "REQ_RESOURCE [IDVS]: %d\n", ++ CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_GET(req_res)); ++ ++ wait_status = kbase_csf_firmware_cs_output(stream, ++ CS_STATUS_WAIT); ++ wait_sync_value = kbase_csf_firmware_cs_output(stream, ++ 
CS_STATUS_WAIT_SYNC_VALUE); ++ wait_sync_pointer = kbase_csf_firmware_cs_output(stream, ++ CS_STATUS_WAIT_SYNC_POINTER_LO); ++ wait_sync_pointer |= (u64)kbase_csf_firmware_cs_output(stream, ++ CS_STATUS_WAIT_SYNC_POINTER_HI) << 32; ++ ++ sb_status = kbase_csf_firmware_cs_output(stream, ++ CS_STATUS_SCOREBOARDS); ++ blocked_reason = kbase_csf_firmware_cs_output( ++ stream, CS_STATUS_BLOCKED_REASON); ++ ++ evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx, wait_sync_pointer, &mapping); ++ if (evt) { ++ wait_sync_live_value = evt[0]; ++ kbase_phy_alloc_mapping_put(queue->kctx, mapping); ++ } else { ++ wait_sync_live_value = U64_MAX; ++ } ++ ++ kbasep_csf_scheduler_dump_active_queue_cs_status_wait( ++ file, wait_status, wait_sync_value, ++ wait_sync_live_value, wait_sync_pointer, sb_status, ++ blocked_reason); ++ /* Dealing with cs_trace */ ++ if (kbase_csf_scheduler_queue_has_trace(queue)) ++ kbasep_csf_scheduler_dump_active_cs_trace(file, stream); ++ else ++ seq_puts(file, "NO CS_TRACE\n"); ++ } ++ ++ seq_puts(file, "\n"); ++} ++ ++/* Waiting timeout for STATUS_UPDATE acknowledgment, in milliseconds */ ++#define CSF_STATUS_UPDATE_TO_MS (100) ++ ++static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file, ++ struct kbase_queue_group *const group) ++{ ++ if (kbase_csf_scheduler_group_get_slot(group) >= 0) { ++ struct kbase_device *const kbdev = group->kctx->kbdev; ++ unsigned long flags; ++ u32 ep_c, ep_r; ++ char exclusive; ++ struct kbase_csf_cmd_stream_group_info const *const ginfo = ++ &kbdev->csf.global_iface.groups[group->csg_nr]; ++ long remaining = ++ kbase_csf_timeout_in_jiffies(CSF_STATUS_UPDATE_TO_MS); ++ u8 slot_priority = ++ kbdev->csf.scheduler.csg_slots[group->csg_nr].priority; ++ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ++ ~kbase_csf_firmware_csg_output(ginfo, CSG_ACK), ++ CSG_REQ_STATUS_UPDATE_MASK); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ 
kbase_csf_ring_csg_doorbell(kbdev, group->csg_nr); ++ ++ remaining = wait_event_timeout(kbdev->csf.event_wait, ++ !((kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ) ^ ++ kbase_csf_firmware_csg_output(ginfo, CSG_ACK)) & ++ CSG_REQ_STATUS_UPDATE_MASK), remaining); ++ ++ ep_c = kbase_csf_firmware_csg_output(ginfo, ++ CSG_STATUS_EP_CURRENT); ++ ep_r = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_EP_REQ); ++ ++ if (CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_GET(ep_r)) ++ exclusive = 'C'; ++ else if (CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_GET(ep_r)) ++ exclusive = 'F'; ++ else ++ exclusive = '0'; ++ ++ if (!remaining) { ++ dev_err(kbdev->dev, ++ "Timed out for STATUS_UPDATE on group %d on slot %d", ++ group->handle, group->csg_nr); ++ ++ seq_printf(file, "*** Warn: Timed out for STATUS_UPDATE on slot %d\n", ++ group->csg_nr); ++ seq_printf(file, "*** The following group-record is likely stale\n"); ++ } ++ ++ seq_puts(file, "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req), F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive\n"); ++ seq_printf(file, "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c\n", ++ group->handle, ++ group->csg_nr, ++ slot_priority, ++ group->run_state, ++ group->priority, ++ CSG_STATUS_EP_CURRENT_COMPUTE_EP_GET(ep_c), ++ CSG_STATUS_EP_REQ_COMPUTE_EP_GET(ep_r), ++ CSG_STATUS_EP_CURRENT_FRAGMENT_EP_GET(ep_c), ++ CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(ep_r), ++ CSG_STATUS_EP_CURRENT_TILER_EP_GET(ep_c), ++ CSG_STATUS_EP_REQ_TILER_EP_GET(ep_r), ++ exclusive); ++ } else { ++ seq_puts(file, "GroupID, CSG NR, Run State, Priority\n"); ++ seq_printf(file, "%7d, %6d, %9d, %8d\n", ++ group->handle, ++ group->csg_nr, ++ group->run_state, ++ group->priority); ++ } ++ ++ if (group->run_state != KBASE_CSF_GROUP_TERMINATED) { ++ unsigned int i; ++ ++ seq_puts(file, "Bound queues:\n"); ++ ++ for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) { ++ kbasep_csf_scheduler_dump_active_queue(file, ++ group->bound_queues[i]); ++ } ++ } ++ ++ seq_puts(file, "\n"); 
++} ++ ++/** ++ * kbasep_csf_queue_group_debugfs_show() - Print per-context GPU command queue ++ * group debug information ++ * ++ * @file: The seq_file for printing to ++ * @data: The debugfs dentry private data, a pointer to kbase context ++ * ++ * Return: Negative error code or 0 on success. ++ */ ++static int kbasep_csf_queue_group_debugfs_show(struct seq_file *file, ++ void *data) ++{ ++ u32 gr; ++ struct kbase_context *const kctx = file->private; ++ struct kbase_device *const kbdev = kctx->kbdev; ++ ++ if (WARN_ON(!kctx)) ++ return -EINVAL; ++ ++ seq_printf(file, "MALI_CSF_CSG_DEBUGFS_VERSION: v%u\n", ++ MALI_CSF_CSG_DEBUGFS_VERSION); ++ ++ mutex_lock(&kctx->csf.lock); ++ kbase_csf_scheduler_lock(kbdev); ++ for (gr = 0; gr < MAX_QUEUE_GROUP_NUM; gr++) { ++ struct kbase_queue_group *const group = ++ kctx->csf.queue_groups[gr]; ++ ++ if (group) ++ kbasep_csf_scheduler_dump_active_group(file, group); ++ } ++ kbase_csf_scheduler_unlock(kbdev); ++ mutex_unlock(&kctx->csf.lock); ++ ++ return 0; ++} ++ ++/** ++ * kbasep_csf_scheduler_dump_active_groups() - Print debug info for active ++ * GPU command queue groups ++ * ++ * @file: The seq_file for printing to ++ * @data: The debugfs dentry private data, a pointer to kbase_device ++ * ++ * Return: Negative error code or 0 on success. 
++ */ ++static int kbasep_csf_scheduler_dump_active_groups(struct seq_file *file, ++ void *data) ++{ ++ u32 csg_nr; ++ struct kbase_device *kbdev = file->private; ++ u32 num_groups = kbdev->csf.global_iface.group_num; ++ ++ seq_printf(file, "MALI_CSF_CSG_DEBUGFS_VERSION: v%u\n", ++ MALI_CSF_CSG_DEBUGFS_VERSION); ++ ++ kbase_csf_scheduler_lock(kbdev); ++ for (csg_nr = 0; csg_nr < num_groups; csg_nr++) { ++ struct kbase_queue_group *const group = ++ kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; ++ ++ if (!group) ++ continue; ++ ++ seq_printf(file, "\nCtx %d_%d\n", group->kctx->tgid, ++ group->kctx->id); ++ ++ kbasep_csf_scheduler_dump_active_group(file, group); ++ } ++ kbase_csf_scheduler_unlock(kbdev); ++ ++ return 0; ++} ++ ++static int kbasep_csf_queue_group_debugfs_open(struct inode *in, ++ struct file *file) ++{ ++ return single_open(file, kbasep_csf_queue_group_debugfs_show, ++ in->i_private); ++} ++ ++static int kbasep_csf_active_queue_groups_debugfs_open(struct inode *in, ++ struct file *file) ++{ ++ return single_open(file, kbasep_csf_scheduler_dump_active_groups, ++ in->i_private); ++} ++ ++static const struct file_operations kbasep_csf_queue_group_debugfs_fops = { ++ .open = kbasep_csf_queue_group_debugfs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++void kbase_csf_queue_group_debugfs_init(struct kbase_context *kctx) ++{ ++ struct dentry *file; ++#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) ++ const mode_t mode = 0444; ++#else ++ const mode_t mode = 0400; ++#endif ++ ++ if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) ++ return; ++ ++ file = debugfs_create_file("groups", mode, ++ kctx->kctx_dentry, kctx, &kbasep_csf_queue_group_debugfs_fops); ++ ++ if (IS_ERR_OR_NULL(file)) { ++ dev_warn(kctx->kbdev->dev, ++ "Unable to create per context queue groups debugfs entry"); ++ } ++} ++ ++static const struct file_operations ++ kbasep_csf_active_queue_groups_debugfs_fops = { ++ .open = 
kbasep_csf_active_queue_groups_debugfs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++static int kbasep_csf_debugfs_scheduling_timer_enabled_get( ++ void *data, u64 *val) ++{ ++ struct kbase_device *const kbdev = data; ++ ++ *val = kbase_csf_scheduler_timer_is_enabled(kbdev); ++ ++ return 0; ++} ++ ++static int kbasep_csf_debugfs_scheduling_timer_enabled_set( ++ void *data, u64 val) ++{ ++ struct kbase_device *const kbdev = data; ++ ++ kbase_csf_scheduler_timer_set_enabled(kbdev, val != 0); ++ ++ return 0; ++} ++ ++static int kbasep_csf_debugfs_scheduling_timer_kick_set( ++ void *data, u64 val) ++{ ++ struct kbase_device *const kbdev = data; ++ ++ kbase_csf_scheduler_kick(kbdev); ++ ++ return 0; ++} ++ ++DEFINE_SIMPLE_ATTRIBUTE(kbasep_csf_debugfs_scheduling_timer_enabled_fops, ++ &kbasep_csf_debugfs_scheduling_timer_enabled_get, ++ &kbasep_csf_debugfs_scheduling_timer_enabled_set, ++ "%llu\n"); ++DEFINE_SIMPLE_ATTRIBUTE(kbasep_csf_debugfs_scheduling_timer_kick_fops, ++ NULL, ++ &kbasep_csf_debugfs_scheduling_timer_kick_set, ++ "%llu\n"); ++ ++/** ++ * kbase_csf_debugfs_scheduler_suspend_get() - get if the scheduler is suspended. ++ * ++ * @data: The debugfs dentry private data, a pointer to kbase_device ++ * @val: The debugfs output value, boolean: 1 suspended, 0 otherwise ++ * ++ * Return: 0 ++ */ ++static int kbase_csf_debugfs_scheduler_suspend_get( ++ void *data, u64 *val) ++{ ++ struct kbase_device *kbdev = data; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ ++ kbase_csf_scheduler_lock(kbdev); ++ *val = (scheduler->state == SCHED_SUSPENDED); ++ kbase_csf_scheduler_unlock(kbdev); ++ ++ return 0; ++} ++ ++/** ++ * kbase_csf_debugfs_scheduler_suspend_set() - set the scheduler to suspended. 
++ * ++ * @data: The debugfs dentry private data, a pointer to kbase_device ++ * @val: The debugfs input value, boolean: 1 suspend, 0 otherwise ++ * ++ * Return: Negative value if already in requested state, 0 otherwise. ++ */ ++static int kbase_csf_debugfs_scheduler_suspend_set( ++ void *data, u64 val) ++{ ++ struct kbase_device *kbdev = data; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ enum kbase_csf_scheduler_state state; ++ ++ kbase_csf_scheduler_lock(kbdev); ++ state = scheduler->state; ++ kbase_csf_scheduler_unlock(kbdev); ++ ++ if (val && (state != SCHED_SUSPENDED)) ++ kbase_csf_scheduler_pm_suspend(kbdev); ++ else if (!val && (state == SCHED_SUSPENDED)) ++ kbase_csf_scheduler_pm_resume(kbdev); ++ else ++ return -1; ++ ++ return 0; ++} ++ ++DEFINE_SIMPLE_ATTRIBUTE(kbasep_csf_debugfs_scheduler_suspend_fops, ++ &kbase_csf_debugfs_scheduler_suspend_get, ++ &kbase_csf_debugfs_scheduler_suspend_set, ++ "%llu\n"); ++ ++void kbase_csf_debugfs_init(struct kbase_device *kbdev) ++{ ++ debugfs_create_file("active_groups", 0444, ++ kbdev->mali_debugfs_directory, kbdev, ++ &kbasep_csf_active_queue_groups_debugfs_fops); ++ ++ debugfs_create_file("scheduling_timer_enabled", 0644, ++ kbdev->mali_debugfs_directory, kbdev, ++ &kbasep_csf_debugfs_scheduling_timer_enabled_fops); ++ debugfs_create_file("scheduling_timer_kick", 0200, ++ kbdev->mali_debugfs_directory, kbdev, ++ &kbasep_csf_debugfs_scheduling_timer_kick_fops); ++ debugfs_create_file("scheduler_suspend", 0644, ++ kbdev->mali_debugfs_directory, kbdev, ++ &kbasep_csf_debugfs_scheduler_suspend_fops); ++ ++ kbase_csf_tl_reader_debugfs_init(kbdev); ++ kbase_csf_firmware_trace_buffer_debugfs_init(kbdev); ++} ++ ++#else ++/* ++ * Stub functions for when debugfs is disabled ++ */ ++void kbase_csf_queue_group_debugfs_init(struct kbase_context *kctx) ++{ ++} ++ ++void kbase_csf_debugfs_init(struct kbase_device *kbdev) ++{ ++} ++ ++#endif /* CONFIG_DEBUG_FS */ +diff --git 
a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_csg_debugfs.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_csg_debugfs.h
+new file mode 100644
+index 0000000..397e657
+--- /dev/null
++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_csg_debugfs.h
+@@ -0,0 +1,47 @@
++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
++/*
++ *
++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
++ *
++ * This program is free software and is provided to you under the terms of the
++ * GNU General Public License version 2 as published by the Free Software
++ * Foundation, and any use by you of this program is subject to the terms
++ * of such GNU license.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, you can access it online at
++ * http://www.gnu.org/licenses/gpl-2.0.html.
++ *
++ */
++
++#ifndef _KBASE_CSF_CSG_DEBUGFS_H_
++#define _KBASE_CSF_CSG_DEBUGFS_H_
++
++/* Forward declarations */
++struct kbase_device;
++struct kbase_context;
++struct kbase_queue_group;
++
++#define MALI_CSF_CSG_DEBUGFS_VERSION 0
++
++/**
++ * kbase_csf_queue_group_debugfs_init() - Add debugfs entry for queue groups
++ *                                        associated with @kctx.
++ *
++ * @kctx: Pointer to kbase_context
++ */
++void kbase_csf_queue_group_debugfs_init(struct kbase_context *kctx);
++
++/**
++ * kbase_csf_debugfs_init() - Add the global CSF debugfs entries (active queue
++ *                            groups and scheduler controls)
++ *
++ * @kbdev: Pointer to the device
++ */
++void kbase_csf_debugfs_init(struct kbase_device *kbdev);
++
++#endif /* _KBASE_CSF_CSG_DEBUGFS_H_ */
+diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_defs.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_defs.h
+new file mode 100644
+index 0000000..53526ce
+--- /dev/null
++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_defs.h
+@@ -0,0 +1,1254 @@
++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
++/*
++ *
++ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
++ *
++ * This program is free software and is provided to you under the terms of the
++ * GNU General Public License version 2 as published by the Free Software
++ * Foundation, and any use by you of this program is subject to the terms
++ * of such GNU license.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, you can access it online at
++ * http://www.gnu.org/licenses/gpl-2.0.html.
++ *
++ */
++
++/* Definitions (types, defines, etc.) common to the CSF.
++ * They are placed here to allow the hierarchy of header files to work.
++ */
++
++#ifndef _KBASE_CSF_DEFS_H_
++#define _KBASE_CSF_DEFS_H_
++
++#include
++#include
++
++#include "mali_kbase_csf_firmware.h"
++
++/* Maximum number of KCPU command queues to be created per GPU address space.
++ */
++#define KBASEP_MAX_KCPU_QUEUES ((size_t)256)
++
++/* Maximum number of GPU command queue groups to be created per GPU address
++ * space.
++ */
++#define MAX_QUEUE_GROUP_NUM (256)
++
++/* Maximum number of GPU tiler heaps to allow to be created per GPU address
++ * space.
++ */
++#define MAX_TILER_HEAPS (128)
++
++#define CSF_FIRMWARE_ENTRY_READ (1ul << 0)
++#define CSF_FIRMWARE_ENTRY_WRITE (1ul << 1)
++#define CSF_FIRMWARE_ENTRY_EXECUTE (1ul << 2)
++#define CSF_FIRMWARE_ENTRY_CACHE_MODE (3ul << 3)
++#define CSF_FIRMWARE_ENTRY_PROTECTED (1ul << 5)
++#define CSF_FIRMWARE_ENTRY_SHARED (1ul << 30)
++#define CSF_FIRMWARE_ENTRY_ZERO (1ul << 31)
++
++/**
++ * enum kbase_csf_queue_bind_state - bind state of the queue
++ *
++ * @KBASE_CSF_QUEUE_UNBOUND: Set when the queue is registered or when the link
++ * between queue and the group to which it was bound or being bound is removed.
++ * @KBASE_CSF_QUEUE_BIND_IN_PROGRESS: Set when the first part of bind operation
++ * has completed i.e. CS_QUEUE_BIND ioctl.
++ * @KBASE_CSF_QUEUE_BOUND: Set when the bind operation has completed i.e. IO
++ * pages have been mapped in the process address space.
++ */
++enum kbase_csf_queue_bind_state {
++	KBASE_CSF_QUEUE_UNBOUND,
++	KBASE_CSF_QUEUE_BIND_IN_PROGRESS,
++	KBASE_CSF_QUEUE_BOUND,
++};
++
++/**
++ * enum kbase_csf_reset_gpu_state - state of the gpu reset
++ *
++ * @KBASE_CSF_RESET_GPU_NOT_PENDING: Set when the GPU reset isn't pending
++ *
++ * @KBASE_CSF_RESET_GPU_PREPARED: Set when kbase_prepare_to_reset_gpu() has
++ * been called. This is just for debugging checks to encourage callers to call
++ * kbase_prepare_to_reset_gpu() before kbase_reset_gpu().
++ *
++ * @KBASE_CSF_RESET_GPU_COMMITTED: Set when the GPU reset process has been
++ * committed and so will definitely happen, but the procedure to reset the GPU
++ * has not yet begun. Other threads must finish accessing the HW before we
++ * reach %KBASE_CSF_RESET_GPU_HAPPENING.
++ *
++ * @KBASE_CSF_RESET_GPU_HAPPENING: Set when the GPU reset process is occurring
++ * (silent or otherwise), and is actively accessing the HW. Any changes to the
++ * HW in other threads might get lost, overridden, or corrupted.
++ *
++ * @KBASE_CSF_RESET_GPU_COMMITTED_SILENT: Set when the GPU reset process has
++ * been committed but has not started happening. This is used when resetting
++ * the GPU as part of normal behavior (e.g. when exiting protected mode).
++ * Other threads must finish accessing the HW before we reach
++ * %KBASE_CSF_RESET_GPU_HAPPENING.
++ *
++ * @KBASE_CSF_RESET_GPU_FAILED: Set when an error is encountered during the
++ * GPU reset process. No more work could then be executed on GPU, unloading
++ * the Driver module is the only option.
++ */
++enum kbase_csf_reset_gpu_state {
++	KBASE_CSF_RESET_GPU_NOT_PENDING,
++	KBASE_CSF_RESET_GPU_PREPARED,
++	KBASE_CSF_RESET_GPU_COMMITTED,
++	KBASE_CSF_RESET_GPU_HAPPENING,
++	KBASE_CSF_RESET_GPU_COMMITTED_SILENT,
++	KBASE_CSF_RESET_GPU_FAILED,
++};
++
++/**
++ * enum kbase_csf_group_state - state of the GPU command queue group
++ *
++ * @KBASE_CSF_GROUP_INACTIVE:          Group is inactive and won't be
++ *                                     considered by scheduler for running on
++ *                                     CSG slot.
++ * @KBASE_CSF_GROUP_RUNNABLE:          Group is in the list of runnable groups
++ *                                     and is subjected to time-slice based
++ *                                     scheduling. A start request would be
++ *                                     sent (or already has been sent) if the
++ *                                     group is assigned the CS
++ *                                     group slot for the first time.
++ * @KBASE_CSF_GROUP_IDLE:              Group is currently on a CSG slot
++ *                                     but all the CSs bound to the group have
++ *                                     become either idle or waiting on sync
++ *                                     object.
++ *                                     Group could be evicted from the slot on
++ *                                     the next tick if there are no spare
++ *                                     slots left after scheduling non-idle
++ *                                     queue groups. If the group is kept on
++ *                                     slot then it would be moved to the
++ *                                     RUNNABLE state, also if one of the
++ *                                     queues bound to the group is kicked it
++ *                                     would be moved to the RUNNABLE state.
++ *                                     If the group is evicted from the slot it
++ *                                     would be moved to either
++ *                                     KBASE_CSF_GROUP_SUSPENDED_ON_IDLE or
++ *                                     KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC
++ *                                     state.
++ * @KBASE_CSF_GROUP_SUSPENDED:         Group was evicted from the CSG slot
++ *                                     and is not running but is still in the
++ *                                     list of runnable groups and subjected
++ *                                     to time-slice based scheduling. A resume
++ *                                     request would be sent when a CSG slot is
++ *                                     re-assigned to the group and once the
++ *                                     resume is complete group would be moved
++ *                                     back to the RUNNABLE state.
++ * @KBASE_CSF_GROUP_SUSPENDED_ON_IDLE: Same as KBASE_CSF_GROUP_SUSPENDED except
++ *                                     that queue group also became idle before
++ *                                     the suspension. This state helps
++ *                                     Scheduler avoid scheduling the idle
++ *                                     groups over the non-idle groups in the
++ *                                     subsequent ticks. If one of the queues
++ *                                     bound to the group is kicked it would be
++ *                                     moved to the SUSPENDED state.
++ * @KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC: Same as GROUP_SUSPENDED_ON_IDLE
++ *                                          except that at least one CS
++ *                                          bound to this group was
++ *                                          waiting for synchronization object
++ *                                          before the suspension.
++ * @KBASE_CSF_GROUP_FAULT_EVICTED:     Group is evicted from the scheduler due
++ *                                     to a fault condition, pending to be
++ *                                     terminated.
++ * @KBASE_CSF_GROUP_TERMINATED:        Group is no longer schedulable and is
++ *                                     pending to be deleted by Client, all the
++ *                                     queues bound to it have been unbound.
++ */
++enum kbase_csf_group_state {
++	KBASE_CSF_GROUP_INACTIVE,
++	KBASE_CSF_GROUP_RUNNABLE,
++	KBASE_CSF_GROUP_IDLE,
++	KBASE_CSF_GROUP_SUSPENDED,
++	KBASE_CSF_GROUP_SUSPENDED_ON_IDLE,
++	KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC,
++	KBASE_CSF_GROUP_FAULT_EVICTED,
++	KBASE_CSF_GROUP_TERMINATED,
++};
++
++/**
++ * enum kbase_csf_csg_slot_state - state of the command queue group slots under
++ *                                 the scheduler control.
++ *
++ * @CSG_SLOT_READY:     The slot is clean and ready to be programmed with a
++ *                      queue group.
++ * @CSG_SLOT_READY2RUN: The slot has been programmed with a queue group, i.e. a
++ *                      start or resume request has been sent to the firmware.
++ * @CSG_SLOT_RUNNING:   The queue group is running on the slot, acknowledgment
++ *                      of a start or resume request has been obtained from the
++ *                      firmware.
++ * @CSG_SLOT_DOWN2STOP: The suspend or terminate request for the queue group on
++ *                      the slot has been sent to the firmware.
++ * @CSG_SLOT_STOPPED:   The queue group is removed from the slot, acknowledgment
++ *                      of suspend or terminate request has been obtained from
++ *                      the firmware.
++ * @CSG_SLOT_READY2RUN_TIMEDOUT: The start or resume request sent on the slot
++ *                               for the queue group timed out.
++ * @CSG_SLOT_DOWN2STOP_TIMEDOUT: The suspend or terminate request for queue
++ *                               group on the slot timed out.
++ */
++enum kbase_csf_csg_slot_state {
++	CSG_SLOT_READY,
++	CSG_SLOT_READY2RUN,
++	CSG_SLOT_RUNNING,
++	CSG_SLOT_DOWN2STOP,
++	CSG_SLOT_STOPPED,
++	CSG_SLOT_READY2RUN_TIMEDOUT,
++	CSG_SLOT_DOWN2STOP_TIMEDOUT,
++};
++
++/**
++ * enum kbase_csf_scheduler_state - state of the scheduler operational phases.
++ *
++ * @SCHED_BUSY:      The scheduler is busy performing on tick schedule
++ *                   operations, the state of CSG slots
++ *                   can't be changed.
++ * @SCHED_INACTIVE:  The scheduler is inactive, it is allowed to modify the
++ *                   state of CSG slots by in-cycle
++ *                   priority scheduling.
++ * @SCHED_SUSPENDED: The scheduler is in low-power mode with scheduling
++ *                   operations suspended and is not holding the power
++ *                   management reference. This can happen if the GPU
++ *                   becomes idle for a duration exceeding a threshold,
++ *                   or due to a system triggered suspend action.
++ */
++enum kbase_csf_scheduler_state {
++	SCHED_BUSY,
++	SCHED_INACTIVE,
++	SCHED_SUSPENDED,
++};
++
++/**
++ * enum kbase_queue_group_priority - Kbase internal relative priority list.
++ *
++ * @KBASE_QUEUE_GROUP_PRIORITY_REALTIME: The realtime queue group priority.
++ * @KBASE_QUEUE_GROUP_PRIORITY_HIGH:     The high queue group priority.
++ * @KBASE_QUEUE_GROUP_PRIORITY_MEDIUM:   The medium queue group priority.
++ * @KBASE_QUEUE_GROUP_PRIORITY_LOW:      The low queue group priority.
++ * @KBASE_QUEUE_GROUP_PRIORITY_COUNT:    The number of priority levels.
++ */
++enum kbase_queue_group_priority {
++	KBASE_QUEUE_GROUP_PRIORITY_REALTIME = 0,
++	KBASE_QUEUE_GROUP_PRIORITY_HIGH,
++	KBASE_QUEUE_GROUP_PRIORITY_MEDIUM,
++	KBASE_QUEUE_GROUP_PRIORITY_LOW,
++	KBASE_QUEUE_GROUP_PRIORITY_COUNT
++};
++
++
++/**
++ * struct kbase_csf_notification - Event or error generated as part of command
++ *                                 queue execution
++ *
++ * @data: Event or error data returned to userspace
++ * @link: Link to the linked list, &struct_kbase_csf_context.error_list.
++ */
++struct kbase_csf_notification {
++	struct base_csf_notification data;
++	struct list_head link;
++};
++
++/**
++ * struct kbase_queue - Object representing a GPU command queue.
++ *
++ * @kctx: Pointer to the base context with which this GPU command queue
++ * is associated.
++ * @reg: Pointer to the region allocated from the shared
++ * interface segment for mapping the User mode
++ * input/output pages in MCU firmware address space.
++ * @phys: Pointer to the physical pages allocated for the
++ * pair or User mode input/output page
++ * @user_io_addr: Pointer to the permanent kernel mapping of User mode
++ * input/output pages.
The pages can be accessed through ++ * the mapping without any cache maintenance. ++ * @handle: Handle returned with bind ioctl for creating a ++ * contiguous User mode mapping of input/output pages & ++ * the hardware doorbell page. ++ * @doorbell_nr: Index of the hardware doorbell page assigned to the ++ * queue. ++ * @db_file_offset: File offset value that is assigned to userspace mapping ++ * created on bind to access the doorbell page. ++ * It is in page units. ++ * @link: Link to the linked list of GPU command queues created per ++ * GPU address space. ++ * @refcount: Reference count, stands for the number of times the queue ++ * has been referenced. The reference is taken when it is ++ * created, when it is bound to the group and also when the ++ * @oom_event_work work item is queued ++ * for it. ++ * @group: Pointer to the group to which this queue is bound. ++ * @queue_reg: Pointer to the VA region allocated for CS buffer. ++ * @oom_event_work: Work item corresponding to the out of memory event for ++ * chunked tiler heap being used for this queue. ++ * @base_addr: Base address of the CS buffer. ++ * @size: Size of the CS buffer. ++ * @priority: Priority of this queue within the group. ++ * @bind_state: Bind state of the queue as enum @kbase_csf_queue_bind_state ++ * @csi_index: The ID of the assigned CS hardware interface. ++ * @enabled: Indicating whether the CS is running, or not. ++ * @status_wait: Value of CS_STATUS_WAIT register of the CS will ++ * be kept when the CS gets blocked by sync wait. ++ * CS_STATUS_WAIT provides information on conditions queue is ++ * blocking on. This is set when the group, to which queue is ++ * bound, is suspended after getting blocked, i.e. in ++ * KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC state. ++ * @sync_ptr: Value of CS_STATUS_WAIT_SYNC_POINTER register of the CS ++ * will be kept when the CS gets blocked by ++ * sync wait. 
CS_STATUS_WAIT_SYNC_POINTER contains the address ++ * of synchronization object being waited on. ++ * Valid only when @status_wait is set. ++ * @sync_value: Value of CS_STATUS_WAIT_SYNC_VALUE register of the CS ++ * will be kept when the CS gets blocked by ++ * sync wait. CS_STATUS_WAIT_SYNC_VALUE contains the value ++ * tested against the synchronization object. ++ * Valid only when @status_wait is set. ++ * @sb_status: Value indicates which of the scoreboard entries in the queue ++ * are non-zero ++ * @blocked_reason: Value shows if the queue is blocked, and if so, ++ * the reason why it is blocked ++ * @trace_buffer_base: CS trace buffer base address. ++ * @trace_offset_ptr: Pointer to the CS trace buffer offset variable. ++ * @trace_buffer_size: CS trace buffer size for the queue. ++ * @trace_cfg: CS trace configuration parameters. ++ * @error: GPU command queue fatal information to pass to user space. ++ * @fatal_event_work: Work item to handle the CS fatal event reported for this ++ * queue. ++ * @cs_fatal_info: Records additional information about the CS fatal event. ++ * @cs_fatal: Records information about the CS fatal event. 
++ */ ++struct kbase_queue { ++ struct kbase_context *kctx; ++ struct kbase_va_region *reg; ++ struct tagged_addr phys[2]; ++ char *user_io_addr; ++ u64 handle; ++ int doorbell_nr; ++ unsigned long db_file_offset; ++ struct list_head link; ++ atomic_t refcount; ++ struct kbase_queue_group *group; ++ struct kbase_va_region *queue_reg; ++ struct work_struct oom_event_work; ++ u64 base_addr; ++ u32 size; ++ u8 priority; ++ s8 csi_index; ++ enum kbase_csf_queue_bind_state bind_state; ++ bool enabled; ++ u32 status_wait; ++ u64 sync_ptr; ++ u32 sync_value; ++ u32 sb_status; ++ u32 blocked_reason; ++ u64 trace_buffer_base; ++ u64 trace_offset_ptr; ++ u32 trace_buffer_size; ++ u32 trace_cfg; ++ struct kbase_csf_notification error; ++ struct work_struct fatal_event_work; ++ u64 cs_fatal_info; ++ u32 cs_fatal; ++}; ++ ++/** ++ * struct kbase_normal_suspend_buffer - Object representing a normal ++ * suspend buffer for queue group. ++ * @reg: Memory region allocated for the normal-mode suspend buffer. ++ * @phy: Array of physical memory pages allocated for the normal- ++ * mode suspend buffer. ++ */ ++struct kbase_normal_suspend_buffer { ++ struct kbase_va_region *reg; ++ struct tagged_addr *phy; ++}; ++ ++/** ++ * struct kbase_protected_suspend_buffer - Object representing a protected ++ * suspend buffer for queue group. ++ * @reg: Memory region allocated for the protected-mode suspend buffer. ++ * @pma: Array of pointer to protected mode allocations containing ++ * information about memory pages allocated for protected mode ++ * suspend buffer. ++ */ ++struct kbase_protected_suspend_buffer { ++ struct kbase_va_region *reg; ++ struct protected_memory_allocation **pma; ++}; ++ ++/** ++ * struct kbase_queue_group - Object representing a GPU command queue group. ++ * ++ * @kctx: Pointer to the kbase context with which this queue group ++ * is associated. ++ * @normal_suspend_buf: Object representing the normal suspend buffer. 
++ * Normal-mode suspend buffer that is used for ++ * group context switch. ++ * @protected_suspend_buf: Object representing the protected suspend ++ * buffer. Protected-mode suspend buffer that is ++ * used for group context switch. ++ * @handle: Handle which identifies this queue group. ++ * @csg_nr: Number/index of the CSG to which this queue group is ++ * mapped; KBASEP_CSG_NR_INVALID indicates that the queue ++ * group is not scheduled. ++ * @priority: Priority of the queue group, 0 being the highest, ++ * BASE_QUEUE_GROUP_PRIORITY_COUNT - 1 being the lowest. ++ * @tiler_max: Maximum number of tiler endpoints the group is allowed ++ * to use. ++ * @fragment_max: Maximum number of fragment endpoints the group is ++ * allowed to use. ++ * @compute_max: Maximum number of compute endpoints the group is ++ * allowed to use. ++ * @tiler_mask: Mask of tiler endpoints the group is allowed to use. ++ * @fragment_mask: Mask of fragment endpoints the group is allowed to use. ++ * @compute_mask: Mask of compute endpoints the group is allowed to use. ++ * @group_uid: 32-bit wide unsigned identifier for the group, unique ++ * across all kbase devices and contexts. ++ * @link: Link to this queue group in the 'runnable_groups' list of ++ * the corresponding kctx. ++ * @link_to_schedule: Link to this queue group in the list of prepared groups ++ * to be scheduled, if the group is runnable/suspended. ++ * If the group is idle or waiting for CQS, it would be a ++ * link to the list of idle/blocked groups list. ++ * @run_state: Current state of the queue group. ++ * @prepared_seq_num: Indicates the position of queue group in the list of ++ * prepared groups to be scheduled. ++ * @scan_seq_num: Scan out sequence number before adjusting for dynamic ++ * idle conditions. It is used for setting a group's ++ * onslot priority. It could differ from @prepared_seq_num ++ * when there are idle groups. ++ * @faulted: Indicates that a GPU fault occurred for the queue group.
++ * This flag persists until the fault has been queued to be ++ * reported to userspace. ++ * @bound_queues: Array of registered queues bound to this queue group. ++ * @doorbell_nr: Index of the hardware doorbell page assigned to the ++ * group. ++ * @protm_event_work: Work item corresponding to the protected mode entry ++ * event for this queue. ++ * @protm_pending_bitmap: Bit array to keep a track of CSs that ++ * have pending protected mode entry requests. ++ * @error_fatal: An error of type BASE_GPU_QUEUE_GROUP_ERROR_FATAL to be ++ * returned to userspace if such an error has occurred. ++ * @error_timeout: An error of type BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT ++ * to be returned to userspace if such an error has occurred. ++ * @error_tiler_oom: An error of type BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM ++ * to be returned to userspace if such an error has occurred. ++ * @timer_event_work: Work item to handle the progress timeout fatal event ++ * for the group. ++ */ ++struct kbase_queue_group { ++ struct kbase_context *kctx; ++ struct kbase_normal_suspend_buffer normal_suspend_buf; ++ struct kbase_protected_suspend_buffer protected_suspend_buf; ++ u8 handle; ++ s8 csg_nr; ++ u8 priority; ++ ++ u8 tiler_max; ++ u8 fragment_max; ++ u8 compute_max; ++ ++ u64 tiler_mask; ++ u64 fragment_mask; ++ u64 compute_mask; ++ ++ u32 group_uid; ++ ++ struct list_head link; ++ struct list_head link_to_schedule; ++ enum kbase_csf_group_state run_state; ++ u32 prepared_seq_num; ++ u32 scan_seq_num; ++ bool faulted; ++ ++ struct kbase_queue *bound_queues[MAX_SUPPORTED_STREAMS_PER_GROUP]; ++ ++ int doorbell_nr; ++ struct work_struct protm_event_work; ++ DECLARE_BITMAP(protm_pending_bitmap, MAX_SUPPORTED_STREAMS_PER_GROUP); ++ ++ struct kbase_csf_notification error_fatal; ++ struct kbase_csf_notification error_timeout; ++ struct kbase_csf_notification error_tiler_oom; ++ ++ struct work_struct timer_event_work; ++}; ++ ++/** ++ * struct kbase_csf_kcpu_queue_context - Object 
representing the kernel CPU ++ * queues for a GPU address space. ++ * ++ * @lock: Lock preventing concurrent access to @array and the @in_use bitmap. ++ * @array: Array of pointers to kernel CPU command queues. ++ * @in_use: Bitmap which indicates which kernel CPU command queues are in use. ++ * @wq: Dedicated workqueue for processing kernel CPU command queues. ++ * @num_cmds: The number of commands that have been enqueued across ++ * all the KCPU command queues. This could be used as a ++ * timestamp to determine the command's enqueueing time. ++ * @jit_cmds_head: A list of the just-in-time memory commands, both ++ * allocate & free, in submission order, protected ++ * by kbase_csf_kcpu_queue_context.lock. ++ * @jit_blocked_queues: A list of KCPU command queues blocked by a pending ++ * just-in-time memory allocation command which will be ++ * reattempted after the impending free of other active ++ * allocations. ++ */ ++struct kbase_csf_kcpu_queue_context { ++ struct mutex lock; ++ struct kbase_kcpu_command_queue *array[KBASEP_MAX_KCPU_QUEUES]; ++ DECLARE_BITMAP(in_use, KBASEP_MAX_KCPU_QUEUES); ++ struct workqueue_struct *wq; ++ u64 num_cmds; ++ ++ struct list_head jit_cmds_head; ++ struct list_head jit_blocked_queues; ++}; ++ ++/** ++ * struct kbase_csf_cpu_queue_context - Object representing the cpu queue ++ * information. ++ * ++ * @buffer: Buffer containing CPU queue information provided by Userspace. ++ * @buffer_size: The size of @buffer. ++ * @dump_req_status: Indicates the current status for CPU queues dump request. ++ * @dump_cmp: Dumping cpu queue completion event. ++ */ ++struct kbase_csf_cpu_queue_context { ++ char *buffer; ++ size_t buffer_size; ++ atomic_t dump_req_status; ++ struct completion dump_cmp; ++}; ++ ++/** ++ * struct kbase_csf_heap_context_allocator - Allocator of heap contexts ++ * ++ * Heap context structures are allocated by the kernel for use by the firmware. 
++ * The current implementation subdivides a single GPU memory region for use as ++ * a sparse array. ++ * ++ * @kctx: Pointer to the kbase context with which this allocator is ++ * associated. ++ * @region: Pointer to a GPU memory region from which heap context structures ++ * are allocated. NULL if no heap contexts have been allocated. ++ * @gpu_va: GPU virtual address of the start of the region from which heap ++ * context structures are allocated. 0 if no heap contexts have been ++ * allocated. ++ * @lock: Lock preventing concurrent access to the @in_use bitmap. ++ * @in_use: Bitmap that indicates which heap context structures are currently ++ * allocated (in @region). ++ */ ++struct kbase_csf_heap_context_allocator { ++ struct kbase_context *kctx; ++ struct kbase_va_region *region; ++ u64 gpu_va; ++ struct mutex lock; ++ DECLARE_BITMAP(in_use, MAX_TILER_HEAPS); ++}; ++ ++/** ++ * struct kbase_csf_tiler_heap_context - Object representing the tiler heaps ++ * context for a GPU address space. ++ * ++ * This contains all of the CSF state relating to chunked tiler heaps for one ++ * @kbase_context. It is not the same as a heap context structure allocated by ++ * the kernel for use by the firmware. ++ * ++ * @lock: Lock preventing concurrent access to the tiler heaps. ++ * @list: List of tiler heaps. ++ * @ctx_alloc: Allocator for heap context structures. ++ * @nr_of_heaps: Total number of tiler heaps that were added during the ++ * life time of the context. ++ */ ++struct kbase_csf_tiler_heap_context { ++ struct mutex lock; ++ struct list_head list; ++ struct kbase_csf_heap_context_allocator ctx_alloc; ++ u64 nr_of_heaps; ++}; ++ ++/** ++ * struct kbase_csf_scheduler_context - Object representing the scheduler's ++ * context for a GPU address space. ++ * ++ * @runnable_groups: Lists of runnable GPU command queue groups in the kctx, ++ * one per queue group relative-priority level. 
++ * @num_runnable_grps: Total number of runnable groups across all priority ++ * levels in @runnable_groups. ++ * @idle_wait_groups: A list of GPU command queue groups in which all enabled ++ * GPU command queues are idle and at least one of them ++ * is blocked on a sync wait operation. ++ * @num_idle_wait_grps: Length of the @idle_wait_groups list. ++ * @sync_update_wq: Dedicated workqueue to process work items corresponding ++ * to the sync_update events by sync_set/sync_add ++ * instruction execution on CSs bound to groups ++ * of @idle_wait_groups list. ++ * @sync_update_work: work item to process the sync_update events by ++ * sync_set / sync_add instruction execution on command ++ * streams bound to groups of @idle_wait_groups list. ++ * @ngrp_to_schedule: Number of groups added for the context to the ++ * 'groups_to_schedule' list of scheduler instance. ++ */ ++struct kbase_csf_scheduler_context { ++ struct list_head runnable_groups[KBASE_QUEUE_GROUP_PRIORITY_COUNT]; ++ u32 num_runnable_grps; ++ struct list_head idle_wait_groups; ++ u32 num_idle_wait_grps; ++ struct workqueue_struct *sync_update_wq; ++ struct work_struct sync_update_work; ++ u32 ngrp_to_schedule; ++}; ++ ++/** ++ * struct kbase_csf_context - Object representing CSF for a GPU address space. ++ * ++ * @event_pages_head: A list of pages allocated for the event memory used by ++ * the synchronization objects. A separate list would help ++ * in the fast lookup, since the list is expected to be short ++ * as one page would provide the memory for up to 1K ++ * synchronization objects. ++ * KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES is the upper ++ * bound on the size of event memory. ++ * @cookies: Bitmask containing of KBASE_CSF_NUM_USER_IO_PAGES_HANDLE ++ * bits, used for creating the User mode CPU mapping in a ++ * deferred manner of a pair of User mode input/output pages ++ * & a hardware doorbell page. 
++ * The pages are allocated when a GPU command queue is ++ * bound to a CSG in kbase_csf_queue_bind. ++ * This helps returning unique handles to Userspace from ++ * kbase_csf_queue_bind and later retrieving the pointer to ++ * queue in the mmap handler. ++ * @user_pages_info: Array containing pointers to queue ++ * structures, used in conjunction with cookies bitmask for ++ * providing a mechanism to create a CPU mapping of ++ * input/output pages & hardware doorbell page. ++ * @lock: Serializes accesses to all members, except for ones that ++ * have their own locks. ++ * @queue_groups: Array of registered GPU command queue groups. ++ * @queue_list: Linked list of GPU command queues not yet deregistered. ++ * Note that queues can persist after deregistration if the ++ * userspace mapping created for them on bind operation ++ * hasn't been removed. ++ * @kcpu_queues: Kernel CPU command queues. ++ * @event_lock: Lock protecting access to @event_callback_list and ++ * @error_list. ++ * @event_callback_list: List of callbacks which are registered to serve CSF ++ * events. ++ * @tiler_heaps: Chunked tiler memory heaps. ++ * @wq: Dedicated workqueue to process work items corresponding ++ * to the OoM events raised for chunked tiler heaps being ++ * used by GPU command queues, and progress timeout events. ++ * @link: Link to this csf context in the 'runnable_kctxs' list of ++ * the scheduler instance ++ * @user_reg_vma: Pointer to the vma corresponding to the virtual mapping ++ * of the USER register page. Currently used only for sanity ++ * checking. ++ * @sched: Object representing the scheduler's context ++ * @error_list: List for CS fatal errors in this context. ++ * Link of fatal error is ++ * &struct_kbase_csf_notification.link. ++ * @event_lock needs to be held to access this list. ++ * @cpu_queue: CPU queue information. Only available when DEBUG_FS ++ * is enabled.
++ */ ++struct kbase_csf_context { ++ struct list_head event_pages_head; ++ DECLARE_BITMAP(cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE); ++ struct kbase_queue *user_pages_info[ ++ KBASE_CSF_NUM_USER_IO_PAGES_HANDLE]; ++ struct mutex lock; ++ struct kbase_queue_group *queue_groups[MAX_QUEUE_GROUP_NUM]; ++ struct list_head queue_list; ++ struct kbase_csf_kcpu_queue_context kcpu_queues; ++ spinlock_t event_lock; ++ struct list_head event_callback_list; ++ struct kbase_csf_tiler_heap_context tiler_heaps; ++ struct workqueue_struct *wq; ++ struct list_head link; ++ struct vm_area_struct *user_reg_vma; ++ struct kbase_csf_scheduler_context sched; ++ struct list_head error_list; ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ struct kbase_csf_cpu_queue_context cpu_queue; ++#endif ++}; ++ ++/** ++ * struct kbase_csf_reset_gpu - Object containing the members required for ++ * GPU reset handling. ++ * @workq: Workqueue to execute the GPU reset work item @work. ++ * @work: Work item for performing the GPU reset. ++ * @wait: Wait queue used to wait for the GPU reset completion. ++ * @sem: RW Semaphore to ensure no other thread attempts to use the ++ * GPU whilst a reset is in process. Unlike traditional ++ * semaphores and wait queues, this allows Linux's lockdep ++ * mechanism to check for deadlocks involving reset waits. ++ * @state: Tracks if the GPU reset is in progress or not. ++ * The state is represented by enum @kbase_csf_reset_gpu_state. ++ */ ++struct kbase_csf_reset_gpu { ++ struct workqueue_struct *workq; ++ struct work_struct work; ++ wait_queue_head_t wait; ++ struct rw_semaphore sem; ++ atomic_t state; ++}; ++ ++/** ++ * struct kbase_csf_csg_slot - Object containing members for tracking the state ++ * of CSG slots. ++ * @resident_group: pointer to the queue group that is resident on the CSG slot. ++ * @state: state of the slot as per enum @kbase_csf_csg_slot_state. ++ * @trigger_jiffies: value of jiffies when change in slot state is recorded. 
++ * @priority: dynamic priority assigned to CSG slot. ++ */ ++struct kbase_csf_csg_slot { ++ struct kbase_queue_group *resident_group; ++ atomic_t state; ++ unsigned long trigger_jiffies; ++ u8 priority; ++}; ++ ++/** ++ * struct kbase_csf_scheduler - Object representing the scheduler used for ++ * CSF for an instance of GPU platform device. ++ * @lock: Lock to serialize the scheduler operations and ++ * access to the data members. ++ * @interrupt_lock: Lock to protect members accessed by interrupt ++ * handler. ++ * @state: The operational phase the scheduler is in. Primarily ++ * used for indicating what in-cycle schedule actions ++ * are allowed. ++ * @doorbell_inuse_bitmap: Bitmap of hardware doorbell pages keeping track of ++ * which pages are currently available for assignment ++ * to clients. ++ * @csg_inuse_bitmap: Bitmap to keep a track of CSG slots ++ * that are currently in use. ++ * @csg_slots: The array for tracking the state of CS ++ * group slots. ++ * @runnable_kctxs: List of Kbase contexts that have runnable command ++ * queue groups. ++ * @groups_to_schedule: List of runnable queue groups prepared on every ++ * scheduler tick. The dynamic priority of the CSG ++ * slot assigned to a group will depend upon the ++ * position of group in the list. ++ * @ngrp_to_schedule: Number of groups in the @groups_to_schedule list, ++ * incremented when a group is added to the list, used ++ * to record the position of group in the list. ++ * @num_active_address_spaces: Number of GPU address space slots that would get ++ * used to program the groups in @groups_to_schedule ++ * list on all the available CSG ++ * slots. ++ * @num_csg_slots_for_tick: Number of CSG slots that can be ++ * active in the given tick/tock. This depends on the ++ * value of @num_active_address_spaces. ++ * @remaining_tick_slots: Tracking the number of remaining available slots ++ * for @num_csg_slots_for_tick during the scheduling ++ * operation in a tick/tock. 
++ * @idle_groups_to_schedule: List of runnable queue groups, in which all GPU ++ * command queues became idle or are waiting for ++ * synchronization object, prepared on every ++ * scheduler tick. The groups in this list are ++ * appended to the tail of @groups_to_schedule list ++ * after the scan out so that the idle groups aren't ++ * preferred for scheduling over the non-idle ones. ++ * @csg_scan_count_for_tick: CSG scanout count for assigning the scan_seq_num for ++ * each scanned out group during scheduling operation ++ * in a tick/tock. ++ * @total_runnable_grps: Total number of runnable groups across all KCTXs. ++ * @csgs_events_enable_mask: Used for temporarily masking off asynchronous events ++ * from firmware (such as OoM events) before a group ++ * is suspended. ++ * @csg_slots_idle_mask: Bit array for storing the mask of CS ++ * group slots for which idle notification was ++ * received. ++ * @csg_slots_prio_update: Bit array for tracking slots that have an on-slot ++ * priority update operation. ++ * @last_schedule: Time in jiffies recorded when the last "tick" or ++ * "tock" schedule operation concluded. Used for ++ * evaluating the exclusion window for in-cycle ++ * schedule operation. ++ * @timer_enabled: Whether the CSF scheduler wakes itself up for ++ * periodic scheduling tasks. If this value is 0 ++ * then it will only perform scheduling under the ++ * influence of external factors e.g., IRQs, IOCTLs. ++ * @wq: Dedicated workqueue to execute the @tick_work. ++ * @tick_timer: High-resolution timer employed to schedule tick ++ * workqueue items (kernel-provided delayed_work ++ * items do not use hrtimer and for some reason do ++ * not provide sufficiently reliable periodicity). ++ * @tick_work: Work item that performs the "schedule on tick" ++ * operation to implement timeslice-based scheduling. ++ * @tock_work: Work item that would perform the schedule on tock ++ * operation to implement the asynchronous scheduling.
++ * @ping_work: Work item that would ping the firmware at regular ++ * intervals, only if there is a single active CSG ++ * slot, to check if firmware is alive and would ++ * initiate a reset if the ping request isn't ++ * acknowledged. ++ * @top_ctx: Pointer to the Kbase context corresponding to the ++ * @top_grp. ++ * @top_grp: Pointer to queue group inside @groups_to_schedule ++ * list that was assigned the highest slot priority. ++ * @tock_pending_request: A "tock" request is pending: a group that is not ++ * currently on the GPU demands to be scheduled. ++ * @active_protm_grp: Indicates if firmware has been permitted to let GPU ++ * enter protected mode with the given group. On exit ++ * from protected mode the pointer is reset to NULL. ++ * @gpu_idle_fw_timer_enabled: Whether the CSF scheduler has activated the ++ * firmware idle hysteresis timer for preparing a ++ * GPU suspend on idle. ++ * @gpu_idle_work: Work item for facilitating the scheduler to bring ++ * the GPU to a low-power mode on becoming idle. ++ * @non_idle_offslot_grps: Count of off-slot non-idle groups. Reset during ++ * the scheduler active phase in a tick. It then ++ * tracks the count of non-idle groups across all the ++ * other phases. ++ * @non_idle_scanout_grps: Count on the non-idle groups in the scan-out ++ * list at the scheduling prepare stage. ++ * @pm_active_count: Count indicating if the scheduler is owning a power ++ * management reference count. Reference is taken when ++ * the count becomes 1 and is dropped when the count ++ * becomes 0. It is used to enable the power up of MCU ++ * after GPU and L2 cache have been powered up. So when ++ * this count is zero, MCU will not be powered up. ++ * @csg_scheduling_period_ms: Duration of Scheduling tick in milliseconds. ++ * @tick_timer_active: Indicates whether the @tick_timer is effectively ++ * active or not, as the callback function of ++ * @tick_timer will enqueue @tick_work only if this ++ * flag is true.
This is mainly useful for the case ++ * when scheduling tick needs to be advanced from ++ * interrupt context, without actually deactivating ++ * the @tick_timer first and then enqueuing @tick_work. ++ */ ++struct kbase_csf_scheduler { ++ struct mutex lock; ++ spinlock_t interrupt_lock; ++ enum kbase_csf_scheduler_state state; ++ DECLARE_BITMAP(doorbell_inuse_bitmap, CSF_NUM_DOORBELL); ++ DECLARE_BITMAP(csg_inuse_bitmap, MAX_SUPPORTED_CSGS); ++ struct kbase_csf_csg_slot *csg_slots; ++ struct list_head runnable_kctxs; ++ struct list_head groups_to_schedule; ++ u32 ngrp_to_schedule; ++ u32 num_active_address_spaces; ++ u32 num_csg_slots_for_tick; ++ u32 remaining_tick_slots; ++ struct list_head idle_groups_to_schedule; ++ u32 csg_scan_count_for_tick; ++ u32 total_runnable_grps; ++ DECLARE_BITMAP(csgs_events_enable_mask, MAX_SUPPORTED_CSGS); ++ DECLARE_BITMAP(csg_slots_idle_mask, MAX_SUPPORTED_CSGS); ++ DECLARE_BITMAP(csg_slots_prio_update, MAX_SUPPORTED_CSGS); ++ unsigned long last_schedule; ++ bool timer_enabled; ++ struct workqueue_struct *wq; ++ struct hrtimer tick_timer; ++ struct work_struct tick_work; ++ struct delayed_work tock_work; ++ struct delayed_work ping_work; ++ struct kbase_context *top_ctx; ++ struct kbase_queue_group *top_grp; ++ bool tock_pending_request; ++ struct kbase_queue_group *active_protm_grp; ++ bool gpu_idle_fw_timer_enabled; ++ struct work_struct gpu_idle_work; ++ atomic_t non_idle_offslot_grps; ++ u32 non_idle_scanout_grps; ++ u32 pm_active_count; ++ unsigned int csg_scheduling_period_ms; ++ bool tick_timer_active; ++}; ++ ++/** ++ * Number of GPU cycles per unit of the global progress timeout. ++ */ ++#define GLB_PROGRESS_TIMER_TIMEOUT_SCALE ((u64)1024) ++ ++/** ++ * Maximum value of the global progress timeout.
++ */ ++#define GLB_PROGRESS_TIMER_TIMEOUT_MAX \ ++ ((GLB_PROGRESS_TIMER_TIMEOUT_MASK >> \ ++ GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) * \ ++ GLB_PROGRESS_TIMER_TIMEOUT_SCALE) ++ ++/** ++ * Default GLB_PWROFF_TIMER_TIMEOUT value in unit of micro-seconds. ++ */ ++#define DEFAULT_GLB_PWROFF_TIMEOUT_US (800) ++ ++/** ++ * In typical operations, the management of the shader core power transitions ++ * is delegated to the MCU/firmware. However, if the host driver is configured ++ * to take direct control, one needs to disable the MCU firmware GLB_PWROFF ++ * timer. ++ */ ++#define DISABLE_GLB_PWROFF_TIMER (0) ++ ++/* Index of the GPU_ACTIVE counter within the CSHW counter block */ ++#define GPU_ACTIVE_CNT_IDX (4) ++ ++/** ++ * Maximum number of sessions that can be managed by the IPA Control component. ++ */ ++#if MALI_UNIT_TEST ++#define KBASE_IPA_CONTROL_MAX_SESSIONS ((size_t)8) ++#else ++#define KBASE_IPA_CONTROL_MAX_SESSIONS ((size_t)2) ++#endif ++ ++/** ++ * enum kbase_ipa_core_type - Type of counter block for performance counters ++ * ++ * @KBASE_IPA_CORE_TYPE_CSHW: CS Hardware counters. ++ * @KBASE_IPA_CORE_TYPE_MEMSYS: Memory System counters. ++ * @KBASE_IPA_CORE_TYPE_TILER: Tiler counters. ++ * @KBASE_IPA_CORE_TYPE_SHADER: Shader Core counters. ++ * @KBASE_IPA_CORE_TYPE_NUM: Number of core types. ++ */ ++enum kbase_ipa_core_type { ++ KBASE_IPA_CORE_TYPE_CSHW = 0, ++ KBASE_IPA_CORE_TYPE_MEMSYS, ++ KBASE_IPA_CORE_TYPE_TILER, ++ KBASE_IPA_CORE_TYPE_SHADER, ++ KBASE_IPA_CORE_TYPE_NUM ++}; ++ ++/** ++ * Number of configurable counters per type of block on the IPA Control ++ * interface. ++ */ ++#define KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS ((size_t)8) ++ ++/** ++ * Total number of configurable counters existing on the IPA Control interface. 
++ */ ++#define KBASE_IPA_CONTROL_MAX_COUNTERS \ ++ ((size_t)KBASE_IPA_CORE_TYPE_NUM * KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS) ++ ++/** ++ * struct kbase_ipa_control_prfcnt - Session for a single performance counter ++ * ++ * @latest_raw_value: Latest raw value read from the counter. ++ * @scaling_factor: Factor raw value shall be multiplied by. ++ * @accumulated_diff: Partial sum of scaled and normalized values from ++ * previous samples. This represents all the values ++ * that were read before the latest raw value. ++ * @type: Type of counter block for performance counter. ++ * @select_idx: Index of the performance counter as configured on ++ * the IPA Control interface. ++ * @gpu_norm: Indicating whether values shall be normalized by ++ * GPU frequency. If true, returned values represent ++ * an interval of time expressed in seconds (when the ++ * scaling factor is set to 1). ++ */ ++struct kbase_ipa_control_prfcnt { ++ u64 latest_raw_value; ++ u64 scaling_factor; ++ u64 accumulated_diff; ++ enum kbase_ipa_core_type type; ++ u8 select_idx; ++ bool gpu_norm; ++}; ++ ++/** ++ * struct kbase_ipa_control_session - Session for an IPA Control client ++ * ++ * @prfcnts: Sessions for individual performance counters. ++ * @num_prfcnts: Number of performance counters. ++ * @active: Indicates whether this slot is in use or not ++ * @last_query_time: Time of last query, in ns ++ * @protm_time: Amount of time (in ns) that GPU has been in protected mode ++ */ ++struct kbase_ipa_control_session { ++ struct kbase_ipa_control_prfcnt prfcnts[KBASE_IPA_CONTROL_MAX_COUNTERS]; ++ size_t num_prfcnts; ++ bool active; ++ u64 last_query_time; ++ u64 protm_time; ++}; ++ ++/** ++ * struct kbase_ipa_control_prfcnt_config - Performance counter configuration ++ * ++ * @idx: Index of the performance counter inside the block, as specified ++ * in the GPU architecture. ++ * @refcount: Number of client sessions bound to this counter.
++ * ++ * This structure represents one configurable performance counter of ++ * the IPA Control interface. The entry may be mapped to a specific counter ++ * by one or more client sessions. The counter is considered to be unused ++ * if it isn't part of any client session. ++ */ ++struct kbase_ipa_control_prfcnt_config { ++ u8 idx; ++ u8 refcount; ++}; ++ ++/** ++ * struct kbase_ipa_control_prfcnt_block - Block of performance counters ++ * ++ * @select: Current performance counter configuration. ++ * @num_available_counters: Number of counters that are not already configured. ++ * ++ */ ++struct kbase_ipa_control_prfcnt_block { ++ struct kbase_ipa_control_prfcnt_config ++ select[KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS]; ++ size_t num_available_counters; ++}; ++ ++/** ++ * struct kbase_ipa_control - Manager of the IPA Control interface. ++ * ++ * @blocks: Current configuration of performance counters ++ * for the IPA Control interface. ++ * @sessions: State of client sessions, storing information ++ * like performance counters the client subscribed to ++ * and latest value read from each counter. ++ * @lock: Spinlock to serialize access by concurrent clients. ++ * @rtm_listener_data: Private data for allocating a GPU frequency change ++ * listener. ++ * @num_active_sessions: Number of sessions opened by clients. ++ * @cur_gpu_rate: Current GPU top-level operating frequency, in Hz. ++ * @rtm_listener_data: Private data for allocating a GPU frequency change ++ * listener. 
++ * @protm_start: Time (in ns) at which the GPU entered protected mode ++ */ ++struct kbase_ipa_control { ++ struct kbase_ipa_control_prfcnt_block blocks[KBASE_IPA_CORE_TYPE_NUM]; ++ struct kbase_ipa_control_session ++ sessions[KBASE_IPA_CONTROL_MAX_SESSIONS]; ++ spinlock_t lock; ++ void *rtm_listener_data; ++ size_t num_active_sessions; ++ u32 cur_gpu_rate; ++ u64 protm_start; ++}; ++ ++/** ++ * struct kbase_csf_firmware_interface - Interface in the MCU firmware ++ * ++ * @node: Interface objects are on the kbase_device:csf.firmware_interfaces ++ * list using this list_head to link them ++ * @phys: Array of the physical (tagged) addresses making up this interface ++ * @name: NULL-terminated string naming the interface ++ * @num_pages: Number of entries in @phys and @pma (and length of the interface) ++ * @virtual: Starting GPU virtual address this interface is mapped at ++ * @flags: bitmask of CSF_FIRMWARE_ENTRY_* conveying the interface attributes ++ * @data_start: Offset into firmware image at which the interface data starts ++ * @data_end: Offset into firmware image at which the interface data ends ++ * @kernel_map: A kernel mapping of the memory or NULL if not required to be ++ * mapped in the kernel ++ * @pma: Array of pointers to protected memory allocations. ++ */ ++struct kbase_csf_firmware_interface { ++ struct list_head node; ++ struct tagged_addr *phys; ++ char *name; ++ u32 num_pages; ++ u32 virtual; ++ u32 flags; ++ u32 data_start; ++ u32 data_end; ++ void *kernel_map; ++ struct protected_memory_allocation **pma; ++}; ++ ++/* ++ * struct kbase_csf_hwcnt - Object containing members for handling the dump of ++ * HW counters. ++ * ++ * @request_pending: Flag set when HWC requested and used for HWC sample ++ * done interrupt. ++ * @enable_pending: Flag set when HWC enable status change and used for ++ * enable done interrupt. 
++ */ ++struct kbase_csf_hwcnt { ++ bool request_pending; ++ bool enable_pending; ++}; ++ ++/** ++ * struct kbase_csf_device - Object representing CSF for an instance of GPU ++ * platform device. ++ * ++ * @mcu_mmu: MMU page tables for the MCU firmware ++ * @firmware_interfaces: List of interfaces defined in the firmware image ++ * @firmware_config: List of configuration options within the firmware ++ * image ++ * @firmware_timeline_metadata: List of timeline meta-data within the firmware ++ * image ++ * @fw_cfg_kobj: Pointer to the kobject corresponding to the sysfs ++ * directory that contains a sub-directory for each ++ * of the configuration options present in the ++ * firmware image. ++ * @firmware_trace_buffers: List of trace buffers described in the firmware ++ * image. ++ * @shared_interface: Pointer to the interface object containing info for ++ * the memory area shared between firmware & host. ++ * @shared_reg_rbtree: RB tree of the memory regions allocated from the ++ * shared interface segment in MCU firmware address ++ * space. ++ * @db_filp: Pointer to a dummy file, that along with ++ * @db_file_offsets, facilitates the use of unique ++ * file offset for the userspace mapping created ++ * for Hw Doorbell pages. The userspace mapping ++ * is made to point to this file inside the mmap ++ * handler. ++ * @db_file_offsets: Counter that is incremented every time a GPU ++ * command queue is bound to provide a unique file ++ * offset range for @db_filp file, so that pte of ++ * Doorbell page can be zapped through the kernel ++ * function unmap_mapping_range(). It is incremented ++ * in page units. ++ * @dummy_db_page: Address of the dummy page that is mapped in place ++ * of the real Hw doorbell page for the active GPU ++ * command queues after they are stopped or after the ++ * GPU is powered down. ++ * @dummy_user_reg_page: Address of the dummy page that is mapped in place ++ * of the real User register page just before the GPU ++ * is powered down.
The User register page is mapped ++ * in the address space of every process, that created ++ * a Base context, to enable the access to LATEST_FLUSH ++ * register from userspace. ++ * @mali_file_inode: Pointer to the inode corresponding to mali device ++ * file. This is needed in order to switch to the ++ * @dummy_user_reg_page on GPU power down. ++ * All instances of the mali device file will point to ++ * the same inode. ++ * @reg_lock: Lock to serialize the MCU firmware related actions ++ * that affect all contexts such as allocation of ++ * regions from shared interface area, assignment of ++ * hardware doorbell pages, assignment of CSGs, ++ * sending global requests. ++ * @event_wait: Wait queue to wait for receiving csf events, i.e. ++ * the interrupt from CSF firmware, or scheduler state ++ * changes. ++ * @interrupt_received: Flag set when the interrupt is received from CSF fw ++ * @global_iface: The result of parsing the global interface ++ * structure set up by the firmware, including the ++ * CSGs, CSs, and their properties ++ * @scheduler: The CS scheduler instance. ++ * @reset: Contains members required for GPU reset handling. ++ * @progress_timeout: Maximum number of GPU clock cycles without forward ++ * progress to allow, for all tasks running on ++ * hardware endpoints (e.g. shader cores), before ++ * terminating a GPU command queue group. ++ * Must not exceed @GLB_PROGRESS_TIMER_TIMEOUT_MAX. ++ * @pma_dev: Pointer to protected memory allocator device. ++ * @firmware_inited: Flag for indicating that the cold-boot stage of ++ * the MCU has completed. ++ * @firmware_reloaded: Flag for indicating a firmware reload operation ++ * in GPU reset has completed. ++ * @firmware_reload_needed: Flag for indicating that the firmware needs to be ++ * reloaded as part of the GPU reset action.
++ * @firmware_hctl_core_pwr: Flag for indicating that the host driver is in ++ * charge of the shader core's power transitions, and ++ * the mcu_core_pwroff timeout feature is disabled ++ * (i.e. configured 0 in the register field). If ++ * false, the control is delegated to the MCU. ++ * @firmware_reload_work: Work item for facilitating the procedural actions ++ * on reloading the firmware. ++ * @glb_init_request_pending: Flag to indicate that Global requests have been ++ * sent to the FW after MCU was re-enabled and their ++ * acknowledgement is pending. ++ * @fw_error_work: Work item for handling the firmware internal error ++ * fatal event. ++ * @ipa_control: IPA Control component manager. ++ * @mcu_core_pwroff_dur_us: Sysfs attribute for the glb_pwroff timeout input ++ * in unit of micro-seconds. The firmware does not use ++ * it directly. ++ * @mcu_core_pwroff_dur_count: The counterpart of the glb_pwroff timeout input ++ * in interface required format, ready to be used ++ * directly in the firmware. ++ * @mcu_core_pwroff_reg_shadow: The actual value that has been programmed into ++ * the glb_pwroff register. This is separated from ++ * the @p mcu_core_pwroff_dur_count as an update ++ * to the latter is asynchronous. ++ * @gpu_idle_hysteresis_ms: Sysfs attribute for the idle hysteresis time ++ * window in unit of ms. The firmware does not use it ++ * directly. ++ * @gpu_idle_dur_count: The counterpart of the hysteresis time window in ++ * interface required format, ready to be used ++ * directly in the firmware. ++ * @fw_timeout_ms: Timeout value (in milliseconds) used when waiting ++ * for any request sent to the firmware. ++ * @hwcnt: Contains members required for handling the dump of ++ * HW counters.
++ */ ++struct kbase_csf_device { ++ struct kbase_mmu_table mcu_mmu; ++ struct list_head firmware_interfaces; ++ struct list_head firmware_config; ++ struct list_head firmware_timeline_metadata; ++ struct kobject *fw_cfg_kobj; ++ struct kbase_csf_trace_buffers firmware_trace_buffers; ++ void *shared_interface; ++ struct rb_root shared_reg_rbtree; ++ struct file *db_filp; ++ u32 db_file_offsets; ++ struct tagged_addr dummy_db_page; ++ struct tagged_addr dummy_user_reg_page; ++ struct inode *mali_file_inode; ++ struct mutex reg_lock; ++ wait_queue_head_t event_wait; ++ bool interrupt_received; ++ struct kbase_csf_global_iface global_iface; ++ struct kbase_csf_scheduler scheduler; ++ struct kbase_csf_reset_gpu reset; ++ atomic64_t progress_timeout; ++ struct protected_memory_allocator_device *pma_dev; ++ bool firmware_inited; ++ bool firmware_reloaded; ++ bool firmware_reload_needed; ++ bool firmware_hctl_core_pwr; ++ struct work_struct firmware_reload_work; ++ bool glb_init_request_pending; ++ struct work_struct fw_error_work; ++ struct kbase_ipa_control ipa_control; ++ u32 mcu_core_pwroff_dur_us; ++ u32 mcu_core_pwroff_dur_count; ++ u32 mcu_core_pwroff_reg_shadow; ++ u32 gpu_idle_hysteresis_ms; ++ u32 gpu_idle_dur_count; ++ unsigned int fw_timeout_ms; ++ struct kbase_csf_hwcnt hwcnt; ++}; ++ ++/** ++ * struct kbase_as - Object representing an address space of GPU. ++ * @number: Index at which this address space structure is present ++ * in an array of address space structures embedded inside ++ * the &struct kbase_device. ++ * @pf_wq: Workqueue for processing work items related to ++ * Page fault, Bus fault and GPU fault handling. ++ * @work_pagefault: Work item for the Page fault handling. ++ * @work_busfault: Work item for the Bus fault handling. ++ * @work_gpufault: Work item for the GPU fault handling. ++ * @pf_data: Data relating to Page fault. ++ * @bf_data: Data relating to Bus fault. ++ * @gf_data: Data relating to GPU fault. 
++ * @current_setup: Stores the MMU configuration for this address space. ++ */ ++struct kbase_as { ++ int number; ++ struct workqueue_struct *pf_wq; ++ struct work_struct work_pagefault; ++ struct work_struct work_busfault; ++ struct work_struct work_gpufault; ++ struct kbase_fault pf_data; ++ struct kbase_fault bf_data; ++ struct kbase_fault gf_data; ++ struct kbase_mmu_setup current_setup; ++}; ++ ++#endif /* _KBASE_CSF_DEFS_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware.c +new file mode 100644 +index 0000000..1b31122 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware.c +@@ -0,0 +1,2337 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ ++ ++#include "mali_kbase.h" ++#include "mali_kbase_csf_firmware_cfg.h" ++#include "mali_kbase_csf_trace_buffer.h" ++#include "mali_kbase_csf_timeout.h" ++#include "mali_kbase_mem.h" ++#include "mali_kbase_reset_gpu.h" ++#include "mali_kbase_ctx_sched.h" ++#include "mali_kbase_csf_scheduler.h" ++#include "device/mali_kbase_device.h" ++#include "backend/gpu/mali_kbase_pm_internal.h" ++#include "tl/mali_kbase_timeline_priv.h" ++#include "mali_kbase_csf_tl_reader.h" ++#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#if (KERNEL_VERSION(4, 13, 0) <= LINUX_VERSION_CODE) ++#include ++#endif ++#include ++#include ++ ++#define MALI_MAX_FIRMWARE_NAME_LEN ((size_t)20) ++ ++ ++static char fw_name[MALI_MAX_FIRMWARE_NAME_LEN] = "mali_csffw.bin"; ++module_param_string(fw_name, fw_name, sizeof(fw_name), 0644); ++MODULE_PARM_DESC(fw_name, "firmware image"); ++ ++/* The waiting time for firmware to boot */ ++static unsigned int csf_firmware_boot_timeout_ms = 500; ++module_param(csf_firmware_boot_timeout_ms, uint, 0444); ++MODULE_PARM_DESC(csf_firmware_boot_timeout_ms, ++ "Maximum time to wait for firmware to boot."); ++ ++#ifdef CONFIG_MALI_DEBUG ++/* Makes Driver wait indefinitely for an acknowledgment for the different ++ * requests it sends to firmware. Otherwise the timeouts interfere with the ++ * use of debugger for source-level debugging of firmware as Driver initiates ++ * a GPU reset when a request times out, which always happen when a debugger ++ * is connected. 
++ */ ++bool fw_debug; /* Default value of 0/false */ ++module_param(fw_debug, bool, 0444); ++MODULE_PARM_DESC(fw_debug, ++ "Enables effective use of a debugger for debugging firmware code."); ++#endif ++ ++#define FIRMWARE_HEADER_MAGIC (0xC3F13A6Eul) ++#define FIRMWARE_HEADER_VERSION (0ul) ++#define FIRMWARE_HEADER_LENGTH (0x14ul) ++ ++#define CSF_FIRMWARE_ENTRY_SUPPORTED_FLAGS \ ++ (CSF_FIRMWARE_ENTRY_READ | \ ++ CSF_FIRMWARE_ENTRY_WRITE | \ ++ CSF_FIRMWARE_ENTRY_EXECUTE | \ ++ CSF_FIRMWARE_ENTRY_PROTECTED | \ ++ CSF_FIRMWARE_ENTRY_SHARED | \ ++ CSF_FIRMWARE_ENTRY_ZERO | \ ++ CSF_FIRMWARE_ENTRY_CACHE_MODE) ++ ++#define CSF_FIRMWARE_ENTRY_TYPE_INTERFACE (0) ++#define CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION (1) ++#define CSF_FIRMWARE_ENTRY_TYPE_FUTF_TEST (2) ++#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3) ++#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4) ++ ++#define CSF_FIRMWARE_CACHE_MODE_NONE (0ul << 3) ++#define CSF_FIRMWARE_CACHE_MODE_CACHED (1ul << 3) ++#define CSF_FIRMWARE_CACHE_MODE_UNCACHED_COHERENT (2ul << 3) ++#define CSF_FIRMWARE_CACHE_MODE_CACHED_COHERENT (3ul << 3) ++ ++#define INTERFACE_ENTRY_NAME_OFFSET (0x14) ++ ++#define TL_METADATA_ENTRY_NAME_OFFSET (0x8) ++ ++#define CSF_MAX_FW_STOP_LOOPS (100000) ++ ++#define CSF_GLB_REQ_CFG_MASK \ ++ (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ ++ GLB_REQ_CFG_PWROFF_TIMER_MASK) ++ ++static inline u32 input_page_read(const u32 *const input, const u32 offset) ++{ ++ WARN_ON(offset % sizeof(u32)); ++ ++ return input[offset / sizeof(u32)]; ++} ++ ++static inline void input_page_write(u32 *const input, const u32 offset, ++ const u32 value) ++{ ++ WARN_ON(offset % sizeof(u32)); ++ ++ input[offset / sizeof(u32)] = value; ++} ++ ++static inline void input_page_partial_write(u32 *const input, const u32 offset, ++ u32 value, u32 mask) ++{ ++ WARN_ON(offset % sizeof(u32)); ++ ++ input[offset / sizeof(u32)] = ++ (input_page_read(input, offset) & ~mask) | (value & mask); ++} ++ ++static inline 
u32 output_page_read(const u32 *const output, const u32 offset) ++{ ++ WARN_ON(offset % sizeof(u32)); ++ ++ return output[offset / sizeof(u32)]; ++} ++ ++static unsigned int entry_type(u32 header) ++{ ++ return header & 0xFF; ++} ++static unsigned int entry_size(u32 header) ++{ ++ return (header >> 8) & 0xFF; ++} ++static bool entry_update(u32 header) ++{ ++ return (header >> 30) & 0x1; ++} ++static bool entry_optional(u32 header) ++{ ++ return (header >> 31) & 0x1; ++} ++ ++/** ++ * struct firmware_timeline_metadata - ++ * Timeline metadata item within the MCU firmware ++ * ++ * @node: List head linking all timeline metadata to ++ * kbase_device:csf.firmware_timeline_metadata. ++ * @name: NUL-terminated string naming the metadata. ++ * @data: Metadata content. ++ * @size: Metadata size. ++ */ ++struct firmware_timeline_metadata { ++ struct list_head node; ++ char *name; ++ char *data; ++ size_t size; ++}; ++ ++/* The shared interface area, used for communicating with firmware, is managed ++ * like a virtual memory zone. Reserve the virtual space from that zone ++ * corresponding to shared interface entry parsed from the firmware image. ++ * The shared_reg_rbtree should have been initialized before calling this ++ * function. 
++ */ ++static int setup_shared_iface_static_region(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_firmware_interface *interface = ++ kbdev->csf.shared_interface; ++ struct kbase_va_region *reg; ++ int ret = -ENOMEM; ++ ++ if (!interface) ++ return -EINVAL; ++ ++ reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0, ++ interface->num_pages, KBASE_REG_ZONE_MCU_SHARED); ++ if (reg) { ++ ret = kbase_add_va_region_rbtree(kbdev, reg, ++ interface->virtual, interface->num_pages, 1); ++ if (ret) ++ kfree(reg); ++ else ++ reg->flags &= ~KBASE_REG_FREE; ++ } ++ ++ return ret; ++} ++ ++static int wait_mcu_status_value(struct kbase_device *kbdev, u32 val) ++{ ++ u32 max_loops = CSF_MAX_FW_STOP_LOOPS; ++ ++ /* wait for the MCU_STATUS register to reach the given status value */ ++ while (--max_loops && ++ (kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS)) != val)) { ++ } ++ ++ return (max_loops == 0) ? -1 : 0; ++} ++ ++void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev) ++{ ++ if (wait_mcu_status_value(kbdev, MCU_CNTRL_DISABLE) < 0) ++ dev_err(kbdev->dev, "MCU failed to get disabled"); ++} ++ ++static void wait_for_firmware_stop(struct kbase_device *kbdev) ++{ ++ if (wait_mcu_status_value(kbdev, MCU_CNTRL_DISABLE) < 0) { ++ /* This error shall go away once MIDJM-2371 is closed */ ++ dev_err(kbdev->dev, "Firmware failed to stop"); ++ } ++} ++ ++static void stop_csf_firmware(struct kbase_device *kbdev) ++{ ++ /* Stop the MCU firmware */ ++ kbase_csf_firmware_disable_mcu(kbdev); ++ ++ wait_for_firmware_stop(kbdev); ++} ++ ++static void wait_for_firmware_boot(struct kbase_device *kbdev) ++{ ++ const long wait_timeout = ++ kbase_csf_timeout_in_jiffies(csf_firmware_boot_timeout_ms); ++ long remaining; ++ ++ /* Firmware will generate a global interface interrupt once booting ++ * is complete ++ */ ++ remaining = wait_event_timeout(kbdev->csf.event_wait, ++ kbdev->csf.interrupt_received == true, wait_timeout); ++ ++ if (!remaining) ++ 
dev_err(kbdev->dev, "Timed out waiting for fw boot completion"); ++ ++ kbdev->csf.interrupt_received = false; ++} ++ ++static void boot_csf_firmware(struct kbase_device *kbdev) ++{ ++ kbase_csf_firmware_enable_mcu(kbdev); ++ ++ wait_for_firmware_boot(kbdev); ++} ++ ++static void wait_ready(struct kbase_device *kbdev) ++{ ++ u32 max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; ++ u32 val; ++ ++ val = kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)); ++ ++ /* Wait for a while for the update command to take effect */ ++ while (--max_loops && (val & AS_STATUS_AS_ACTIVE)) ++ val = kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)); ++ ++ if (max_loops == 0) ++ dev_err(kbdev->dev, "AS_ACTIVE bit stuck, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n"); ++} ++ ++static void unload_mmu_tables(struct kbase_device *kbdev) ++{ ++ unsigned long irq_flags; ++ ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); ++ if (kbdev->pm.backend.gpu_powered) ++ kbase_mmu_disable_as(kbdev, MCU_AS_NR); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++} ++ ++static void load_mmu_tables(struct kbase_device *kbdev) ++{ ++ unsigned long irq_flags; ++ ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); ++ kbase_mmu_update(kbdev, &kbdev->csf.mcu_mmu, MCU_AS_NR); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ ++ /* Wait for a while for the update command to take effect */ ++ wait_ready(kbdev); ++} ++ ++/** ++ * convert_mem_flags() - Convert firmware memory flags to GPU region flags ++ * ++ * Return: GPU memory region flags ++ * ++ * @kbdev: Instance of GPU platform device (used to determine system coherency) ++ * @flags: Flags of an "interface memory setup" section in a firmware image ++ * @cm: appropriate cache mode chosen for the "interface memory setup" ++ * section, which 
could be different from the cache mode requested by ++ * firmware. ++ */ ++static unsigned long convert_mem_flags(const struct kbase_device * const kbdev, ++ const u32 flags, u32 *cm) ++{ ++ unsigned long mem_flags = 0; ++ u32 cache_mode = flags & CSF_FIRMWARE_ENTRY_CACHE_MODE; ++ bool is_shared = (flags & CSF_FIRMWARE_ENTRY_SHARED) ? true : false; ++ ++ /* The memory flags control the access permissions for the MCU, the ++ * shader cores/tiler are not expected to access this memory ++ */ ++ if (flags & CSF_FIRMWARE_ENTRY_READ) ++ mem_flags |= KBASE_REG_GPU_RD; ++ ++ if (flags & CSF_FIRMWARE_ENTRY_WRITE) ++ mem_flags |= KBASE_REG_GPU_WR; ++ ++ if ((flags & CSF_FIRMWARE_ENTRY_EXECUTE) == 0) ++ mem_flags |= KBASE_REG_GPU_NX; ++ ++ if (flags & CSF_FIRMWARE_ENTRY_PROTECTED) ++ mem_flags |= KBASE_REG_PROTECTED; ++ ++ /* Substitute uncached coherent memory for cached coherent memory if ++ * the system does not support ACE coherency. ++ */ ++ if ((cache_mode == CSF_FIRMWARE_CACHE_MODE_CACHED_COHERENT) && ++ (kbdev->system_coherency != COHERENCY_ACE)) ++ cache_mode = CSF_FIRMWARE_CACHE_MODE_UNCACHED_COHERENT; ++ ++ /* Substitute uncached incoherent memory for uncached coherent memory ++ * if the system does not support ACE-Lite coherency. 
++ */ ++ if ((cache_mode == CSF_FIRMWARE_CACHE_MODE_UNCACHED_COHERENT) && ++ (kbdev->system_coherency == COHERENCY_NONE)) ++ cache_mode = CSF_FIRMWARE_CACHE_MODE_NONE; ++ ++ *cm = cache_mode; ++ ++ switch (cache_mode) { ++ case CSF_FIRMWARE_CACHE_MODE_NONE: ++ mem_flags |= ++ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); ++ break; ++ case CSF_FIRMWARE_CACHE_MODE_CACHED: ++ mem_flags |= ++ KBASE_REG_MEMATTR_INDEX( ++ AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY); ++ break; ++ case CSF_FIRMWARE_CACHE_MODE_UNCACHED_COHERENT: ++ case CSF_FIRMWARE_CACHE_MODE_CACHED_COHERENT: ++ WARN_ON(!is_shared); ++ mem_flags |= KBASE_REG_SHARE_BOTH | ++ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED); ++ break; ++ default: ++ dev_err(kbdev->dev, ++ "Firmware contains interface with unsupported cache mode\n"); ++ break; ++ } ++ return mem_flags; ++} ++ ++static void load_fw_image_section(struct kbase_device *kbdev, const u8 *data, ++ struct tagged_addr *phys, u32 num_pages, u32 flags, ++ u32 data_start, u32 data_end) ++{ ++ u32 data_pos = data_start; ++ u32 data_len = data_end - data_start; ++ u32 page_num; ++ u32 page_limit; ++ ++ if (flags & CSF_FIRMWARE_ENTRY_ZERO) ++ page_limit = num_pages; ++ else ++ page_limit = min_t(u32, num_pages, (data_len + PAGE_SIZE - 1) / PAGE_SIZE); /* phys[] holds only num_pages entries; never walk past it */ ++ ++ for (page_num = 0; page_num < page_limit; ++page_num) { ++ struct page *const page = as_page(phys[page_num]); ++ char *const p = kmap_atomic(page); ++ u32 const copy_len = min_t(u32, PAGE_SIZE, data_len); ++ ++ if (copy_len > 0) { ++ memcpy(p, data + data_pos, copy_len); ++ data_pos += copy_len; ++ data_len -= copy_len; ++ } ++ ++ if (flags & CSF_FIRMWARE_ENTRY_ZERO) { ++ u32 const zi_len = PAGE_SIZE - copy_len; ++ ++ memset(p + copy_len, 0, zi_len); ++ } ++ ++ kbase_sync_single_for_device(kbdev, kbase_dma_addr(page), ++ PAGE_SIZE, DMA_TO_DEVICE); ++ kunmap_atomic(p); ++ } ++} ++ ++static int reload_fw_data_sections(struct kbase_device *kbdev) ++{ ++ const u32 magic = FIRMWARE_HEADER_MAGIC; ++ struct
kbase_csf_firmware_interface *interface; ++ const struct firmware *firmware; ++ int ret = 0; ++ ++ if (request_firmware(&firmware, fw_name, kbdev->dev) != 0) { ++ dev_err(kbdev->dev, ++ "Failed to reload firmware image '%s'\n", ++ fw_name); ++ return -ENOENT; ++ } ++ ++ /* Do couple of basic sanity checks */ ++ if (firmware->size < FIRMWARE_HEADER_LENGTH) { ++ dev_err(kbdev->dev, "Firmware image unexpectedly too small\n"); ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (memcmp(firmware->data, &magic, sizeof(magic)) != 0) { ++ dev_err(kbdev->dev, "Incorrect magic value, firmware image could have been corrupted\n"); ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { ++ /* Skip reload of text & read only data sections */ ++ if ((interface->flags & CSF_FIRMWARE_ENTRY_EXECUTE) || ++ !(interface->flags & CSF_FIRMWARE_ENTRY_WRITE)) ++ continue; ++ ++ load_fw_image_section(kbdev, firmware->data, interface->phys, ++ interface->num_pages, interface->flags, ++ interface->data_start, interface->data_end); ++ } ++ ++ kbase_csf_firmware_reload_trace_buffers_data(kbdev); ++ ++out: ++ release_firmware(firmware); ++ return ret; ++} ++ ++/** ++ * parse_memory_setup_entry() - Process an "interface memory setup" section ++ * ++ * Read an "interface memory setup" section from the firmware image and create ++ * the necessary memory region including the MMU page tables. If successful ++ * the interface will be added to the kbase_device:csf.firmware_interfaces list. 
++ * ++ * Return: 0 if successful, negative error code on failure ++ * ++ * @kbdev: Kbase device structure ++ * @fw: The firmware image containing the section ++ * @entry: Pointer to the start of the section ++ * @size: Size (in bytes) of the section ++ */ ++static int parse_memory_setup_entry(struct kbase_device *kbdev, ++ const struct firmware *fw, ++ const u32 *entry, unsigned int size) ++{ ++ int ret = 0; ++ const u32 flags = entry[0]; ++ const u32 virtual_start = entry[1]; ++ const u32 virtual_end = entry[2]; ++ const u32 data_start = entry[3]; ++ const u32 data_end = entry[4]; ++ u32 num_pages; ++ char *name; ++ struct tagged_addr *phys = NULL; ++ struct kbase_csf_firmware_interface *interface = NULL; ++ bool allocated_pages = false, protected_mode = false; ++ unsigned long mem_flags = 0; ++ u32 cache_mode = 0; ++ struct protected_memory_allocation **pma = NULL; ++ ++ if (data_end < data_start) { ++ dev_err(kbdev->dev, "Firmware corrupt, data_end < data_start (0x%x<0x%x)\n", ++ data_end, data_start); ++ return -EINVAL; ++ } ++ if (virtual_end < virtual_start) { ++ dev_err(kbdev->dev, "Firmware corrupt, virtual_end < virtual_start (0x%x<0x%x)\n", ++ virtual_end, virtual_start); ++ return -EINVAL; ++ } ++ if (data_end > fw->size) { ++ dev_err(kbdev->dev, "Firmware corrupt, file truncated? 
data_end=0x%x > fw->size=0x%zx\n", ++ data_end, fw->size); ++ return -EINVAL; ++ } ++ ++ if ((virtual_start & ~PAGE_MASK) != 0 || ++ (virtual_end & ~PAGE_MASK) != 0) { ++ dev_err(kbdev->dev, "Firmware corrupt: virtual addresses not page aligned: 0x%x-0x%x\n", ++ virtual_start, virtual_end); ++ return -EINVAL; ++ } ++ ++ if ((flags & CSF_FIRMWARE_ENTRY_SUPPORTED_FLAGS) != flags) { ++ dev_err(kbdev->dev, "Firmware contains interface with unsupported flags (0x%x)\n", ++ flags); ++ return -EINVAL; ++ } ++ ++ if (flags & CSF_FIRMWARE_ENTRY_PROTECTED) ++ protected_mode = true; ++ ++ if (protected_mode && kbdev->csf.pma_dev == NULL) { ++ dev_err(kbdev->dev, ++ "Protected memory allocator not found, Firmware protected mode entry will not be supported"); ++ return 0; ++ } ++ ++ num_pages = (virtual_end - virtual_start) ++ >> PAGE_SHIFT; ++ ++ phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL); ++ if (!phys) ++ return -ENOMEM; ++ ++ if (protected_mode) { ++ pma = kbase_csf_protected_memory_alloc(kbdev, phys, num_pages); ++ ++ if (pma == NULL) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ } else { ++ ret = kbase_mem_pool_alloc_pages( ++ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], ++ num_pages, phys, false); ++ if (ret < 0) ++ goto out; ++ } ++ ++ allocated_pages = true; ++ load_fw_image_section(kbdev, fw->data, phys, num_pages, flags, ++ data_start, data_end); ++ ++ /* Allocate enough memory for the struct kbase_csf_firmware_interface and ++ * the name of the interface. An extra byte is allocated to place a ++ * NUL-terminator in. This should already be included according to the ++ * specification but here we add it anyway to be robust against a ++ * corrupt firmware image. 
++ */ ++ interface = kmalloc(sizeof(*interface) + ++ size - INTERFACE_ENTRY_NAME_OFFSET + 1, GFP_KERNEL); ++ if (!interface) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ name = (void *)(interface + 1); ++ memcpy(name, entry + (INTERFACE_ENTRY_NAME_OFFSET / sizeof(*entry)), ++ size - INTERFACE_ENTRY_NAME_OFFSET); ++ name[size - INTERFACE_ENTRY_NAME_OFFSET] = 0; ++ ++ interface->name = name; ++ interface->phys = phys; ++ interface->num_pages = num_pages; ++ interface->virtual = virtual_start; ++ interface->kernel_map = NULL; ++ interface->flags = flags; ++ interface->data_start = data_start; ++ interface->data_end = data_end; ++ interface->pma = pma; ++ ++ mem_flags = convert_mem_flags(kbdev, flags, &cache_mode); ++ ++ if (flags & CSF_FIRMWARE_ENTRY_SHARED) { ++ struct page **page_list; ++ u32 i; ++ pgprot_t cpu_map_prot; ++ u32 mem_attr_index = KBASE_REG_MEMATTR_VALUE(mem_flags); ++ ++ /* Since SHARED memory type was used for mapping shared memory ++ * on GPU side, it can be mapped as cached on CPU side on both ++ * types of coherent platforms. ++ */ ++ if ((cache_mode == CSF_FIRMWARE_CACHE_MODE_CACHED_COHERENT) || ++ (cache_mode == CSF_FIRMWARE_CACHE_MODE_UNCACHED_COHERENT)) { ++ WARN_ON(mem_attr_index != ++ AS_MEMATTR_INDEX_SHARED); ++ cpu_map_prot = PAGE_KERNEL; ++ } else { ++ WARN_ON(mem_attr_index != ++ AS_MEMATTR_INDEX_NON_CACHEABLE); ++ cpu_map_prot = pgprot_writecombine(PAGE_KERNEL); ++ } ++ ++ page_list = kmalloc_array(num_pages, sizeof(*page_list), ++ GFP_KERNEL); ++ if (!page_list) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ for (i = 0; i < num_pages; i++) ++ page_list[i] = as_page(phys[i]); ++ ++ interface->kernel_map = vmap(page_list, num_pages, VM_MAP, ++ cpu_map_prot); ++ ++ kfree(page_list); ++ ++ if (!interface->kernel_map) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ } ++ ++ /* Start location of the shared interface area is fixed and is ++ * specified in firmware spec, and so there shall only be a ++ * single entry with that start address. 
++ */ ++ if (virtual_start == (KBASE_REG_ZONE_MCU_SHARED_BASE << PAGE_SHIFT)) ++ kbdev->csf.shared_interface = interface; ++ ++ list_add(&interface->node, &kbdev->csf.firmware_interfaces); ++ ++ ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, ++ virtual_start >> PAGE_SHIFT, phys, num_pages, mem_flags, ++ KBASE_MEM_GROUP_CSF_FW); ++ ++ if (ret != 0) { ++ dev_err(kbdev->dev, "Failed to insert firmware pages\n"); ++ /* The interface has been added to the list, so cleanup will ++ * be handled by firmware unloading ++ */ ++ } ++ ++ dev_dbg(kbdev->dev, "Processed section '%s'", name); ++ ++ return ret; ++ ++out: ++ if (allocated_pages) { ++ if (protected_mode) { ++ kbase_csf_protected_memory_free(kbdev, pma, num_pages); ++ } else { ++ kbase_mem_pool_free_pages( ++ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], ++ num_pages, phys, false, false); ++ } ++ } ++ ++ kfree(phys); ++ kfree(interface); ++ return ret; ++} ++ ++/** ++ * parse_timeline_metadata_entry() - Process a "timeline metadata" section ++ * ++ * Return: 0 if successful, negative error code on failure (including a data range that wraps or exceeds the image) ++ * ++ * @kbdev: Kbase device structure ++ * @fw: Firmware image containing the section ++ * @entry: Pointer to the section ++ * @size: Size (in bytes) of the section ++ */ ++static int parse_timeline_metadata_entry(struct kbase_device *kbdev, ++ const struct firmware *fw, const u32 *entry, unsigned int size) ++{ ++ const u32 data_start = entry[0]; ++ const u32 data_size = entry[1]; ++ const u32 data_end = data_start + data_size; ++ const char *name = (char *)&entry[2]; ++ struct firmware_timeline_metadata *metadata; ++ const unsigned int name_len = ++ size - TL_METADATA_ENTRY_NAME_OFFSET; ++ size_t allocation_size = sizeof(*metadata) + name_len + 1 + data_size; ++ ++ if (data_end < data_start || data_end > fw->size) { /* first test rejects u32 wrap of data_start + data_size */ ++ dev_err(kbdev->dev, ++ "Firmware corrupt, file truncated?
data_end=0x%x > fw->size=0x%zx", ++ data_end, fw->size); ++ return -EINVAL; ++ } ++ ++ /* Allocate enough space for firmware_timeline_metadata, ++ * its name and the content. ++ */ ++ metadata = kmalloc(allocation_size, GFP_KERNEL); ++ if (!metadata) ++ return -ENOMEM; ++ ++ metadata->name = (char *)(metadata + 1); ++ metadata->data = (char *)(metadata + 1) + name_len + 1; ++ metadata->size = data_size; ++ ++ memcpy(metadata->name, name, name_len); ++ metadata->name[name_len] = 0; ++ ++ /* Copy metadata's content. */ ++ memcpy(metadata->data, fw->data + data_start, data_size); ++ ++ list_add(&metadata->node, &kbdev->csf.firmware_timeline_metadata); ++ ++ dev_dbg(kbdev->dev, "Timeline metadata '%s'", metadata->name); ++ ++ return 0; ++} ++ ++/** ++ * load_firmware_entry() - Process an entry from a firmware image ++ * ++ * Read an entry from a firmware image and do any necessary work (e.g. loading ++ * the data into page accessible to the MCU). ++ * ++ * Unknown entries are ignored if the 'optional' flag is set within the entry, ++ * otherwise the function will fail with -EINVAL ++ * ++ * Return: 0 if successful, negative error code on failure ++ * ++ * @kbdev: Kbase device ++ * @fw: Firmware image containing the entry ++ * @offset: Byte offset within the image of the entry to load ++ * @header: Header word of the entry ++ */ ++static int load_firmware_entry(struct kbase_device *kbdev, ++ const struct firmware *fw, ++ u32 offset, u32 header) ++{ ++ const unsigned int type = entry_type(header); ++ unsigned int size = entry_size(header); ++ const bool optional = entry_optional(header); ++ /* Update is used with configuration and tracebuffer entries to ++ * initiate a FIRMWARE_CONFIG_UPDATE, instead of triggering a ++ * silent reset. 
++ */ ++ const bool updatable = entry_update(header); ++ const u32 *entry = (void *)(fw->data + offset); ++ ++ if ((offset % sizeof(*entry)) || (size % sizeof(*entry))) { ++ dev_err(kbdev->dev, "Firmware entry isn't 32 bit aligned, offset=0x%x size=0x%x\n", ++ offset, size); ++ return -EINVAL; ++ } ++ ++ if (size < sizeof(*entry)) { ++ dev_err(kbdev->dev, "Size field too small: %u\n", size); ++ return -EINVAL; ++ } ++ ++ /* Remove the header */ ++ entry++; ++ size -= sizeof(*entry); ++ ++ switch (type) { ++ case CSF_FIRMWARE_ENTRY_TYPE_INTERFACE: ++ /* Interface memory setup */ ++ if (size < INTERFACE_ENTRY_NAME_OFFSET + sizeof(*entry)) { ++ dev_err(kbdev->dev, "Interface memory setup entry too short (size=%u)\n", ++ size); ++ return -EINVAL; ++ } ++ return parse_memory_setup_entry(kbdev, fw, entry, size); ++ case CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION: ++ /* Configuration option */ ++ if (size < CONFIGURATION_ENTRY_NAME_OFFSET + sizeof(*entry)) { ++ dev_err(kbdev->dev, "Configuration option entry too short (size=%u)\n", ++ size); ++ return -EINVAL; ++ } ++ return kbase_csf_firmware_cfg_option_entry_parse( ++ kbdev, fw, entry, size, updatable); ++ case CSF_FIRMWARE_ENTRY_TYPE_FUTF_TEST: ++#ifndef MALI_KBASE_BUILD ++ /* FW UTF option */ ++ if (size < 2*sizeof(*entry)) { ++ dev_err(kbdev->dev, "FW UTF entry too short (size=%u)\n", ++ size); ++ return -EINVAL; ++ } ++ return mali_kutf_process_fw_utf_entry(kbdev, fw->data, ++ fw->size, entry); ++#endif ++ break; ++ case CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER: ++ /* Trace buffer */ ++ if (size < TRACE_BUFFER_ENTRY_NAME_OFFSET + sizeof(*entry)) { ++ dev_err(kbdev->dev, "Trace Buffer entry too short (size=%u)\n", ++ size); ++ return -EINVAL; ++ } ++ return kbase_csf_firmware_parse_trace_buffer_entry( ++ kbdev, entry, size, updatable); ++ case CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA: ++ /* Meta data section */ ++ if (size < TL_METADATA_ENTRY_NAME_OFFSET + sizeof(*entry)) { ++ dev_err(kbdev->dev, "Timeline metadata entry too 
short (size=%u)\n", ++ size); ++ return -EINVAL; ++ } ++ return parse_timeline_metadata_entry(kbdev, fw, entry, size); ++ } ++ ++ if (!optional) { ++ dev_err(kbdev->dev, ++ "Unsupported non-optional entry type %u in firmware\n", ++ type); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static void free_global_iface(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface; ++ ++ if (iface->groups) { ++ unsigned int gid; ++ ++ for (gid = 0; gid < iface->group_num; ++gid) ++ kfree(iface->groups[gid].streams); ++ ++ kfree(iface->groups); ++ iface->groups = NULL; ++ } ++} ++ ++/** ++ * iface_gpu_va_to_cpu - Convert a GPU VA address within the shared interface ++ * region to a CPU address, using the existing mapping. ++ * @kbdev: Device pointer ++ * @gpu_va: GPU VA to convert ++ * ++ * Return: A CPU pointer to the location within the shared interface region, or ++ * NULL on failure. ++ */ ++static inline void *iface_gpu_va_to_cpu(struct kbase_device *kbdev, u32 gpu_va) ++{ ++ struct kbase_csf_firmware_interface *interface = ++ kbdev->csf.shared_interface; ++ u8 *kernel_base = interface->kernel_map; ++ ++ if (gpu_va < interface->virtual || ++ gpu_va >= interface->virtual + interface->num_pages * PAGE_SIZE) { ++ dev_err(kbdev->dev, ++ "Interface address 0x%x not within %u-page region at 0x%x", ++ gpu_va, interface->num_pages, ++ interface->virtual); ++ return NULL; ++ } ++ ++ return (void *)(kernel_base + (gpu_va - interface->virtual)); ++} ++ ++static int parse_cmd_stream_info(struct kbase_device *kbdev, ++ struct kbase_csf_cmd_stream_info *sinfo, ++ u32 *stream_base) ++{ ++ sinfo->kbdev = kbdev; ++ sinfo->features = stream_base[STREAM_FEATURES/4]; ++ sinfo->input = iface_gpu_va_to_cpu(kbdev, ++ stream_base[STREAM_INPUT_VA/4]); ++ sinfo->output = iface_gpu_va_to_cpu(kbdev, ++ stream_base[STREAM_OUTPUT_VA/4]); ++ ++ if (sinfo->input == NULL || sinfo->output == NULL) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static int 
parse_cmd_stream_group_info(struct kbase_device *kbdev,
++		struct kbase_csf_cmd_stream_group_info *ginfo,
++		u32 *group_base, u32 group_stride)
++{
++	/* Fills in one CSG descriptor, and the descriptors of its CSs, from
++	 * the group control block at group_base. Byte offsets are divided by
++	 * 4 for access through a u32 pointer. On failure after the streams
++	 * array has been allocated, the array is left for the caller to free.
++	 */
++	unsigned int sid;
++
++	ginfo->kbdev = kbdev;
++	ginfo->features = group_base[GROUP_FEATURES/4];
++	ginfo->input = iface_gpu_va_to_cpu(kbdev,
++			group_base[GROUP_INPUT_VA/4]);
++	ginfo->output = iface_gpu_va_to_cpu(kbdev,
++			group_base[GROUP_OUTPUT_VA/4]);
++
++	if (ginfo->input == NULL || ginfo->output == NULL)
++		return -ENOMEM;
++
++	ginfo->suspend_size = group_base[GROUP_SUSPEND_SIZE/4];
++	ginfo->protm_suspend_size = group_base[GROUP_PROTM_SUSPEND_SIZE/4];
++	ginfo->stream_num = group_base[GROUP_STREAM_NUM/4];
++
++	if (ginfo->stream_num < MIN_SUPPORTED_STREAMS_PER_GROUP ||
++	    ginfo->stream_num > MAX_SUPPORTED_STREAMS_PER_GROUP) {
++		dev_err(kbdev->dev, "CSG with %u CSs out of range %u-%u",
++				ginfo->stream_num,
++				MIN_SUPPORTED_STREAMS_PER_GROUP,
++				MAX_SUPPORTED_STREAMS_PER_GROUP);
++		return -EINVAL;
++	}
++
++	ginfo->stream_stride = group_base[GROUP_STREAM_STRIDE/4];
++
++	/* Both factors are 32-bit values read from the interface; widen the
++	 * product so a large stride cannot wrap around and pass the check.
++	 */
++	if ((u64)ginfo->stream_num * ginfo->stream_stride > group_stride) {
++		dev_err(kbdev->dev,
++				"group stride of 0x%x exceeded by %u CSs with stride 0x%x",
++				group_stride, ginfo->stream_num,
++				ginfo->stream_stride);
++		return -EINVAL;
++	}
++
++	ginfo->streams = kmalloc_array(ginfo->stream_num,
++			sizeof(*ginfo->streams), GFP_KERNEL);
++
++	if (!ginfo->streams)
++		return -ENOMEM;
++
++	for (sid = 0; sid < ginfo->stream_num; sid++) {
++		int err;
++		u32 *stream_base = group_base + (STREAM_CONTROL_0 +
++				ginfo->stream_stride * sid) / 4;
++
++		err = parse_cmd_stream_info(kbdev, &ginfo->streams[sid],
++				stream_base);
++		if (err < 0) {
++			/* caller will free the memory for CSs array */
++			return err;
++		}
++	}
++
++	return 0;
++}
++
++/**
++ * get_firmware_version() - Read GLB_VERSION from the shared interface
++ * @kbdev: Kbase device
++ *
++ * Return: The word at byte offset GLB_VERSION of the shared interface's
++ *         kernel mapping.
++ */
++static u32 get_firmware_version(struct kbase_device *kbdev)
++{
++	struct kbase_csf_firmware_interface *interface =
++		kbdev->csf.shared_interface;
++	u32 *shared_info = interface->kernel_map;
++
++	return shared_info[GLB_VERSION/4];
++}
++
++/**
++ * parse_capabilities() - Populate kbdev->csf.global_iface from shared memory
++ * @kbdev: Kbase device
++ *
++ * Reads the global control block through the shared interface's kernel
++ * mapping, validates the CSG count and stride against the size of the region,
++ * then allocates and fills one descriptor per CSG. On a per-group parsing
++ * failure everything allocated so far is released via free_global_iface().
++ *
++ * Return: 0 if successful, negative error code on failure
++ */
++static int parse_capabilities(struct kbase_device *kbdev)
++{
++	struct kbase_csf_firmware_interface *interface =
++		kbdev->csf.shared_interface;
++	u32 *shared_info = interface->kernel_map;
++	struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface;
++	unsigned int gid;
++
++	/* All offsets are in bytes, so divide by 4 for access via a u32 pointer
++	 */
++
++	/* The version number of the global interface is expected to be a
++	 * non-zero value. If it's not, the firmware may not have booted.
++	 */
++	iface->version = get_firmware_version(kbdev);
++	if (!iface->version) {
++		dev_err(kbdev->dev, "Version check failed. Firmware may have failed to boot.");
++		return -EINVAL;
++	}
++
++
++	iface->kbdev = kbdev;
++	iface->features = shared_info[GLB_FEATURES/4];
++	iface->input = iface_gpu_va_to_cpu(kbdev, shared_info[GLB_INPUT_VA/4]);
++	iface->output = iface_gpu_va_to_cpu(kbdev,
++			shared_info[GLB_OUTPUT_VA/4]);
++
++	if (iface->input == NULL || iface->output == NULL)
++		return -ENOMEM;
++
++	iface->group_num = shared_info[GLB_GROUP_NUM/4];
++
++	if (iface->group_num < MIN_SUPPORTED_CSGS ||
++			iface->group_num > MAX_SUPPORTED_CSGS) {
++		dev_err(kbdev->dev,
++				"Interface containing %u CSGs outside of range %u-%u",
++				iface->group_num, MIN_SUPPORTED_CSGS,
++				MAX_SUPPORTED_CSGS);
++		return -EINVAL;
++	}
++
++	iface->group_stride = shared_info[GLB_GROUP_STRIDE/4];
++	iface->prfcnt_size = shared_info[GLB_PRFCNT_SIZE/4];
++
++	if (iface->version >= kbase_csf_interface_version(1, 1, 0)) {
++		iface->instr_features = shared_info[GLB_INSTR_FEATURES / 4];
++	} else {
++		iface->instr_features = 0;
++	}
++
++	if ((GROUP_CONTROL_0 +
++		(unsigned long)iface->group_num * iface->group_stride) >
++			(interface->num_pages * PAGE_SIZE)) {
++		dev_err(kbdev->dev,
++				"interface size of %u pages exceeded by %u CSGs with stride 0x%x",
++				interface->num_pages, iface->group_num,
++				iface->group_stride);
++		return -EINVAL;
++	}
++
++	WARN_ON(iface->groups);
++
++	/* Zero-initialised so that free_global_iface() can safely kfree() the
++	 * streams pointer of groups that were never parsed.
++	 */
++	iface->groups = kcalloc(iface->group_num, sizeof(*iface->groups),
++				GFP_KERNEL);
++	if (!iface->groups)
++		return -ENOMEM;
++
++	for (gid = 0; gid < iface->group_num; gid++) {
++		int err;
++		u32 *group_base = shared_info + (GROUP_CONTROL_0 +
++				iface->group_stride * gid) / 4;
++
++		err = parse_cmd_stream_group_info(kbdev, &iface->groups[gid],
++				group_base, iface->group_stride);
++		if (err < 0) {
++			free_global_iface(kbdev);
++			return err;
++		}
++	}
++
++	return 0;
++}
++
++/**
++ * access_firmware_memory() - Read or write one word of firmware memory
++ * @kbdev:    Kbase device
++ * @gpu_addr: GPU virtual address of the word
++ * @value:    Destination of the word read, or source of the word to write
++ * @read:     true to read into @value, false to write from @value
++ *
++ * Searches kbdev->csf.firmware_interfaces for the interface whose GPU VA
++ * range contains @gpu_addr, temporarily maps the backing page and accesses
++ * the word. A warning is logged, and @value is left untouched, if no
++ * interface contains the address.
++ */
++static inline void access_firmware_memory(struct kbase_device *kbdev,
++	u32 gpu_addr, u32 *value, const bool read)
++{
++	struct kbase_csf_firmware_interface *interface;
++
++	list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) {
++		if ((gpu_addr >= interface->virtual) &&
++			(gpu_addr < interface->virtual + (interface->num_pages << PAGE_SHIFT))) {
++			u32 offset_bytes = gpu_addr - interface->virtual;
++			u32 page_num = offset_bytes >> PAGE_SHIFT;
++			u32 offset_in_page = offset_bytes & ~PAGE_MASK;
++			struct page *target_page = as_page(
++				interface->phys[page_num]);
++			u32 *cpu_addr = kmap_atomic(target_page);
++
++			if (read) {
++				/* NOTE(review): the read path syncs for the
++				 * device before reading; confirm whether a
++				 * sync for the CPU was intended here.
++				 */
++				kbase_sync_single_for_device(kbdev,
++					kbase_dma_addr(target_page) + offset_in_page,
++					sizeof(u32), DMA_BIDIRECTIONAL);
++
++				*value = cpu_addr[offset_in_page >> 2];
++			} else {
++				cpu_addr[offset_in_page >> 2] = *value;
++
++				kbase_sync_single_for_device(kbdev,
++					kbase_dma_addr(target_page) + offset_in_page,
++					sizeof(u32), DMA_BIDIRECTIONAL);
++			}
++
++			kunmap_atomic(cpu_addr);
++			return;
++		}
++	}
++	dev_warn(kbdev->dev, "Invalid GPU VA %x passed\n", gpu_addr);
++}
++
++/* Read the word of firmware memory at gpu_addr into *value. */
++void kbase_csf_read_firmware_memory(struct kbase_device *kbdev,
++	u32 gpu_addr, u32 *value)
++{
++	access_firmware_memory(kbdev, gpu_addr, value, true);
++}
++
++/* Write value to the word of firmware memory at gpu_addr. */
++void kbase_csf_update_firmware_memory(struct kbase_device *kbdev,
++	u32 gpu_addr, u32 value)
++{
++	access_firmware_memory(kbdev, gpu_addr, &value, false);
++}
++
++void
kbase_csf_firmware_cs_input(
++	const struct kbase_csf_cmd_stream_info *const info, const u32 offset,
++	const u32 value)
++{
++	/* Log the access, then write value at offset of the CS input page. */
++	const struct kbase_device * const kbdev = info->kbdev;
++
++	dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x\n", offset, value);
++	input_page_write(info->input, offset, value);
++}
++
++/* Read and log the word at offset of the CS input page. */
++u32 kbase_csf_firmware_cs_input_read(
++	const struct kbase_csf_cmd_stream_info *const info,
++	const u32 offset)
++{
++	const struct kbase_device * const kbdev = info->kbdev;
++	u32 const val = input_page_read(info->input, offset);
++
++	dev_dbg(kbdev->dev, "cs input r: reg %08x val %08x\n", offset, val);
++	return val;
++}
++
++/* Log the access, then pass value and mask to input_page_partial_write()
++ * for the word at offset of the CS input page.
++ */
++void kbase_csf_firmware_cs_input_mask(
++	const struct kbase_csf_cmd_stream_info *const info, const u32 offset,
++	const u32 value, const u32 mask)
++{
++	const struct kbase_device * const kbdev = info->kbdev;
++
++	dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x mask %08x\n",
++			offset, value, mask);
++	input_page_partial_write(info->input, offset, value, mask);
++}
++
++/* Read and log the word at offset of the CS output page. */
++u32 kbase_csf_firmware_cs_output(
++	const struct kbase_csf_cmd_stream_info *const info, const u32 offset)
++{
++	const struct kbase_device * const kbdev = info->kbdev;
++	u32 const val = output_page_read(info->output, offset);
++
++	dev_dbg(kbdev->dev, "cs output r: reg %08x val %08x\n", offset, val);
++	return val;
++}
++
++/* Log the access, then write value at offset of the CSG input page. */
++void kbase_csf_firmware_csg_input(
++	const struct kbase_csf_cmd_stream_group_info *const info,
++	const u32 offset, const u32 value)
++{
++	const struct kbase_device * const kbdev = info->kbdev;
++
++	dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x\n",
++			offset, value);
++	input_page_write(info->input, offset, value);
++}
++
++/* Read and log the word at offset of the CSG input page. */
++u32 kbase_csf_firmware_csg_input_read(
++	const struct kbase_csf_cmd_stream_group_info *const info,
++	const u32 offset)
++{
++	const struct kbase_device * const kbdev = info->kbdev;
++	u32 const val = input_page_read(info->input, offset);
++
++	dev_dbg(kbdev->dev, "csg input r: reg %08x val %08x\n", offset, val);
++	return val;
++}
++
++/* Log the access, then pass value and mask to input_page_partial_write()
++ * for the word at offset of the CSG input page.
++ */
++void kbase_csf_firmware_csg_input_mask(
++	const struct kbase_csf_cmd_stream_group_info *const info,
++	const u32 offset, const u32 value, const u32 mask)
++{
++	const struct kbase_device * const kbdev = info->kbdev;
++
++	dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x mask %08x\n",
++			offset, value, mask);
++	input_page_partial_write(info->input, offset, value, mask);
++}
++
++/* Read and log the word at offset of the CSG output page. */
++u32 kbase_csf_firmware_csg_output(
++	const struct kbase_csf_cmd_stream_group_info *const info,
++	const u32 offset)
++{
++	const struct kbase_device * const kbdev = info->kbdev;
++	u32 const val = output_page_read(info->output, offset);
++
++	dev_dbg(kbdev->dev, "csg output r: reg %08x val %08x\n", offset, val);
++	return val;
++}
++
++/* Log the access, then write value at offset of the global input page. */
++void kbase_csf_firmware_global_input(
++	const struct kbase_csf_global_iface *const iface, const u32 offset,
++	const u32 value)
++{
++	const struct kbase_device * const kbdev = iface->kbdev;
++
++	dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x\n", offset, value);
++	input_page_write(iface->input, offset, value);
++}
++
++/* Log the access, then pass value and mask to input_page_partial_write()
++ * for the word at offset of the global input page.
++ */
++void kbase_csf_firmware_global_input_mask(
++	const struct kbase_csf_global_iface *const iface, const u32 offset,
++	const u32 value, const u32 mask)
++{
++	const struct kbase_device * const kbdev = iface->kbdev;
++
++	dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x mask %08x\n",
++			offset, value, mask);
++	input_page_partial_write(iface->input, offset, value, mask);
++}
++
++/* Read and log the word at offset of the global input page. */
++u32 kbase_csf_firmware_global_input_read(
++	const struct kbase_csf_global_iface *const iface, const u32 offset)
++{
++	const struct kbase_device * const kbdev = iface->kbdev;
++	u32 const val = input_page_read(iface->input, offset);
++
++	dev_dbg(kbdev->dev, "glob input r: reg %08x val %08x\n", offset, val);
++	return val;
++}
++
++/* Read and log the word at offset of the global output page. */
++u32 kbase_csf_firmware_global_output(
++	const struct kbase_csf_global_iface *const iface, const u32 offset)
++{
++	const struct kbase_device * const kbdev = iface->kbdev;
++	u32 const val = output_page_read(iface->output, offset);
++
++	dev_dbg(kbdev->dev, "glob output r: reg %08x val %08x\n", offset, val);
++	return val;
++}
++
++/**
++ * handle_internal_firmware_fatal - Handler for CS internal firmware fault.
++ *
++ * @kbdev:  Pointer to kbase device
++ *
++ * Report group fatal error to user space for all GPU command queue groups
++ * in the device, terminate them and reset GPU.
++ */
++static void handle_internal_firmware_fatal(struct kbase_device *const kbdev)
++{
++	int as;
++
++	for (as = 0; as < kbdev->nr_hw_address_spaces; as++) {
++		unsigned long flags;
++		struct kbase_context *kctx;
++		struct kbase_fault fault;
++
++		/* The MCU's own address space has no user context to notify */
++		if (as == MCU_AS_NR)
++			continue;
++
++		/* Only handle the fault for an active address space. Lock is
++		 * taken here to atomically get reference to context in an
++		 * active address space and retain its refcount.
++		 */
++		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
++		kctx = kbase_ctx_sched_as_to_ctx_nolock(kbdev, as);
++
++		if (kctx) {
++			kbase_ctx_sched_retain_ctx_refcount(kctx);
++			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
++		} else {
++			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
++			continue;
++		}
++
++		fault = (struct kbase_fault) {
++			.status = GPU_EXCEPTION_TYPE_SW_FAULT_1,
++		};
++
++		kbase_csf_ctx_handle_fault(kctx, &fault);
++		/* Drop the reference retained above */
++		kbase_ctx_sched_release_ctx_lock(kctx);
++	}
++
++	if (kbase_prepare_to_reset_gpu(kbdev,
++				       RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
++		kbase_reset_gpu(kbdev);
++}
++
++/**
++ * firmware_error_worker - Worker function for handling firmware internal error
++ *
++ * @data: Pointer to a work_struct embedded in kbase device.
++ *
++ * Handle the CS internal firmware error
++ */
++static void firmware_error_worker(struct work_struct *const data)
++{
++	struct kbase_device *const kbdev =
++		container_of(data, struct kbase_device, csf.fw_error_work);
++
++	handle_internal_firmware_fatal(kbdev);
++}
++
++/**
++ * global_request_complete - Check whether global requests have been
++ *                           acknowledged
++ * @kbdev:    Pointer to kbase device
++ * @req_mask: Bits of GLB_REQ/GLB_ACK to compare
++ *
++ * Return: true if, under the scheduler spinlock, GLB_ACK and GLB_REQ hold
++ *         the same value in every bit of @req_mask.
++ */
++static bool global_request_complete(struct kbase_device *const kbdev,
++				    u32 const req_mask)
++{
++	struct kbase_csf_global_iface *global_iface =
++				&kbdev->csf.global_iface;
++	bool complete = false;
++	unsigned long flags;
++
++	kbase_csf_scheduler_spin_lock(kbdev, &flags);
++
++	if ((kbase_csf_firmware_global_output(global_iface, GLB_ACK) &
++	     req_mask) ==
++	    (kbase_csf_firmware_global_input_read(global_iface, GLB_REQ) &
++	     req_mask))
++		complete = true;
++
++	kbase_csf_scheduler_spin_unlock(kbdev, flags);
++
++	return complete;
++}
++
++/**
++ * wait_for_global_request - Wait for global requests to be acknowledged
++ * @kbdev:    Pointer to kbase device
++ * @req_mask: Bits of GLB_REQ/GLB_ACK to wait on
++ *
++ * Sleeps on kbdev->csf.event_wait until global_request_complete() is true or
++ * the timeout derived from kbdev->csf.fw_timeout_ms expires.
++ *
++ * Return: 0 on completion, -ETIMEDOUT if the wait timed out
++ */
++static int wait_for_global_request(struct kbase_device *const kbdev,
++				   u32 const req_mask)
++{
++	const long wait_timeout =
++		kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
++	long remaining;
++	int err = 0;
++
++	remaining = wait_event_timeout(kbdev->csf.event_wait,
++				       global_request_complete(kbdev, req_mask),
++				       wait_timeout);
++
++	if (!remaining) {
++		dev_warn(kbdev->dev, "Timed out waiting for global request %x to complete",
++			 req_mask);
++		err = -ETIMEDOUT;
++	}
++
++	return err;
++}
++
++/**
++ * set_global_request - Raise global requests
++ * @global_iface: Global interface
++ * @req_mask:     Bits of GLB_REQ to raise
++ *
++ * Sets the @req_mask bits of GLB_REQ to the inverse of the corresponding
++ * GLB_ACK bits, so that REQ and ACK differ in those bits. The scheduler
++ * spinlock must be held.
++ */
++static void set_global_request(
++	const struct kbase_csf_global_iface *const global_iface,
++	u32 const req_mask)
++{
++	u32 glb_req;
++
++	kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev);
++
++	glb_req = kbase_csf_firmware_global_output(global_iface, GLB_ACK);
++	glb_req ^= req_mask;
++	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req,
++					     req_mask);
++}
++
++/* Write the 64-bit shader core mask to GLB_ALLOC_EN_LO/HI and raise the
++ * CFG_ALLOC_EN request.
++ */
++static void enable_endpoints_global(
++	const struct kbase_csf_global_iface *const global_iface,
++	u64 const shader_core_mask)
++{
++	kbase_csf_firmware_global_input(global_iface, GLB_ALLOC_EN_LO,
++		shader_core_mask & U32_MAX);
++	kbase_csf_firmware_global_input(global_iface, GLB_ALLOC_EN_HI,
++		shader_core_mask >> 32);
++
++	set_global_request(global_iface, GLB_REQ_CFG_ALLOC_EN_MASK);
++}
++
++/* Program GLB_PWROFF_TIMER and raise the CFG_PWROFF_TIMER request. The value
++ * written is DISABLE_GLB_PWROFF_TIMER with the system timestamp source when
++ * kbdev->csf.firmware_hctl_core_pwr is set, otherwise the precomputed
++ * kbdev->csf.mcu_core_pwroff_dur_count.
++ */
++static void enable_shader_poweroff_timer(struct kbase_device *const kbdev,
++	const struct kbase_csf_global_iface *const global_iface)
++{
++	u32 pwroff_reg;
++
++	if (kbdev->csf.firmware_hctl_core_pwr)
++		pwroff_reg =
++		    GLB_PWROFF_TIMER_TIMER_SOURCE_SET(DISABLE_GLB_PWROFF_TIMER,
++			       GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP);
++	else
++		pwroff_reg = kbdev->csf.mcu_core_pwroff_dur_count;
++
++	kbase_csf_firmware_global_input(global_iface, GLB_PWROFF_TIMER,
++					pwroff_reg);
++	set_global_request(global_iface, GLB_REQ_CFG_PWROFF_TIMER_MASK);
++
++	/* Save the programmed reg value in its shadow field */
++	kbdev->csf.mcu_core_pwroff_reg_shadow = pwroff_reg;
++
++	dev_dbg(kbdev->dev, "GLB_PWROFF_TIMER set to 0x%.8x\n", pwroff_reg);
++}
++
++/* Write timeout, divided by GLB_PROGRESS_TIMER_TIMEOUT_SCALE, to
++ * GLB_PROGRESS_TIMER and raise the CFG_PROGRESS_TIMER request.
++ */
++static void set_timeout_global(
++	const struct kbase_csf_global_iface *const global_iface,
++	u64 const timeout)
++{
++	kbase_csf_firmware_global_input(global_iface, GLB_PROGRESS_TIMER,
++		timeout / GLB_PROGRESS_TIMER_TIMEOUT_SCALE);
++
++	set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK);
++}
++
++/**
++ * global_init - Program the global interface and notify the firmware
++ * @kbdev:     Pointer to kbase device
++ * @core_mask: Shader core allocation enable mask
++ *
++ * Under the scheduler spinlock: writes the protected mode coherency, the core
++ * mask, the power-off timer and the progress timer, unmasks the global ACK
++ * interrupts and rings the kernel doorbell. Does not wait for the requests to
++ * be acknowledged.
++ */
++static void global_init(struct kbase_device *const kbdev, u64 core_mask)
++{
++	u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK |
++				 GLB_ACK_IRQ_MASK_PING_MASK |
++				 GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK |
++				 GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
++				 GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK |
++				 GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
++				 GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK |
++				 GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK;
++
++	const struct kbase_csf_global_iface *const global_iface =
++		&kbdev->csf.global_iface;
++	unsigned long flags;
++
++	kbase_csf_scheduler_spin_lock(kbdev, &flags);
++
++	/* Set the coherency mode for protected mode execution */
++	WARN_ON(kbdev->system_coherency == COHERENCY_ACE);
++	kbase_csf_firmware_global_input(global_iface, GLB_PROTM_COHERENCY,
++					kbdev->system_coherency);
++
++	/* Update shader core allocation enable mask */
++	enable_endpoints_global(global_iface, core_mask);
++	enable_shader_poweroff_timer(kbdev, global_iface);
++
++	set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev));
++
++	/* Unmask the interrupts */
++	kbase_csf_firmware_global_input(global_iface,
++		GLB_ACK_IRQ_MASK, ack_irq_mask);
++
++	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
++
++	kbase_csf_scheduler_spin_unlock(kbdev, flags);
++}
++
++/**
++ * global_init_on_boot - Sends a global request to control various features.
++ *
++ * @kbdev: Instance of a GPU platform device that implements a CSF interface
++ *
++ * Currently only the request to enable endpoints and timeout for GPU progress
++ * timer is sent.
++ *
++ * Return: 0 on success, or negative on failure.
++ */
++static int global_init_on_boot(struct kbase_device *const kbdev)
++{
++	unsigned long flags;
++	u64 core_mask;
++
++	/* Sample the core mask and power-control mode under hwaccess_lock */
++	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
++	core_mask = kbase_pm_ca_get_core_mask(kbdev);
++	kbdev->csf.firmware_hctl_core_pwr =
++				kbase_pm_no_mcu_core_pwroff(kbdev);
++	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
++
++	global_init(kbdev, core_mask);
++
++	return wait_for_global_request(kbdev, CSF_GLB_REQ_CFG_MASK);
++}
++
++/* Re-send the global configuration with the given core mask and mark the
++ * request as pending; completion is polled with
++ * kbase_csf_firmware_global_reinit_complete(). hwaccess_lock must be held.
++ */
++void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev,
++				      u64 core_mask)
++{
++	lockdep_assert_held(&kbdev->hwaccess_lock);
++
++	kbdev->csf.glb_init_request_pending = true;
++	kbdev->csf.firmware_hctl_core_pwr =
++				kbase_pm_no_mcu_core_pwroff(kbdev);
++	global_init(kbdev, core_mask);
++}
++
++/* Return true once the configuration requests raised by
++ * kbase_csf_firmware_global_reinit() have been acknowledged, clearing the
++ * pending flag when they have. hwaccess_lock must be held.
++ */
++bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev)
++{
++	lockdep_assert_held(&kbdev->hwaccess_lock);
++	WARN_ON(!kbdev->csf.glb_init_request_pending);
++
++	if (global_request_complete(kbdev, CSF_GLB_REQ_CFG_MASK))
++		kbdev->csf.glb_init_request_pending = false;
++
++	return !kbdev->csf.glb_init_request_pending;
++}
++
++void
kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev, ++ bool update_core_pwroff_timer, bool update_core_mask, u64 core_mask) ++{ ++ unsigned long flags; ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ if (update_core_mask) ++ enable_endpoints_global(&kbdev->csf.global_iface, core_mask); ++ if (update_core_pwroff_timer) ++ enable_shader_poweroff_timer(kbdev, &kbdev->csf.global_iface); ++ ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++} ++ ++bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ return global_request_complete(kbdev, GLB_REQ_CFG_ALLOC_EN_MASK | ++ GLB_REQ_CFG_PWROFF_TIMER_MASK); ++} ++ ++/** ++ * kbase_csf_firmware_reload_worker() - ++ * reload the fw image and re-enable the MCU ++ * @work: CSF Work item for reloading the firmware. ++ * ++ * This helper function will reload the firmware image and re-enable the MCU. ++ * It is supposed to be called after MCU(GPU) has been reset. ++ * Unlike the initial boot the firmware binary image is not parsed completely. ++ * Only the data sections, which were loaded in memory during the initial boot, ++ * are re-initialized either by zeroing them or copying their data from the ++ * firmware binary image. The memory allocation for the firmware pages and ++ * MMU programming is not needed for the reboot, presuming the firmware binary ++ * file on the filesystem would not change. 
++ */ ++static void kbase_csf_firmware_reload_worker(struct work_struct *work) ++{ ++ struct kbase_device *kbdev = container_of(work, struct kbase_device, ++ csf.firmware_reload_work); ++ int err; ++ ++ dev_info(kbdev->dev, "reloading firmware"); ++ ++ /* Reload just the data sections from firmware binary image */ ++ err = reload_fw_data_sections(kbdev); ++ if (err) ++ return; ++ ++ kbase_csf_tl_reader_reset(&kbdev->timeline->csf_tl_reader); ++ ++ /* Reboot the firmware */ ++ kbase_csf_firmware_enable_mcu(kbdev); ++} ++ ++void kbase_csf_firmware_trigger_reload(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ kbdev->csf.firmware_reloaded = false; ++ ++ if (kbdev->csf.firmware_reload_needed) { ++ kbdev->csf.firmware_reload_needed = false; ++ queue_work(system_wq, &kbdev->csf.firmware_reload_work); ++ } else { ++ kbase_csf_firmware_enable_mcu(kbdev); ++ } ++} ++ ++void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev) ++{ ++ u32 version; ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ if (unlikely(!kbdev->csf.firmware_inited)) ++ return; ++ ++ /* Check firmware rebooted properly: we do not expect ++ * the version number to change with a running reboot. 
++ */ ++ version = get_firmware_version(kbdev); ++ ++ if (version != kbdev->csf.global_iface.version) ++ dev_err(kbdev->dev, "Version check failed in firmware reboot."); ++ ++ KBASE_KTRACE_ADD(kbdev, FIRMWARE_REBOOT, NULL, 0u); ++ ++ /* Tell MCU state machine to transit to next state */ ++ kbdev->csf.firmware_reloaded = true; ++ kbase_pm_update_state(kbdev); ++} ++ ++static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ms) ++{ ++#define HYSTERESIS_VAL_UNIT_SHIFT (10) ++ /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ ++ u64 freq = arch_timer_get_cntfrq(); ++ u64 dur_val = dur_ms; ++ u32 cnt_val_u32, reg_val_u32; ++ bool src_system_timestamp = freq > 0; ++ ++ if (!src_system_timestamp) { ++ /* Get the cycle_counter source alternative */ ++ spin_lock(&kbdev->pm.clk_rtm.lock); ++ if (kbdev->pm.clk_rtm.clks[0]) ++ freq = kbdev->pm.clk_rtm.clks[0]->clock_val; ++ else ++ dev_warn(kbdev->dev, "No GPU clock, unexpected intregration issue!"); ++ spin_unlock(&kbdev->pm.clk_rtm.lock); ++ ++ dev_info(kbdev->dev, "Can't get the timestamp frequency, " ++ "use cycle counter format with firmware idle hysteresis!"); ++ } ++ ++ /* Formula for dur_val = ((dur_ms/1000) * freq_HZ) >> 10) */ ++ dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; ++ dur_val = div_u64(dur_val, 1000); ++ ++ /* Interface limits the value field to S32_MAX */ ++ cnt_val_u32 = (dur_val > S32_MAX) ? 
S32_MAX : (u32)dur_val; ++ ++ reg_val_u32 = GLB_IDLE_TIMER_TIMEOUT_SET(0, cnt_val_u32); ++ /* add the source flag */ ++ if (src_system_timestamp) ++ reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32, ++ GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); ++ else ++ reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32, ++ GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER); ++ ++ return reg_val_u32; ++} ++ ++u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) ++{ ++ return kbdev->csf.gpu_idle_hysteresis_ms; ++} ++ ++u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur) ++{ ++ unsigned long flags; ++ const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur); ++ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ kbdev->csf.gpu_idle_hysteresis_ms = dur; ++ kbdev->csf.gpu_idle_dur_count = hysteresis_val; ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ ++ dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x", ++ hysteresis_val); ++ ++ return hysteresis_val; ++} ++ ++static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us) ++{ ++#define PWROFF_VAL_UNIT_SHIFT (10) ++ /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ ++ u64 freq = arch_timer_get_cntfrq(); ++ u64 dur_val = dur_us; ++ u32 cnt_val_u32, reg_val_u32; ++ bool src_system_timestamp = freq > 0; ++ ++ if (!src_system_timestamp) { ++ /* Get the cycle_counter source alternative */ ++ spin_lock(&kbdev->pm.clk_rtm.lock); ++ if (kbdev->pm.clk_rtm.clks[0]) ++ freq = kbdev->pm.clk_rtm.clks[0]->clock_val; ++ else ++ dev_warn(kbdev->dev, "No GPU clock, unexpected integration issue!"); ++ spin_unlock(&kbdev->pm.clk_rtm.lock); ++ ++ dev_info(kbdev->dev, "Can't get the timestamp frequency, " ++ "use cycle counter with MCU Core Poweroff timer!"); ++ } ++ ++ /* Formula for dur_val = ((dur_us/1e6) * freq_HZ) >> 10) */ ++ dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; ++ dur_val = 
div_u64(dur_val, 1000000); ++ ++ /* Interface limits the value field to S32_MAX */ ++ cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val; ++ ++ reg_val_u32 = GLB_PWROFF_TIMER_TIMEOUT_SET(0, cnt_val_u32); ++ /* add the source flag */ ++ if (src_system_timestamp) ++ reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32, ++ GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); ++ else ++ reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32, ++ GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER); ++ ++ return reg_val_u32; ++} ++ ++u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev) ++{ ++ return kbdev->csf.mcu_core_pwroff_dur_us; ++} ++ ++u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur) ++{ ++ unsigned long flags; ++ const u32 pwroff = convert_dur_to_core_pwroff_count(kbdev, dur); ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->csf.mcu_core_pwroff_dur_us = dur; ++ kbdev->csf.mcu_core_pwroff_dur_count = pwroff; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ dev_dbg(kbdev->dev, "MCU Core Poweroff input update: 0x%.8x", pwroff); ++ ++ return pwroff; ++} ++ ++ ++int kbase_csf_firmware_early_init(struct kbase_device *kbdev) ++{ ++ init_waitqueue_head(&kbdev->csf.event_wait); ++ kbdev->csf.interrupt_received = false; ++ kbdev->csf.fw_timeout_ms = CSF_FIRMWARE_TIMEOUT_MS; ++ ++ INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces); ++ INIT_LIST_HEAD(&kbdev->csf.firmware_config); ++ INIT_LIST_HEAD(&kbdev->csf.firmware_timeline_metadata); ++ INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list); ++ INIT_WORK(&kbdev->csf.firmware_reload_work, ++ kbase_csf_firmware_reload_worker); ++ INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker); ++ ++ mutex_init(&kbdev->csf.reg_lock); ++ ++ return 0; ++} ++ ++int kbase_csf_firmware_init(struct kbase_device *kbdev) ++{ ++ const struct firmware *firmware; ++ const u32 magic = FIRMWARE_HEADER_MAGIC; ++ u8 version_major, version_minor; ++ 
u32 version_hash; ++ u32 entry_end_offset; ++ u32 entry_offset; ++ int ret; ++ ++ lockdep_assert_held(&kbdev->fw_load_lock); ++ ++ if (WARN_ON((kbdev->as_free & MCU_AS_BITMASK) == 0)) ++ return -EINVAL; ++ kbdev->as_free &= ~MCU_AS_BITMASK; ++ ++ ret = kbase_mmu_init(kbdev, &kbdev->csf.mcu_mmu, NULL, ++ BASE_MEM_GROUP_DEFAULT); ++ ++ if (ret != 0) { ++ /* Release the address space */ ++ kbdev->as_free |= MCU_AS_BITMASK; ++ return ret; ++ } ++ ++ kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS; ++ kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count( ++ kbdev, FIRMWARE_IDLE_HYSTERESIS_TIME_MS); ++ ++ kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US; ++ kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count( ++ kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US); ++ ++ ret = kbase_mcu_shared_interface_region_tracker_init(kbdev); ++ if (ret != 0) { ++ dev_err(kbdev->dev, ++ "Failed to setup the rb tree for managing shared interface segment\n"); ++ goto error; ++ } ++ ++ if (request_firmware(&firmware, fw_name, kbdev->dev) != 0) { ++ dev_err(kbdev->dev, ++ "Failed to load firmware image '%s'\n", ++ fw_name); ++ ret = -ENOENT; ++ goto error; ++ } ++ ++ if (firmware->size < FIRMWARE_HEADER_LENGTH) { ++ dev_err(kbdev->dev, "Firmware too small\n"); ++ ret = -EINVAL; ++ goto error; ++ } ++ ++ if (memcmp(firmware->data, &magic, sizeof(magic)) != 0) { ++ dev_err(kbdev->dev, "Incorrect firmware magic\n"); ++ ret = -EINVAL; ++ goto error; ++ } ++ ++ version_minor = firmware->data[4]; ++ version_major = firmware->data[5]; ++ ++ if (version_major != FIRMWARE_HEADER_VERSION) { ++ dev_err(kbdev->dev, ++ "Firmware header version %d.%d not understood\n", ++ version_major, version_minor); ++ ret = -EINVAL; ++ goto error; ++ } ++ ++ memcpy(&version_hash, &firmware->data[8], sizeof(version_hash)); ++ ++ dev_notice(kbdev->dev, "Loading Mali firmware 0x%x", version_hash); ++ ++ memcpy(&entry_end_offset, &firmware->data[0x10], ++ 
sizeof(entry_end_offset)); ++ ++ if (entry_end_offset > firmware->size) { ++ dev_err(kbdev->dev, "Firmware image is truncated\n"); ++ ret = -EINVAL; ++ goto error; ++ } ++ ++ entry_offset = FIRMWARE_HEADER_LENGTH; ++ while (entry_offset < entry_end_offset) { ++ u32 header; ++ unsigned int size; ++ ++ memcpy(&header, &firmware->data[entry_offset], sizeof(header)); ++ ++ size = entry_size(header); ++ ++ ret = load_firmware_entry(kbdev, firmware, entry_offset, ++ header); ++ if (ret != 0) { ++ dev_err(kbdev->dev, "Failed to load firmware image\n"); ++ goto error; ++ } ++ entry_offset += size; ++ } ++ ++ if (!kbdev->csf.shared_interface) { ++ dev_err(kbdev->dev, "Shared interface region not found\n"); ++ ret = -EINVAL; ++ goto error; ++ } else { ++ ret = setup_shared_iface_static_region(kbdev); ++ if (ret != 0) { ++ dev_err(kbdev->dev, "Failed to insert a region for shared iface entry parsed from fw image\n"); ++ goto error; ++ } ++ } ++ ++ ret = kbase_csf_firmware_trace_buffers_init(kbdev); ++ if (ret != 0) { ++ dev_err(kbdev->dev, "Failed to initialize trace buffers\n"); ++ goto error; ++ } ++ ++ /* Make sure L2 cache is powered up */ ++ kbase_pm_wait_for_l2_powered(kbdev); ++ ++ /* Load the MMU tables into the selected address space */ ++ load_mmu_tables(kbdev); ++ ++ boot_csf_firmware(kbdev); ++ ++ ret = parse_capabilities(kbdev); ++ if (ret != 0) ++ goto error; ++ ++ ret = kbase_csf_doorbell_mapping_init(kbdev); ++ if (ret != 0) ++ goto error; ++ ++ ret = kbase_csf_scheduler_init(kbdev); ++ if (ret != 0) ++ goto error; ++ ++ ret = kbase_csf_setup_dummy_user_reg_page(kbdev); ++ if (ret != 0) ++ goto error; ++ ++ ret = kbase_csf_timeout_init(kbdev); ++ if (ret != 0) ++ goto error; ++ ++ ret = global_init_on_boot(kbdev); ++ if (ret != 0) ++ goto error; ++ ++ ret = kbase_csf_firmware_cfg_init(kbdev); ++ if (ret != 0) ++ goto error; ++ ++ ++ /* Firmware loaded successfully */ ++ release_firmware(firmware); ++ KBASE_KTRACE_ADD(kbdev, FIRMWARE_BOOT, NULL, ++ 
(((u64)version_hash) << 32) | ++ (((u64)version_major) << 8) | version_minor); ++ return 0; ++ ++error: ++ kbase_csf_firmware_term(kbdev); ++ release_firmware(firmware); ++ return ret; ++} ++ ++void kbase_csf_firmware_term(struct kbase_device *kbdev) ++{ ++ unsigned long flags; ++ int ret = 0; ++ ++ cancel_work_sync(&kbdev->csf.fw_error_work); ++ ++ ret = kbase_reset_gpu_wait(kbdev); ++ ++ WARN(ret, "failed to wait for GPU reset"); ++ ++ kbase_csf_firmware_cfg_term(kbdev); ++ ++ kbase_csf_timeout_term(kbdev); ++ ++ kbase_csf_free_dummy_user_reg_page(kbdev); ++ ++ kbase_csf_scheduler_term(kbdev); ++ ++ kbase_csf_doorbell_mapping_term(kbdev); ++ ++ /* Explicitly trigger the disabling of MCU through the state machine and ++ * wait for its completion. It may not have been disabled yet due to the ++ * power policy. ++ */ ++ kbdev->pm.backend.mcu_desired = false; ++ kbase_pm_wait_for_desired_state(kbdev); ++ ++ free_global_iface(kbdev); ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->csf.firmware_inited = false; ++ if (WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_OFF)) { ++ kbdev->pm.backend.mcu_state = KBASE_MCU_OFF; ++ stop_csf_firmware(kbdev); ++ } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ unload_mmu_tables(kbdev); ++ ++ kbase_csf_firmware_trace_buffers_term(kbdev); ++ ++ while (!list_empty(&kbdev->csf.firmware_interfaces)) { ++ struct kbase_csf_firmware_interface *interface; ++ ++ interface = ++ list_first_entry(&kbdev->csf.firmware_interfaces, ++ struct kbase_csf_firmware_interface, ++ node); ++ list_del(&interface->node); ++ ++ vunmap(interface->kernel_map); ++ if (interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) { ++ kbase_csf_protected_memory_free(kbdev, interface->pma, ++ interface->num_pages); ++ } else { ++ kbase_mem_pool_free_pages( ++ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], ++ interface->num_pages, interface->phys, ++ true, false); ++ } ++ ++ kfree(interface->phys); ++ kfree(interface); ++ } ++ ++ while 
(!list_empty(&kbdev->csf.firmware_timeline_metadata)) { ++ struct firmware_timeline_metadata *metadata; ++ ++ metadata = list_first_entry( ++ &kbdev->csf.firmware_timeline_metadata, ++ struct firmware_timeline_metadata, ++ node); ++ list_del(&metadata->node); ++ ++ kfree(metadata); ++ } ++ ++#ifndef MALI_KBASE_BUILD ++ mali_kutf_fw_utf_entry_cleanup(kbdev); ++#endif ++ ++ /* This will also free up the region allocated for the shared interface ++ * entry parsed from the firmware image. ++ */ ++ kbase_mcu_shared_interface_region_tracker_term(kbdev); ++ ++ mutex_destroy(&kbdev->csf.reg_lock); ++ ++ kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu); ++ ++ /* Release the address space */ ++ kbdev->as_free |= MCU_AS_BITMASK; ++} ++ ++void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ const u32 glb_req = ++ kbase_csf_firmware_global_input_read(global_iface, GLB_REQ); ++ ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++ ++ /* The scheduler is assumed to only call the enable when its internal ++ * state indicates that the idle timer has previously been disabled. So ++ * on entry the expected field values are: ++ * 1. GLOBAL_INPUT_BLOCK.GLB_REQ.IDLE_ENABLE: 0 ++ * 2. 
GLOBAL_OUTPUT_BLOCK.GLB_ACK.IDLE_ENABLE: 0, or, on 1 -> 0 ++ */ ++ ++ if (glb_req & GLB_REQ_IDLE_ENABLE_MASK) ++ dev_err(kbdev->dev, "Incoherent scheduler state on REQ_IDLE_ENABLE!"); ++ ++ kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER, ++ kbdev->csf.gpu_idle_dur_count); ++ ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, ++ GLB_REQ_REQ_IDLE_ENABLE, GLB_REQ_IDLE_ENABLE_MASK); ++ ++ dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x", ++ kbdev->csf.gpu_idle_dur_count); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++} ++ ++void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++ ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, ++ GLB_REQ_REQ_IDLE_DISABLE, ++ GLB_REQ_IDLE_DISABLE_MASK); ++ dev_dbg(kbdev->dev, "Sending request to disable gpu idle timer"); ++ ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++} ++ ++void kbase_csf_firmware_ping(struct kbase_device *const kbdev) ++{ ++ const struct kbase_csf_global_iface *const global_iface = ++ &kbdev->csf.global_iface; ++ unsigned long flags; ++ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ set_global_request(global_iface, GLB_REQ_PING_MASK); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++} ++ ++int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev) ++{ ++ kbase_csf_firmware_ping(kbdev); ++ return wait_for_global_request(kbdev, GLB_REQ_PING_MASK); ++} ++ ++int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev, ++ u64 const timeout) ++{ ++ const struct kbase_csf_global_iface *const global_iface = ++ &kbdev->csf.global_iface; ++ unsigned long flags; ++ int err; ++ ++ /* The 'reg_lock' is also taken and is held till the update is not ++ * complete, to ensure the update of timeout value by 
multiple Users ++ * gets serialized. ++ */ ++ mutex_lock(&kbdev->csf.reg_lock); ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ set_timeout_global(global_iface, timeout); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ ++ err = wait_for_global_request(kbdev, GLB_REQ_CFG_PROGRESS_TIMER_MASK); ++ mutex_unlock(&kbdev->csf.reg_lock); ++ ++ return err; ++} ++ ++void kbase_csf_enter_protected_mode(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ unsigned long flags; ++ int err; ++ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ set_global_request(global_iface, GLB_REQ_PROTM_ENTER_MASK); ++ dev_dbg(kbdev->dev, "Sending request to enter protected mode"); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ ++ err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK); ++ ++ if (!err) { ++ unsigned long irq_flags; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->protected_mode = true; ++ kbase_ipa_protection_mode_switch_event(kbdev); ++ kbase_ipa_control_protm_entered(kbdev); ++ ++ kbase_csf_scheduler_spin_lock(kbdev, &irq_flags); ++ kbase_hwcnt_backend_csf_protm_entered(&kbdev->hwcnt_gpu_iface); ++ kbase_csf_scheduler_spin_unlock(kbdev, irq_flags); ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } ++} ++ ++void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ unsigned long flags; ++ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ set_global_request(global_iface, GLB_REQ_HALT_MASK); ++ dev_dbg(kbdev->dev, "Sending request to HALT MCU"); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++} ++ ++int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev) ++{ ++ struct 
kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ unsigned long flags; ++ int err = 0; ++ ++ /* Ensure GPU is powered-up until we complete config update.*/ ++ kbase_csf_scheduler_pm_active(kbdev); ++ ++ /* The 'reg_lock' is also taken and is held till the update is ++ * complete, to ensure the config update gets serialized. ++ */ ++ mutex_lock(&kbdev->csf.reg_lock); ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ ++ set_global_request(global_iface, GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK); ++ dev_dbg(kbdev->dev, "Sending request for FIRMWARE_CONFIG_UPDATE"); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ ++ err = wait_for_global_request(kbdev, ++ GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK); ++ mutex_unlock(&kbdev->csf.reg_lock); ++ ++ kbase_csf_scheduler_pm_idle(kbdev); ++ return err; ++} ++ ++/** ++ * copy_grp_and_stm - Copy CS and/or group data ++ * ++ * @iface: Global CSF interface provided by the firmware. ++ * @group_data: Pointer where to store all the group data ++ * (sequentially). ++ * @max_group_num: The maximum number of groups to be read. Can be 0, in ++ * which case group_data is unused. ++ * @stream_data: Pointer where to store all the CS data ++ * (sequentially). ++ * @max_total_stream_num: The maximum number of CSs to be read. ++ * Can be 0, in which case stream_data is unused. ++ * ++ * Return: Total number of CSs, summed across all groups. 
++ */ ++static u32 copy_grp_and_stm( ++ const struct kbase_csf_global_iface * const iface, ++ struct basep_cs_group_control * const group_data, ++ u32 max_group_num, ++ struct basep_cs_stream_control * const stream_data, ++ u32 max_total_stream_num) ++{ ++ u32 i, total_stream_num = 0; ++ ++ if (WARN_ON((max_group_num > 0) && !group_data)) ++ max_group_num = 0; ++ ++ if (WARN_ON((max_total_stream_num > 0) && !stream_data)) ++ max_total_stream_num = 0; ++ ++ for (i = 0; i < iface->group_num; i++) { ++ u32 j; ++ ++ if (i < max_group_num) { ++ group_data[i].features = iface->groups[i].features; ++ group_data[i].stream_num = iface->groups[i].stream_num; ++ group_data[i].suspend_size = ++ iface->groups[i].suspend_size; ++ } ++ for (j = 0; j < iface->groups[i].stream_num; j++) { ++ if (total_stream_num < max_total_stream_num) ++ stream_data[total_stream_num].features = ++ iface->groups[i].streams[j].features; ++ total_stream_num++; ++ } ++ } ++ ++ return total_stream_num; ++} ++ ++u32 kbase_csf_firmware_get_glb_iface( ++ struct kbase_device *kbdev, ++ struct basep_cs_group_control *const group_data, ++ u32 const max_group_num, ++ struct basep_cs_stream_control *const stream_data, ++ u32 const max_total_stream_num, u32 *const glb_version, ++ u32 *const features, u32 *const group_num, u32 *const prfcnt_size, ++ u32 *instr_features) ++{ ++ const struct kbase_csf_global_iface * const iface = ++ &kbdev->csf.global_iface; ++ ++ if (WARN_ON(!glb_version) || WARN_ON(!features) || ++ WARN_ON(!group_num) || WARN_ON(!prfcnt_size) || ++ WARN_ON(!instr_features)) ++ return 0; ++ ++ *glb_version = iface->version; ++ *features = iface->features; ++ *group_num = iface->group_num; ++ *prfcnt_size = iface->prfcnt_size; ++ *instr_features = iface->instr_features; ++ ++ return copy_grp_and_stm(iface, group_data, max_group_num, ++ stream_data, max_total_stream_num); ++} ++ ++const char *kbase_csf_firmware_get_timeline_metadata( ++ struct kbase_device *kbdev, const char *name, size_t *size) 
++{ ++ struct firmware_timeline_metadata *metadata; ++ ++ list_for_each_entry( ++ metadata, &kbdev->csf.firmware_timeline_metadata, node) { ++ if (!strcmp(metadata->name, name)) { ++ *size = metadata->size; ++ return metadata->data; ++ } ++ } ++ ++ *size = 0; ++ return NULL; ++} ++ ++int kbase_csf_firmware_mcu_shared_mapping_init( ++ struct kbase_device *kbdev, ++ unsigned int num_pages, ++ unsigned long cpu_map_properties, ++ unsigned long gpu_map_properties, ++ struct kbase_csf_mapping *csf_mapping) ++{ ++ struct tagged_addr *phys; ++ struct kbase_va_region *va_reg; ++ struct page **page_list; ++ void *cpu_addr; ++ int i, ret = 0; ++ pgprot_t cpu_map_prot = PAGE_KERNEL; ++ unsigned long gpu_map_prot; ++ ++ if (cpu_map_properties & PROT_READ) ++ cpu_map_prot = PAGE_KERNEL_RO; ++ ++ if (kbdev->system_coherency == COHERENCY_ACE) { ++ gpu_map_prot = ++ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE); ++ } else { ++ gpu_map_prot = ++ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); ++ cpu_map_prot = pgprot_writecombine(cpu_map_prot); ++ }; ++ ++ phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL); ++ if (!phys) ++ goto out; ++ ++ page_list = kmalloc_array(num_pages, sizeof(*page_list), GFP_KERNEL); ++ if (!page_list) ++ goto page_list_alloc_error; ++ ++ ret = kbase_mem_pool_alloc_pages( ++ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], ++ num_pages, phys, false); ++ if (ret <= 0) ++ goto phys_mem_pool_alloc_error; ++ ++ for (i = 0; i < num_pages; i++) ++ page_list[i] = as_page(phys[i]); ++ ++ cpu_addr = vmap(page_list, num_pages, VM_MAP, cpu_map_prot); ++ if (!cpu_addr) ++ goto vmap_error; ++ ++ va_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0, ++ num_pages, KBASE_REG_ZONE_MCU_SHARED); ++ if (!va_reg) ++ goto va_region_alloc_error; ++ ++ mutex_lock(&kbdev->csf.reg_lock); ++ ret = kbase_add_va_region_rbtree(kbdev, va_reg, 0, num_pages, 1); ++ va_reg->flags &= ~KBASE_REG_FREE; ++ if (ret) ++ goto va_region_add_error; ++ 
mutex_unlock(&kbdev->csf.reg_lock); ++ ++ gpu_map_properties &= (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR); ++ gpu_map_properties |= gpu_map_prot; ++ ++ ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, ++ va_reg->start_pfn, &phys[0], num_pages, ++ gpu_map_properties, KBASE_MEM_GROUP_CSF_FW); ++ if (ret) ++ goto mmu_insert_pages_error; ++ ++ kfree(page_list); ++ csf_mapping->phys = phys; ++ csf_mapping->cpu_addr = cpu_addr; ++ csf_mapping->va_reg = va_reg; ++ csf_mapping->num_pages = num_pages; ++ ++ return 0; ++ ++mmu_insert_pages_error: ++ mutex_lock(&kbdev->csf.reg_lock); ++ kbase_remove_va_region(va_reg); ++va_region_add_error: ++ kbase_free_alloced_region(va_reg); ++ mutex_unlock(&kbdev->csf.reg_lock); ++va_region_alloc_error: ++ vunmap(cpu_addr); ++vmap_error: ++ kbase_mem_pool_free_pages( ++ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], ++ num_pages, phys, false, false); ++ ++phys_mem_pool_alloc_error: ++ kfree(page_list); ++page_list_alloc_error: ++ kfree(phys); ++out: ++ /* Zero-initialize the mapping to make sure that the termination ++ * function doesn't try to unmap or free random addresses. 
++ */ ++ csf_mapping->phys = NULL; ++ csf_mapping->cpu_addr = NULL; ++ csf_mapping->va_reg = NULL; ++ csf_mapping->num_pages = 0; ++ ++ return -ENOMEM; ++} ++ ++void kbase_csf_firmware_mcu_shared_mapping_term( ++ struct kbase_device *kbdev, struct kbase_csf_mapping *csf_mapping) ++{ ++ if (csf_mapping->va_reg) { ++ mutex_lock(&kbdev->csf.reg_lock); ++ kbase_remove_va_region(csf_mapping->va_reg); ++ kbase_free_alloced_region(csf_mapping->va_reg); ++ mutex_unlock(&kbdev->csf.reg_lock); ++ } ++ ++ if (csf_mapping->phys) { ++ kbase_mem_pool_free_pages( ++ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], ++ csf_mapping->num_pages, csf_mapping->phys, false, ++ false); ++ } ++ ++ vunmap(csf_mapping->cpu_addr); ++ kfree(csf_mapping->phys); ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware.h +new file mode 100644 +index 0000000..60d7065 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware.h +@@ -0,0 +1,811 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ ++ ++#ifndef _KBASE_CSF_FIRMWARE_H_ ++#define _KBASE_CSF_FIRMWARE_H_ ++ ++#include "device/mali_kbase_device.h" ++#include ++ ++/* ++ * PAGE_KERNEL_RO was only defined on 32bit ARM in 4.19 in: ++ * Commit a3266bd49c721e2e0a71f352d83713fbd60caadb ++ * Author: Luis R. Rodriguez ++ * Date: Fri Aug 17 15:46:29 2018 -0700 ++ * ++ * mm: provide a fallback for PAGE_KERNEL_RO for architectures ++ * ++ * Some architectures do not define certain PAGE_KERNEL_* flags, this is ++ * either because: ++ * ++ * a) The way to implement some of these flags is *not yet ported*, or ++ * b) The architecture *has no way* to describe them ++ * ++ * [snip] ++ * ++ * This can be removed once support of 32bit ARM kernels predating 4.19 is no ++ * longer required. ++ */ ++#ifndef PAGE_KERNEL_RO ++#define PAGE_KERNEL_RO PAGE_KERNEL ++#endif ++ ++/* Address space number to claim for the firmware. */ ++#define MCU_AS_NR 0 ++#define MCU_AS_BITMASK (1 << MCU_AS_NR) ++ ++/* Number of available Doorbells */ ++#define CSF_NUM_DOORBELL ((u8)24) ++ ++/* Offset to the first HW doorbell page */ ++#define CSF_HW_DOORBELL_PAGE_OFFSET ((u32)0x80000) ++ ++/* Size of HW Doorbell page, used to calculate the offset to subsequent pages */ ++#define CSF_HW_DOORBELL_PAGE_SIZE ((u32)0x10000) ++ ++/* Doorbell 0 is used by the driver. */ ++#define CSF_KERNEL_DOORBELL_NR ((u32)0) ++ ++/* Offset of name inside a trace buffer entry in the firmware image */ ++#define TRACE_BUFFER_ENTRY_NAME_OFFSET (0x1C) ++ ++/* All implementations of the host interface with major version 0 must comply ++ * with these restrictions: ++ */ ++/* GLB_GROUP_NUM: At least 3 CSGs, but no more than 31 */ ++#define MIN_SUPPORTED_CSGS 3 ++#define MAX_SUPPORTED_CSGS 31 ++/* GROUP_STREAM_NUM: At least 8 CSs per CSG, but no more than 32 */ ++#define MIN_SUPPORTED_STREAMS_PER_GROUP 8 ++/* Maximum CSs per csg. 
*/ ++#define MAX_SUPPORTED_STREAMS_PER_GROUP 32 ++ ++/* Waiting timeout for status change acknowledgment, in milliseconds */ ++#define CSF_FIRMWARE_TIMEOUT_MS (3000) /* Relaxed to 3000ms from 800ms due to Android */ ++ ++struct kbase_device; ++ ++ ++/** ++ * struct kbase_csf_mapping - Memory mapping for CSF memory. ++ * @phys: Physical memory allocation used by the mapping. ++ * @cpu_addr: Starting CPU address for the mapping. ++ * @va_reg: GPU virtual address region for the mapping. ++ * @num_pages: Size of the mapping, in memory pages. ++ */ ++struct kbase_csf_mapping { ++ struct tagged_addr *phys; ++ void *cpu_addr; ++ struct kbase_va_region *va_reg; ++ unsigned int num_pages; ++}; ++ ++/** ++ * struct kbase_csf_trace_buffers - List and state of firmware trace buffers. ++ * @list: List of trace buffers descriptors. ++ * @mcu_rw: Metadata for the MCU shared memory mapping used for ++ * GPU-readable,writable/CPU-writable variables. ++ * @mcu_write: Metadata for the MCU shared memory mapping used for ++ * GPU-writable/CPU-readable variables. ++ */ ++struct kbase_csf_trace_buffers { ++ struct list_head list; ++ struct kbase_csf_mapping mcu_rw; ++ struct kbase_csf_mapping mcu_write; ++}; ++ ++/** ++ * struct kbase_csf_cmd_stream_info - CSI provided by the firmware. ++ * ++ * @kbdev: Address of the instance of a GPU platform device that implements ++ * this interface. ++ * @features: Bit field of CS features (e.g. which types of jobs ++ * are supported). Bits 7:0 specify the number of work registers(-1). ++ * Bits 11:8 specify the number of scoreboard entries(-1). ++ * @input: Address of CSI input page. ++ * @output: Address of CSI output page. ++ */ ++struct kbase_csf_cmd_stream_info { ++ struct kbase_device *kbdev; ++ u32 features; ++ void *input; ++ void *output; ++}; ++ ++/** ++ * kbase_csf_firmware_cs_input() - Set a word in a CS's input page ++ * ++ * @info: CSI provided by the firmware. ++ * @offset: Offset of the word to be written, in bytes. 
++ * @value: Value to be written. ++ */ ++void kbase_csf_firmware_cs_input( ++ const struct kbase_csf_cmd_stream_info *info, u32 offset, u32 value); ++ ++/** ++ * kbase_csf_firmware_cs_input_read() - Read a word in a CS's input page ++ * ++ * Return: Value of the word read from the CS's input page. ++ * ++ * @info: CSI provided by the firmware. ++ * @offset: Offset of the word to be read, in bytes. ++ */ ++u32 kbase_csf_firmware_cs_input_read( ++ const struct kbase_csf_cmd_stream_info *const info, const u32 offset); ++ ++/** ++ * kbase_csf_firmware_cs_input_mask() - Set part of a word in a CS's input page ++ * ++ * @info: CSI provided by the firmware. ++ * @offset: Offset of the word to be modified, in bytes. ++ * @value: Value to be written. ++ * @mask: Bitmask with the bits to be modified set. ++ */ ++void kbase_csf_firmware_cs_input_mask( ++ const struct kbase_csf_cmd_stream_info *info, u32 offset, ++ u32 value, u32 mask); ++ ++/** ++ * kbase_csf_firmware_cs_output() - Read a word in a CS's output page ++ * ++ * Return: Value of the word read from the CS's output page. ++ * ++ * @info: CSI provided by the firmware. ++ * @offset: Offset of the word to be read, in bytes. ++ */ ++u32 kbase_csf_firmware_cs_output( ++ const struct kbase_csf_cmd_stream_info *info, u32 offset); ++/** ++ * struct kbase_csf_cmd_stream_group_info - CSG interface provided by the ++ * firmware. ++ * ++ * @kbdev: Address of the instance of a GPU platform device that implements ++ * this interface. ++ * @features: Bit mask of features. Reserved bits should be 0, and should ++ * be ignored. ++ * @input: Address of global interface input page. ++ * @output: Address of global interface output page. ++ * @suspend_size: Size in bytes for normal suspend buffer for the CSG ++ * @protm_suspend_size: Size in bytes for protected mode suspend buffer ++ * for the CSG. ++ * @stream_num: Number of CSs in the CSG. 
++ * @stream_stride: Stride in bytes in JASID0 virtual address between ++ * CS capability structures. ++ * @streams: Address of an array of CS capability structures. ++ */ ++struct kbase_csf_cmd_stream_group_info { ++ struct kbase_device *kbdev; ++ u32 features; ++ void *input; ++ void *output; ++ u32 suspend_size; ++ u32 protm_suspend_size; ++ u32 stream_num; ++ u32 stream_stride; ++ struct kbase_csf_cmd_stream_info *streams; ++}; ++ ++/** ++ * kbase_csf_firmware_csg_input() - Set a word in a CSG's input page ++ * ++ * @info: CSG interface provided by the firmware. ++ * @offset: Offset of the word to be written, in bytes. ++ * @value: Value to be written. ++ */ ++void kbase_csf_firmware_csg_input( ++ const struct kbase_csf_cmd_stream_group_info *info, u32 offset, ++ u32 value); ++ ++/** ++ * kbase_csf_firmware_csg_input_read() - Read a word in a CSG's input page ++ * ++ * Return: Value of the word read from the CSG's input page. ++ * ++ * @info: CSG interface provided by the firmware. ++ * @offset: Offset of the word to be read, in bytes. ++ */ ++u32 kbase_csf_firmware_csg_input_read( ++ const struct kbase_csf_cmd_stream_group_info *info, u32 offset); ++ ++/** ++ * kbase_csf_firmware_csg_input_mask() - Set part of a word in a CSG's ++ * input page ++ * ++ * @info: CSG interface provided by the firmware. ++ * @offset: Offset of the word to be modified, in bytes. ++ * @value: Value to be written. ++ * @mask: Bitmask with the bits to be modified set. ++ */ ++void kbase_csf_firmware_csg_input_mask( ++ const struct kbase_csf_cmd_stream_group_info *info, u32 offset, ++ u32 value, u32 mask); ++ ++/** ++ * kbase_csf_firmware_csg_output()- Read a word in a CSG's output page ++ * ++ * Return: Value of the word read from the CSG's output page. ++ * ++ * @info: CSG interface provided by the firmware. ++ * @offset: Offset of the word to be read, in bytes. 
++ */ ++u32 kbase_csf_firmware_csg_output( ++ const struct kbase_csf_cmd_stream_group_info *info, u32 offset); ++ ++/** ++ * struct kbase_csf_global_iface - Global CSF interface ++ * provided by the firmware. ++ * ++ * @kbdev: Address of the instance of a GPU platform device that implements ++ * this interface. ++ * @version: Bits 31:16 hold the major version number and 15:0 hold the minor ++ * version number. A higher minor version is backwards-compatible ++ * with a lower minor version for the same major version. ++ * @features: Bit mask of features (e.g. whether certain types of job can ++ * be suspended). Reserved bits should be 0, and should be ignored. ++ * @input: Address of global interface input page. ++ * @output: Address of global interface output page. ++ * @group_num: Number of CSGs supported. ++ * @group_stride: Stride in bytes in JASID0 virtual address between ++ * CSG capability structures. ++ * @prfcnt_size: Performance counters size. ++ * @instr_features: Instrumentation features. (csf >= 1.1.0) ++ * @groups: Address of an array of CSG capability structures. ++ */ ++struct kbase_csf_global_iface { ++ struct kbase_device *kbdev; ++ u32 version; ++ u32 features; ++ void *input; ++ void *output; ++ u32 group_num; ++ u32 group_stride; ++ u32 prfcnt_size; ++ u32 instr_features; ++ struct kbase_csf_cmd_stream_group_info *groups; ++}; ++ ++/** ++ * kbase_csf_firmware_global_input() - Set a word in the global input page ++ * ++ * @iface: CSF interface provided by the firmware. ++ * @offset: Offset of the word to be written, in bytes. ++ * @value: Value to be written. ++ */ ++void kbase_csf_firmware_global_input( ++ const struct kbase_csf_global_iface *iface, u32 offset, u32 value); ++ ++/** ++ * kbase_csf_firmware_global_input_mask() - Set part of a word in the global ++ * input page ++ * ++ * @iface: CSF interface provided by the firmware. ++ * @offset: Offset of the word to be modified, in bytes. ++ * @value: Value to be written. 
++ * @mask: Bitmask with the bits to be modified set. ++ */ ++void kbase_csf_firmware_global_input_mask( ++ const struct kbase_csf_global_iface *iface, u32 offset, ++ u32 value, u32 mask); ++ ++/** ++ * kbase_csf_firmware_global_input_read() - Read a word in a global input page ++ * ++ * Return: Value of the word read from the global input page. ++ * ++ * @info: CSF interface provided by the firmware. ++ * @offset: Offset of the word to be read, in bytes. ++ */ ++u32 kbase_csf_firmware_global_input_read( ++ const struct kbase_csf_global_iface *info, u32 offset); ++ ++/** ++ * kbase_csf_firmware_global_output() - Read a word in the global output page ++ * ++ * Return: Value of the word read from the global output page. ++ * ++ * @iface: CSF interface provided by the firmware. ++ * @offset: Offset of the word to be read, in bytes. ++ */ ++u32 kbase_csf_firmware_global_output( ++ const struct kbase_csf_global_iface *iface, u32 offset); ++ ++/* Calculate the offset to the Hw doorbell page corresponding to the ++ * doorbell number. ++ */ ++static u32 csf_doorbell_offset(int doorbell_nr) ++{ ++ WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL); ++ ++ return CSF_HW_DOORBELL_PAGE_OFFSET + ++ (doorbell_nr * CSF_HW_DOORBELL_PAGE_SIZE); ++} ++ ++static inline void kbase_csf_ring_doorbell(struct kbase_device *kbdev, ++ int doorbell_nr) ++{ ++ WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL); ++ ++ kbase_reg_write(kbdev, csf_doorbell_offset(doorbell_nr), (u32)1); ++} ++ ++/** ++ * kbase_csf_read_firmware_memory - Read a value in a GPU address ++ * ++ * This function reads a value in a GPU address that belongs to ++ * a private firmware memory region. The function assumes that the location ++ * is not permanently mapped on the CPU address space, therefore it maps it ++ * and then unmaps it to access it independently. ++ * ++ * @kbdev: Device pointer ++ * @gpu_addr: GPU address to read ++ * @value: output pointer to which the read value will be written. 
++ */ ++void kbase_csf_read_firmware_memory(struct kbase_device *kbdev, ++ u32 gpu_addr, u32 *value); ++ ++/** ++ * kbase_csf_update_firmware_memory - Write a value in a GPU address ++ * ++ * This function writes a given value in a GPU address that belongs to ++ * a private firmware memory region. The function assumes that the destination ++ * is not permanently mapped on the CPU address space, therefore it maps it ++ * and then unmaps it to access it independently. ++ * ++ * @kbdev: Device pointer ++ * @gpu_addr: GPU address to write ++ * @value: Value to write ++ */ ++void kbase_csf_update_firmware_memory(struct kbase_device *kbdev, ++ u32 gpu_addr, u32 value); ++ ++/** ++ * kbase_csf_firmware_early_init() - Early initialization for the firmware. ++ * @kbdev: Kbase device ++ * ++ * Initialize resources related to the firmware. Must be called at kbase probe. ++ * ++ * Return: 0 if successful, negative error code on failure ++ */ ++int kbase_csf_firmware_early_init(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_firmware_init() - Load the firmware for the CSF MCU ++ * @kbdev: Kbase device ++ * ++ * Request the firmware from user space and load it into memory. ++ * ++ * Return: 0 if successful, negative error code on failure ++ */ ++int kbase_csf_firmware_init(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_firmware_term() - Unload the firmware ++ * @kbdev: Kbase device ++ * ++ * Frees the memory allocated by kbase_csf_firmware_init() ++ */ ++void kbase_csf_firmware_term(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_firmware_ping - Send the ping request to firmware. ++ * ++ * The function sends the ping request to firmware. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ */ ++void kbase_csf_firmware_ping(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_firmware_ping_wait - Send the ping request to firmware and wait. 
++ * ++ * The function sends the ping request to firmware and waits to confirm it is ++ * alive. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_firmware_ping_wait(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_firmware_set_timeout - Set a hardware endpoint progress timeout. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @timeout: The maximum number of GPU cycles that is allowed to elapse ++ * without forward progress before the driver terminates a GPU ++ * command queue group. ++ * ++ * Configures the progress timeout value used by the firmware to decide ++ * when to report that a task is not making progress on an endpoint. ++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_firmware_set_timeout(struct kbase_device *kbdev, u64 timeout); ++ ++/** ++ * kbase_csf_enter_protected_mode - Send the Global request to firmware to ++ * enter protected mode and wait for its ++ * completion. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ */ ++void kbase_csf_enter_protected_mode(struct kbase_device *kbdev); ++ ++static inline bool kbase_csf_firmware_mcu_halted(struct kbase_device *kbdev) ++{ ++ return (kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS)) == ++ MCU_STATUS_HALTED); ++} ++ ++/** ++ * kbase_csf_firmware_trigger_mcu_halt - Send the Global request to firmware to ++ * halt its operation and bring itself ++ * into a known internal state for warm ++ * boot later. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ */ ++void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_firmware_enable_mcu - Send the command to enable MCU ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. 
++ */ ++static inline void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev) ++{ ++ /* Trigger the boot of MCU firmware, Use the AUTO mode as ++ * otherwise on fast reset, to exit protected mode, MCU will ++ * not reboot by itself to enter normal mode. ++ */ ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_AUTO); ++} ++ ++/** ++ * kbase_csf_firmware_disable_mcu - Send the command to disable MCU ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ */ ++static inline void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev) ++{ ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_DISABLE); ++} ++ ++/** ++ * kbase_csf_firmware_disable_mcu_wait - Wait for the MCU to reach disabled ++ * status. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ */ ++void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_firmware_trigger_reload - Trigger the reboot of MCU firmware, for the ++ * cold boot case firmware image would be ++ * reloaded from filesystem into memory. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ */ ++void kbase_csf_firmware_trigger_reload(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_firmware_reload_completed - The reboot of MCU firmware has ++ * completed. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ */ ++void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_firmware_global_reinit - Send the Global configuration requests ++ * after the reboot of MCU firmware. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @core_mask: Mask of the enabled shader cores. 
++ */ ++void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev, ++ u64 core_mask); ++ ++/** ++ * kbase_csf_firmware_global_reinit_complete - Check the Global configuration ++ * requests, sent after the reboot of MCU firmware, have ++ * completed or not. ++ * ++ * Return: true if the Global configuration requests completed otherwise false. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ */ ++bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_firmware_update_core_attr - Send the Global configuration request ++ * to update the requested core attribute ++ * changes. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @update_core_pwroff_timer: If true, signal the firmware needs to update ++ * the MCU power-off timer value. ++ * @update_core_mask: If true, need to do the core_mask update with ++ * the supplied core_mask value. ++ * @core_mask: New core mask value if update_core_mask is true, ++ * otherwise unused. ++ */ ++void kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev, ++ bool update_core_pwroff_timer, bool update_core_mask, u64 core_mask); ++ ++/** ++ * kbase_csf_firmware_core_attr_updated - Check the Global configuration ++ * request has completed or not, that was sent to update ++ * the core attributes. ++ * ++ * Return: true if the Global configuration request to update the core ++ * attributes has completed, otherwise false. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ */ ++bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev); ++ ++/** ++ * Request the global control block of CSF interface capabilities ++ * ++ * Return: Total number of CSs, summed across all groups. ++ * ++ * @kbdev: Kbase device. ++ * @group_data: Pointer where to store all the group data ++ * (sequentially). ++ * @max_group_num: The maximum number of groups to be read. 
++ * Can be 0, in which case group_data is unused. ++ * @stream_data: Pointer where to store all the CS data ++ * (sequentially). ++ * @max_total_stream_num: The maximum number of CSs to be read. ++ * Can be 0, in which case stream_data is unused. ++ * @glb_version: Where to store the global interface version. ++ * @features: Where to store a bit mask of features (e.g. ++ * whether certain types of job can be suspended). ++ * @group_num: Where to store the number of CSGs ++ * supported. ++ * @prfcnt_size: Where to store the size of CSF performance counters, ++ * in bytes. Bits 31:16 hold the size of firmware ++ * performance counter data and 15:0 hold the size of ++ * hardware performance counter data. ++ * @instr_features: Instrumentation features. Bits 7:4 hold the max size ++ * of events. Bits 3:0 hold the offset update rate. ++ * (csf >= 1,1,0) ++ */ ++u32 kbase_csf_firmware_get_glb_iface( ++ struct kbase_device *kbdev, struct basep_cs_group_control *group_data, ++ u32 max_group_num, struct basep_cs_stream_control *stream_data, ++ u32 max_total_stream_num, u32 *glb_version, u32 *features, ++ u32 *group_num, u32 *prfcnt_size, u32 *instr_features); ++ ++/** ++ * Get CSF firmware header timeline metadata content ++ * ++ * Return: The firmware timeline metadata content which match @p name. ++ * ++ * @kbdev: Kbase device. ++ * @name: Name of the metadata which metadata content to be returned. ++ * @size: Metadata size if specified metadata found. ++ */ ++const char *kbase_csf_firmware_get_timeline_metadata(struct kbase_device *kbdev, ++ const char *name, size_t *size); ++ ++/** ++ * kbase_csf_firmware_mcu_shared_mapping_init - ++ * Allocate and map MCU shared memory. ++ * ++ * This helper function allocates memory and maps it on both the CPU ++ * and the GPU address spaces. Most of the properties of the mapping ++ * are implicit and will be automatically determined by the function, ++ * e.g. whether memory is cacheable. 
++ * ++ * The client is only expected to specify whether the mapping is readable ++ * or writable in the CPU and the GPU address spaces; any other flag ++ * will be ignored by the function. ++ * ++ * Return: 0 if success, or an error code on failure. ++ * ++ * @kbdev: Kbase device the memory mapping shall belong to. ++ * @num_pages: Number of memory pages to map. ++ * @cpu_map_properties: Either PROT_READ or PROT_WRITE. ++ * @gpu_map_properties: Either KBASE_REG_GPU_RD or KBASE_REG_GPU_WR. ++ * @csf_mapping: Object where to write metadata for the memory mapping. ++ */ ++int kbase_csf_firmware_mcu_shared_mapping_init( ++ struct kbase_device *kbdev, ++ unsigned int num_pages, ++ unsigned long cpu_map_properties, ++ unsigned long gpu_map_properties, ++ struct kbase_csf_mapping *csf_mapping); ++ ++/** ++ * kbase_csf_firmware_mcu_shared_mapping_term - Unmap and free MCU shared memory. ++ * ++ * @kbdev: Device pointer. ++ * @csf_mapping: Metadata of the memory mapping to terminate. ++ */ ++void kbase_csf_firmware_mcu_shared_mapping_term( ++ struct kbase_device *kbdev, struct kbase_csf_mapping *csf_mapping); ++ ++#ifndef MALI_KBASE_BUILD ++/** ++ * mali_kutf_process_fw_utf_entry() - Process the "Firmware UTF tests" section ++ * ++ * Read "Firmware UTF tests" section from the firmware image and create ++ * necessary kutf app+suite+tests. ++ * ++ * Return: 0 if successful, negative error code on failure. 
In both cases ++ * caller will have to invoke mali_kutf_fw_utf_entry_cleanup for the cleanup ++ * ++ * @kbdev: Kbase device structure ++ * @fw_data: Pointer to the start of firmware binary image loaded from disk ++ * @fw_size: Size (in bytes) of the firmware image ++ * @entry: Pointer to the start of the section ++ */ ++int mali_kutf_process_fw_utf_entry(struct kbase_device *kbdev, ++ const void *fw_data, size_t fw_size, const u32 *entry); ++ ++/** ++ * mali_kutf_fw_utf_entry_cleanup() - Remove the Fw UTF tests debugfs entries ++ * ++ * Destroy the kutf apps+suites+tests created on parsing "Firmware UTF tests" ++ * section from the firmware image. ++ * ++ * @kbdev: Kbase device structure ++ */ ++void mali_kutf_fw_utf_entry_cleanup(struct kbase_device *kbdev); ++#endif ++ ++#ifdef CONFIG_MALI_DEBUG ++extern bool fw_debug; ++#endif ++ ++static inline long kbase_csf_timeout_in_jiffies(const unsigned int msecs) ++{ ++#ifdef CONFIG_MALI_DEBUG ++ return (fw_debug ? MAX_SCHEDULE_TIMEOUT : msecs_to_jiffies(msecs)); ++#else ++ return msecs_to_jiffies(msecs); ++#endif ++} ++ ++/** ++ * kbase_csf_firmware_enable_gpu_idle_timer() - Activate the idle hysteresis ++ * monitoring operation ++ * ++ * Program the firmware interface with its configured hysteresis count value ++ * and enable the firmware to act on it. The Caller is ++ * assumed to hold the kbdev->csf.scheduler.interrupt_lock. ++ * ++ * @kbdev: Kbase device structure ++ */ ++void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_firmware_disable_gpu_idle_timer() - Disable the idle time ++ * hysteresis monitoring operation ++ * ++ * Program the firmware interface to disable the idle hysteresis timer. The ++ * Caller is assumed to hold the kbdev->csf.scheduler.interrupt_lock. 
++ * ++ * @kbdev: Kbase device structure ++ */ ++void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_firmware_get_gpu_idle_hysteresis_time - Get the firmware GPU idle ++ * detection hysteresis duration ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * Return: the internally recorded hysteresis (nominal) value. ++ */ ++u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_firmware_set_gpu_idle_hysteresis_time - Set the firmware GPU idle ++ * detection hysteresis duration ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @dur: The duration value (unit: milliseconds) for the configuring ++ * hysteresis field for GPU idle detection ++ * ++ * The supplied value will be recorded internally without any change. But the ++ * actual field value will be subject to hysteresis source frequency scaling ++ * and maximum value limiting. The default source will be SYSTEM_TIMESTAMP ++ * counter. But in case the platform is not able to supply it, the GPU ++ * CYCLE_COUNTER source will be used as an alternative. Bit-31 on the ++ * returned value is the source configuration flag, and it is set to '1' ++ * when CYCLE_COUNTER alternative source is used. ++ * ++ * Return: the actual internally configured hysteresis field value. ++ */ ++u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur); ++ ++/** ++ * kbase_csf_firmware_get_mcu_core_pwroff_time - Get the MCU core power-off ++ * time value ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * Return: the internally recorded MCU core power-off (nominal) value. The unit ++ * of the value is in micro-seconds. 
++ */ ++u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_firmware_set_mcu_core_pwroff_time - Set the MCU core power-off ++ * time value ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @dur: The duration value (unit: micro-seconds) for configuring MCU ++ * core power-off timer, when the shader cores' power ++ * transitions are delegated to the MCU (normal operational ++ * mode) ++ * ++ * The supplied value will be recorded internally without any change. But the ++ * actual field value will be subject to core power-off timer source frequency ++ * scaling and maximum value limiting. The default source will be ++ * SYSTEM_TIMESTAMP counter. But in case the platform is not able to supply it, ++ * the GPU CYCLE_COUNTER source will be used as an alternative. Bit-31 on the ++ * returned value is the source configuration flag, and it is set to '1' ++ * when CYCLE_COUNTER alternative source is used. ++ * ++ * The configured MCU core power-off timer will only have effect when the host ++ * driver has delegated the shader cores' power management to MCU. ++ * ++ * Return: the actual internal core power-off timer value in register defined ++ * format. 
++ */ ++u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur); ++ ++/** ++ * kbase_csf_interface_version - Helper function to build the full firmware ++ * interface version in a format compatible with ++ * the GLB_VERSION register ++ * ++ * @major: major version of csf interface ++ * @minor: minor version of csf interface ++ * @patch: patch version of csf interface ++ * ++ * Return: firmware interface version ++ */ ++static inline u32 kbase_csf_interface_version(u32 major, u32 minor, u32 patch) ++{ ++ return ((major << GLB_VERSION_MAJOR_SHIFT) | ++ (minor << GLB_VERSION_MINOR_SHIFT) | ++ (patch << GLB_VERSION_PATCH_SHIFT)); ++} ++ ++/** ++ * kbase_csf_trigger_firmware_config_update - Send a firmware config update. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * Any changes done to firmware configuration entry or tracebuffer entry ++ * requires a GPU silent reset to reflect the configuration changes ++ * requested, but if Firmware.header.entry.bit(30) is set then we can request a ++ * FIRMWARE_CONFIG_UPDATE rather than doing a silent reset. ++ * ++ * Return: 0 if success, or negative error code on failure. ++ */ ++int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev); ++#endif +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware_cfg.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware_cfg.c +new file mode 100644 +index 0000000..f00acb1 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware_cfg.c +@@ -0,0 +1,327 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#include ++#include "mali_kbase_csf_firmware_cfg.h" ++#include ++ ++#if CONFIG_SYSFS ++#define CSF_FIRMWARE_CFG_SYSFS_DIR_NAME "firmware_config" ++ ++/** ++ * struct firmware_config - Configuration item within the MCU firmware ++ * ++ * The firmware may expose configuration options. Each option has a name, the ++ * address where the option is controlled and the minimum and maximum values ++ * that the option can take. ++ * ++ * @node: List head linking all options to ++ * kbase_device:csf.firmware_config ++ * @kbdev: Pointer to the Kbase device ++ * @kobj: Kobject corresponding to the sysfs sub-directory, ++ * inside CSF_FIRMWARE_CFG_SYSFS_DIR_NAME directory, ++ * representing the configuration option @name. 
++ * @kobj_inited: kobject initialization state ++ * @updatable: Indicates whether config items can be updated with ++ * FIRMWARE_CONFIG_UPDATE ++ * @name: NUL-terminated string naming the option ++ * @address: The address in the firmware image of the configuration option ++ * @min: The lowest legal value of the configuration option ++ * @max: The maximum legal value of the configuration option ++ * @cur_val: The current value of the configuration option ++ */ ++struct firmware_config { ++ struct list_head node; ++ struct kbase_device *kbdev; ++ struct kobject kobj; ++ bool kobj_inited; ++ bool updatable; ++ char *name; ++ u32 address; ++ u32 min; ++ u32 max; ++ u32 cur_val; ++}; ++ ++#define FW_CFG_ATTR(_name, _mode) \ ++ struct attribute fw_cfg_attr_##_name = { \ ++ .name = __stringify(_name), \ ++ .mode = VERIFY_OCTAL_PERMISSIONS(_mode), \ ++ } ++ ++static FW_CFG_ATTR(min, S_IRUGO); ++static FW_CFG_ATTR(max, S_IRUGO); ++static FW_CFG_ATTR(cur, S_IRUGO | S_IWUSR); ++ ++static void fw_cfg_kobj_release(struct kobject *kobj) ++{ ++ struct firmware_config *config = ++ container_of(kobj, struct firmware_config, kobj); ++ ++ kfree(config); ++} ++ ++static ssize_t show_fw_cfg(struct kobject *kobj, ++ struct attribute *attr, char *buf) ++{ ++ struct firmware_config *config = ++ container_of(kobj, struct firmware_config, kobj); ++ struct kbase_device *kbdev = config->kbdev; ++ u32 val = 0; ++ ++ if (!kbdev) ++ return -ENODEV; ++ ++ if (attr == &fw_cfg_attr_max) ++ val = config->max; ++ else if (attr == &fw_cfg_attr_min) ++ val = config->min; ++ else if (attr == &fw_cfg_attr_cur) { ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ val = config->cur_val; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } else { ++ dev_warn(kbdev->dev, ++ "Unexpected read from entry %s/%s", ++ config->name, attr->name); ++ return -EINVAL; ++ } ++ ++ return snprintf(buf, PAGE_SIZE, "%u\n", val); ++} ++ ++static ssize_t store_fw_cfg(struct kobject 
*kobj, ++ struct attribute *attr, ++ const char *buf, ++ size_t count) ++{ ++ struct firmware_config *config = ++ container_of(kobj, struct firmware_config, kobj); ++ struct kbase_device *kbdev = config->kbdev; ++ ++ if (!kbdev) ++ return -ENODEV; ++ ++ if (attr == &fw_cfg_attr_cur) { ++ unsigned long flags; ++ u32 val; ++ int ret = kstrtouint(buf, 0, &val); ++ ++ if (ret) { ++ dev_err(kbdev->dev, ++ "Couldn't process %s/%s write operation.\n" ++ "Use format \n", ++ config->name, attr->name); ++ return -EINVAL; ++ } ++ ++ if ((val < config->min) || (val > config->max)) ++ return -EINVAL; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ if (config->cur_val == val) { ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ return count; ++ } ++ ++ /* If configuration update cannot be performed with ++ * FIRMWARE_CONFIG_UPDATE then we need to do a ++ * silent reset before we update the memory. ++ */ ++ if (!config->updatable) { ++ /* ++ * If there is already a GPU reset pending then inform ++ * the User to retry the write. ++ */ ++ if (kbase_reset_gpu_silent(kbdev)) { ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, ++ flags); ++ return -EAGAIN; ++ } ++ } ++ ++ /* ++ * GPU reset request has been placed, now update the ++ * firmware image. GPU reset will take place only after ++ * hwaccess_lock is released. ++ * Update made to firmware image in memory would not ++ * be lost on GPU reset as configuration entries reside ++ * in the RONLY section of firmware image, which is not ++ * reloaded on firmware reboot due to GPU reset. ++ */ ++ kbase_csf_update_firmware_memory( ++ kbdev, config->address, val); ++ ++ config->cur_val = val; ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ /* If we can update the config without firmware reset then ++ * we need to just trigger FIRMWARE_CONFIG_UPDATE. 
++ */ ++ if (config->updatable) { ++ ret = kbase_csf_trigger_firmware_config_update(kbdev); ++ if (ret) ++ return ret; ++ } ++ ++ /* Wait for the config update to take effect */ ++ if (!config->updatable) ++ kbase_reset_gpu_wait(kbdev); ++ } else { ++ dev_warn(kbdev->dev, ++ "Unexpected write to entry %s/%s", ++ config->name, attr->name); ++ return -EINVAL; ++ } ++ ++ return count; ++} ++ ++static const struct sysfs_ops fw_cfg_ops = { ++ .show = &show_fw_cfg, ++ .store = &store_fw_cfg, ++}; ++ ++static struct attribute *fw_cfg_attrs[] = { ++ &fw_cfg_attr_min, ++ &fw_cfg_attr_max, ++ &fw_cfg_attr_cur, ++ NULL, ++}; ++ ++static struct kobj_type fw_cfg_kobj_type = { ++ .release = &fw_cfg_kobj_release, ++ .sysfs_ops = &fw_cfg_ops, ++ .default_attrs = fw_cfg_attrs, ++}; ++ ++int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev) ++{ ++ struct firmware_config *config; ++ ++ kbdev->csf.fw_cfg_kobj = kobject_create_and_add( ++ CSF_FIRMWARE_CFG_SYSFS_DIR_NAME, &kbdev->dev->kobj); ++ if (!kbdev->csf.fw_cfg_kobj) { ++ dev_err(kbdev->dev, ++ "Creation of %s sysfs sub-directory failed\n", ++ CSF_FIRMWARE_CFG_SYSFS_DIR_NAME); ++ return -ENOMEM; ++ } ++ ++ list_for_each_entry(config, &kbdev->csf.firmware_config, node) { ++ int err; ++ ++ kbase_csf_read_firmware_memory(kbdev, config->address, ++ &config->cur_val); ++ ++ err = kobject_init_and_add(&config->kobj, &fw_cfg_kobj_type, ++ kbdev->csf.fw_cfg_kobj, "%s", config->name); ++ if (err) { ++ dev_err(kbdev->dev, ++ "Creation of %s sysfs sub-directory failed\n", ++ config->name); ++ list_del(&config->node); /* keep cfg_term off the freed entry */ ++ kobject_put(&config->kobj); /* release() frees config */ ++ return err; ++ } ++ ++ config->kobj_inited = true; ++ } ++ ++ return 0; ++} ++ ++void kbase_csf_firmware_cfg_term(struct kbase_device *kbdev) ++{ ++ while (!list_empty(&kbdev->csf.firmware_config)) { ++ struct firmware_config *config; ++ ++ config = list_first_entry(&kbdev->csf.firmware_config, ++ struct firmware_config, node); ++ list_del(&config->node); ++ ++ if 
(config->kobj_inited) { ++ kobject_del(&config->kobj); ++ kobject_put(&config->kobj); ++ } else ++ kfree(config); ++ } ++ ++ kobject_del(kbdev->csf.fw_cfg_kobj); ++ kobject_put(kbdev->csf.fw_cfg_kobj); ++} ++ ++int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev, ++ const struct firmware *fw, ++ const u32 *entry, ++ unsigned int size, bool updatable) ++{ ++ const char *name = (char *)&entry[3]; ++ struct firmware_config *config; ++ const unsigned int name_len = size - CONFIGURATION_ENTRY_NAME_OFFSET; ++ ++ /* Allocate enough space for struct firmware_config and the ++ * configuration option name (with NULL termination) ++ */ ++ config = kzalloc(sizeof(*config) + name_len + 1, GFP_KERNEL); ++ ++ if (!config) ++ return -ENOMEM; ++ ++ config->kbdev = kbdev; ++ config->updatable = updatable; ++ config->name = (char *)(config+1); ++ config->address = entry[0]; ++ config->min = entry[1]; ++ config->max = entry[2]; ++ ++ memcpy(config->name, name, name_len); ++ config->name[name_len] = 0; ++ ++ list_add(&config->node, &kbdev->csf.firmware_config); ++ ++ dev_dbg(kbdev->dev, "Configuration option '%s' at 0x%x range %u-%u", ++ config->name, config->address, ++ config->min, config->max); ++ ++ return 0; ++} ++#else ++int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev) ++{ ++ return 0; ++} ++ ++void kbase_csf_firmware_cfg_term(struct kbase_device *kbdev) ++{ ++ /* !CONFIG_SYSFS: Nothing to do here */ ++} ++ ++int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev, ++ const struct firmware *fw, ++ const u32 *entry, unsigned int size, bool updatable) ++{ ++ return 0; ++} ++#endif /* CONFIG_SYSFS */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware_cfg.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware_cfg.h +new file mode 100644 +index 0000000..080c154 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware_cfg.h +@@ -0,0 +1,74 @@ ++/* SPDX-License-Identifier: GPL-2.0 
WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#ifndef _KBASE_CSF_FIRMWARE_CFG_H_ ++#define _KBASE_CSF_FIRMWARE_CFG_H_ ++ ++#include ++#include "mali_kbase_csf_firmware.h" ++#include ++ ++#define CONFIGURATION_ENTRY_NAME_OFFSET (0xC) ++ ++/** ++ * kbase_csf_firmware_cfg_init - Create the sysfs directory for configuration ++ * options present in firmware image. ++ * ++ * This function would create a sysfs directory and populate it with a ++ * sub-directory, that would contain a file per attribute, for every ++ * configuration option parsed from firmware image. ++ * ++ * @kbdev: Pointer to the Kbase device ++ * ++ * Return: The initialization error code. ++ */ ++int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_firmware_cfg_term - Delete the sysfs directory that was created ++ * for firmware configuration options. ++ * ++ * @kbdev: Pointer to the Kbase device ++ * ++ */ ++void kbase_csf_firmware_cfg_term(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_firmware_cfg_option_entry_parse() - Process a ++ * "configuration option" section. ++ * ++ * Read a "configuration option" section adding it to the ++ * kbase_device:csf.firmware_config list. 
++ * ++ * Return: 0 if successful, negative error code on failure ++ * ++ * @kbdev: Kbase device structure ++ * @fw: Firmware image containing the section ++ * @entry: Pointer to the section ++ * @size: Size (in bytes) of the section ++ * @updatable: Indicates if entry can be updated with FIRMWARE_CONFIG_UPDATE ++ */ ++int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev, ++ const struct firmware *fw, ++ const u32 *entry, ++ unsigned int size, ++ bool updatable); ++#endif /* _KBASE_CSF_FIRMWARE_CFG_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware_no_mali.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware_no_mali.c +new file mode 100644 +index 0000000..ae2ad33 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware_no_mali.c +@@ -0,0 +1,1389 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ ++ ++#include "mali_kbase.h" ++#include "mali_kbase_csf_firmware.h" ++#include "mali_kbase_csf_trace_buffer.h" ++#include "mali_kbase_csf_timeout.h" ++#include "mali_kbase_mem.h" ++#include "mali_kbase_reset_gpu.h" ++#include "mali_kbase_ctx_sched.h" ++#include "device/mali_kbase_device.h" ++#include "backend/gpu/mali_kbase_pm_internal.h" ++#include "mali_kbase_csf_scheduler.h" ++#include "mmu/mali_kbase_mmu.h" ++#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#if (KERNEL_VERSION(4, 13, 0) <= LINUX_VERSION_CODE) ++#include ++#endif ++#include ++ ++#ifdef CONFIG_MALI_DEBUG ++/* Makes Driver wait indefinitely for an acknowledgment for the different ++ * requests it sends to firmware. Otherwise the timeouts interfere with the ++ * use of debugger for source-level debugging of firmware as Driver initiates ++ * a GPU reset when a request times out, which always happen when a debugger ++ * is connected. ++ */ ++bool fw_debug; /* Default value of 0/false */ ++module_param(fw_debug, bool, 0444); ++MODULE_PARM_DESC(fw_debug, ++ "Enables effective use of a debugger for debugging firmware code."); ++#endif ++ ++#define DUMMY_FW_PAGE_SIZE SZ_4K ++ ++/** ++ * struct dummy_firmware_csi - Represents a dummy interface for MCU firmware CSs ++ * ++ * @cs_kernel_input: CS kernel input memory region ++ * @cs_kernel_output: CS kernel output memory region ++ */ ++struct dummy_firmware_csi { ++ u8 cs_kernel_input[DUMMY_FW_PAGE_SIZE]; ++ u8 cs_kernel_output[DUMMY_FW_PAGE_SIZE]; ++}; ++ ++/** ++ * struct dummy_firmware_csg - Represents a dummy interface for MCU firmware CSGs ++ * ++ * @csg_input: CSG kernel input memory region ++ * @csg_output: CSG kernel output memory region ++ * @csi: Dummy firmware CSIs ++ */ ++struct dummy_firmware_csg { ++ u8 csg_input[DUMMY_FW_PAGE_SIZE]; ++ u8 csg_output[DUMMY_FW_PAGE_SIZE]; ++ struct dummy_firmware_csi csi[8]; ++} dummy_firmware_csg; ++ ++/** ++ * struct 
dummy_firmware_interface - Represents a dummy interface in the MCU firmware ++ * ++ * @global_input: Global input memory region ++ * @global_output: Global output memory region ++ * @csg: Dummy firmware CSGs ++ * @node: Interface objects are on the kbase_device:csf.firmware_interfaces ++ * list using this list_head to link them ++ */ ++struct dummy_firmware_interface { ++ u8 global_input[DUMMY_FW_PAGE_SIZE]; ++ u8 global_output[DUMMY_FW_PAGE_SIZE]; ++ struct dummy_firmware_csg csg[8]; ++ struct list_head node; ++} dummy_firmware_interface; ++ ++#define CSF_GLB_REQ_CFG_MASK \ ++ (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ ++ GLB_REQ_CFG_PWROFF_TIMER_MASK) ++ ++static inline u32 input_page_read(const u32 *const input, const u32 offset) ++{ ++ WARN_ON(offset % sizeof(u32)); ++ ++ return input[offset / sizeof(u32)]; ++} ++ ++static inline void input_page_write(u32 *const input, const u32 offset, ++ const u32 value) ++{ ++ WARN_ON(offset % sizeof(u32)); ++ ++ input[offset / sizeof(u32)] = value; ++} ++ ++static inline void input_page_partial_write(u32 *const input, const u32 offset, ++ u32 value, u32 mask) ++{ ++ WARN_ON(offset % sizeof(u32)); ++ ++ input[offset / sizeof(u32)] = ++ (input_page_read(input, offset) & ~mask) | (value & mask); ++} ++ ++static inline u32 output_page_read(const u32 *const output, const u32 offset) ++{ ++ WARN_ON(offset % sizeof(u32)); ++ ++ return output[offset / sizeof(u32)]; ++} ++ ++static inline void output_page_write(u32 *const output, const u32 offset, ++ const u32 value) ++{ ++ WARN_ON(offset % sizeof(u32)); ++ ++ output[offset / sizeof(u32)] = value; ++} ++ ++/** ++ * invent_memory_setup_entry() - Invent an "interface memory setup" section ++ * ++ * Invent an "interface memory setup" section similar to one from a firmware ++ * image. If successful the interface will be added to the ++ * kbase_device:csf.firmware_interfaces list. 
++ * ++ * Return: 0 if successful, negative error code on failure ++ * ++ * @kbdev: Kbase device structure ++ */ ++static int invent_memory_setup_entry(struct kbase_device *kbdev) ++{ ++ struct dummy_firmware_interface *interface = NULL; ++ ++ /* Allocate enough memory for the struct dummy_firmware_interface. ++ */ ++ interface = kzalloc(sizeof(*interface), GFP_KERNEL); ++ if (!interface) ++ return -ENOMEM; ++ ++ kbdev->csf.shared_interface = interface; ++ list_add(&interface->node, &kbdev->csf.firmware_interfaces); ++ ++ /* NO_MALI: Don't insert any firmware pages */ ++ return 0; ++} ++ ++static void free_global_iface(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface; ++ ++ if (iface->groups) { ++ unsigned int gid; ++ ++ for (gid = 0; gid < iface->group_num; ++gid) ++ kfree(iface->groups[gid].streams); ++ ++ kfree(iface->groups); ++ iface->groups = NULL; ++ } ++} ++ ++static int invent_cmd_stream_group_info(struct kbase_device *kbdev, ++ struct kbase_csf_cmd_stream_group_info *ginfo, ++ struct dummy_firmware_csg *csg) ++{ ++ unsigned int sid; ++ ++ ginfo->input = csg->csg_input; ++ ginfo->output = csg->csg_output; ++ ++ ginfo->kbdev = kbdev; ++ ginfo->features = 0; ++ ginfo->suspend_size = 64; ++ ginfo->protm_suspend_size = 64; ++ ginfo->stream_num = ARRAY_SIZE(csg->csi); ++ ginfo->stream_stride = 0; ++ ++ ginfo->streams = kcalloc(ginfo->stream_num, sizeof(*ginfo->streams), GFP_KERNEL); ++ if (ginfo->streams == NULL) { ++ return -ENOMEM; ++ } ++ ++ for (sid = 0; sid < ginfo->stream_num; ++sid) { ++ struct kbase_csf_cmd_stream_info *stream = &ginfo->streams[sid]; ++ struct dummy_firmware_csi *csi = &csg->csi[sid]; ++ ++ stream->input = csi->cs_kernel_input; ++ stream->output = csi->cs_kernel_output; ++ ++ stream->kbdev = kbdev; ++ stream->features = ++ STREAM_FEATURES_WORK_REGISTERS_SET(0, 80) | ++ STREAM_FEATURES_SCOREBOARDS_SET(0, 8) | ++ STREAM_FEATURES_COMPUTE_SET(0, 1) | ++ STREAM_FEATURES_FRAGMENT_SET(0, 1) | 
++ STREAM_FEATURES_TILER_SET(0, 1); ++ } ++ ++ return 0; ++} ++ ++static int invent_capabilities(struct kbase_device *kbdev) ++{ ++ struct dummy_firmware_interface *interface = kbdev->csf.shared_interface; ++ struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface; ++ unsigned int gid; ++ ++ iface->input = interface->global_input; ++ iface->output = interface->global_output; ++ ++ iface->version = 1; ++ iface->kbdev = kbdev; ++ iface->features = 0; ++ iface->prfcnt_size = 64; ++ ++ if (iface->version >= kbase_csf_interface_version(1, 1, 0)) { ++ /* update rate=1, max event size = 1<<8 = 256 */ ++ iface->instr_features = 0x81; ++ } else { ++ iface->instr_features = 0; ++ } ++ ++ iface->group_num = ARRAY_SIZE(interface->csg); ++ iface->group_stride = 0; ++ ++ iface->groups = kcalloc(iface->group_num, sizeof(*iface->groups), GFP_KERNEL); ++ if (iface->groups == NULL) { ++ return -ENOMEM; ++ } ++ ++ for (gid = 0; gid < iface->group_num; ++gid) { ++ int err; ++ ++ err = invent_cmd_stream_group_info(kbdev, &iface->groups[gid], ++ &interface->csg[gid]); ++ if (err < 0) { ++ free_global_iface(kbdev); ++ return err; ++ } ++ } ++ ++ return 0; ++} ++ ++void kbase_csf_read_firmware_memory(struct kbase_device *kbdev, ++ u32 gpu_addr, u32 *value) ++{ ++ /* NO_MALI: Nothing to do here */ ++} ++ ++ ++void kbase_csf_update_firmware_memory(struct kbase_device *kbdev, ++ u32 gpu_addr, u32 value) ++{ ++ /* NO_MALI: Nothing to do here */ ++} ++ ++void kbase_csf_firmware_cs_input( ++ const struct kbase_csf_cmd_stream_info *const info, const u32 offset, ++ const u32 value) ++{ ++ const struct kbase_device * const kbdev = info->kbdev; ++ ++ dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x\n", offset, value); ++ input_page_write(info->input, offset, value); ++ ++ if (offset == CS_REQ) { ++ /* NO_MALI: Immediately acknowledge requests */ ++ output_page_write(info->output, CS_ACK, value); ++ } ++} ++ ++u32 kbase_csf_firmware_cs_input_read( ++ const struct kbase_csf_cmd_stream_info 
*const info, ++ const u32 offset) ++{ ++ const struct kbase_device * const kbdev = info->kbdev; ++ u32 const val = input_page_read(info->input, offset); ++ ++ dev_dbg(kbdev->dev, "cs input r: reg %08x val %08x\n", offset, val); ++ return val; ++} ++ ++void kbase_csf_firmware_cs_input_mask( ++ const struct kbase_csf_cmd_stream_info *const info, const u32 offset, ++ const u32 value, const u32 mask) ++{ ++ const struct kbase_device * const kbdev = info->kbdev; ++ ++ dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x mask %08x\n", ++ offset, value, mask); ++ ++ /* NO_MALI: Go through kbase_csf_firmware_cs_input to capture writes */ ++ kbase_csf_firmware_cs_input(info, offset, (input_page_read(info->input, offset) & ~mask) | (value & mask)); ++} ++ ++u32 kbase_csf_firmware_cs_output( ++ const struct kbase_csf_cmd_stream_info *const info, const u32 offset) ++{ ++ const struct kbase_device * const kbdev = info->kbdev; ++ u32 const val = output_page_read(info->output, offset); ++ ++ dev_dbg(kbdev->dev, "cs output r: reg %08x val %08x\n", offset, val); ++ return val; ++} ++ ++void kbase_csf_firmware_csg_input( ++ const struct kbase_csf_cmd_stream_group_info *const info, ++ const u32 offset, const u32 value) ++{ ++ const struct kbase_device * const kbdev = info->kbdev; ++ ++ dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x\n", ++ offset, value); ++ input_page_write(info->input, offset, value); ++ ++ if (offset == CSG_REQ) { ++ /* NO_MALI: Immediately acknowledge requests */ ++ output_page_write(info->output, CSG_ACK, value); ++ } ++} ++ ++u32 kbase_csf_firmware_csg_input_read( ++ const struct kbase_csf_cmd_stream_group_info *const info, ++ const u32 offset) ++{ ++ const struct kbase_device * const kbdev = info->kbdev; ++ u32 const val = input_page_read(info->input, offset); ++ ++ dev_dbg(kbdev->dev, "csg input r: reg %08x val %08x\n", offset, val); ++ return val; ++} ++ ++void kbase_csf_firmware_csg_input_mask( ++ const struct kbase_csf_cmd_stream_group_info *const info, 
++ const u32 offset, const u32 value, const u32 mask) ++{ ++ const struct kbase_device * const kbdev = info->kbdev; ++ ++ dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x mask %08x\n", ++ offset, value, mask); ++ ++ /* NO_MALI: Go through kbase_csf_firmware_csg_input to capture writes */ ++ kbase_csf_firmware_csg_input(info, offset, (input_page_read(info->input, offset) & ~mask) | (value & mask)); ++} ++ ++u32 kbase_csf_firmware_csg_output( ++ const struct kbase_csf_cmd_stream_group_info *const info, ++ const u32 offset) ++{ ++ const struct kbase_device * const kbdev = info->kbdev; ++ u32 const val = output_page_read(info->output, offset); ++ ++ dev_dbg(kbdev->dev, "csg output r: reg %08x val %08x\n", offset, val); ++ return val; ++} ++ ++static void ++csf_firmware_prfcnt_process(const struct kbase_csf_global_iface *const iface, ++ const u32 glb_req) ++{ ++ struct kbase_device *kbdev = iface->kbdev; ++ u32 glb_ack = output_page_read(iface->output, GLB_ACK); ++ /* If the value of GLB_REQ.PRFCNT_SAMPLE is different from the value of ++ * GLB_ACK.PRFCNT_SAMPLE, the CSF will sample the performance counters. ++ */ ++ if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_SAMPLE_MASK) { ++ /* NO_MALI only uses the first buffer in the ring buffer. */ ++ input_page_write(iface->input, GLB_PRFCNT_EXTRACT, 0); ++ output_page_write(iface->output, GLB_PRFCNT_INSERT, 1); ++ kbase_reg_write(kbdev, GPU_COMMAND, GPU_COMMAND_PRFCNT_SAMPLE); ++ } ++ ++ /* Propagate enable masks to model if request to enable. */ ++ if (glb_req & GLB_REQ_PRFCNT_ENABLE_MASK) { ++ u32 tiler_en, l2_en, sc_en; ++ ++ tiler_en = input_page_read(iface->input, GLB_PRFCNT_TILER_EN); ++ l2_en = input_page_read(iface->input, GLB_PRFCNT_MMU_L2_EN); ++ sc_en = input_page_read(iface->input, GLB_PRFCNT_SHADER_EN); ++ ++ /* NO_MALI platform enabled all CSHW counters by default. 
*/ ++ kbase_reg_write(kbdev, PRFCNT_TILER_EN, tiler_en); ++ kbase_reg_write(kbdev, PRFCNT_MMU_L2_EN, l2_en); ++ kbase_reg_write(kbdev, PRFCNT_SHADER_EN, sc_en); ++ } ++} ++ ++void kbase_csf_firmware_global_input( ++ const struct kbase_csf_global_iface *const iface, const u32 offset, ++ const u32 value) ++{ ++ const struct kbase_device * const kbdev = iface->kbdev; ++ ++ dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x\n", offset, value); ++ input_page_write(iface->input, offset, value); ++ ++ if (offset == GLB_REQ) { ++ csf_firmware_prfcnt_process(iface, value); ++ /* NO_MALI: Immediately acknowledge requests */ ++ output_page_write(iface->output, GLB_ACK, value); ++ } ++} ++ ++void kbase_csf_firmware_global_input_mask( ++ const struct kbase_csf_global_iface *const iface, const u32 offset, ++ const u32 value, const u32 mask) ++{ ++ const struct kbase_device * const kbdev = iface->kbdev; ++ ++ dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x mask %08x\n", ++ offset, value, mask); ++ ++ /* NO_MALI: Go through kbase_csf_firmware_global_input to capture writes */ ++ kbase_csf_firmware_global_input(iface, offset, (input_page_read(iface->input, offset) & ~mask) | (value & mask)); ++} ++ ++u32 kbase_csf_firmware_global_input_read( ++ const struct kbase_csf_global_iface *const iface, const u32 offset) ++{ ++ const struct kbase_device * const kbdev = iface->kbdev; ++ u32 const val = input_page_read(iface->input, offset); ++ ++ dev_dbg(kbdev->dev, "glob input r: reg %08x val %08x\n", offset, val); ++ return val; ++} ++ ++u32 kbase_csf_firmware_global_output( ++ const struct kbase_csf_global_iface *const iface, const u32 offset) ++{ ++ const struct kbase_device * const kbdev = iface->kbdev; ++ u32 const val = output_page_read(iface->output, offset); ++ ++ dev_dbg(kbdev->dev, "glob output r: reg %08x val %08x\n", offset, val); ++ return val; ++} ++ ++/** ++ * handle_internal_firmware_fatal - Handler for CS internal firmware fault. 
++ * ++ * @kbdev: Pointer to kbase device ++ * ++ * Report group fatal error to user space for all GPU command queue groups ++ * in the device, terminate them and reset GPU. ++ */ ++static void handle_internal_firmware_fatal(struct kbase_device *const kbdev) ++{ ++ int as; ++ ++ for (as = 0; as < kbdev->nr_hw_address_spaces; as++) { ++ unsigned long flags; ++ struct kbase_context *kctx; ++ struct kbase_fault fault; ++ ++ if (as == MCU_AS_NR) ++ continue; ++ ++ /* Only handle the fault for an active address space. Lock is ++ * taken here to atomically get reference to context in an ++ * active address space and retain its refcount. ++ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kctx = kbase_ctx_sched_as_to_ctx_nolock(kbdev, as); ++ ++ if (kctx) { ++ kbase_ctx_sched_retain_ctx_refcount(kctx); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } else { ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ continue; ++ } ++ ++ fault = (struct kbase_fault) { ++ .status = GPU_EXCEPTION_TYPE_SW_FAULT_1, ++ }; ++ ++ kbase_csf_ctx_handle_fault(kctx, &fault); ++ kbase_ctx_sched_release_ctx_lock(kctx); ++ } ++ ++ if (kbase_prepare_to_reset_gpu(kbdev, ++ RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) ++ kbase_reset_gpu(kbdev); ++} ++ ++/** ++ * firmware_error_worker - Worker function for handling firmware internal error ++ * ++ * @data: Pointer to a work_struct embedded in kbase device. 
++ * ++ * Handle the CS internal firmware error ++ */ ++static void firmware_error_worker(struct work_struct *const data) ++{ ++ struct kbase_device *const kbdev = ++ container_of(data, struct kbase_device, csf.fw_error_work); ++ ++ handle_internal_firmware_fatal(kbdev); ++} ++ ++static bool global_request_complete(struct kbase_device *const kbdev, ++ u32 const req_mask) ++{ ++ struct kbase_csf_global_iface *global_iface = ++ &kbdev->csf.global_iface; ++ bool complete = false; ++ unsigned long flags; ++ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ ++ if ((kbase_csf_firmware_global_output(global_iface, GLB_ACK) & ++ req_mask) == ++ (kbase_csf_firmware_global_input_read(global_iface, GLB_REQ) & ++ req_mask)) ++ complete = true; ++ ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ ++ return complete; ++} ++ ++static int wait_for_global_request(struct kbase_device *const kbdev, ++ u32 const req_mask) ++{ ++ const long wait_timeout = ++ kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); ++ long remaining; ++ int err = 0; ++ ++ remaining = wait_event_timeout(kbdev->csf.event_wait, ++ global_request_complete(kbdev, req_mask), ++ wait_timeout); ++ ++ if (!remaining) { ++ dev_warn(kbdev->dev, "Timed out waiting for global request %x to complete", ++ req_mask); ++ err = -ETIMEDOUT; ++ } ++ ++ return err; ++} ++ ++static void set_global_request( ++ const struct kbase_csf_global_iface *const global_iface, ++ u32 const req_mask) ++{ ++ u32 glb_req; ++ ++ kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev); ++ ++ glb_req = kbase_csf_firmware_global_output(global_iface, GLB_ACK); ++ glb_req ^= req_mask; ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req, ++ req_mask); ++} ++ ++static void enable_endpoints_global( ++ const struct kbase_csf_global_iface *const global_iface, ++ u64 const shader_core_mask) ++{ ++ kbase_csf_firmware_global_input(global_iface, GLB_ALLOC_EN_LO, ++ shader_core_mask & U32_MAX); ++ 
kbase_csf_firmware_global_input(global_iface, GLB_ALLOC_EN_HI, ++ shader_core_mask >> 32); ++ ++ set_global_request(global_iface, GLB_REQ_CFG_ALLOC_EN_MASK); ++} ++ ++static void enable_shader_poweroff_timer(struct kbase_device *const kbdev, ++ const struct kbase_csf_global_iface *const global_iface) ++{ ++ u32 pwroff_reg; ++ ++ if (kbdev->csf.firmware_hctl_core_pwr) ++ pwroff_reg = ++ GLB_PWROFF_TIMER_TIMER_SOURCE_SET(DISABLE_GLB_PWROFF_TIMER, ++ GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); ++ else ++ pwroff_reg = kbdev->csf.mcu_core_pwroff_dur_count; ++ ++ kbase_csf_firmware_global_input(global_iface, GLB_PWROFF_TIMER, ++ pwroff_reg); ++ set_global_request(global_iface, GLB_REQ_CFG_PWROFF_TIMER_MASK); ++ ++ /* Save the programed reg value in its shadow field */ ++ kbdev->csf.mcu_core_pwroff_reg_shadow = pwroff_reg; ++} ++ ++static void set_timeout_global( ++ const struct kbase_csf_global_iface *const global_iface, ++ u64 const timeout) ++{ ++ kbase_csf_firmware_global_input(global_iface, GLB_PROGRESS_TIMER, ++ timeout / GLB_PROGRESS_TIMER_TIMEOUT_SCALE); ++ ++ set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK); ++} ++ ++static void global_init(struct kbase_device *const kbdev, u64 core_mask) ++{ ++ u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK | ++ GLB_ACK_IRQ_MASK_PING_MASK | ++ GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | ++ GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK | ++ GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK | ++ GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | ++ GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | ++ GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK; ++ ++ const struct kbase_csf_global_iface *const global_iface = ++ &kbdev->csf.global_iface; ++ unsigned long flags; ++ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ ++ /* Update shader core allocation enable mask */ ++ enable_endpoints_global(global_iface, core_mask); ++ enable_shader_poweroff_timer(kbdev, global_iface); ++ ++ set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev)); ++ ++ /* 
Unmask the interrupts */ ++ kbase_csf_firmware_global_input(global_iface, ++ GLB_ACK_IRQ_MASK, ack_irq_mask); ++ ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++} ++ ++/** ++ * global_init_on_boot - Sends a global request to control various features. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * Currently only the request to enable endpoints and cycle counter is sent. ++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++static int global_init_on_boot(struct kbase_device *const kbdev) ++{ ++ unsigned long flags; ++ u64 core_mask; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ core_mask = kbase_pm_ca_get_core_mask(kbdev); ++ kbdev->csf.firmware_hctl_core_pwr = ++ kbase_pm_no_mcu_core_pwroff(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ global_init(kbdev, core_mask); ++ ++ return wait_for_global_request(kbdev, CSF_GLB_REQ_CFG_MASK); ++} ++ ++void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev, ++ u64 core_mask) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ kbdev->csf.glb_init_request_pending = true; ++ kbdev->csf.firmware_hctl_core_pwr = ++ kbase_pm_no_mcu_core_pwroff(kbdev); ++ global_init(kbdev, core_mask); ++} ++ ++bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ WARN_ON(!kbdev->csf.glb_init_request_pending); ++ ++ if (global_request_complete(kbdev, CSF_GLB_REQ_CFG_MASK)) ++ kbdev->csf.glb_init_request_pending = false; ++ ++ return !kbdev->csf.glb_init_request_pending; ++} ++ ++void kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev, ++ bool update_core_pwroff_timer, bool update_core_mask, u64 core_mask) ++{ ++ unsigned long flags; ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ if (update_core_mask) ++ 
enable_endpoints_global(&kbdev->csf.global_iface, core_mask); ++ if (update_core_pwroff_timer) ++ enable_shader_poweroff_timer(kbdev, &kbdev->csf.global_iface); ++ ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++} ++ ++bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ return global_request_complete(kbdev, GLB_REQ_CFG_ALLOC_EN_MASK | ++ GLB_REQ_CFG_PWROFF_TIMER_MASK); ++} ++ ++static void kbase_csf_firmware_reload_worker(struct work_struct *work) ++{ ++ struct kbase_device *kbdev = container_of(work, struct kbase_device, ++ csf.firmware_reload_work); ++ unsigned long flags; ++ ++ /* Reboot the firmware */ ++ kbase_csf_firmware_enable_mcu(kbdev); ++ ++ /* Tell MCU state machine to transit to next state */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->csf.firmware_reloaded = true; ++ kbase_pm_update_state(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++} ++ ++void kbase_csf_firmware_trigger_reload(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ kbdev->csf.firmware_reloaded = false; ++ ++ if (kbdev->csf.firmware_reload_needed) { ++ kbdev->csf.firmware_reload_needed = false; ++ queue_work(system_wq, &kbdev->csf.firmware_reload_work); ++ } else { ++ kbase_csf_firmware_enable_mcu(kbdev); ++ kbdev->csf.firmware_reloaded = true; ++ } ++} ++ ++void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ if (unlikely(!kbdev->csf.firmware_inited)) ++ return; ++ ++ /* Tell MCU state machine to transit to next state */ ++ kbdev->csf.firmware_reloaded = true; ++ kbase_pm_update_state(kbdev); ++} ++ ++static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ms) ++{ ++#define HYSTERESIS_VAL_UNIT_SHIFT (10) ++ /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ ++ u64 freq = 
arch_timer_get_cntfrq(); ++ u64 dur_val = dur_ms; ++ u32 cnt_val_u32, reg_val_u32; ++ bool src_system_timestamp = freq > 0; ++ ++ if (!src_system_timestamp) { ++ /* Get the cycle_counter source alternative */ ++ spin_lock(&kbdev->pm.clk_rtm.lock); ++ if (kbdev->pm.clk_rtm.clks[0]) ++ freq = kbdev->pm.clk_rtm.clks[0]->clock_val; ++ else ++ dev_warn(kbdev->dev, "No GPU clock, unexpected intregration issue!"); ++ spin_unlock(&kbdev->pm.clk_rtm.lock); ++ ++ dev_info(kbdev->dev, "Can't get the timestamp frequency, " ++ "use cycle counter format with firmware idle hysteresis!"); ++ } ++ ++ /* Formula for dur_val = ((dur_ms/1000) * freq_HZ) >> 10) */ ++ dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; ++ dur_val = div_u64(dur_val, 1000); ++ ++ /* Interface limits the value field to S32_MAX */ ++ cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val; ++ ++ reg_val_u32 = GLB_IDLE_TIMER_TIMEOUT_SET(0, cnt_val_u32); ++ /* add the source flag */ ++ if (src_system_timestamp) ++ reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32, ++ GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); ++ else ++ reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32, ++ GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER); ++ ++ return reg_val_u32; ++} ++ ++u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) ++{ ++ return kbdev->csf.gpu_idle_hysteresis_ms; ++} ++ ++u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur) ++{ ++ unsigned long flags; ++ const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur); ++ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ kbdev->csf.gpu_idle_hysteresis_ms = dur; ++ kbdev->csf.gpu_idle_dur_count = hysteresis_val; ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ ++ dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x", ++ hysteresis_val); ++ ++ return hysteresis_val; ++} ++ ++static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 
dur_us) ++{ ++#define PWROFF_VAL_UNIT_SHIFT (10) ++ /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ ++ u64 freq = arch_timer_get_cntfrq(); ++ u64 dur_val = dur_us; ++ u32 cnt_val_u32, reg_val_u32; ++ bool src_system_timestamp = freq > 0; ++ ++ if (!src_system_timestamp) { ++ /* Get the cycle_counter source alternative */ ++ spin_lock(&kbdev->pm.clk_rtm.lock); ++ if (kbdev->pm.clk_rtm.clks[0]) ++ freq = kbdev->pm.clk_rtm.clks[0]->clock_val; ++ else ++ dev_warn(kbdev->dev, "No GPU clock, unexpected integration issue!"); ++ spin_unlock(&kbdev->pm.clk_rtm.lock); ++ ++ dev_info(kbdev->dev, "Can't get the timestamp frequency, " ++ "use cycle counter with MCU Core Poweroff timer!"); ++ } ++ ++ /* Formula for dur_val = ((dur_us/1e6) * freq_HZ) >> 10) */ ++ dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; ++ dur_val = div_u64(dur_val, 1000000); ++ ++ /* Interface limits the value field to S32_MAX */ ++ cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val; ++ ++ reg_val_u32 = GLB_PWROFF_TIMER_TIMEOUT_SET(0, cnt_val_u32); ++ /* add the source flag */ ++ if (src_system_timestamp) ++ reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32, ++ GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); ++ else ++ reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32, ++ GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER); ++ ++ return reg_val_u32; ++} ++ ++u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev) ++{ ++ return kbdev->csf.mcu_core_pwroff_dur_us; ++} ++ ++u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur) ++{ ++ unsigned long flags; ++ const u32 pwroff = convert_dur_to_core_pwroff_count(kbdev, dur); ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->csf.mcu_core_pwroff_dur_us = dur; ++ kbdev->csf.mcu_core_pwroff_dur_count = pwroff; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ dev_dbg(kbdev->dev, "MCU Core Poweroff input update: 0x%.8x", pwroff); ++ ++ return 
pwroff; ++} ++ ++int kbase_csf_firmware_early_init(struct kbase_device *kbdev) ++{ ++ init_waitqueue_head(&kbdev->csf.event_wait); ++ kbdev->csf.interrupt_received = false; ++ kbdev->csf.fw_timeout_ms = CSF_FIRMWARE_TIMEOUT_MS; ++ ++ INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces); ++ INIT_LIST_HEAD(&kbdev->csf.firmware_config); ++ INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list); ++ INIT_WORK(&kbdev->csf.firmware_reload_work, ++ kbase_csf_firmware_reload_worker); ++ INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker); ++ ++ mutex_init(&kbdev->csf.reg_lock); ++ ++ return 0; ++} ++ ++int kbase_csf_firmware_init(struct kbase_device *kbdev) ++{ ++ int ret; ++ ++ lockdep_assert_held(&kbdev->fw_load_lock); ++ ++ if (WARN_ON((kbdev->as_free & MCU_AS_BITMASK) == 0)) ++ return -EINVAL; ++ kbdev->as_free &= ~MCU_AS_BITMASK; ++ ++ ret = kbase_mmu_init(kbdev, &kbdev->csf.mcu_mmu, NULL, ++ BASE_MEM_GROUP_DEFAULT); ++ ++ if (ret != 0) { ++ /* Release the address space */ ++ kbdev->as_free |= MCU_AS_BITMASK; ++ return ret; ++ } ++ ++ kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS; ++ kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count( ++ kbdev, FIRMWARE_IDLE_HYSTERESIS_TIME_MS); ++ ++ ret = kbase_mcu_shared_interface_region_tracker_init(kbdev); ++ if (ret != 0) { ++ dev_err(kbdev->dev, ++ "Failed to setup the rb tree for managing shared interface segment\n"); ++ goto error; ++ } ++ ++ ret = invent_memory_setup_entry(kbdev); ++ if (ret != 0) { ++ dev_err(kbdev->dev, "Failed to load firmware entry\n"); ++ goto error; ++ } ++ ++ /* Make sure L2 cache is powered up */ ++ kbase_pm_wait_for_l2_powered(kbdev); ++ ++ /* NO_MALI: Don't init trace buffers */ ++ ++ /* NO_MALI: Don't load the MMU tables or boot CSF firmware */ ++ ++ ret = invent_capabilities(kbdev); ++ if (ret != 0) ++ goto error; ++ ++ ret = kbase_csf_doorbell_mapping_init(kbdev); ++ if (ret != 0) ++ goto error; ++ ++ ret = kbase_csf_setup_dummy_user_reg_page(kbdev); ++ if (ret 
!= 0) ++ goto error; ++ ++ ret = kbase_csf_scheduler_init(kbdev); ++ if (ret != 0) ++ goto error; ++ ++ ret = kbase_csf_timeout_init(kbdev); ++ if (ret != 0) ++ goto error; ++ ++ ret = global_init_on_boot(kbdev); ++ if (ret != 0) ++ goto error; ++ ++ return 0; ++ ++error: ++ kbase_csf_firmware_term(kbdev); ++ return ret; ++} ++ ++void kbase_csf_firmware_term(struct kbase_device *kbdev) ++{ ++ cancel_work_sync(&kbdev->csf.fw_error_work); ++ ++ kbase_csf_timeout_term(kbdev); ++ ++ /* NO_MALI: Don't stop firmware or unload MMU tables */ ++ ++ kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu); ++ ++ kbase_csf_scheduler_term(kbdev); ++ ++ kbase_csf_free_dummy_user_reg_page(kbdev); ++ ++ kbase_csf_doorbell_mapping_term(kbdev); ++ ++ free_global_iface(kbdev); ++ ++ /* Release the address space */ ++ kbdev->as_free |= MCU_AS_BITMASK; ++ ++ while (!list_empty(&kbdev->csf.firmware_interfaces)) { ++ struct dummy_firmware_interface *interface; ++ ++ interface = list_first_entry(&kbdev->csf.firmware_interfaces, ++ struct dummy_firmware_interface, node); ++ list_del(&interface->node); ++ ++ /* NO_MALI: No cleanup in dummy interface necessary */ ++ ++ kfree(interface); ++ } ++ ++ /* NO_MALI: No trace buffers to terminate */ ++ ++#ifndef MALI_KBASE_BUILD ++ mali_kutf_fw_utf_entry_cleanup(kbdev); ++#endif ++ ++ mutex_destroy(&kbdev->csf.reg_lock); ++ ++ /* This will also free up the region allocated for the shared interface ++ * entry parsed from the firmware image. ++ */ ++ kbase_mcu_shared_interface_region_tracker_term(kbdev); ++} ++ ++void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ u32 glb_req; ++ ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++ ++ /* The scheduler is assumed to only call the enable when its internal ++ * state indicates that the idle timer has previously been disabled. So ++ * on entry the expected field values are: ++ * 1. 
GLOBAL_INPUT_BLOCK.GLB_REQ.IDLE_ENABLE: 0 ++ * 2. GLOBAL_OUTPUT_BLOCK.GLB_ACK.IDLE_ENABLE: 0, or, on 1 -> 0 ++ */ ++ ++ glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ); ++ if (glb_req & GLB_REQ_IDLE_ENABLE_MASK) ++ dev_err(kbdev->dev, "Incoherent scheduler state on REQ_IDLE_ENABLE!"); ++ ++ kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER, ++ kbdev->csf.gpu_idle_dur_count); ++ ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, ++ GLB_REQ_REQ_IDLE_ENABLE, GLB_REQ_IDLE_ENABLE_MASK); ++ ++ dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x", ++ kbdev->csf.gpu_idle_dur_count); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++} ++ ++void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++ ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, ++ GLB_REQ_REQ_IDLE_DISABLE, ++ GLB_REQ_IDLE_DISABLE_MASK); ++ ++ dev_dbg(kbdev->dev, "Sending request to disable gpu idle timer"); ++ ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++} ++ ++void kbase_csf_firmware_ping(struct kbase_device *const kbdev) ++{ ++ const struct kbase_csf_global_iface *const global_iface = ++ &kbdev->csf.global_iface; ++ unsigned long flags; ++ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ set_global_request(global_iface, GLB_REQ_PING_MASK); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++} ++ ++int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev) ++{ ++ kbase_csf_firmware_ping(kbdev); ++ return wait_for_global_request(kbdev, GLB_REQ_PING_MASK); ++} ++ ++int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev, ++ u64 const timeout) ++{ ++ const struct kbase_csf_global_iface *const global_iface = ++ &kbdev->csf.global_iface; ++ unsigned long flags; ++ int err; ++ 
++ /* The 'reg_lock' is also taken and is held till the update is not ++ * complete, to ensure the update of timeout value by multiple Users ++ * gets serialized. ++ */ ++ mutex_lock(&kbdev->csf.reg_lock); ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ set_timeout_global(global_iface, timeout); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ ++ err = wait_for_global_request(kbdev, GLB_REQ_CFG_PROGRESS_TIMER_MASK); ++ mutex_unlock(&kbdev->csf.reg_lock); ++ ++ return err; ++} ++ ++void kbase_csf_enter_protected_mode(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ unsigned long flags; ++ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ set_global_request(global_iface, GLB_REQ_PROTM_ENTER_MASK); ++ dev_dbg(kbdev->dev, "Sending request to enter protected mode"); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ ++ wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK); ++} ++ ++void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ unsigned long flags; ++ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ set_global_request(global_iface, GLB_REQ_HALT_MASK); ++ dev_dbg(kbdev->dev, "Sending request to HALT MCU"); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++} ++ ++int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ unsigned long flags; ++ int err = 0; ++ ++ /* The 'reg_lock' is also taken and is held till the update is ++ * complete, to ensure the config update gets serialized. 
++ */ ++ mutex_lock(&kbdev->csf.reg_lock); ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ ++ set_global_request(global_iface, GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK); ++ dev_dbg(kbdev->dev, "Sending request for FIRMWARE_CONFIG_UPDATE"); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ ++ err = wait_for_global_request(kbdev, ++ GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK); ++ mutex_unlock(&kbdev->csf.reg_lock); ++ return err; ++} ++ ++/** ++ * copy_grp_and_stm - Copy CS and/or group data ++ * ++ * @iface: Global CSF interface provided by ++ * the firmware. ++ * @group_data: Pointer where to store all the group data ++ * (sequentially). ++ * @max_group_num: The maximum number of groups to be read. Can be 0, in ++ * which case group_data is unused. ++ * @stream_data: Pointer where to store all the stream data ++ * (sequentially). ++ * @max_total_stream_num: The maximum number of streams to be read. ++ * Can be 0, in which case stream_data is unused. ++ * ++ * Return: Total number of CSs, summed across all groups. 
++ */ ++static u32 copy_grp_and_stm( ++ const struct kbase_csf_global_iface * const iface, ++ struct basep_cs_group_control * const group_data, ++ u32 max_group_num, ++ struct basep_cs_stream_control * const stream_data, ++ u32 max_total_stream_num) ++{ ++ u32 i, total_stream_num = 0; ++ ++ if (WARN_ON((max_group_num > 0) && !group_data)) ++ max_group_num = 0; ++ ++ if (WARN_ON((max_total_stream_num > 0) && !stream_data)) ++ max_total_stream_num = 0; ++ ++ for (i = 0; i < iface->group_num; i++) { ++ u32 j; ++ ++ if (i < max_group_num) { ++ group_data[i].features = iface->groups[i].features; ++ group_data[i].stream_num = iface->groups[i].stream_num; ++ group_data[i].suspend_size = ++ iface->groups[i].suspend_size; ++ } ++ for (j = 0; j < iface->groups[i].stream_num; j++) { ++ if (total_stream_num < max_total_stream_num) ++ stream_data[total_stream_num].features = ++ iface->groups[i].streams[j].features; ++ total_stream_num++; ++ } ++ } ++ ++ return total_stream_num; ++} ++ ++u32 kbase_csf_firmware_get_glb_iface( ++ struct kbase_device *kbdev, ++ struct basep_cs_group_control *const group_data, ++ u32 const max_group_num, ++ struct basep_cs_stream_control *const stream_data, ++ u32 const max_total_stream_num, u32 *const glb_version, ++ u32 *const features, u32 *const group_num, u32 *const prfcnt_size, ++ u32 *const instr_features) ++{ ++ const struct kbase_csf_global_iface * const iface = ++ &kbdev->csf.global_iface; ++ ++ if (WARN_ON(!glb_version) || WARN_ON(!features) || ++ WARN_ON(!group_num) || WARN_ON(!prfcnt_size) || ++ WARN_ON(!instr_features)) ++ return 0; ++ ++ *glb_version = iface->version; ++ *features = iface->features; ++ *group_num = iface->group_num; ++ *prfcnt_size = iface->prfcnt_size; ++ *instr_features = iface->instr_features; ++ ++ return copy_grp_and_stm(iface, group_data, max_group_num, ++ stream_data, max_total_stream_num); ++} ++ ++const char *kbase_csf_firmware_get_timeline_metadata( ++ struct kbase_device *kbdev, const char *name, size_t 
*size) ++{ ++ if (WARN_ON(!kbdev) || ++ WARN_ON(!name) || ++ WARN_ON(!size)) { ++ return NULL; ++ } ++ ++ *size = 0; ++ return NULL; ++} ++ ++void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev) ++{ ++ /* NO_MALI: Nothing to do here */ ++} ++ ++int kbase_csf_firmware_mcu_shared_mapping_init( ++ struct kbase_device *kbdev, ++ unsigned int num_pages, ++ unsigned long cpu_map_properties, ++ unsigned long gpu_map_properties, ++ struct kbase_csf_mapping *csf_mapping) ++{ ++ struct tagged_addr *phys; ++ struct kbase_va_region *va_reg; ++ struct page **page_list; ++ void *cpu_addr; ++ int i, ret = 0; ++ pgprot_t cpu_map_prot = PAGE_KERNEL; ++ unsigned long gpu_map_prot; ++ ++ if (cpu_map_properties & PROT_READ) ++ cpu_map_prot = PAGE_KERNEL_RO; ++ ++ if (kbdev->system_coherency == COHERENCY_ACE) { ++ gpu_map_prot = ++ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE); ++ } else { ++ gpu_map_prot = ++ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); ++ cpu_map_prot = pgprot_writecombine(cpu_map_prot); ++ }; ++ ++ phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL); ++ if (!phys) ++ goto out; ++ ++ page_list = kmalloc_array(num_pages, sizeof(*page_list), GFP_KERNEL); ++ if (!page_list) ++ goto page_list_alloc_error; ++ ++ ret = kbase_mem_pool_alloc_pages( ++ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], ++ num_pages, phys, false); ++ if (ret <= 0) ++ goto phys_mem_pool_alloc_error; ++ ++ for (i = 0; i < num_pages; i++) ++ page_list[i] = as_page(phys[i]); ++ ++ cpu_addr = vmap(page_list, num_pages, VM_MAP, cpu_map_prot); ++ if (!cpu_addr) ++ goto vmap_error; ++ ++ va_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0, ++ num_pages, KBASE_REG_ZONE_MCU_SHARED); ++ if (!va_reg) ++ goto va_region_alloc_error; ++ ++ mutex_lock(&kbdev->csf.reg_lock); ++ ret = kbase_add_va_region_rbtree(kbdev, va_reg, 0, num_pages, 1); ++ va_reg->flags &= ~KBASE_REG_FREE; ++ if (ret) ++ goto va_region_add_error; ++ 
mutex_unlock(&kbdev->csf.reg_lock); ++ ++ gpu_map_properties &= (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR); ++ gpu_map_properties |= gpu_map_prot; ++ ++ ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, ++ va_reg->start_pfn, &phys[0], num_pages, ++ gpu_map_properties, KBASE_MEM_GROUP_CSF_FW); ++ if (ret) ++ goto mmu_insert_pages_error; ++ ++ kfree(page_list); ++ csf_mapping->phys = phys; ++ csf_mapping->cpu_addr = cpu_addr; ++ csf_mapping->va_reg = va_reg; ++ csf_mapping->num_pages = num_pages; ++ ++ return 0; ++ ++mmu_insert_pages_error: ++ mutex_lock(&kbdev->csf.reg_lock); ++ kbase_remove_va_region(va_reg); ++va_region_add_error: ++ kbase_free_alloced_region(va_reg); ++ mutex_unlock(&kbdev->csf.reg_lock); ++va_region_alloc_error: ++ vunmap(cpu_addr); ++vmap_error: ++ kbase_mem_pool_free_pages( ++ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], ++ num_pages, phys, false, false); ++ ++phys_mem_pool_alloc_error: ++ kfree(page_list); ++page_list_alloc_error: ++ kfree(phys); ++out: ++ /* Zero-initialize the mapping to make sure that the termination ++ * function doesn't try to unmap or free random addresses. 
++ */ ++ csf_mapping->phys = NULL; ++ csf_mapping->cpu_addr = NULL; ++ csf_mapping->va_reg = NULL; ++ csf_mapping->num_pages = 0; ++ ++ return -ENOMEM; ++} ++ ++void kbase_csf_firmware_mcu_shared_mapping_term( ++ struct kbase_device *kbdev, struct kbase_csf_mapping *csf_mapping) ++{ ++ if (csf_mapping->va_reg) { ++ mutex_lock(&kbdev->csf.reg_lock); ++ kbase_remove_va_region(csf_mapping->va_reg); ++ kbase_free_alloced_region(csf_mapping->va_reg); ++ mutex_unlock(&kbdev->csf.reg_lock); ++ } ++ ++ if (csf_mapping->phys) { ++ kbase_mem_pool_free_pages( ++ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], ++ csf_mapping->num_pages, csf_mapping->phys, false, ++ false); ++ } ++ ++ vunmap(csf_mapping->cpu_addr); ++ kfree(csf_mapping->phys); ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_heap_context_alloc.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_heap_context_alloc.c +new file mode 100644 +index 0000000..96746c6 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_heap_context_alloc.c +@@ -0,0 +1,195 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ ++ ++#include ++#include "mali_kbase_csf_heap_context_alloc.h" ++ ++/* Size of one heap context structure, in bytes. */ ++#define HEAP_CTX_SIZE ((size_t)32) ++ ++/* Total size of the GPU memory region allocated for heap contexts, in bytes. */ ++#define HEAP_CTX_REGION_SIZE (MAX_TILER_HEAPS * HEAP_CTX_SIZE) ++ ++/** ++ * sub_alloc - Sub-allocate a heap context from a GPU memory region ++ * ++ * @ctx_alloc: Pointer to the heap context allocator. ++ * ++ * Return: GPU virtual address of the allocated heap context or 0 on failure. ++ */ ++static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc) ++{ ++ struct kbase_context *const kctx = ctx_alloc->kctx; ++ int heap_nr = 0; ++ size_t ctx_offset = 0; ++ u64 heap_gpu_va = 0; ++ struct kbase_vmap_struct mapping; ++ void *ctx_ptr = NULL; ++ ++ lockdep_assert_held(&ctx_alloc->lock); ++ ++ heap_nr = find_first_zero_bit(ctx_alloc->in_use, ++ MAX_TILER_HEAPS); ++ ++ if (unlikely(heap_nr >= MAX_TILER_HEAPS)) { ++ dev_err(kctx->kbdev->dev, ++ "No free tiler heap contexts in the pool\n"); ++ return 0; ++ } ++ ++ ctx_offset = heap_nr * HEAP_CTX_SIZE; ++ heap_gpu_va = ctx_alloc->gpu_va + ctx_offset; ++ ctx_ptr = kbase_vmap_prot(kctx, heap_gpu_va, ++ HEAP_CTX_SIZE, KBASE_REG_CPU_WR, &mapping); ++ ++ if (unlikely(!ctx_ptr)) { ++ dev_err(kctx->kbdev->dev, ++ "Failed to map tiler heap context %d (0x%llX)\n", ++ heap_nr, heap_gpu_va); ++ return 0; ++ } ++ ++ memset(ctx_ptr, 0, HEAP_CTX_SIZE); ++ kbase_vunmap(ctx_ptr, &mapping); ++ ++ bitmap_set(ctx_alloc->in_use, heap_nr, 1); ++ ++ dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %d (0x%llX)\n", ++ heap_nr, heap_gpu_va); ++ ++ return heap_gpu_va; ++} ++ ++/** ++ * sub_free - Free a heap context sub-allocated from a GPU memory region ++ * ++ * @ctx_alloc: Pointer to the heap context allocator. ++ * @heap_gpu_va: The GPU virtual address of a heap context structure to free. 
++ */
++static void sub_free(struct kbase_csf_heap_context_allocator *const ctx_alloc,
++	u64 const heap_gpu_va)
++{
++	struct kbase_context *const kctx = ctx_alloc->kctx;
++	u64 ctx_offset = 0;
++	unsigned int heap_nr = 0;
++
++	lockdep_assert_held(&ctx_alloc->lock);
++
++	/* Nothing can be freed before the pool region exists. */
++	if (WARN_ON(!ctx_alloc->region))
++		return;
++
++	if (WARN_ON(heap_gpu_va < ctx_alloc->gpu_va))
++		return;
++
++	ctx_offset = heap_gpu_va - ctx_alloc->gpu_va;
++
++	/* Reject addresses past the end of the pool or not aligned to the
++	 * start of a context slot.
++	 */
++	if (WARN_ON(ctx_offset >= HEAP_CTX_REGION_SIZE) ||
++		WARN_ON(ctx_offset % HEAP_CTX_SIZE))
++		return;
++
++	heap_nr = ctx_offset / HEAP_CTX_SIZE;
++	/* heap_nr is unsigned, so it is printed with %u. */
++	dev_dbg(kctx->kbdev->dev,
++		"Freed tiler heap context %u (0x%llX)\n", heap_nr, heap_gpu_va);
++
++	bitmap_clear(ctx_alloc->in_use, heap_nr, 1);
++}
++
++int kbase_csf_heap_context_allocator_init(
++	struct kbase_csf_heap_context_allocator *const ctx_alloc,
++	struct kbase_context *const kctx)
++{
++	/* We cannot pre-allocate GPU memory here because the
++	 * custom VA zone may not have been created yet.
++	 */
++	ctx_alloc->kctx = kctx;
++	ctx_alloc->region = NULL;
++	ctx_alloc->gpu_va = 0;
++
++	mutex_init(&ctx_alloc->lock);
++	bitmap_zero(ctx_alloc->in_use, MAX_TILER_HEAPS);
++
++	dev_dbg(kctx->kbdev->dev,
++		"Initialized a tiler heap context allocator\n");
++
++	return 0;
++}
++
++void kbase_csf_heap_context_allocator_term(
++	struct kbase_csf_heap_context_allocator *const ctx_alloc)
++{
++	struct kbase_context *const kctx = ctx_alloc->kctx;
++
++	dev_dbg(kctx->kbdev->dev,
++		"Terminating tiler heap context allocator\n");
++
++	if (ctx_alloc->region) {
++		kbase_gpu_vm_lock(kctx);
++		ctx_alloc->region->flags &= ~KBASE_REG_NO_USER_FREE;
++		kbase_mem_free_region(kctx, ctx_alloc->region);
++		kbase_gpu_vm_unlock(kctx);
++	}
++
++	mutex_destroy(&ctx_alloc->lock);
++}
++
++u64 kbase_csf_heap_context_allocator_alloc(
++	struct kbase_csf_heap_context_allocator *const ctx_alloc)
++{
++	struct kbase_context *const kctx = ctx_alloc->kctx;
++	u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR
| ++ BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE; ++ u64 nr_pages = PFN_UP(HEAP_CTX_REGION_SIZE); ++ u64 heap_gpu_va = 0; ++ ++#ifdef CONFIG_MALI_VECTOR_DUMP ++ flags |= BASE_MEM_PROT_CPU_RD; ++#endif ++ ++ mutex_lock(&ctx_alloc->lock); ++ ++ /* If the pool of heap contexts wasn't already allocated then ++ * allocate it. ++ */ ++ if (!ctx_alloc->region) { ++ ctx_alloc->region = kbase_mem_alloc(kctx, nr_pages, nr_pages, ++ 0, &flags, &ctx_alloc->gpu_va); ++ } ++ ++ /* If the pool still isn't allocated then an error occurred. */ ++ if (unlikely(!ctx_alloc->region)) { ++ dev_err(kctx->kbdev->dev, "Failed to allocate a pool of tiler heap contexts\n"); ++ } else { ++ heap_gpu_va = sub_alloc(ctx_alloc); ++ } ++ ++ mutex_unlock(&ctx_alloc->lock); ++ ++ return heap_gpu_va; ++} ++ ++void kbase_csf_heap_context_allocator_free( ++ struct kbase_csf_heap_context_allocator *const ctx_alloc, ++ u64 const heap_gpu_va) ++{ ++ mutex_lock(&ctx_alloc->lock); ++ sub_free(ctx_alloc, heap_gpu_va); ++ mutex_unlock(&ctx_alloc->lock); ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_heap_context_alloc.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_heap_context_alloc.h +new file mode 100644 +index 0000000..993db63 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_heap_context_alloc.h +@@ -0,0 +1,75 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#include ++ ++#ifndef _KBASE_CSF_HEAP_CONTEXT_ALLOC_H_ ++#define _KBASE_CSF_HEAP_CONTEXT_ALLOC_H_ ++ ++/** ++ * kbase_csf_heap_context_allocator_init - Initialize an allocator for heap ++ * contexts ++ * @ctx_alloc: Pointer to the heap context allocator to initialize. ++ * @kctx: Pointer to the kbase context. ++ * ++ * Return: 0 if successful or a negative error code on failure. ++ */ ++int kbase_csf_heap_context_allocator_init( ++ struct kbase_csf_heap_context_allocator *const ctx_alloc, ++ struct kbase_context *const kctx); ++ ++/** ++ * kbase_csf_heap_context_allocator_term - Terminate an allocator for heap ++ * contexts ++ * @ctx_alloc: Pointer to the heap context allocator to terminate. ++ */ ++void kbase_csf_heap_context_allocator_term( ++ struct kbase_csf_heap_context_allocator *const ctx_alloc); ++ ++/** ++ * kbase_csf_heap_context_allocator_alloc - Allocate a heap context structure ++ * ++ * If this function is successful then it returns the address of a ++ * zero-initialized heap context structure for use by the firmware. ++ * ++ * @ctx_alloc: Pointer to the heap context allocator. ++ * ++ * Return: GPU virtual address of the allocated heap context or 0 on failure. ++ */ ++u64 kbase_csf_heap_context_allocator_alloc( ++ struct kbase_csf_heap_context_allocator *const ctx_alloc); ++ ++/** ++ * kbase_csf_heap_context_allocator_free - Free a heap context structure ++ * ++ * This function returns a heap context structure to the free pool of unused ++ * contexts for possible reuse by a future call to ++ * @kbase_csf_heap_context_allocator_alloc. ++ * ++ * @ctx_alloc: Pointer to the heap context allocator. 
++ * @heap_gpu_va: The GPU virtual address of a heap context structure that ++ * was allocated for the firmware. ++ */ ++void kbase_csf_heap_context_allocator_free( ++ struct kbase_csf_heap_context_allocator *const ctx_alloc, ++ u64 const heap_gpu_va); ++ ++#endif /* _KBASE_CSF_HEAP_CONTEXT_ALLOC_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu.c +new file mode 100644 +index 0000000..4e26a49 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu.c +@@ -0,0 +1,2258 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include "device/mali_kbase_device.h" ++#include "mali_kbase_csf.h" ++#include ++ ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++#include "mali_kbase_fence.h" ++#include "mali_kbase_sync.h" ++ ++static DEFINE_SPINLOCK(kbase_csf_fence_lock); ++#endif ++ ++static void kcpu_queue_process(struct kbase_kcpu_command_queue *kcpu_queue, ++ bool ignore_waits); ++ ++static void kcpu_queue_process_worker(struct work_struct *data); ++ ++static int kbase_kcpu_map_import_prepare( ++ struct kbase_kcpu_command_queue *kcpu_queue, ++ struct base_kcpu_command_import_info *import_info, ++ struct kbase_kcpu_command *current_command) ++{ ++ struct kbase_context *const kctx = kcpu_queue->kctx; ++ struct kbase_va_region *reg; ++ int ret = 0; ++ ++ lockdep_assert_held(&kctx->csf.kcpu_queues.lock); ++ ++ /* Take the processes mmap lock */ ++ down_read(kbase_mem_get_process_mmap_lock()); ++ kbase_gpu_vm_lock(kctx); ++ ++ reg = kbase_region_tracker_find_region_enclosing_address(kctx, ++ import_info->handle); ++ ++ if (kbase_is_region_invalid_or_free(reg) || ++ !kbase_mem_is_imported(reg->gpu_alloc->type)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) { ++ /* Pin the physical pages backing the user buffer while ++ * we are in the process context and holding the mmap lock. ++ * The dma mapping & GPU mapping of the pages would be done ++ * when the MAP_IMPORT operation is executed. ++ * ++ * Though the pages would be pinned, no reference is taken ++ * on the physical pages tracking object. When the last ++ * reference to the tracking object is dropped the pages ++ * would be unpinned if they weren't unpinned before. 
++ */ ++ ret = kbase_jd_user_buf_pin_pages(kctx, reg); ++ if (ret) ++ goto out; ++ } ++ ++ current_command->type = BASE_KCPU_COMMAND_TYPE_MAP_IMPORT; ++ current_command->info.import.gpu_va = import_info->handle; ++ ++out: ++ kbase_gpu_vm_unlock(kctx); ++ /* Release the processes mmap lock */ ++ up_read(kbase_mem_get_process_mmap_lock()); ++ ++ return ret; ++} ++ ++static int kbase_kcpu_unmap_import_prepare_internal( ++ struct kbase_kcpu_command_queue *kcpu_queue, ++ struct base_kcpu_command_import_info *import_info, ++ struct kbase_kcpu_command *current_command, ++ enum base_kcpu_command_type type) ++{ ++ struct kbase_context *const kctx = kcpu_queue->kctx; ++ struct kbase_va_region *reg; ++ int ret = 0; ++ ++ lockdep_assert_held(&kctx->csf.kcpu_queues.lock); ++ ++ kbase_gpu_vm_lock(kctx); ++ ++ reg = kbase_region_tracker_find_region_enclosing_address(kctx, ++ import_info->handle); ++ ++ if (kbase_is_region_invalid_or_free(reg) || ++ !kbase_mem_is_imported(reg->gpu_alloc->type)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) { ++ /* The pages should have been pinned when MAP_IMPORT ++ * was enqueued previously. 
++ */ ++ if (reg->gpu_alloc->nents != ++ reg->gpu_alloc->imported.user_buf.nr_pages) { ++ ret = -EINVAL; ++ goto out; ++ } ++ } ++ ++ current_command->type = type; ++ current_command->info.import.gpu_va = import_info->handle; ++ ++out: ++ kbase_gpu_vm_unlock(kctx); ++ ++ return ret; ++} ++ ++static int kbase_kcpu_unmap_import_prepare( ++ struct kbase_kcpu_command_queue *kcpu_queue, ++ struct base_kcpu_command_import_info *import_info, ++ struct kbase_kcpu_command *current_command) ++{ ++ return kbase_kcpu_unmap_import_prepare_internal(kcpu_queue, ++ import_info, current_command, ++ BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT); ++} ++ ++static int kbase_kcpu_unmap_import_force_prepare( ++ struct kbase_kcpu_command_queue *kcpu_queue, ++ struct base_kcpu_command_import_info *import_info, ++ struct kbase_kcpu_command *current_command) ++{ ++ return kbase_kcpu_unmap_import_prepare_internal(kcpu_queue, ++ import_info, current_command, ++ BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE); ++} ++ ++/** ++ * kbase_jit_add_to_pending_alloc_list() - Pend JIT allocation ++ * ++ * @queue: The queue containing this JIT allocation ++ * @cmd: The JIT allocation that is blocking this queue ++ */ ++static void kbase_jit_add_to_pending_alloc_list( ++ struct kbase_kcpu_command_queue *queue, ++ struct kbase_kcpu_command *cmd) ++{ ++ struct kbase_context *const kctx = queue->kctx; ++ struct list_head *target_list_head = ++ &kctx->csf.kcpu_queues.jit_blocked_queues; ++ struct kbase_kcpu_command_queue *blocked_queue; ++ ++ lockdep_assert_held(&kctx->csf.kcpu_queues.lock); ++ ++ list_for_each_entry(blocked_queue, ++ &kctx->csf.kcpu_queues.jit_blocked_queues, ++ jit_blocked) { ++ struct kbase_kcpu_command const*const jit_alloc_cmd = ++ &blocked_queue->commands[blocked_queue->start_offset]; ++ ++ WARN_ON(jit_alloc_cmd->type != BASE_KCPU_COMMAND_TYPE_JIT_ALLOC); ++ if (cmd->enqueue_ts < jit_alloc_cmd->enqueue_ts) { ++ target_list_head = &blocked_queue->jit_blocked; ++ break; ++ } ++ } ++ ++ 
list_add_tail(&queue->jit_blocked, target_list_head); ++} ++ ++/** ++ * kbase_kcpu_jit_allocate_process() - Process JIT allocation ++ * ++ * @queue: The queue containing this JIT allocation ++ * @cmd: The JIT allocation command ++ */ ++static int kbase_kcpu_jit_allocate_process( ++ struct kbase_kcpu_command_queue *queue, ++ struct kbase_kcpu_command *cmd) ++{ ++ struct kbase_context *const kctx = queue->kctx; ++ struct kbase_kcpu_command_jit_alloc_info *alloc_info = ++ &cmd->info.jit_alloc; ++ struct base_jit_alloc_info *info = alloc_info->info; ++ struct kbase_vmap_struct mapping; ++ struct kbase_va_region *reg; ++ u32 count = alloc_info->count; ++ u64 *ptr, new_addr; ++ u32 i; ++ int ret; ++ ++ lockdep_assert_held(&kctx->csf.kcpu_queues.lock); ++ ++ if (alloc_info->blocked) { ++ list_del(&queue->jit_blocked); ++ alloc_info->blocked = false; ++ } ++ ++ if (WARN_ON(!info)) ++ return -EINVAL; ++ ++ /* Check if all JIT IDs are not in use */ ++ for (i = 0; i < count; i++, info++) { ++ /* The JIT ID is still in use so fail the allocation */ ++ if (kctx->jit_alloc[info->id]) { ++ dev_warn(kctx->kbdev->dev, "JIT ID still in use\n"); ++ return -EINVAL; ++ } ++ } ++ ++ /* Now start the allocation loop */ ++ for (i = 0, info = alloc_info->info; i < count; i++, info++) { ++ /* Create a JIT allocation */ ++ reg = kbase_jit_allocate(kctx, info, true); ++ if (!reg) { ++ bool can_block = false; ++ struct kbase_kcpu_command const *jit_cmd; ++ ++ list_for_each_entry(jit_cmd, &kctx->csf.kcpu_queues.jit_cmds_head, info.jit_alloc.node) { ++ if (jit_cmd == cmd) ++ break; ++ ++ if (jit_cmd->type == BASE_KCPU_COMMAND_TYPE_JIT_FREE) { ++ u8 const*const free_ids = jit_cmd->info.jit_free.ids; ++ ++ if (free_ids && *free_ids && kctx->jit_alloc[*free_ids]) { ++ /* ++ * A JIT free which is active ++ * and submitted before this ++ * command. ++ */ ++ can_block = true; ++ break; ++ } ++ } ++ } ++ ++ if (!can_block) { ++ /* ++ * No prior JIT_FREE command is active. 
Roll ++ * back previous allocations and fail. ++ */ ++ dev_warn_ratelimited(kctx->kbdev->dev, "JIT alloc command failed: %pK\n", cmd); ++ ret = -ENOMEM; ++ goto fail; ++ } ++ ++ /* There are pending frees for an active allocation ++ * so we should wait to see whether they free the ++ * memory. Add to the list of atoms for which JIT ++ * allocation is pending. ++ */ ++ kbase_jit_add_to_pending_alloc_list(queue, cmd); ++ alloc_info->blocked = true; ++ ++ /* Rollback, the whole set will be re-attempted */ ++ while (i-- > 0) { ++ info--; ++ kbase_jit_free(kctx, kctx->jit_alloc[info->id]); ++ kctx->jit_alloc[info->id] = NULL; ++ } ++ ++ return -EAGAIN; ++ } ++ ++ /* Bind it to the user provided ID. */ ++ kctx->jit_alloc[info->id] = reg; ++ } ++ ++ for (i = 0, info = alloc_info->info; i < count; i++, info++) { ++ /* ++ * Write the address of the JIT allocation to the user provided ++ * GPU allocation. ++ */ ++ ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr), ++ &mapping); ++ if (!ptr) { ++ ret = -ENOMEM; ++ goto fail; ++ } ++ ++ reg = kctx->jit_alloc[info->id]; ++ new_addr = reg->start_pfn << PAGE_SHIFT; ++ *ptr = new_addr; ++ kbase_vunmap(kctx, &mapping); ++ } ++ ++ return 0; ++ ++fail: ++ /* Roll back completely */ ++ for (i = 0, info = alloc_info->info; i < count; i++, info++) { ++ /* Free the allocations that were successful. ++ * Mark all the allocations including the failed one and the ++ * other un-attempted allocations in the set, so we know they ++ * are in use. 
++		 */
++		if (kctx->jit_alloc[info->id])
++			kbase_jit_free(kctx, kctx->jit_alloc[info->id]);
++
++		kctx->jit_alloc[info->id] = KBASE_RESERVED_REG_JIT_ALLOC;
++	}
++
++	return ret;
++}
++
++/**
++ * kbase_kcpu_jit_allocate_prepare() - Validate and stage a JIT_ALLOC command
++ *
++ * @kcpu_queue:      The queue the command is being enqueued on
++ * @alloc_info:      User-supplied descriptor holding a user pointer to an
++ *                   array of struct base_jit_alloc_info and its element count
++ * @current_command: Command slot that is filled in on success
++ *
++ * Copies the array from user space, validates every entry with
++ * kbasep_jit_alloc_validate(), rejects duplicate JIT IDs and links the
++ * command into kcpu_queues.jit_cmds_head. On success the kernel copy of the
++ * array is stored in the command. The kcpu_queues lock must be held.
++ *
++ * Return: 0 on success; -EINVAL for a NULL user pointer, an out-of-range
++ *         count, a faulting copy or duplicate IDs; -ENOMEM if the kernel copy
++ *         cannot be allocated; otherwise the error returned by
++ *         kbasep_jit_alloc_validate().
++ */
++static int kbase_kcpu_jit_allocate_prepare(
++		struct kbase_kcpu_command_queue *kcpu_queue,
++		struct base_kcpu_command_jit_alloc_info *alloc_info,
++		struct kbase_kcpu_command *current_command)
++{
++	struct kbase_context *const kctx = kcpu_queue->kctx;
++	void __user *data = u64_to_user_ptr(alloc_info->info);
++	struct base_jit_alloc_info *info;
++	u32 count = alloc_info->count;
++	int ret = 0;
++	u32 i;
++
++	lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
++
++	if (!data || count > kcpu_queue->kctx->jit_max_allocations ||
++			count > ARRAY_SIZE(kctx->jit_alloc)) {
++		ret = -EINVAL;
++		goto out;
++	}
++
++	info = kmalloc_array(count, sizeof(*info), GFP_KERNEL);
++	if (!info) {
++		ret = -ENOMEM;
++		goto out;
++	}
++
++	if (copy_from_user(info, data, sizeof(*info) * count) != 0) {
++		ret = -EINVAL;
++		goto out_free;
++	}
++
++	for (i = 0; i < count; i++) {
++		ret = kbasep_jit_alloc_validate(kctx, &info[i]);
++		if (ret)
++			goto out_free;
++	}
++
++	/* Search for duplicate JIT ids. The bound is written as
++	 * (i + 1) < count because count is unsigned and a count of 0 is not
++	 * rejected above: (count - 1) would wrap to U32_MAX and the outer
++	 * loop would spin for ~4 billion iterations with the kcpu_queues
++	 * lock held.
++	 */
++	for (i = 0; (i + 1) < count; i++) {
++		u32 j;
++
++		for (j = (i + 1); j < count; j++) {
++			if (info[i].id == info[j].id) {
++				ret = -EINVAL;
++				goto out_free;
++			}
++		}
++	}
++
++	current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_ALLOC;
++	list_add_tail(&current_command->info.jit_alloc.node,
++			&kctx->csf.kcpu_queues.jit_cmds_head);
++	current_command->info.jit_alloc.info = info;
++	current_command->info.jit_alloc.count = count;
++	current_command->info.jit_alloc.blocked = false;
++
++	return 0;
++out_free:
++	kfree(info);
++out:
++	return ret;
++}
++
++/**
++ * kbase_kcpu_jit_allocate_finish() - Finish handling the JIT_ALLOC command
++ *
++ * @queue: The queue containing this JIT allocation
++ * @cmd:  The JIT allocation command
++ */
++static void kbase_kcpu_jit_allocate_finish(
++		struct
kbase_kcpu_command_queue *queue, ++ struct kbase_kcpu_command *cmd) ++{ ++ lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); ++ ++ /* Remove this command from the jit_cmds_head list */ ++ list_del(&cmd->info.jit_alloc.node); ++ ++ /* ++ * If we get to this point we must have already cleared the blocked ++ * flag, otherwise it'd be a bug. ++ */ ++ if (WARN_ON(cmd->info.jit_alloc.blocked)) { ++ list_del(&queue->jit_blocked); ++ cmd->info.jit_alloc.blocked = false; ++ } ++ ++ kfree(cmd->info.jit_alloc.info); ++} ++ ++/** ++ * kbase_kcpu_jit_retry_pending_allocs() - Retry blocked JIT_ALLOC commands ++ * ++ * @kctx: The context containing the blocked JIT_ALLOC commands ++ */ ++static void kbase_kcpu_jit_retry_pending_allocs(struct kbase_context *kctx) ++{ ++ struct kbase_kcpu_command_queue *blocked_queue; ++ ++ lockdep_assert_held(&kctx->csf.kcpu_queues.lock); ++ ++ /* ++ * Reschedule all queues blocked by JIT_ALLOC commands. ++ * NOTE: This code traverses the list of blocked queues directly. It ++ * only works as long as the queued works are not executed at the same ++ * time. This precondition is true since we're holding the ++ * kbase_csf_kcpu_queue_context.lock . 
++ */ ++ list_for_each_entry(blocked_queue, ++ &kctx->csf.kcpu_queues.jit_blocked_queues, jit_blocked) ++ queue_work(kctx->csf.kcpu_queues.wq, &blocked_queue->work); ++} ++ ++static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue, ++ struct kbase_kcpu_command *const cmd) ++{ ++ struct kbase_kcpu_command_jit_free_info const *const free_info = ++ &cmd->info.jit_free; ++ u8 const *const ids = free_info->ids; ++ u32 const count = free_info->count; ++ u32 i; ++ int rc = 0; ++ struct kbase_context *kctx = queue->kctx; ++ ++ if (WARN_ON(!ids)) ++ return -EINVAL; ++ ++ lockdep_assert_held(&kctx->csf.kcpu_queues.lock); ++ ++ KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END( ++ queue->kctx->kbdev, queue); ++ ++ for (i = 0; i < count; i++) { ++ u64 pages_used = 0; ++ int item_err = 0; ++ ++ if (!kctx->jit_alloc[ids[i]]) { ++ dev_warn(kctx->kbdev->dev, "invalid JIT free ID\n"); ++ rc = -EINVAL; ++ item_err = rc; ++ } else { ++ struct kbase_va_region *const reg = kctx->jit_alloc[ids[i]]; ++ ++ /* ++ * If the ID is valid but the allocation request failed, still ++ * succeed this command but don't try and free the allocation. ++ */ ++ if (reg != KBASE_RESERVED_REG_JIT_ALLOC) { ++ pages_used = reg->gpu_alloc->nents; ++ kbase_jit_free(kctx, reg); ++ } ++ ++ kctx->jit_alloc[ids[i]] = NULL; ++ } ++ ++ KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END( ++ queue->kctx->kbdev, queue, item_err, pages_used); ++ } ++ ++ /* Free the list of ids */ ++ kfree(ids); ++ ++ /* ++ * Remove this command from the jit_cmds_head list and retry pending ++ * allocations. 
++	 */
++	list_del(&cmd->info.jit_free.node);
++	kbase_kcpu_jit_retry_pending_allocs(kctx);
++
++	return rc;
++}
++
++/**
++ * kbase_kcpu_jit_free_prepare() - Validate and stage a JIT_FREE command
++ *
++ * @kcpu_queue:      The queue the command is being enqueued on
++ * @free_info:       User-supplied descriptor holding a user pointer to an
++ *                   array of u8 JIT IDs and its element count
++ * @current_command: Command slot that is filled in on success
++ *
++ * Copies the ID array from user space, rejects zero and duplicate IDs and
++ * links the command into kcpu_queues.jit_cmds_head. On success the kernel
++ * copy of the array is stored in the command. The kcpu_queues lock must be
++ * held.
++ *
++ * Return: 0 on success, -EINVAL for invalid input, -ENOMEM if the kernel
++ *         copy cannot be allocated.
++ */
++static int kbase_kcpu_jit_free_prepare(
++		struct kbase_kcpu_command_queue *kcpu_queue,
++		struct base_kcpu_command_jit_free_info *free_info,
++		struct kbase_kcpu_command *current_command)
++{
++	struct kbase_context *const kctx = kcpu_queue->kctx;
++	void __user *data = u64_to_user_ptr(free_info->ids);
++	u8 *ids;
++	u32 count = free_info->count;
++	int ret;
++	u32 i;
++
++	lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
++
++	/* Sanity checks */
++	if (!count || count > ARRAY_SIZE(kctx->jit_alloc)) {
++		ret = -EINVAL;
++		goto out;
++	}
++
++	/* Copy the information for safe access and future storage */
++	ids = kmalloc_array(count, sizeof(*ids), GFP_KERNEL);
++	if (!ids) {
++		ret = -ENOMEM;
++		goto out;
++	}
++
++	if (!data) {
++		ret = -EINVAL;
++		goto out_free;
++	}
++
++	if (copy_from_user(ids, data, sizeof(*ids) * count)) {
++		ret = -EINVAL;
++		goto out_free;
++	}
++
++	for (i = 0; i < count; i++) {
++		/* Fail the command if ID sent is zero */
++		if (!ids[i]) {
++			ret = -EINVAL;
++			goto out_free;
++		}
++	}
++
++	/* Search for duplicate JIT ids (count >= 1 is guaranteed above) */
++	for (i = 0; i < (count - 1); i++) {
++		u32 j;
++
++		for (j = (i + 1); j < count; j++) {
++			if (ids[i] == ids[j]) {
++				ret = -EINVAL;
++				goto out_free;
++			}
++		}
++	}
++
++	current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_FREE;
++	list_add_tail(&current_command->info.jit_free.node,
++			&kctx->csf.kcpu_queues.jit_cmds_head);
++	current_command->info.jit_free.ids = ids;
++	current_command->info.jit_free.count = count;
++
++	return 0;
++out_free:
++	kfree(ids);
++out:
++	return ret;
++}
++
++/**
++ * kbase_csf_queue_group_suspend_prepare() - Stage a GROUP_SUSPEND command
++ *
++ * @kcpu_queue:      The queue the command is being enqueued on
++ * @suspend_buf:     User-supplied descriptor: buffer address, buffer size and
++ *                   queue group handle
++ * @current_command: Command slot that is filled in on success
++ *
++ * Builds a struct kbase_suspend_copy_buffer holding the struct page pointers
++ * that back the user buffer. If the address is not covered by a valid GPU VA
++ * region the pages are pinned with get_user_pages_fast(); otherwise they are
++ * taken from the region's CPU physical allocation, on which a reference is
++ * taken. The kcpu_queues lock must be held.
++ *
++ * Return: 0 on success, negative error code otherwise. Nothing stays pinned
++ *         or allocated on failure.
++ */
++static int kbase_csf_queue_group_suspend_prepare(
++		struct kbase_kcpu_command_queue *kcpu_queue,
++		struct base_kcpu_command_group_suspend_info *suspend_buf,
++		struct kbase_kcpu_command *current_command)
++{
++	struct kbase_context *const kctx = kcpu_queue->kctx;
++	struct kbase_suspend_copy_buffer *sus_buf = NULL;
++	u64 addr = suspend_buf->buffer;
++	u64 page_addr = addr & PAGE_MASK;
++	u64 end_addr = addr + suspend_buf->size - 1;
++	u64 last_page_addr = end_addr & PAGE_MASK;
++	int nr_pages = (last_page_addr - page_addr) / PAGE_SIZE + 1;
++	int pinned_pages = 0, ret = 0;
++	struct kbase_va_region *reg;
++
++	lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
++
++	if (suspend_buf->size <
++			kctx->kbdev->csf.global_iface.groups[0].suspend_size)
++		return -EINVAL;
++
++	ret = kbase_csf_queue_group_handle_is_valid(kctx,
++			suspend_buf->group_handle);
++	if (ret)
++		return ret;
++
++	sus_buf = kzalloc(sizeof(*sus_buf), GFP_KERNEL);
++	if (!sus_buf)
++		return -ENOMEM;
++
++	sus_buf->size = suspend_buf->size;
++	sus_buf->nr_pages = nr_pages;
++	sus_buf->offset = addr & ~PAGE_MASK;
++
++	sus_buf->pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
++	if (!sus_buf->pages) {
++		ret = -ENOMEM;
++		goto out_clean_sus_buf;
++	}
++
++	/* Check if the page_addr is a valid GPU VA from SAME_VA zone,
++	 * otherwise consider it is a CPU VA corresponding to the Host
++	 * memory allocated by userspace.
++	 */
++	kbase_gpu_vm_lock(kctx);
++	reg = kbase_region_tracker_find_region_enclosing_address(kctx,
++		page_addr);
++
++	if (kbase_is_region_invalid_or_free(reg)) {
++		kbase_gpu_vm_unlock(kctx);
++		pinned_pages = get_user_pages_fast(page_addr, nr_pages, 1,
++							sus_buf->pages);
++		kbase_gpu_vm_lock(kctx);
++
++		if (pinned_pages < 0) {
++			ret = pinned_pages;
++			goto out_clean_pages;
++		}
++		if (pinned_pages != nr_pages) {
++			/* Partial pin: the page array is freed on the error
++			 * path, so drop the references taken so far here or
++			 * they are leaked.
++			 */
++			while (pinned_pages-- > 0)
++				put_page(sus_buf->pages[pinned_pages]);
++			ret = -EINVAL;
++			goto out_clean_pages;
++		}
++	} else {
++		struct tagged_addr *page_array;
++		u64 start, end, i;
++
++		if (!(reg->flags & BASE_MEM_SAME_VA) ||
++				reg->nr_pages < nr_pages ||
++				kbase_reg_current_backed_size(reg) !=
++					reg->nr_pages) {
++			ret = -EINVAL;
++			goto out_clean_pages;
++		}
++
++		start = PFN_DOWN(page_addr) - reg->start_pfn;
++		end = start + nr_pages;
++
++		if (end > reg->nr_pages) {
++			ret = -EINVAL;
++			goto out_clean_pages;
++		}
++
++		sus_buf->cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
++		kbase_mem_phy_alloc_kernel_mapped(reg->cpu_alloc);
++		page_array = kbase_get_cpu_phy_pages(reg);
++		page_array += start;
++
++		for (i = 0; i < nr_pages; i++, page_array++)
++			sus_buf->pages[i] = as_page(*page_array);
++	}
++
++	kbase_gpu_vm_unlock(kctx);
++	current_command->type = BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND;
++	current_command->info.suspend_buf_copy.sus_buf = sus_buf;
++	current_command->info.suspend_buf_copy.group_handle =
++				suspend_buf->group_handle;
++	return ret;
++
++out_clean_pages:
++	kbase_gpu_vm_unlock(kctx);
++	kfree(sus_buf->pages);
++out_clean_sus_buf:
++	kfree(sus_buf);
++
++	return ret;
++}
++
++static int kbase_csf_queue_group_suspend_process(struct kbase_context *kctx,
++		struct kbase_suspend_copy_buffer *sus_buf,
++		u8 group_handle)
++{
++	return kbase_csf_queue_group_suspend(kctx, sus_buf, group_handle);
++}
++
++static enum kbase_csf_event_callback_action event_cqs_callback(void *param)
++{
++	struct kbase_kcpu_command_queue *kcpu_queue =
++		(struct kbase_kcpu_command_queue *)param;
++	struct
kbase_context *const kctx = kcpu_queue->kctx; ++ ++ queue_work(kctx->csf.kcpu_queues.wq, &kcpu_queue->work); ++ ++ return KBASE_CSF_EVENT_CALLBACK_KEEP; ++} ++ ++static void cleanup_cqs_wait(struct kbase_kcpu_command_queue *queue, ++ struct kbase_kcpu_command_cqs_wait_info *cqs_wait) ++{ ++ WARN_ON(!cqs_wait->nr_objs); ++ WARN_ON(!cqs_wait->objs); ++ WARN_ON(!cqs_wait->signaled); ++ WARN_ON(!queue->cqs_wait_count); ++ ++ if (--queue->cqs_wait_count == 0) { ++ kbase_csf_event_wait_remove(queue->kctx, ++ event_cqs_callback, queue); ++ } ++ ++ kfree(cqs_wait->signaled); ++ kfree(cqs_wait->objs); ++ cqs_wait->signaled = NULL; ++ cqs_wait->objs = NULL; ++} ++ ++static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev, ++ struct kbase_kcpu_command_queue *queue, ++ struct kbase_kcpu_command_cqs_wait_info *cqs_wait) ++{ ++ u32 i; ++ ++ lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); ++ ++ if (WARN_ON(!cqs_wait->objs)) ++ return -EINVAL; ++ ++ /* Skip the CQS waits that have already been signaled when processing */ ++ for (i = find_first_zero_bit(cqs_wait->signaled, cqs_wait->nr_objs); i < cqs_wait->nr_objs; i++) { ++ if (!test_bit(i, cqs_wait->signaled)) { ++ struct kbase_vmap_struct *mapping; ++ bool sig_set; ++ u32 *evt = (u32 *)kbase_phy_alloc_mapping_get(queue->kctx, ++ cqs_wait->objs[i].addr, &mapping); ++ ++ if (!queue->command_started) { ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START( ++ kbdev, queue); ++ queue->command_started = true; ++ KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_WAIT_START, ++ queue, cqs_wait->nr_objs, 0); ++ } ++ ++ if (!evt) { ++ dev_warn(kbdev->dev, ++ "Sync memory %llx already freed", cqs_wait->objs[i].addr); ++ queue->has_error = true; ++ return -EINVAL; ++ } ++ ++ sig_set = evt[BASEP_EVENT_VAL_INDEX] > cqs_wait->objs[i].val; ++ if (sig_set) { ++ bool error = false; ++ ++ bitmap_set(cqs_wait->signaled, i, 1); ++ if ((cqs_wait->inherit_err_flags & (1U << i)) && ++ evt[BASEP_EVENT_ERR_INDEX] > 0) { ++ queue->has_error = 
true; ++ error = true; ++ } ++ ++ KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_WAIT_END, ++ queue, cqs_wait->objs[i].addr, ++ error); ++ ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END( ++ kbdev, queue, ++ evt[BASEP_EVENT_ERR_INDEX]); ++ queue->command_started = false; ++ } ++ ++ kbase_phy_alloc_mapping_put(queue->kctx, mapping); ++ ++ if (!sig_set) ++ break; ++ } ++ } ++ ++ /* For the queue to progress further, all cqs objects should get ++ * signaled. ++ */ ++ return bitmap_full(cqs_wait->signaled, cqs_wait->nr_objs); ++} ++ ++static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue, ++ struct base_kcpu_command_cqs_wait_info *cqs_wait_info, ++ struct kbase_kcpu_command *current_command) ++{ ++ struct base_cqs_wait_info *objs; ++ unsigned int nr_objs = cqs_wait_info->nr_objs; ++ ++ lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); ++ ++ if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) ++ return -EINVAL; ++ ++ if (!nr_objs) ++ return -EINVAL; ++ ++ objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL); ++ if (!objs) ++ return -ENOMEM; ++ ++ if (copy_from_user(objs, u64_to_user_ptr(cqs_wait_info->objs), ++ nr_objs * sizeof(*objs))) { ++ kfree(objs); ++ return -ENOMEM; ++ } ++ ++ if (++queue->cqs_wait_count == 1) { ++ if (kbase_csf_event_wait_add(queue->kctx, ++ event_cqs_callback, queue)) { ++ kfree(objs); ++ queue->cqs_wait_count--; ++ return -ENOMEM; ++ } ++ } ++ ++ current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_WAIT; ++ current_command->info.cqs_wait.nr_objs = nr_objs; ++ current_command->info.cqs_wait.objs = objs; ++ current_command->info.cqs_wait.inherit_err_flags = ++ cqs_wait_info->inherit_err_flags; ++ ++ current_command->info.cqs_wait.signaled = kcalloc(BITS_TO_LONGS(nr_objs), ++ sizeof(*current_command->info.cqs_wait.signaled), GFP_KERNEL); ++ if (!current_command->info.cqs_wait.signaled) { ++ if (--queue->cqs_wait_count == 0) { ++ kbase_csf_event_wait_remove(queue->kctx, ++ event_cqs_callback, queue); ++ } ++ ++ kfree(objs); ++ 
return -ENOMEM; ++ } ++ ++ return 0; ++} ++ ++static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev, ++ struct kbase_kcpu_command_queue *queue, ++ struct kbase_kcpu_command_cqs_set_info *cqs_set) ++{ ++ unsigned int i; ++ ++ lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); ++ ++ if (WARN_ON(!cqs_set->objs)) ++ return; ++ ++ for (i = 0; i < cqs_set->nr_objs; i++) { ++ struct kbase_vmap_struct *mapping; ++ u32 *evt; ++ ++ evt = (u32 *)kbase_phy_alloc_mapping_get( ++ queue->kctx, cqs_set->objs[i].addr, &mapping); ++ ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET(kbdev, queue, ++ evt ? 0 : 1); ++ ++ if (!evt) { ++ dev_warn(kbdev->dev, ++ "Sync memory %llx already freed", cqs_set->objs[i].addr); ++ queue->has_error = true; ++ } else { ++ evt[BASEP_EVENT_ERR_INDEX] = queue->has_error; ++ /* Set to signaled */ ++ evt[BASEP_EVENT_VAL_INDEX]++; ++ kbase_phy_alloc_mapping_put(queue->kctx, mapping); ++ ++ KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_SET, ++ queue, cqs_set->objs[i].addr, ++ evt[BASEP_EVENT_ERR_INDEX]); ++ } ++ } ++ ++ kbase_csf_event_signal_notify_gpu(queue->kctx); ++ ++ kfree(cqs_set->objs); ++ cqs_set->objs = NULL; ++} ++ ++static int kbase_kcpu_cqs_set_prepare( ++ struct kbase_kcpu_command_queue *kcpu_queue, ++ struct base_kcpu_command_cqs_set_info *cqs_set_info, ++ struct kbase_kcpu_command *current_command) ++{ ++ struct kbase_context *const kctx = kcpu_queue->kctx; ++ struct base_cqs_set *objs; ++ unsigned int nr_objs = cqs_set_info->nr_objs; ++ ++ lockdep_assert_held(&kctx->csf.kcpu_queues.lock); ++ ++ if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) ++ return -EINVAL; ++ ++ if (!nr_objs) ++ return -EINVAL; ++ ++ objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL); ++ if (!objs) ++ return -ENOMEM; ++ ++ if (copy_from_user(objs, u64_to_user_ptr(cqs_set_info->objs), ++ nr_objs * sizeof(*objs))) { ++ kfree(objs); ++ return -ENOMEM; ++ } ++ ++ current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET; ++ current_command->info.cqs_set.nr_objs = 
nr_objs;
++	current_command->info.cqs_set.objs = objs;
++
++	return 0;
++}
++
++/**
++ * cleanup_cqs_wait_operation() - Release the resources of a CQS wait
++ *                                operation command
++ *
++ * @queue:              The queue that contained the command
++ * @cqs_wait_operation: The command info to clean up
++ *
++ * Drops the queue's CQS wait count (removing the event callback when it
++ * reaches zero) and frees the object array and the signaled bitmap.
++ */
++static void cleanup_cqs_wait_operation(struct kbase_kcpu_command_queue *queue,
++		struct kbase_kcpu_command_cqs_wait_operation_info *cqs_wait_operation)
++{
++	WARN_ON(!cqs_wait_operation->nr_objs);
++	WARN_ON(!cqs_wait_operation->objs);
++	WARN_ON(!cqs_wait_operation->signaled);
++	WARN_ON(!queue->cqs_wait_count);
++
++	if (--queue->cqs_wait_count == 0) {
++		kbase_csf_event_wait_remove(queue->kctx,
++				event_cqs_callback, queue);
++	}
++
++	kfree(cqs_wait_operation->signaled);
++	kfree(cqs_wait_operation->objs);
++	cqs_wait_operation->signaled = NULL;
++	cqs_wait_operation->objs = NULL;
++}
++
++/**
++ * kbase_kcpu_cqs_wait_operation_process() - Evaluate a CQS wait operation
++ *                                           command
++ *
++ * @kbdev:              Device used for logging
++ * @queue:              The queue containing the command
++ * @cqs_wait_operation: The command info, including the signaled bitmap
++ *
++ * Objects are evaluated in order starting from the first one not yet marked
++ * as signaled; evaluation stops at the first object whose condition is not
++ * met. The value and the error state of an object are accessed with the
++ * width given by its data type: for BASEP_CQS_DATA_TYPE_U32 the error state
++ * lives sizeof(u32) bytes after the value, so 64-bit loads would mix the
++ * error state into the compared value and read past the end of the object.
++ *
++ * Return: non-zero if all objects are signaled, 0 if the wait is still
++ * blocked, -EINVAL if the sync memory is gone or the operation is
++ * unsupported (the queue error flag is set in that case).
++ */
++static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev,
++		struct kbase_kcpu_command_queue *queue,
++		struct kbase_kcpu_command_cqs_wait_operation_info *cqs_wait_operation)
++{
++	u32 i;
++
++	lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
++
++	if (WARN_ON(!cqs_wait_operation->objs))
++		return -EINVAL;
++
++	/* Skip the CQS waits that have already been signaled when processing */
++	for (i = find_first_zero_bit(cqs_wait_operation->signaled, cqs_wait_operation->nr_objs); i < cqs_wait_operation->nr_objs; i++) {
++		if (!test_bit(i, cqs_wait_operation->signaled)) {
++			const struct base_cqs_wait_operation_info *obj =
++				&cqs_wait_operation->objs[i];
++			/* Anything that is not U64 is handled as U32, matching
++			 * the fallthrough of the data type switch below.
++			 */
++			const bool is_u64 =
++				(obj->data_type == BASEP_CQS_DATA_TYPE_U64);
++			struct kbase_vmap_struct *mapping;
++			bool sig_set;
++			u64 cur_val;
++			u8 *err_ptr;
++			u8 *evt = (u8 *)kbase_phy_alloc_mapping_get(queue->kctx,
++						obj->addr, &mapping);
++
++			/* GPUCORE-28172 RDT to review */
++			if (!queue->command_started)
++				queue->command_started = true;
++
++			if (!evt) {
++				dev_warn(kbdev->dev,
++					"Sync memory %llx already freed", obj->addr);
++				queue->has_error = true;
++				return -EINVAL;
++			}
++
++			/* Load the current value with the object's own width */
++			cur_val = is_u64 ? *(u64 *)evt : *(u32 *)evt;
++
++			switch (obj->operation) {
++			case BASEP_CQS_WAIT_OPERATION_LE:
++				sig_set = cur_val <= obj->val;
++				break;
++			case BASEP_CQS_WAIT_OPERATION_GT:
++				sig_set = cur_val > obj->val;
++				break;
++			default:
++				dev_warn(kbdev->dev,
++					"Unsupported CQS wait operation %d", obj->operation);
++
++				kbase_phy_alloc_mapping_put(queue->kctx, mapping);
++				queue->has_error = true;
++
++				return -EINVAL;
++			}
++
++			/* Locate the error_state value depending on the CQS data type */
++			switch (obj->data_type) {
++			default:
++				dev_warn(kbdev->dev, "Unreachable data_type=%d", obj->data_type);
++			/* Fallthrough - hint to compiler that there's really only 2 options at present */
++			case BASEP_CQS_DATA_TYPE_U32:
++				err_ptr = evt + sizeof(u32);
++				break;
++			case BASEP_CQS_DATA_TYPE_U64:
++				err_ptr = evt + sizeof(u64);
++				break;
++			}
++
++			if (sig_set) {
++				bitmap_set(cqs_wait_operation->signaled, i, 1);
++				if ((cqs_wait_operation->inherit_err_flags & (1U << i)) &&
++				    (is_u64 ? *(u64 *)err_ptr : *(u32 *)err_ptr) > 0) {
++					queue->has_error = true;
++				}
++
++				/* GPUCORE-28172 RDT to review */
++
++				queue->command_started = false;
++			}
++
++			kbase_phy_alloc_mapping_put(queue->kctx, mapping);
++
++			if (!sig_set)
++				break;
++		}
++	}
++
++	/* For the queue to progress further, all cqs objects should get
++	 * signaled.
++	 */
++	return bitmap_full(cqs_wait_operation->signaled, cqs_wait_operation->nr_objs);
++}
++
++static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue *queue,
++		struct base_kcpu_command_cqs_wait_operation_info *cqs_wait_operation_info,
++		struct kbase_kcpu_command *current_command)
++{
++	struct base_cqs_wait_operation_info *objs;
++	unsigned int nr_objs = cqs_wait_operation_info->nr_objs;
++
++	lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
++
++	if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
++		return -EINVAL;
++
++	if (!nr_objs)
++		return -EINVAL;
++
++	objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL);
++	if (!objs)
++		return -ENOMEM;
++
++	if (copy_from_user(objs, u64_to_user_ptr(cqs_wait_operation_info->objs),
++			nr_objs * sizeof(*objs))) {
++		kfree(objs);
++		return -ENOMEM;
++	}
++
++	if (++queue->cqs_wait_count == 1) {
++		if (kbase_csf_event_wait_add(queue->kctx,
++				event_cqs_callback, queue)) {
++			kfree(objs);
++			queue->cqs_wait_count--;
++			return -ENOMEM;
++		}
++	}
++
++	current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION;
++	current_command->info.cqs_wait_operation.nr_objs = nr_objs;
++	current_command->info.cqs_wait_operation.objs = objs;
++	current_command->info.cqs_wait_operation.inherit_err_flags =
++					cqs_wait_operation_info->inherit_err_flags;
++
++	current_command->info.cqs_wait_operation.signaled = kcalloc(BITS_TO_LONGS(nr_objs),
++		sizeof(*current_command->info.cqs_wait_operation.signaled), GFP_KERNEL);
++	if (!current_command->info.cqs_wait_operation.signaled) {
++		if (--queue->cqs_wait_count == 0) {
++			kbase_csf_event_wait_remove(queue->kctx,
++				event_cqs_callback, queue);
++		}
++
++		kfree(objs);
++		return -ENOMEM;
++	}
++
++	return 0;
++}
++
++/**
++ * kbase_kcpu_cqs_set_operation_process() - Execute a CQS set operation
++ *                                          command
++ *
++ * @kbdev:             Device used for logging
++ * @queue:             The queue containing the command
++ * @cqs_set_operation: The command info; its object array is freed here
++ *
++ * Applies the requested ADD/SET operation to every object and then stores
++ * the queue error flag in the object's error state. All accesses use the
++ * width given by the object's data type: for BASEP_CQS_DATA_TYPE_U32 the
++ * error state lives sizeof(u32) bytes after the value, so a 64-bit store
++ * there would be misaligned and would overwrite the 4 bytes following the
++ * object.
++ */
++static void kbase_kcpu_cqs_set_operation_process(
++		struct kbase_device *kbdev,
++		struct kbase_kcpu_command_queue *queue,
++		struct kbase_kcpu_command_cqs_set_operation_info *cqs_set_operation)
++{
++	unsigned int i;
++
++	lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
++
++	if (WARN_ON(!cqs_set_operation->objs))
++		return;
++
++	for (i = 0; i < cqs_set_operation->nr_objs; i++) {
++		const struct base_cqs_set_operation_info *obj =
++			&cqs_set_operation->objs[i];
++		/* Anything that is not U64 is handled as U32, matching the
++		 * fallthrough of the data type switch below.
++		 */
++		const bool is_u64 = (obj->data_type == BASEP_CQS_DATA_TYPE_U64);
++		struct kbase_vmap_struct *mapping;
++		u8 *err_ptr;
++		u8 *evt;
++
++		evt = (u8 *)kbase_phy_alloc_mapping_get(
++			queue->kctx, obj->addr, &mapping);
++
++		/* GPUCORE-28172 RDT to review */
++
++		if (!evt) {
++			dev_warn(kbdev->dev,
++				"Sync memory %llx already freed", obj->addr);
++			queue->has_error = true;
++		} else {
++			switch (obj->operation) {
++			case BASEP_CQS_SET_OPERATION_ADD:
++				if (is_u64)
++					*(u64 *)evt += obj->val;
++				else
++					*(u32 *)evt += (u32)obj->val;
++				break;
++			case BASEP_CQS_SET_OPERATION_SET:
++				if (is_u64)
++					*(u64 *)evt = obj->val;
++				else
++					*(u32 *)evt = (u32)obj->val;
++				break;
++			default:
++				dev_warn(kbdev->dev,
++					"Unsupported CQS set operation %d", obj->operation);
++				queue->has_error = true;
++				break;
++			}
++
++			/* Locate the error_state value depending on the CQS data type */
++			switch (obj->data_type) {
++			default:
++				dev_warn(kbdev->dev, "Unreachable data_type=%d", obj->data_type);
++			/* Fallthrough - hint to compiler that there's really only 2 options at present */
++			case BASEP_CQS_DATA_TYPE_U32:
++				err_ptr = evt + sizeof(u32);
++				break;
++			case BASEP_CQS_DATA_TYPE_U64:
++				err_ptr = evt + sizeof(u64);
++				break;
++			}
++
++			/* GPUCORE-28172 RDT to review */
++
++			/* Always propagate errors */
++			if (is_u64)
++				*(u64 *)err_ptr = queue->has_error;
++			else
++				*(u32 *)err_ptr = queue->has_error;
++
++			kbase_phy_alloc_mapping_put(queue->kctx, mapping);
++		}
++	}
++
++	kbase_csf_event_signal_notify_gpu(queue->kctx);
++
++	kfree(cqs_set_operation->objs);
++	cqs_set_operation->objs = NULL;
++}
++
++static int kbase_kcpu_cqs_set_operation_prepare(
++		struct kbase_kcpu_command_queue *kcpu_queue,
++		struct base_kcpu_command_cqs_set_operation_info *cqs_set_operation_info,
++		struct kbase_kcpu_command *current_command)
++{
++	struct kbase_context *const kctx = kcpu_queue->kctx;
++ struct base_cqs_set_operation_info *objs; ++ unsigned int nr_objs = cqs_set_operation_info->nr_objs; ++ ++ lockdep_assert_held(&kctx->csf.kcpu_queues.lock); ++ ++ if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) ++ return -EINVAL; ++ ++ if (!nr_objs) ++ return -EINVAL; ++ ++ objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL); ++ if (!objs) ++ return -ENOMEM; ++ ++ if (copy_from_user(objs, u64_to_user_ptr(cqs_set_operation_info->objs), ++ nr_objs * sizeof(*objs))) { ++ kfree(objs); ++ return -ENOMEM; ++ } ++ ++ current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION; ++ current_command->info.cqs_set_operation.nr_objs = nr_objs; ++ current_command->info.cqs_set_operation.objs = objs; ++ ++ return 0; ++} ++ ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++static void kbase_csf_fence_wait_callback(struct fence *fence, ++ struct fence_cb *cb) ++#else ++static void kbase_csf_fence_wait_callback(struct dma_fence *fence, ++ struct dma_fence_cb *cb) ++#endif ++{ ++ struct kbase_kcpu_command_fence_info *fence_info = container_of(cb, ++ struct kbase_kcpu_command_fence_info, fence_cb); ++ struct kbase_kcpu_command_queue *kcpu_queue = fence_info->kcpu_queue; ++ struct kbase_context *const kctx = kcpu_queue->kctx; ++ ++ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, FENCE_WAIT_END, kcpu_queue, ++ fence->context, fence->seqno); ++ ++ /* Resume kcpu command queue processing. 
*/ ++ queue_work(kctx->csf.kcpu_queues.wq, &kcpu_queue->work); ++} ++ ++static void kbase_kcpu_fence_wait_cancel( ++ struct kbase_kcpu_command_queue *kcpu_queue, ++ struct kbase_kcpu_command_fence_info *fence_info) ++{ ++ struct kbase_context *const kctx = kcpu_queue->kctx; ++ ++ lockdep_assert_held(&kctx->csf.kcpu_queues.lock); ++ ++ if (WARN_ON(!fence_info->fence)) ++ return; ++ ++ if (kcpu_queue->fence_wait_processed) { ++ bool removed = dma_fence_remove_callback(fence_info->fence, ++ &fence_info->fence_cb); ++ ++ if (removed) ++ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, FENCE_WAIT_END, ++ kcpu_queue, fence_info->fence->context, ++ fence_info->fence->seqno); ++ } ++ ++ /* Release the reference which is kept by the kcpu_queue */ ++ kbase_fence_put(fence_info->fence); ++ kcpu_queue->fence_wait_processed = false; ++ ++ fence_info->fence = NULL; ++} ++ ++/** ++ * kbase_kcpu_fence_wait_process() - Process the kcpu fence wait command ++ * ++ * @kcpu_queue: The queue containing the fence wait command ++ * @fence_info: Reference to a fence for which the command is waiting ++ * ++ * Return: 0 if fence wait is blocked, 1 if it is unblocked, negative error if ++ * an error has occurred and fence should no longer be waited on. 
++ */ ++static int kbase_kcpu_fence_wait_process( ++ struct kbase_kcpu_command_queue *kcpu_queue, ++ struct kbase_kcpu_command_fence_info *fence_info) ++{ ++ int fence_status = 0; ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++ struct fence *fence; ++#else ++ struct dma_fence *fence; ++#endif ++ ++ lockdep_assert_held(&kcpu_queue->kctx->csf.kcpu_queues.lock); ++ ++ if (WARN_ON(!fence_info->fence)) ++ return -EINVAL; ++ ++ fence = fence_info->fence; ++ ++ if (kcpu_queue->fence_wait_processed) { ++ fence_status = dma_fence_get_status(fence); ++ } else { ++ int cb_err = dma_fence_add_callback(fence, ++ &fence_info->fence_cb, ++ kbase_csf_fence_wait_callback); ++ ++ KBASE_KTRACE_ADD_CSF_KCPU(kcpu_queue->kctx->kbdev, ++ FENCE_WAIT_START, kcpu_queue, ++ fence->context, fence->seqno); ++ fence_status = cb_err; ++ if (cb_err == 0) ++ kcpu_queue->fence_wait_processed = true; ++ else if (cb_err == -ENOENT) ++ fence_status = dma_fence_get_status(fence); ++ } ++ ++ /* ++ * At this point fence status can contain 3 types of values: ++ * - Value 0 to represent that fence in question is not signalled yet ++ * - Value 1 to represent that fence in question is signalled without ++ * errors ++ * - Negative error code to represent that some error has occurred such ++ * that waiting on it is no longer valid. 
++ */ ++ ++ if (fence_status) ++ kbase_kcpu_fence_wait_cancel(kcpu_queue, fence_info); ++ ++ return fence_status; ++} ++ ++static int kbase_kcpu_fence_wait_prepare( ++ struct kbase_kcpu_command_queue *kcpu_queue, ++ struct base_kcpu_command_fence_info *fence_info, ++ struct kbase_kcpu_command *current_command) ++{ ++ struct kbase_context *const kctx = kcpu_queue->kctx; ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++ struct fence *fence_in; ++#else ++ struct dma_fence *fence_in; ++#endif ++ struct base_fence fence; ++ ++ lockdep_assert_held(&kctx->csf.kcpu_queues.lock); ++ ++ if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), ++ sizeof(fence))) ++ return -ENOMEM; ++ ++ fence_in = sync_file_get_fence(fence.basep.fd); ++ ++ if (!fence_in) ++ return -ENOENT; ++ ++ current_command->type = BASE_KCPU_COMMAND_TYPE_FENCE_WAIT; ++ current_command->info.fence.fence = fence_in; ++ current_command->info.fence.kcpu_queue = kcpu_queue; ++ ++ return 0; ++} ++ ++static int kbase_kcpu_fence_signal_process( ++ struct kbase_kcpu_command_queue *kcpu_queue, ++ struct kbase_kcpu_command_fence_info *fence_info) ++{ ++ struct kbase_context *const kctx = kcpu_queue->kctx; ++ int ret; ++ ++ if (WARN_ON(!fence_info->fence)) ++ return -EINVAL; ++ ++ ret = dma_fence_signal(fence_info->fence); ++ ++ if (unlikely(ret < 0)) { ++ dev_warn(kctx->kbdev->dev, ++ "fence_signal() failed with %d\n", ret); ++ } ++ ++ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, FENCE_SIGNAL, kcpu_queue, ++ fence_info->fence->context, ++ fence_info->fence->seqno); ++ ++ dma_fence_put(fence_info->fence); ++ fence_info->fence = NULL; ++ ++ return ret; ++} ++ ++static int kbase_kcpu_fence_signal_prepare( ++ struct kbase_kcpu_command_queue *kcpu_queue, ++ struct base_kcpu_command_fence_info *fence_info, ++ struct kbase_kcpu_command *current_command) ++{ ++ struct kbase_context *const kctx = kcpu_queue->kctx; ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++ struct fence *fence_out; ++#else ++ struct 
dma_fence *fence_out; ++#endif ++ struct base_fence fence; ++ struct sync_file *sync_file; ++ int ret = 0; ++ int fd; ++ ++ lockdep_assert_held(&kctx->csf.kcpu_queues.lock); ++ ++ if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), ++ sizeof(fence))) ++ return -EFAULT; ++ ++ fence_out = kzalloc(sizeof(*fence_out), GFP_KERNEL); ++ if (!fence_out) ++ return -ENOMEM; ++ ++ dma_fence_init(fence_out, ++ &kbase_fence_ops, ++ &kbase_csf_fence_lock, ++ kcpu_queue->fence_context, ++ ++kcpu_queue->fence_seqno); ++ ++#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE) ++ /* Take an extra reference to the fence on behalf of the sync file. ++ * This is only needded on older kernels where sync_file_create() ++ * does not take its own reference. This was changed in v4.9.68 ++ * where sync_file_create() now takes its own reference. ++ */ ++ dma_fence_get(fence_out); ++#endif ++ ++ /* create a sync_file fd representing the fence */ ++ sync_file = sync_file_create(fence_out); ++ if (!sync_file) { ++#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE) ++ dma_fence_put(fence_out); ++#endif ++ ret = -ENOMEM; ++ goto file_create_fail; ++ } ++ ++ fd = get_unused_fd_flags(O_CLOEXEC); ++ if (fd < 0) { ++ ret = fd; ++ goto fd_flags_fail; ++ } ++ ++ fd_install(fd, sync_file->file); ++ ++ fence.basep.fd = fd; ++ ++ current_command->type = BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL; ++ current_command->info.fence.fence = fence_out; ++ ++ if (copy_to_user(u64_to_user_ptr(fence_info->fence), &fence, ++ sizeof(fence))) { ++ ret = -EFAULT; ++ goto fd_flags_fail; ++ } ++ ++ return 0; ++ ++fd_flags_fail: ++ fput(sync_file->file); ++file_create_fail: ++ dma_fence_put(fence_out); ++ ++ return ret; ++} ++#endif /* CONFIG_SYNC_FILE */ ++ ++static void kcpu_queue_process_worker(struct work_struct *data) ++{ ++ struct kbase_kcpu_command_queue *queue = container_of(data, ++ struct kbase_kcpu_command_queue, work); ++ ++ mutex_lock(&queue->kctx->csf.kcpu_queues.lock); ++ ++ kcpu_queue_process(queue, 
false); ++ ++ mutex_unlock(&queue->kctx->csf.kcpu_queues.lock); ++} ++ /** ++ * delete_queue() - Destroy the KCPU command queue stored at index @id ++ * ++ * @kctx: Context whose kcpu_queues array holds the queue ++ * @id: Index into kctx->csf.kcpu_queues.array ++ * ++ * With the kcpu_queues lock held, the remaining commands are processed with ++ * waits ignored, the array slot and its in_use bit are cleared and the ++ * deletion tracepoint is fired. After the lock is released the queue's work ++ * item is cancelled and the queue is freed. ++ * ++ * Return: 0 on success, -EINVAL if @id is out of range or the slot is empty. ++ */ ++static int delete_queue(struct kbase_context *kctx, u32 id) ++{ ++ int err = 0; ++ ++ mutex_lock(&kctx->csf.kcpu_queues.lock); ++ ++ if ((id < KBASEP_MAX_KCPU_QUEUES) && kctx->csf.kcpu_queues.array[id]) { ++ struct kbase_kcpu_command_queue *queue = ++ kctx->csf.kcpu_queues.array[id]; ++ ++ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_DESTROY, ++ queue, queue->num_pending_cmds, queue->cqs_wait_count); ++ ++ /* Drain the remaining work for this queue first and go past ++ * all the waits. ++ */ ++ kcpu_queue_process(queue, true); ++ ++ /* All commands should have been processed */ ++ WARN_ON(queue->num_pending_cmds); ++ ++ /* All CQS wait commands should have been cleaned up */ ++ WARN_ON(queue->cqs_wait_count); ++ ++ kctx->csf.kcpu_queues.array[id] = NULL; ++ bitmap_clear(kctx->csf.kcpu_queues.in_use, id, 1); ++ ++ /* Fire the tracepoint with the mutex held to enforce correct ++ * ordering with the summary stream.
++ */ ++ KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE(kctx->kbdev, queue); ++ ++ mutex_unlock(&kctx->csf.kcpu_queues.lock); ++ /* NOTE(review): the lock is released before waiting for the work ++ * item; the worker takes the same lock, so this presumably avoids ++ * a deadlock - confirm. ++ */ ++ cancel_work_sync(&queue->work); ++ ++ kfree(queue); ++ } else { ++ dev_warn(kctx->kbdev->dev, ++ "Attempt to delete a non-existent KCPU queue\n"); ++ mutex_unlock(&kctx->csf.kcpu_queues.lock); ++ err = -EINVAL; ++ } ++ return err; ++} ++ /** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO() - Emit the ++ * timeline array describing the result of a JIT allocation command ++ * ++ * @kbdev: Device passed to the timeline macros ++ * @queue: Queue that executed the command ++ * @jit_alloc: JIT allocation command info; one array item is emitted per ++ * entry of @jit_alloc->info ++ * @alloc_status: Status of the allocation, reported with every item ++ * ++ * Opens the array and emits the items; the array is not closed here. The ++ * GPU address is only reported when @alloc_status is 0 and the region looked ++ * up by the entry's id is valid, and mmu_flags is only computed when ++ * CONFIG_MALI_VECTOR_DUMP is defined; both are reported as 0 otherwise. 
++ */ ++static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO( ++ struct kbase_device *kbdev, ++ const struct kbase_kcpu_command_queue *queue, ++ const struct kbase_kcpu_command_jit_alloc_info *jit_alloc, ++ int alloc_status) ++{ ++ u8 i; ++ ++ KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( ++ kbdev, queue); ++ for (i = 0; i < jit_alloc->count; i++) { ++ const u8 id = jit_alloc->info[i].id; ++ const struct kbase_va_region *reg = queue->kctx->jit_alloc[id]; ++ u64 gpu_alloc_addr = 0; ++ u64 mmu_flags = 0; ++ ++ if ((alloc_status == 0) && !WARN_ON(!reg) && ++ !WARN_ON(reg == KBASE_RESERVED_REG_JIT_ALLOC)) { ++#ifdef CONFIG_MALI_VECTOR_DUMP ++ struct tagged_addr phy = {0}; ++#endif /* CONFIG_MALI_VECTOR_DUMP */ ++ ++ gpu_alloc_addr = reg->start_pfn << PAGE_SHIFT; ++#ifdef CONFIG_MALI_VECTOR_DUMP ++ mmu_flags = kbase_mmu_create_ate(kbdev, ++ phy, reg->flags, ++ MIDGARD_MMU_BOTTOMLEVEL, ++ queue->kctx->jit_group_id); ++#endif /* CONFIG_MALI_VECTOR_DUMP */ ++ } ++ KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( ++ kbdev, queue, alloc_status, gpu_alloc_addr, mmu_flags); ++ } ++} ++ /* Close the timeline array of JIT allocation results for @queue. */ ++static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( ++ struct kbase_device *kbdev, ++ const struct kbase_kcpu_command_queue *queue) ++{ ++ KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( ++ kbdev, queue); ++} ++ /* Close the timeline array of JIT free results for @queue. */ ++static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END( ++ struct kbase_device *kbdev, ++ const struct kbase_kcpu_command_queue *queue) ++{ ++ KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END( ++ kbdev, queue); ++} 
++ ++static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, ++ bool ignore_waits) ++{ ++ struct kbase_device *kbdev = queue->kctx->kbdev; ++ bool process_next = true; ++ size_t i; ++ ++ lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); ++ ++ for (i = 0; i != queue->num_pending_cmds; ++i) { ++ struct kbase_kcpu_command *cmd = ++ &queue->commands[(u8)(queue->start_offset + i)]; ++ int status; ++ ++ switch (cmd->type) { ++ case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: ++ if (!queue->command_started) { ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START( ++ kbdev, queue); ++ queue->command_started = true; ++ } ++ ++ status = 0; ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++ if (ignore_waits) { ++ kbase_kcpu_fence_wait_cancel(queue, ++ &cmd->info.fence); ++ } else { ++ status = kbase_kcpu_fence_wait_process(queue, ++ &cmd->info.fence); ++ ++ if (status == 0) ++ process_next = false; ++ else if (status < 0) ++ queue->has_error = true; ++ } ++#else ++ dev_warn(kbdev->dev, ++ "unexpected fence wait command found\n"); ++ ++ status = -EINVAL; ++ queue->has_error = true; ++#endif ++ ++ if (process_next) { ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END( ++ kbdev, queue, status < 0 ? 
status : 0); ++ queue->command_started = false; ++ } ++ break; ++ case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START( ++ kbdev, queue); ++ ++ status = 0; ++ ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++ status = kbase_kcpu_fence_signal_process( ++ queue, &cmd->info.fence); ++ ++ if (status < 0) ++ queue->has_error = true; ++#else ++ dev_warn(kbdev->dev, ++ "unexpected fence signal command found\n"); ++ ++ status = -EINVAL; ++ queue->has_error = true; ++#endif ++ ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END( ++ kbdev, queue, status); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: ++ status = kbase_kcpu_cqs_wait_process(kbdev, queue, ++ &cmd->info.cqs_wait); ++ ++ if (!status && !ignore_waits) { ++ process_next = false; ++ } else { ++ /* Either all CQS objects were signaled or ++ * there was an error or the queue itself is ++ * being deleted. ++ * In all cases can move to the next command. ++ * TBD: handle the error ++ */ ++ cleanup_cqs_wait(queue, &cmd->info.cqs_wait); ++ } ++ ++ break; ++ case BASE_KCPU_COMMAND_TYPE_CQS_SET: ++ kbase_kcpu_cqs_set_process(kbdev, queue, ++ &cmd->info.cqs_set); ++ ++ break; ++ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: ++ status = kbase_kcpu_cqs_wait_operation_process(kbdev, queue, ++ &cmd->info.cqs_wait_operation); ++ ++ if (!status && !ignore_waits) { ++ process_next = false; ++ } else { ++ /* Either all CQS objects were signaled or ++ * there was an error or the queue itself is ++ * being deleted. ++ * In all cases can move to the next command. 
++ * TBD: handle the error ++ */ ++ cleanup_cqs_wait_operation(queue, &cmd->info.cqs_wait_operation); ++ } ++ ++ break; ++ case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: ++ kbase_kcpu_cqs_set_operation_process(kbdev, queue, ++ &cmd->info.cqs_set_operation); ++ ++ break; ++ case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: ++ /* Clear the queue's error state */ ++ queue->has_error = false; ++ ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER( ++ kbdev, queue); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: { ++ struct kbase_ctx_ext_res_meta *meta = NULL; ++ ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START( ++ kbdev, queue); ++ ++ kbase_gpu_vm_lock(queue->kctx); ++ meta = kbase_sticky_resource_acquire( ++ queue->kctx, cmd->info.import.gpu_va); ++ kbase_gpu_vm_unlock(queue->kctx); ++ ++ if (meta == NULL) { ++ queue->has_error = true; ++ dev_warn(kbdev->dev, ++ "failed to map an external resource\n"); ++ } ++ ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END( ++ kbdev, queue, meta ? 0 : 1); ++ break; ++ } ++ case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: { ++ bool ret; ++ ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START( ++ kbdev, queue); ++ ++ kbase_gpu_vm_lock(queue->kctx); ++ ret = kbase_sticky_resource_release( ++ queue->kctx, NULL, cmd->info.import.gpu_va); ++ kbase_gpu_vm_unlock(queue->kctx); ++ ++ if (!ret) { ++ queue->has_error = true; ++ dev_warn(kbdev->dev, ++ "failed to release the reference. resource not found\n"); ++ } ++ ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END( ++ kbdev, queue, ret ? 
0 : 1); ++ break; ++ } ++ case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: { ++ bool ret; ++ ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START( ++ kbdev, queue); ++ ++ kbase_gpu_vm_lock(queue->kctx); ++ ret = kbase_sticky_resource_release_force( ++ queue->kctx, NULL, cmd->info.import.gpu_va); ++ kbase_gpu_vm_unlock(queue->kctx); ++ ++ if (!ret) { ++ queue->has_error = true; ++ dev_warn(kbdev->dev, ++ "failed to release the reference. resource not found\n"); ++ } ++ ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END( ++ kbdev, queue, ret ? 0 : 1); ++ break; ++ } ++ case BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: ++ { ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START( ++ kbdev, queue); ++ ++ status = kbase_kcpu_jit_allocate_process(queue, cmd); ++ if (status == -EAGAIN) { ++ process_next = false; ++ } else { ++ if (status != 0) ++ queue->has_error = true; ++ ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO( ++ kbdev, queue, &cmd->info.jit_alloc, ++ status); ++ ++ kbase_kcpu_jit_allocate_finish(queue, cmd); ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( ++ kbdev, queue); ++ } ++ break; ++ } ++ case BASE_KCPU_COMMAND_TYPE_JIT_FREE: ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START( ++ kbdev, queue); ++ ++ status = kbase_kcpu_jit_free_process(queue, cmd); ++ if (status) ++ queue->has_error = true; ++ ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END( ++ kbdev, queue); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: { ++ struct kbase_suspend_copy_buffer *sus_buf = ++ cmd->info.suspend_buf_copy.sus_buf; ++ ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START( ++ kbdev, queue); ++ ++ status = kbase_csf_queue_group_suspend_process( ++ queue->kctx, sus_buf, ++ cmd->info.suspend_buf_copy.group_handle); ++ if (status) ++ queue->has_error = true; ++ ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END( ++ kbdev, queue, status); ++ ++ if (!sus_buf->cpu_alloc) { ++ int i; 
++ ++ for (i = 0; i < sus_buf->nr_pages; i++) ++ put_page(sus_buf->pages[i]); ++ } else { ++ kbase_mem_phy_alloc_kernel_unmapped( ++ sus_buf->cpu_alloc); ++ kbase_mem_phy_alloc_put(sus_buf->cpu_alloc); ++ } ++ ++ kfree(sus_buf->pages); ++ kfree(sus_buf); ++ break; ++ } ++#if MALI_UNIT_TEST ++ case BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME: { ++ u64 time = ktime_get_raw_ns(); ++ void *target_page = kmap(*cmd->info.sample_time.page); ++ ++ if (target_page) { ++ memcpy(target_page + ++ cmd->info.sample_time.page_offset, ++ &time, sizeof(time)); ++ kunmap(*cmd->info.sample_time.page); ++ } else { ++ dev_warn(kbdev->dev, ++ "Could not kmap target page\n"); ++ queue->has_error = true; ++ } ++ put_page(*cmd->info.sample_time.page); ++ kfree(cmd->info.sample_time.page); ++ break; ++ } ++#endif /* MALI_UNIT_TEST */ ++ default: ++ dev_warn(kbdev->dev, ++ "Unrecognized command type\n"); ++ break; ++ } /* switch */ ++ ++ /*TBD: error handling */ ++ ++ if (!process_next) ++ break; ++ } ++ ++ if (i > 0) { ++ queue->start_offset += i; ++ queue->num_pending_cmds -= i; ++ ++ /* If an attempt to enqueue commands failed then we must raise ++ * an event in case the client wants to retry now that there is ++ * free space in the buffer. 
++ */ ++ if (queue->enqueue_failed) { ++ queue->enqueue_failed = false; ++ kbase_csf_event_signal_cpu_only(queue->kctx); ++ } ++ } ++} ++ ++static size_t kcpu_queue_get_space(struct kbase_kcpu_command_queue *queue) ++{ ++ return KBASEP_KCPU_QUEUE_SIZE - queue->num_pending_cmds; ++} ++ ++static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND( ++ const struct kbase_kcpu_command_queue *queue, ++ const struct kbase_kcpu_command *cmd) ++{ ++ struct kbase_device *kbdev = queue->kctx->kbdev; ++ ++ switch (cmd->type) { ++ case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT( ++ kbdev, queue, cmd->info.fence.fence); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL( ++ kbdev, queue, cmd->info.fence.fence); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: ++ { ++ const struct base_cqs_wait_info *waits = ++ cmd->info.cqs_wait.objs; ++ u32 inherit_err_flags = cmd->info.cqs_wait.inherit_err_flags; ++ unsigned int i; ++ ++ for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) { ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT( ++ kbdev, queue, waits[i].addr, waits[i].val, ++ (inherit_err_flags & ((u32)1 << i)) ? 
1 : 0); ++ } ++ break; ++ } ++ case BASE_KCPU_COMMAND_TYPE_CQS_SET: ++ { ++ const struct base_cqs_set *sets = cmd->info.cqs_set.objs; ++ unsigned int i; ++ ++ for (i = 0; i < cmd->info.cqs_set.nr_objs; i++) { ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET( ++ kbdev, queue, sets[i].addr); ++ } ++ break; ++ } ++ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: ++ { ++ /* GPUCORE-28172 RDT to review */ ++ break; ++ } ++ case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: ++ { ++ /* GPUCORE-28172 RDT to review */ ++ break; ++ } ++ case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER(kbdev, ++ queue); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT( ++ kbdev, queue, cmd->info.import.gpu_va); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT( ++ kbdev, queue, cmd->info.import.gpu_va); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE( ++ kbdev, queue, cmd->info.import.gpu_va); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: ++ { ++ u8 i; ++ ++ KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC( ++ kbdev, queue); ++ for (i = 0; i < cmd->info.jit_alloc.count; i++) { ++ const struct base_jit_alloc_info *info = ++ &cmd->info.jit_alloc.info[i]; ++ ++ KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC( ++ kbdev, queue, info->gpu_alloc_addr, ++ info->va_pages, info->commit_pages, ++ info->extension, info->id, info->bin_id, ++ info->max_allocations, info->flags, ++ info->usage_id); ++ } ++ KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC( ++ kbdev, queue); ++ break; ++ } ++ case BASE_KCPU_COMMAND_TYPE_JIT_FREE: ++ { ++ u8 i; ++ ++ KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE( ++ kbdev, queue); ++ for (i = 0; i < cmd->info.jit_free.count; i++) { ++ 
KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE( ++ kbdev, queue, cmd->info.jit_free.ids[i]); ++ } ++ KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE( ++ kbdev, queue); ++ break; ++ } ++ case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND( ++ kbdev, queue, cmd->info.suspend_buf_copy.sus_buf, ++ cmd->info.suspend_buf_copy.group_handle); ++ break; ++#if MALI_UNIT_TEST ++ case BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME: ++ /* ++ * This is test-only KCPU command, no need to have a timeline ++ * entry ++ */ ++ break; ++#endif /* MALI_UNIT_TEST */ ++ } ++} ++ ++int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, ++ struct kbase_ioctl_kcpu_queue_enqueue *enq) ++{ ++ struct kbase_kcpu_command_queue *queue = NULL; ++ void __user *user_cmds = u64_to_user_ptr(enq->addr); ++ int ret = 0; ++ u32 i; ++ ++ /* The offset to the first command that is being processed or yet to ++ * be processed is of u8 type, so the number of commands inside the ++ * queue cannot be more than 256. ++ */ ++ BUILD_BUG_ON(KBASEP_KCPU_QUEUE_SIZE > 256); ++ ++ /* Whilst the backend interface allows enqueueing multiple commands in ++ * a single operation, the Base interface does not expose any mechanism ++ * to do so. And also right now the handling is missing for the case ++ * where multiple commands are submitted and the enqueue of one of the ++ * command in the set fails after successfully enqueuing other commands ++ * in the set. ++ */ ++ if (enq->nr_commands != 1) { ++ dev_err(kctx->kbdev->dev, ++ "More than one commands enqueued\n"); ++ return -EINVAL; ++ } ++ ++ mutex_lock(&kctx->csf.kcpu_queues.lock); ++ ++ if (!kctx->csf.kcpu_queues.array[enq->id]) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ queue = kctx->csf.kcpu_queues.array[enq->id]; ++ ++ if (kcpu_queue_get_space(queue) < enq->nr_commands) { ++ ret = -EBUSY; ++ queue->enqueue_failed = true; ++ goto out; ++ } ++ ++ /* Copy all command's info to the command buffer. 
++ * Note: it would be more efficient to process all commands in-line ++ * until we encounter an unresolved CQS_ / FENCE_WAIT, however, the ++ * interface allows multiple commands to be enqueued so we must account ++ * for the possibility to roll back. ++ */ ++ ++ for (i = 0; (i != enq->nr_commands) && !ret; ++i, ++kctx->csf.kcpu_queues.num_cmds) { ++ struct kbase_kcpu_command *kcpu_cmd = ++ &queue->commands[(u8)(queue->start_offset + queue->num_pending_cmds + i)]; ++ struct base_kcpu_command command; ++ unsigned int j; ++ ++ if (copy_from_user(&command, user_cmds, sizeof(command))) { ++ ret = -EFAULT; ++ goto out; ++ } ++ ++ user_cmds = (void __user *)((uintptr_t)user_cmds + ++ sizeof(struct base_kcpu_command)); ++ ++ for (j = 0; j < sizeof(command.padding); j++) { ++ if (command.padding[j] != 0) { ++ dev_dbg(kctx->kbdev->dev, ++ "base_kcpu_command padding not 0\n"); ++ ret = -EINVAL; ++ goto out; ++ } ++ } ++ ++ kcpu_cmd->enqueue_ts = kctx->csf.kcpu_queues.num_cmds; ++ switch (command.type) { ++ case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++ ret = kbase_kcpu_fence_wait_prepare(queue, ++ &command.info.fence, kcpu_cmd); ++#else ++ ret = -EINVAL; ++ dev_warn(kctx->kbdev->dev, "fence wait command unsupported\n"); ++#endif ++ break; ++ case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++ ret = kbase_kcpu_fence_signal_prepare(queue, ++ &command.info.fence, kcpu_cmd); ++#else ++ ret = -EINVAL; ++ dev_warn(kctx->kbdev->dev, "fence signal command unsupported\n"); ++#endif ++ break; ++ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: ++ ret = kbase_kcpu_cqs_wait_prepare(queue, ++ &command.info.cqs_wait, kcpu_cmd); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_CQS_SET: ++ ret = kbase_kcpu_cqs_set_prepare(queue, ++ &command.info.cqs_set, kcpu_cmd); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: ++ ret = kbase_kcpu_cqs_wait_operation_prepare(queue, ++ &command.info.cqs_wait_operation, kcpu_cmd); ++ break; ++ case 
BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: ++ ret = kbase_kcpu_cqs_set_operation_prepare(queue, ++ &command.info.cqs_set_operation, kcpu_cmd); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: ++ kcpu_cmd->type = BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER; ++ ret = 0; ++ break; ++ case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: ++ ret = kbase_kcpu_map_import_prepare(queue, ++ &command.info.import, kcpu_cmd); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: ++ ret = kbase_kcpu_unmap_import_prepare(queue, ++ &command.info.import, kcpu_cmd); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: ++ ret = kbase_kcpu_unmap_import_force_prepare(queue, ++ &command.info.import, kcpu_cmd); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: ++ ret = kbase_kcpu_jit_allocate_prepare(queue, ++ &command.info.jit_alloc, kcpu_cmd); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_JIT_FREE: ++ ret = kbase_kcpu_jit_free_prepare(queue, ++ &command.info.jit_free, kcpu_cmd); ++ break; ++ case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: ++ ret = kbase_csf_queue_group_suspend_prepare(queue, ++ &command.info.suspend_buf_copy, ++ kcpu_cmd); ++ break; ++#if MALI_UNIT_TEST ++ case BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME: { ++ int const page_cnt = 1; ++ ++ kcpu_cmd->type = BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME; ++ kcpu_cmd->info.sample_time.page_addr = ++ command.info.sample_time.time & PAGE_MASK; ++ kcpu_cmd->info.sample_time.page_offset = ++ command.info.sample_time.time & ~PAGE_MASK; ++ kcpu_cmd->info.sample_time.page = kcalloc( ++ page_cnt, sizeof(struct page *), GFP_KERNEL); ++ if (!kcpu_cmd->info.sample_time.page) { ++ ret = -ENOMEM; ++ } else { ++ int pinned_pages = get_user_pages_fast( ++ kcpu_cmd->info.sample_time.page_addr, ++ page_cnt, 1, ++ kcpu_cmd->info.sample_time.page); ++ ++ if (pinned_pages < 0) { ++ ret = pinned_pages; ++ kfree(kcpu_cmd->info.sample_time.page); ++ } else if (pinned_pages != page_cnt) { ++ ret = -EINVAL; ++ kfree(kcpu_cmd->info.sample_time.page); ++ } ++ } ++ ++ break; ++ } 
++#endif /* MALI_UNIT_TEST */ ++ default: ++ dev_warn(queue->kctx->kbdev->dev, ++ "Unknown command type %u\n", command.type); ++ ret = -EINVAL; ++ break; ++ } ++ } ++ ++ if (!ret) { ++ /* We only instrument the enqueues after all commands have been ++ * successfully enqueued, as if we do them during the enqueue ++ * and there is an error, we won't be able to roll them back ++ * like is done for the command enqueues themselves. ++ */ ++ for (i = 0; i != enq->nr_commands; ++i) { ++ u8 cmd_idx = (u8)(queue->start_offset + queue->num_pending_cmds + i); ++ ++ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND( ++ queue, &queue->commands[cmd_idx]); ++ } ++ ++ queue->num_pending_cmds += enq->nr_commands; ++ kcpu_queue_process(queue, false); ++ } else { ++ /* Roll back the number of enqueued commands */ ++ kctx->csf.kcpu_queues.num_cmds -= i; ++ } ++ ++out: ++ mutex_unlock(&kctx->csf.kcpu_queues.lock); ++ ++ return ret; ++} ++ ++int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx) ++{ ++ int idx; ++ ++ bitmap_zero(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES); ++ ++ for (idx = 0; idx < KBASEP_MAX_KCPU_QUEUES; ++idx) ++ kctx->csf.kcpu_queues.array[idx] = NULL; ++ ++ kctx->csf.kcpu_queues.wq = alloc_workqueue("mali_kbase_csf_kcpu", ++ WQ_UNBOUND | WQ_HIGHPRI, 0); ++ if (!kctx->csf.kcpu_queues.wq) ++ return -ENOMEM; ++ ++ mutex_init(&kctx->csf.kcpu_queues.lock); ++ ++ kctx->csf.kcpu_queues.num_cmds = 0; ++ ++ return 0; ++} ++ ++void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx) ++{ ++ while (!bitmap_empty(kctx->csf.kcpu_queues.in_use, ++ KBASEP_MAX_KCPU_QUEUES)) { ++ int id = find_first_bit(kctx->csf.kcpu_queues.in_use, ++ KBASEP_MAX_KCPU_QUEUES); ++ ++ if (WARN_ON(!kctx->csf.kcpu_queues.array[id])) ++ clear_bit(id, kctx->csf.kcpu_queues.in_use); ++ else ++ (void)delete_queue(kctx, id); ++ } ++ ++ destroy_workqueue(kctx->csf.kcpu_queues.wq); ++ mutex_destroy(&kctx->csf.kcpu_queues.lock); ++} ++ ++int kbase_csf_kcpu_queue_delete(struct 
kbase_context *kctx, ++ struct kbase_ioctl_kcpu_queue_delete *del) ++{ ++ return delete_queue(kctx, (u32)del->id); ++} ++ ++int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, ++ struct kbase_ioctl_kcpu_queue_new *newq) ++{ ++ struct kbase_kcpu_command_queue *queue; ++ int idx; ++ int ret = 0; ++ ++ /* The queue id is of u8 type and we use the index of the kcpu_queues ++ * array as an id, so the number of elements in the array can't be ++ * more than 256. ++ */ ++ BUILD_BUG_ON(KBASEP_MAX_KCPU_QUEUES > 256); ++ ++ mutex_lock(&kctx->csf.kcpu_queues.lock); ++ ++ idx = find_first_zero_bit(kctx->csf.kcpu_queues.in_use, ++ KBASEP_MAX_KCPU_QUEUES); ++ if (idx >= (int)KBASEP_MAX_KCPU_QUEUES) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ if (WARN_ON(kctx->csf.kcpu_queues.array[idx])) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ queue = kzalloc(sizeof(*queue), GFP_KERNEL); ++ ++ if (!queue) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ bitmap_set(kctx->csf.kcpu_queues.in_use, idx, 1); ++ kctx->csf.kcpu_queues.array[idx] = queue; ++ queue->kctx = kctx; ++ queue->start_offset = 0; ++ queue->num_pending_cmds = 0; ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++ queue->fence_context = dma_fence_context_alloc(1); ++ queue->fence_seqno = 0; ++ queue->fence_wait_processed = false; ++#endif ++ queue->enqueue_failed = false; ++ queue->command_started = false; ++ INIT_LIST_HEAD(&queue->jit_blocked); ++ queue->has_error = false; ++ INIT_WORK(&queue->work, kcpu_queue_process_worker); ++ queue->id = idx; ++ ++ newq->id = idx; ++ ++ /* Fire the tracepoint with the mutex held to enforce correct ordering ++ * with the summary stream. 
++ */ ++ KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE( ++ kctx->kbdev, queue, kctx->id, queue->num_pending_cmds); ++ ++ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_NEW, queue, ++ queue->fence_context, 0); ++out: ++ mutex_unlock(&kctx->csf.kcpu_queues.lock); ++ ++ return ret; ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu.h +new file mode 100644 +index 0000000..2f6da55 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu.h +@@ -0,0 +1,356 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#ifndef _KBASE_CSF_KCPU_H_ ++#define _KBASE_CSF_KCPU_H_ ++ ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++#include ++#else ++#include ++#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */ ++ ++/* The maximum number of KCPU commands in flight, enqueueing more commands ++ * than this value shall block. ++ */ ++#define KBASEP_KCPU_QUEUE_SIZE ((size_t)256) ++ ++/** ++ * struct kbase_kcpu_command_import_info - Structure which holds information ++ * about the buffer to be imported ++ * ++ * @gpu_va: Address of the buffer to be imported. 
++ */ ++struct kbase_kcpu_command_import_info { ++ u64 gpu_va; ++}; ++ ++/** ++ * struct kbase_kcpu_command_fence_info - Structure which holds information ++ * about the fence object enqueued in the kcpu command queue ++ * ++ * @fence_cb: Fence callback ++ * @fence: Fence ++ * @kcpu_queue: kcpu command queue ++ */ ++struct kbase_kcpu_command_fence_info { ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++ struct fence_cb fence_cb; ++ struct fence *fence; ++#else ++ struct dma_fence_cb fence_cb; ++ struct dma_fence *fence; ++#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */ ++ struct kbase_kcpu_command_queue *kcpu_queue; ++}; ++ ++/** ++ * struct kbase_kcpu_command_cqs_set_info - Structure which holds information ++ * about CQS objects for the kcpu CQS set command ++ * ++ * @objs: Array of structures which define CQS objects to be used by ++ * the kcpu command. ++ * @nr_objs: Number of CQS objects in the array. ++ */ ++struct kbase_kcpu_command_cqs_set_info { ++ struct base_cqs_set *objs; ++ unsigned int nr_objs; ++}; ++ ++/** ++ * struct kbase_kcpu_command_cqs_wait_info - Structure which holds information ++ * about CQS objects for the kcpu CQS wait command ++ * ++ * @objs: Array of structures which define CQS objects to be used by ++ * the kcpu command. ++ * @signaled: Bit array used to report the status of the CQS wait objects. ++ * 1 is signaled, 0 otherwise. ++ * @nr_objs: Number of CQS objects in the array. ++ * @inherit_err_flags: Bit-pattern for the CQSs in the array who's error field ++ * to be served as the source for importing into the ++ * queue's error-state. 
++ */ ++struct kbase_kcpu_command_cqs_wait_info { ++ struct base_cqs_wait_info *objs; ++ unsigned long *signaled; ++ unsigned int nr_objs; ++ u32 inherit_err_flags; ++}; ++ ++/** ++ * struct kbase_kcpu_command_cqs_set_operation_info - Structure which holds information ++ * about CQS objects for the kcpu CQS timeline set command ++ * ++ * @objs: Array of structures which define CQS timeline objects to be used by ++ * the kcpu command. ++ * @nr_objs: Number of CQS objects in the array. ++ */ ++struct kbase_kcpu_command_cqs_set_operation_info { ++ struct base_cqs_set_operation_info *objs; ++ unsigned int nr_objs; ++}; ++ ++/** ++ * struct kbase_kcpu_command_cqs_wait_operation_info - Structure which holds information ++ * about CQS objects for the kcpu CQS timeline wait command ++ * ++ * @objs: Array of structures which define CQS timeline objects to be used by ++ * the kcpu command. ++ * @signaled: Bit array used to report the status of the CQS wait objects. ++ * 1 is signaled, 0 otherwise. ++ * @nr_objs: Number of CQS objects in the array. ++ */ ++struct kbase_kcpu_command_cqs_wait_operation_info { ++ struct base_cqs_wait_operation_info *objs; ++ unsigned long *signaled; ++ unsigned int nr_objs; ++ u32 inherit_err_flags; ++}; ++ ++/** ++ * struct kbase_kcpu_command_jit_alloc_info - Structure which holds information ++ * needed for the kcpu command for jit allocations ++ * ++ * @node: Used to keep track of all JIT free/alloc commands in submission ++ * order. This must be located in the front of this struct to ++ * match that of kbase_kcpu_command_jit_free_info. ++ * @info: Array of objects of the struct base_jit_alloc_info type which ++ * specify jit allocations to be made by the kcpu command. ++ * @count: Number of jit alloc objects in the array. ++ * @blocked: Whether this allocation has been put into the pending list to ++ * be retried later. 
++ */ ++struct kbase_kcpu_command_jit_alloc_info { ++ struct list_head node; ++ struct base_jit_alloc_info *info; ++ u8 count; ++ bool blocked; ++}; ++ ++/** ++ * struct kbase_kcpu_command_jit_free_info - Structure which holds information ++ * needed for the kcpu jit free command ++ * ++ * @node: Used to keep track of all JIT free/alloc commands in submission ++ * order. This must be located in the front of this struct to ++ * match that of kbase_kcpu_command_jit_alloc_info. ++ * @ids: Array of identifiers of jit allocations which are to be freed ++ * by the kcpu command. ++ * @count: Number of elements in the array. ++ */ ++struct kbase_kcpu_command_jit_free_info { ++ struct list_head node; ++ u8 *ids; ++ u8 count; ++}; ++ ++/** ++ * struct kbase_suspend_copy_buffer - information about the suspend buffer ++ * to be copied. ++ * ++ * @size: size of the suspend buffer in bytes. ++ * @pages: pointer to an array of pointers to the pages which contain ++ * the user buffer. ++ * @nr_pages: number of pages. ++ * @offset: offset into the pages ++ * @cpu_alloc: Reference to physical pages of suspend buffer allocation. ++ */ ++struct kbase_suspend_copy_buffer { ++ size_t size; ++ struct page **pages; ++ int nr_pages; ++ size_t offset; ++ struct kbase_mem_phy_alloc *cpu_alloc; ++}; ++ ++/** ++ * struct kbase_kcpu_command_group_suspend_info - structure which contains ++ * suspend buffer data captured for a suspended queue group. ++ * ++ * @sus_buf: Pointer to the structure which contains details of the ++ * user buffer and its kernel pinned pages. ++ * @group_handle: Handle to the mapping of CSG.
++ */ ++struct kbase_kcpu_command_group_suspend_info { ++ struct kbase_suspend_copy_buffer *sus_buf; ++ u8 group_handle; ++}; ++ ++#if MALI_UNIT_TEST ++struct kbase_kcpu_command_sample_time_info { ++ u64 page_addr; ++ u64 page_offset; ++ struct page **page; ++}; ++#endif /* MALI_UNIT_TEST */ ++ ++/** ++ * struct kbase_kcpu_command - Command which is to be part of the kernel ++ * command queue ++ * ++ * @type: Type of the command. ++ * @enqueue_ts: Denotes the relative time of enqueueing, a smaller value ++ * indicates that it has been enqueued earlier. ++ * @info: Structure which holds information about the command ++ * dependent on the command type. ++ * @info.fence: Fence ++ * @info.cqs_wait: CQS wait ++ * @info.cqs_set: CQS set ++ * @info.import: import ++ * @info.jit_alloc: jit allocation ++ * @info.jit_free: jit deallocation ++ * @info.suspend_buf_copy: suspend buffer copy ++ * @info.sample_time: sample time ++ */ ++struct kbase_kcpu_command { ++ enum base_kcpu_command_type type; ++ u64 enqueue_ts; ++ union { ++ struct kbase_kcpu_command_fence_info fence; ++ struct kbase_kcpu_command_cqs_wait_info cqs_wait; ++ struct kbase_kcpu_command_cqs_set_info cqs_set; ++ struct kbase_kcpu_command_cqs_wait_operation_info cqs_wait_operation; ++ struct kbase_kcpu_command_cqs_set_operation_info cqs_set_operation; ++ struct kbase_kcpu_command_import_info import; ++ struct kbase_kcpu_command_jit_alloc_info jit_alloc; ++ struct kbase_kcpu_command_jit_free_info jit_free; ++ struct kbase_kcpu_command_group_suspend_info suspend_buf_copy; ++#if MALI_UNIT_TEST ++ struct kbase_kcpu_command_sample_time_info sample_time; ++#endif /* MALI_UNIT_TEST */ ++ } info; ++}; ++ ++/** ++ * struct kbase_kcpu_command_queue - a command queue executed by the kernel ++ * ++ * @kctx: The context to which this command queue belongs. ++ * @commands: Array of commands which have been successfully ++ * enqueued to this command queue.
++ * @work: struct work_struct which contains a pointer to ++ * the function which handles processing of kcpu ++ * commands enqueued into a kcpu command queue; ++ * part of kernel API for processing workqueues ++ * @start_offset: Index of the command to be executed next ++ * @id: KCPU command queue ID. ++ * @num_pending_cmds: The number of commands enqueued but not yet ++ * executed or pending ++ * @cqs_wait_count: Tracks the number of CQS wait commands enqueued ++ * @fence_context: The dma-buf fence context number for this kcpu ++ * queue. A unique context number is allocated for ++ * each kcpu queue. ++ * @fence_seqno: The dma-buf fence sequence number for the fence ++ * that is returned on the enqueue of fence signal ++ * command. This is increased every time the ++ * fence signal command is queued. ++ * @fence_wait_processed: Used to avoid reprocessing of the fence wait ++ * command which has blocked the processing of ++ * commands that follow it. ++ * @enqueue_failed: Indicates that no space has become available in ++ * the buffer since an enqueue operation failed ++ * because of insufficient free space. ++ * @command_started: Indicates that the command at the front of the ++ * queue has been started in a previous queue ++ * process, but was not completed due to some ++ * unmet dependencies. Ensures that instrumentation ++ * of the execution start of these commands is only ++ * fired exactly once. ++ * @has_error: Indicates that the kcpu queue is in error mode ++ * or without errors since last cleaned. ++ * @jit_blocked: Used to keep track of command queues blocked ++ * by a pending JIT allocation command. 
++ */ ++struct kbase_kcpu_command_queue { ++ struct kbase_context *kctx; ++ struct kbase_kcpu_command commands[KBASEP_KCPU_QUEUE_SIZE]; ++ struct work_struct work; ++ u8 start_offset; ++ u8 id; ++ u16 num_pending_cmds; ++ u32 cqs_wait_count; ++ u64 fence_context; ++ unsigned int fence_seqno; ++ bool fence_wait_processed; ++ bool enqueue_failed; ++ bool command_started; ++ struct list_head jit_blocked; ++ bool has_error; ++}; ++ ++/** ++ * kbase_csf_kcpu_queue_new - Create new KCPU command queue. ++ * ++ * @kctx: Pointer to the kbase context within which the KCPU command ++ * queue will be created. ++ * @newq: Pointer to the structure which contains information about ++ * the new KCPU command queue to be created. ++ */ ++int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, ++ struct kbase_ioctl_kcpu_queue_new *newq); ++ ++/** ++ * kbase_csf_kcpu_queue_delete - Delete KCPU command queue. ++ * ++ * Return: 0 if successful, -EINVAL if the queue ID is invalid. ++ * ++ * @kctx: Pointer to the kbase context from which the KCPU command ++ * queue is to be deleted. ++ * @del: Pointer to the structure which specifies the KCPU command ++ * queue to be deleted. ++ */ ++int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx, ++ struct kbase_ioctl_kcpu_queue_delete *del); ++ ++/** ++ * kbase_csf_kcpu_queue_enqueue - Enqueue a KCPU command into a KCPU command ++ * queue. ++ * ++ * @kctx: Pointer to the kbase context within which the KCPU command ++ * is to be enqueued into the KCPU command queue. ++ * @enq: Pointer to the structure which specifies the KCPU command ++ * as well as the KCPU command queue into which the command ++ * is to be enqueued. ++ */ ++int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, ++ struct kbase_ioctl_kcpu_queue_enqueue *enq); ++ ++/** ++ * kbase_csf_kcpu_queue_context_init - Initialize the kernel CPU queues context ++ * for a GPU address space ++ * ++ * @kctx: Pointer to the kbase context being initialized. 
++ * ++ * Return: 0 if successful or a negative error code on failure. ++ */ ++int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx); ++ ++/** ++ * kbase_csf_kcpu_queue_context_term - Terminate the kernel CPU queues context ++ * for a GPU address space ++ * ++ * This function deletes any kernel CPU queues that weren't deleted before ++ * context termination. ++ * ++ * @kctx: Pointer to the kbase context being terminated. ++ */ ++void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx); ++ ++#endif /* _KBASE_CSF_KCPU_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu_debugfs.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu_debugfs.c +new file mode 100644 +index 0000000..0a2cde0 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu_debugfs.c +@@ -0,0 +1,197 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ ++ ++#include "mali_kbase_csf_kcpu_debugfs.h" ++#include <mali_kbase.h> ++#include <linux/seq_file.h> ++ ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++#include "mali_kbase_sync.h" ++#endif ++ ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ ++/** ++ * kbasep_csf_kcpu_debugfs_print_cqs_waits() - Print additional info for KCPU ++ * queues blocked on CQS wait commands. ++ * ++ * @file: The seq_file to print to ++ * @kctx: The context of the KCPU queue ++ * @waits: Pointer to the KCPU CQS wait command info ++ */ ++static void kbasep_csf_kcpu_debugfs_print_cqs_waits(struct seq_file *file, ++ struct kbase_context *kctx, ++ struct kbase_kcpu_command_cqs_wait_info *waits) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < waits->nr_objs; i++) { ++ struct kbase_vmap_struct *mapping; ++ u32 val; ++ char const *msg; ++ u32 *const cpu_ptr = (u32 *)kbase_phy_alloc_mapping_get(kctx, ++ waits->objs[i].addr, &mapping); ++ ++ if (!cpu_ptr) ++ return; ++ ++ val = *cpu_ptr; ++ ++ kbase_phy_alloc_mapping_put(kctx, mapping); ++ ++ msg = (waits->inherit_err_flags & (1U << i)) ? "true" : ++ "false"; ++ seq_printf(file, " %llx(%u > %u, inherit_err: %s), ", ++ waits->objs[i].addr, val, waits->objs[i].val, msg); ++ } ++} ++ ++/** ++ * kbasep_csf_kcpu_debugfs_print_queue() - Print debug data for a KCPU queue ++ * ++ * @file: The seq_file to print to ++ * @kctx: The context of the KCPU queue ++ * @queue: Pointer to the KCPU queue ++ */ ++static void kbasep_csf_kcpu_debugfs_print_queue(struct seq_file *file, ++ struct kbase_context *kctx, ++ struct kbase_kcpu_command_queue *queue) ++{ ++ if (WARN_ON(!queue)) ++ return; ++ ++ lockdep_assert_held(&kctx->csf.kcpu_queues.lock); ++ ++ seq_printf(file, "%16u, %11u, %7u, %13llu %8u", ++ queue->num_pending_cmds, queue->enqueue_failed, ++ queue->command_started ?
1 : 0, ++ queue->fence_context, queue->fence_seqno); ++ ++ if (queue->command_started) { ++ struct kbase_kcpu_command *cmd = ++ &queue->commands[queue->start_offset]; ++ switch (cmd->type) { ++#if IS_ENABLED(CONFIG_SYNC_FILE) ++ case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: ++ { ++ struct kbase_sync_fence_info info; ++ ++ kbase_sync_fence_info_get(cmd->info.fence.fence, &info); ++ seq_printf(file, ", Fence %pK %s %s", ++ info.fence, info.name, ++ kbase_sync_status_string(info.status)); ++ break; ++ } ++#endif ++ case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: ++ seq_puts(file, ", CQS "); ++ kbasep_csf_kcpu_debugfs_print_cqs_waits(file, kctx, ++ &cmd->info.cqs_wait); ++ break; ++ default: ++ seq_puts(file, ", U, Unknown blocking command"); ++ break; ++ } ++ } ++ ++ seq_puts(file, "\n"); ++} ++ ++/** ++ * kbasep_csf_kcpu_debugfs_show() - Print the KCPU queues debug information ++ * ++ * @file: The seq_file for printing to ++ * @data: The debugfs dentry private data, a pointer to kbase_context ++ * ++ * Return: Negative error code or 0 on success. ++ */ ++static int kbasep_csf_kcpu_debugfs_show(struct seq_file *file, void *data) ++{ ++ struct kbase_context *kctx = file->private; ++ unsigned long idx; ++ ++ seq_printf(file, "MALI_CSF_KCPU_DEBUGFS_VERSION: v%u\n", MALI_CSF_KCPU_DEBUGFS_VERSION); ++ seq_puts(file, "Queue Idx(err-mode), Pending Commands, Enqueue err, Blocked, Fence context & seqno, (Wait Type, Additional info)\n"); ++ mutex_lock(&kctx->csf.kcpu_queues.lock); ++ ++ idx = find_first_bit(kctx->csf.kcpu_queues.in_use, ++ KBASEP_MAX_KCPU_QUEUES); ++ ++ while (idx < KBASEP_MAX_KCPU_QUEUES) { ++ struct kbase_kcpu_command_queue *queue = ++ kctx->csf.kcpu_queues.array[idx]; ++ ++ seq_printf(file, "%9lu( %s ), ", idx, ++ queue->has_error ? 
"InErr" : "NoErr"); ++ kbasep_csf_kcpu_debugfs_print_queue(file, kctx, ++ kctx->csf.kcpu_queues.array[idx]); ++ ++ idx = find_next_bit(kctx->csf.kcpu_queues.in_use, ++ KBASEP_MAX_KCPU_QUEUES, idx + 1); ++ } ++ ++ mutex_unlock(&kctx->csf.kcpu_queues.lock); ++ return 0; ++} ++ ++static int kbasep_csf_kcpu_debugfs_open(struct inode *in, struct file *file) ++{ ++ return single_open(file, kbasep_csf_kcpu_debugfs_show, in->i_private); ++} ++ ++static const struct file_operations kbasep_csf_kcpu_debugfs_fops = { ++ .open = kbasep_csf_kcpu_debugfs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++void kbase_csf_kcpu_debugfs_init(struct kbase_context *kctx) ++{ ++ struct dentry *file; ++#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) ++ const mode_t mode = 0444; ++#else ++ const mode_t mode = 0400; ++#endif ++ ++ if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) ++ return; ++ ++ file = debugfs_create_file("kcpu_queues", mode, kctx->kctx_dentry, ++ kctx, &kbasep_csf_kcpu_debugfs_fops); ++ ++ if (IS_ERR_OR_NULL(file)) { ++ dev_warn(kctx->kbdev->dev, ++ "Unable to create KCPU debugfs entry"); ++ } ++} ++ ++ ++#else ++/* ++ * Stub functions for when debugfs is disabled ++ */ ++void kbase_csf_kcpu_debugfs_init(struct kbase_context *kctx) ++{ ++} ++ ++#endif /* CONFIG_DEBUG_FS */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu_debugfs.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu_debugfs.h +new file mode 100644 +index 0000000..08f2fda +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu_debugfs.h +@@ -0,0 +1,37 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#ifndef _KBASE_CSF_KCPU_DEBUGFS_H_ ++#define _KBASE_CSF_KCPU_DEBUGFS_H_ ++ ++/* Forward declaration */ ++struct kbase_context; ++ ++#define MALI_CSF_KCPU_DEBUGFS_VERSION 0 ++ ++/** ++ * kbase_csf_kcpu_debugfs_init() - Create a debugfs entry for KCPU queues ++ * ++ * @kctx: The kbase_context for which to create the debugfs entry ++ */ ++void kbase_csf_kcpu_debugfs_init(struct kbase_context *kctx); ++ ++#endif /* _KBASE_CSF_KCPU_DEBUGFS_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_protected_memory.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_protected_memory.c +new file mode 100644 +index 0000000..5997483 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_protected_memory.c +@@ -0,0 +1,119 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#include "mali_kbase_csf_protected_memory.h" ++#include ++ ++#if IS_ENABLED(CONFIG_OF) ++#include ++#endif ++ ++int kbase_csf_protected_memory_init(struct kbase_device *const kbdev) ++{ ++ int err = 0; ++ ++#if IS_ENABLED(CONFIG_OF) ++ struct device_node *pma_node = of_parse_phandle(kbdev->dev->of_node, ++ "protected-memory-allocator", 0); ++ if (!pma_node) { ++ dev_info(kbdev->dev, "Protected memory allocator not available\n"); ++ } else { ++ struct platform_device *const pdev = ++ of_find_device_by_node(pma_node); ++ ++ kbdev->csf.pma_dev = NULL; ++ if (!pdev) { ++ dev_err(kbdev->dev, "Platform device for Protected memory allocator not found\n"); ++ } else { ++ kbdev->csf.pma_dev = platform_get_drvdata(pdev); ++ if (!kbdev->csf.pma_dev) { ++ dev_info(kbdev->dev, "Protected memory allocator is not ready\n"); ++ err = -EPROBE_DEFER; ++ } else if (!try_module_get(kbdev->csf.pma_dev->owner)) { ++ dev_err(kbdev->dev, "Failed to get Protected memory allocator module\n"); ++ err = -ENODEV; ++ } else { ++ dev_info(kbdev->dev, "Protected memory allocator successfully loaded\n"); ++ } ++ } ++ of_node_put(pma_node); ++ } ++#endif ++ ++ return err; ++} ++ ++void kbase_csf_protected_memory_term(struct kbase_device *const kbdev) ++{ ++ if (kbdev->csf.pma_dev) ++ module_put(kbdev->csf.pma_dev->owner); ++} ++ ++struct protected_memory_allocation ** ++ kbase_csf_protected_memory_alloc( ++ struct kbase_device *const kbdev, ++ struct tagged_addr *phys, ++ size_t num_pages) ++{ ++ size_t i; ++ struct 
protected_memory_allocator_device *pma_dev = ++ kbdev->csf.pma_dev; ++ struct protected_memory_allocation **pma = ++ kmalloc_array(num_pages, sizeof(*pma), GFP_KERNEL); ++ ++ if (WARN_ON(!pma_dev) || WARN_ON(!phys) || !pma) ++ return NULL; ++ ++ for (i = 0; i < num_pages; i++) { ++ pma[i] = pma_dev->ops.pma_alloc_page(pma_dev, ++ KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER); ++ if (!pma[i]) ++ break; ++ ++ phys[i] = as_tagged(pma_dev->ops.pma_get_phys_addr(pma_dev, ++ pma[i])); ++ } ++ ++ if (i != num_pages) { ++ kbase_csf_protected_memory_free(kbdev, pma, i); ++ return NULL; ++ } ++ ++ return pma; ++} ++ ++void kbase_csf_protected_memory_free( ++ struct kbase_device *const kbdev, ++ struct protected_memory_allocation **pma, ++ size_t num_pages) ++{ ++ size_t i; ++ struct protected_memory_allocator_device *pma_dev = ++ kbdev->csf.pma_dev; ++ ++ if (WARN_ON(!pma_dev) || WARN_ON(!pma)) ++ return; ++ ++ for (i = 0; i < num_pages; i++) ++ pma_dev->ops.pma_free_page(pma_dev, pma[i]); ++ ++ kfree(pma); ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_protected_memory.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_protected_memory.h +new file mode 100644 +index 0000000..4c0609e +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_protected_memory.h +@@ -0,0 +1,71 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#ifndef _KBASE_CSF_PROTECTED_MEMORY_H_ ++#define _KBASE_CSF_PROTECTED_MEMORY_H_ ++ ++#include "mali_kbase.h" ++/** ++ * kbase_csf_protected_memory_init - Initialise protected memory allocator. ++ * ++ * @kbdev: Device pointer. ++ * ++ * Return: 0 if success, or an error code on failure. ++ */ ++int kbase_csf_protected_memory_init(struct kbase_device *const kbdev); ++ ++/** ++ * kbase_csf_protected_memory_term - Terminate protected memory allocator. ++ * ++ * @kbdev: Device pointer. ++ */ ++void kbase_csf_protected_memory_term(struct kbase_device *const kbdev); ++ ++/** ++ * kbase_csf_protected_memory_alloc - Allocate protected memory pages. ++ * ++ * @kbdev: Device pointer. ++ * @phys: Array of physical addresses to be filled in by the protected ++ * memory allocator. ++ * @num_pages: Number of pages requested to be allocated. ++ * ++ * Return: Pointer to an array of protected memory allocations on success, ++ * or NULL on failure. ++ */ ++struct protected_memory_allocation ** ++ kbase_csf_protected_memory_alloc( ++ struct kbase_device *const kbdev, ++ struct tagged_addr *phys, ++ size_t num_pages); ++ ++/** ++ * kbase_csf_protected_memory_free - Free the allocated ++ * protected memory pages ++ * ++ * @kbdev: Device pointer. ++ * @pma: Array of pointers to protected memory allocations. ++ * @num_pages: Number of pages to be freed. 
++ */ ++void kbase_csf_protected_memory_free( ++ struct kbase_device *const kbdev, ++ struct protected_memory_allocation **pma, ++ size_t num_pages); ++#endif +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_reset_gpu.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_reset_gpu.c +new file mode 100644 +index 0000000..f6d61d7 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_reset_gpu.c +@@ -0,0 +1,629 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* Waiting timeout for GPU reset to complete */ ++#define GPU_RESET_TIMEOUT_MS (5000) /* 5 seconds */ ++#define DUMP_DWORDS_PER_LINE (4) ++/* 16 characters needed for a 8 byte value in hex & 1 character for space */ ++#define DUMP_HEX_CHARS_PER_DWORD ((2 * 8) + 1) ++#define DUMP_HEX_CHARS_PER_LINE \ ++ (DUMP_DWORDS_PER_LINE * DUMP_HEX_CHARS_PER_DWORD) ++ ++static inline bool ++kbase_csf_reset_state_is_silent(enum kbase_csf_reset_gpu_state state) ++{ ++ return (state == KBASE_CSF_RESET_GPU_COMMITTED_SILENT); ++} ++ ++static inline bool ++kbase_csf_reset_state_is_committed(enum kbase_csf_reset_gpu_state state) ++{ ++ return (state == KBASE_CSF_RESET_GPU_COMMITTED || ++ state == KBASE_CSF_RESET_GPU_COMMITTED_SILENT); ++} ++ ++static inline bool ++kbase_csf_reset_state_is_active(enum kbase_csf_reset_gpu_state state) ++{ ++ return (state == KBASE_CSF_RESET_GPU_HAPPENING); ++} ++ ++/** ++ * DOC: Mechanism for coherent access to the HW with respect to GPU reset ++ * ++ * Access to the HW from non-atomic context outside of the reset thread must ++ * use kbase_reset_gpu_prevent_and_wait() / kbase_reset_gpu_try_prevent(). ++ * ++ * This currently works by taking the &kbase_device's csf.reset.sem, for ++ * 'write' access by the GPU reset thread and 'read' access by every other ++ * thread. The use of this rw_semaphore means: ++ * ++ * - there will be mutual exclusion (and thus waiting) between the thread doing ++ * reset ('writer') and threads trying to access the GPU for 'normal' ++ * operations ('readers') ++ * ++ * - multiple threads may prevent reset from happening without serializing each ++ * other prematurely. Note that at present the wait for reset to finish has ++ * to be done higher up in the driver than actual GPU access, at a point ++ * where it won't cause lock ordering issues. 
At such a point, some paths may ++ * actually lead to no GPU access, but we would prefer to avoid serializing ++ * at that level ++ * ++ * - lockdep (if enabled in the kernel) will check such uses for deadlock ++ * ++ * If instead &kbase_device's csf.reset.wait &wait_queue_head_t were used on ++ * its own, we'd also need to add a &lockdep_map and appropriate lockdep calls ++ * to make use of lockdep checking in all places where the &wait_queue_head_t ++ * is waited upon or signaled. ++ * ++ * Indeed places where we wait on &kbase_device's csf.reset.wait (such as ++ * kbase_reset_gpu_wait()) are the only places where we need extra call(s) to ++ * lockdep, and they are made on the existing rw_semaphore. ++ * ++ * For access from atomic context, the &kbase_device's csf.reset.state member ++ * should be checked instead, such as by using kbase_reset_gpu_is_active(). ++ * ++ * Ideally the &rw_semaphore should be replaced in future with a single mutex ++ * that protects any access to the GPU, via reset or otherwise. 
++ */ ++ ++int kbase_reset_gpu_prevent_and_wait(struct kbase_device *kbdev) ++{ ++ down_read(&kbdev->csf.reset.sem); ++ ++ if (atomic_read(&kbdev->csf.reset.state) == ++ KBASE_CSF_RESET_GPU_FAILED) { ++ up_read(&kbdev->csf.reset.sem); ++ return -ENOMEM; ++ } ++ ++ if (WARN_ON(kbase_reset_gpu_is_active(kbdev))) { ++ up_read(&kbdev->csf.reset.sem); ++ return -EFAULT; ++ } ++ ++ return 0; ++} ++KBASE_EXPORT_TEST_API(kbase_reset_gpu_prevent_and_wait); ++ ++int kbase_reset_gpu_try_prevent(struct kbase_device *kbdev) ++{ ++ if (!down_read_trylock(&kbdev->csf.reset.sem)) ++ return -EAGAIN; ++ ++ if (atomic_read(&kbdev->csf.reset.state) == ++ KBASE_CSF_RESET_GPU_FAILED) { ++ up_read(&kbdev->csf.reset.sem); ++ return -ENOMEM; ++ } ++ ++ if (WARN_ON(kbase_reset_gpu_is_active(kbdev))) { ++ up_read(&kbdev->csf.reset.sem); ++ return -EFAULT; ++ } ++ ++ return 0; ++} ++ ++void kbase_reset_gpu_allow(struct kbase_device *kbdev) ++{ ++ up_read(&kbdev->csf.reset.sem); ++} ++KBASE_EXPORT_TEST_API(kbase_reset_gpu_allow); ++ ++void kbase_reset_gpu_assert_prevented(struct kbase_device *kbdev) ++{ ++#if KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE ++ lockdep_assert_held_read(&kbdev->csf.reset.sem); ++#else ++ lockdep_assert_held(&kbdev->csf.reset.sem); ++#endif ++ WARN_ON(kbase_reset_gpu_is_active(kbdev)); ++} ++ ++void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev) ++{ ++ if (atomic_read(&kbdev->csf.reset.state) == KBASE_CSF_RESET_GPU_FAILED) ++ return; ++ ++#if KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE ++ lockdep_assert_held_read(&kbdev->csf.reset.sem); ++#else ++ lockdep_assert_held(&kbdev->csf.reset.sem); ++#endif ++ WARN_ON(kbase_reset_gpu_is_active(kbdev)); ++} ++ ++/* Mark the reset as now happening, and synchronize with other threads that ++ * might be trying to access the GPU ++ */ ++static void kbase_csf_reset_begin_hw_access_sync( ++ struct kbase_device *kbdev, ++ enum kbase_csf_reset_gpu_state initial_reset_state) ++{ ++ unsigned long 
hwaccess_lock_flags; ++ unsigned long scheduler_spin_lock_flags; ++ ++ /* Note this is a WARN/atomic_set because it is a software issue for a ++ * race to be occurring here ++ */ ++ WARN_ON(!kbase_csf_reset_state_is_committed(initial_reset_state)); ++ ++ down_write(&kbdev->csf.reset.sem); ++ ++ /* Threads in atomic context accessing the HW will hold one of these ++ * locks, so synchronize with them too. ++ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_lock_flags); ++ kbase_csf_scheduler_spin_lock(kbdev, &scheduler_spin_lock_flags); ++ atomic_set(&kbdev->csf.reset.state, KBASE_RESET_GPU_HAPPENING); ++ kbase_csf_scheduler_spin_unlock(kbdev, scheduler_spin_lock_flags); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_lock_flags); ++} ++ ++/* Mark the reset as finished and allow others threads to once more access the ++ * GPU ++ */ ++static void kbase_csf_reset_end_hw_access(struct kbase_device *kbdev, ++ int err_during_reset, ++ bool firmware_inited) ++{ ++ unsigned long hwaccess_lock_flags; ++ unsigned long scheduler_spin_lock_flags; ++ ++ WARN_ON(!kbase_csf_reset_state_is_active( ++ atomic_read(&kbdev->csf.reset.state))); ++ ++ /* Once again, we synchronize with atomic context threads accessing the ++ * HW, as otherwise any actions they defer could get lost ++ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_lock_flags); ++ kbase_csf_scheduler_spin_lock(kbdev, &scheduler_spin_lock_flags); ++ ++ if (!err_during_reset) { ++ atomic_set(&kbdev->csf.reset.state, ++ KBASE_CSF_RESET_GPU_NOT_PENDING); ++ } else { ++ dev_err(kbdev->dev, "Reset failed to complete"); ++ atomic_set(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_FAILED); ++ } ++ ++ kbase_csf_scheduler_spin_unlock(kbdev, scheduler_spin_lock_flags); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_lock_flags); ++ ++ /* Invoke the scheduling tick after formally finishing the reset, ++ * otherwise the tick might start too soon and notice that reset ++ * is still in progress. 
++ */ ++ up_write(&kbdev->csf.reset.sem); ++ wake_up(&kbdev->csf.reset.wait); ++ ++ if (!err_during_reset && likely(firmware_inited)) ++ kbase_csf_scheduler_enable_tick_timer(kbdev); ++} ++ ++static void kbase_csf_debug_dump_registers(struct kbase_device *kbdev) ++{ ++ kbase_io_history_dump(kbdev); ++ ++ dev_err(kbdev->dev, "Register state:"); ++ dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x MCU_STATUS=0x%08x", ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS))); ++ dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", ++ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)), ++ kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS))); ++ dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x", ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)), ++ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)), ++ kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK))); ++ dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x", ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1))); ++ dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x TILER_CONFIG=0x%08x", ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG)), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG)), ++ kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG))); ++} ++ ++static void kbase_csf_dump_firmware_trace_buffer(struct kbase_device *kbdev) ++{ ++ u8 *buf, *line_str; ++ unsigned int read_size; ++ struct firmware_trace_buffer *tb = ++ kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME); ++ ++ if (tb == NULL) { ++ dev_dbg(kbdev->dev, "Can't get the trace buffer, firmware trace dump skipped"); ++ return; ++ } ++ ++ buf = kmalloc(PAGE_SIZE + DUMP_HEX_CHARS_PER_LINE + 1, 
GFP_KERNEL); ++ if (buf == NULL) { ++ dev_err(kbdev->dev, "Short of memory, firmware trace dump skipped"); ++ return; ++ } ++ line_str = &buf[PAGE_SIZE]; ++ ++ dev_err(kbdev->dev, "Firmware trace buffer dump:"); ++ while ((read_size = kbase_csf_firmware_trace_buffer_read_data(tb, buf, ++ PAGE_SIZE))) { ++ u64 *ptr = (u64 *)buf; ++ u32 num_dwords; ++ ++ for (num_dwords = read_size / sizeof(u64); ++ num_dwords >= DUMP_DWORDS_PER_LINE; ++ num_dwords -= DUMP_DWORDS_PER_LINE) { ++ dev_err(kbdev->dev, "%016llx %016llx %016llx %016llx", ++ ptr[0], ptr[1], ptr[2], ptr[3]); ++ ptr += DUMP_DWORDS_PER_LINE; ++ } ++ ++ if (num_dwords) { ++ int pos = 0; ++ ++ while (num_dwords--) { ++ pos += snprintf(line_str + pos, ++ DUMP_HEX_CHARS_PER_DWORD + 1, ++ "%016llx ", ptr[0]); ++ ptr++; ++ } ++ ++ dev_err(kbdev->dev, "%s", line_str); ++ } ++ } ++ ++ kfree(buf); ++} ++ ++/** ++ * kbase_csf_hwcnt_on_reset_error() - Sets HWCNT to appropriate state in the ++ * event of an error during GPU reset. ++ * @kbdev: Pointer to KBase device ++ */ ++static void kbase_csf_hwcnt_on_reset_error(struct kbase_device *kbdev) ++{ ++ unsigned long flags; ++ ++ /* Treat this as an unrecoverable error for HWCNT */ ++ kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface); ++ ++ /* Re-enable counters to ensure matching enable/disable pair. ++ * This might reduce the hwcnt disable count to 0, and therefore ++ * trigger actual re-enabling of hwcnt. ++ * However, as the backend is now in the unrecoverable error state, ++ * re-enabling will immediately fail and put the context into the error ++ * state, preventing the hardware from being touched (which could have ++ * risked a hang). 
++ */ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++} ++ ++static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev, ++ bool firmware_inited, bool silent) ++{ ++ unsigned long flags; ++ int err; ++ ++ WARN_ON(kbdev->irq_reset_flush); ++ /* The reset must now be happening otherwise other threads will not ++ * have been synchronized with to stop their access to the HW ++ */ ++#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE ++ lockdep_assert_held_write(&kbdev->csf.reset.sem); ++#elif KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE ++ lockdep_assert_held_exclusive(&kbdev->csf.reset.sem); ++#else ++ lockdep_assert_held(&kbdev->csf.reset.sem); ++#endif ++ WARN_ON(!kbase_reset_gpu_is_active(kbdev)); ++ ++ /* Reset the scheduler state before disabling the interrupts as suspend ++ * of active CSG slots would also be done as a part of reset. ++ */ ++ if (likely(firmware_inited)) ++ kbase_csf_scheduler_reset(kbdev); ++ cancel_work_sync(&kbdev->csf.firmware_reload_work); ++ ++ dev_dbg(kbdev->dev, "Disable GPU hardware counters.\n"); ++ /* This call will block until counters are disabled. ++ */ ++ kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ spin_lock(&kbdev->mmu_mask_change); ++ kbase_pm_reset_start_locked(kbdev); ++ ++ dev_dbg(kbdev->dev, ++ "We're about to flush out the IRQs and their bottom halves\n"); ++ kbdev->irq_reset_flush = true; ++ ++ /* Disable IRQ to avoid IRQ handlers to kick in after releasing the ++ * spinlock; this also clears any outstanding interrupts ++ */ ++ kbase_pm_disable_interrupts_nolock(kbdev); ++ ++ spin_unlock(&kbdev->mmu_mask_change); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ dev_dbg(kbdev->dev, "Ensure that any IRQ handlers have finished\n"); ++ /* Must be done without any locks IRQ handlers will take. 
++ */ ++ kbase_synchronize_irqs(kbdev); ++ ++ dev_dbg(kbdev->dev, "Flush out any in-flight work items\n"); ++ kbase_flush_mmu_wqs(kbdev); ++ ++ dev_dbg(kbdev->dev, ++ "The flush has completed so reset the active indicator\n"); ++ kbdev->irq_reset_flush = false; ++ ++ mutex_lock(&kbdev->pm.lock); ++ if (!silent) ++ dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", ++ RESET_TIMEOUT); ++ ++ /* Output the state of some interesting registers to help in the ++ * debugging of GPU resets, and dump the firmware trace buffer ++ */ ++ if (!silent) { ++ kbase_csf_debug_dump_registers(kbdev); ++ if (likely(firmware_inited)) ++ kbase_csf_dump_firmware_trace_buffer(kbdev); ++ } ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_ipa_control_handle_gpu_reset_pre(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ /* Tell hardware counters a reset is about to occur. ++ * If the backend is in an unrecoverable error state (e.g. due to ++ * firmware being unresponsive) this will transition the backend out of ++ * it, on the assumption a reset will fix whatever problem there was. 
++ */ ++ kbase_hwcnt_backend_csf_on_before_reset(&kbdev->hwcnt_gpu_iface); ++ ++ /* Reset the GPU */ ++ err = kbase_pm_init_hw(kbdev, 0); ++ ++ mutex_unlock(&kbdev->pm.lock); ++ ++ if (WARN_ON(err)) { ++ kbase_csf_hwcnt_on_reset_error(kbdev); ++ return err; ++ } ++ ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_ctx_sched_restore_all_as(kbdev); ++ kbase_ipa_control_handle_gpu_reset_post(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ ++ kbase_pm_enable_interrupts(kbdev); ++ ++ mutex_lock(&kbdev->pm.lock); ++ kbase_pm_reset_complete(kbdev); ++ /* Synchronously wait for the reload of firmware to complete */ ++ err = kbase_pm_wait_for_desired_state(kbdev); ++ mutex_unlock(&kbdev->pm.lock); ++ ++ if (WARN_ON(err)) { ++ kbase_csf_hwcnt_on_reset_error(kbdev); ++ return err; ++ } ++ ++ /* Re-enable GPU hardware counters */ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ ++ if (!silent) ++ dev_err(kbdev->dev, "Reset complete"); ++ ++ return 0; ++} ++ ++static void kbase_csf_reset_gpu_worker(struct work_struct *data) ++{ ++ struct kbase_device *kbdev = container_of(data, struct kbase_device, ++ csf.reset.work); ++ bool firmware_inited; ++ unsigned long flags; ++ int err = 0; ++ const enum kbase_csf_reset_gpu_state initial_reset_state = ++ atomic_read(&kbdev->csf.reset.state); ++ ++ /* Ensure any threads (e.g. 
executing the CSF scheduler) have finished ++ * using the HW ++ */ ++ kbase_csf_reset_begin_hw_access_sync(kbdev, initial_reset_state); ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ firmware_inited = kbdev->csf.firmware_inited; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ if (!kbase_pm_context_active_handle_suspend(kbdev, ++ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { ++ bool silent = ++ kbase_csf_reset_state_is_silent(initial_reset_state); ++ ++ err = kbase_csf_reset_gpu_now(kbdev, firmware_inited, silent); ++ kbase_pm_context_idle(kbdev); ++ } ++ ++ kbase_disjoint_state_down(kbdev); ++ ++ /* Allow other threads to once again use the GPU */ ++ kbase_csf_reset_end_hw_access(kbdev, err, firmware_inited); ++} ++ ++bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags) ++{ ++ if (flags & RESET_FLAGS_HWC_UNRECOVERABLE_ERROR) ++ kbase_hwcnt_backend_csf_on_unrecoverable_error( ++ &kbdev->hwcnt_gpu_iface); ++ ++ if (atomic_cmpxchg(&kbdev->csf.reset.state, ++ KBASE_CSF_RESET_GPU_NOT_PENDING, ++ KBASE_CSF_RESET_GPU_PREPARED) != ++ KBASE_CSF_RESET_GPU_NOT_PENDING) ++ /* Some other thread is already resetting the GPU */ ++ return false; ++ ++ return true; ++} ++KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu); ++ ++bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, ++ unsigned int flags) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ return kbase_prepare_to_reset_gpu(kbdev, flags); ++} ++ ++void kbase_reset_gpu(struct kbase_device *kbdev) ++{ ++ /* Note this is a WARN/atomic_set because it is a software issue for ++ * a race to be occurring here ++ */ ++ if (WARN_ON(atomic_read(&kbdev->csf.reset.state) != ++ KBASE_RESET_GPU_PREPARED)) ++ return; ++ ++ atomic_set(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_COMMITTED); ++ dev_err(kbdev->dev, "Preparing to soft-reset GPU\n"); ++ ++ kbase_disjoint_state_up(kbdev); ++ ++ queue_work(kbdev->csf.reset.workq, &kbdev->csf.reset.work); ++} 
++KBASE_EXPORT_TEST_API(kbase_reset_gpu); ++ ++void kbase_reset_gpu_locked(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ kbase_reset_gpu(kbdev); ++} ++ ++int kbase_reset_gpu_silent(struct kbase_device *kbdev) ++{ ++ if (atomic_cmpxchg(&kbdev->csf.reset.state, ++ KBASE_CSF_RESET_GPU_NOT_PENDING, ++ KBASE_CSF_RESET_GPU_COMMITTED_SILENT) != ++ KBASE_CSF_RESET_GPU_NOT_PENDING) { ++ /* Some other thread is already resetting the GPU */ ++ return -EAGAIN; ++ } ++ ++ kbase_disjoint_state_up(kbdev); ++ ++ queue_work(kbdev->csf.reset.workq, &kbdev->csf.reset.work); ++ ++ return 0; ++} ++ ++bool kbase_reset_gpu_is_active(struct kbase_device *kbdev) ++{ ++ enum kbase_csf_reset_gpu_state reset_state = ++ atomic_read(&kbdev->csf.reset.state); ++ ++ /* For CSF, the reset is considered active only when the reset worker ++ * is actually executing and other threads would have to wait for it to ++ * complete ++ */ ++ return kbase_csf_reset_state_is_active(reset_state); ++} ++ ++int kbase_reset_gpu_wait(struct kbase_device *kbdev) ++{ ++ const long wait_timeout = ++ kbase_csf_timeout_in_jiffies(GPU_RESET_TIMEOUT_MS); ++ long remaining; ++ ++ /* Inform lockdep we might be trying to wait on a reset (as ++ * would've been done with down_read() - which has no 'timeout' ++ * variant), then use wait_event_timeout() to implement the timed ++ * wait. 
++ * ++ * in CONFIG_PROVE_LOCKING builds, this should catch potential 'time ++ * bound' deadlocks such as: ++ * - incorrect lock order with respect to others locks ++ * - current thread has prevented reset ++ * - current thread is executing the reset worker ++ */ ++ might_lock_read(&kbdev->csf.reset.sem); ++ ++ remaining = wait_event_timeout( ++ kbdev->csf.reset.wait, ++ (atomic_read(&kbdev->csf.reset.state) == ++ KBASE_CSF_RESET_GPU_NOT_PENDING) || ++ (atomic_read(&kbdev->csf.reset.state) == ++ KBASE_CSF_RESET_GPU_FAILED), ++ wait_timeout); ++ ++ if (!remaining) { ++ dev_warn(kbdev->dev, "Timed out waiting for the GPU reset to complete"); ++ return -ETIMEDOUT; ++ } else if (atomic_read(&kbdev->csf.reset.state) == ++ KBASE_CSF_RESET_GPU_FAILED) { ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++KBASE_EXPORT_TEST_API(kbase_reset_gpu_wait); ++ ++int kbase_reset_gpu_init(struct kbase_device *kbdev) ++{ ++ kbdev->csf.reset.workq = alloc_workqueue("Mali reset workqueue", 0, 1); ++ if (kbdev->csf.reset.workq == NULL) ++ return -ENOMEM; ++ ++ INIT_WORK(&kbdev->csf.reset.work, kbase_csf_reset_gpu_worker); ++ ++ init_waitqueue_head(&kbdev->csf.reset.wait); ++ init_rwsem(&kbdev->csf.reset.sem); ++ ++ return 0; ++} ++ ++void kbase_reset_gpu_term(struct kbase_device *kbdev) ++{ ++ destroy_workqueue(kbdev->csf.reset.workq); ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_scheduler.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_scheduler.c +new file mode 100644 +index 0000000..5b795d6 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_scheduler.c +@@ -0,0 +1,5063 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#include ++#include "mali_kbase_config_defaults.h" ++#include ++#include ++#include ++#include "mali_kbase_csf.h" ++#include ++#include ++#include ++#include ++#include ++ ++/* Value to indicate that a queue group is not groups_to_schedule list */ ++#define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX) ++ ++/* Waiting timeout for scheduler state change for descheduling a CSG */ ++#define CSG_SCHED_STOP_TIMEOUT_MS (50) ++ ++#define CSG_SUSPEND_ON_RESET_WAIT_TIMEOUT_MS DEFAULT_RESET_TIMEOUT_MS ++ ++/* Maximum number of endpoints which may run tiler jobs. */ ++#define CSG_TILER_MAX ((u8)1) ++ ++/* Maximum dynamic CSG slot priority value */ ++#define MAX_CSG_SLOT_PRIORITY ((u8)15) ++ ++/* CSF scheduler time slice value */ ++#define CSF_SCHEDULER_TIME_TICK_MS (100) /* 100 milliseconds */ ++ ++/* ++ * CSF scheduler time threshold for converting "tock" requests into "tick" if ++ * they come too close to the end of a tick interval. This avoids scheduling ++ * twice in a row. 
++ */ ++#define CSF_SCHEDULER_TIME_TICK_THRESHOLD_MS \ ++ CSF_SCHEDULER_TIME_TICK_MS ++ ++#define CSF_SCHEDULER_TIME_TICK_THRESHOLD_JIFFIES \ ++ msecs_to_jiffies(CSF_SCHEDULER_TIME_TICK_THRESHOLD_MS) ++ ++/* Nanoseconds per millisecond */ ++#define NS_PER_MS ((u64)1000 * 1000) ++ ++/* ++ * CSF minimum time to reschedule for a new "tock" request. Bursts of "tock" ++ * requests are not serviced immediately, but shall wait for a minimum time in ++ * order to reduce load on the CSF scheduler thread. ++ */ ++#define CSF_SCHEDULER_TIME_TOCK_JIFFIES 1 /* 1 jiffies-time */ ++ ++/* CS suspended and is idle (empty ring buffer) */ ++#define CS_IDLE_FLAG (1 << 0) ++ ++/* CS suspended and is wait for a CQS condition */ ++#define CS_WAIT_SYNC_FLAG (1 << 1) ++ ++/* 2 GPU address space slots are reserved for MCU and privileged context for HW ++ * counter dumping. TODO remove the slot reserved for latter in GPUCORE-26293. ++ */ ++#define NUM_RESERVED_AS_SLOTS (2) ++ ++static int scheduler_group_schedule(struct kbase_queue_group *group); ++static void remove_group_from_idle_wait(struct kbase_queue_group *const group); ++static ++void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, ++ struct kbase_queue_group *const group, ++ enum kbase_csf_group_state run_state); ++static struct kbase_queue_group *scheduler_get_protm_enter_async_group( ++ struct kbase_device *const kbdev, ++ struct kbase_queue_group *const group); ++static struct kbase_queue_group *get_tock_top_group( ++ struct kbase_csf_scheduler *const scheduler); ++static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev); ++static int suspend_active_queue_groups(struct kbase_device *kbdev, ++ unsigned long *slot_mask); ++static void schedule_in_cycle(struct kbase_queue_group *group, bool force); ++ ++#define kctx_as_enabled(kctx) (!kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) ++ ++/** ++ * tick_timer_callback() - Callback function for the scheduling tick hrtimer ++ * ++ * @timer: 
Pointer to the scheduling tick hrtimer ++ * ++ * This function will enqueue the scheduling tick work item for immediate ++ * execution, if it has not been queued already. ++ * ++ * Return: enum value to indicate that timer should not be restarted. ++ */ ++static enum hrtimer_restart tick_timer_callback(struct hrtimer *timer) ++{ ++ struct kbase_device *kbdev = container_of(timer, struct kbase_device, ++ csf.scheduler.tick_timer); ++ ++ kbase_csf_scheduler_advance_tick(kbdev); ++ return HRTIMER_NORESTART; ++} ++ ++/** ++ * start_tick_timer() - Start the scheduling tick hrtimer. ++ * ++ * @kbdev: Pointer to the device ++ * ++ * This function will start the scheduling tick hrtimer and is supposed to ++ * be called only from the tick work item function. The tick hrtimer ++ * should not be active already. ++ */ ++static void start_tick_timer(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ unsigned long flags; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ WARN_ON(scheduler->tick_timer_active); ++ if (likely(!work_pending(&scheduler->tick_work))) { ++ scheduler->tick_timer_active = true; ++ ++ hrtimer_start(&scheduler->tick_timer, ++ HR_TIMER_DELAY_MSEC(scheduler->csg_scheduling_period_ms), ++ HRTIMER_MODE_REL); ++ } ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++} ++ ++/** ++ * cancel_tick_timer() - Cancel the scheduling tick hrtimer ++ * ++ * @kbdev: Pointer to the device ++ */ ++static void cancel_tick_timer(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ scheduler->tick_timer_active = false; ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++ hrtimer_cancel(&scheduler->tick_timer); ++} ++ ++/** ++ * enqueue_tick_work() - Enqueue the scheduling tick work item ++ * ++ * @kbdev: Pointer to 
the device ++ * ++ * This function will queue the scheduling tick work item for immediate ++ * execution. This shall only be called when both the tick hrtimer and tick ++ * work item are not active/pending. ++ */ ++static void enqueue_tick_work(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ unsigned long flags; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ WARN_ON(scheduler->tick_timer_active); ++ queue_work(scheduler->wq, &scheduler->tick_work); ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++} ++ ++static void release_doorbell(struct kbase_device *kbdev, int doorbell_nr) ++{ ++ WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL); ++ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ clear_bit(doorbell_nr, kbdev->csf.scheduler.doorbell_inuse_bitmap); ++} ++ ++static int acquire_doorbell(struct kbase_device *kbdev) ++{ ++ int doorbell_nr; ++ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ ++ doorbell_nr = find_first_zero_bit( ++ kbdev->csf.scheduler.doorbell_inuse_bitmap, ++ CSF_NUM_DOORBELL); ++ ++ if (doorbell_nr >= CSF_NUM_DOORBELL) ++ return KBASEP_USER_DB_NR_INVALID; ++ ++ set_bit(doorbell_nr, kbdev->csf.scheduler.doorbell_inuse_bitmap); ++ ++ return doorbell_nr; ++} ++ ++static void unassign_user_doorbell_from_group(struct kbase_device *kbdev, ++ struct kbase_queue_group *group) ++{ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ ++ if (group->doorbell_nr != KBASEP_USER_DB_NR_INVALID) { ++ release_doorbell(kbdev, group->doorbell_nr); ++ group->doorbell_nr = KBASEP_USER_DB_NR_INVALID; ++ } ++} ++ ++static void unassign_user_doorbell_from_queue(struct kbase_device *kbdev, ++ struct kbase_queue *queue) ++{ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ ++ mutex_lock(&kbdev->csf.reg_lock); ++ ++ if (queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID) { ++ queue->doorbell_nr = KBASEP_USER_DB_NR_INVALID; ++ /* After 
this the dummy page would be mapped in */ ++ unmap_mapping_range(kbdev->csf.db_filp->f_inode->i_mapping, ++ queue->db_file_offset << PAGE_SHIFT, PAGE_SIZE, 1); ++ } ++ ++ mutex_unlock(&kbdev->csf.reg_lock); ++} ++ ++static void assign_user_doorbell_to_group(struct kbase_device *kbdev, ++ struct kbase_queue_group *group) ++{ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ ++ if (group->doorbell_nr == KBASEP_USER_DB_NR_INVALID) ++ group->doorbell_nr = acquire_doorbell(kbdev); ++} ++ ++static void assign_user_doorbell_to_queue(struct kbase_device *kbdev, ++ struct kbase_queue *const queue) ++{ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ ++ mutex_lock(&kbdev->csf.reg_lock); ++ ++ /* If bind operation for the queue hasn't completed yet, then ++ * the CSI can't be programmed for the queue ++ * (even in stopped state) and so the doorbell also can't be assigned ++ * to it. ++ */ ++ if ((queue->bind_state == KBASE_CSF_QUEUE_BOUND) && ++ (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)) { ++ WARN_ON(queue->group->doorbell_nr == KBASEP_USER_DB_NR_INVALID); ++ queue->doorbell_nr = queue->group->doorbell_nr; ++ ++ /* After this the real Hw doorbell page would be mapped in */ ++ unmap_mapping_range( ++ kbdev->csf.db_filp->f_inode->i_mapping, ++ queue->db_file_offset << PAGE_SHIFT, ++ PAGE_SIZE, 1); ++ } ++ ++ mutex_unlock(&kbdev->csf.reg_lock); ++} ++ ++static void scheduler_doorbell_init(struct kbase_device *kbdev) ++{ ++ int doorbell_nr; ++ ++ bitmap_zero(kbdev->csf.scheduler.doorbell_inuse_bitmap, ++ CSF_NUM_DOORBELL); ++ ++ mutex_lock(&kbdev->csf.scheduler.lock); ++ /* Reserve doorbell 0 for use by kernel driver */ ++ doorbell_nr = acquire_doorbell(kbdev); ++ mutex_unlock(&kbdev->csf.scheduler.lock); ++ ++ WARN_ON(doorbell_nr != CSF_KERNEL_DOORBELL_NR); ++} ++ ++static u32 get_nr_active_csgs(struct kbase_device *kbdev) ++{ ++ u32 nr_active_csgs; ++ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ ++ nr_active_csgs =
bitmap_weight(kbdev->csf.scheduler.csg_inuse_bitmap, ++ kbdev->csf.global_iface.group_num); ++ ++ return nr_active_csgs; ++} ++ ++/** ++ * csgs_active - returns true if any of CSG slots are in use ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * Return: true if at least one CSG slot is in use, false otherwise. ++ */ ++static bool csgs_active(struct kbase_device *kbdev) ++{ ++ u32 nr_active_csgs; ++ ++ mutex_lock(&kbdev->csf.scheduler.lock); ++ nr_active_csgs = get_nr_active_csgs(kbdev); ++ mutex_unlock(&kbdev->csf.scheduler.lock); ++ ++ /* Right now if any of the CSG interfaces are in use ++ * then we need to assume that there is some work pending. ++ * In future when we have IDLE notifications from firmware implemented ++ * then we would have a better idea of the pending work. ++ */ ++ return (nr_active_csgs != 0); ++} ++ ++/** ++ * csg_slot_in_use - returns true if a queue group has been programmed on a ++ * given CSG slot. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @slot: Index/number of the CSG slot in question. ++ * ++ * Return: true if the slot has a resident queue group, false otherwise. ++ * ++ * Note: Caller must hold the scheduler lock.
++ */ ++static inline bool csg_slot_in_use(struct kbase_device *kbdev, int slot) ++{ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ ++ return (kbdev->csf.scheduler.csg_slots[slot].resident_group != NULL); ++} ++ ++static bool queue_group_suspended_locked(struct kbase_queue_group *group) ++{ ++ lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); ++ ++ return (group->run_state == KBASE_CSF_GROUP_SUSPENDED || ++ group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE || ++ group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC); ++} ++ ++static bool queue_group_idle_locked(struct kbase_queue_group *group) ++{ ++ lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); ++ ++ return (group->run_state == KBASE_CSF_GROUP_IDLE || ++ group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE); ++} ++ ++static bool queue_group_scheduled(struct kbase_queue_group *group) ++{ ++ return (group->run_state != KBASE_CSF_GROUP_INACTIVE && ++ group->run_state != KBASE_CSF_GROUP_TERMINATED && ++ group->run_state != KBASE_CSF_GROUP_FAULT_EVICTED); ++} ++ ++static bool queue_group_scheduled_locked(struct kbase_queue_group *group) ++{ ++ lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); ++ ++ return queue_group_scheduled(group); ++} ++ ++/** ++ * scheduler_wait_protm_quit() - Wait for GPU to exit protected mode. ++ * ++ * @kbdev: Pointer to the GPU device ++ * ++ * This function waits for the GPU to exit protected mode which is confirmed ++ * when active_protm_grp is set to NULL.
++ */ ++static void scheduler_wait_protm_quit(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ long wt = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); ++ long remaining; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_WAIT_PROTM_QUIT, NULL, ++ jiffies_to_msecs(wt)); ++ ++ remaining = wait_event_timeout(kbdev->csf.event_wait, ++ !kbase_csf_scheduler_protected_mode_in_use(kbdev), wt); ++ ++ if (!remaining) ++ dev_warn(kbdev->dev, "Timeout, protm_quit wait skipped"); ++ ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_WAIT_PROTM_QUIT_DONE, NULL, ++ jiffies_to_msecs(remaining)); ++} ++ ++/** ++ * scheduler_force_protm_exit() - Force GPU to exit protected mode. ++ * ++ * @kbdev: Pointer to the GPU device ++ * ++ * This function sends a ping request to the firmware and waits for the GPU ++ * to exit protected mode. ++ */ ++static void scheduler_force_protm_exit(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ ++ kbase_csf_firmware_ping(kbdev); ++ scheduler_wait_protm_quit(kbdev); ++} ++ ++/** ++ * scheduler_timer_is_enabled_nolock() - Check if the scheduler wakes up ++ * automatically for periodic tasks. ++ * ++ * @kbdev: Pointer to the device ++ * ++ * This is a variant of kbase_csf_scheduler_timer_is_enabled() that assumes the ++ * CSF scheduler lock to already have been held. 
++ * ++ * Return: true if the scheduler is configured to wake up periodically ++ */ ++static bool scheduler_timer_is_enabled_nolock(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ ++ return kbdev->csf.scheduler.timer_enabled; ++} ++ ++static void enable_gpu_idle_fw_timer(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ unsigned long flags; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ if (scheduler->gpu_idle_fw_timer_enabled) ++ return; ++ ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ ++ /* Updating the gpu_idle_fw_timer_enabled flag requires holding interrupt_lock */ ++ scheduler->gpu_idle_fw_timer_enabled = true; ++ kbase_csf_firmware_enable_gpu_idle_timer(kbdev); ++ ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++} ++ ++static void disable_gpu_idle_fw_timer_locked(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ ++ lockdep_assert_held(&scheduler->lock); ++ lockdep_assert_held(&scheduler->interrupt_lock); ++ ++ /* Updating the gpu_idle_fw_timer_enabled flag requires holding interrupt_lock */ ++ if (scheduler->gpu_idle_fw_timer_enabled) { ++ scheduler->gpu_idle_fw_timer_enabled = false; ++ kbase_csf_firmware_disable_gpu_idle_timer(kbdev); ++ } ++} ++ ++static void disable_gpu_idle_fw_timer(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ unsigned long flags; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ if (!scheduler->gpu_idle_fw_timer_enabled) ++ return; ++ ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ disable_gpu_idle_fw_timer_locked(kbdev); ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++} ++ ++static void scheduler_wakeup(struct kbase_device *kbdev, bool kick) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ if (scheduler->state
== SCHED_SUSPENDED) { ++ dev_dbg(kbdev->dev, "Re-activating the Scheduler"); ++ kbase_csf_scheduler_pm_active(kbdev); ++ scheduler->state = SCHED_INACTIVE; ++ ++ if (kick) ++ scheduler_enable_tick_timer_nolock(kbdev); ++ } ++} ++ ++static void scheduler_suspend(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ if (!WARN_ON(scheduler->state == SCHED_SUSPENDED)) { ++ dev_dbg(kbdev->dev, "Suspending the Scheduler"); ++ kbase_csf_scheduler_pm_idle(kbdev); ++ scheduler->state = SCHED_SUSPENDED; ++ } ++} ++ ++/** ++ * update_idle_suspended_group_state() - Move the queue group to a non-idle ++ * suspended state. ++ * @group: Pointer to the queue group. ++ * ++ * This function is called to change the state of queue group to non-idle ++ * suspended state, if the group was suspended when all the queues bound to it ++ * became empty or when some queues got blocked on a sync wait & others became ++ * empty. The group is also moved to the runnable list from idle wait list in ++ * the latter case. ++ * So the function gets called when a queue is kicked or sync wait condition ++ * gets satisfied. ++ */ ++static void update_idle_suspended_group_state(struct kbase_queue_group *group) ++{ ++ struct kbase_csf_scheduler *scheduler = ++ &group->kctx->kbdev->csf.scheduler; ++ int new_val; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) { ++ remove_group_from_idle_wait(group); ++ insert_group_to_runnable(scheduler, group, ++ KBASE_CSF_GROUP_SUSPENDED); ++ } else if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE) { ++ group->run_state = KBASE_CSF_GROUP_SUSPENDED; ++ ++ /* If scheduler is not suspended and the given group's ++ * static priority (reflected by the scan_seq_num) is inside ++ * the current tick slot-range, schedules an async tock. 
++ */ ++ if (scheduler->state != SCHED_SUSPENDED && ++ group->scan_seq_num < scheduler->num_csg_slots_for_tick) ++ schedule_in_cycle(group, true); ++ } else ++ return; ++ ++ new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps); ++ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, ++ group, new_val); ++} ++ ++int kbase_csf_scheduler_group_get_slot_locked(struct kbase_queue_group *group) ++{ ++ struct kbase_csf_scheduler *scheduler = ++ &group->kctx->kbdev->csf.scheduler; ++ int slot_num = group->csg_nr; ++ ++ lockdep_assert_held(&scheduler->interrupt_lock); ++ ++ if (slot_num >= 0) { ++ if (WARN_ON(scheduler->csg_slots[slot_num].resident_group != ++ group)) ++ return -1; ++ } ++ ++ return slot_num; ++} ++ ++int kbase_csf_scheduler_group_get_slot(struct kbase_queue_group *group) ++{ ++ struct kbase_csf_scheduler *scheduler = ++ &group->kctx->kbdev->csf.scheduler; ++ unsigned long flags; ++ int slot_num; ++ ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ slot_num = kbase_csf_scheduler_group_get_slot_locked(group); ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++ ++ return slot_num; ++} ++ ++static bool kbasep_csf_scheduler_group_is_on_slot_locked( ++ struct kbase_queue_group *group) ++{ ++ struct kbase_csf_scheduler *scheduler = ++ &group->kctx->kbdev->csf.scheduler; ++ int slot_num = group->csg_nr; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ if (slot_num >= 0) { ++ if (!WARN_ON(scheduler->csg_slots[slot_num].resident_group != ++ group)) ++ return true; ++ } ++ ++ return false; ++} ++ ++bool kbase_csf_scheduler_group_events_enabled(struct kbase_device *kbdev, ++ struct kbase_queue_group *group) ++{ ++ struct kbase_csf_scheduler *scheduler = ++ &group->kctx->kbdev->csf.scheduler; ++ int slot_num = group->csg_nr; ++ ++ lockdep_assert_held(&scheduler->interrupt_lock); ++ ++ if (WARN_ON(slot_num < 0)) ++ return false; ++ ++ return test_bit(slot_num, scheduler->csgs_events_enable_mask); ++} ++ ++struct 
kbase_queue_group *kbase_csf_scheduler_get_group_on_slot( ++ struct kbase_device *kbdev, int slot) ++{ ++ lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock); ++ ++ return kbdev->csf.scheduler.csg_slots[slot].resident_group; ++} ++ ++static int halt_stream_sync(struct kbase_queue *queue) ++{ ++ struct kbase_queue_group *group = queue->group; ++ struct kbase_device *kbdev = queue->kctx->kbdev; ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ struct kbase_csf_cmd_stream_group_info *ginfo; ++ struct kbase_csf_cmd_stream_info *stream; ++ int csi_index = queue->csi_index; ++ long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); ++ ++ if (WARN_ON(!group) || ++ WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) ++ return -EINVAL; ++ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ ginfo = &global_iface->groups[group->csg_nr]; ++ stream = &ginfo->streams[csi_index]; ++ ++ if (CS_REQ_STATE_GET(kbase_csf_firmware_cs_input_read(stream, CS_REQ)) == ++ CS_REQ_STATE_START) { ++ ++ remaining = wait_event_timeout(kbdev->csf.event_wait, ++ (CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK)) ++ == CS_ACK_STATE_START), remaining); ++ ++ if (!remaining) { ++ dev_warn(kbdev->dev, "Timed out waiting for queue to start on csi %d bound to group %d on slot %d", ++ csi_index, group->handle, group->csg_nr); ++ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) ++ kbase_reset_gpu(kbdev); ++ ++ return -ETIMEDOUT; ++ } ++ ++ remaining = ++ kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); ++ } ++ ++ /* Set state to STOP */ ++ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, CS_REQ_STATE_STOP, ++ CS_REQ_STATE_MASK); ++ ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQUESTED, group, queue, 0u); ++ kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, true); ++ ++ /* Timed wait */ ++ remaining = wait_event_timeout(kbdev->csf.event_wait, ++ 
(CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK)) ++ == CS_ACK_STATE_STOP), remaining); ++ ++ if (!remaining) { ++ dev_warn(kbdev->dev, "Timed out waiting for queue to stop on csi %d bound to group %d on slot %d", ++ queue->csi_index, group->handle, group->csg_nr); ++ ++ /* TODO GPUCORE-25328: The CSG can't be terminated, the GPU ++ * will be reset as a work-around. ++ */ ++ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) ++ kbase_reset_gpu(kbdev); ++ } ++ return (remaining) ? 0 : -ETIMEDOUT; ++} ++ ++static bool can_halt_stream(struct kbase_device *kbdev, ++ struct kbase_queue_group *group) ++{ ++ struct kbase_csf_csg_slot *const csg_slot = ++ kbdev->csf.scheduler.csg_slots; ++ unsigned long flags; ++ bool can_halt; ++ int slot; ++ ++ if (!queue_group_scheduled(group)) ++ return true; ++ ++ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); ++ slot = kbase_csf_scheduler_group_get_slot_locked(group); ++ can_halt = (slot >= 0) && ++ (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING); ++ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, ++ flags); ++ ++ return can_halt; ++} ++ ++/** ++ * sched_halt_stream() - Stop a GPU queue when its queue group is not running ++ * on a CSG slot. ++ * @queue: Pointer to the GPU queue to stop. ++ * ++ * This function handles stopping gpu queues for groups that are either not on ++ * a CSG slot or are on the slot but undergoing transition to ++ * resume or suspend states. ++ * It waits until the queue group is scheduled on a slot and starts running, ++ * which is needed as groups that were suspended may need to resume all queues ++ * that were enabled and running at the time of suspension. ++ * ++ * Return: 0 on success, or negative on failure. 
++ */ ++static int sched_halt_stream(struct kbase_queue *queue) ++{ ++ struct kbase_queue_group *group = queue->group; ++ struct kbase_device *kbdev = queue->kctx->kbdev; ++ struct kbase_csf_scheduler *const scheduler = ++ &kbdev->csf.scheduler; ++ struct kbase_csf_csg_slot *const csg_slot = ++ kbdev->csf.scheduler.csg_slots; ++ bool retry_needed = false; ++ bool retried = false; ++ long remaining; ++ int slot; ++ int err = 0; ++ ++ if (WARN_ON(!group)) ++ return -EINVAL; ++ ++ lockdep_assert_held(&queue->kctx->csf.lock); ++ lockdep_assert_held(&scheduler->lock); ++ ++ slot = kbase_csf_scheduler_group_get_slot(group); ++ ++ if (slot >= 0) { ++ WARN_ON(atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING); ++ ++ if (atomic_read(&csg_slot[slot].state) == CSG_SLOT_READY2RUN) { ++ dev_dbg(kbdev->dev, "Stopping a queue on csi %d when Group-%d is in under transition to running state", ++ queue->csi_index, group->handle); ++ retry_needed = true; ++ } ++ } ++retry: ++ /* Update the group state so that it can get scheduled soon */ ++ update_idle_suspended_group_state(group); ++ ++ mutex_unlock(&scheduler->lock); ++ ++ /* This function is called when the queue group is either not on a CSG ++ * slot or is on the slot but undergoing transition. ++ * ++ * To stop the queue, the function needs to wait either for the queue ++ * group to be assigned a CSG slot (and that slot has to reach the ++ * running state) or for the eviction of the queue group from the ++ * scheduler's list. ++ * ++ * In order to evaluate the latter condition, the function doesn't ++ * really need to lock the scheduler, as any update to the run_state ++ * of the queue group by sched_evict_group() would be visible due ++ * to implicit barriers provided by the kernel waitqueue macros. ++ * ++ * The group pointer cannot disappear meanwhile, as the high level ++ * CSF context is locked. Therefore, the scheduler would be ++ * the only one to update the run_state of the group. 
++ */ ++ remaining = wait_event_timeout( ++ kbdev->csf.event_wait, can_halt_stream(kbdev, group), ++ kbase_csf_timeout_in_jiffies( ++ 20 * kbdev->csf.scheduler.csg_scheduling_period_ms)); ++ ++ mutex_lock(&scheduler->lock); ++ ++ if (remaining && queue_group_scheduled_locked(group)) { ++ slot = kbase_csf_scheduler_group_get_slot(group); ++ ++ /* If the group is still on slot and slot is in running state ++ * then explicitly stop the CSI of the ++ * queue. Otherwise there are different cases to consider ++ * ++ * - If the queue group was already undergoing transition to ++ * resume/start state when this function was entered then it ++ * would not have disabled the CSI of the ++ * queue being stopped and the previous wait would have ended ++ * once the slot was in a running state with CS ++ * interface still enabled. ++ * Now the group is going through another transition either ++ * to a suspend state or to a resume state (it could have ++ * been suspended before the scheduler lock was grabbed). ++ * In both scenarios need to wait again for the group to ++ * come on a slot and that slot to reach the running state, ++ * as that would guarantee that firmware will observe the ++ * CSI as disabled. ++ * ++ * - If the queue group was either off the slot or was ++ * undergoing transition to suspend state on entering this ++ * function, then the group would have been resumed with the ++ * queue's CSI in disabled state. ++ * So now if the group is undergoing another transition ++ * (after the resume) then just need to wait for the state ++ * bits in the ACK register of CSI to be ++ * set to STOP value. It is expected that firmware will ++ * process the stop/disable request of the CS ++ * interface after resuming the group before it processes ++ * another state change request of the group. 
++ */ ++ if ((slot >= 0) && ++ (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING)) { ++ err = halt_stream_sync(queue); ++ } else if (retry_needed && !retried) { ++ retried = true; ++ goto retry; ++ } else if (slot >= 0) { ++ struct kbase_csf_global_iface *global_iface = ++ &kbdev->csf.global_iface; ++ struct kbase_csf_cmd_stream_group_info *ginfo = ++ &global_iface->groups[slot]; ++ struct kbase_csf_cmd_stream_info *stream = ++ &ginfo->streams[queue->csi_index]; ++ u32 cs_req = ++ kbase_csf_firmware_cs_input_read(stream, CS_REQ); ++ ++ if (!WARN_ON(CS_REQ_STATE_GET(cs_req) != ++ CS_REQ_STATE_STOP)) { ++ /* Timed wait; the ms timeout must be converted to jiffies */ ++ remaining = wait_event_timeout( ++ kbdev->csf.event_wait, ++ (CS_ACK_STATE_GET( ++ kbase_csf_firmware_cs_output( ++ stream, CS_ACK)) == ++ CS_ACK_STATE_STOP), ++ kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms)); ++ ++ if (!remaining) { ++ dev_warn(kbdev->dev, ++ "Timed out waiting for queue stop ack on csi %d bound to group %d on slot %d", ++ queue->csi_index, ++ group->handle, group->csg_nr); ++ err = -ETIMEDOUT; ++ } ++ } ++ } ++ } else if (!remaining) { ++ dev_warn(kbdev->dev, "Group-%d failed to get a slot for stopping the queue on csi %d", ++ group->handle, queue->csi_index); ++ err = -ETIMEDOUT; ++ } ++ ++ return err; ++} ++ ++int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue) ++{ ++ struct kbase_device *kbdev = queue->kctx->kbdev; ++ struct kbase_queue_group *group = queue->group; ++ bool const cs_enabled = queue->enabled; ++ int err = 0; ++ ++ if (WARN_ON(!group)) ++ return -EINVAL; ++ ++ kbase_reset_gpu_assert_failed_or_prevented(kbdev); ++ lockdep_assert_held(&queue->kctx->csf.lock); ++ mutex_lock(&kbdev->csf.scheduler.lock); ++ ++ queue->enabled = false; ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP, group, queue, cs_enabled); ++ ++ if (cs_enabled && queue_group_scheduled_locked(group)) { ++ struct kbase_csf_csg_slot *const csg_slot = ++ kbdev->csf.scheduler.csg_slots; ++ int slot = kbase_csf_scheduler_group_get_slot(group); ++ ++ /* Since
the group needs to be resumed in order to stop the queue, ++ * check if GPU needs to be powered up. ++ */ ++ scheduler_wakeup(kbdev, true); ++ ++ if ((slot >= 0) && ++ (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING)) ++ err = halt_stream_sync(queue); ++ else ++ err = sched_halt_stream(queue); ++ ++ unassign_user_doorbell_from_queue(kbdev, queue); ++ } ++ ++ mutex_unlock(&kbdev->csf.scheduler.lock); ++ return err; ++} ++ ++static void update_hw_active(struct kbase_queue *queue, bool active) ++{ ++ CSTD_UNUSED(queue); ++ CSTD_UNUSED(active); ++} ++ ++static void program_cs_extract_init(struct kbase_queue *queue) ++{ ++ u64 *input_addr = (u64 *)queue->user_io_addr; ++ u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE); ++ ++ input_addr[CS_EXTRACT_INIT_LO / sizeof(u64)] = ++ output_addr[CS_EXTRACT_LO / sizeof(u64)]; ++} ++ ++static void program_cs_trace_cfg(struct kbase_csf_cmd_stream_info *stream, ++ struct kbase_queue *queue) ++{ ++ struct kbase_device *kbdev = queue->kctx->kbdev; ++ u32 const glb_version = kbdev->csf.global_iface.version; ++ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ ++ /* If cs_trace_command not supported, nothing to program */ ++ if (glb_version < kbase_csf_interface_version(1, 1, 0)) ++ return; ++ ++ /* Program for cs_trace if enabled. In the current arrangement, it is ++ * possible for the context to enable the cs_trace after some queues ++ * has been registered in cs_trace in disabled state. This is tracked by ++ * the queue's trace buffer base address, which had been validated at the ++ * queue's register_ex call. 
++ */ ++ if (kbase_csf_scheduler_queue_has_trace(queue)) { ++ u32 cs_cfg = CS_INSTR_CONFIG_JASID_SET( ++ queue->trace_cfg, queue->kctx->as_nr); ++ ++ kbase_csf_firmware_cs_input(stream, CS_INSTR_CONFIG, cs_cfg); ++ kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_SIZE, ++ queue->trace_buffer_size); ++ ++ kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_BASE_LO, ++ queue->trace_buffer_base & U32_MAX); ++ kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_BASE_HI, ++ queue->trace_buffer_base >> 32); ++ ++ kbase_csf_firmware_cs_input( ++ stream, CS_INSTR_BUFFER_OFFSET_POINTER_LO, ++ queue->trace_offset_ptr & U32_MAX); ++ kbase_csf_firmware_cs_input( ++ stream, CS_INSTR_BUFFER_OFFSET_POINTER_HI, ++ queue->trace_offset_ptr >> 32); ++ } else { ++ /* Place the configuration to the disabled condition */ ++ kbase_csf_firmware_cs_input(stream, CS_INSTR_CONFIG, 0); ++ kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_SIZE, 0); ++ } ++} ++ ++static void program_cs(struct kbase_device *kbdev, ++ struct kbase_queue *queue, bool ring_csg_doorbell) ++{ ++ struct kbase_queue_group *group = queue->group; ++ struct kbase_csf_cmd_stream_group_info *ginfo; ++ struct kbase_csf_cmd_stream_info *stream; ++ int csi_index = queue->csi_index; ++ u64 user_input; ++ u64 user_output; ++ ++ if (WARN_ON(!group)) ++ return; ++ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ ++ if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) ++ return; ++ ++ ginfo = &kbdev->csf.global_iface.groups[group->csg_nr]; ++ ++ if (WARN_ON(csi_index < 0) || ++ WARN_ON(csi_index >= ginfo->stream_num)) ++ return; ++ ++ assign_user_doorbell_to_queue(kbdev, queue); ++ if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID) ++ return; ++ ++ WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr); ++ ++ if (queue->enabled && queue_group_suspended_locked(group)) ++ program_cs_extract_init(queue); ++ ++ stream = &ginfo->streams[csi_index]; ++ ++ kbase_csf_firmware_cs_input(stream, CS_BASE_LO, ++ 
queue->base_addr & 0xFFFFFFFF); ++ kbase_csf_firmware_cs_input(stream, CS_BASE_HI, ++ queue->base_addr >> 32); ++ kbase_csf_firmware_cs_input(stream, CS_SIZE, ++ queue->size); ++ ++ user_input = (queue->reg->start_pfn << PAGE_SHIFT); ++ kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO, ++ user_input & 0xFFFFFFFF); ++ kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI, ++ user_input >> 32); ++ ++ user_output = ((queue->reg->start_pfn + 1) << PAGE_SHIFT); ++ kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO, ++ user_output & 0xFFFFFFFF); ++ kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI, ++ user_output >> 32); ++ ++ kbase_csf_firmware_cs_input(stream, CS_CONFIG, ++ (queue->doorbell_nr << 8) | (queue->priority & 0xF)); ++ ++ /* Program the queue's cs_trace configuration */ ++ program_cs_trace_cfg(stream, queue); ++ ++ /* Enable all interrupts for now */ ++ kbase_csf_firmware_cs_input(stream, CS_ACK_IRQ_MASK, ~((u32)0)); ++ ++ /* ++ * Enable the CSG idle notification once the CS's ringbuffer ++ * becomes empty or the CS becomes sync_idle, waiting sync update ++ * or protected mode switch. ++ */ ++ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, ++ CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK, ++ CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK); ++ ++ /* Set state to START/STOP */ ++ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, ++ queue->enabled ? 
CS_REQ_STATE_START : CS_REQ_STATE_STOP, ++ CS_REQ_STATE_MASK); ++ ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_START, group, queue, queue->enabled); ++ ++ kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, ++ ring_csg_doorbell); ++ update_hw_active(queue, true); ++} ++ ++int kbase_csf_scheduler_queue_start(struct kbase_queue *queue) ++{ ++ struct kbase_queue_group *group = queue->group; ++ struct kbase_device *kbdev = queue->kctx->kbdev; ++ bool const cs_enabled = queue->enabled; ++ int err = 0; ++ bool evicted = false; ++ ++ kbase_reset_gpu_assert_prevented(kbdev); ++ lockdep_assert_held(&queue->kctx->csf.lock); ++ ++ if (WARN_ON(!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND)) ++ return -EINVAL; ++ ++ mutex_lock(&kbdev->csf.scheduler.lock); ++ ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_START, group, queue, ++ group->run_state); ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_STATUS_WAIT, queue->group, ++ queue, queue->status_wait); ++ ++ if (group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED) { ++ err = -EIO; ++ evicted = true; ++ } else if ((group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) ++ && CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) { ++ dev_dbg(kbdev->dev, "blocked queue(csi_index=%d) of group %d was kicked", ++ queue->csi_index, group->handle); ++ } else { ++ err = scheduler_group_schedule(group); ++ ++ if (!err) { ++ queue->enabled = true; ++ if (kbasep_csf_scheduler_group_is_on_slot_locked(group)) { ++ if (cs_enabled) { ++ /* In normal situation, when a queue is ++ * already running, the queue update ++ * would be a doorbell kick on user ++ * side. However, if such a kick is ++ * shortly following a start or resume, ++ * the queue may actually be in transition ++ * hence the said kick would enter the ++ * kernel as the hw_active flag is yet ++ * to be set. The scheduler needs to ++ * give a kick to the corresponding ++ * user door-bell in such a case.
++ */ ++ kbase_csf_ring_cs_user_doorbell(kbdev, queue); ++ } else ++ program_cs(kbdev, queue, true); ++ } ++ queue_delayed_work(system_long_wq, ++ &kbdev->csf.scheduler.ping_work, ++ msecs_to_jiffies(FIRMWARE_PING_INTERVAL_MS)); ++ } ++ } ++ ++ mutex_unlock(&kbdev->csf.scheduler.lock); ++ ++ if (evicted) ++ kbase_csf_term_descheduled_queue_group(group); ++ ++ return err; ++} ++ ++static enum kbase_csf_csg_slot_state update_csg_slot_status( ++ struct kbase_device *kbdev, s8 slot) ++{ ++ struct kbase_csf_csg_slot *csg_slot = ++ &kbdev->csf.scheduler.csg_slots[slot]; ++ struct kbase_csf_cmd_stream_group_info *ginfo = ++ &kbdev->csf.global_iface.groups[slot]; ++ u32 state; ++ enum kbase_csf_csg_slot_state slot_state; ++ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ ++ state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, ++ CSG_ACK)); ++ slot_state = atomic_read(&csg_slot->state); ++ ++ switch (slot_state) { ++ case CSG_SLOT_READY2RUN: ++ if ((state == CSG_ACK_STATE_START) || ++ (state == CSG_ACK_STATE_RESUME)) { ++ slot_state = CSG_SLOT_RUNNING; ++ atomic_set(&csg_slot->state, slot_state); ++ csg_slot->trigger_jiffies = jiffies; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STARTED, csg_slot->resident_group, state); ++ dev_dbg(kbdev->dev, "Group %u running on slot %d\n", ++ csg_slot->resident_group->handle, slot); ++ } ++ break; ++ case CSG_SLOT_DOWN2STOP: ++ if ((state == CSG_ACK_STATE_SUSPEND) || ++ (state == CSG_ACK_STATE_TERMINATE)) { ++ slot_state = CSG_SLOT_STOPPED; ++ atomic_set(&csg_slot->state, slot_state); ++ csg_slot->trigger_jiffies = jiffies; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, csg_slot->resident_group, state); ++ dev_dbg(kbdev->dev, "Group %u stopped on slot %d\n", ++ csg_slot->resident_group->handle, slot); ++ } ++ break; ++ case CSG_SLOT_DOWN2STOP_TIMEDOUT: ++ case CSG_SLOT_READY2RUN_TIMEDOUT: ++ case CSG_SLOT_READY: ++ case CSG_SLOT_RUNNING: ++ case CSG_SLOT_STOPPED: ++ break; ++ default: ++ dev_warn(kbdev->dev,
"Unknown CSG slot state %d", slot_state); ++ break; ++ } ++ ++ return slot_state; ++} ++ ++static bool csg_slot_running(struct kbase_device *kbdev, s8 slot) ++{ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ ++ return (update_csg_slot_status(kbdev, slot) == CSG_SLOT_RUNNING); ++} ++ ++static bool csg_slot_stopped_locked(struct kbase_device *kbdev, s8 slot) ++{ ++ enum kbase_csf_csg_slot_state slot_state; ++ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ ++ slot_state = update_csg_slot_status(kbdev, slot); ++ ++ return (slot_state == CSG_SLOT_STOPPED || ++ slot_state == CSG_SLOT_READY); ++} ++ ++static bool csg_slot_stopped_raw(struct kbase_device *kbdev, s8 slot) ++{ ++ struct kbase_csf_cmd_stream_group_info *ginfo = ++ &kbdev->csf.global_iface.groups[slot]; ++ u32 state; ++ ++ state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, ++ CSG_ACK)); ++ ++ if (state == CSG_ACK_STATE_SUSPEND || state == CSG_ACK_STATE_TERMINATE) { ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, kbdev->csf.scheduler.csg_slots[slot].resident_group, state); ++ dev_dbg(kbdev->dev, "(raw status) slot %d stopped\n", slot); ++ return true; ++ } ++ ++ return false; ++} ++ ++static void halt_csg_slot(struct kbase_queue_group *group, bool suspend) ++{ ++ struct kbase_device *kbdev = group->kctx->kbdev; ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ struct kbase_csf_csg_slot *csg_slot = ++ kbdev->csf.scheduler.csg_slots; ++ s8 slot; ++ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ ++ if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) ++ return; ++ ++ slot = group->csg_nr; ++ ++ /* When in transition, wait for it to complete */ ++ if (atomic_read(&csg_slot[slot].state) == CSG_SLOT_READY2RUN) { ++ long remaining = ++ kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); ++ ++ dev_dbg(kbdev->dev, "slot %d wait for up-running\n", slot); ++ remaining = wait_event_timeout(kbdev->csf.event_wait, ++ 
csg_slot_running(kbdev, slot), remaining); ++ if (!remaining) ++ dev_warn(kbdev->dev, ++ "slot %d timed out on up-running\n", slot); ++ } ++ ++ if (csg_slot_running(kbdev, slot)) { ++ unsigned long flags; ++ struct kbase_csf_cmd_stream_group_info *ginfo = ++ &global_iface->groups[slot]; ++ u32 halt_cmd = suspend ? CSG_REQ_STATE_SUSPEND : ++ CSG_REQ_STATE_TERMINATE; ++ ++ dev_dbg(kbdev->dev, "Halting(suspend=%d) group %d of context %d_%d on slot %d", ++ suspend, group->handle, group->kctx->tgid, group->kctx->id, slot); ++ ++ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); ++ /* Set state to SUSPEND/TERMINATE */ ++ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, halt_cmd, ++ CSG_REQ_STATE_MASK); ++ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, ++ flags); ++ atomic_set(&csg_slot[slot].state, CSG_SLOT_DOWN2STOP); ++ csg_slot[slot].trigger_jiffies = jiffies; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP, group, halt_cmd); ++ ++ kbase_csf_ring_csg_doorbell(kbdev, slot); ++ } ++} ++ ++static void term_csg_slot(struct kbase_queue_group *group) ++{ ++ halt_csg_slot(group, false); ++} ++ ++static void suspend_csg_slot(struct kbase_queue_group *group) ++{ ++ halt_csg_slot(group, true); ++} ++ ++/** ++ * evaluate_sync_update() - Evaluate the sync wait condition the GPU command ++ * queue has been blocked on. ++ * ++ * @queue: Pointer to the GPU command queue ++ * ++ * Return: true if sync wait condition is satisfied. 
++ */ ++static bool evaluate_sync_update(struct kbase_queue *queue) ++{ ++ struct kbase_vmap_struct *mapping; ++ bool updated = false; ++ u32 *sync_ptr; ++ u32 sync_wait_cond; ++ u32 sync_current_val; ++ struct kbase_device *kbdev; ++ ++ if (WARN_ON(!queue)) ++ return false; ++ ++ kbdev = queue->kctx->kbdev; ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ ++ sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr, ++ &mapping); ++ ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE, queue->group, ++ queue, queue->sync_ptr); ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_BLOCKED_REASON, ++ queue->group, queue, queue->blocked_reason); ++ ++ if (!sync_ptr) { ++ dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX already freed", ++ queue->sync_ptr); ++ goto out; ++ } ++ ++ sync_wait_cond = ++ CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(queue->status_wait); ++ ++ WARN_ON((sync_wait_cond != CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) && ++ (sync_wait_cond != CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE)); ++ ++ sync_current_val = READ_ONCE(*sync_ptr); ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_CURRENT_VAL, queue->group, ++ queue, sync_current_val); ++ ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_TEST_VAL, queue->group, ++ queue, queue->sync_value); ++ ++ if (((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) && ++ (sync_current_val > queue->sync_value)) || ++ ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE) && ++ (sync_current_val <= queue->sync_value))) { ++ /* The sync wait condition is satisfied so the group to which ++ * queue is bound can be re-scheduled. 
++ */ ++ updated = true; ++ } else { ++ dev_dbg(queue->kctx->kbdev->dev, ++ "sync memory not updated yet(%u)", sync_current_val); ++ } ++ ++ kbase_phy_alloc_mapping_put(queue->kctx, mapping); ++out: ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVALUATED, ++ queue->group, queue, updated); ++ return updated; ++} ++ ++/** ++ * save_slot_cs() - Save the state for blocked GPU command queue. ++ * ++ * @ginfo: Pointer to the CSG interface used by the group ++ * the queue is bound to. ++ * @queue: Pointer to the GPU command queue. ++ * ++ * This function will check if GPU command queue is blocked on a sync wait and ++ * evaluate the wait condition. If the wait condition isn't satisfied it would ++ * save the state needed to reevaluate the condition in future. ++ * The group to which queue is bound shall be in idle state. ++ * ++ * Return: true if the queue is blocked on a sync wait operation. ++ */ ++static ++bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo, ++ struct kbase_queue *queue) ++{ ++ struct kbase_csf_cmd_stream_info *const stream = ++ &ginfo->streams[queue->csi_index]; ++ u32 status = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT); ++ bool is_waiting = false; ++ ++ KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_STATUS_WAIT, ++ queue->group, queue, status); ++ ++ if (CS_STATUS_WAIT_SYNC_WAIT_GET(status)) { ++ queue->status_wait = status; ++ queue->sync_ptr = kbase_csf_firmware_cs_output(stream, ++ CS_STATUS_WAIT_SYNC_POINTER_LO); ++ queue->sync_ptr |= (u64)kbase_csf_firmware_cs_output(stream, ++ CS_STATUS_WAIT_SYNC_POINTER_HI) << 32; ++ queue->sync_value = kbase_csf_firmware_cs_output(stream, ++ CS_STATUS_WAIT_SYNC_VALUE); ++ ++ queue->sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET( ++ kbase_csf_firmware_cs_output(stream, ++ CS_STATUS_SCOREBOARDS)); ++ queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_GET( ++ kbase_csf_firmware_cs_output(stream, ++ CS_STATUS_BLOCKED_REASON)); ++ ++ if 
(!evaluate_sync_update(queue)) { ++ is_waiting = true; ++ } else { ++ /* Sync object already got updated & met the condition ++ * thus it doesn't need to be reevaluated and so can ++ * clear the 'status_wait' here. ++ */ ++ queue->status_wait = 0; ++ } ++ } else { ++ /* Invalidate wait status info that would have been recorded if ++ * this queue was blocked when the group (in idle state) was ++ * suspended previously. After that the group could have been ++ * unblocked due to the kicking of another queue bound to it & ++ * so the wait status info would have stuck with this queue. ++ */ ++ queue->status_wait = 0; ++ } ++ ++ return is_waiting; ++} ++ ++/** ++ * Calculate how far in the future an event should be scheduled. ++ * ++ * The objective of this function is making sure that a minimum period of ++ * time is guaranteed between handling two consecutive events. ++ * ++ * This function guarantees a minimum period of time between two consecutive ++ * events: given the minimum period and the distance between the current time ++ * and the last event, the function returns the difference between the two. ++ * However, if more time than the minimum period has already elapsed ++ * since the last event, the function will return 0 to schedule work to handle ++ * the event with the lowest latency possible. ++ * ++ * @last_event: Timestamp of the last event, in jiffies. ++ * @time_now: Timestamp of the new event to handle, in jiffies. ++ * Must be successive to last_event. ++ * @period: Minimum period between two events, in jiffies. ++ * ++ * Return: Time to delay work to handle the current event, in jiffies ++ */ ++static unsigned long get_schedule_delay(unsigned long last_event, ++ unsigned long time_now, ++ unsigned long period) ++{ ++ const unsigned long t_distance = time_now - last_event; ++ const unsigned long delay_t = (t_distance < period) ? 
++ (period - t_distance) : 0; ++ ++ return delay_t; ++} ++ ++static void schedule_in_cycle(struct kbase_queue_group *group, bool force) ++{ ++ struct kbase_context *kctx = group->kctx; ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ /* Only try to schedule work for this event if no requests are pending, ++ * otherwise the function will end up canceling previous work requests, ++ * and scheduler is configured to wake up periodically (or the schedule ++ * of work needs to be enforced in situation such as entering into ++ * protected mode). ++ */ ++ if ((likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) && ++ !scheduler->tock_pending_request) { ++ const unsigned long delay = ++ get_schedule_delay(scheduler->last_schedule, jiffies, ++ CSF_SCHEDULER_TIME_TOCK_JIFFIES); ++ scheduler->tock_pending_request = true; ++ dev_dbg(kbdev->dev, "Kicking async for group %d\n", ++ group->handle); ++ mod_delayed_work(scheduler->wq, &scheduler->tock_work, delay); ++ } ++} ++ ++static ++void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, ++ struct kbase_queue_group *const group, ++ enum kbase_csf_group_state run_state) ++{ ++ struct kbase_context *const kctx = group->kctx; ++ struct kbase_device *const kbdev = kctx->kbdev; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE); ++ ++ if (WARN_ON(group->priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT)) ++ return; ++ ++ group->run_state = run_state; ++ ++ if (run_state == KBASE_CSF_GROUP_RUNNABLE) ++ group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID; ++ ++ list_add_tail(&group->link, ++ &kctx->csf.sched.runnable_groups[group->priority]); ++ kctx->csf.sched.num_runnable_grps++; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_INSERT_RUNNABLE, group, ++ kctx->csf.sched.num_runnable_grps); ++ ++ /* Add the kctx if not yet in runnable kctxs */ ++ 
if (kctx->csf.sched.num_runnable_grps == 1) { ++ /* First runnable csg, adds to the runnable_kctxs */ ++ INIT_LIST_HEAD(&kctx->csf.link); ++ list_add_tail(&kctx->csf.link, &scheduler->runnable_kctxs); ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_INSERT_RUNNABLE, kctx, 0u); ++ } ++ ++ scheduler->total_runnable_grps++; ++ ++ if (likely(scheduler_timer_is_enabled_nolock(kbdev)) && ++ (scheduler->total_runnable_grps == 1 || ++ scheduler->state == SCHED_SUSPENDED)) { ++ dev_dbg(kbdev->dev, "Kicking scheduler on first runnable group\n"); ++ /* Fire a scheduling to start the time-slice */ ++ enqueue_tick_work(kbdev); ++ } else ++ schedule_in_cycle(group, false); ++ ++ /* Since a new group has become runnable, check if GPU needs to be ++ * powered up. ++ */ ++ scheduler_wakeup(kbdev, false); ++} ++ ++static ++void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler, ++ struct kbase_queue_group *group, ++ enum kbase_csf_group_state run_state) ++{ ++ struct kbase_context *kctx = group->kctx; ++ struct kbase_queue_group *new_head_grp; ++ struct list_head *list = ++ &kctx->csf.sched.runnable_groups[group->priority]; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ WARN_ON(!queue_group_scheduled_locked(group)); ++ ++ group->run_state = run_state; ++ list_del_init(&group->link); ++ ++ if (scheduler->top_grp == group) { ++ /* ++ * Note: this disables explicit rotation in the next scheduling ++ * cycle. However, removing the top_grp is the same as an ++ * implicit rotation (e.g. if we instead rotated the top_ctx ++ * and then remove top_grp) ++ * ++ * This implicit rotation is assumed by the scheduler rotate ++ * functions. ++ */ ++ scheduler->top_grp = NULL; ++ ++ /* ++ * Trigger a scheduling tock for a CSG containing protected ++ * content in case there has been any in order to minimise ++ * latency. 
++ */ ++ group = scheduler_get_protm_enter_async_group(kctx->kbdev, ++ NULL); ++ if (group) ++ schedule_in_cycle(group, true); ++ } ++ ++ kctx->csf.sched.num_runnable_grps--; ++ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_REMOVE_RUNNABLE, group, ++ kctx->csf.sched.num_runnable_grps); ++ new_head_grp = (!list_empty(list)) ? ++ list_first_entry(list, struct kbase_queue_group, link) : ++ NULL; ++ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_HEAD_RUNNABLE, new_head_grp, ++ 0u); ++ ++ if (kctx->csf.sched.num_runnable_grps == 0) { ++ struct kbase_context *new_head_kctx; ++ struct list_head *kctx_list = &scheduler->runnable_kctxs; ++ /* drop the kctx */ ++ list_del_init(&kctx->csf.link); ++ if (scheduler->top_ctx == kctx) ++ scheduler->top_ctx = NULL; ++ KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_REMOVE_RUNNABLE, kctx, ++ 0u); ++ new_head_kctx = (!list_empty(kctx_list)) ? ++ list_first_entry(kctx_list, struct kbase_context, csf.link) : ++ NULL; ++ KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_HEAD_RUNNABLE, ++ new_head_kctx, 0u); ++ } ++ ++ WARN_ON(scheduler->total_runnable_grps == 0); ++ scheduler->total_runnable_grps--; ++ if (!scheduler->total_runnable_grps) { ++ dev_dbg(kctx->kbdev->dev, "Scheduler idle has no runnable groups"); ++ cancel_tick_timer(kctx->kbdev); ++ WARN_ON(atomic_read(&scheduler->non_idle_offslot_grps)); ++ if (scheduler->state != SCHED_SUSPENDED) ++ queue_work(system_wq, &scheduler->gpu_idle_work); ++ } ++ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp, ++ scheduler->num_active_address_spaces | ++ (((u64)scheduler->total_runnable_grps) << 32)); ++} ++ ++static void insert_group_to_idle_wait(struct kbase_queue_group *const group) ++{ ++ struct kbase_context *kctx = group->kctx; ++ ++ lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock); ++ ++ WARN_ON(group->run_state != KBASE_CSF_GROUP_IDLE); ++ ++ list_add_tail(&group->link, &kctx->csf.sched.idle_wait_groups); ++ kctx->csf.sched.num_idle_wait_grps++; ++ 
KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_INSERT_IDLE_WAIT, group, ++ kctx->csf.sched.num_idle_wait_grps); ++ group->run_state = KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC; ++ dev_dbg(kctx->kbdev->dev, ++ "Group-%d suspended on sync_wait, total wait_groups: %u\n", ++ group->handle, kctx->csf.sched.num_idle_wait_grps); ++} ++ ++static void remove_group_from_idle_wait(struct kbase_queue_group *const group) ++{ ++ struct kbase_context *kctx = group->kctx; ++ struct list_head *list = &kctx->csf.sched.idle_wait_groups; ++ struct kbase_queue_group *new_head_grp; ++ ++ lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock); ++ ++ WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC); ++ ++ list_del_init(&group->link); ++ WARN_ON(kctx->csf.sched.num_idle_wait_grps == 0); ++ kctx->csf.sched.num_idle_wait_grps--; ++ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_REMOVE_IDLE_WAIT, group, ++ kctx->csf.sched.num_idle_wait_grps); ++ new_head_grp = (!list_empty(list)) ? ++ list_first_entry(list, struct kbase_queue_group, link) : ++ NULL; ++ KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_HEAD_IDLE_WAIT, ++ new_head_grp, 0u); ++ group->run_state = KBASE_CSF_GROUP_INACTIVE; ++} ++ ++static void deschedule_idle_wait_group(struct kbase_csf_scheduler *scheduler, ++ struct kbase_queue_group *group) ++{ ++ lockdep_assert_held(&scheduler->lock); ++ ++ if (WARN_ON(!group)) ++ return; ++ ++ remove_group_from_runnable(scheduler, group, KBASE_CSF_GROUP_IDLE); ++ insert_group_to_idle_wait(group); ++} ++ ++static void update_offslot_non_idle_cnt_for_faulty_grp(struct kbase_queue_group *group) ++{ ++ struct kbase_device *kbdev = group->kctx->kbdev; ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) { ++ int new_val = ++ atomic_dec_return(&scheduler->non_idle_offslot_grps); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC, ++ group, new_val); 
++ } ++} ++ ++static void update_offslot_non_idle_cnt_for_onslot_grp(struct kbase_queue_group *group) ++{ ++ struct kbase_device *kbdev = group->kctx->kbdev; ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ WARN_ON(group->csg_nr < 0); ++ ++ if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) { ++ int new_val = ++ atomic_dec_return(&scheduler->non_idle_offslot_grps); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC, ++ group, new_val); ++ } ++} ++ ++static void update_offslot_non_idle_cnt_on_grp_suspend( ++ struct kbase_queue_group *group) ++{ ++ struct kbase_device *kbdev = group->kctx->kbdev; ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ if (scheduler->state == SCHED_BUSY) { ++ /* active phase or, async entering the protected mode */ ++ if (group->prepared_seq_num >= ++ scheduler->non_idle_scanout_grps) { ++ /* At scanout, it was tagged as on-slot idle */ ++ if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) { ++ int new_val = atomic_inc_return( ++ &scheduler->non_idle_offslot_grps); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, ++ group, new_val); ++ } ++ } else { ++ if (group->run_state != KBASE_CSF_GROUP_SUSPENDED) { ++ int new_val = atomic_dec_return( ++ &scheduler->non_idle_offslot_grps); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC, ++ group, new_val); ++ } ++ } ++ } else { ++ /* async phases */ ++ if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) { ++ int new_val = atomic_inc_return( ++ &scheduler->non_idle_offslot_grps); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, ++ group, new_val); ++ } ++ } ++} ++ ++static bool confirm_cmd_buf_empty(struct kbase_queue *queue) ++{ ++ bool cs_empty; ++ bool cs_idle; ++ u32 sb_status = 0; ++ ++ struct kbase_device const *const kbdev = queue->group->kctx->kbdev; ++ struct 
kbase_csf_global_iface const *const iface = ++ &kbdev->csf.global_iface; ++ ++ u32 glb_version = iface->version; ++ ++ u64 *input_addr = (u64 *)queue->user_io_addr; ++ u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE); ++ ++ if (glb_version >= kbase_csf_interface_version(1, 0, 0)) { ++ /* CS_STATUS_SCOREBOARD supported from CSF 1.0 */ ++ struct kbase_csf_cmd_stream_group_info const *const ginfo = ++ &kbdev->csf.global_iface.groups[queue->group->csg_nr]; ++ struct kbase_csf_cmd_stream_info const *const stream = ++ &ginfo->streams[queue->csi_index]; ++ ++ sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET( ++ kbase_csf_firmware_cs_output(stream, ++ CS_STATUS_SCOREBOARDS)); ++ } ++ ++ cs_empty = (input_addr[CS_INSERT_LO / sizeof(u64)] == ++ output_addr[CS_EXTRACT_LO / sizeof(u64)]); ++ cs_idle = cs_empty && (!sb_status); ++ ++ return cs_idle; ++} ++ ++static void save_csg_slot(struct kbase_queue_group *group) ++{ ++ struct kbase_device *kbdev = group->kctx->kbdev; ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ struct kbase_csf_cmd_stream_group_info *ginfo; ++ u32 state; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) ++ return; ++ ++ ginfo = &kbdev->csf.global_iface.groups[group->csg_nr]; ++ ++ state = ++ CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, CSG_ACK)); ++ ++ if (!WARN_ON((state != CSG_ACK_STATE_SUSPEND) && ++ (state != CSG_ACK_STATE_TERMINATE))) { ++ u32 max_streams = ginfo->stream_num; ++ u32 i; ++ bool sync_wait = false; ++ bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) & ++ CSG_STATUS_STATE_IDLE_MASK; ++ for (i = 0; idle && i < max_streams; i++) { ++ struct kbase_queue *const queue = ++ group->bound_queues[i]; ++ ++ if (!queue || !queue->enabled) ++ continue; ++ ++ if (save_slot_cs(ginfo, queue)) ++ sync_wait = true; ++ else { ++ /* Need to confirm if ringbuffer of the GPU ++ * queue is empty or not. 
A race can arise ++ * between the flush of GPU queue and suspend ++ * of CSG. If a queue is flushed after FW has ++ * set the IDLE bit in CSG_STATUS_STATE, then ++ * Scheduler will incorrectly consider CSG ++ * as idle. And there may not be any further ++ * flush call for the GPU queue, which would ++ * have de-idled the CSG. ++ */ ++ idle = confirm_cmd_buf_empty(queue); ++ } ++ } ++ ++ if (idle) { ++ /* Take the suspended group out of the runnable_groups ++ * list of the context and move it to the ++ * idle_wait_groups list. ++ */ ++ if (sync_wait) ++ deschedule_idle_wait_group(scheduler, group); ++ else { ++ group->run_state = ++ KBASE_CSF_GROUP_SUSPENDED_ON_IDLE; ++ dev_dbg(kbdev->dev, "Group-%d suspended: idle", ++ group->handle); ++ } ++ } else { ++ group->run_state = KBASE_CSF_GROUP_SUSPENDED; ++ } ++ ++ update_offslot_non_idle_cnt_on_grp_suspend(group); ++ } ++} ++ ++/* Cleanup_csg_slot after it has been vacated, ready for next csg run. ++ * Return whether there is a kctx address fault associated with the group ++ * for which the clean-up is done. 
++ */ ++static bool cleanup_csg_slot(struct kbase_queue_group *group) ++{ ++ struct kbase_context *kctx = group->kctx; ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ struct kbase_csf_cmd_stream_group_info *ginfo; ++ s8 slot; ++ struct kbase_csf_csg_slot *csg_slot; ++ unsigned long flags; ++ u32 i; ++ bool as_fault = false; ++ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ ++ if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) ++ return as_fault; ++ ++ slot = group->csg_nr; ++ csg_slot = &kbdev->csf.scheduler.csg_slots[slot]; ++ ginfo = &global_iface->groups[slot]; ++ ++ /* Now loop through all the bound CSs, and clean them via a stop */ ++ for (i = 0; i < ginfo->stream_num; i++) { ++ struct kbase_csf_cmd_stream_info *stream = &ginfo->streams[i]; ++ ++ if (group->bound_queues[i]) { ++ if (group->bound_queues[i]->enabled) { ++ kbase_csf_firmware_cs_input_mask(stream, ++ CS_REQ, CS_REQ_STATE_STOP, ++ CS_REQ_STATE_MASK); ++ } ++ ++ unassign_user_doorbell_from_queue(kbdev, ++ group->bound_queues[i]); ++ } ++ } ++ ++ unassign_user_doorbell_from_group(kbdev, group); ++ ++ /* The csg does not need cleanup other than drop its AS */ ++ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); ++ as_fault = kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT); ++ kbase_ctx_sched_release_ctx(kctx); ++ if (unlikely(group->faulted)) ++ as_fault = true; ++ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); ++ ++ /* now marking the slot is vacant */ ++ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); ++ kbdev->csf.scheduler.csg_slots[slot].resident_group = NULL; ++ clear_bit(slot, kbdev->csf.scheduler.csg_slots_idle_mask); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group, ++ kbdev->csf.scheduler.csg_slots_idle_mask[0]); ++ ++ group->csg_nr = KBASEP_CSG_NR_INVALID; ++ set_bit(slot, kbdev->csf.scheduler.csgs_events_enable_mask); ++ clear_bit(slot, 
kbdev->csf.scheduler.csg_inuse_bitmap); ++ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); ++ ++ csg_slot->trigger_jiffies = jiffies; ++ atomic_set(&csg_slot->state, CSG_SLOT_READY); ++ ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_CLEANED, group, slot); ++ dev_dbg(kbdev->dev, "Cleanup done for group %d on slot %d\n", ++ group->handle, slot); ++ ++ KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG(kbdev, ++ kbdev->gpu_props.props.raw_props.gpu_id, slot); ++ ++ return as_fault; ++} ++ ++static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio) ++{ ++ struct kbase_device *kbdev = group->kctx->kbdev; ++ struct kbase_csf_csg_slot *csg_slot; ++ struct kbase_csf_cmd_stream_group_info *ginfo; ++ s8 slot; ++ u8 prev_prio; ++ u32 ep_cfg; ++ u32 csg_req; ++ unsigned long flags; ++ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ ++ if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) ++ return; ++ ++ slot = group->csg_nr; ++ csg_slot = &kbdev->csf.scheduler.csg_slots[slot]; ++ ginfo = &kbdev->csf.global_iface.groups[slot]; ++ ++ /* CSGs remaining on-slot can be either idle or runnable. ++ * This also applies in protected mode. 
++ */ ++ WARN_ON(!((group->run_state == KBASE_CSF_GROUP_RUNNABLE) || ++ (group->run_state == KBASE_CSF_GROUP_IDLE))); ++ ++ /* Update consumes a group from scanout */ ++ update_offslot_non_idle_cnt_for_onslot_grp(group); ++ ++ if (csg_slot->priority == prio) ++ return; ++ ++ /* Read the csg_ep_cfg back for updating the priority field */ ++ ep_cfg = kbase_csf_firmware_csg_input_read(ginfo, CSG_EP_REQ); ++ prev_prio = CSG_EP_REQ_PRIORITY_GET(ep_cfg); ++ ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio); ++ kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg); ++ ++ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); ++ csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); ++ csg_req ^= CSG_REQ_EP_CFG_MASK; ++ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req, ++ CSG_REQ_EP_CFG_MASK); ++ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); ++ ++ csg_slot->priority = prio; ++ ++ dev_dbg(kbdev->dev, "Priority for group %d of context %d_%d on slot %d to be updated from %u to %u\n", ++ group->handle, group->kctx->tgid, group->kctx->id, slot, ++ prev_prio, prio); ++ ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_PRIO_UPDATE, group, prev_prio); ++ ++ kbase_csf_ring_csg_doorbell(kbdev, slot); ++ set_bit(slot, kbdev->csf.scheduler.csg_slots_prio_update); ++} ++ ++static void program_csg_slot(struct kbase_queue_group *group, s8 slot, ++ u8 prio) ++{ ++ struct kbase_context *kctx = group->kctx; ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; ++ const u64 shader_core_mask = ++ kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER); ++ const u64 tiler_core_mask = ++ kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_TILER); ++ const u64 compute_mask = shader_core_mask & group->compute_mask; ++ const u64 fragment_mask = shader_core_mask & group->fragment_mask; ++ const u64 tiler_mask = tiler_core_mask & group->tiler_mask; ++ const u8 num_cores = kbdev->gpu_props.num_cores; ++ const 
u8 compute_max = min(num_cores, group->compute_max); ++ const u8 fragment_max = min(num_cores, group->fragment_max); ++ const u8 tiler_max = min(CSG_TILER_MAX, group->tiler_max); ++ struct kbase_csf_cmd_stream_group_info *ginfo; ++ u32 ep_cfg = 0; ++ u32 csg_req; ++ u32 state; ++ int i; ++ unsigned long flags; ++ const u64 normal_suspend_buf = ++ group->normal_suspend_buf.reg->start_pfn << PAGE_SHIFT; ++ struct kbase_csf_csg_slot *csg_slot = ++ &kbdev->csf.scheduler.csg_slots[slot]; ++ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ ++ if (WARN_ON(slot < 0) && ++ WARN_ON(slot >= global_iface->group_num)) ++ return; ++ ++ WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_READY); ++ ++ ginfo = &global_iface->groups[slot]; ++ ++ /* Pick an available address space for this context */ ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_ctx_sched_retain_ctx(kctx); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ ++ if (kctx->as_nr == KBASEP_AS_NR_INVALID) { ++ dev_warn(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n", ++ group->handle, kctx->tgid, kctx->id, slot); ++ return; ++ } ++ ++ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); ++ set_bit(slot, kbdev->csf.scheduler.csg_inuse_bitmap); ++ kbdev->csf.scheduler.csg_slots[slot].resident_group = group; ++ group->csg_nr = slot; ++ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); ++ ++ assign_user_doorbell_to_group(kbdev, group); ++ ++ /* Now loop through all the bound & kicked CSs, and program them */ ++ for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) { ++ struct kbase_queue *queue = group->bound_queues[i]; ++ ++ if (queue) ++ program_cs(kbdev, queue, false); ++ } ++ ++ ++ /* Endpoint programming for CSG */ ++ kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_COMPUTE_LO, ++ compute_mask & U32_MAX); ++ kbase_csf_firmware_csg_input(ginfo, 
CSG_ALLOW_COMPUTE_HI, ++ compute_mask >> 32); ++ kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_LO, ++ fragment_mask & U32_MAX); ++ kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_HI, ++ fragment_mask >> 32); ++ kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_OTHER, ++ tiler_mask & U32_MAX); ++ ++ ++ ep_cfg = CSG_EP_REQ_COMPUTE_EP_SET(ep_cfg, compute_max); ++ ep_cfg = CSG_EP_REQ_FRAGMENT_EP_SET(ep_cfg, fragment_max); ++ ep_cfg = CSG_EP_REQ_TILER_EP_SET(ep_cfg, tiler_max); ++ ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio); ++ kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg); ++ ++ /* Program the address space number assigned to the context */ ++ kbase_csf_firmware_csg_input(ginfo, CSG_CONFIG, kctx->as_nr); ++ ++ kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_LO, ++ normal_suspend_buf & U32_MAX); ++ kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_HI, ++ normal_suspend_buf >> 32); ++ ++ if (group->protected_suspend_buf.reg) { ++ const u64 protm_suspend_buf = ++ group->protected_suspend_buf.reg->start_pfn << ++ PAGE_SHIFT; ++ kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO, ++ protm_suspend_buf & U32_MAX); ++ kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI, ++ protm_suspend_buf >> 32); ++ } ++ ++ /* Enable all interrupts for now */ ++ kbase_csf_firmware_csg_input(ginfo, CSG_ACK_IRQ_MASK, ~((u32)0)); ++ ++ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); ++ csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); ++ csg_req ^= CSG_REQ_EP_CFG_MASK; ++ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req, ++ CSG_REQ_EP_CFG_MASK); ++ ++ /* Set state to START/RESUME */ ++ if (queue_group_suspended_locked(group)) { ++ state = CSG_REQ_STATE_RESUME; ++ } else { ++ WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE); ++ state = CSG_REQ_STATE_START; ++ } ++ ++ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ++ state, CSG_REQ_STATE_MASK); ++ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, 
flags); ++ ++ /* Update status before rings the door-bell, marking ready => run */ ++ atomic_set(&csg_slot->state, CSG_SLOT_READY2RUN); ++ csg_slot->trigger_jiffies = jiffies; ++ csg_slot->priority = prio; ++ ++ /* Trace the programming of the CSG on the slot */ ++ KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG(kbdev, ++ kbdev->gpu_props.props.raw_props.gpu_id, group->handle, slot); ++ ++ dev_dbg(kbdev->dev, "Starting group %d of context %d_%d on slot %d with priority %u\n", ++ group->handle, kctx->tgid, kctx->id, slot, prio); ++ ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_START, group, ++ (((u64)ep_cfg) << 32) | ++ ((((u32)kctx->as_nr) & 0xF) << 16) | ++ (state & (CSG_REQ_STATE_MASK >> CS_REQ_STATE_SHIFT))); ++ ++ kbase_csf_ring_csg_doorbell(kbdev, slot); ++ ++ /* Programming a slot consumes a group from scanout */ ++ update_offslot_non_idle_cnt_for_onslot_grp(group); ++} ++ ++static void remove_scheduled_group(struct kbase_device *kbdev, ++ struct kbase_queue_group *group) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ WARN_ON(group->prepared_seq_num == ++ KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID); ++ WARN_ON(list_empty(&group->link_to_schedule)); ++ ++ list_del_init(&group->link_to_schedule); ++ scheduler->ngrp_to_schedule--; ++ group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID; ++ group->kctx->csf.sched.ngrp_to_schedule--; ++} ++ ++static void sched_evict_group(struct kbase_queue_group *group, bool fault, ++ bool update_non_idle_offslot_grps_cnt) ++{ ++ struct kbase_context *kctx = group->kctx; ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ ++ if (queue_group_scheduled_locked(group)) { ++ u32 i; ++ ++ if (update_non_idle_offslot_grps_cnt && ++ (group->run_state == KBASE_CSF_GROUP_SUSPENDED || ++ group->run_state == KBASE_CSF_GROUP_RUNNABLE)) { ++ int 
new_val = atomic_dec_return( ++ &scheduler->non_idle_offslot_grps); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC, ++ group, new_val); ++ } ++ ++ for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) { ++ if (group->bound_queues[i]) ++ group->bound_queues[i]->enabled = false; ++ } ++ ++ if (group->prepared_seq_num != ++ KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID) ++ remove_scheduled_group(kbdev, group); ++ ++ if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) ++ remove_group_from_idle_wait(group); ++ else { ++ remove_group_from_runnable(scheduler, group, ++ KBASE_CSF_GROUP_INACTIVE); ++ } ++ ++ WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE); ++ ++ if (fault) ++ group->run_state = KBASE_CSF_GROUP_FAULT_EVICTED; ++ ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_EVICT_SCHED, group, ++ (((u64)scheduler->total_runnable_grps) << 32) | ++ ((u32)group->run_state)); ++ dev_dbg(kbdev->dev, "group %d exited scheduler, num_runnable_grps %d\n", ++ group->handle, scheduler->total_runnable_grps); ++ /* Notify a group has been evicted */ ++ wake_up_all(&kbdev->csf.event_wait); ++ } ++} ++ ++static int term_group_sync(struct kbase_queue_group *group) ++{ ++ struct kbase_device *kbdev = group->kctx->kbdev; ++ long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); ++ int err = 0; ++ ++ term_csg_slot(group); ++ ++ remaining = wait_event_timeout(kbdev->csf.event_wait, ++ csg_slot_stopped_locked(kbdev, group->csg_nr), remaining); ++ ++ if (!remaining) { ++ dev_warn(kbdev->dev, "term request timed out for group %d of context %d_%d on slot %d", ++ group->handle, group->kctx->tgid, ++ group->kctx->id, group->csg_nr); ++ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) ++ kbase_reset_gpu(kbdev); ++ err = -ETIMEDOUT; ++ } ++ ++ return err; ++} ++ ++void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group) ++{ ++ struct kbase_device *kbdev = group->kctx->kbdev; ++ struct kbase_csf_scheduler *scheduler = 
&kbdev->csf.scheduler; ++ long remaining = ++ kbase_csf_timeout_in_jiffies(CSG_SCHED_STOP_TIMEOUT_MS); ++ bool force = false; ++ ++ kbase_reset_gpu_assert_failed_or_prevented(kbdev); ++ lockdep_assert_held(&group->kctx->csf.lock); ++ mutex_lock(&scheduler->lock); ++ ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_DESCHEDULE, group, group->run_state); ++ while (queue_group_scheduled_locked(group)) { ++ u32 saved_state = scheduler->state; ++ ++ if (!kbasep_csf_scheduler_group_is_on_slot_locked(group)) { ++ sched_evict_group(group, false, true); ++ } else if (saved_state == SCHED_INACTIVE || force) { ++ bool as_faulty; ++ ++ term_group_sync(group); ++ /* Treat the csg been terminated */ ++ as_faulty = cleanup_csg_slot(group); ++ /* remove from the scheduler list */ ++ sched_evict_group(group, as_faulty, false); ++ } ++ ++ /* waiting scheduler state to change */ ++ if (queue_group_scheduled_locked(group)) { ++ mutex_unlock(&scheduler->lock); ++ remaining = wait_event_timeout( ++ kbdev->csf.event_wait, ++ saved_state != scheduler->state, ++ remaining); ++ if (!remaining) { ++ dev_warn(kbdev->dev, "Scheduler state change wait timed out for group %d on slot %d", ++ group->handle, group->csg_nr); ++ force = true; ++ } ++ mutex_lock(&scheduler->lock); ++ } ++ } ++ ++ mutex_unlock(&scheduler->lock); ++} ++ ++/** ++ * scheduler_group_schedule() - Schedule a GPU command queue group on firmware ++ * ++ * @group: Pointer to the queue group to be scheduled. ++ * ++ * This function would enable the scheduling of GPU command queue group on ++ * firmware. ++ * ++ * Return: 0 on success, or negative on failure. 
++ */
++static int scheduler_group_schedule(struct kbase_queue_group *group)
++{
++	struct kbase_context *kctx = group->kctx;
++	struct kbase_device *kbdev = kctx->kbdev;
++	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
++
++	lockdep_assert_held(&kctx->csf.lock);
++	lockdep_assert_held(&scheduler->lock);
++
++	KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SCHEDULE, group, group->run_state);
++	if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC)
++		update_idle_suspended_group_state(group);
++	else if (queue_group_idle_locked(group)) {
++		WARN_ON(kctx->csf.sched.num_runnable_grps == 0);
++		WARN_ON(kbdev->csf.scheduler.total_runnable_grps == 0);
++
++		if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE)
++			update_idle_suspended_group_state(group);
++		else {
++			struct kbase_queue_group *protm_grp;
++			unsigned long flags;
++
++			WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(
++				group));
++
++			group->run_state = KBASE_CSF_GROUP_RUNNABLE;
++
++			/* A normal mode CSG could be idle onslot during
++			 * protected mode. In this case clear the
++			 * appropriate bit in csg_slots_idle_mask.
++			 */
++			spin_lock_irqsave(&scheduler->interrupt_lock, flags);
++			protm_grp = scheduler->active_protm_grp;	/* snapshot; it is read again after the unlock below */
++			if (protm_grp && protm_grp != group) {
++				clear_bit((unsigned int)group->csg_nr,
++					  scheduler->csg_slots_idle_mask);
++				KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
++					scheduler->csg_slots_idle_mask[0]);
++			}
++			spin_unlock_irqrestore(&scheduler->interrupt_lock,
++					       flags);
++
++			/* If GPU is in protected mode then any doorbells rang
++			 * would have no effect. Check if GPU is in protected
++			 * mode and if this group has higher priority than the
++			 * active protected mode group. If so prompt the FW
++			 * to exit protected mode.
++			 *
++			 * A smaller scan_seq_num is treated as the higher
++			 * priority in the comparison below.
++			 * NOTE(review): protm_grp is dereferenced here with
++			 * interrupt_lock already released; presumably
++			 * scheduler->lock keeps it valid - confirm.
++			 */
++			if (protm_grp &&
++			    group->scan_seq_num < protm_grp->scan_seq_num) {
++				/* Prompt the FW to exit protected mode */
++				scheduler_force_protm_exit(kbdev);
++			}
++		}
++	} else if (!queue_group_scheduled_locked(group)) {
++		int new_val;
++		insert_group_to_runnable(&kbdev->csf.scheduler, group,
++			KBASE_CSF_GROUP_RUNNABLE);
++		/* A new group into the scheduler */
++		new_val = atomic_inc_return(
++			&kbdev->csf.scheduler.non_idle_offslot_grps);
++		KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
++			group, new_val);
++	}
++
++	/* Since a group has become active now, check if GPU needs to be
++	 * powered up. Also rekick the Scheduler.
++	 */
++	scheduler_wakeup(kbdev, true);
++
++	return 0;	/* no path in this function produces an error code */
++}
++
++/**
++ * set_max_csg_slots() - Set the number of available CSG slots
++ *
++ * @kbdev: Pointer of the GPU device.
++ *
++ * This function would set/limit the number of CSG slots that
++ * can be used in the given tick/tock. It would be less than the total CSG
++ * slots supported by firmware if the number of GPU address space slots
++ * required to utilize all the CSG slots is more than the available
++ * address space slots.
++ *
++ * num_csg_slots_for_tick is only written (to the firmware's group_num) when
++ * num_active_address_spaces does not exceed
++ * nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS; otherwise its current value
++ * is left untouched. A warning is raised if num_active_address_spaces is
++ * larger than group_num.
++ */
++static inline void set_max_csg_slots(struct kbase_device *kbdev)
++{
++	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
++	unsigned int total_csg_slots = kbdev->csf.global_iface.group_num;
++	unsigned int max_address_space_slots =
++			kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS;
++
++	WARN_ON(scheduler->num_active_address_spaces > total_csg_slots);
++
++	if (likely(scheduler->num_active_address_spaces <=
++		   max_address_space_slots))
++		scheduler->num_csg_slots_for_tick = total_csg_slots;
++}
++
++/**
++ * count_active_address_space() - Count the number of GPU address space slots
++ *
++ * @kbdev: Pointer of the GPU device.
++ * @kctx: Pointer of the Kbase context.
++ *
++ * This function would update the counter that is tracking the number of GPU
++ * address space slots that would be required to program the CS
++ * group slots from the groups at the head of groups_to_schedule list.
++ */
++static inline void count_active_address_space(struct kbase_device *kbdev,
++		struct kbase_context *kctx)
++{
++	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
++	unsigned int total_csg_slots = kbdev->csf.global_iface.group_num;
++	unsigned int max_address_space_slots =
++			kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS;
++
++	if (scheduler->ngrp_to_schedule <= total_csg_slots) {	/* nothing is counted once the list is longer than group_num */
++		if (kctx->csf.sched.ngrp_to_schedule == 1)	/* presumably the first group of this context in the list - confirm against caller */
++			scheduler->num_active_address_spaces++;
++
++		if (scheduler->num_active_address_spaces <=
++		    max_address_space_slots)	/* still within the usable address space slots */
++			scheduler->num_csg_slots_for_tick++;
++	}
++}
++
++/* Two schemes are used in assigning the priority to CSG slots for a given
++ * CSG from the 'groups_to_schedule' list.
++ * This is needed as an idle on-slot group is deprioritized by moving it to
++ * the tail of 'groups_to_schedule' list. As a result it can either get
++ * evicted from the CSG slot in current tick/tock dealing, or its position
++ * can be after the lower priority non-idle groups in the 'groups_to_schedule'
++ * list. The latter case can result in the on-slot subset containing both
++ * non-idle and idle CSGs, and is handled through the 2nd scheme described
++ * below.
++ *
++ * First scheme :- If all the slots are going to be occupied by the non-idle or
++ * idle groups, then a simple assignment of the priority is done as per the
++ * position of a group in the 'groups_to_schedule' list. So maximum priority
++ * gets assigned to the slot of a group which is at the head of the list.
++ * Here the 'groups_to_schedule' list would effectively be ordered as per the
++ * static priority of groups.
++ *
++ * Second scheme :- If the slots are going to be occupied by a mix of idle and
++ * non-idle groups then the priority assignment needs to ensure that the
++ * priority of a slot belonging to a higher priority idle group will always be
++ * greater than the priority of a slot belonging to a lower priority non-idle
++ * group, reflecting the original position of a group in the scan order (i.e
++ * static priority) 'scan_seq_num', which is set during the prepare phase of a
++ * tick/tock before the group is moved to 'idle_groups_to_schedule' list if it
++ * is idle.
++ * The priority range [MAX_CSG_SLOT_PRIORITY, 0] is partitioned with the first
++ * 'slots_for_tick' groups in the original scan order are assigned a priority in
++ * the subrange [MAX_CSG_SLOT_PRIORITY, MAX_CSG_SLOT_PRIORITY - slots_for_tick),
++ * whereas rest of the groups are assigned the priority in the subrange
++ * [MAX_CSG_SLOT_PRIORITY - slots_for_tick, 0]. This way even if an idle higher
++ * priority group ends up after the non-idle lower priority groups in the
++ * 'groups_to_schedule' list, it will get a higher slot priority. And this will
++ * enable the FW to quickly start the execution of higher priority group when it
++ * gets de-idled.
++ */
++static u8 get_slot_priority(struct kbase_queue_group *group)
++{
++	struct kbase_csf_scheduler *scheduler =
++		&group->kctx->kbdev->csf.scheduler;
++	u8 slot_prio;
++	u32 slots_for_tick = scheduler->num_csg_slots_for_tick;
++	u32 used_slots = slots_for_tick - scheduler->remaining_tick_slots;	/* slots already handed out in this tick/tock */
++	/* Check if all the slots are going to be occupied by the non-idle or
++	 * idle groups.
++	 */
++	if (scheduler->non_idle_scanout_grps >= slots_for_tick ||
++	    !scheduler->non_idle_scanout_grps) {
++		slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - used_slots);	/* first scheme: priority follows consumption order */
++	} else {
++		/* There will be a mix of idle and non-idle groups. */
++		if (group->scan_seq_num < slots_for_tick)
++			slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY -
++					 group->scan_seq_num);	/* upper subrange, keyed on scan order */
++		else if (MAX_CSG_SLOT_PRIORITY > (slots_for_tick + used_slots))
++			slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - (slots_for_tick + used_slots));	/* lower subrange */
++		else
++			slot_prio = 0;	/* clamp: the subtraction above would not stay positive */
++	}
++	return slot_prio;
++}
++
++/**
++ * update_resident_groups_priority() - Update the priority of resident groups
++ *
++ * @kbdev: The GPU device.
++ *
++ * This function will update the priority of all resident queue groups
++ * that are at the head of groups_to_schedule list, preceding the first
++ * non-resident group.
++ *
++ * This function will also adjust kbase_csf_scheduler.remaining_tick_slots on
++ * the priority update.
++ *
++ * Every group whose priority is updated is also removed from
++ * groups_to_schedule. The walk stops at the first head entry that is not on
++ * a slot or whose prepared_seq_num is not below num_csg_slots_for_tick.
++ */
++static void update_resident_groups_priority(struct kbase_device *kbdev)
++{
++	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
++	u32 num_groups = scheduler->num_csg_slots_for_tick;
++
++	lockdep_assert_held(&kbdev->csf.scheduler.lock);
++	while (!list_empty(&scheduler->groups_to_schedule)) {
++		struct kbase_queue_group *group =
++			list_first_entry(&scheduler->groups_to_schedule,
++					struct kbase_queue_group,
++					 link_to_schedule);
++		bool resident =
++			kbasep_csf_scheduler_group_is_on_slot_locked(group);
++
++		if ((group->prepared_seq_num >= num_groups) || !resident)
++			break;
++
++		update_csg_slot_priority(group,
++					 get_slot_priority(group));
++
++		/* Drop the head group from the list */
++		remove_scheduled_group(kbdev, group);
++		scheduler->remaining_tick_slots--;
++	}
++}
++
++/**
++ * program_group_on_vacant_csg_slot() - Program a non-resident group on the
++ *					given vacant CSG slot.
++ * @kbdev:	Pointer to the GPU device.
++ * @slot:	Vacant CSG slot number.
++ *
++ * This function will program a non-resident group at the head of
++ * kbase_csf_scheduler.groups_to_schedule list on the given vacant
++ * CSG slot, provided the initial position of the non-resident
++ * group in the list is less than the number of CSG slots and there is
++ * an available GPU address space slot.
++ * kbase_csf_scheduler.remaining_tick_slots would also be adjusted after
++ * programming the slot.
++ */
++static void program_group_on_vacant_csg_slot(struct kbase_device *kbdev,
++		s8 slot)
++{
++	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
++	struct kbase_queue_group *const group =
++		list_empty(&scheduler->groups_to_schedule) ? NULL :
++			list_first_entry(&scheduler->groups_to_schedule,
++					struct kbase_queue_group,
++					link_to_schedule);
++	u32 num_groups = scheduler->num_csg_slots_for_tick;
++
++	lockdep_assert_held(&kbdev->csf.scheduler.lock);
++	if (group && (group->prepared_seq_num < num_groups)) {
++		bool ret = kbasep_csf_scheduler_group_is_on_slot_locked(group);
++
++		if (!WARN_ON(ret)) {	/* warns and does nothing if the head group is already on a slot */
++			if (kctx_as_enabled(group->kctx) && !group->faulted) {
++				program_csg_slot(group, slot,
++						get_slot_priority(group));
++
++				if (likely(csg_slot_in_use(kbdev, slot))) {	/* dequeue only if the slot was actually taken */
++					/* Drop the head group from the list */
++					remove_scheduled_group(kbdev, group);
++					scheduler->remaining_tick_slots--;
++				}
++			} else {	/* address space disabled or group faulted: dequeue without
++					 * programming; remaining_tick_slots is not changed here
++					 */
++				update_offslot_non_idle_cnt_for_faulty_grp(
++					group);
++				remove_scheduled_group(kbdev, group);
++			}
++		}
++	}
++}
++
++/**
++ * program_vacant_csg_slot() - Program the vacant CSG slot with a non-resident
++ *			       group and update the priority of resident groups.
++ *
++ * @kbdev:	Pointer to the GPU device.
++ * @slot:	Vacant CSG slot number.
++ *
++ * This function will first update the priority of all resident queue groups
++ * that are at the head of groups_to_schedule list, preceding the first
++ * non-resident group, it will then try to program the given CS
++ * group slot with the non-resident group.
Finally update the priority of all
++ * resident queue groups following the non-resident group.
++ *
++ * kbase_csf_scheduler.remaining_tick_slots would also be adjusted.
++ */
++static void program_vacant_csg_slot(struct kbase_device *kbdev, s8 slot)
++{
++	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
++	struct kbase_csf_csg_slot *const csg_slot =
++		scheduler->csg_slots;
++
++	lockdep_assert_held(&kbdev->csf.scheduler.lock);
++	WARN_ON(atomic_read(&csg_slot[slot].state) != CSG_SLOT_READY);	/* only warns; programming still proceeds */
++
++	/* First update priority for already resident groups (if any)
++	 * before the non-resident group
++	 */
++	update_resident_groups_priority(kbdev);
++
++	/* Now consume the vacant slot for the non-resident group */
++	program_group_on_vacant_csg_slot(kbdev, slot);
++
++	/* Now update priority for already resident groups (if any)
++	 * following the non-resident group
++	 */
++	update_resident_groups_priority(kbdev);
++}
++/**
++ * slots_state_changed() - Test a set of CSG slots against a state predicate.
++ *
++ * @kbdev:            Pointer to the GPU device.
++ * @slots_mask:       In: bitmap of the slots to test; only bits below the
++ *                    firmware group_num are examined. Out: if at least one
++ *                    slot passed, it is overwritten (MAX_SUPPORTED_CSGS bits)
++ *                    with just the slots that passed; otherwise it is left
++ *                    unmodified.
++ * @state_check_func: Predicate called once per selected slot.
++ *
++ * Return: true if the predicate held for at least one selected slot.
++ */
++static bool slots_state_changed(struct kbase_device *kbdev,
++		unsigned long *slots_mask,
++		bool (*state_check_func)(struct kbase_device *, s8))
++{
++	u32 num_groups = kbdev->csf.global_iface.group_num;
++	DECLARE_BITMAP(changed_slots, MAX_SUPPORTED_CSGS) = {0};
++	bool changed = false;
++	u32 i;
++
++	for_each_set_bit(i, slots_mask, num_groups) {
++		if (state_check_func(kbdev, (s8)i)) {
++			set_bit(i, changed_slots);
++			changed = true;
++		}
++	}
++
++	if (changed)
++		bitmap_copy(slots_mask, changed_slots, MAX_SUPPORTED_CSGS);
++
++	return changed;
++}
++
++/**
++ * program_suspending_csg_slots() - Program the CSG slots vacated on suspension
++ *                                  of queue groups running on them.
++ *
++ * @kbdev: Pointer to the GPU device.
++ *
++ * This function will first wait for the ongoing suspension to complete on a
++ * CSG slot and will then program the vacant slot with the
++ * non-resident queue group inside the groups_to_schedule list.
++ * The programming of the non-resident queue group on the vacant slot could
++ * fail due to unavailability of free GPU address space slot and so the
++ * programming is re-attempted after the ongoing suspension has completed
++ * for all the CSG slots.
++ * The priority of resident groups before and after the non-resident group
++ * in the groups_to_schedule list would also be updated.
++ * This would be repeated for all the slots undergoing suspension.
++ * GPU reset would be initiated if the wait for suspend times out.
++ */
++static void program_suspending_csg_slots(struct kbase_device *kbdev)
++{
++	u32 num_groups = kbdev->csf.global_iface.group_num;
++	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
++	DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS);
++	DECLARE_BITMAP(evicted_mask, MAX_SUPPORTED_CSGS) = {0};
++	bool suspend_wait_failed = false;
++	long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);	/* one budget shared by every wait below */
++
++	lockdep_assert_held(&kbdev->csf.scheduler.lock);
++
++	/* In the current implementation, csgs_events_enable_mask would be used
++	 * only to indicate suspending CSGs.
++	 *
++	 * A cleared bit in that mask therefore marks a slot being suspended,
++	 * so its complement is the set of slots to wait for.
++	 */
++	bitmap_complement(slot_mask, scheduler->csgs_events_enable_mask,
++		MAX_SUPPORTED_CSGS);
++
++	while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) {
++		DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS);
++
++		bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS);
++
++		remaining = wait_event_timeout(kbdev->csf.event_wait,
++			slots_state_changed(kbdev, changed,
++				csg_slot_stopped_raw),
++			remaining);
++
++		if (remaining) {
++			u32 i;
++
++			for_each_set_bit(i, changed, num_groups) {
++				struct kbase_queue_group *group =
++					scheduler->csg_slots[i].resident_group;
++
++				if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i))) {
++					continue;	/* bit i stays in slot_mask, so the slot is waited on again */
++				}
++				/* The on slot csg is now stopped */
++				clear_bit(i, slot_mask);
++
++				if (likely(group)) {
++					bool as_fault;
++					/* Only do save/cleanup if the
++					 * group is not terminated during
++					 * the sleep.
++					 */
++					save_csg_slot(group);
++					as_fault = cleanup_csg_slot(group);
++					/* If AS fault detected, evict it */
++					if (as_fault) {
++						sched_evict_group(group, true, true);
++						set_bit(i, evicted_mask);
++					}
++				}
++
++				program_vacant_csg_slot(kbdev, (s8)i);
++			}
++		} else {
++			u32 i;
++
++			/* Groups that have failed to suspend in time shall
++			 * raise a fatal error as they could no longer be
++			 * safely resumed.
++			 */
++			for_each_set_bit(i, slot_mask, num_groups) {
++				struct kbase_queue_group *const group =
++					scheduler->csg_slots[i].resident_group;
++
++				struct base_gpu_queue_group_error const
++					err_payload = { .error_type =
++							BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
++							.payload = {
++								.fatal_group = {
++									.status =
++										GPU_EXCEPTION_TYPE_SW_FAULT_2,
++								} } };
++
++				if (unlikely(group == NULL))
++					continue;	/* NOTE(review): bit i is left set in slot_mask
++							 * on this path while remaining is already 0,
++							 * so the enclosing while loop looks like it
++							 * could spin without sleeping if a slot with
++							 * no resident group ever times out - confirm
++							 * this cannot happen.
++							 */
++
++				kbase_csf_add_group_fatal_error(group,
++								&err_payload);
++				kbase_event_wakeup(group->kctx);
++
++				/* TODO GPUCORE-25328: The CSG can't be
++				 * terminated, the GPU will be reset as a
++				 * work-around.
++				 */
++				dev_warn(
++					kbdev->dev,
++					"Group %d of context %d_%d on slot %u failed to suspend",
++					group->handle, group->kctx->tgid,
++					group->kctx->id, i);
++
++				/* The group has failed suspension, stop
++				 * further examination.
++				 */
++				clear_bit(i, slot_mask);
++				set_bit(i, scheduler->csgs_events_enable_mask);
++				update_offslot_non_idle_cnt_for_onslot_grp(
++					group);
++			}
++
++			suspend_wait_failed = true;
++		}
++	}
++
++	if (!bitmap_empty(evicted_mask, MAX_SUPPORTED_CSGS))
++		dev_info(kbdev->dev, "Scheduler evicting slots: 0x%*pb\n",
++			 num_groups, evicted_mask);
++
++	if (likely(!suspend_wait_failed)) {
++		u32 i;
++
++		while (scheduler->ngrp_to_schedule &&
++		       scheduler->remaining_tick_slots) {	/* re-attempt programming on any slot still vacant */
++			i = find_first_zero_bit(scheduler->csg_inuse_bitmap,
++					num_groups);
++			if (WARN_ON(i == num_groups))
++				break;
++			program_vacant_csg_slot(kbdev, (s8)i);
++			if (!csg_slot_in_use(kbdev, (int)i)) {
++				dev_warn(kbdev->dev, "Couldn't use CSG slot %d despite being vacant", i);
++				break;
++			}
++		}
++	} else {
++		if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
++			kbase_reset_gpu(kbdev);
++	}
++}
++/**
++ * suspend_queue_group() - Request suspension or termination of an on-slot
++ *                         group.
++ *
++ * @group: Pointer to the queue group; its csg_nr selects the slot bit.
++ *
++ * Clears the bit of the group's slot in csgs_events_enable_mask under
++ * interrupt_lock, then calls term_csg_slot() if the context's address space
++ * is not enabled or the group is marked faulted, and suspend_csg_slot()
++ * otherwise. No wait for completion is performed in this function.
++ */
++static void suspend_queue_group(struct kbase_queue_group *group)
++{
++	unsigned long flags;
++	struct kbase_csf_scheduler *const scheduler =
++		&group->kctx->kbdev->csf.scheduler;
++
++	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
++	/* This shall be used in program_suspending_csg_slots() where we
++	 * assume that whilst CSGs are being suspended, this bitmask is not
++	 * used by anything else i.e., it indicates only the CSGs going
++	 * through suspension.
++	 */
++	clear_bit(group->csg_nr, scheduler->csgs_events_enable_mask);
++	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
++
++	/* If AS fault detected, terminate the group */
++	if (!kctx_as_enabled(group->kctx) || group->faulted)
++		term_csg_slot(group);
++	else
++		suspend_csg_slot(group);
++}
++/**
++ * wait_csg_slots_start() - Wait for freshly programmed CSG slots to run.
++ *
++ * @kbdev: Pointer to the GPU device.
++ *
++ * Collects every slot whose state is CSG_SLOT_READY2RUN and waits, with a
++ * single fw_timeout_ms budget for the whole call, until csg_slot_running()
++ * holds for each of them. The resident group of each such slot is then
++ * marked KBASE_CSF_GROUP_RUNNABLE. On timeout a warning is logged, a GPU
++ * reset is requested and the wait is abandoned.
++ */
++static void wait_csg_slots_start(struct kbase_device *kbdev)
++{
++	u32 num_groups = kbdev->csf.global_iface.group_num;
++	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
++	long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
++	DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0};
++	u32 i;
++
++	lockdep_assert_held(&kbdev->csf.scheduler.lock);
++
++	/* extract start slot flags for check */
++	for (i = 0; i < num_groups; i++) {
++		if (atomic_read(&scheduler->csg_slots[i].state) ==
++		    CSG_SLOT_READY2RUN)
++			set_bit(i, slot_mask);
++	}
++
++	while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) {
++		DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS);
++
++		bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS);
++
++		remaining = wait_event_timeout(kbdev->csf.event_wait,
++			slots_state_changed(kbdev, changed, csg_slot_running),
++			remaining);
++
++		if (remaining) {
++			for_each_set_bit(i, changed, num_groups) {
++				struct kbase_queue_group *group =
++					scheduler->csg_slots[i].resident_group;
++
++				/* The on slot csg is now running
++				 *
++				 * NOTE(review): group is dereferenced below
++				 * without a NULL check; presumably a slot in
++				 * READY2RUN always has a resident group -
++				 * confirm.
++				 */
++				clear_bit(i, slot_mask);
++				group->run_state = KBASE_CSF_GROUP_RUNNABLE;
++			}
++		} else {
++			dev_warn(kbdev->dev, "Timed out waiting for CSG slots to start, slots: 0x%*pb\n",
++				 num_groups, slot_mask);
++
++			if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
++				kbase_reset_gpu(kbdev);
++			break;
++		}
++	}
++}
++
++/**
++ * group_on_slot_is_idle() - Check if the given slot has a CSG-idle state
++ *			     flagged after the completion of a CSG status
++ *			     update command
++ *
++ * This function is called at the start of scheduling tick to check the
++ * idle status of a queue group resident on a CSG slot.
++ * The caller must make sure the corresponding status update command has
++ * been called and completed before checking this status.
++ *
++ * @kbdev:  Pointer to the GPU device.
++ * @slot:   The given slot for checking an occupying resident group's idle
++ *          state.
++ *
++ * Return: true if the group resident on slot is idle, otherwise false.
++ */
++static bool group_on_slot_is_idle(struct kbase_device *kbdev,
++				  unsigned long slot)
++{
++	struct kbase_csf_cmd_stream_group_info *ginfo =
++					&kbdev->csf.global_iface.groups[slot];	/* slot is used as an index without a range check here */
++	bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
++			CSG_STATUS_STATE_IDLE_MASK;	/* any bit under the idle mask yields true; read happens before the lock assertion */
++
++	lockdep_assert_held(&kbdev->csf.scheduler.lock);
++
++	return idle;
++}
++
++/**
++ * slots_update_state_changed() -  Check the handshake state of a subset of
++ *                                 command group slots.
++ *
++ * Checks the state of a subset of slots selected through the slots_mask
++ * bit_map. Records which slots' handshake completed and send it back in the
++ * slots_done bit_map.
++ *
++ * @kbdev:          The GPU device.
++ * @field_mask:     The field mask for checking the state in the csg_req/ack.
++ * @slots_mask:     A bit_map specifying the slots to check.
++ * @slots_done:     A cleared bit_map for returning the slots that
++ *                  have finished update.
++ *
++ * Return: true if the slots_done is set for at least one slot.
++ *         Otherwise false.
++ */
++static
++bool slots_update_state_changed(struct kbase_device *kbdev, u32 field_mask,
++		const unsigned long *slots_mask, unsigned long *slots_done)
++{
++	u32 num_groups = kbdev->csf.global_iface.group_num;
++	bool changed = false;
++	u32 i;
++
++	lockdep_assert_held(&kbdev->csf.scheduler.lock);
++
++	for_each_set_bit(i, slots_mask, num_groups) {
++		struct kbase_csf_cmd_stream_group_info const *const ginfo =
++					    &kbdev->csf.global_iface.groups[i];
++		u32 state = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ);
++
++		state ^= kbase_csf_firmware_csg_output(ginfo, CSG_ACK);	/* set bits = fields where CSG_REQ and CSG_ACK differ */
++
++		if (!(state & field_mask)) {	/* REQ equals ACK within field_mask: handshake finished */
++			set_bit(i, slots_done);	/* bits are only ever set here, never cleared */
++			changed = true;
++		}
++	}
++
++	return changed;
++}
++
++/**
++ * wait_csg_slots_handshake_ack - Wait the req/ack handshakes to complete on
++ *                                the specified groups.
++ *
++ * This function waits for the acknowledgement of the request that have
++ * already been placed for the CSG slots by the caller. Currently used for
++ * the CSG priority update and status update requests.
++ *
++ * @kbdev:           Pointer to the GPU device.
++ * @field_mask:      The field mask for checking the state in the csg_req/ack.
++ * @slot_mask:       Bitmap reflecting the slots, the function will modify
++ *                   the acknowledged slots by clearing their corresponding
++ *                   bits.
++ * @wait_in_jiffies: Wait duration in jiffies, controlling the time-out.
++ *
++ * Return: 0 on all specified slots acknowledged; otherwise -ETIMEDOUT. For
++ *         timed out condition with unacknowledged slots, their bits remain
++ *         set in the slot_mask.
++ */
++static int wait_csg_slots_handshake_ack(struct kbase_device *kbdev,
++		u32 field_mask, unsigned long *slot_mask, long wait_in_jiffies)
++{
++	const u32 num_groups = kbdev->csf.global_iface.group_num;
++	long remaining = wait_in_jiffies;	/* single budget, carried across loop iterations */
++
++	lockdep_assert_held(&kbdev->csf.scheduler.lock);
++
++	while (!bitmap_empty(slot_mask, num_groups) &&
++	       !kbase_reset_gpu_is_active(kbdev)) {	/* note: once a GPU reset is active the loop ends and 0 is
++							 * returned, even if bits are still set in slot_mask
++							 */
++		DECLARE_BITMAP(dones, MAX_SUPPORTED_CSGS) = { 0 };
++
++		remaining = wait_event_timeout(kbdev->csf.event_wait,
++				slots_update_state_changed(kbdev, field_mask,
++						   slot_mask, dones),
++				remaining);
++
++		if (remaining)
++			bitmap_andnot(slot_mask, slot_mask, dones, num_groups);	/* drop the acknowledged slots */
++		else
++			/* Timed-out on the wait */
++			return -ETIMEDOUT;
++	}
++
++	return 0;
++}
++/**
++ * wait_csg_slots_finish_prio_update() - Wait for pending CSG priority
++ *                                       updates to be acknowledged.
++ *
++ * @kbdev: Pointer to the GPU device.
++ *
++ * Waits up to fw_timeout_ms for the CSG_REQ_EP_CFG_MASK handshake on the
++ * slots flagged in scheduler.csg_slots_prio_update; acknowledged slots have
++ * their bits cleared in that bitmap by the wait. A timeout is only logged.
++ */
++static void wait_csg_slots_finish_prio_update(struct kbase_device *kbdev)
++{
++	unsigned long *slot_mask =
++			kbdev->csf.scheduler.csg_slots_prio_update;
++	long wait_time = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
++	int ret = wait_csg_slots_handshake_ack(kbdev, CSG_REQ_EP_CFG_MASK,
++					       slot_mask, wait_time);	/* the wait runs in this initializer, before the lock assertion below */
++
++	lockdep_assert_held(&kbdev->csf.scheduler.lock);
++
++	if (ret != 0) {
++		/* The update timeout is not regarded as a serious
++		 * issue, no major consequences are expected as a
++		 * result, so just warn the case.
++		 */
++		dev_warn(
++			kbdev->dev,
++			"Timeout on CSG_REQ:EP_CFG, skipping the update wait: slot mask=0x%lx",
++			slot_mask[0]);
++	}
++}
++/**
++ * kbase_csf_scheduler_evict_ctx_slots() - Evict all on-slot groups of a
++ *                                         context.
++ *
++ * @kbdev:          Pointer to the GPU device.
++ * @kctx:           Context whose resident groups are to be evicted; the
++ *                  caller must hold kctx->csf.lock.
++ * @evicted_groups: List to which every evicted group is appended through
++ *                  its link member.
++ *
++ * Takes scheduler->lock for the duration of the call. Each slot whose
++ * resident group belongs to @kctx is terminated and cleaned up without
++ * waiting for the slot to stop, and the group is removed from the scheduler.
++ * A warning is raised if no GPU reset is active.
++ */
++void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev,
++		struct kbase_context *kctx, struct list_head *evicted_groups)
++{
++	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
++	struct kbase_queue_group *group;
++	u32 num_groups = kbdev->csf.global_iface.group_num;
++	u32 slot;
++	DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0};
++
++	lockdep_assert_held(&kctx->csf.lock);
++	mutex_lock(&scheduler->lock);
++
++	/* This code is only called during reset, so we don't wait for the CSG
++	 * slots to be stopped
++	 */
++	WARN_ON(!kbase_reset_gpu_is_active(kbdev));
++
++	KBASE_KTRACE_ADD(kbdev, EVICT_CTX_SLOTS, kctx, 0u);
++	for (slot = 0; slot < num_groups; slot++) {
++		group = kbdev->csf.scheduler.csg_slots[slot].resident_group;
++		if (group && group->kctx == kctx) {
++			bool as_fault;
++
++			term_csg_slot(group);
++			as_fault = cleanup_csg_slot(group);
++			/* remove the group from the scheduler list */
++			sched_evict_group(group, as_fault, false);
++			/* return the evicted group to the caller */
++			list_add_tail(&group->link, evicted_groups);
++			set_bit(slot, slot_mask);	/* slot_mask is only used for the log message below */
++		}
++	}
++
++	dev_info(kbdev->dev, "Evicting context %d_%d slots: 0x%*pb\n",
++			kctx->tgid, kctx->id, num_groups, slot_mask);
++
++	mutex_unlock(&scheduler->lock);
++}
++
++/**
++ * scheduler_slot_protm_ack - Acknowledging the protected region requests
++ * from the resident group on a given slot.
++ *
++ * The function assumes that the given slot is in stable running state and
++ * has already been judged by the caller on that any pending protected region
++ * requests of the resident group should be acknowledged.
++ *
++ * @kbdev:  Pointer to the GPU device.
++ * @group:  Pointer to the resident group on the given slot.
++ * @slot:   The slot that the given group is actively operating on.
++ * ++ * Return: true if the group has pending protm request(s) and is acknowledged. ++ * The caller should arrange to enter the protected mode for servicing ++ * it. Otherwise return false, indicating the group has no pending protm ++ * request. ++ */ ++static bool scheduler_slot_protm_ack(struct kbase_device *const kbdev, ++ struct kbase_queue_group *const group, ++ const int slot) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ bool protm_ack = false; ++ struct kbase_csf_cmd_stream_group_info *ginfo = ++ &kbdev->csf.global_iface.groups[slot]; ++ u32 max_csi; ++ int i; ++ ++ if (WARN_ON(scheduler->csg_slots[slot].resident_group != group)) ++ return protm_ack; ++ ++ lockdep_assert_held(&scheduler->lock); ++ lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.interrupt_lock); ++ ++ max_csi = ginfo->stream_num; ++ for (i = find_first_bit(group->protm_pending_bitmap, max_csi); ++ i < max_csi; ++ i = find_next_bit(group->protm_pending_bitmap, max_csi, i + 1)) { ++ struct kbase_queue *queue = group->bound_queues[i]; ++ ++ clear_bit(i, group->protm_pending_bitmap); ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, PROTM_PENDING_CLEAR, group, ++ queue, group->protm_pending_bitmap[0]); ++ ++ if (!WARN_ON(!queue) && queue->enabled) { ++ struct kbase_csf_cmd_stream_info *stream = ++ &ginfo->streams[i]; ++ u32 cs_protm_ack = kbase_csf_firmware_cs_output( ++ stream, CS_ACK) & ++ CS_ACK_PROTM_PEND_MASK; ++ u32 cs_protm_req = kbase_csf_firmware_cs_input_read( ++ stream, CS_REQ) & ++ CS_REQ_PROTM_PEND_MASK; ++ ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_ACK, group, ++ queue, cs_protm_ack ^ cs_protm_req); ++ ++ if (cs_protm_ack == cs_protm_req) { ++ dev_dbg(kbdev->dev, ++ "PROTM-ack already done for queue-%d group-%d slot-%d", ++ queue->csi_index, group->handle, slot); ++ continue; ++ } ++ ++ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, ++ cs_protm_ack, ++ CS_ACK_PROTM_PEND_MASK); ++ protm_ack = true; ++ dev_dbg(kbdev->dev, ++ "PROTM-ack for queue-%d, 
group-%d slot-%d", ++ queue->csi_index, group->handle, slot); ++ } ++ } ++ ++ return protm_ack; ++} ++ ++/** ++ * scheduler_group_check_protm_enter - Request the given group to be evaluated ++ * for triggering the protected mode. ++ * ++ * The function assumes the given group is either an active running group or ++ * the scheduler internally maintained field scheduler->top_grp. ++ * ++ * If the GPU is not already running in protected mode and the input group ++ * has protected region requests from its bound queues, the requests are ++ * acknowledged and the GPU is instructed to enter the protected mode. ++ * ++ * @kbdev: Pointer to the GPU device. ++ * @input_grp: Pointer to the GPU queue group. ++ */ ++static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, ++ struct kbase_queue_group *const input_grp) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ unsigned long flags; ++ bool protm_in_use; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ ++ protm_in_use = kbase_csf_scheduler_protected_mode_in_use(kbdev); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_CHECK_PROTM_ENTER, input_grp, ++ protm_in_use); ++ ++ /* Firmware samples the PROTM_PEND ACK bit for CSs when ++ * Host sends PROTM_ENTER global request. So if PROTM_PEND ACK bit ++ * is set for a CS after Host has sent the PROTM_ENTER ++ * Global request, then there is no guarantee that firmware will ++ * notice that prior to switching to protected mode. And firmware ++ * may not again raise the PROTM_PEND interrupt for that CS ++ * later on. To avoid that uncertainty PROTM_PEND ACK bit ++ * is not set for a CS if the request to enter protected ++ * mode has already been sent. It will be set later (after the exit ++ * from protected mode has taken place) when the group to which ++ * CS is bound becomes the top group. 
++ * ++ * The actual decision of entering protected mode is hinging on the ++ * input group is the top priority group, or, in case the previous ++ * top-group is evicted from the scheduler during the tick, its would ++ * be replacement, and that it is currently in a stable state (i.e. the ++ * slot state is running). ++ */ ++ if (!protm_in_use && !WARN_ON(!input_grp)) { ++ const int slot = ++ kbase_csf_scheduler_group_get_slot_locked(input_grp); ++ ++ /* check the input_grp is running and requesting protected mode ++ */ ++ if (slot >= 0 && ++ atomic_read(&scheduler->csg_slots[slot].state) == ++ CSG_SLOT_RUNNING) { ++ if (kctx_as_enabled(input_grp->kctx) && ++ scheduler_slot_protm_ack(kbdev, input_grp, slot)) { ++ /* Option of acknowledging to multiple ++ * CSGs from the same kctx is dropped, ++ * after consulting with the ++ * architecture team. See the comment in ++ * GPUCORE-21394. ++ */ ++ ++ /* Disable the idle timer */ ++ disable_gpu_idle_fw_timer_locked(kbdev); ++ ++ /* Switch to protected mode */ ++ scheduler->active_protm_grp = input_grp; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_ENTER_PROTM, ++ input_grp, 0u); ++ ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++ kbase_csf_enter_protected_mode(kbdev); ++ return; ++ } ++ } ++ } ++ ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++} ++ ++static void scheduler_apply(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ const u32 total_csg_slots = kbdev->csf.global_iface.group_num; ++ const u32 available_csg_slots = scheduler->num_csg_slots_for_tick; ++ u32 suspend_cnt = 0; ++ u32 remain_cnt = 0; ++ u32 resident_cnt = 0; ++ struct kbase_queue_group *group; ++ u32 i; ++ u32 spare; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ /* Suspend those resident groups not in the run list */ ++ for (i = 0; i < total_csg_slots; i++) { ++ group = scheduler->csg_slots[i].resident_group; ++ if (group) { ++ resident_cnt++; ++ if 
(group->prepared_seq_num >= available_csg_slots) { ++ suspend_queue_group(group); ++ suspend_cnt++; ++ } else ++ remain_cnt++; ++ } ++ } ++ ++ /* Initialize the remaining avialable csg slots for the tick/tock */ ++ scheduler->remaining_tick_slots = available_csg_slots; ++ ++ /* If there are spare slots, apply heads in the list */ ++ spare = (available_csg_slots > resident_cnt) ? ++ (available_csg_slots - resident_cnt) : 0; ++ while (!list_empty(&scheduler->groups_to_schedule)) { ++ group = list_first_entry(&scheduler->groups_to_schedule, ++ struct kbase_queue_group, ++ link_to_schedule); ++ ++ if (kbasep_csf_scheduler_group_is_on_slot_locked(group) && ++ group->prepared_seq_num < available_csg_slots) { ++ /* One of the resident remainders */ ++ update_csg_slot_priority(group, ++ get_slot_priority(group)); ++ } else if (spare != 0) { ++ s8 slot = (s8)find_first_zero_bit( ++ kbdev->csf.scheduler.csg_inuse_bitmap, ++ total_csg_slots); ++ ++ if (WARN_ON(slot >= (s8)total_csg_slots)) ++ break; ++ ++ if (!kctx_as_enabled(group->kctx) || group->faulted) { ++ /* Drop the head group and continue */ ++ update_offslot_non_idle_cnt_for_faulty_grp( ++ group); ++ remove_scheduled_group(kbdev, group); ++ continue; ++ } ++ program_csg_slot(group, slot, ++ get_slot_priority(group)); ++ if (unlikely(!csg_slot_in_use(kbdev, slot))) ++ break; ++ ++ spare--; ++ } else ++ break; ++ ++ /* Drop the head csg from the list */ ++ remove_scheduled_group(kbdev, group); ++ if (!WARN_ON(!scheduler->remaining_tick_slots)) ++ scheduler->remaining_tick_slots--; ++ } ++ ++ /* Dealing with groups currently going through suspend */ ++ program_suspending_csg_slots(kbdev); ++} ++ ++static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, ++ struct kbase_context *kctx, int priority) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ struct kbase_queue_group *group; ++ ++ lockdep_assert_held(&scheduler->lock); ++ if (WARN_ON(priority < 0) || ++ WARN_ON(priority >= 
KBASE_QUEUE_GROUP_PRIORITY_COUNT)) ++ return; ++ ++ if (!kctx_as_enabled(kctx)) ++ return; ++ ++ list_for_each_entry(group, &kctx->csf.sched.runnable_groups[priority], ++ link) { ++ if (WARN_ON(!list_empty(&group->link_to_schedule))) ++ /* This would be a bug */ ++ list_del_init(&group->link_to_schedule); ++ ++ if (unlikely(group->faulted)) ++ continue; ++ ++ /* Set the scanout sequence number, starting from 0 */ ++ group->scan_seq_num = scheduler->csg_scan_count_for_tick++; ++ ++ if (queue_group_idle_locked(group)) { ++ list_add_tail(&group->link_to_schedule, ++ &scheduler->idle_groups_to_schedule); ++ continue; ++ } ++ ++ if (!scheduler->ngrp_to_schedule) { ++ /* keep the top csg's origin */ ++ scheduler->top_ctx = kctx; ++ scheduler->top_grp = group; ++ } ++ ++ list_add_tail(&group->link_to_schedule, ++ &scheduler->groups_to_schedule); ++ group->prepared_seq_num = scheduler->ngrp_to_schedule++; ++ ++ kctx->csf.sched.ngrp_to_schedule++; ++ count_active_address_space(kbdev, kctx); ++ } ++} ++ ++/** ++ * scheduler_rotate_groups() - Rotate the runnable queue groups to provide ++ * fairness of scheduling within a single ++ * kbase_context. ++ * ++ * Since only kbase_csf_scheduler's top_grp (i.e. the queue group assigned ++ * the highest slot priority) is guaranteed to get the resources that it ++ * needs we only rotate the kbase_context corresponding to it - ++ * kbase_csf_scheduler's top_ctx. ++ * ++ * The priority level chosen for rotation is the one containing the previous ++ * scheduling cycle's kbase_csf_scheduler's top_grp. ++ * ++ * In a 'fresh-slice-cycle' this always corresponds to the highest group ++ * priority in use by kbase_csf_scheduler's top_ctx. That is, it's the priority ++ * level of the previous scheduling cycle's first runnable kbase_context. ++ * ++ * We choose this priority level because when higher priority work is ++ * scheduled, we should always cause the scheduler to run and do a scan. 
The ++ * scan always enumerates the highest priority work first (whether that be ++ * based on process priority or group priority), and thus ++ * kbase_csf_scheduler's top_grp will point to the first of those high priority ++ * groups, which necessarily must be the highest priority group in ++ * kbase_csf_scheduler's top_ctx. The fresh-slice-cycle will run later and pick ++ * up that group appropriately. ++ * ++ * If kbase_csf_scheduler's top_grp was instead evicted (and thus is NULL), ++ * then no explicit rotation occurs on the next fresh-slice-cycle schedule, but ++ * will set up kbase_csf_scheduler's top_ctx again for the next scheduling ++ * cycle. Implicitly, a rotation had already occurred by removing ++ * the kbase_csf_scheduler's top_grp ++ * ++ * If kbase_csf_scheduler's top_grp became idle and all other groups belonging ++ * to kbase_csf_scheduler's top_grp's priority level in kbase_csf_scheduler's ++ * top_ctx are also idle, then the effect of this will be to rotate idle ++ * groups, which might not actually become resident in the next ++ * scheduling slice. However this is acceptable since a queue group becoming ++ * idle is implicitly a rotation (as above with evicted queue groups), as it ++ * automatically allows a new queue group to take the maximum slot priority ++ * whilst the idle kbase_csf_scheduler's top_grp ends up near the back of ++ * the kbase_csf_scheduler's groups_to_schedule list. In this example, it will ++ * be for a group in the next lowest priority level or in absence of those the ++ * next kbase_context's queue groups. ++ * ++ * @kbdev: Pointer to the GPU device. 
++ */ ++static void scheduler_rotate_groups(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ struct kbase_context *const top_ctx = scheduler->top_ctx; ++ struct kbase_queue_group *const top_grp = scheduler->top_grp; ++ ++ lockdep_assert_held(&scheduler->lock); ++ if (top_ctx && top_grp) { ++ struct list_head *list = ++ &top_ctx->csf.sched.runnable_groups[top_grp->priority]; ++ ++ WARN_ON(top_grp->kctx != top_ctx); ++ if (!WARN_ON(list_empty(list))) { ++ struct kbase_queue_group *new_head_grp; ++ list_move_tail(&top_grp->link, list); ++ new_head_grp = (!list_empty(list)) ? ++ list_first_entry(list, struct kbase_queue_group, link) : ++ NULL; ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_ROTATE_RUNNABLE, ++ top_grp, top_ctx->csf.sched.num_runnable_grps); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_HEAD_RUNNABLE, ++ new_head_grp, 0u); ++ dev_dbg(kbdev->dev, ++ "groups rotated for a context, num_runnable_groups: %u\n", ++ scheduler->top_ctx->csf.sched.num_runnable_grps); ++ } ++ } ++} ++ ++static void scheduler_rotate_ctxs(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ struct list_head *list = &scheduler->runnable_kctxs; ++ ++ lockdep_assert_held(&scheduler->lock); ++ if (scheduler->top_ctx) { ++ if (!WARN_ON(list_empty(list))) { ++ struct kbase_context *pos; ++ bool found = false; ++ ++ /* Locate the ctx on the list */ ++ list_for_each_entry(pos, list, csf.link) { ++ if (scheduler->top_ctx == pos) { ++ found = true; ++ break; ++ } ++ } ++ ++ if (!WARN_ON(!found)) { ++ struct kbase_context *new_head_kctx; ++ list_move_tail(&pos->csf.link, list); ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_ROTATE_RUNNABLE, pos, ++ 0u); ++ new_head_kctx = (!list_empty(list)) ? 
++ list_first_entry(list, struct kbase_context, csf.link) : ++ NULL; ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_HEAD_RUNNABLE, ++ new_head_kctx, 0u); ++ dev_dbg(kbdev->dev, "contexts rotated\n"); ++ } ++ } ++ } ++} ++ ++/** ++ * scheduler_update_idle_slots_status() - Get the status update for the CSG ++ * slots for which the IDLE notification was received ++ * previously. ++ * ++ * This function sends a CSG status update request for all the CSG slots ++ * present in the bitmap scheduler->csg_slots_idle_mask and wait for the ++ * request to complete. ++ * The bits set in the scheduler->csg_slots_idle_mask bitmap are cleared by ++ * this function. ++ * ++ * @kbdev: Pointer to the GPU device. ++ * @csg_bitmap: Bitmap of the CSG slots for which ++ * the status update request completed successfully. ++ * @failed_csg_bitmap: Bitmap of the CSG slots for which ++ * the status update request timedout. ++ */ ++static void scheduler_update_idle_slots_status(struct kbase_device *kbdev, ++ unsigned long *csg_bitmap, unsigned long *failed_csg_bitmap) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ const u32 num_groups = kbdev->csf.global_iface.group_num; ++ struct kbase_csf_global_iface *const global_iface = ++ &kbdev->csf.global_iface; ++ unsigned long flags, i; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) { ++ struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i]; ++ struct kbase_queue_group *group = csg_slot->resident_group; ++ struct kbase_csf_cmd_stream_group_info *const ginfo = ++ &global_iface->groups[i]; ++ u32 csg_req; ++ ++ clear_bit(i, scheduler->csg_slots_idle_mask); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group, ++ scheduler->csg_slots_idle_mask[0]); ++ if (WARN_ON(!group)) ++ continue; ++ ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STATUS_UPDATE, group, ++ i); ++ ++ csg_req = 
kbase_csf_firmware_csg_output(ginfo, CSG_ACK); ++ csg_req ^= CSG_REQ_STATUS_UPDATE_MASK; ++ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req, ++ CSG_REQ_STATUS_UPDATE_MASK); ++ ++ set_bit(i, csg_bitmap); ++ } ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++ ++ /* The groups are aggregated into a single kernel doorbell request */ ++ if (!bitmap_empty(csg_bitmap, num_groups)) { ++ long wt = ++ kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); ++ u32 db_slots = (u32)csg_bitmap[0]; ++ ++ kbase_csf_ring_csg_slots_doorbell(kbdev, db_slots); ++ ++ if (wait_csg_slots_handshake_ack(kbdev, ++ CSG_REQ_STATUS_UPDATE_MASK, csg_bitmap, wt)) { ++ dev_warn( ++ kbdev->dev, ++ "Timeout on CSG_REQ:STATUS_UPDATE, treat groups as not idle: slot mask=0x%lx", ++ csg_bitmap[0]); ++ ++ /* Store the bitmap of timed out slots */ ++ bitmap_copy(failed_csg_bitmap, csg_bitmap, num_groups); ++ csg_bitmap[0] = ~csg_bitmap[0] & db_slots; ++ } else { ++ KBASE_KTRACE_ADD(kbdev, SLOTS_STATUS_UPDATE_ACK, NULL, ++ db_slots); ++ csg_bitmap[0] = db_slots; ++ } ++ } ++} ++ ++/** ++ * scheduler_handle_idle_slots() - Update the idle status of queue groups ++ * resident on CSG slots for which the ++ * IDLE notification was received previously. ++ * ++ * This function is called at the start of scheduling tick/tock to reconfirm ++ * the idle status of queue groups resident on CSG slots for ++ * which idle notification was received previously, i.e. all the CSG slots ++ * present in the bitmap scheduler->csg_slots_idle_mask. ++ * The confirmation is done by sending the CSG status update request to the ++ * firmware. On completion, the firmware will mark the idleness at the ++ * slot's interface CSG_STATUS_STATE register accordingly. ++ * ++ * The run state of the groups resident on still idle CSG slots is changed to ++ * KBASE_CSF_GROUP_IDLE and the bitmap scheduler->csg_slots_idle_mask is ++ * updated accordingly. 
++ * The bits corresponding to slots for which the status update request timedout ++ * remain set in scheduler->csg_slots_idle_mask. ++ * ++ * @kbdev: Pointer to the GPU device. ++ */ ++static void scheduler_handle_idle_slots(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ u32 num_groups = kbdev->csf.global_iface.group_num; ++ unsigned long flags, i; ++ DECLARE_BITMAP(csg_bitmap, MAX_SUPPORTED_CSGS) = { 0 }; ++ DECLARE_BITMAP(failed_csg_bitmap, MAX_SUPPORTED_CSGS) = { 0 }; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ scheduler_update_idle_slots_status(kbdev, csg_bitmap, ++ failed_csg_bitmap); ++ ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ for_each_set_bit(i, csg_bitmap, num_groups) { ++ struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i]; ++ struct kbase_queue_group *group = csg_slot->resident_group; ++ ++ if (WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_RUNNING)) ++ continue; ++ if (WARN_ON(!group)) ++ continue; ++ if (WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE && ++ group->run_state != KBASE_CSF_GROUP_IDLE)) ++ continue; ++ if (WARN_ON(group->priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT)) ++ continue; ++ ++ if (group_on_slot_is_idle(kbdev, i)) { ++ group->run_state = KBASE_CSF_GROUP_IDLE; ++ set_bit(i, scheduler->csg_slots_idle_mask); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, ++ group, scheduler->csg_slots_idle_mask[0]); ++ } else ++ group->run_state = KBASE_CSF_GROUP_RUNNABLE; ++ } ++ ++ bitmap_or(scheduler->csg_slots_idle_mask, ++ scheduler->csg_slots_idle_mask, ++ failed_csg_bitmap, num_groups); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, NULL, ++ scheduler->csg_slots_idle_mask[0]); ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++} ++ ++static void scheduler_scan_idle_groups(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ struct kbase_queue_group *group, *n; ++ ++ 
list_for_each_entry_safe(group, n, &scheduler->idle_groups_to_schedule, ++ link_to_schedule) { ++ ++ WARN_ON(!queue_group_idle_locked(group)); ++ ++ if (!scheduler->ngrp_to_schedule) { ++ /* keep the top csg's origin */ ++ scheduler->top_ctx = group->kctx; ++ scheduler->top_grp = group; ++ } ++ ++ group->prepared_seq_num = scheduler->ngrp_to_schedule++; ++ list_move_tail(&group->link_to_schedule, ++ &scheduler->groups_to_schedule); ++ ++ group->kctx->csf.sched.ngrp_to_schedule++; ++ count_active_address_space(kbdev, group->kctx); ++ } ++} ++ ++static void scheduler_rotate(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ /* Dealing with rotation */ ++ scheduler_rotate_groups(kbdev); ++ scheduler_rotate_ctxs(kbdev); ++} ++ ++static struct kbase_queue_group *get_tock_top_group( ++ struct kbase_csf_scheduler *const scheduler) ++{ ++ struct kbase_context *kctx; ++ int i; ++ ++ lockdep_assert_held(&scheduler->lock); ++ for (i = 0; i < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) { ++ list_for_each_entry(kctx, ++ &scheduler->runnable_kctxs, csf.link) { ++ struct kbase_queue_group *group; ++ ++ list_for_each_entry(group, ++ &kctx->csf.sched.runnable_groups[i], ++ link) { ++ if (queue_group_idle_locked(group)) ++ continue; ++ ++ return group; ++ } ++ } ++ } ++ ++ return NULL; ++} ++ ++static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, ++ bool is_suspend) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 }; ++ ++ int ret = suspend_active_queue_groups(kbdev, slot_mask); ++ ++ if (ret) { ++ /* The suspend of CSGs failed, trigger the GPU reset and wait ++ * for it to complete to be in a deterministic state. 
++ */ ++ dev_warn(kbdev->dev, "Timed out waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n", ++ kbdev->csf.global_iface.group_num, slot_mask); ++ ++ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) ++ kbase_reset_gpu(kbdev); ++ ++ if (is_suspend) { ++ mutex_unlock(&scheduler->lock); ++ kbase_reset_gpu_wait(kbdev); ++ mutex_lock(&scheduler->lock); ++ } ++ return -1; ++ } ++ ++ /* Check if the groups became active whilst the suspend was ongoing, ++ * but only for the case where the system suspend is not in progress ++ */ ++ if (!is_suspend && atomic_read(&scheduler->non_idle_offslot_grps)) ++ return -1; ++ ++ return 0; ++} ++ ++static bool scheduler_idle_suspendable(struct kbase_device *kbdev) ++{ ++ bool suspend; ++ unsigned long flags; ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ if (scheduler->state == SCHED_SUSPENDED) ++ return false; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ if (scheduler->total_runnable_grps) { ++ spin_lock(&scheduler->interrupt_lock); ++ ++ /* Check both on-slots and off-slots groups idle status */ ++ suspend = kbase_csf_scheduler_all_csgs_idle(kbdev) && ++ !atomic_read(&scheduler->non_idle_offslot_grps) && ++ kbase_pm_idle_groups_sched_suspendable(kbdev); ++ ++ spin_unlock(&scheduler->interrupt_lock); ++ } else ++ suspend = kbase_pm_no_runnables_sched_suspendable(kbdev); ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ return suspend; ++} ++ ++static void gpu_idle_worker(struct work_struct *work) ++{ ++ struct kbase_device *kbdev = container_of( ++ work, struct kbase_device, csf.scheduler.gpu_idle_work); ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ bool reset_active = false; ++ bool scheduler_is_idle_suspendable = false; ++ bool all_groups_suspended = false; ++ ++ KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_BEGIN, NULL, 0u); ++ ++#define __ENCODE_KTRACE_INFO(reset, idle, all_suspend) \ 
++ (((u32)reset) | (((u32)idle) << 4) | (((u32)all_suspend) << 8)) ++ ++ if (kbase_reset_gpu_try_prevent(kbdev)) { ++ dev_warn(kbdev->dev, "Quit idle for failing to prevent gpu reset.\n"); ++ KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_END, NULL, ++ __ENCODE_KTRACE_INFO(true, false, false)); ++ return; ++ } ++ mutex_lock(&scheduler->lock); ++ ++ /* Cycle completed, disable the firmware idle timer */ ++ disable_gpu_idle_fw_timer(kbdev); ++ scheduler_is_idle_suspendable = scheduler_idle_suspendable(kbdev); ++ reset_active = kbase_reset_gpu_is_active(kbdev); ++ if (scheduler_is_idle_suspendable && !reset_active) { ++ all_groups_suspended = ++ !suspend_active_groups_on_powerdown(kbdev, false); ++ ++ if (all_groups_suspended) { ++ dev_dbg(kbdev->dev, "Scheduler becomes idle suspended now"); ++ scheduler_suspend(kbdev); ++ cancel_tick_timer(kbdev); ++ } else { ++ dev_dbg(kbdev->dev, "Aborting suspend scheduler (grps: %d)", ++ atomic_read(&scheduler->non_idle_offslot_grps)); ++ /* Bring forward the next tick */ ++ kbase_csf_scheduler_advance_tick(kbdev); ++ } ++ } ++ ++ mutex_unlock(&scheduler->lock); ++ kbase_reset_gpu_allow(kbdev); ++ KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_END, NULL, ++ __ENCODE_KTRACE_INFO(reset_active, scheduler_is_idle_suspendable, all_groups_suspended)); ++#undef __ENCODE_KTRACE_INFO ++} ++ ++static int scheduler_prepare(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ int i; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ /* Empty the groups_to_schedule */ ++ while (!list_empty(&scheduler->groups_to_schedule)) { ++ struct kbase_queue_group *grp = ++ list_first_entry(&scheduler->groups_to_schedule, ++ struct kbase_queue_group, ++ link_to_schedule); ++ ++ remove_scheduled_group(kbdev, grp); ++ } ++ ++ /* Pre-scan init scheduler fields */ ++ if (WARN_ON(scheduler->ngrp_to_schedule != 0)) ++ scheduler->ngrp_to_schedule = 0; ++ scheduler->top_ctx = NULL; ++ scheduler->top_grp = NULL; ++ 
scheduler->csg_scan_count_for_tick = 0; ++ WARN_ON(!list_empty(&scheduler->idle_groups_to_schedule)); ++ scheduler->num_active_address_spaces = 0; ++ scheduler->num_csg_slots_for_tick = 0; ++ bitmap_zero(scheduler->csg_slots_prio_update, MAX_SUPPORTED_CSGS); ++ ++ /* Scan out to run groups */ ++ for (i = 0; i < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) { ++ struct kbase_context *kctx; ++ ++ list_for_each_entry(kctx, &scheduler->runnable_kctxs, csf.link) ++ scheduler_ctx_scan_groups(kbdev, kctx, i); ++ } ++ ++ /* Update this tick's non-idle groups */ ++ scheduler->non_idle_scanout_grps = scheduler->ngrp_to_schedule; ++ ++ /* Initial number of non-idle off-slot groups, before the scheduler's ++ * scheduler_apply() operation. This gives a sensible start point view ++ * of the tick. It will be subject to up/downs during the scheduler ++ * active phase. ++ */ ++ atomic_set(&scheduler->non_idle_offslot_grps, ++ scheduler->non_idle_scanout_grps); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, NULL, ++ scheduler->non_idle_scanout_grps); ++ ++ /* Adds those idle but runnable groups to the scanout list */ ++ scheduler_scan_idle_groups(kbdev); ++ ++ /* After adding the idle CSGs, the two counts should be the same */ ++ WARN_ON(scheduler->csg_scan_count_for_tick != scheduler->ngrp_to_schedule); ++ ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp, ++ scheduler->num_active_address_spaces | ++ (((u64)scheduler->ngrp_to_schedule) << 32)); ++ set_max_csg_slots(kbdev); ++ dev_dbg(kbdev->dev, "prepared groups length: %u, num_active_address_spaces: %u\n", ++ scheduler->ngrp_to_schedule, scheduler->num_active_address_spaces); ++ return 0; ++} ++ ++static void scheduler_handle_idle_timer_onoff(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ /* After the scheduler apply operation, the internal variable ++ * scheduler->non_idle_offslot_grps reflects 
the end-point view ++ * of the count at the end of the active phase. ++ * ++ * Any changes that follow (after the scheduler has dropped the ++ * scheduler->lock), reflects async operations to the scheduler, ++ * such as a group gets killed (evicted) or a new group inserted, ++ * cqs wait-sync triggered state transtion etc. ++ * ++ * The condition for enable the idle timer is that there is no ++ * non-idle groups off-slots. If there is non-idle group off-slot, ++ * the timer should be disabled. ++ */ ++ if (atomic_read(&scheduler->non_idle_offslot_grps)) ++ disable_gpu_idle_fw_timer(kbdev); ++ else ++ enable_gpu_idle_fw_timer(kbdev); ++} ++ ++static void schedule_actions(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ unsigned long flags; ++ struct kbase_queue_group *protm_grp; ++ int ret; ++ bool skip_idle_slots_update; ++ bool new_protm_top_grp = false; ++ ++ kbase_reset_gpu_assert_prevented(kbdev); ++ lockdep_assert_held(&scheduler->lock); ++ ++ ret = kbase_pm_wait_for_desired_state(kbdev); ++ if (ret) { ++ dev_err(kbdev->dev, "Wait for MCU power on failed"); ++ return; ++ } ++ ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ skip_idle_slots_update = kbase_csf_scheduler_protected_mode_in_use(kbdev); ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++ ++ /* Skip updating on-slot idle CSGs if GPU is in protected mode. */ ++ if (!skip_idle_slots_update) ++ scheduler_handle_idle_slots(kbdev); ++ ++ scheduler_prepare(kbdev); ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ protm_grp = scheduler->active_protm_grp; ++ ++ /* Avoid update if the top-group remains unchanged and in protected ++ * mode. For the said case, all the slots update is effectively ++ * competing against the active protected mode group (typically the ++ * top-group). 
If we update other slots, even on leaving the ++ * top-group slot untouched, the firmware would exit the protected mode ++ * for interacting with the host-driver. After it, as the top-group ++ * would again raise the request for entering protected mode, we would ++ * be actively doing the switching over twice without progressing the ++ * queue jobs. ++ */ ++ if (protm_grp && scheduler->top_grp == protm_grp) { ++ int new_val; ++ dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d", ++ protm_grp->handle); ++ new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC, ++ protm_grp, new_val); ++ } else if (scheduler->top_grp) { ++ if (protm_grp) ++ dev_dbg(kbdev->dev, "Scheduler drop protm exec: group-%d", ++ protm_grp->handle); ++ ++ if (!bitmap_empty(scheduler->top_grp->protm_pending_bitmap, ++ kbdev->csf.global_iface.groups[0].stream_num)) { ++ dev_dbg(kbdev->dev, "Scheduler prepare protm exec: group-%d of context %d_%d", ++ scheduler->top_grp->handle, ++ scheduler->top_grp->kctx->tgid, ++ scheduler->top_grp->kctx->id); ++ ++ /* When entering protected mode all CSG slots can be occupied ++ * but only the protected mode CSG will be running. Any event ++ * that would trigger the execution of an on-slot idle CSG will ++ * need to be handled by the host during protected mode. ++ */ ++ new_protm_top_grp = true; ++ } ++ ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++ ++ scheduler_apply(kbdev); ++ ++ /* Post-apply, all the committed groups in this tick are on ++ * slots, time to arrange the idle timer on/off decision. ++ */ ++ scheduler_handle_idle_timer_onoff(kbdev); ++ ++ /* Scheduler is dropping the exec of the previous protm_grp, ++ * Until the protm quit completes, the GPU is effectively ++ * locked in the secure mode. 
++ */ ++ if (protm_grp) ++ scheduler_force_protm_exit(kbdev); ++ ++ wait_csg_slots_start(kbdev); ++ wait_csg_slots_finish_prio_update(kbdev); ++ ++ if (new_protm_top_grp) { ++ scheduler_group_check_protm_enter(kbdev, ++ scheduler->top_grp); ++ } ++ ++ return; ++ } ++ ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++ return; ++} ++ ++static void schedule_on_tock(struct work_struct *work) ++{ ++ struct kbase_device *kbdev = container_of(work, struct kbase_device, ++ csf.scheduler.tock_work.work); ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ ++ int err = kbase_reset_gpu_try_prevent(kbdev); ++ /* Regardless of whether reset failed or is currently happening, exit ++ * early ++ */ ++ if (err) ++ return; ++ ++ mutex_lock(&scheduler->lock); ++ if (scheduler->state == SCHED_SUSPENDED) ++ goto exit_no_schedule_unlock; ++ ++ WARN_ON(!(scheduler->state == SCHED_INACTIVE)); ++ scheduler->state = SCHED_BUSY; ++ ++ /* Undertaking schedule action steps */ ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK, NULL, 0u); ++ schedule_actions(kbdev); ++ ++ /* Record time information */ ++ scheduler->last_schedule = jiffies; ++ ++ /* Tock is serviced */ ++ scheduler->tock_pending_request = false; ++ ++ scheduler->state = SCHED_INACTIVE; ++ mutex_unlock(&scheduler->lock); ++ kbase_reset_gpu_allow(kbdev); ++ ++ dev_dbg(kbdev->dev, ++ "Waking up for event after schedule-on-tock completes."); ++ wake_up_all(&kbdev->csf.event_wait); ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_END, NULL, 0u); ++ return; ++ ++exit_no_schedule_unlock: ++ mutex_unlock(&scheduler->lock); ++ kbase_reset_gpu_allow(kbdev); ++} ++ ++static void schedule_on_tick(struct work_struct *work) ++{ ++ struct kbase_device *kbdev = container_of(work, struct kbase_device, ++ csf.scheduler.tick_work); ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ ++ int err = kbase_reset_gpu_try_prevent(kbdev); ++ /* Regardless of whether reset failed or is currently happening, exit 
++ * early ++ */ ++ if (err) ++ return; ++ ++ mutex_lock(&scheduler->lock); ++ ++ WARN_ON(scheduler->tick_timer_active); ++ if (scheduler->state == SCHED_SUSPENDED) ++ goto exit_no_schedule_unlock; ++ ++ scheduler->state = SCHED_BUSY; ++ /* Do scheduling stuff */ ++ scheduler_rotate(kbdev); ++ ++ /* Undertaking schedule action steps */ ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK, NULL, ++ scheduler->total_runnable_grps); ++ schedule_actions(kbdev); ++ ++ /* Record time information */ ++ scheduler->last_schedule = jiffies; ++ ++ /* Kicking next scheduling if needed */ ++ if (likely(scheduler_timer_is_enabled_nolock(kbdev)) && ++ (scheduler->total_runnable_grps > 0)) { ++ start_tick_timer(kbdev); ++ dev_dbg(kbdev->dev, ++ "scheduling for next tick, num_runnable_groups:%u\n", ++ scheduler->total_runnable_grps); ++ } ++ ++ scheduler->state = SCHED_INACTIVE; ++ mutex_unlock(&scheduler->lock); ++ kbase_reset_gpu_allow(kbdev); ++ ++ dev_dbg(kbdev->dev, "Waking up for event after schedule-on-tick completes."); ++ wake_up_all(&kbdev->csf.event_wait); ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_END, NULL, ++ scheduler->total_runnable_grps); ++ return; ++ ++exit_no_schedule_unlock: ++ mutex_unlock(&scheduler->lock); ++ kbase_reset_gpu_allow(kbdev); ++} ++ ++static int wait_csg_slots_suspend(struct kbase_device *kbdev, ++ const unsigned long *slot_mask, ++ unsigned int timeout_ms) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ long remaining = kbase_csf_timeout_in_jiffies(timeout_ms); ++ u32 num_groups = kbdev->csf.global_iface.group_num; ++ int err = 0; ++ DECLARE_BITMAP(slot_mask_local, MAX_SUPPORTED_CSGS); ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ bitmap_copy(slot_mask_local, slot_mask, MAX_SUPPORTED_CSGS); ++ ++ while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS) ++ && remaining) { ++ DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS); ++ ++ bitmap_copy(changed, slot_mask_local, MAX_SUPPORTED_CSGS); ++ ++ remaining = 
wait_event_timeout(kbdev->csf.event_wait, ++ slots_state_changed(kbdev, changed, ++ csg_slot_stopped_locked), ++ remaining); ++ ++ if (remaining) { ++ u32 i; ++ ++ for_each_set_bit(i, changed, num_groups) { ++ struct kbase_queue_group *group; ++ ++ if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i))) ++ continue; ++ ++ /* The on slot csg is now stopped */ ++ clear_bit(i, slot_mask_local); ++ ++ group = scheduler->csg_slots[i].resident_group; ++ if (likely(group)) { ++ /* Only do save/cleanup if the ++ * group is not terminated during ++ * the sleep. ++ */ ++ save_csg_slot(group); ++ if (cleanup_csg_slot(group)) ++ sched_evict_group(group, true, true); ++ } ++ } ++ } else { ++ dev_warn(kbdev->dev, "Timed out waiting for CSG slots to suspend, slot_mask: 0x%*pb\n", ++ num_groups, slot_mask_local); ++ err = -ETIMEDOUT; ++ } ++ } ++ ++ return err; ++} ++ ++static int suspend_active_queue_groups(struct kbase_device *kbdev, ++ unsigned long *slot_mask) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ u32 num_groups = kbdev->csf.global_iface.group_num; ++ u32 slot_num; ++ int ret; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ for (slot_num = 0; slot_num < num_groups; slot_num++) { ++ struct kbase_queue_group *group = ++ scheduler->csg_slots[slot_num].resident_group; ++ ++ if (group) { ++ suspend_queue_group(group); ++ set_bit(slot_num, slot_mask); ++ } ++ } ++ ++ ret = wait_csg_slots_suspend(kbdev, slot_mask, kbdev->reset_timeout_ms); ++ return ret; ++} ++ ++static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 }; ++ int ret; ++ int ret2; ++ ++ mutex_lock(&scheduler->lock); ++ ++ ret = suspend_active_queue_groups(kbdev, slot_mask); ++ ++ if (ret) { ++ dev_warn(kbdev->dev, "Timed out waiting for CSG slots to suspend before reset, slot_mask: 0x%*pb\n", ++ kbdev->csf.global_iface.group_num, 
slot_mask); ++ } ++ ++ /* Need to flush the GPU cache to ensure suspend buffer ++ * contents are not lost on reset of GPU. ++ * Do this even if suspend operation had timed out for some of ++ * the CSG slots. ++ * In case the scheduler is already in suspended state, the ++ * cache clean is required as the async reset request from ++ * the debugfs may race against the scheduler suspend operation ++ * due to the extra context ref-count, which prevents the ++ * L2 powering down cache clean operation in the non racing ++ * case. ++ */ ++ kbase_gpu_start_cache_clean(kbdev); ++ ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev, ++ kbdev->reset_timeout_ms); ++ if (ret2) { ++ dev_warn(kbdev->dev, "Timed out waiting for cache clean to complete before reset"); ++ if (!ret) ++ ret = ret2; ++ } ++ ++ mutex_unlock(&scheduler->lock); ++ ++ return ret; ++} ++ ++/** ++ * scheduler_handle_reset_in_protected_mode() - Update the state of normal mode ++ * groups when reset is done during ++ * protected mode execution. ++ * ++ * @kbdev: Pointer to the device. ++ * ++ * This function is called at the time of GPU reset, before the suspension of ++ * queue groups, to handle the case when the reset is getting performed whilst ++ * GPU is in protected mode. ++ * On entry to protected mode all the groups, except the top group that executes ++ * in protected mode, are implicitly suspended by the FW. Thus this function ++ * simply marks the normal mode groups as suspended (and cleans up the ++ * corresponding CSG slots) to prevent their potential forceful eviction from ++ * the Scheduler. So if GPU was in protected mode and there was no fault, then ++ * only the protected mode group would be suspended in the regular way post exit ++ * from this function. And if GPU was in normal mode, then all on-slot groups ++ * will get suspended in the regular way. 
++ * ++ * Return: true if the groups remaining on the CSG slots need to be suspended in ++ * the regular way by sending CSG SUSPEND reqs to FW, otherwise false. ++ */ ++static bool scheduler_handle_reset_in_protected_mode(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ u32 const num_groups = kbdev->csf.global_iface.group_num; ++ struct kbase_queue_group *protm_grp; ++ bool suspend_on_slot_groups; ++ unsigned long flags; ++ u32 csg_nr; ++ ++ mutex_lock(&scheduler->lock); ++ ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ protm_grp = scheduler->active_protm_grp; ++ ++ /* If GPU wasn't in protected mode or had exited it before the GPU reset ++ * then all the on-slot groups can be suspended in the regular way by ++ * sending CSG SUSPEND requests to FW. ++ * If there wasn't a fault for protected mode group, then it would ++ * also need to be suspended in the regular way before the reset. ++ */ ++ suspend_on_slot_groups = !(protm_grp && protm_grp->faulted); ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++ ++ if (!protm_grp) ++ goto unlock; ++ ++ /* GPU is in protected mode, so all the on-slot groups barring the ++ * protected mode group can be marked as suspended right away. ++ */ ++ for (csg_nr = 0; csg_nr < num_groups; csg_nr++) { ++ struct kbase_queue_group *const group = ++ kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; ++ int new_val; ++ ++ if (!group || (group == protm_grp)) ++ continue; ++ ++ cleanup_csg_slot(group); ++ group->run_state = KBASE_CSF_GROUP_SUSPENDED; ++ ++ /* Simply treat the normal mode groups as non-idle. The tick ++ * scheduled after the reset will re-initialize the counter ++ * anyways. 
++ */ ++ new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, ++ group, new_val); ++ } ++ ++unlock: ++ mutex_unlock(&scheduler->lock); ++ return suspend_on_slot_groups; ++} ++ ++/* scheduler_inner_reset() - Cancel the pending scheduler work items and the ++ * tick timer, then clear the CSG slot array, the in-use slot bitmap, the ++ * active protected mode group and the top context/group, and re-enable ++ * events for all CSGs. ++ */ ++static void scheduler_inner_reset(struct kbase_device *kbdev) ++{ ++ u32 const num_groups = kbdev->csf.global_iface.group_num; ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ unsigned long flags; ++ ++ WARN_ON(csgs_active(kbdev)); ++ ++ /* Cancel any potential queued delayed work(s) */ ++ cancel_work_sync(&kbdev->csf.scheduler.gpu_idle_work); ++ cancel_tick_timer(kbdev); ++ cancel_work_sync(&scheduler->tick_work); ++ cancel_delayed_work_sync(&scheduler->tock_work); ++ cancel_delayed_work_sync(&scheduler->ping_work); ++ ++ mutex_lock(&scheduler->lock); ++ ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS); ++ if (scheduler->active_protm_grp) ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_EXIT_PROTM, ++ scheduler->active_protm_grp, 0u); ++ scheduler->active_protm_grp = NULL; ++ memset(kbdev->csf.scheduler.csg_slots, 0, ++ num_groups * sizeof(struct kbase_csf_csg_slot)); ++ bitmap_zero(kbdev->csf.scheduler.csg_inuse_bitmap, num_groups); ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++ ++ scheduler->top_ctx = NULL; ++ scheduler->top_grp = NULL; ++ ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp, ++ scheduler->num_active_address_spaces | ++ (((u64)scheduler->total_runnable_grps) << 32)); ++ ++ mutex_unlock(&scheduler->lock); ++} ++ ++void kbase_csf_scheduler_reset(struct kbase_device *kbdev) ++{ ++ struct kbase_context *kctx; ++ ++ WARN_ON(!kbase_reset_gpu_is_active(kbdev)); ++ ++ KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET, NULL, 0u); ++ ++ if (scheduler_handle_reset_in_protected_mode(kbdev) && ++ !suspend_active_queue_groups_on_reset(kbdev)) { ++ /* As all groups have been successfully 
evicted from the CSG ++ * slots, clear out the scheduler data fields and return ++ */ ++ scheduler_inner_reset(kbdev); ++ return; ++ } ++ ++ mutex_lock(&kbdev->kctx_list_lock); ++ ++ /* The loop to iterate over the kbase contexts is present due to lock ++ * ordering issue between kctx->csf.lock & kbdev->csf.scheduler.lock. ++ * CSF ioctls first take kctx->csf.lock which is context-specific and ++ * then take kbdev->csf.scheduler.lock for global actions like assigning ++ * a CSG slot. ++ * If the lock ordering constraint was not there then could have ++ * directly looped over the active queue groups. ++ */ ++ list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { ++ /* Firmware reload would reinitialize the CSG & CS interface IO ++ * pages, so just need to internally mark the currently active ++ * queue groups as terminated (similar to the unexpected OoM ++ * event case). ++ * No further work can now get executed for the active groups ++ * (new groups would have to be created to execute work) and ++ * in near future Clients would be duly informed of this ++ * reset. The resources (like User IO pages, GPU queue memory) ++ * allocated for the associated queues would be freed when the ++ * Clients do the teardown when they become aware of the reset. ++ */ ++ kbase_csf_active_queue_groups_reset(kbdev, kctx); ++ } ++ ++ mutex_unlock(&kbdev->kctx_list_lock); ++ ++ /* After the queue groups are reset, clear out the scheduler data fields */ ++ scheduler_inner_reset(kbdev); ++} ++ ++static void firmware_aliveness_monitor(struct work_struct *work) ++{ ++ struct kbase_device *kbdev = container_of(work, struct kbase_device, ++ csf.scheduler.ping_work.work); ++ int err; ++ ++ /* Ensure that reset will not be occurring while this function is being ++ * executed as otherwise calling kbase_reset_gpu when reset is already ++ * occurring is a programming error. 
++ * ++ * We must use the 'try' variant as the Reset worker can try to flush ++ * this workqueue, which would otherwise deadlock here if we tried to ++ * wait for the reset (and thus ourselves) to complete. ++ */ ++ err = kbase_reset_gpu_try_prevent(kbdev); ++ if (err) { ++ /* It doesn't matter whether the value was -EAGAIN or a fatal ++ * error, just stop processing. In case of -EAGAIN, the Reset ++ * worker will restart the scheduler later to resume ping ++ */ ++ return; ++ } ++ ++ mutex_lock(&kbdev->csf.scheduler.lock); ++ ++#ifdef CONFIG_MALI_DEBUG ++ if (fw_debug) { ++ /* ping requests cause distraction in firmware debugging */ ++ goto exit; ++ } ++#endif ++ ++ if (kbdev->csf.scheduler.state == SCHED_SUSPENDED) ++ goto exit; ++ ++ if (get_nr_active_csgs(kbdev) != 1) ++ goto exit; ++ ++ if (kbase_csf_scheduler_protected_mode_in_use(kbdev)) ++ goto exit; ++ ++ if (kbase_pm_context_active_handle_suspend(kbdev, ++ KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { ++ /* Suspend pending - no real need to ping */ ++ goto exit; ++ } ++ ++ kbase_pm_wait_for_desired_state(kbdev); ++ ++ err = kbase_csf_firmware_ping_wait(kbdev); ++ ++ if (err) { ++ /* It is acceptable to enqueue a reset whilst we've prevented ++ * them, it will happen after we've allowed them again ++ */ ++ if (kbase_prepare_to_reset_gpu( ++ kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) ++ kbase_reset_gpu(kbdev); ++ } else if (get_nr_active_csgs(kbdev) == 1) { ++ queue_delayed_work(system_long_wq, ++ &kbdev->csf.scheduler.ping_work, ++ msecs_to_jiffies(FIRMWARE_PING_INTERVAL_MS)); ++ } ++ ++ kbase_pm_context_idle(kbdev); ++exit: ++ mutex_unlock(&kbdev->csf.scheduler.lock); ++ kbase_reset_gpu_allow(kbdev); ++ return; ++} ++ ++int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, ++ struct kbase_suspend_copy_buffer *sus_buf) ++{ ++ struct kbase_context *const kctx = group->kctx; ++ struct kbase_device *const kbdev = kctx->kbdev; ++ struct kbase_csf_scheduler *const scheduler = 
&kbdev->csf.scheduler; ++ int err = 0; ++ ++ kbase_reset_gpu_assert_prevented(kbdev); ++ lockdep_assert_held(&kctx->csf.lock); ++ mutex_lock(&scheduler->lock); ++ ++ if (kbasep_csf_scheduler_group_is_on_slot_locked(group)) { ++ DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0}; ++ ++ set_bit(kbase_csf_scheduler_group_get_slot(group), slot_mask); ++ ++ if (!WARN_ON(scheduler->state == SCHED_SUSPENDED)) ++ suspend_queue_group(group); ++ err = wait_csg_slots_suspend(kbdev, slot_mask, ++ kbdev->csf.fw_timeout_ms); ++ if (err) { ++ dev_warn(kbdev->dev, "Timed out waiting for the group %d to suspend on slot %d", ++ group->handle, group->csg_nr); ++ goto exit; ++ } ++ } ++ ++ if (queue_group_suspended_locked(group)) { ++ unsigned int target_page_nr = 0, i = 0; ++ u64 offset = sus_buf->offset; ++ size_t to_copy = sus_buf->size; ++ ++ if (scheduler->state != SCHED_SUSPENDED) { ++ /* Similar to the case of HW counters, need to flush ++ * the GPU cache before reading from the suspend buffer ++ * pages as they are mapped and cached on GPU side. ++ */ ++ kbase_gpu_start_cache_clean(kbdev); ++ kbase_gpu_wait_cache_clean(kbdev); ++ } else { ++ /* Make sure power down transitions have completed, ++ * i.e. L2 has been powered off as that would ensure ++ * its contents are flushed to memory. ++ * This is needed as Scheduler doesn't wait for the ++ * power down to finish. 
++ */ ++ kbase_pm_wait_for_desired_state(kbdev); ++ } ++ ++ for (i = 0; i < PFN_UP(sus_buf->size) && ++ target_page_nr < sus_buf->nr_pages; i++) { ++ struct page *pg = ++ as_page(group->normal_suspend_buf.phy[i]); ++ void *sus_page = kmap(pg); ++ ++ if (sus_page) { ++ kbase_sync_single_for_cpu(kbdev, ++ kbase_dma_addr(pg), ++ PAGE_SIZE, DMA_BIDIRECTIONAL); ++ ++ err = kbase_mem_copy_to_pinned_user_pages( ++ sus_buf->pages, sus_page, ++ &to_copy, sus_buf->nr_pages, ++ &target_page_nr, offset); ++ kunmap(pg); ++ if (err) ++ break; ++ } else { ++ err = -ENOMEM; ++ break; ++ } ++ } ++ schedule_in_cycle(group, false); ++ } else { ++ /* If addr-space fault, the group may have been evicted */ ++ err = -EIO; ++ } ++ ++exit: ++ mutex_unlock(&scheduler->lock); ++ return err; ++} ++ ++KBASE_EXPORT_TEST_API(kbase_csf_scheduler_group_copy_suspend_buf); ++ ++/** ++ * group_sync_updated() - Evaluate sync wait condition of all blocked command ++ * queues of the group. ++ * ++ * @group: Pointer to the command queue group that has blocked command queue(s) ++ * bound to it. ++ * ++ * Return: true if sync wait condition is satisfied for at least one blocked ++ * queue of the group. ++ */ ++static bool group_sync_updated(struct kbase_queue_group *group) ++{ ++ bool updated = false; ++ int stream; ++ ++ /* Groups can also be blocked on-slot during protected mode. */ ++ WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC && ++ group->run_state != KBASE_CSF_GROUP_IDLE); ++ ++ for (stream = 0; stream < MAX_SUPPORTED_STREAMS_PER_GROUP; ++stream) { ++ struct kbase_queue *const queue = group->bound_queues[stream]; ++ ++ /* To check the necessity of sync-wait evaluation, ++ * we rely on the cached 'status_wait' instead of reading it ++ * directly from shared memory as the CSG has been already ++ * evicted from the CSG slot, thus this CSG doesn't have ++ * valid information in the shared memory. 
++ */ ++ if (queue && queue->enabled && ++ CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) ++ if (evaluate_sync_update(queue)) { ++ updated = true; ++ queue->status_wait = 0; ++ } ++ } ++ ++ return updated; ++} ++ ++/** ++ * scheduler_get_protm_enter_async_group() - Check if the GPU queue group ++ * can be now allowed to execute in protected mode. ++ * ++ * @kbdev: Pointer to the GPU device. ++ * @group: Pointer to the GPU queue group. ++ * ++ * This function is called outside the scheduling tick/tock to determine ++ * if the given GPU queue group can now execute in protected mode or not. ++ * If the group pointer passed is NULL then the evaluation is done for the ++ * highest priority group on the scheduler maintained group lists without ++ * tick associated rotation actions. This is referred to as the 'top-group' ++ * in a tock action sense. ++ * ++ * It returns the same group pointer, that was passed as an argument, if that ++ * group matches the highest priority group and has pending protected region ++ * requests otherwise NULL is returned. ++ * ++ * If the group pointer passed is NULL then the internal evaluated highest ++ * priority group is returned if that has pending protected region requests ++ * otherwise NULL is returned. ++ * ++ * The evaluated highest priority group may not necessarily be the same as the ++ * scheduler->top_grp. This can happen if there is dynamic de-idle update ++ * during the tick interval for some on-slots groups that were idle during the ++ * scheduler normal scheduling action, where the scheduler->top_grp was set. ++ * The recorded scheduler->top_grp is untouched by this evaluation, so will not ++ * affect the scheduler context/priority list rotation arrangement. ++ * ++ * Return: the pointer to queue group that can currently execute in protected ++ * mode or NULL. 
++ */ ++static struct kbase_queue_group *scheduler_get_protm_enter_async_group( ++ struct kbase_device *const kbdev, ++ struct kbase_queue_group *const group) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ struct kbase_queue_group *match_grp, *input_grp; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ if (scheduler->state != SCHED_INACTIVE) ++ return NULL; ++ ++ match_grp = get_tock_top_group(scheduler); ++ input_grp = group ? group : match_grp; ++ ++ if (input_grp && (input_grp == match_grp)) { ++ struct kbase_csf_cmd_stream_group_info *ginfo = ++ &kbdev->csf.global_iface.groups[0]; ++ unsigned long *pending = ++ input_grp->protm_pending_bitmap; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ ++ if (kbase_csf_scheduler_protected_mode_in_use(kbdev) || ++ bitmap_empty(pending, ginfo->stream_num)) ++ input_grp = NULL; ++ ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++ } else { ++ input_grp = NULL; ++ } ++ ++ return input_grp; ++} ++ ++void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group) ++{ ++ struct kbase_device *const kbdev = group->kctx->kbdev; ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ ++ int err = kbase_reset_gpu_try_prevent(kbdev); ++ /* Regardless of whether reset failed or is currently happening, exit ++ * early ++ */ ++ if (err) ++ return; ++ ++ mutex_lock(&scheduler->lock); ++ ++ /* Check if the group is now eligible for execution in protected mode. */ ++ if (scheduler_get_protm_enter_async_group(kbdev, group)) ++ scheduler_group_check_protm_enter(kbdev, group); ++ ++ mutex_unlock(&scheduler->lock); ++ kbase_reset_gpu_allow(kbdev); ++} ++ ++/** ++ * check_sync_update_for_idle_group_protm() - Check the sync wait condition ++ * for all the queues bound to ++ * the given group. ++ * ++ * @group: Pointer to the group that requires evaluation. 
++ * ++ * This function is called if the GPU is in protected mode and there are on ++ * slot idle groups with higher priority than the active protected mode group. ++ * This function will evaluate the sync condition, if any, of all the queues ++ * bound to the given group. ++ * ++ * Return true if the sync condition of at least one queue has been satisfied. ++ */ ++static bool check_sync_update_for_idle_group_protm( ++ struct kbase_queue_group *group) ++{ ++ struct kbase_device *const kbdev = group->kctx->kbdev; ++ struct kbase_csf_scheduler *const scheduler = ++ &kbdev->csf.scheduler; ++ bool sync_update_done = false; ++ int i; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) { ++ struct kbase_queue *queue = group->bound_queues[i]; ++ ++ if (queue && queue->enabled && !sync_update_done) { ++ struct kbase_csf_cmd_stream_group_info *const ginfo = ++ &kbdev->csf.global_iface.groups[group->csg_nr]; ++ struct kbase_csf_cmd_stream_info *const stream = ++ &ginfo->streams[queue->csi_index]; ++ u32 status = kbase_csf_firmware_cs_output( ++ stream, CS_STATUS_WAIT); ++ unsigned long flags; ++ ++ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_STATUS_WAIT, ++ queue->group, queue, status); ++ ++ if (!CS_STATUS_WAIT_SYNC_WAIT_GET(status)) ++ continue; ++ ++ /* Save the information of sync object of the command ++ * queue so the callback function, 'group_sync_updated' ++ * can evaluate the sync object when it gets updated ++ * later. 
++ */ ++ queue->status_wait = status; ++ queue->sync_ptr = kbase_csf_firmware_cs_output( ++ stream, CS_STATUS_WAIT_SYNC_POINTER_LO); ++ queue->sync_ptr |= (u64)kbase_csf_firmware_cs_output( ++ stream, CS_STATUS_WAIT_SYNC_POINTER_HI) << 32; ++ queue->sync_value = kbase_csf_firmware_cs_output( ++ stream, CS_STATUS_WAIT_SYNC_VALUE); ++ queue->blocked_reason = ++ CS_STATUS_BLOCKED_REASON_REASON_GET( ++ kbase_csf_firmware_cs_output( ++ stream, ++ CS_STATUS_BLOCKED_REASON)); ++ ++ if (!evaluate_sync_update(queue)) ++ continue; ++ ++ /* Update csg_slots_idle_mask and group's run_state */ ++ if (group->run_state != KBASE_CSF_GROUP_RUNNABLE) { ++ /* Only clear the group's idle flag if it has been dealt ++ * with by the scheduler's tick/tock action, otherwise ++ * leave it untouched. ++ */ ++ spin_lock_irqsave(&scheduler->interrupt_lock, ++ flags); ++ clear_bit((unsigned int)group->csg_nr, ++ scheduler->csg_slots_idle_mask); ++ KBASE_KTRACE_ADD_CSF_GRP( ++ kbdev, CSG_SLOT_IDLE_CLEAR, group, ++ scheduler->csg_slots_idle_mask[0]); ++ spin_unlock_irqrestore( ++ &scheduler->interrupt_lock, flags); ++ group->run_state = KBASE_CSF_GROUP_RUNNABLE; ++ } ++ ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u); ++ sync_update_done = true; ++ } ++ } ++ ++ return sync_update_done; ++} ++ ++/** ++ * check_sync_update_for_idle_groups_protm() - Check the sync wait condition ++ * for the idle groups on slot ++ * during protected mode. ++ * ++ * @kbdev: Pointer to the GPU device ++ * ++ * This function checks the gpu queues of all the idle groups on slot during ++ * protected mode that has a higher priority than the active protected mode ++ * group. ++ * ++ * Return true if the sync condition of at least one queue in a group has been ++ * satisfied. 
++ */ ++static bool check_sync_update_for_idle_groups_protm(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ struct kbase_queue_group *protm_grp; ++ bool exit_protm = false; ++ unsigned long flags; ++ u32 num_groups; ++ u32 i; ++ ++ lockdep_assert_held(&scheduler->lock); ++ ++ spin_lock_irqsave(&scheduler->interrupt_lock, flags); ++ protm_grp = scheduler->active_protm_grp; ++ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); ++ ++ if (!protm_grp) ++ return exit_protm; ++ ++ num_groups = kbdev->csf.global_iface.group_num; ++ ++ for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) { ++ struct kbase_csf_csg_slot *csg_slot = ++ &scheduler->csg_slots[i]; ++ struct kbase_queue_group *group = csg_slot->resident_group; ++ ++ if (group->scan_seq_num < protm_grp->scan_seq_num) { ++ /* If sync update has been performed for the group that ++ * has a higher priority than the protm group, then we ++ * need to exit protected mode. ++ */ ++ if (check_sync_update_for_idle_group_protm(group)) ++ exit_protm = true; ++ } ++ } ++ ++ return exit_protm; ++} ++ ++/** ++ * check_group_sync_update_worker() - Check the sync wait condition for all the ++ * blocked queue groups ++ * ++ * @work: Pointer to the context-specific work item for evaluating the wait ++ * condition for all the queue groups in idle_wait_groups list. ++ * ++ * This function checks the gpu queues of all the groups present in both ++ * idle_wait_groups list of a context and all on slot idle groups (if GPU ++ * is in protected mode). ++ * If the sync wait condition for at least one queue bound to the group has ++ * been satisfied then the group is moved to the per context list of ++ * runnable groups so that Scheduler can consider scheduling the group ++ * in next tick or exit protected mode. 
++ */ ++static void check_group_sync_update_worker(struct work_struct *work) ++{ ++ struct kbase_context *const kctx = container_of(work, ++ struct kbase_context, csf.sched.sync_update_work); ++ struct kbase_device *const kbdev = kctx->kbdev; ++ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ ++ mutex_lock(&scheduler->lock); ++ ++ KBASE_KTRACE_ADD(kbdev, GROUP_SYNC_UPDATE_WORKER_BEGIN, kctx, 0u); ++ if (kctx->csf.sched.num_idle_wait_grps != 0) { ++ struct kbase_queue_group *group, *temp; ++ ++ list_for_each_entry_safe(group, temp, ++ &kctx->csf.sched.idle_wait_groups, link) { ++ if (group_sync_updated(group)) { ++ /* Move this group back in to the runnable ++ * groups list of the context. ++ */ ++ update_idle_suspended_group_state(group); ++ KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u); ++ } ++ } ++ } else { ++ WARN_ON(!list_empty(&kctx->csf.sched.idle_wait_groups)); ++ } ++ ++ if (check_sync_update_for_idle_groups_protm(kbdev)) ++ scheduler_force_protm_exit(kbdev); ++ KBASE_KTRACE_ADD(kbdev, GROUP_SYNC_UPDATE_WORKER_END, kctx, 0u); ++ ++ mutex_unlock(&scheduler->lock); ++} ++ ++static ++enum kbase_csf_event_callback_action check_group_sync_update_cb(void *param) ++{ ++ struct kbase_context *const kctx = param; ++ ++ KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT, kctx, 0u); ++ queue_work(kctx->csf.sched.sync_update_wq, ++ &kctx->csf.sched.sync_update_work); ++ ++ return KBASE_CSF_EVENT_CALLBACK_KEEP; ++} ++ ++int kbase_csf_scheduler_context_init(struct kbase_context *kctx) ++{ ++ int priority; ++ int err; ++ ++ for (priority = 0; priority < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++ ++priority) { ++ INIT_LIST_HEAD(&kctx->csf.sched.runnable_groups[priority]); ++ } ++ ++ kctx->csf.sched.num_runnable_grps = 0; ++ INIT_LIST_HEAD(&kctx->csf.sched.idle_wait_groups); ++ kctx->csf.sched.num_idle_wait_grps = 0; ++ kctx->csf.sched.ngrp_to_schedule = 0; ++ ++ kctx->csf.sched.sync_update_wq = ++ 
alloc_ordered_workqueue("mali_kbase_csf_sync_update_wq", ++ WQ_HIGHPRI); ++ if (!kctx->csf.sched.sync_update_wq) { ++ dev_err(kctx->kbdev->dev, ++ "Failed to initialize scheduler context workqueue"); ++ return -ENOMEM; ++ } ++ ++ INIT_WORK(&kctx->csf.sched.sync_update_work, ++ check_group_sync_update_worker); ++ ++ err = kbase_csf_event_wait_add(kctx, check_group_sync_update_cb, kctx); ++ ++ if (err) { ++ dev_err(kctx->kbdev->dev, ++ "Failed to register a sync update callback"); ++ destroy_workqueue(kctx->csf.sched.sync_update_wq); ++ } ++ ++ return err; ++} ++ ++void kbase_csf_scheduler_context_term(struct kbase_context *kctx) ++{ ++ kbase_csf_event_wait_remove(kctx, check_group_sync_update_cb, kctx); ++ cancel_work_sync(&kctx->csf.sched.sync_update_work); ++ destroy_workqueue(kctx->csf.sched.sync_update_wq); ++} ++ ++int kbase_csf_scheduler_init(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ u32 num_groups = kbdev->csf.global_iface.group_num; ++ ++ bitmap_zero(scheduler->csg_inuse_bitmap, num_groups); ++ bitmap_zero(scheduler->csg_slots_idle_mask, num_groups); ++ ++ scheduler->csg_slots = kcalloc(num_groups, ++ sizeof(*scheduler->csg_slots), GFP_KERNEL); ++ if (!scheduler->csg_slots) { ++ dev_err(kbdev->dev, ++ "Failed to allocate memory for csg slot status array\n"); ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++ ++int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ ++ scheduler->timer_enabled = true; ++ ++ scheduler->wq = alloc_ordered_workqueue("csf_scheduler_wq", WQ_HIGHPRI); ++ if (!scheduler->wq) { ++ dev_err(kbdev->dev, "Failed to allocate scheduler workqueue\n"); ++ return -ENOMEM; ++ } ++ ++ INIT_WORK(&scheduler->tick_work, schedule_on_tick); ++ INIT_DEFERRABLE_WORK(&scheduler->tock_work, schedule_on_tock); ++ ++ INIT_DEFERRABLE_WORK(&scheduler->ping_work, firmware_aliveness_monitor); ++ 
BUILD_BUG_ON(CSF_FIRMWARE_TIMEOUT_MS >= FIRMWARE_PING_INTERVAL_MS); ++ ++ mutex_init(&scheduler->lock); ++ spin_lock_init(&scheduler->interrupt_lock); ++ ++ /* Internal lists */ ++ INIT_LIST_HEAD(&scheduler->runnable_kctxs); ++ INIT_LIST_HEAD(&scheduler->groups_to_schedule); ++ INIT_LIST_HEAD(&scheduler->idle_groups_to_schedule); ++ ++ BUILD_BUG_ON(MAX_SUPPORTED_CSGS > ++ (sizeof(scheduler->csgs_events_enable_mask) * BITS_PER_BYTE)); ++ bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS); ++ scheduler->state = SCHED_SUSPENDED; ++ scheduler->pm_active_count = 0; ++ scheduler->ngrp_to_schedule = 0; ++ scheduler->total_runnable_grps = 0; ++ scheduler->top_ctx = NULL; ++ scheduler->top_grp = NULL; ++ scheduler->last_schedule = 0; ++ scheduler->tock_pending_request = false; ++ scheduler->active_protm_grp = NULL; ++ scheduler->gpu_idle_fw_timer_enabled = false; ++ scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS; ++ scheduler_doorbell_init(kbdev); ++ ++ INIT_WORK(&scheduler->gpu_idle_work, gpu_idle_worker); ++ atomic_set(&scheduler->non_idle_offslot_grps, 0); ++ ++ hrtimer_init(&scheduler->tick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ scheduler->tick_timer.function = tick_timer_callback; ++ scheduler->tick_timer_active = false; ++ ++ return 0; ++} ++ ++void kbase_csf_scheduler_term(struct kbase_device *kbdev) ++{ ++ if (kbdev->csf.scheduler.csg_slots) { ++ WARN_ON(atomic_read(&kbdev->csf.scheduler.non_idle_offslot_grps)); ++ WARN_ON(csgs_active(kbdev)); ++ flush_work(&kbdev->csf.scheduler.gpu_idle_work); ++ mutex_lock(&kbdev->csf.scheduler.lock); ++ if (WARN_ON(kbdev->csf.scheduler.state != SCHED_SUSPENDED)) ++ scheduler_suspend(kbdev); ++ mutex_unlock(&kbdev->csf.scheduler.lock); ++ cancel_delayed_work_sync(&kbdev->csf.scheduler.ping_work); ++ cancel_tick_timer(kbdev); ++ cancel_work_sync(&kbdev->csf.scheduler.tick_work); ++ cancel_delayed_work_sync(&kbdev->csf.scheduler.tock_work); ++ mutex_destroy(&kbdev->csf.scheduler.lock); ++ 
kfree(kbdev->csf.scheduler.csg_slots); ++ kbdev->csf.scheduler.csg_slots = NULL; ++ } ++} ++ ++void kbase_csf_scheduler_early_term(struct kbase_device *kbdev) ++{ ++ if (kbdev->csf.scheduler.wq) ++ destroy_workqueue(kbdev->csf.scheduler.wq); ++} ++ ++/** ++ * scheduler_enable_tick_timer_nolock - Enable the scheduler tick timer. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * This function will restart the scheduler tick so that regular scheduling can ++ * be resumed without any explicit trigger (like kicking of GPU queues). This ++ * is a variant of kbase_csf_scheduler_enable_tick_timer() that assumes the ++ * CSF scheduler lock to already have been held. ++ */ ++static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ ++ lockdep_assert_held(&kbdev->csf.scheduler.lock); ++ ++ if (unlikely(!scheduler_timer_is_enabled_nolock(kbdev))) ++ return; ++ ++ WARN_ON((scheduler->state != SCHED_INACTIVE) && ++ (scheduler->state != SCHED_SUSPENDED)); ++ ++ if (scheduler->total_runnable_grps > 0) { ++ enqueue_tick_work(kbdev); ++ dev_dbg(kbdev->dev, "Re-enabling the scheduler timer\n"); ++ } else if (scheduler->state != SCHED_SUSPENDED) { ++ queue_work(system_wq, &scheduler->gpu_idle_work); ++ } ++} ++ ++void kbase_csf_scheduler_enable_tick_timer(struct kbase_device *kbdev) ++{ ++ mutex_lock(&kbdev->csf.scheduler.lock); ++ scheduler_enable_tick_timer_nolock(kbdev); ++ mutex_unlock(&kbdev->csf.scheduler.lock); ++} ++ ++bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ bool enabled; ++ ++ mutex_lock(&scheduler->lock); ++ enabled = scheduler_timer_is_enabled_nolock(kbdev); ++ mutex_unlock(&scheduler->lock); ++ ++ return enabled; ++} ++ ++void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev, ++ bool enable) ++{ ++ struct 
kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; ++ bool currently_enabled; ++ ++ mutex_lock(&scheduler->lock); ++ ++ currently_enabled = scheduler_timer_is_enabled_nolock(kbdev); ++ if (currently_enabled && !enable) { ++ scheduler->timer_enabled = false; ++ cancel_tick_timer(kbdev); ++ cancel_delayed_work(&scheduler->tock_work); ++ mutex_unlock(&scheduler->lock); ++ /* The non-sync version to cancel the normal work item is not ++ * available, so need to drop the lock before cancellation. ++ */ ++ cancel_work_sync(&scheduler->tick_work); ++ } else if (!currently_enabled && enable) { ++ scheduler->timer_enabled = true; ++ ++ scheduler_enable_tick_timer_nolock(kbdev); ++ mutex_unlock(&scheduler->lock); ++ } else { ++ /* The requested state is already in effect, so there is nothing ++ * to change, but the scheduler lock taken above must still be ++ * released; returning with it held would block every later ++ * locker of scheduler->lock. ++ */ ++ mutex_unlock(&scheduler->lock); ++ } ++} ++ ++void kbase_csf_scheduler_kick(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ ++ mutex_lock(&scheduler->lock); ++ ++ /* A manual kick is only acted upon while the tick timer is disabled */ ++ if (unlikely(scheduler_timer_is_enabled_nolock(kbdev))) ++ goto out; ++ ++ if (scheduler->total_runnable_grps > 0) { ++ enqueue_tick_work(kbdev); ++ dev_dbg(kbdev->dev, "Kicking the scheduler manually\n"); ++ } ++ ++out: ++ mutex_unlock(&scheduler->lock); ++} ++ ++void kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ ++ /* Cancel any potential queued delayed work(s) */ ++ cancel_work_sync(&scheduler->tick_work); ++ cancel_delayed_work_sync(&scheduler->tock_work); ++ ++ if (kbase_reset_gpu_prevent_and_wait(kbdev)) { ++ dev_warn(kbdev->dev, ++ "Stop PM suspending for failing to prevent gpu reset.\n"); ++ return; ++ } ++ ++ mutex_lock(&scheduler->lock); ++ ++ disable_gpu_idle_fw_timer(kbdev); ++ ++ if (scheduler->state != SCHED_SUSPENDED) { ++ suspend_active_groups_on_powerdown(kbdev, true); ++ dev_info(kbdev->dev, "Scheduler PM suspend"); ++ scheduler_suspend(kbdev); ++ cancel_tick_timer(kbdev); ++ } ++ mutex_unlock(&scheduler->lock); ++ ++ kbase_reset_gpu_allow(kbdev); ++} 
++KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_suspend); ++ ++void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev) ++{ ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ ++ mutex_lock(&scheduler->lock); ++ ++ if (scheduler->total_runnable_grps > 0) { ++ WARN_ON(scheduler->state != SCHED_SUSPENDED); ++ dev_info(kbdev->dev, "Scheduler PM resume"); ++ scheduler_wakeup(kbdev, true); ++ } ++ mutex_unlock(&scheduler->lock); ++} ++KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_resume); ++ ++void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev) ++{ ++ unsigned long flags; ++ u32 prev_count; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ prev_count = kbdev->csf.scheduler.pm_active_count++; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ /* On 0 => 1, make a pm_ctx_active request */ ++ if (!prev_count) ++ kbase_pm_context_active(kbdev); ++ else ++ WARN_ON(prev_count == U32_MAX); ++} ++KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_active); ++ ++void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev) ++{ ++ unsigned long flags; ++ u32 prev_count; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ prev_count = kbdev->csf.scheduler.pm_active_count--; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ if (prev_count == 1) ++ kbase_pm_context_idle(kbdev); ++ else ++ WARN_ON(prev_count == 0); ++} ++KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_idle); +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_scheduler.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_scheduler.h +new file mode 100644 +index 0000000..428ecbe +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_scheduler.h +@@ -0,0 +1,494 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#ifndef _KBASE_CSF_SCHEDULER_H_ ++#define _KBASE_CSF_SCHEDULER_H_ ++ ++#include "mali_kbase_csf.h" ++ ++/** ++ * kbase_csf_scheduler_queue_start() - Enable the running of GPU command queue ++ * on firmware. ++ * ++ * @queue: Pointer to the GPU command queue to be started. ++ * ++ * This function would enable the start of a CSI, within a ++ * CSG, to which the @queue was bound. ++ * If the CSG is already scheduled and resident, the CSI will be started ++ * right away, otherwise once the group is made resident. ++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_scheduler_queue_start(struct kbase_queue *queue); ++ ++/** ++ * kbase_csf_scheduler_queue_stop() - Disable the running of GPU command queue ++ * on firmware. ++ * ++ * @queue: Pointer to the GPU command queue to be stopped. ++ * ++ * This function would stop the CSI, within a CSG, to which @queue was bound. ++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue); ++ ++/** ++ * kbase_csf_scheduler_group_protm_enter - Handle the protm enter event for the ++ * GPU command queue group. ++ * ++ * @group: The command queue group. 
++ * ++ * This function could request the firmware to enter the protected mode ++ * and allow the execution of protected region instructions for all the ++ * bound queues of the group that have protm pending bit set in their ++ * respective CS_ACK register. ++ */ ++void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group); ++ ++/** ++ * kbase_csf_scheduler_group_get_slot() - Checks if a queue group is ++ * programmed on a firmware CSG slot ++ * and returns the slot number. ++ * ++ * @group: The command queue group. ++ * ++ * Return: The slot number, if the group is programmed on a slot. ++ * Otherwise returns a negative number. ++ * ++ * Note: This function should not be used if the interrupt_lock is held. Use ++ * kbase_csf_scheduler_group_get_slot_locked() instead. ++ */ ++int kbase_csf_scheduler_group_get_slot(struct kbase_queue_group *group); ++ ++/** ++ * kbase_csf_scheduler_group_get_slot_locked() - Checks if a queue group is ++ * programmed on a firmware CSG slot ++ * and returns the slot number. ++ * ++ * @group: The command queue group. ++ * ++ * Return: The slot number, if the group is programmed on a slot. ++ * Otherwise returns a negative number. ++ * ++ * Note: Caller must hold the interrupt_lock. ++ */ ++int kbase_csf_scheduler_group_get_slot_locked(struct kbase_queue_group *group); ++ ++/** ++ * kbase_csf_scheduler_group_events_enabled() - Checks if interrupt events ++ * should be handled for a queue group. ++ * ++ * @kbdev: The device of the group. ++ * @group: The queue group. ++ * ++ * Return: true if interrupt events should be handled. ++ * ++ * Note: Caller must hold the interrupt_lock. ++ */ ++bool kbase_csf_scheduler_group_events_enabled(struct kbase_device *kbdev, ++ struct kbase_queue_group *group); ++ ++/** ++ * kbase_csf_scheduler_get_group_on_slot()- Gets the queue group that has been ++ * programmed to a firmware CSG slot. ++ * ++ * @kbdev: The GPU device. ++ * @slot: The slot for which to get the queue group. 
++ * ++ * Return: Pointer to the programmed queue group. ++ * ++ * Note: Caller must hold the interrupt_lock. ++ */ ++struct kbase_queue_group *kbase_csf_scheduler_get_group_on_slot( ++ struct kbase_device *kbdev, int slot); ++ ++/** ++ * kbase_csf_scheduler_group_deschedule() - Deschedule a GPU command queue ++ * group from the firmware. ++ * ++ * @group: Pointer to the queue group to be descheduled. ++ * ++ * This function would disable the scheduling of GPU command queue group on ++ * firmware. ++ */ ++void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group); ++ ++/** ++ * kbase_csf_scheduler_evict_ctx_slots() - Evict all GPU command queue groups ++ * of a given context that are active ++ * running from the firmware. ++ * ++ * @kbdev: The GPU device. ++ * @kctx: Kbase context for the evict operation. ++ * @evicted_groups: List_head for returning evicted active queue groups. ++ * ++ * This function would disable the scheduling of GPU command queue groups active ++ * on firmware slots from the given Kbase context. The affected groups are ++ * added to the supplied list_head argument. ++ */ ++void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev, ++ struct kbase_context *kctx, struct list_head *evicted_groups); ++ ++/** ++ * kbase_csf_scheduler_context_init() - Initialize the context-specific part ++ * for CSF scheduler. ++ * ++ * @kctx: Pointer to kbase context that is being created. ++ * ++ * This function must be called during Kbase context creation. ++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_scheduler_context_init(struct kbase_context *kctx); ++ ++/** ++ * kbase_csf_scheduler_init - Initialize the CSF scheduler ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * The scheduler does the arbitration for the CSG slots ++ * provided by the firmware between the GPU command queue groups created ++ * by the Clients. 
++ * This function must be called after loading firmware and parsing its capabilities. ++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_scheduler_init(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_scheduler_early_init - Early initialization for the CSF scheduler ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * Initialize necessary resources such as locks, workqueue for CSF scheduler. ++ * This must be called at kbase probe. ++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_scheduler_early_init(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_scheduler_context_term() - Terminate the context-specific part ++ * for CSF scheduler. ++ * ++ * @kctx: Pointer to kbase context that is being terminated. ++ * ++ * This function must be called during Kbase context termination. ++ */ ++void kbase_csf_scheduler_context_term(struct kbase_context *kctx); ++ ++/** ++ * kbase_csf_scheduler_term - Terminate the CSF scheduler. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * This should be called when unload of firmware is done on device ++ * termination. ++ */ ++void kbase_csf_scheduler_term(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_scheduler_early_term - Early termination of the CSF scheduler. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * This should be called only when kbase probe fails or gets rmmoded. ++ */ ++void kbase_csf_scheduler_early_term(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_scheduler_reset - Reset the state of all active GPU command ++ * queue groups. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * This function will first iterate through all the active/scheduled GPU ++ * command queue groups and suspend them (to avoid losing work for groups ++ * that are not stuck). 
The groups that could not get suspended would be ++ * descheduled and marked as terminated (which will then lead to unbinding ++ * of all the queues bound to them) and also no more work would be allowed ++ * to execute for them. ++ * ++ * This is similar to the action taken in response to an unexpected OoM event. ++ * No explicit re-initialization is done for CSG & CS interface I/O pages; ++ * instead, that happens implicitly on firmware reload. ++ * ++ * Should be called only after initiating the GPU reset. ++ */ ++void kbase_csf_scheduler_reset(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_scheduler_enable_tick_timer - Enable the scheduler tick timer. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * This function will restart the scheduler tick so that regular scheduling can ++ * be resumed without any explicit trigger (like kicking of GPU queues). ++ */ ++void kbase_csf_scheduler_enable_tick_timer(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_scheduler_group_copy_suspend_buf - Suspend a queue ++ * group and copy suspend buffer. ++ * ++ * This function is called to suspend a queue group and copy the suspend_buffer ++ * contents to the input buffer provided. ++ * ++ * @group: Pointer to the queue group to be suspended. ++ * @sus_buf: Pointer to the structure which contains details of the ++ * user buffer and its kernel pinned pages to which we need to copy ++ * the group suspend buffer. ++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, ++ struct kbase_suspend_copy_buffer *sus_buf); ++ ++/** ++ * kbase_csf_scheduler_lock - Acquire the global Scheduler lock. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * This function will take the global scheduler lock, in order to serialize ++ * against the Scheduler actions, for access to CS IO pages. 
++ */ ++static inline void kbase_csf_scheduler_lock(struct kbase_device *kbdev) ++{ ++ mutex_lock(&kbdev->csf.scheduler.lock); ++} ++ ++/** ++ * kbase_csf_scheduler_unlock - Release the global Scheduler lock. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ */ ++static inline void kbase_csf_scheduler_unlock(struct kbase_device *kbdev) ++{ ++ mutex_unlock(&kbdev->csf.scheduler.lock); ++} ++ ++/** ++ * kbase_csf_scheduler_spin_lock - Acquire Scheduler interrupt spinlock. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @flags: Pointer to the memory location that would store the previous ++ * interrupt state. ++ * ++ * This function will take the global scheduler lock, in order to serialize ++ * against the Scheduler actions, for access to CS IO pages. ++ */ ++static inline void kbase_csf_scheduler_spin_lock(struct kbase_device *kbdev, ++ unsigned long *flags) ++{ ++ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, *flags); ++} ++ ++/** ++ * kbase_csf_scheduler_spin_unlock - Release Scheduler interrupt spinlock. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @flags: Previously stored interrupt state when Scheduler interrupt ++ * spinlock was acquired. ++ */ ++static inline void kbase_csf_scheduler_spin_unlock(struct kbase_device *kbdev, ++ unsigned long flags) ++{ ++ spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); ++} ++ ++/** ++ * kbase_csf_scheduler_spin_lock_assert_held - Assert if the Scheduler ++ * interrupt spinlock is held. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ */ ++static inline void ++kbase_csf_scheduler_spin_lock_assert_held(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock); ++} ++ ++/** ++ * kbase_csf_scheduler_timer_is_enabled() - Check if the scheduler wakes up ++ * automatically for periodic tasks. 
++ * ++ * @kbdev: Pointer to the device ++ * ++ * Return: true if the scheduler is configured to wake up periodically ++ */ ++bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_scheduler_timer_set_enabled() - Enable/disable periodic ++ * scheduler tasks. ++ * ++ * @kbdev: Pointer to the device ++ * @enable: Whether to enable periodic scheduler tasks ++ */ ++void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev, ++ bool enable); ++ ++/** ++ * kbase_csf_scheduler_kick - Perform pending scheduling tasks once. ++ * ++ * Note: This function is only effective if the scheduling timer is disabled. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ */ ++void kbase_csf_scheduler_kick(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_scheduler_protected_mode_in_use() - Check if the scheduler is ++ * running with protected mode tasks. ++ * ++ * @kbdev: Pointer to the device ++ * ++ * Return: true if the scheduler is running with protected mode tasks ++ */ ++static inline bool kbase_csf_scheduler_protected_mode_in_use( ++ struct kbase_device *kbdev) ++{ ++ return (kbdev->csf.scheduler.active_protm_grp != NULL); ++} ++ ++/** ++ * kbase_csf_scheduler_pm_active - Perform scheduler power active operation ++ * ++ * Note: This function will increase the scheduler's internal pm_active_count ++ * value, ensuring that both GPU and MCU are powered for access. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ */ ++void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_scheduler_pm_idle - Perform the scheduler power idle operation ++ * ++ * Note: This function will decrease the scheduler's internal pm_active_count ++ * value. On reaching 0, the MCU and GPU could be powered off. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. 
++ */
++void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev);
++
++/**
++ * kbase_csf_scheduler_pm_resume - Reactivate the scheduler on system resume
++ *
++ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
++ *
++ * This function will make the scheduler resume the scheduling of queue groups
++ * and take the power management reference, if there are any runnable groups.
++ */
++void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev);
++
++/**
++ * kbase_csf_scheduler_pm_suspend - Idle the scheduler on system suspend
++ *
++ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
++ *
++ * This function will make the scheduler suspend all the running queue groups
++ * and drop its power management reference.
++ */
++void kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev);
++
++/**
++ * kbase_csf_scheduler_all_csgs_idle() - Check if the scheduler internal
++ * runtime used slots are all tagged as idle command queue groups.
++ *
++ * @kbdev: Pointer to the device
++ *
++ * The idle mask is compared against the in-use bitmap over the first
++ * global_iface.group_num bits.
++ *
++ * Return: true if all the used slots are tagged as idle CSGs.
++ *
++ * Note: Caller must hold the interrupt_lock.
++ */
++static inline bool kbase_csf_scheduler_all_csgs_idle(struct kbase_device *kbdev)
++{
++	lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock);
++	return bitmap_equal(kbdev->csf.scheduler.csg_slots_idle_mask,
++			    kbdev->csf.scheduler.csg_inuse_bitmap,
++			    kbdev->csf.global_iface.group_num);
++}
++
++/**
++ * kbase_csf_scheduler_advance_tick_nolock() - Advance the scheduling tick
++ *
++ * @kbdev: Pointer to the device
++ *
++ * This function advances the scheduling tick by enqueuing the tick work item for
++ * immediate execution, but only if the tick hrtimer is active. If the timer
++ * is inactive then the tick work item is already in flight.
++ * The caller must hold the interrupt lock.
++ */
++static inline void
++kbase_csf_scheduler_advance_tick_nolock(struct kbase_device *kbdev)
++{
++	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
++
++	lockdep_assert_held(&scheduler->interrupt_lock);
++
++	if (scheduler->tick_timer_active) {
++		KBASE_KTRACE_ADD(kbdev, SCHEDULER_ADVANCE_TICK, NULL, 0u);
++		/* Clear the flag before queuing so that a second caller does
++		 * not enqueue the tick work again.
++		 */
++		scheduler->tick_timer_active = false;
++		queue_work(scheduler->wq, &scheduler->tick_work);
++	} else {
++		KBASE_KTRACE_ADD(kbdev, SCHEDULER_NOADVANCE_TICK, NULL, 0u);
++	}
++}
++
++/**
++ * kbase_csf_scheduler_advance_tick() - Advance the scheduling tick
++ *
++ * @kbdev: Pointer to the device
++ *
++ * This function advances the scheduling tick by enqueuing the tick work item for
++ * immediate execution, but only if the tick hrtimer is active. If the timer
++ * is inactive then the tick work item is already in flight.
++ *
++ * This variant takes the scheduler interrupt spinlock itself and then calls
++ * kbase_csf_scheduler_advance_tick_nolock().
++ */
++static inline void kbase_csf_scheduler_advance_tick(struct kbase_device *kbdev)
++{
++	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
++	unsigned long flags;
++
++	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
++	kbase_csf_scheduler_advance_tick_nolock(kbdev);
++	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
++}
++
++/**
++ * kbase_csf_scheduler_queue_has_trace() - report whether the queue has been
++ *                                         configured to operate with the
++ *                                         cs_trace feature.
++ *
++ * @queue: Pointer to the queue.
++ *
++ * Return: True if the gpu queue is configured to operate with the cs_trace
++ *         feature, otherwise false.
++ */
++static inline bool kbase_csf_scheduler_queue_has_trace(struct kbase_queue *queue)
++{
++	lockdep_assert_held(&queue->kctx->kbdev->csf.scheduler.lock);
++	/* In the current arrangement, it is possible for the context to enable
++	 * the cs_trace after some queues have been registered with cs_trace in
++	 * disabled state. So each queue has its own enabled/disabled condition.
++ */ ++ return (queue->trace_buffer_size && queue->trace_buffer_base); ++} ++ ++#endif /* _KBASE_CSF_SCHEDULER_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap.c +new file mode 100644 +index 0000000..8ecf235 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap.c +@@ -0,0 +1,611 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#include ++ ++#include "mali_kbase_csf_tiler_heap.h" ++#include "mali_kbase_csf_tiler_heap_def.h" ++#include "mali_kbase_csf_heap_context_alloc.h" ++ ++/** ++ * encode_chunk_ptr - Encode the address and size of a chunk as an integer. ++ * ++ * The size and address of the next chunk in a list are packed into a single ++ * 64-bit value for storage in a chunk's header. This function returns that ++ * value. ++ * ++ * @chunk_size: Size of a tiler heap chunk, in bytes. ++ * @chunk_addr: GPU virtual address of the same tiler heap chunk. ++ * ++ * Return: Next chunk pointer suitable for writing into a chunk header. 
++ */
++static u64 encode_chunk_ptr(u32 const chunk_size, u64 const chunk_addr)
++{
++	u64 encoded_size, encoded_addr;
++
++	/* Bits outside the masks cannot be represented in the header field;
++	 * warn, but carry on with the truncated value.
++	 */
++	WARN_ON(chunk_size & ~CHUNK_SIZE_MASK);
++	WARN_ON(chunk_addr & ~CHUNK_ADDR_MASK);
++
++	/* Widen to 64 bits before shifting into position. */
++	encoded_size =
++		(u64)(chunk_size >> CHUNK_HDR_NEXT_SIZE_ENCODE_SHIFT) <<
++		CHUNK_HDR_NEXT_SIZE_POS;
++
++	encoded_addr =
++		(chunk_addr >> CHUNK_HDR_NEXT_ADDR_ENCODE_SHIFT) <<
++		CHUNK_HDR_NEXT_ADDR_POS;
++
++	return (encoded_size & CHUNK_HDR_NEXT_SIZE_MASK) |
++		(encoded_addr & CHUNK_HDR_NEXT_ADDR_MASK);
++}
++
++/**
++ * get_last_chunk - Get the last chunk of a tiler heap
++ *
++ * @heap: Pointer to the tiler heap.
++ *
++ * The caller must hold the tiler_heaps lock of the heap's kbase context.
++ *
++ * Return: The address of the most recently-linked chunk, or NULL if none.
++ */
++static struct kbase_csf_tiler_heap_chunk *get_last_chunk(
++	struct kbase_csf_tiler_heap *const heap)
++{
++	lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
++
++	if (list_empty(&heap->chunks_list))
++		return NULL;
++
++	return list_last_entry(&heap->chunks_list,
++		struct kbase_csf_tiler_heap_chunk, link);
++}
++
++/**
++ * link_chunk - Link a chunk into a tiler heap
++ *
++ * Unless the @chunk is the first in the kernel's list of chunks belonging to
++ * a given tiler heap, this function stores the size and address of the @chunk
++ * in the header of the preceding chunk. This requires the GPU memory region
++ * containing the header to be mapped temporarily, which can fail.
++ *
++ * @heap: Pointer to the tiler heap.
++ * @chunk: Pointer to the heap chunk to be linked.
++ *
++ * Return: 0 if successful or a negative error code on failure.
++ */
++static int link_chunk(struct kbase_csf_tiler_heap *const heap,
++	struct kbase_csf_tiler_heap_chunk *const chunk)
++{
++	/* @chunk has not been added to the list yet, so the current tail is
++	 * the chunk that must point at it.
++	 */
++	struct kbase_csf_tiler_heap_chunk *const prev = get_last_chunk(heap);
++
++	if (prev) {
++		struct kbase_context *const kctx = heap->kctx;
++		struct kbase_vmap_struct map;
++		/* Map only the first 64-bit word of the previous chunk. */
++		u64 *const prev_hdr = kbase_vmap_prot(kctx, prev->gpu_va,
++			sizeof(*prev_hdr), KBASE_REG_CPU_WR, &map);
++
++		if (unlikely(!prev_hdr)) {
++			dev_err(kctx->kbdev->dev,
++				"Failed to map tiler heap chunk 0x%llX\n",
++				prev->gpu_va);
++			return -ENOMEM;
++		}
++
++		*prev_hdr = encode_chunk_ptr(heap->chunk_size, chunk->gpu_va);
++		kbase_vunmap(kctx, &map);
++
++		dev_dbg(kctx->kbdev->dev,
++			"Linked tiler heap chunks, 0x%llX -> 0x%llX\n",
++			prev->gpu_va, chunk->gpu_va);
++	}
++
++	return 0;
++}
++
++/**
++ * init_chunk - Initialize and link a tiler heap chunk
++ *
++ * Zero-initialize a new chunk's header (including its pointer to the next
++ * chunk, which doesn't exist yet) and then update the previous chunk's
++ * header to link the new chunk into the chunk list.
++ *
++ * @heap: Pointer to the tiler heap.
++ * @chunk: Pointer to the heap chunk to be initialized and linked.
++ * @link_with_prev: Flag to indicate if the new chunk needs to be linked with
++ *                  the previously allocated chunk.
++ *
++ * Return: 0 if successful or a negative error code on failure.
++ */
++static int init_chunk(struct kbase_csf_tiler_heap *const heap,
++	struct kbase_csf_tiler_heap_chunk *const chunk, bool link_with_prev)
++{
++	struct kbase_vmap_struct map;
++	/* The chunk header is an array of 64-bit words, not a struct. */
++	u64 *chunk_hdr = NULL;
++	struct kbase_context *const kctx = heap->kctx;
++
++	/* An address with bits outside CHUNK_ADDR_MASK cannot be encoded in
++	 * a "next chunk" pointer, so reject it before touching the memory.
++	 */
++	if (unlikely(chunk->gpu_va & ~CHUNK_ADDR_MASK)) {
++		dev_err(kctx->kbdev->dev,
++			"Tiler heap chunk address is unusable\n");
++		return -EINVAL;
++	}
++
++	chunk_hdr = kbase_vmap_prot(kctx,
++		chunk->gpu_va, CHUNK_HDR_SIZE, KBASE_REG_CPU_WR, &map);
++
++	if (unlikely(!chunk_hdr)) {
++		dev_err(kctx->kbdev->dev,
++			"Failed to map a tiler heap chunk header\n");
++		return -ENOMEM;
++	}
++
++	/* A zeroed header means "no next chunk". */
++	memset(chunk_hdr, 0, CHUNK_HDR_SIZE);
++	kbase_vunmap(kctx, &map);
++
++	if (link_with_prev)
++		return link_chunk(heap, chunk);
++	else
++		return 0;
++}
++
++/**
++ * create_chunk - Create a tiler heap chunk
++ *
++ * This function allocates a chunk of memory for a tiler heap and adds it to
++ * the end of the list of chunks associated with that heap. The size of the
++ * chunk is not a parameter because it is configured per-heap not per-chunk.
++ *
++ * @heap: Pointer to the tiler heap for which to allocate memory.
++ * @link_with_prev: Flag to indicate if the chunk to be allocated needs to be
++ *                  linked with the previously allocated chunk.
++ *
++ * Return: 0 if successful or a negative error code on failure.
++ */
++static int create_chunk(struct kbase_csf_tiler_heap *const heap,
++		bool link_with_prev)
++{
++	int err = 0;
++	struct kbase_context *const kctx = heap->kctx;
++	/* Chunk size is in bytes; round up to whole pages. */
++	u64 nr_pages = PFN_UP(heap->chunk_size);
++	u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
++		BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE |
++		BASE_MEM_COHERENT_LOCAL;
++	struct kbase_csf_tiler_heap_chunk *chunk = NULL;
++
++	flags |= base_mem_group_id_set(kctx->jit_group_id);
++
++	/* CPU read access is only granted in debug / vector-dump builds. */
++#if defined(CONFIG_MALI_DEBUG) || defined(CONFIG_MALI_VECTOR_DUMP)
++	flags |= BASE_MEM_PROT_CPU_RD;
++#endif
++
++	lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
++
++	chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
++	if (unlikely(!chunk)) {
++		dev_err(kctx->kbdev->dev,
++			"No kernel memory for a new tiler heap chunk\n");
++		return -ENOMEM;
++	}
++
++	/* Allocate GPU memory for the new chunk. */
++	INIT_LIST_HEAD(&chunk->link);
++	chunk->region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0,
++		&flags, &chunk->gpu_va);
++
++	if (unlikely(!chunk->region)) {
++		dev_err(kctx->kbdev->dev,
++			"Failed to allocate a tiler heap chunk\n");
++		err = -ENOMEM;
++	} else {
++		/* The chunk is initialized (and optionally linked) before it
++		 * is added to the list, so get_last_chunk() inside
++		 * link_chunk() still returns the previous tail.
++		 */
++		err = init_chunk(heap, chunk, link_with_prev);
++		if (unlikely(err)) {
++			/* Clear the no-user-free flag before releasing the
++			 * region that was just allocated.
++			 */
++			kbase_gpu_vm_lock(kctx);
++			chunk->region->flags &= ~KBASE_REG_NO_USER_FREE;
++			kbase_mem_free_region(kctx, chunk->region);
++			kbase_gpu_vm_unlock(kctx);
++		}
++	}
++
++	if (unlikely(err)) {
++		kfree(chunk);
++	} else {
++		list_add_tail(&chunk->link, &heap->chunks_list);
++		heap->chunk_count++;
++
++		dev_dbg(kctx->kbdev->dev, "Created tiler heap chunk 0x%llX\n",
++			chunk->gpu_va);
++	}
++
++	return err;
++}
++
++/**
++ * delete_chunk - Delete a tiler heap chunk
++ *
++ * This function frees a tiler heap chunk previously allocated by @create_chunk
++ * and removes it from the list of chunks associated with the heap.
++ * ++ * WARNING: The deleted chunk is not unlinked from the list of chunks used by ++ * the GPU, therefore it is only safe to use this function when ++ * deleting a heap. ++ * ++ * @heap: Pointer to the tiler heap for which @chunk was allocated. ++ * @chunk: Pointer to a chunk to be deleted. ++ */ ++static void delete_chunk(struct kbase_csf_tiler_heap *const heap, ++ struct kbase_csf_tiler_heap_chunk *const chunk) ++{ ++ struct kbase_context *const kctx = heap->kctx; ++ ++ lockdep_assert_held(&kctx->csf.tiler_heaps.lock); ++ ++ kbase_gpu_vm_lock(kctx); ++ chunk->region->flags &= ~KBASE_REG_NO_USER_FREE; ++ kbase_mem_free_region(kctx, chunk->region); ++ kbase_gpu_vm_unlock(kctx); ++ list_del(&chunk->link); ++ heap->chunk_count--; ++ kfree(chunk); ++} ++ ++/** ++ * delete_all_chunks - Delete all chunks belonging to a tiler heap ++ * ++ * This function empties the list of chunks associated with a tiler heap by ++ * freeing all chunks previously allocated by @create_chunk. ++ * ++ * @heap: Pointer to a tiler heap. ++ */ ++static void delete_all_chunks(struct kbase_csf_tiler_heap *heap) ++{ ++ struct list_head *entry = NULL, *tmp = NULL; ++ struct kbase_context *const kctx = heap->kctx; ++ ++ lockdep_assert_held(&kctx->csf.tiler_heaps.lock); ++ ++ list_for_each_safe(entry, tmp, &heap->chunks_list) { ++ struct kbase_csf_tiler_heap_chunk *chunk = list_entry( ++ entry, struct kbase_csf_tiler_heap_chunk, link); ++ ++ delete_chunk(heap, chunk); ++ } ++} ++ ++/** ++ * create_initial_chunks - Create the initial list of chunks for a tiler heap ++ * ++ * This function allocates a given number of chunks for a tiler heap and ++ * adds them to the list of chunks associated with that heap. ++ * ++ * @heap: Pointer to the tiler heap for which to allocate memory. ++ * @nchunks: Number of chunks to create. ++ * ++ * Return: 0 if successful or a negative error code on failure. 
++ */
++static int create_initial_chunks(struct kbase_csf_tiler_heap *const heap,
++	u32 const nchunks)
++{
++	int err = 0;
++	u32 i;
++
++	/* Every initial chunk asks to be linked to its predecessor; for the
++	 * very first one the list is empty, so link_chunk() has nothing to do.
++	 */
++	for (i = 0; (i < nchunks) && likely(!err); i++)
++		err = create_chunk(heap, true);
++
++	/* All-or-nothing: undo any chunks created before the failure. */
++	if (unlikely(err))
++		delete_all_chunks(heap);
++
++	return err;
++}
++
++/**
++ * delete_heap - Delete a tiler heap
++ *
++ * This function frees any chunks allocated for a tiler heap previously
++ * initialized by @kbase_csf_tiler_heap_init and removes it from the list of
++ * heaps associated with the kbase context. The heap context structure used by
++ * the firmware is also freed.
++ *
++ * The caller must hold the tiler_heaps lock of the heap's kbase context.
++ *
++ * @heap: Pointer to a tiler heap to be deleted.
++ */
++static void delete_heap(struct kbase_csf_tiler_heap *heap)
++{
++	struct kbase_context *const kctx = heap->kctx;
++
++	dev_dbg(kctx->kbdev->dev, "Deleting tiler heap 0x%llX\n", heap->gpu_va);
++
++	lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
++
++	delete_all_chunks(heap);
++
++	/* We could optimize context destruction by not freeing leaked heap
++	 * contexts but it doesn't seem worth the extra complexity.
++	 */
++	kbase_csf_heap_context_allocator_free(&kctx->csf.tiler_heaps.ctx_alloc,
++		heap->gpu_va);
++
++	list_del(&heap->link);
++
++	/* delete_all_chunks() should have brought the count down to zero. */
++	WARN_ON(heap->chunk_count);
++	KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id,
++		heap->heap_id, 0, 0, heap->max_chunks, heap->chunk_size, 0,
++		heap->target_in_flight, 0);
++
++	kfree(heap);
++}
++
++/**
++ * find_tiler_heap - Find a tiler heap from the address of its heap context
++ *
++ * Each tiler heap managed by the kernel has an associated heap context
++ * structure used by the firmware. This function finds a tiler heap object from
++ * the GPU virtual address of its associated heap context. The heap context
++ * should have been allocated by @kbase_csf_heap_context_allocator_alloc in the
++ * same @kctx.
++ *
++ * @kctx: Pointer to the kbase context to search for a tiler heap.
++ * @heap_gpu_va: GPU virtual address of a heap context structure.
++ *
++ * Return: pointer to the tiler heap object, or NULL if not found.
++ */
++static struct kbase_csf_tiler_heap *find_tiler_heap(
++	struct kbase_context *const kctx, u64 const heap_gpu_va)
++{
++	struct kbase_csf_tiler_heap *heap = NULL;
++
++	lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
++
++	list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link) {
++		if (heap_gpu_va == heap->gpu_va)
++			return heap;
++	}
++
++	dev_dbg(kctx->kbdev->dev, "Tiler heap 0x%llX was not found\n",
++		heap_gpu_va);
++
++	return NULL;
++}
++
++/* Set up the per-context tiler heap state: the heap context allocator, the
++ * list of heaps and the lock protecting them. Returns 0 or the allocator's
++ * error code.
++ */
++int kbase_csf_tiler_heap_context_init(struct kbase_context *const kctx)
++{
++	int err = kbase_csf_heap_context_allocator_init(
++		&kctx->csf.tiler_heaps.ctx_alloc, kctx);
++
++	if (unlikely(err))
++		return err;
++
++	INIT_LIST_HEAD(&kctx->csf.tiler_heaps.list);
++	mutex_init(&kctx->csf.tiler_heaps.lock);
++
++	dev_dbg(kctx->kbdev->dev, "Initialized a context for tiler heaps\n");
++
++	return 0;
++}
++
++/* Delete every heap still on the context's list, then tear down the lock and
++ * the heap context allocator.
++ */
++void kbase_csf_tiler_heap_context_term(struct kbase_context *const kctx)
++{
++	struct list_head *entry = NULL, *tmp = NULL;
++
++	dev_dbg(kctx->kbdev->dev, "Terminating a context for tiler heaps\n");
++
++	mutex_lock(&kctx->csf.tiler_heaps.lock);
++
++	/* delete_heap() unlinks and frees the entry, hence the _safe walk. */
++	list_for_each_safe(entry, tmp, &kctx->csf.tiler_heaps.list) {
++		struct kbase_csf_tiler_heap *heap = list_entry(
++			entry, struct kbase_csf_tiler_heap, link);
++		delete_heap(heap);
++	}
++
++	mutex_unlock(&kctx->csf.tiler_heaps.lock);
++	mutex_destroy(&kctx->csf.tiler_heaps.lock);
++
++	kbase_csf_heap_context_allocator_term(&kctx->csf.tiler_heaps.ctx_alloc);
++}
++
++/* Create a tiler heap with @initial_chunks chunks of @chunk_size bytes. On
++ * success the heap context address is stored in @heap_gpu_va and the address
++ * of the first chunk in @first_chunk_va. Returns 0, -EINVAL for rejected
++ * arguments, or -ENOMEM / the chunk creation error.
++ */
++int kbase_csf_tiler_heap_init(struct kbase_context *const kctx,
++	u32 const chunk_size, u32 const initial_chunks, u32 const max_chunks,
++	u16 const target_in_flight, u64 *const heap_gpu_va,
++	u64 *const first_chunk_va)
++{
++	int err = 0;
++	struct kbase_csf_tiler_heap *heap = NULL;
++	struct kbase_csf_heap_context_allocator *const ctx_alloc =
++		&kctx->csf.tiler_heaps.ctx_alloc;
++
++	dev_dbg(kctx->kbdev->dev,
++		"Creating a tiler heap with %u chunks (limit: %u) of size %u\n",
++		initial_chunks, max_chunks, chunk_size);
++
++	/* Argument validation: nothing has been allocated yet, so each
++	 * failure can return directly.
++	 */
++	if (chunk_size == 0)
++		return -EINVAL;
++
++	if (chunk_size & ~CHUNK_SIZE_MASK)
++		return -EINVAL;
++
++	if (initial_chunks == 0)
++		return -EINVAL;
++
++	if (initial_chunks > max_chunks)
++		return -EINVAL;
++
++	if (target_in_flight == 0)
++		return -EINVAL;
++
++	heap = kzalloc(sizeof(*heap), GFP_KERNEL);
++	if (unlikely(!heap)) {
++		dev_err(kctx->kbdev->dev,
++			"No kernel memory for a new tiler heap\n");
++		return -ENOMEM;
++	}
++
++	heap->kctx = kctx;
++	heap->chunk_size = chunk_size;
++	heap->max_chunks = max_chunks;
++	heap->target_in_flight = target_in_flight;
++	INIT_LIST_HEAD(&heap->chunks_list);
++
++	/* The heap context is allocated before the tiler_heaps lock is
++	 * taken; its result is only checked once the lock is held.
++	 */
++	heap->gpu_va = kbase_csf_heap_context_allocator_alloc(ctx_alloc);
++
++	mutex_lock(&kctx->csf.tiler_heaps.lock);
++
++	if (unlikely(!heap->gpu_va)) {
++		dev_err(kctx->kbdev->dev,
++			"Failed to allocate a tiler heap context\n");
++		err = -ENOMEM;
++	} else {
++		err = create_initial_chunks(heap, initial_chunks);
++		if (unlikely(err)) {
++			kbase_csf_heap_context_allocator_free(ctx_alloc,
++				heap->gpu_va);
++		}
++	}
++
++	if (unlikely(err)) {
++		kfree(heap);
++	} else {
++		struct kbase_csf_tiler_heap_chunk const *first_chunk =
++			list_first_entry(&heap->chunks_list,
++				struct kbase_csf_tiler_heap_chunk, link);
++
++		/* Heap IDs are taken from a per-context counter that is only
++		 * ever incremented here.
++		 */
++		kctx->csf.tiler_heaps.nr_of_heaps++;
++		heap->heap_id = kctx->csf.tiler_heaps.nr_of_heaps;
++		list_add(&heap->link, &kctx->csf.tiler_heaps.list);
++
++		*heap_gpu_va = heap->gpu_va;
++		*first_chunk_va = first_chunk->gpu_va;
++
++		/* NOTE(review): the products below are evaluated in the type
++		 * of the heap fields; if those are 32-bit the byte counts can
++		 * wrap for large heaps - confirm against the struct definition.
++		 */
++		KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(
++			kctx->kbdev, kctx->id, heap->heap_id,
++			PFN_UP(heap->chunk_size * heap->max_chunks),
++			PFN_UP(heap->chunk_size * heap->chunk_count),
++			heap->max_chunks, heap->chunk_size, heap->chunk_count,
++			heap->target_in_flight, 0);
++
++		dev_dbg(kctx->kbdev->dev, "Created tiler heap 0x%llX\n",
++			heap->gpu_va);
++	}
++
++	mutex_unlock(&kctx->csf.tiler_heaps.lock);
++
++	return err;
++}
++
++/* Delete the heap whose heap context lives at @heap_gpu_va. Returns 0, or
++ * -EINVAL if no such heap exists in @kctx.
++ */
++int kbase_csf_tiler_heap_term(struct kbase_context *const kctx,
++	u64 const heap_gpu_va)
++{
++	int err = 0;
++	struct kbase_csf_tiler_heap *heap = NULL;
++
++	mutex_lock(&kctx->csf.tiler_heaps.lock);
++
++	heap = find_tiler_heap(kctx, heap_gpu_va);
++	if (likely(heap))
++		delete_heap(heap);
++	else
++		err = -EINVAL;
++
++	mutex_unlock(&kctx->csf.tiler_heaps.lock);
++
++	return err;
++}
++
++/**
++ * alloc_new_chunk - Allocate a new chunk for the tiler heap.
++ *
++ * This function will allocate a new chunk for the chunked tiler heap depending
++ * on the settings provided by userspace when the heap was created and the
++ * heap's statistics (like number of render passes in-flight).
++ *
++ * @heap: Pointer to the tiler heap.
++ * @nr_in_flight: Number of render passes that are in-flight, must not be zero.
++ * @pending_frag_count: Number of render passes in-flight with completed vertex/tiler stage.
++ *                      The minimum value is zero but it must be less or equal to
++ *                      the total number of render passes in flight
++ * @new_chunk_ptr: Where to store the GPU virtual address & size of the new
++ *                 chunk allocated for the heap.
++ *
++ * Return: 0 if a new chunk was allocated otherwise an appropriate negative
++ *         error code.
++ */
++static int alloc_new_chunk(struct kbase_csf_tiler_heap *heap,
++	u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr)
++{
++	int err = -ENOMEM;
++
++	lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
++
++	if (WARN_ON(!nr_in_flight) ||
++		WARN_ON(pending_frag_count > nr_in_flight))
++		return -EINVAL;
++
++	if (nr_in_flight <= heap->target_in_flight) {
++		if (heap->chunk_count < heap->max_chunks) {
++			/* Not exceeded the target number of render passes yet so be
++			 * generous with memory.
++			 */
++			/* The new chunk is not linked to its predecessor
++			 * here; its encoded pointer is returned to the
++			 * caller through @new_chunk_ptr instead.
++			 */
++			err = create_chunk(heap, false);
++
++			if (likely(!err)) {
++				struct kbase_csf_tiler_heap_chunk *new_chunk =
++								get_last_chunk(heap);
++				if (!WARN_ON(!new_chunk)) {
++					*new_chunk_ptr =
++						encode_chunk_ptr(heap->chunk_size,
++								 new_chunk->gpu_va);
++					return 0;
++				}
++			}
++		} else if (pending_frag_count > 0) {
++			/* Heap is full but some passes still have fragment
++			 * work pending: report busy rather than out of memory.
++			 */
++			err = -EBUSY;
++		} else {
++			err = -ENOMEM;
++		}
++	} else {
++		/* Reached target number of render passes in flight.
++		 * Wait for some of them to finish
++		 */
++		err = -EBUSY;
++	}
++
++	return err;
++}
++
++/* Look up the heap at @gpu_heap_va and try to grow it by one chunk; see
++ * alloc_new_chunk() for the meaning of the remaining arguments. Returns
++ * -EINVAL if the heap is not found, otherwise alloc_new_chunk()'s result.
++ */
++int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
++	u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr)
++{
++	struct kbase_csf_tiler_heap *heap;
++	int err = -EINVAL;
++
++	mutex_lock(&kctx->csf.tiler_heaps.lock);
++
++	heap = find_tiler_heap(kctx, gpu_heap_va);
++
++	if (likely(heap)) {
++		err = alloc_new_chunk(heap, nr_in_flight, pending_frag_count,
++			new_chunk_ptr);
++
++		/* Stats are emitted whether or not the allocation succeeded. */
++		KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(
++			kctx->kbdev, kctx->id, heap->heap_id,
++			PFN_UP(heap->chunk_size * heap->max_chunks),
++			PFN_UP(heap->chunk_size * heap->chunk_count),
++			heap->max_chunks, heap->chunk_size, heap->chunk_count,
++			heap->target_in_flight, nr_in_flight);
++	}
++
++	mutex_unlock(&kctx->csf.tiler_heaps.lock);
++
++	return err;
++}
+diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap.h
+new file mode 100644
+index 0000000..04c27f7
+--- /dev/null
++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap.h
+@@ -0,0 +1,115 @@
++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
++/*
++ *
++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#ifndef _KBASE_CSF_TILER_HEAP_H_ ++#define _KBASE_CSF_TILER_HEAP_H_ ++ ++#include ++ ++/** ++ * kbase_csf_tiler_heap_context_init - Initialize the tiler heaps context for a ++ * GPU address space ++ * ++ * @kctx: Pointer to the kbase context being initialized. ++ * ++ * Return: 0 if successful or a negative error code on failure. ++ */ ++int kbase_csf_tiler_heap_context_init(struct kbase_context *kctx); ++ ++/** ++ * kbase_csf_tiler_heap_context_term - Terminate the tiler heaps context for a ++ * GPU address space ++ * ++ * This function deletes any chunked tiler heaps that weren't deleted before ++ * context termination. ++ * ++ * @kctx: Pointer to the kbase context being terminated. ++ */ ++void kbase_csf_tiler_heap_context_term(struct kbase_context *kctx); ++ ++/** ++ * kbase_csf_tiler_heap_init - Initialize a chunked tiler memory heap. ++ * ++ * @kctx: Pointer to the kbase context in which to allocate resources for the ++ * tiler heap. ++ * @chunk_size: Size of each chunk, in bytes. Must be page-aligned. ++ * @initial_chunks: The initial number of chunks to allocate. Must not be ++ * zero or greater than @max_chunks. ++ * @max_chunks: The maximum number of chunks that the heap should be allowed ++ * to use. Must not be less than @initial_chunks. 
++ * @target_in_flight: Number of render-passes that the driver should attempt to ++ * keep in flight for which allocation of new chunks is ++ * allowed. Must not be zero. ++ * @gpu_heap_va: Where to store the GPU virtual address of the context that was ++ * set up for the tiler heap. ++ * @first_chunk_va: Where to store the GPU virtual address of the first chunk ++ * allocated for the heap. This points to the header of the ++ * heap chunk and not to the low address of free memory in it. ++ * ++ * Return: 0 if successful or a negative error code on failure. ++ */ ++int kbase_csf_tiler_heap_init(struct kbase_context *kctx, ++ u32 chunk_size, u32 initial_chunks, u32 max_chunks, ++ u16 target_in_flight, u64 *gpu_heap_va, ++ u64 *first_chunk_va); ++ ++/** ++ * kbase_csf_tiler_heap_term - Terminate a chunked tiler memory heap. ++ * ++ * This function will terminate a chunked tiler heap and cause all the chunks ++ * (initial and those added during out-of-memory processing) to be freed. ++ * It is the caller's responsibility to ensure no further operations on this ++ * heap will happen before calling this function. ++ * ++ * @kctx: Pointer to the kbase context in which the tiler heap was initialized. ++ * @gpu_heap_va: The GPU virtual address of the context that was set up for the ++ * tiler heap. ++ * ++ * Return: 0 if successful or a negative error code on failure. ++ */ ++int kbase_csf_tiler_heap_term(struct kbase_context *kctx, u64 gpu_heap_va); ++ ++/** ++ * kbase_csf_tiler_heap_alloc_new_chunk - Allocate a new chunk for tiler heap. ++ * ++ * This function will allocate a new chunk for the chunked tiler heap depending ++ * on the settings provided by userspace when the heap was created and the ++ * heap's statistics (like number of render passes in-flight). ++ * It returns an appropriate error code if a new chunk couldn't be ++ * allocated. ++ * ++ * @kctx: Pointer to the kbase context in which the tiler heap was initialized.
++ * @gpu_heap_va: GPU virtual address of the heap context. ++ * @nr_in_flight: Number of render passes that are in-flight, must not be zero. ++ * @pending_frag_count: Number of render passes in-flight with completed vertex/tiler stage. ++ * The minimum value is zero but it must be less or equal to ++ * the total number of render passes in flight ++ * @new_chunk_ptr: Where to store the GPU virtual address & size of the new ++ * chunk allocated for the heap. ++ * ++ * Return: 0 if a new chunk was allocated otherwise an appropriate negative ++ * error code (like -EBUSY when a free chunk is expected to be ++ * available upon completion of a render pass and -EINVAL when ++ * invalid value was passed for one of the argument). ++ */ ++int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx, ++ u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr); ++#endif +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap_debugfs.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap_debugfs.c +new file mode 100644 +index 0000000..f46beed +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap_debugfs.c +@@ -0,0 +1,106 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#include "mali_kbase_csf_tiler_heap_debugfs.h" ++#include "mali_kbase_csf_tiler_heap_def.h" ++#include ++#include ++ ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ ++/** ++ * kbasep_csf_tiler_heap_debugfs_show() - Print per-context tiler heap information ++ * ++ * @file: The seq_file to print to; its private field holds the kbase_context ++ * @data: Unused; the context is taken from @file rather than from here ++ * ++ * Return: Always 0. ++ */ ++static int kbasep_csf_tiler_heap_debugfs_show(struct seq_file *file, void *data) ++{ ++ struct kbase_context *kctx = file->private; ++ struct kbase_csf_tiler_heap_context *tiler_heaps_p = &kctx->csf.tiler_heaps; ++ struct kbase_csf_tiler_heap *heap; ++ struct kbase_csf_tiler_heap_chunk *chunk; ++ ++ seq_printf(file, "MALI_CSF_TILER_HEAP_DEBUGFS_VERSION: v%u\n", MALI_CSF_TILER_HEAP_DEBUGFS_VERSION); ++ ++ mutex_lock(&tiler_heaps_p->lock); ++ ++ list_for_each_entry(heap, &tiler_heaps_p->list, link) { ++ if (heap->kctx != kctx) ++ continue; ++ ++ seq_printf(file, "HEAP(gpu_va = 0x%llx):\n", heap->gpu_va); ++ seq_printf(file, "\tchunk_size = %u\n", heap->chunk_size); ++ seq_printf(file, "\tchunk_count = %u\n", heap->chunk_count); ++ seq_printf(file, "\tmax_chunks = %u\n", heap->max_chunks); ++ seq_printf(file, "\ttarget_in_flight = %u\n", heap->target_in_flight); ++ ++ list_for_each_entry(chunk, &heap->chunks_list, link) ++ seq_printf(file, "\t\tchunk gpu_va = 0x%llx\n", ++ chunk->gpu_va); ++ } ++ ++ mutex_unlock(&tiler_heaps_p->lock); ++ ++ return 0; ++} ++ ++static int kbasep_csf_tiler_heap_debugfs_open(struct inode *in, struct file *file) ++{ ++ return single_open(file, kbasep_csf_tiler_heap_debugfs_show, in->i_private); ++} ++ ++static const struct file_operations kbasep_csf_tiler_heap_debugfs_fops = { ++ .open =
kbasep_csf_tiler_heap_debugfs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx) ++{ ++ struct dentry *file; ++ ++ if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) ++ return; ++ ++ file = debugfs_create_file("tiler_heaps", 0444, kctx->kctx_dentry, ++ kctx, &kbasep_csf_tiler_heap_debugfs_fops); ++ ++ if (IS_ERR_OR_NULL(file)) { ++ dev_warn(kctx->kbdev->dev, ++ "Unable to create tiler heap debugfs entry"); ++ } ++} ++ ++ ++#else ++/* ++ * Stub functions for when debugfs is disabled ++ */ ++void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx) ++{ ++} ++ ++#endif /* CONFIG_DEBUG_FS */ ++ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap_debugfs.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap_debugfs.h +new file mode 100644 +index 0000000..92ae91a +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap_debugfs.h +@@ -0,0 +1,37 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ ++ ++#ifndef _KBASE_CSF_TILER_HEAP_DEBUGFS_H_ ++#define _KBASE_CSF_TILER_HEAP_DEBUGFS_H_ ++ ++/* Forward declaration */ ++struct kbase_context; ++ ++#define MALI_CSF_TILER_HEAP_DEBUGFS_VERSION 0 ++ ++/** ++ * kbase_csf_tiler_heap_debugfs_init() - Create a debugfs entry for per context tiler heap ++ * ++ * @kctx: The kbase_context for which to create the debugfs entry ++ */ ++void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx); ++ ++#endif /* _KBASE_CSF_TILER_HEAP_DEBUGFS_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap_def.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap_def.h +new file mode 100644 +index 0000000..fb439cf +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap_def.h +@@ -0,0 +1,114 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#ifndef _KBASE_CSF_TILER_HEAP_DEF_H_ ++#define _KBASE_CSF_TILER_HEAP_DEF_H_ ++ ++#include ++ ++/* Size of a tiler heap chunk header, in bytes. */ ++#define CHUNK_HDR_SIZE ((size_t)64) ++ ++/* Bit-position of the next chunk's size when stored in a chunk header. 
*/ ++#define CHUNK_HDR_NEXT_SIZE_POS (0) ++ ++/* Bit-position of the next chunk's address when stored in a chunk header. */ ++#define CHUNK_HDR_NEXT_ADDR_POS (12) ++ ++/* Bitmask of the next chunk's size when stored in a chunk header. */ ++#define CHUNK_HDR_NEXT_SIZE_MASK (((u64)1 << CHUNK_HDR_NEXT_ADDR_POS) - 1u) ++ ++/* Bitmask of the address of the next chunk when stored in a chunk header. */ ++#define CHUNK_HDR_NEXT_ADDR_MASK (~CHUNK_HDR_NEXT_SIZE_MASK) ++ ++/* Right-shift before storing the next chunk's size in a chunk header. */ ++#define CHUNK_HDR_NEXT_SIZE_ENCODE_SHIFT (12) ++ ++/* Right-shift before storing the next chunk's address in a chunk header. */ ++#define CHUNK_HDR_NEXT_ADDR_ENCODE_SHIFT (12) ++ ++/* Bitmask of valid chunk sizes. This is also the maximum chunk size, in bytes. ++ */ ++#define CHUNK_SIZE_MASK \ ++ ((CHUNK_HDR_NEXT_SIZE_MASK >> CHUNK_HDR_NEXT_SIZE_POS) << \ ++ CHUNK_HDR_NEXT_SIZE_ENCODE_SHIFT) ++ ++/* Bitmask of valid chunk addresses. This is also the highest address. */ ++#define CHUNK_ADDR_MASK \ ++ ((CHUNK_HDR_NEXT_ADDR_MASK >> CHUNK_HDR_NEXT_ADDR_POS) << \ ++ CHUNK_HDR_NEXT_ADDR_ENCODE_SHIFT) ++ ++/** ++ * struct kbase_csf_tiler_heap_chunk - A tiler heap chunk managed by the kernel ++ * ++ * Chunks are allocated upon initialization of a tiler heap or in response to ++ * out-of-memory events from the firmware. Chunks are always fully backed by ++ * physical memory to avoid the overhead of processing GPU page faults. The ++ * allocated GPU memory regions are linked together independent of the list of ++ * kernel objects of this type. ++ * ++ * @link: Link to this chunk in a list of chunks belonging to a ++ * @kbase_csf_tiler_heap. ++ * @region: Pointer to the GPU memory region allocated for the chunk. ++ * @gpu_va: GPU virtual address of the start of the memory region. ++ * This points to the header of the chunk and not to the low address ++ * of free memory within it. 
++ */ ++struct kbase_csf_tiler_heap_chunk { ++ struct list_head link; ++ struct kbase_va_region *region; ++ u64 gpu_va; ++}; ++ ++/** ++ * struct kbase_csf_tiler_heap - A tiler heap managed by the kernel ++ * ++ * @kctx: Pointer to the kbase context with which this heap is ++ * associated. ++ * @link: Link to this heap in a list of tiler heaps belonging to ++ * the @kbase_csf_tiler_heap_context. ++ * @chunk_size: Size of each chunk, in bytes. Must be page-aligned. ++ * @chunk_count: The number of chunks currently allocated. Must not be ++ * zero or greater than @max_chunks. ++ * @max_chunks: The maximum number of chunks that the heap should be ++ * allowed to use. Must not be less than @chunk_count. ++ * @target_in_flight: Number of render-passes that the driver should attempt ++ * to keep in flight for which allocation of new chunks is ++ * allowed. Must not be zero. ++ * @gpu_va: The GPU virtual address of the heap context structure that ++ * was allocated for the firmware. This is also used to ++ * uniquely identify the heap. ++ * @heap_id: Unique id representing the heap, assigned during heap ++ * initialization. ++ * @chunks_list: Linked list of allocated chunks. ++ */ ++struct kbase_csf_tiler_heap { ++ struct kbase_context *kctx; ++ struct list_head link; ++ u32 chunk_size; ++ u32 chunk_count; ++ u32 max_chunks; ++ u16 target_in_flight; ++ u64 gpu_va; ++ u64 heap_id; ++ struct list_head chunks_list; ++}; ++#endif /* !_KBASE_CSF_TILER_HEAP_DEF_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_timeout.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_timeout.c +new file mode 100644 +index 0000000..4d93fe5 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_timeout.c +@@ -0,0 +1,178 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include "mali_kbase.h" ++#include "mali_kbase_config_defaults.h" ++#include "mali_kbase_csf_firmware.h" ++#include "mali_kbase_csf_timeout.h" ++#include "mali_kbase_reset_gpu.h" ++#include "backend/gpu/mali_kbase_pm_internal.h" ++ ++/** ++ * set_timeout - set a new global progress timeout. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * @timeout: the maximum number of GPU cycles without forward progress to allow ++ * to elapse before terminating a GPU command queue group. ++ * ++ * Return: 0 on success, or negative on failure ++ * (e.g. -ERANGE if the requested timeout is too large). ++ */ ++static int set_timeout(struct kbase_device *const kbdev, u64 const timeout) ++{ ++ if (timeout > GLB_PROGRESS_TIMER_TIMEOUT_MAX) { ++ dev_err(kbdev->dev, "Timeout %llu is too large.\n", timeout); ++ return -ERANGE; ++ } ++ ++ dev_dbg(kbdev->dev, "New progress timeout: %llu cycles\n", timeout); ++ ++ atomic64_set(&kbdev->csf.progress_timeout, timeout); ++ ++ return 0; ++} ++ ++/** ++ * progress_timeout_store - Store the progress_timeout device attribute. ++ * @dev: The device that has the attribute. ++ * @attr: The attributes of the sysfs file. 
++ * @buf: The value written to the sysfs file. ++ * @count: The number of bytes written to the sysfs file. ++ * ++ * This function is called when the progress_timeout sysfs file is written to. ++ * It checks the data written, and if valid updates the progress timeout value. ++ * The function also checks gpu reset status, if the gpu is in reset process, ++ * the function will return an error code (-EBUSY), and no change for timeout ++ * value. ++ * ++ * Return: @count if the function succeeded. An error code on failure. ++ */ ++static ssize_t progress_timeout_store(struct device * const dev, ++ struct device_attribute * const attr, const char * const buf, ++ size_t const count) ++{ ++ struct kbase_device *const kbdev = dev_get_drvdata(dev); ++ int err; ++ u64 timeout; ++ ++ if (!kbdev) ++ return -ENODEV; ++ ++ err = kbase_reset_gpu_try_prevent(kbdev); ++ if (err) { ++ dev_warn(kbdev->dev, ++ "Couldn't process progress_timeout write operation for GPU reset.\n"); ++ return -EBUSY; ++ } ++ ++ err = kstrtou64(buf, 0, &timeout); ++ if (err) ++ dev_err(kbdev->dev, ++ "Couldn't process progress_timeout write operation.\n" ++ "Use format \n"); ++ else ++ err = set_timeout(kbdev, timeout); ++ ++ if (!err) { ++ kbase_csf_scheduler_pm_active(kbdev); ++ ++ err = kbase_pm_wait_for_desired_state(kbdev); ++ if (!err) ++ err = kbase_csf_firmware_set_timeout(kbdev, timeout); ++ ++ kbase_csf_scheduler_pm_idle(kbdev); ++ } ++ ++ kbase_reset_gpu_allow(kbdev); ++ if (err) ++ return err; ++ ++ return count; ++} ++ ++/** ++ * progress_timeout_show - Show the progress_timeout device attribute. ++ * @dev: The device that has the attribute. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer to receive the global timeout. ++ * ++ * This function is called to get the progress timeout value. ++ * ++ * Return: The number of bytes output to @buf. 
++ */ ++static ssize_t progress_timeout_show(struct device * const dev, ++ struct device_attribute * const attr, char * const buf) ++{ ++ struct kbase_device *const kbdev = dev_get_drvdata(dev); ++ int err; ++ ++ if (!kbdev) ++ return -ENODEV; ++ ++ err = scnprintf(buf, PAGE_SIZE, "%llu\n", kbase_csf_timeout_get(kbdev)); ++ ++ return err; ++ ++} ++ ++static DEVICE_ATTR(progress_timeout, 0644, progress_timeout_show, ++ progress_timeout_store); ++ ++int kbase_csf_timeout_init(struct kbase_device *const kbdev) ++{ ++ u64 timeout = DEFAULT_PROGRESS_TIMEOUT; ++ int err; ++ ++#if IS_ENABLED(CONFIG_OF) ++ err = of_property_read_u64(kbdev->dev->of_node, ++ "progress_timeout", &timeout); ++ if (!err) ++ dev_info(kbdev->dev, "Found progress_timeout = %llu in Devicetree\n", ++ timeout); ++#endif ++ ++ err = set_timeout(kbdev, timeout); ++ if (err) ++ return err; ++ ++ err = sysfs_create_file(&kbdev->dev->kobj, ++ &dev_attr_progress_timeout.attr); ++ if (err) ++ dev_err(kbdev->dev, "SysFS file creation failed\n"); ++ ++ return err; ++} ++ ++void kbase_csf_timeout_term(struct kbase_device * const kbdev) ++{ ++ sysfs_remove_file(&kbdev->dev->kobj, &dev_attr_progress_timeout.attr); ++} ++ ++u64 kbase_csf_timeout_get(struct kbase_device *const kbdev) ++{ ++ return atomic64_read(&kbdev->csf.progress_timeout); ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_timeout.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_timeout.h +new file mode 100644 +index 0000000..b406eaa +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_timeout.h +@@ -0,0 +1,66 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#ifndef _KBASE_CSF_TIMEOUT_H_ ++#define _KBASE_CSF_TIMEOUT_H_ ++ ++struct kbase_device; ++ ++/** ++ * kbase_csf_timeout_init - Initialize the progress timeout. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * Must be zero-initialized. ++ * ++ * The progress timeout is the number of GPU clock cycles allowed to elapse ++ * before the driver terminates a GPU command queue group in which a task is ++ * making no forward progress on an endpoint (e.g. a shader core). This function ++ * determines the initial value and also creates a sysfs file to allow the ++ * timeout to be reconfigured later. ++ * ++ * Reconfigures the global firmware interface to enable the current timeout. ++ * ++ * Return: 0 on success, or negative on failure. ++ */ ++int kbase_csf_timeout_init(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_timeout_term - Terminate the progress timeout. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * Removes the sysfs file which allowed the timeout to be reconfigured. ++ * Does nothing if called on a zero-initialized object. 
++ */ ++void kbase_csf_timeout_term(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_timeout_get - get the current global progress timeout. ++ * ++ * @kbdev: Instance of a GPU platform device that implements a CSF interface. ++ * ++ * Return: the maximum number of GPU cycles that is allowed to elapse without ++ * forward progress before the driver terminates a GPU command queue ++ * group. ++ */ ++u64 kbase_csf_timeout_get(struct kbase_device *const kbdev); ++ ++#endif /* _KBASE_CSF_TIMEOUT_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tl_reader.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tl_reader.c +new file mode 100644 +index 0000000..1824c2d +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tl_reader.c +@@ -0,0 +1,534 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ ++ ++#include "mali_kbase_csf_tl_reader.h" ++ ++#include "mali_kbase_csf_trace_buffer.h" ++#include "mali_kbase_reset_gpu.h" ++ ++#include "tl/mali_kbase_tlstream.h" ++#include "tl/mali_kbase_tl_serialize.h" ++#include "tl/mali_kbase_tracepoints.h" ++ ++#include "mali_kbase_pm.h" ++#include "mali_kbase_hwaccess_time.h" ++ ++#include ++#include ++#include ++ ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++#include "tl/mali_kbase_timeline_priv.h" ++#include ++ ++#if (KERNEL_VERSION(4, 7, 0) > LINUX_VERSION_CODE) ++#define DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SIMPLE_ATTRIBUTE ++#endif ++#endif ++ ++/* Name of the CSFFW timeline tracebuffer. */ ++#define KBASE_CSFFW_TRACEBUFFER_NAME "timeline" ++/* Name of the timeline header metadata */ ++#define KBASE_CSFFW_TIMELINE_HEADER_NAME "timeline_header" ++ ++/** ++ * struct kbase_csffw_tl_message - CSFFW timeline message. ++ * ++ * @msg_id: Message ID. ++ * @timestamp: Timestamp of the event. ++ * @cycle_counter: Cycle number of the event. ++ * ++ * Contains the fields that are common to all CSFFW timeline messages.
++ */ ++struct kbase_csffw_tl_message { ++ u32 msg_id; ++ u64 timestamp; ++ u64 cycle_counter; ++} __packed __aligned(4); ++ ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++static int kbase_csf_tl_debugfs_poll_interval_read(void *data, u64 *val) ++{ ++ struct kbase_device *kbdev = (struct kbase_device *)data; ++ struct kbase_csf_tl_reader *self = &kbdev->timeline->csf_tl_reader; ++ ++ *val = self->timer_interval; ++ ++ return 0; ++} ++ ++static int kbase_csf_tl_debugfs_poll_interval_write(void *data, u64 val) ++{ ++ struct kbase_device *kbdev = (struct kbase_device *)data; ++ struct kbase_csf_tl_reader *self = &kbdev->timeline->csf_tl_reader; ++ ++ if (val > KBASE_CSF_TL_READ_INTERVAL_MAX || val < KBASE_CSF_TL_READ_INTERVAL_MIN) { ++ return -EINVAL; ++ } ++ ++ self->timer_interval = (u32)val; ++ ++ return 0; ++} ++ ++DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_tl_poll_interval_fops, ++ kbase_csf_tl_debugfs_poll_interval_read, ++ kbase_csf_tl_debugfs_poll_interval_write, "%llu\n"); ++ ++ ++void kbase_csf_tl_reader_debugfs_init(struct kbase_device *kbdev) ++{ ++ debugfs_create_file("csf_tl_poll_interval_in_ms", S_IRUGO | S_IWUSR, ++ kbdev->debugfs_instr_directory, kbdev, ++ &kbase_csf_tl_poll_interval_fops); ++ ++} ++#endif ++ ++/** ++ * get_cpu_gpu_time() - Get current CPU and GPU timestamps. ++ * ++ * @kbdev: Kbase device. ++ * @cpu_ts: Output CPU timestamp. ++ * @gpu_ts: Output GPU timestamp. ++ * @gpu_cycle: Output GPU cycle counts. ++ */ ++static void get_cpu_gpu_time( ++ struct kbase_device *kbdev, ++ u64 *cpu_ts, ++ u64 *gpu_ts, ++ u64 *gpu_cycle) ++{ ++ struct timespec64 ts; ++ ++ kbase_pm_context_active(kbdev); ++ kbase_backend_get_gpu_time(kbdev, gpu_cycle, gpu_ts, &ts); ++ kbase_pm_context_idle(kbdev); ++ ++ if (cpu_ts) ++ *cpu_ts = ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; ++} ++ ++ ++/** ++ * kbase_ts_converter_init() - Initialize system timestamp converter. ++ * ++ * @self: System Timestamp Converter instance. 
++ * @kbdev: Kbase device pointer ++ * ++ * Return: Zero on success, -1 otherwise. ++ */ ++static int kbase_ts_converter_init( ++ struct kbase_ts_converter *self, ++ struct kbase_device *kbdev) ++{ ++ u64 cpu_ts = 0; ++ u64 gpu_ts = 0; ++ u64 freq; ++ u64 common_factor; ++ ++ get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL); ++ freq = arch_timer_get_cntfrq(); ++ ++ if (!freq) { ++ dev_warn(kbdev->dev, "arch_timer_get_rate() is zero!"); ++ return -1; ++ } ++ ++ common_factor = gcd(NSEC_PER_SEC, freq); ++ ++ self->multiplier = div64_u64(NSEC_PER_SEC, common_factor); ++ self->divisor = div64_u64(freq, common_factor); ++ self->offset = ++ cpu_ts - div64_u64(gpu_ts * self->multiplier, self->divisor); ++ ++ return 0; ++} ++ ++/** ++ * kbase_ts_converter_convert() - Convert GPU timestamp to CPU timestamp. ++ * ++ * @self: System Timestamp Converter instance. ++ * @gpu_ts: System timestamp value to convert; overwritten in place. ++ * ++ * Return: Nothing; the CPU timestamp is stored back through @gpu_ts. ++ */ ++static void kbase_ts_converter_convert( ++ const struct kbase_ts_converter *self, ++ u64 *gpu_ts) ++{ ++ u64 old_gpu_ts = *gpu_ts; ++ *gpu_ts = div64_u64(old_gpu_ts * self->multiplier, ++ self->divisor) + self->offset; ++} ++ ++/** ++ * tl_reader_overflow_notify() - Emit stream overflow tracepoint. ++ * ++ * @self: CSFFW TL Reader instance. ++ * @msg_buf_start: Start of the message. ++ * @msg_buf_end: End of the message buffer.
++ */ ++static void tl_reader_overflow_notify( ++ const struct kbase_csf_tl_reader *self, ++ u8 *const msg_buf_start, ++ u8 *const msg_buf_end) ++{ ++ struct kbase_device *kbdev = self->kbdev; ++ struct kbase_csffw_tl_message message = {0}; ++ ++ /* Reuse the timestamp and cycle count from current event if possible */ ++ if (msg_buf_start + sizeof(message) <= msg_buf_end) ++ memcpy(&message, msg_buf_start, sizeof(message)); ++ ++ KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW( ++ kbdev, message.timestamp, message.cycle_counter); ++} ++ ++/** ++ * tl_reader_overflow_check() - Check if an overflow has happened ++ * ++ * @self: CSFFW TL Reader instance. ++ * @event_id: Incoming event id. ++ * ++ * Return: True, if an overflow has happened, False otherwise. ++ */ ++static bool tl_reader_overflow_check( ++ struct kbase_csf_tl_reader *self, ++ u16 event_id) ++{ ++ struct kbase_device *kbdev = self->kbdev; ++ bool has_overflow = false; ++ ++ /* 0 is a special event_id and reserved for the very first tracepoint ++ * after reset, we should skip overflow check when reset happened. ++ */ ++ if (event_id != 0) { ++ has_overflow = self->got_first_event ++ && self->expected_event_id != event_id; ++ ++ if (has_overflow) ++ dev_warn(kbdev->dev, ++ "CSFFW overflow, event_id: %u, expected: %u.", ++ event_id, self->expected_event_id); ++ } ++ ++ self->got_first_event = true; ++ self->expected_event_id = event_id + 1; ++ /* When event_id reaches its max value, it skips 0 and wraps to 1. */ ++ if (self->expected_event_id == 0) ++ self->expected_event_id++; ++ ++ return has_overflow; ++} ++ ++/** ++ * tl_reader_reset() - Reset timeline tracebuffer reader state machine. ++ * ++ * @self: CSFFW TL Reader instance. ++ * ++ * Reset the reader to the default state, i.e. set all the ++ * mutable fields to zero. 
++ */ ++static void tl_reader_reset(struct kbase_csf_tl_reader *self) ++{ ++ self->got_first_event = false; ++ self->is_active = false; ++ self->expected_event_id = 0; ++ self->tl_header.btc = 0; ++} ++ ++int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self) ++{ ++ int ret = 0; ++ struct kbase_device *kbdev = self->kbdev; ++ struct kbase_tlstream *stream = self->stream; ++ ++ u8 *read_buffer = self->read_buffer; ++ const size_t read_buffer_size = sizeof(self->read_buffer); ++ ++ u32 bytes_read; ++ u8 *csffw_data_begin; ++ u8 *csffw_data_end; ++ u8 *csffw_data_it; ++ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&self->read_lock, flags); ++ ++ /* If not running, early exit. */ ++ if (!self->is_active) { ++ spin_unlock_irqrestore(&self->read_lock, flags); ++ return -EBUSY; ++ } ++ ++ /* Copying the whole buffer in a single shot. We assume ++ * that the buffer will not contain partially written messages. ++ */ ++ bytes_read = kbase_csf_firmware_trace_buffer_read_data( ++ self->trace_buffer, read_buffer, read_buffer_size); ++ csffw_data_begin = read_buffer; ++ csffw_data_end = read_buffer + bytes_read; ++ ++ for (csffw_data_it = csffw_data_begin; ++ csffw_data_it < csffw_data_end;) { ++ u32 event_header; ++ u16 event_id; ++ u16 event_size; ++ unsigned long acq_flags; ++ char *buffer; ++ ++ /* Can we safely read event_id? */ ++ if (csffw_data_it + sizeof(event_header) > csffw_data_end) { ++ dev_warn( ++ kbdev->dev, ++ "Unable to parse CSFFW tracebuffer event header."); ++ ret = -EBUSY; ++ break; ++ } ++ ++ /* Read and parse the event header. */ ++ memcpy(&event_header, csffw_data_it, sizeof(event_header)); ++ event_id = (event_header >> 0) & 0xFFFF; ++ event_size = (event_header >> 16) & 0xFFFF; ++ csffw_data_it += sizeof(event_header); ++ ++ /* Detect if an overflow has happened. */ ++ if (tl_reader_overflow_check(self, event_id)) ++ tl_reader_overflow_notify(self, ++ csffw_data_it, ++ csffw_data_end); ++ ++ /* Can we safely read the message body? 
*/ ++ if (csffw_data_it + event_size > csffw_data_end) { ++ dev_warn(kbdev->dev, ++ "event_id: %u, can't read with event_size: %u.", ++ event_id, event_size); ++ ret = -EBUSY; ++ break; ++ } ++ ++ /* Convert GPU timestamp to CPU timestamp. */ ++ { ++ struct kbase_csffw_tl_message *msg = ++ (struct kbase_csffw_tl_message *) csffw_data_it; ++ kbase_ts_converter_convert( ++ &self->ts_converter, ++ &msg->timestamp); ++ } ++ ++ /* Copy the message out to the tl_stream. */ ++ buffer = kbase_tlstream_msgbuf_acquire( ++ stream, event_size, &acq_flags); ++ kbasep_serialize_bytes(buffer, 0, csffw_data_it, event_size); ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++ csffw_data_it += event_size; ++ } ++ ++ spin_unlock_irqrestore(&self->read_lock, flags); ++ return ret; ++} ++ ++static void kbasep_csf_tl_reader_read_callback(struct timer_list *timer) ++{ ++ struct kbase_csf_tl_reader *self = ++ container_of(timer, struct kbase_csf_tl_reader, read_timer); ++ ++ int rcode; ++ ++ kbase_csf_tl_reader_flush_buffer(self); ++ ++ rcode = mod_timer(&self->read_timer, ++ jiffies + msecs_to_jiffies(self->timer_interval)); ++ ++ CSTD_UNUSED(rcode); ++} ++ ++/** ++ * tl_reader_init_late() - Late CSFFW TL Reader initialization. ++ * ++ * @self: CSFFW TL Reader instance. ++ * @kbdev: Kbase device. ++ * ++ * Late initialization is done once at kbase_csf_tl_reader_start() time. ++ * This is because the firmware image is not parsed ++ * by the kbase_csf_tl_reader_init() time. ++ * ++ * Return: Zero on success, -1 otherwise. 
++ */ ++static int tl_reader_init_late( ++ struct kbase_csf_tl_reader *self, ++ struct kbase_device *kbdev) ++{ ++ struct firmware_trace_buffer *tb; ++ size_t hdr_size = 0; ++ const char *hdr = NULL; ++ ++ if (self->kbdev) ++ return 0; ++ ++ tb = kbase_csf_firmware_get_trace_buffer( ++ kbdev, KBASE_CSFFW_TRACEBUFFER_NAME); ++ hdr = kbase_csf_firmware_get_timeline_metadata( ++ kbdev, KBASE_CSFFW_TIMELINE_HEADER_NAME, &hdr_size); ++ ++ if (!tb) { ++ dev_warn( ++ kbdev->dev, ++ "'%s' tracebuffer is not present in the firmware image.", ++ KBASE_CSFFW_TRACEBUFFER_NAME); ++ return -1; ++ } ++ ++ if (!hdr) { ++ dev_warn( ++ kbdev->dev, ++ "'%s' timeline metadata is not present in the firmware image.", ++ KBASE_CSFFW_TIMELINE_HEADER_NAME); ++ return -1; ++ } ++ ++ if (kbase_ts_converter_init(&self->ts_converter, kbdev)) { ++ return -1; ++ } ++ ++ self->kbdev = kbdev; ++ self->trace_buffer = tb; ++ self->tl_header.data = hdr; ++ self->tl_header.size = hdr_size; ++ ++ return 0; ++} ++ ++/** ++ * tl_reader_update_enable_bit() - Update the first bit of a CSFFW tracebuffer. ++ * ++ * @self: CSFFW TL Reader instance. ++ * @value: The value to set. ++ * ++ * Update the first bit of a CSFFW tracebufer and then reset the GPU. ++ * This is to make these changes visible to the MCU. ++ * ++ * Return: 0 on success, or negative error code for failure. 
++ */ ++static int tl_reader_update_enable_bit( ++ struct kbase_csf_tl_reader *self, ++ bool value) ++{ ++ int err = 0; ++ ++ err = kbase_csf_firmware_trace_buffer_update_trace_enable_bit( ++ self->trace_buffer, 0, value); ++ ++ return err; ++} ++ ++void kbase_csf_tl_reader_init(struct kbase_csf_tl_reader *self, ++ struct kbase_tlstream *stream) ++{ ++ self->timer_interval = KBASE_CSF_TL_READ_INTERVAL_DEFAULT; ++ ++ kbase_timer_setup(&self->read_timer, ++ kbasep_csf_tl_reader_read_callback); ++ ++ self->stream = stream; ++ ++ /* This will be initialized by tl_reader_init_late() */ ++ self->kbdev = NULL; ++ self->trace_buffer = NULL; ++ self->tl_header.data = NULL; ++ self->tl_header.size = 0; ++ ++ spin_lock_init(&self->read_lock); ++ ++ tl_reader_reset(self); ++} ++ ++void kbase_csf_tl_reader_term(struct kbase_csf_tl_reader *self) ++{ ++ del_timer_sync(&self->read_timer); ++} ++ ++int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self, ++ struct kbase_device *kbdev) ++{ ++ int rcode; ++ ++ /* If already running, early exit. */ ++ if (self->is_active) ++ return 0; ++ ++ if (tl_reader_init_late(self, kbdev)) { ++ return -EINVAL; ++ } ++ ++ tl_reader_reset(self); ++ ++ self->is_active = true; ++ /* Set bytes to copy to the header size. This is to trigger copying ++ * of the header to the user space. ++ */ ++ self->tl_header.btc = self->tl_header.size; ++ ++ /* Enable the tracebuffer on the CSFFW side. */ ++ rcode = tl_reader_update_enable_bit(self, true); ++ if (rcode != 0) ++ return rcode; ++ ++ rcode = mod_timer(&self->read_timer, ++ jiffies + msecs_to_jiffies(self->timer_interval)); ++ ++ return 0; ++} ++ ++void kbase_csf_tl_reader_stop(struct kbase_csf_tl_reader *self) ++{ ++ unsigned long flags; ++ ++ /* If is not running, early exit. */ ++ if (!self->is_active) ++ return; ++ ++ /* Disable the tracebuffer on the CSFFW side. 
*/ ++ tl_reader_update_enable_bit(self, false); ++ ++ del_timer_sync(&self->read_timer); ++ ++ spin_lock_irqsave(&self->read_lock, flags); ++ ++ tl_reader_reset(self); ++ ++ spin_unlock_irqrestore(&self->read_lock, flags); ++} ++ ++void kbase_csf_tl_reader_reset(struct kbase_csf_tl_reader *self) ++{ ++ u64 gpu_cycle = 0; ++ struct kbase_device *kbdev = self->kbdev; ++ ++ if (!kbdev) ++ return; ++ ++ kbase_csf_tl_reader_flush_buffer(self); ++ ++ get_cpu_gpu_time(kbdev, NULL, NULL, &gpu_cycle); ++ KBASE_TLSTREAM_TL_KBASE_CSFFW_RESET(kbdev, gpu_cycle); ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tl_reader.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tl_reader.h +new file mode 100644 +index 0000000..1b0fcd7 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tl_reader.h +@@ -0,0 +1,185 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#ifndef _KBASE_CSFFW_TL_READER_H_ ++#define _KBASE_CSFFW_TL_READER_H_ ++ ++#include ++#include ++#include ++ ++/* The number of pages used for CSFFW trace buffer. Can be tweaked. 
*/ ++#define KBASE_CSF_TL_BUFFER_NR_PAGES 128 ++/* CSFFW Timeline read polling minimum period in milliseconds. */ ++#define KBASE_CSF_TL_READ_INTERVAL_MIN 20 ++/* CSFFW Timeline read polling default period in milliseconds. */ ++#define KBASE_CSF_TL_READ_INTERVAL_DEFAULT 200 ++/* CSFFW Timeline read polling maximum period in milliseconds. */ ++#define KBASE_CSF_TL_READ_INTERVAL_MAX (60*1000) ++ ++struct firmware_trace_buffer; ++struct kbase_tlstream; ++struct kbase_device; ++ ++/** ++ * struct kbase_ts_converter - ++ * System timestamp to CPU timestamp converter state. ++ * ++ * @multiplier: Numerator of the converter's fraction. ++ * @divisor: Denominator of the converter's fraction. ++ * @offset: Converter's offset term. ++ * ++ * According to Generic timer spec, system timer: ++ * - Increments at a fixed frequency ++ * - Starts operating from zero ++ * ++ * Hence CPU time is a linear function of System Time. ++ * ++ * CPU_ts = alpha * SYS_ts + beta ++ * ++ * Where ++ * - alpha = 10^9/SYS_ts_freq ++ * - beta is calculated by two timer samples taken at the same time: ++ * beta = CPU_ts_s - SYS_ts_s * alpha ++ * ++ * Since alpha is a rational number, we minimize the possible ++ * rounding error by simplifying the ratio. Thus alpha is stored ++ * as a simple `multiplier / divisor` ratio. ++ * ++ */ ++struct kbase_ts_converter { ++ u64 multiplier; ++ u64 divisor; ++ s64 offset; ++}; ++ ++/** ++ * struct kbase_csf_tl_reader - CSFFW timeline reader state. ++ * ++ * @read_timer: Timer used for periodic tracebuffer reading. ++ * @timer_interval: Timer polling period in milliseconds. ++ * @stream: Timeline stream to which the tracebuffer content ++ * is copied. ++ * @kbdev: KBase device. ++ * @trace_buffer: CSF Firmware timeline tracebuffer. ++ * @tl_header: CSFFW Timeline header ++ * @tl_header.data: CSFFW Timeline header content. ++ * @tl_header.size: CSFFW Timeline header size. ++ * @tl_header.btc: CSFFW Timeline header remaining bytes to copy to ++ * the user space.
++ * @ts_converter: Timestamp converter state. ++ * @got_first_event: True, if a CSFFW timeline session has been enabled ++ * and the first event was received. ++ * @is_active: True, if a CSFFW timeline session has been enabled. ++ * @expected_event_id: The 16 bit event ID expected next from CSFFW. It ++ * is only valid when got_first_event is true. ++ * @read_buffer: Temporary buffer used for CSFFW timeline data ++ * reading from the tracebuffer. ++ * @read_lock: CSFFW timeline reader lock. ++ */ ++struct kbase_csf_tl_reader { ++ struct timer_list read_timer; ++ u32 timer_interval; ++ struct kbase_tlstream *stream; ++ ++ struct kbase_device *kbdev; ++ struct firmware_trace_buffer *trace_buffer; ++ struct { ++ const char *data; ++ size_t size; ++ size_t btc; ++ } tl_header; ++ struct kbase_ts_converter ts_converter; ++ ++ bool got_first_event; ++ bool is_active; ++ u16 expected_event_id; ++ ++ u8 read_buffer[PAGE_SIZE * KBASE_CSF_TL_BUFFER_NR_PAGES]; ++ spinlock_t read_lock; ++}; ++ ++/** ++ * kbase_csf_tl_reader_init() - Initialize CSFFW Timeline Stream Reader. ++ * ++ * @self: CSFFW TL Reader instance. ++ * @stream: Destination timeline stream. ++ */ ++void kbase_csf_tl_reader_init(struct kbase_csf_tl_reader *self, ++ struct kbase_tlstream *stream); ++ ++/** ++ * kbase_csf_tl_reader_term() - Terminate CSFFW Timeline Stream Reader. ++ * ++ * @self: CSFFW TL Reader instance. ++ */ ++void kbase_csf_tl_reader_term(struct kbase_csf_tl_reader *self); ++ ++/** ++ * kbase_csf_tl_reader_flush_buffer() - ++ * Flush trace from buffer into CSFFW timeline stream. ++ * ++ * @self: CSFFW TL Reader instance. ++ * ++ * Return: Zero on success, negative error code (EBUSY) otherwise ++ */ ++ ++int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self); ++ ++/** ++ * kbase_csf_tl_reader_start() - ++ * Start asynchronous copying of CSFFW timeline stream. ++ * ++ * @self: CSFFW TL Reader instance. ++ * @kbdev: Kbase device.
++ * ++ * Return: zero on success, a negative error code otherwise. ++ */ ++int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self, ++ struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_tl_reader_stop() - ++ * Stop asynchronous copying of CSFFW timeline stream. ++ * ++ * @self: CSFFW TL Reader instance. ++ */ ++void kbase_csf_tl_reader_stop(struct kbase_csf_tl_reader *self); ++ ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++/** ++ * kbase_csf_tl_reader_debugfs_init() - ++ * Initialize debugfs for CSFFW Timeline Stream Reader. ++ * ++ * @kbdev: Kbase device. ++ */ ++void kbase_csf_tl_reader_debugfs_init(struct kbase_device *kbdev); ++#endif ++ ++/** ++ * kbase_csf_tl_reader_reset() - ++ * Reset CSFFW timeline reader; it should be called before resetting CSFFW. ++ * ++ * @self: CSFFW TL Reader instance. ++ */ ++void kbase_csf_tl_reader_reset(struct kbase_csf_tl_reader *self); ++ ++#endif /* _KBASE_CSFFW_TL_READER_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_trace_buffer.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_trace_buffer.c +new file mode 100644 +index 0000000..a6343c8 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_trace_buffer.c +@@ -0,0 +1,688 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details.
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#include "mali_kbase.h" ++#include "mali_kbase_defs.h" ++#include "mali_kbase_csf_firmware.h" ++#include "mali_kbase_csf_trace_buffer.h" ++#include "mali_kbase_reset_gpu.h" ++#include "mali_kbase_csf_tl_reader.h" ++ ++#include ++#include ++ ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++#if (KERNEL_VERSION(4, 7, 0) > LINUX_VERSION_CODE) ++#define DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SIMPLE_ATTRIBUTE ++#endif ++#endif ++ ++/** ++ * struct firmware_trace_buffer - Trace Buffer within the MCU firmware ++ * ++ * The firmware relays information to the host by writing on memory buffers ++ * which are allocated and partially configured by the host. These buffers ++ * are called Trace Buffers: each of them has a specific purpose and is ++ * identified by a name and a set of memory addresses where the host can ++ * set pointers to host-allocated structures. ++ * ++ * @kbdev: Pointer to the Kbase device. ++ * @node: List head linking all trace buffers to ++ * kbase_device:csf.firmware_trace_buffers ++ * @data_mapping: MCU shared memory mapping used for the data buffer. ++ * @updatable: Indicates whether config items can be updated with ++ * FIRMWARE_CONFIG_UPDATE ++ * @type: The type of the trace buffer. ++ * @trace_enable_entry_count: Number of Trace Enable bits. ++ * @gpu_va: Structure containing all the Firmware addresses ++ * that are accessed by the MCU. ++ * @gpu_va.size_address: The address where the MCU shall read the size of ++ * the data buffer. ++ * @gpu_va.insert_address: The address that shall be dereferenced by the MCU ++ * to write the Insert offset. ++ * @gpu_va.extract_address: The address that shall be dereferenced by the MCU ++ * to read the Extract offset. ++ * @gpu_va.data_address: The address that shall be dereferenced by the MCU ++ * to write the Trace Buffer. 
++ * @gpu_va.trace_enable: The address where the MCU shall read the array of ++ * Trace Enable bits describing which trace points ++ * and features shall be enabled. ++ * @cpu_va: Structure containing CPU addresses of variables ++ * which are permanently mapped on the CPU address ++ * space. ++ * @cpu_va.insert_cpu_va: CPU virtual address of the Insert variable. ++ * @cpu_va.extract_cpu_va: CPU virtual address of the Extract variable. ++ * @num_pages: Size of the data buffer, in pages. ++ * @trace_enable_init_mask: Initial value for the trace enable bit mask. ++ * @name: NULL terminated string which contains the name of the trace buffer. ++ */ ++struct firmware_trace_buffer { ++ struct kbase_device *kbdev; ++ struct list_head node; ++ struct kbase_csf_mapping data_mapping; ++ bool updatable; ++ u32 type; ++ u32 trace_enable_entry_count; ++ struct gpu_va { ++ u32 size_address; ++ u32 insert_address; ++ u32 extract_address; ++ u32 data_address; ++ u32 trace_enable; ++ } gpu_va; ++ struct cpu_va { ++ u32 *insert_cpu_va; ++ u32 *extract_cpu_va; ++ } cpu_va; ++ u32 num_pages; ++ u32 trace_enable_init_mask[CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX]; ++ char name[1]; /* this field must be last */ ++}; ++ ++/** ++ * struct firmware_trace_buffer_data - Configuration data for trace buffers ++ * ++ * Describe how to set up a trace buffer interface. ++ * Trace buffers are identified by name and they require a data buffer and ++ * an initial mask of values for the trace enable bits. ++ * ++ * @name: Name identifier of the trace buffer ++ * @trace_enable_init_mask: Initial value to assign to the trace enable bits ++ * @size: Size of the data buffer to allocate for the trace buffer, in pages. ++ * The size of a data buffer must always be a power of 2. ++ */ ++struct firmware_trace_buffer_data { ++ char name[64]; ++ u32 trace_enable_init_mask[CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX]; ++ size_t size; ++}; ++ ++/* ++ * Table of configuration data for trace buffers. 
++ * ++ * This table contains the configuration data for the trace buffers that are ++ * expected to be parsed from the firmware. ++ */ ++static const struct firmware_trace_buffer_data ++trace_buffer_data[] = { ++#ifndef MALI_KBASE_BUILD ++ { "fwutf", {0}, 1 }, ++#endif ++ { FW_TRACE_BUF_NAME, {0}, 4 }, ++ { "benchmark", {0}, 2 }, ++ { "timeline", {0}, KBASE_CSF_TL_BUFFER_NR_PAGES }, ++}; ++ ++int kbase_csf_firmware_trace_buffers_init(struct kbase_device *kbdev) ++{ ++ struct firmware_trace_buffer *trace_buffer; ++ int ret = 0; ++ u32 mcu_rw_offset = 0, mcu_write_offset = 0; ++ const u32 cache_line_alignment = kbase_get_cache_line_alignment(kbdev); ++ ++ if (list_empty(&kbdev->csf.firmware_trace_buffers.list)) { ++ dev_dbg(kbdev->dev, "No trace buffers to initialise\n"); ++ return 0; ++ } ++ ++ /* GPU-readable,writable memory used for Extract variables */ ++ ret = kbase_csf_firmware_mcu_shared_mapping_init( ++ kbdev, 1, PROT_WRITE, ++ KBASE_REG_GPU_RD | KBASE_REG_GPU_WR, ++ &kbdev->csf.firmware_trace_buffers.mcu_rw); ++ if (ret != 0) { ++ dev_err(kbdev->dev, "Failed to map GPU-rw MCU shared memory\n"); ++ goto out; ++ } ++ ++ /* GPU-writable memory used for Insert variables */ ++ ret = kbase_csf_firmware_mcu_shared_mapping_init( ++ kbdev, 1, PROT_READ, KBASE_REG_GPU_WR, ++ &kbdev->csf.firmware_trace_buffers.mcu_write); ++ if (ret != 0) { ++ dev_err(kbdev->dev, "Failed to map GPU-writable MCU shared memory\n"); ++ goto out; ++ } ++ ++ list_for_each_entry(trace_buffer, &kbdev->csf.firmware_trace_buffers.list, node) { ++ u32 extract_gpu_va, insert_gpu_va, data_buffer_gpu_va, ++ trace_enable_size_dwords; ++ u32 *extract_cpu_va, *insert_cpu_va; ++ unsigned int i; ++ ++ /* GPU-writable data buffer for the individual trace buffer */ ++ ret = kbase_csf_firmware_mcu_shared_mapping_init( ++ kbdev, trace_buffer->num_pages, PROT_READ, KBASE_REG_GPU_WR, ++ &trace_buffer->data_mapping); ++ if (ret) { ++ dev_err(kbdev->dev, "Failed to map GPU-writable MCU shared memory for a 
trace buffer\n"); ++ goto out; ++ } ++ ++ extract_gpu_va = ++ (kbdev->csf.firmware_trace_buffers.mcu_rw.va_reg->start_pfn << PAGE_SHIFT) + ++ mcu_rw_offset; ++ extract_cpu_va = (u32*)( ++ kbdev->csf.firmware_trace_buffers.mcu_rw.cpu_addr + ++ mcu_rw_offset); ++ insert_gpu_va = ++ (kbdev->csf.firmware_trace_buffers.mcu_write.va_reg->start_pfn << PAGE_SHIFT) + ++ mcu_write_offset; ++ insert_cpu_va = (u32*)( ++ kbdev->csf.firmware_trace_buffers.mcu_write.cpu_addr + ++ mcu_write_offset); ++ data_buffer_gpu_va = ++ (trace_buffer->data_mapping.va_reg->start_pfn << PAGE_SHIFT); ++ ++ /* Initialize the Extract variable */ ++ *extract_cpu_va = 0; ++ ++ /* Each FW address shall be mapped and set individually, as we can't ++ * assume anything about their location in the memory address space. ++ */ ++ kbase_csf_update_firmware_memory( ++ kbdev, trace_buffer->gpu_va.data_address, data_buffer_gpu_va); ++ kbase_csf_update_firmware_memory( ++ kbdev, trace_buffer->gpu_va.insert_address, insert_gpu_va); ++ kbase_csf_update_firmware_memory( ++ kbdev, trace_buffer->gpu_va.extract_address, extract_gpu_va); ++ kbase_csf_update_firmware_memory( ++ kbdev, trace_buffer->gpu_va.size_address, ++ trace_buffer->num_pages << PAGE_SHIFT); ++ ++ trace_enable_size_dwords = ++ (trace_buffer->trace_enable_entry_count + 31) >> 5; ++ ++ for (i = 0; i < trace_enable_size_dwords; i++) { ++ kbase_csf_update_firmware_memory( ++ kbdev, trace_buffer->gpu_va.trace_enable + i*4, ++ trace_buffer->trace_enable_init_mask[i]); ++ } ++ ++ /* Store CPU virtual addresses for permanently mapped variables */ ++ trace_buffer->cpu_va.insert_cpu_va = insert_cpu_va; ++ trace_buffer->cpu_va.extract_cpu_va = extract_cpu_va; ++ ++ /* Update offsets */ ++ mcu_write_offset += cache_line_alignment; ++ mcu_rw_offset += cache_line_alignment; ++ } ++ ++out: ++ return ret; ++} ++ ++void kbase_csf_firmware_trace_buffers_term(struct kbase_device *kbdev) ++{ ++ if (list_empty(&kbdev->csf.firmware_trace_buffers.list)) ++ return; ++ ++ 
while (!list_empty(&kbdev->csf.firmware_trace_buffers.list)) { ++ struct firmware_trace_buffer *trace_buffer; ++ ++ trace_buffer = list_first_entry(&kbdev->csf.firmware_trace_buffers.list, ++ struct firmware_trace_buffer, node); ++ kbase_csf_firmware_mcu_shared_mapping_term(kbdev, &trace_buffer->data_mapping); ++ list_del(&trace_buffer->node); ++ ++ kfree(trace_buffer); ++ } ++ ++ kbase_csf_firmware_mcu_shared_mapping_term( ++ kbdev, &kbdev->csf.firmware_trace_buffers.mcu_rw); ++ kbase_csf_firmware_mcu_shared_mapping_term( ++ kbdev, &kbdev->csf.firmware_trace_buffers.mcu_write); ++} ++ ++int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev, ++ const u32 *entry, ++ unsigned int size, ++ bool updatable) ++{ ++ const char *name = (char *)&entry[7]; ++ const unsigned int name_len = size - TRACE_BUFFER_ENTRY_NAME_OFFSET; ++ struct firmware_trace_buffer *trace_buffer; ++ unsigned int i; ++ ++ /* Allocate enough space for struct firmware_trace_buffer and the ++ * trace buffer name (with NULL termination). 
++ */ ++ trace_buffer = ++ kmalloc(sizeof(*trace_buffer) + name_len + 1, GFP_KERNEL); ++ ++ if (!trace_buffer) ++ return -ENOMEM; ++ ++ memcpy(&trace_buffer->name, name, name_len); ++ trace_buffer->name[name_len] = '\0'; ++ ++ for (i = 0; i < ARRAY_SIZE(trace_buffer_data); i++) { ++ if (!strcmp(trace_buffer_data[i].name, trace_buffer->name)) { ++ unsigned int j; ++ ++ trace_buffer->kbdev = kbdev; ++ trace_buffer->updatable = updatable; ++ trace_buffer->type = entry[0]; ++ trace_buffer->gpu_va.size_address = entry[1]; ++ trace_buffer->gpu_va.insert_address = entry[2]; ++ trace_buffer->gpu_va.extract_address = entry[3]; ++ trace_buffer->gpu_va.data_address = entry[4]; ++ trace_buffer->gpu_va.trace_enable = entry[5]; ++ trace_buffer->trace_enable_entry_count = entry[6]; ++ trace_buffer->num_pages = trace_buffer_data[i].size; ++ ++ for (j = 0; j < CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX; j++) { ++ trace_buffer->trace_enable_init_mask[j] = ++ trace_buffer_data[i].trace_enable_init_mask[j]; ++ } ++ break; ++ } ++ } ++ ++ if (i < ARRAY_SIZE(trace_buffer_data)) { ++ list_add(&trace_buffer->node, &kbdev->csf.firmware_trace_buffers.list); ++ dev_dbg(kbdev->dev, "Trace buffer '%s'", trace_buffer->name); ++ } else { ++ dev_dbg(kbdev->dev, "Unknown trace buffer '%s'", trace_buffer->name); ++ kfree(trace_buffer); ++ } ++ ++ return 0; ++} ++ ++void kbase_csf_firmware_reload_trace_buffers_data(struct kbase_device *kbdev) ++{ ++ struct firmware_trace_buffer *trace_buffer; ++ u32 mcu_rw_offset = 0, mcu_write_offset = 0; ++ const u32 cache_line_alignment = kbase_get_cache_line_alignment(kbdev); ++ ++ list_for_each_entry(trace_buffer, &kbdev->csf.firmware_trace_buffers.list, node) { ++ u32 extract_gpu_va, insert_gpu_va, data_buffer_gpu_va, ++ trace_enable_size_dwords; ++ u32 *extract_cpu_va, *insert_cpu_va; ++ unsigned int i; ++ ++ /* Rely on the fact that all required mappings already exist */ ++ extract_gpu_va = ++ (kbdev->csf.firmware_trace_buffers.mcu_rw.va_reg->start_pfn << 
PAGE_SHIFT) + ++ mcu_rw_offset; ++ extract_cpu_va = (u32*)( ++ kbdev->csf.firmware_trace_buffers.mcu_rw.cpu_addr + ++ mcu_rw_offset); ++ insert_gpu_va = ++ (kbdev->csf.firmware_trace_buffers.mcu_write.va_reg->start_pfn << PAGE_SHIFT) + ++ mcu_write_offset; ++ insert_cpu_va = (u32*)( ++ kbdev->csf.firmware_trace_buffers.mcu_write.cpu_addr + ++ mcu_write_offset); ++ data_buffer_gpu_va = ++ (trace_buffer->data_mapping.va_reg->start_pfn << PAGE_SHIFT); ++ ++ /* Notice that the function only re-updates firmware memory locations ++ * with information that allows access to the trace buffers without ++ * really resetting their state. For instance, the Insert offset will ++ * not change and, as a consequence, the Extract offset is not going ++ * to be reset to keep consistency. ++ */ ++ ++ /* Each FW address shall be mapped and set individually, as we can't ++ * assume anything about their location in the memory address space. ++ */ ++ kbase_csf_update_firmware_memory( ++ kbdev, trace_buffer->gpu_va.data_address, data_buffer_gpu_va); ++ kbase_csf_update_firmware_memory( ++ kbdev, trace_buffer->gpu_va.insert_address, insert_gpu_va); ++ kbase_csf_update_firmware_memory( ++ kbdev, trace_buffer->gpu_va.extract_address, extract_gpu_va); ++ kbase_csf_update_firmware_memory( ++ kbdev, trace_buffer->gpu_va.size_address, ++ trace_buffer->num_pages << PAGE_SHIFT); ++ ++ trace_enable_size_dwords = ++ (trace_buffer->trace_enable_entry_count + 31) >> 5; ++ ++ for (i = 0; i < trace_enable_size_dwords; i++) { ++ kbase_csf_update_firmware_memory( ++ kbdev, trace_buffer->gpu_va.trace_enable + i*4, ++ trace_buffer->trace_enable_init_mask[i]); ++ } ++ ++ /* Store CPU virtual addresses for permanently mapped variables, ++ * as they might have slightly changed. 
++ */ ++ trace_buffer->cpu_va.insert_cpu_va = insert_cpu_va; ++ trace_buffer->cpu_va.extract_cpu_va = extract_cpu_va; ++ ++ /* Update offsets */ ++ mcu_write_offset += cache_line_alignment; ++ mcu_rw_offset += cache_line_alignment; ++ } ++} ++ ++struct firmware_trace_buffer *kbase_csf_firmware_get_trace_buffer( ++ struct kbase_device *kbdev, const char *name) ++{ ++ struct firmware_trace_buffer *trace_buffer; ++ ++ list_for_each_entry(trace_buffer, &kbdev->csf.firmware_trace_buffers.list, node) { ++ if (!strcmp(trace_buffer->name, name)) ++ return trace_buffer; ++ } ++ ++ return NULL; ++} ++EXPORT_SYMBOL(kbase_csf_firmware_get_trace_buffer); ++ ++unsigned int kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count( ++ const struct firmware_trace_buffer *trace_buffer) ++{ ++ return trace_buffer->trace_enable_entry_count; ++} ++EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count); ++ ++static void kbasep_csf_firmware_trace_buffer_update_trace_enable_bit( ++ struct firmware_trace_buffer *tb, unsigned int bit, bool value) ++{ ++ struct kbase_device *kbdev = tb->kbdev; ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ if (bit < tb->trace_enable_entry_count) { ++ unsigned int trace_enable_reg_offset = bit >> 5; ++ u32 trace_enable_bit_mask = 1u << (bit & 0x1F); ++ ++ if (value) { ++ tb->trace_enable_init_mask[trace_enable_reg_offset] |= ++ trace_enable_bit_mask; ++ } else { ++ tb->trace_enable_init_mask[trace_enable_reg_offset] &= ++ ~trace_enable_bit_mask; ++ } ++ ++ /* This is not strictly needed as the caller is supposed to ++ * reload the firmware image (through GPU reset) after updating ++ * the bitmask. Otherwise there is no guarantee that firmware ++ * will take into account the updated bitmask for all types of ++ * trace buffers, since firmware could continue to use the ++ * value of bitmask it cached after the boot. 
++ */ ++ kbase_csf_update_firmware_memory( ++ kbdev, ++ tb->gpu_va.trace_enable + trace_enable_reg_offset * 4, ++ tb->trace_enable_init_mask[trace_enable_reg_offset]); ++ } ++} ++ ++int kbase_csf_firmware_trace_buffer_update_trace_enable_bit( ++ struct firmware_trace_buffer *tb, unsigned int bit, bool value) ++{ ++ struct kbase_device *kbdev = tb->kbdev; ++ int err = 0; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ++ /* If trace buffer update cannot be performed with ++ * FIRMWARE_CONFIG_UPDATE then we need to do a ++ * silent reset before we update the memory. ++ */ ++ if (!tb->updatable) { ++ /* If there is already a GPU reset pending then inform ++ * the User to retry the update. ++ */ ++ if (kbase_reset_gpu_silent(kbdev)) { ++ dev_warn( ++ kbdev->dev, ++ "GPU reset already in progress when enabling firmware timeline."); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ return -EAGAIN; ++ } ++ } ++ ++ kbasep_csf_firmware_trace_buffer_update_trace_enable_bit(tb, bit, ++ value); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ if (tb->updatable) ++ err = kbase_csf_trigger_firmware_config_update(kbdev); ++ ++ return err; ++} ++EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_update_trace_enable_bit); ++ ++bool kbase_csf_firmware_trace_buffer_is_empty( ++ const struct firmware_trace_buffer *trace_buffer) ++{ ++ return *(trace_buffer->cpu_va.insert_cpu_va) == ++ *(trace_buffer->cpu_va.extract_cpu_va); ++} ++EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_is_empty); ++ ++unsigned int kbase_csf_firmware_trace_buffer_read_data( ++ struct firmware_trace_buffer *trace_buffer, u8 *data, unsigned int num_bytes) ++{ ++ unsigned int bytes_copied; ++ u8 *data_cpu_va = trace_buffer->data_mapping.cpu_addr; ++ u32 extract_offset = *(trace_buffer->cpu_va.extract_cpu_va); ++ u32 insert_offset = *(trace_buffer->cpu_va.insert_cpu_va); ++ u32 buffer_size = trace_buffer->num_pages << PAGE_SHIFT; ++ ++ if (insert_offset >= 
extract_offset) { ++ bytes_copied = min_t(unsigned int, num_bytes, ++ (insert_offset - extract_offset)); ++ memcpy(data, &data_cpu_va[extract_offset], bytes_copied); ++ extract_offset += bytes_copied; ++ } else { ++ unsigned int bytes_copied_head, bytes_copied_tail; ++ ++ bytes_copied_tail = min_t(unsigned int, num_bytes, ++ (buffer_size - extract_offset)); ++ memcpy(data, &data_cpu_va[extract_offset], bytes_copied_tail); ++ ++ bytes_copied_head = min_t(unsigned int, ++ (num_bytes - bytes_copied_tail), insert_offset); ++ memcpy(&data[bytes_copied_tail], data_cpu_va, bytes_copied_head); ++ ++ bytes_copied = bytes_copied_head + bytes_copied_tail; ++ extract_offset += bytes_copied; ++ if (extract_offset >= buffer_size) ++ extract_offset = bytes_copied_head; ++ } ++ ++ *(trace_buffer->cpu_va.extract_cpu_va) = extract_offset; ++ ++ return bytes_copied; ++} ++EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_read_data); ++ ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ ++#define U32_BITS 32 ++static u64 get_trace_buffer_active_mask64(struct firmware_trace_buffer *tb) ++{ ++ u64 active_mask = tb->trace_enable_init_mask[0]; ++ ++ if (tb->trace_enable_entry_count > U32_BITS) ++ active_mask |= (u64)tb->trace_enable_init_mask[1] << U32_BITS; ++ ++ return active_mask; ++} ++ ++static void update_trace_buffer_active_mask64(struct firmware_trace_buffer *tb, ++ u64 mask) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < tb->trace_enable_entry_count; i++) ++ kbasep_csf_firmware_trace_buffer_update_trace_enable_bit( ++ tb, i, (mask >> i) & 1); ++} ++ ++static int set_trace_buffer_active_mask64(struct firmware_trace_buffer *tb, ++ u64 mask) ++{ ++ struct kbase_device *kbdev = tb->kbdev; ++ unsigned long flags; ++ int err = 0; ++ ++ if (!tb->updatable) { ++ /* If there is already a GPU reset pending, need a retry */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ if (kbase_reset_gpu_silent(kbdev)) ++ err = -EAGAIN; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } ++ ++ if (!err) { 
++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ update_trace_buffer_active_mask64(tb, mask); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ /* if we can update the config we need to just trigger ++ * FIRMWARE_CONFIG_UPDATE. ++ */ ++ if (tb->updatable) ++ err = kbase_csf_trigger_firmware_config_update(kbdev); ++ } ++ ++ return err; ++} ++ ++static int kbase_csf_firmware_trace_enable_mask_read(void *data, u64 *val) ++{ ++ struct kbase_device *kbdev = (struct kbase_device *)data; ++ struct firmware_trace_buffer *tb = ++ kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME); ++ ++ if (tb == NULL) { ++ dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); ++ return -EIO; ++ } ++ /* The enabled traces limited to u64 here, regarded practical */ ++ *val = get_trace_buffer_active_mask64(tb); ++ return 0; ++} ++ ++static int kbase_csf_firmware_trace_enable_mask_write(void *data, u64 val) ++{ ++ struct kbase_device *kbdev = (struct kbase_device *)data; ++ struct firmware_trace_buffer *tb = ++ kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME); ++ u64 new_mask; ++ unsigned int enable_bits_count; ++ ++ if (tb == NULL) { ++ dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); ++ return -EIO; ++ } ++ ++ /* Ignore unsupported types; the mask below is built in 64 bits (no int shift UB) */ ++ enable_bits_count = ++ kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count(tb); ++ if (enable_bits_count > 64) { ++ dev_dbg(kbdev->dev, "Limit enabled bits count from %u to 64", ++ enable_bits_count); ++ enable_bits_count = 64; ++ } ++ new_mask = enable_bits_count ? (val & (~(u64)0 >> (64 - enable_bits_count))) : 0; ++ ++ if (new_mask != get_trace_buffer_active_mask64(tb)) ++ return set_trace_buffer_active_mask64(tb, new_mask); ++ else ++ return 0; ++} ++ ++static int kbasep_csf_firmware_trace_debugfs_open(struct inode *in, ++ struct file *file) ++{ ++ struct kbase_device *kbdev = in->i_private; ++ ++ file->private_data = kbdev; ++ dev_dbg(kbdev->dev, "Opened firmware trace buffer dump debugfs file"); ++ ++ 
return 0; ++} ++ ++static ssize_t kbasep_csf_firmware_trace_debugfs_read(struct file *file, ++ char __user *buf, size_t size, loff_t *ppos) ++{ ++ struct kbase_device *kbdev = file->private_data; ++ u8 *pbyte; ++ unsigned int n_read; ++ unsigned long not_copied; ++ /* Limit the kernel buffer to no more than two pages */ ++ size_t mem = MIN(size, 2 * PAGE_SIZE); ++ unsigned long flags; ++ ++ struct firmware_trace_buffer *tb = ++ kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME); ++ ++ if (tb == NULL) { ++ dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); ++ return -EIO; ++ } ++ ++ pbyte = kmalloc(mem, GFP_KERNEL); ++ if (pbyte == NULL) { ++ dev_err(kbdev->dev, "Couldn't allocate memory for trace buffer dump"); ++ return -ENOMEM; ++ } ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ n_read = kbase_csf_firmware_trace_buffer_read_data(tb, pbyte, mem); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ /* Do the copy, if we have obtained some trace data */ ++ not_copied = (n_read) ? 
copy_to_user(buf, pbyte, n_read) : 0; ++ kfree(pbyte); ++ ++ if (!not_copied) { ++ *ppos += n_read; ++ return n_read; ++ } ++ ++ dev_err(kbdev->dev, "Couldn't copy trace buffer data to user space buffer"); ++ return -EFAULT; ++} ++ ++ ++DEFINE_SIMPLE_ATTRIBUTE(kbase_csf_firmware_trace_enable_mask_fops, ++ kbase_csf_firmware_trace_enable_mask_read, ++ kbase_csf_firmware_trace_enable_mask_write, "%llx\n"); ++ ++static const struct file_operations kbasep_csf_firmware_trace_debugfs_fops = { ++ .owner = THIS_MODULE, ++ .open = kbasep_csf_firmware_trace_debugfs_open, ++ .read = kbasep_csf_firmware_trace_debugfs_read, ++ .llseek = no_llseek, ++}; ++ ++void kbase_csf_firmware_trace_buffer_debugfs_init(struct kbase_device *kbdev) ++{ ++ debugfs_create_file("fw_trace_enable_mask", 0644, ++ kbdev->mali_debugfs_directory, kbdev, ++ &kbase_csf_firmware_trace_enable_mask_fops); ++ ++ debugfs_create_file("fw_traces", 0444, ++ kbdev->mali_debugfs_directory, kbdev, ++ &kbasep_csf_firmware_trace_debugfs_fops); ++} ++#endif /* CONFIG_DEBUG_FS */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_trace_buffer.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_trace_buffer.h +new file mode 100644 +index 0000000..b9f481d +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_trace_buffer.h +@@ -0,0 +1,182 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#ifndef _KBASE_CSF_TRACE_BUFFER_H_ ++#define _KBASE_CSF_TRACE_BUFFER_H_ ++ ++#include ++ ++#define CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX (4) ++#define FW_TRACE_BUF_NAME "fwlog" ++ ++/* Forward declarations */ ++struct firmware_trace_buffer; ++struct kbase_device; ++ ++/** ++ * kbase_csf_firmware_trace_buffers_init - Initialize trace buffers ++ * ++ * Allocate resources for trace buffers. In particular: ++ * - One memory page of GPU-readable, CPU-writable memory is used for ++ * the Extract variables of all trace buffers. ++ * - One memory page of GPU-writable, CPU-readable memory is used for ++ * the Insert variables of all trace buffers. ++ * - A data buffer of GPU-writable, CPU-readable memory is allocated ++ * for each trace buffer. ++ * ++ * After that, firmware addresses are written with pointers to the ++ * insert, extract and data buffer variables. The size and the trace ++ * enable bits are not dereferenced by the GPU and shall be written ++ * in the firmware addresses directly. ++ * ++ * This function relies on the assumption that the list of ++ * firmware_trace_buffer elements in the device has already been ++ * populated with data from the firmware image parsing. ++ * ++ * Return: 0 if success, or an error code on failure. 
++ * ++ * @kbdev: Device pointer ++ */ ++int kbase_csf_firmware_trace_buffers_init(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_firmware_trace_buffers_term - Terminate trace buffers ++ * ++ * @kbdev: Device pointer ++ */ ++void kbase_csf_firmware_trace_buffers_term(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_firmware_parse_trace_buffer_entry - Process a "trace buffer" section ++ * ++ * Read a "trace buffer" section adding metadata for the related trace buffer ++ * to the kbase_device:csf.firmware_trace_buffers list. ++ * ++ * Unexpected trace buffers will not be parsed and, as a consequence, ++ * will not be initialized. ++ * ++ * Return: 0 if successful, negative error code on failure. ++ * ++ * @kbdev: Kbase device structure ++ * @entry: Pointer to the section ++ * @size: Size (in bytes) of the section ++ * @updatable: Indicates whether config items can be updated with FIRMWARE_CONFIG_UPDATE ++ */ ++int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev, ++ const u32 *entry, ++ unsigned int size, ++ bool updatable); ++ ++/** ++ * kbase_csf_firmware_reload_trace_buffers_data - ++ * Reload trace buffers data for firmware reboot ++ * ++ * Helper function used when rebooting the firmware to reload the initial setup ++ * for all the trace buffers which have been previously parsed and initialized. ++ * ++ * Almost all of the operations done in the initialization process are ++ * replicated, with the difference that they might be done in a different order ++ * and that the variables of a given trace buffer may be mapped to different ++ * offsets within the same existing mappings. ++ * ++ * In other words, the re-initialization done by this function will be ++ * equivalent but not necessarily identical to the original initialization. 
++ * ++ * @kbdev: Device pointer ++ */ ++void kbase_csf_firmware_reload_trace_buffers_data(struct kbase_device *kbdev); ++ ++/** ++ * kbase_csf_firmware_get_trace_buffer - Get a trace buffer ++ * ++ * Return: handle to a trace buffer, given the name, or NULL if a trace buffer ++ * with that name couldn't be found. ++ * ++ * @kbdev: Device pointer ++ * @name: Name of the trace buffer to find ++ */ ++struct firmware_trace_buffer *kbase_csf_firmware_get_trace_buffer( ++ struct kbase_device *kbdev, const char *name); ++ ++/** ++ * kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count - ++ * Get number of trace enable bits for a trace buffer ++ * ++ * Return: Number of trace enable bits in a trace buffer. ++ * ++ * @trace_buffer: Trace buffer handle ++ */ ++unsigned int kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count( ++ const struct firmware_trace_buffer *trace_buffer); ++ ++/** ++ * kbase_csf_firmware_trace_buffer_update_trace_enable_bit - ++ * Update a trace enable bit ++ * ++ * Update the value of a given trace enable bit. ++ * ++ * @trace_buffer: Trace buffer handle ++ * @bit: Bit to update ++ * @value: New value for the given bit ++ * ++ * Return: 0 if successful, negative error code on failure. ++ */ ++int kbase_csf_firmware_trace_buffer_update_trace_enable_bit( ++ struct firmware_trace_buffer *trace_buffer, unsigned int bit, ++ bool value); ++ ++/** ++ * kbase_csf_firmware_trace_buffer_is_empty - Empty trace buffer predicate ++ * ++ * Return: True if the trace buffer is empty, or false otherwise. ++ * ++ * @trace_buffer: Trace buffer handle ++ */ ++bool kbase_csf_firmware_trace_buffer_is_empty( ++ const struct firmware_trace_buffer *trace_buffer); ++ ++/** ++ * kbase_csf_firmware_trace_buffer_read_data - Read data from a trace buffer ++ * ++ * Read available data from a trace buffer. The client provides a data buffer ++ * of a given size and the maximum number of bytes to read. ++ * ++ * Return: Number of bytes read from the trace buffer. 
++ * ++ * @trace_buffer: Trace buffer handle ++ * @data: Pointer to a client-allocated buffer where data shall be written. ++ * @num_bytes: Maximum number of bytes to read from the trace buffer. ++ */ ++unsigned int kbase_csf_firmware_trace_buffer_read_data( ++ struct firmware_trace_buffer *trace_buffer, u8 *data, unsigned int num_bytes); ++ ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++/** ++ * kbase_csf_firmware_trace_buffer_debugfs_init() - Add debugfs entries for setting ++ * enable mask and dumping the binary ++ * firmware trace buffer ++ * ++ * @kbdev: Pointer to the device ++ */ ++void kbase_csf_firmware_trace_buffer_debugfs_init(struct kbase_device *kbdev); ++#endif /* CONFIG_DEBUG_FS */ ++ ++#endif /* _KBASE_CSF_TRACE_BUFFER_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/debug/Kbuild +new file mode 100644 +index 0000000..1682c0f +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/Kbuild +@@ -0,0 +1,27 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. 
++# ++# ++ ++mali_kbase-y += debug/mali_kbase_debug_ktrace.o ++ ++ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) ++ mali_kbase-y += debug/backend/mali_kbase_debug_ktrace_csf.o ++else ++ mali_kbase-y += debug/backend/mali_kbase_debug_ktrace_jm.o ++endif +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_codes_csf.h b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_codes_csf.h +new file mode 100644 +index 0000000..d05f802 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_codes_csf.h +@@ -0,0 +1,278 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ ++ ++/* ++ * ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE ***** ++ * ***** DO NOT INCLUDE DIRECTLY ***** ++ * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** ++ */ ++ ++/* ++ * The purpose of this header file is just to contain a list of trace code ++ * identifiers ++ * ++ * When updating this file, also remember to update ++ * mali_kbase_debug_linux_ktrace_csf.h ++ * ++ * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THAT ++ * DESCRIBED IN mali_kbase_debug_ktrace_codes.h ++ */ ++ ++#if 0 /* Dummy section to avoid breaking formatting */ ++int dummy_array[] = { ++#endif ++ /* ++ * Generic CSF events ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(EVICT_CTX_SLOTS), ++ /* info_val[0:7] == fw version_minor ++ * info_val[15:8] == fw version_major ++ * info_val[63:32] == fw version_hash ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(FIRMWARE_BOOT), ++ KBASE_KTRACE_CODE_MAKE_CODE(FIRMWARE_REBOOT), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK_END), ++ /* info_val == total number of runnable groups across all kctxs */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_END), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RESET), ++ /* info_val = timeout in ms */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_WAIT_PROTM_QUIT), ++ /* info_val = remaining ms timeout, or 0 if timedout */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_WAIT_PROTM_QUIT_DONE), ++ KBASE_KTRACE_CODE_MAKE_CODE(SYNC_UPDATE_EVENT), ++ KBASE_KTRACE_CODE_MAKE_CODE(SYNC_UPDATE_EVENT_NOTIFY_GPU), ++ ++ /* info_val = JOB_IRQ_STATUS */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSF_INTERRUPT), ++ /* info_val = JOB_IRQ_STATUS */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSF_INTERRUPT_END), ++ /* info_val = JOB_IRQ_STATUS */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_PROCESS), ++ /* info_val = GLB_REQ ^ GLB_ACQ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(GLB_REQ_ACQ), ++ /* info_val[31:0] = num non idle offslot groups ++ * info_val[32] = 
scheduler can suspend on idle ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_CAN_IDLE), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_ADVANCE_TICK), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NOADVANCE_TICK), ++ /* kctx is added to the back of the list */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_INSERT_RUNNABLE), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_REMOVE_RUNNABLE), ++ /* kctx is moved to the back of the list */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_ROTATE_RUNNABLE), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_HEAD_RUNNABLE), ++ ++ KBASE_KTRACE_CODE_MAKE_CODE(IDLE_WORKER_BEGIN), ++ /* 4-bit encoding of boolean values (ease of reading as hex values) ++ * ++ * info_val[3:0] = was reset active/failed to be prevented ++ * info_val[7:4] = whether scheduler was both idle and suspendable ++ * info_val[11:8] = whether all groups were suspended ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(IDLE_WORKER_END), ++ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_SYNC_UPDATE_WORKER_BEGIN), ++ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_SYNC_UPDATE_WORKER_END), ++ ++ /* info_val = bitmask of slots that gave an ACK for STATUS_UPDATE */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SLOTS_STATUS_UPDATE_ACK), ++ ++ /* ++ * Group events ++ */ ++ /* info_val[2:0] == CSG_REQ state issued ++ * info_val[19:16] == as_nr ++ * info_val[63:32] == endpoint config (max number of endpoints allowed) ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_START), ++ /* info_val == CSG_REQ state issued */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_STOP), ++ /* info_val == CSG_ACK state */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_STARTED), ++ /* info_val == CSG_ACK state */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_STOPPED), ++ /* info_val == slot cleaned */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_CLEANED), ++ /* info_val = slot requesting STATUS_UPDATE */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_STATUS_UPDATE), ++ /* info_val = scheduler's new csg_slots_idle_mask[0] ++ * group->csg_nr indicates which bit was set ++ */ ++ 
KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_IDLE_SET), ++ /* info_val = scheduler's new csg_slots_idle_mask[0] ++ * group->csg_nr indicates which bit was cleared ++ * ++ * in case of no group, multiple bits may have been updated ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_IDLE_CLEAR), ++ /* info_val == previous priority */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_PRIO_UPDATE), ++ /* info_val == CSG_REQ ^ CSG_ACK */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_SYNC_UPDATE_INTERRUPT), ++ /* info_val == CSG_REQ ^ CSG_ACK */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_IDLE_INTERRUPT), ++ /* info_val == CSG_REQ ^ CSG_ACK */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_PROGRESS_TIMER_INTERRUPT), ++ /* info_val[31:0] == CSG_REQ ^ CSG_ACQ ++ * info_val[63:32] == CSG_IRQ_REQ ^ CSG_IRQ_ACK ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_PROCESS_END), ++ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_SYNC_UPDATE_DONE), ++ /* info_val == run state of the group */ ++ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_DESCHEDULE), ++ /* info_val == run state of the group */ ++ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_SCHEDULE), ++ /* info_val[31:0] == new run state of the evicted group ++ * info_val[63:32] == number of runnable groups ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_EVICT_SCHED), ++ ++ /* info_val == new num_runnable_grps ++ * group is added to the back of the list for its priority level ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_INSERT_RUNNABLE), ++ /* info_val == new num_runnable_grps ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_REMOVE_RUNNABLE), ++ /* info_val == num_runnable_grps ++ * group is moved to the back of the list for its priority level ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_ROTATE_RUNNABLE), ++ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_HEAD_RUNNABLE), ++ /* info_val == new num_idle_wait_grps ++ * group is added to the back of the list ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_INSERT_IDLE_WAIT), ++ /* info_val == new num_idle_wait_grps ++ * group is added to the back of the list ++ */ ++ 
KBASE_KTRACE_CODE_MAKE_CODE(GROUP_REMOVE_IDLE_WAIT), ++ KBASE_KTRACE_CODE_MAKE_CODE(GROUP_HEAD_IDLE_WAIT), ++ ++ /* info_val == is scheduler running with protected mode tasks */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_CHECK_PROTM_ENTER), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_ENTER_PROTM), ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_EXIT_PROTM), ++ /* info_val[31:0] == number of GPU address space slots in use ++ * info_val[63:32] == number of runnable groups ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOP_GRP), ++ /* info_val == new count of off-slot non-idle groups ++ * no group indicates it was set rather than incremented ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_INC), ++ /* info_val == new count of off-slot non-idle groups */ ++ KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_DEC), ++ ++ KBASE_KTRACE_CODE_MAKE_CODE(PROTM_EVENT_WORKER_BEGIN), ++ KBASE_KTRACE_CODE_MAKE_CODE(PROTM_EVENT_WORKER_END), ++ ++ /* ++ * Group + Queue events ++ */ ++ /* info_val == queue->enabled */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSI_START), ++ /* info_val == queue->enabled before stop */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSI_STOP), ++ KBASE_KTRACE_CODE_MAKE_CODE(CSI_STOP_REQUESTED), ++ /* info_val == CS_REQ ^ CS_ACK that were not processed due to the group ++ * being suspended ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSI_IGNORED_INTERRUPTS_GROUP_SUSPEND), ++ /* info_val == CS_REQ ^ CS_ACK */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSI_FAULT_INTERRUPT), ++ /* info_val == CS_REQ ^ CS_ACK */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSI_TILER_OOM_INTERRUPT), ++ /* info_val == CS_REQ ^ CS_ACK */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSI_PROTM_PEND_INTERRUPT), ++ /* info_val == CS_ACK_PROTM_PEND ^ CS_REQ_PROTM_PEND */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CSI_PROTM_ACK), ++ /* info_val == group->run_State (for group the queue is bound to) */ ++ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_START), ++ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_STOP), ++ /* info_val == contents of CS_STATUS_WAIT_SYNC_POINTER */ ++ 
KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE), ++ /* info_val == bool for result of the evaluation */ ++ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_EVALUATED), ++ /* info_val == contents of CS_STATUS_WAIT */ ++ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_STATUS_WAIT), ++ /* info_val == current sync value pointed to by queue->sync_ptr */ ++ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_CURRENT_VAL), ++ /* info_val == current value of CS_STATUS_WAIT_SYNC_VALUE */ ++ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_TEST_VAL), ++ /* info_val == current value of CS_STATUS_BLOCKED_REASON */ ++ KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_BLOCKED_REASON), ++ /* info_val = group's new protm_pending_bitmap[0] ++ * queue->csi_index indicates which bit was set ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(PROTM_PENDING_SET), ++ /* info_val = group's new protm_pending_bitmap[0] ++ * queue->csi_index indicates which bit was cleared ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(PROTM_PENDING_CLEAR), ++ ++ /* ++ * KCPU queue events ++ */ ++ /* KTrace info_val == KCPU queue fence context ++ * KCPU extra_info_val == N/A. ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(KCPU_QUEUE_NEW), ++ /* KTrace info_val == Number of pending commands in KCPU queue when ++ * it is destroyed. ++ * KCPU extra_info_val == Number of CQS wait operations present in ++ * the KCPU queue when it is destroyed. ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(KCPU_QUEUE_DESTROY), ++ /* KTrace info_val == CQS event memory address ++ * KCPU extra_info_val == Upper 32 bits of event memory, i.e. contents ++ * of error field. ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CQS_SET), ++ /* KTrace info_val == Number of CQS objects to be waited upon ++ * KCPU extra_info_val == N/A. ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CQS_WAIT_START), ++ /* KTrace info_val == CQS event memory address ++ * KCPU extra_info_val == 1 if CQS was signaled with an error and queue ++ * inherited the error, otherwise 0. 
++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(CQS_WAIT_END), ++ /* KTrace info_val == Fence context ++ * KCPU extra_info_val == Fence seqno. ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(FENCE_SIGNAL), ++ /* KTrace info_val == Fence context ++ * KCPU extra_info_val == Fence seqno. ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(FENCE_WAIT_START), ++ /* KTrace info_val == Fence context ++ * KCPU extra_info_val == Fence seqno. ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(FENCE_WAIT_END), ++ ++#if 0 /* Dummy section to avoid breaking formatting */ ++}; ++#endif ++ ++/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_codes_jm.h b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_codes_jm.h +index d534f30..f419f70 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_codes_jm.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_codes_jm.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2011-2015,2018-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2015, 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +@@ -30,6 +29,9 @@ + * The purpose of this header file is just to contain a list of trace code + * identifiers + * ++ * When updating this file, also remember to update ++ * mali_kbase_debug_linux_ktrace_jm.h ++ * + * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THAT + * DESCRIBED IN mali_kbase_debug_ktrace_codes.h + */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_csf.c b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_csf.c +new file mode 100644 +index 0000000..824ca4b +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_csf.c +@@ -0,0 +1,193 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ ++ ++#include ++#include "debug/mali_kbase_debug_ktrace_internal.h" ++#include "debug/backend/mali_kbase_debug_ktrace_csf.h" ++ ++#if KBASE_KTRACE_TARGET_RBUF ++ ++void kbasep_ktrace_backend_format_header(char *buffer, int sz, s32 *written) ++{ ++ *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), ++ "group,slot,prio,csi,kcpu"), 0); ++} ++ ++void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, ++ char *buffer, int sz, s32 *written) ++{ ++ const union kbase_ktrace_backend * const be_msg = &trace_msg->backend; ++ /* At present, no need to check for KBASE_KTRACE_FLAG_BACKEND, as the ++ * other backend-specific flags currently imply this anyway ++ */ ++ ++ /* group parts */ ++ if (be_msg->gpu.flags & KBASE_KTRACE_FLAG_CSF_GROUP) { ++ const s8 slot = be_msg->gpu.csg_nr; ++ /* group,slot, */ ++ *written += MAX(snprintf(buffer + *written, ++ MAX(sz - *written, 0), ++ "%u,%d,", be_msg->gpu.group_handle, slot), 0); ++ ++ /* prio */ ++ if (slot >= 0) ++ *written += MAX(snprintf(buffer + *written, ++ MAX(sz - *written, 0), ++ "%u", be_msg->gpu.slot_prio), 0); ++ ++ /* , */ ++ *written += MAX(snprintf(buffer + *written, ++ MAX(sz - *written, 0), ++ ","), 0); ++ } else { ++ /* No group,slot,prio fields, but ensure ending with "," */ ++ *written += MAX(snprintf(buffer + *written, ++ MAX(sz - *written, 0), ++ ",,,"), 0); ++ } ++ ++ /* queue parts: csi */ ++ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_CSF_QUEUE) ++ *written += MAX(snprintf(buffer + *written, ++ MAX(sz - *written, 0), ++ "%d", be_msg->gpu.csi_index), 0); ++ ++ /* , */ ++ *written += MAX(snprintf(buffer + *written, ++ MAX(sz - *written, 0), ++ ","), 0); ++ ++ if (be_msg->gpu.flags & KBASE_KTRACE_FLAG_CSF_KCPU) { ++ /* kcpu data */ ++ *written += MAX(snprintf(buffer + *written, ++ MAX(sz - *written, 0), ++ "kcpu %d (0x%llx)", ++ be_msg->kcpu.id, ++ be_msg->kcpu.extra_info_val), 0); ++ } ++ ++ /* Don't end with a trailing "," - this is a 'standalone' formatted 
++ * msg, caller will handle the delimiters ++ */ ++} ++ ++void kbasep_ktrace_add_csf(struct kbase_device *kbdev, ++ enum kbase_ktrace_code code, struct kbase_queue_group *group, ++ struct kbase_queue *queue, kbase_ktrace_flag_t flags, ++ u64 info_val) ++{ ++ unsigned long irqflags; ++ struct kbase_ktrace_msg *trace_msg; ++ struct kbase_context *kctx = NULL; ++ ++ spin_lock_irqsave(&kbdev->ktrace.lock, irqflags); ++ ++ /* Reserve and update indices */ ++ trace_msg = kbasep_ktrace_reserve(&kbdev->ktrace); ++ ++ /* Determine the kctx */ ++ if (group) ++ kctx = group->kctx; ++ else if (queue) ++ kctx = queue->kctx; ++ ++ /* Fill the common part of the message (including backend.gpu.flags) */ ++ kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, flags, ++ info_val); ++ ++ /* Indicate to the common code that backend-specific parts will be ++ * valid ++ */ ++ trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_BACKEND; ++ ++ /* Fill the CSF-specific parts of the message ++ * ++ * Generally, no need to use default initializers when queue/group not ++ * present - can usually check the flags instead. 
++ */ ++ ++ if (queue) { ++ trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_CSF_QUEUE; ++ trace_msg->backend.gpu.csi_index = queue->csi_index; ++ } ++ ++ if (group) { ++ const s8 slot = group->csg_nr; ++ ++ trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_CSF_GROUP; ++ ++ trace_msg->backend.gpu.csg_nr = slot; ++ ++ if (slot >= 0) { ++ struct kbase_csf_csg_slot *csg_slot = ++ &kbdev->csf.scheduler.csg_slots[slot]; ++ ++ trace_msg->backend.gpu.slot_prio = ++ csg_slot->priority; ++ } ++ /* slot >=0 indicates whether slot_prio valid, so no need to ++ * initialize in the case where it's invalid ++ */ ++ ++ trace_msg->backend.gpu.group_handle = group->handle; ++ } ++ ++ WARN_ON((trace_msg->backend.gpu.flags & ~KBASE_KTRACE_FLAG_ALL)); ++ ++ /* Done */ ++ spin_unlock_irqrestore(&kbdev->ktrace.lock, irqflags); ++} ++ ++void kbasep_ktrace_add_csf_kcpu(struct kbase_device *kbdev, ++ enum kbase_ktrace_code code, ++ struct kbase_kcpu_command_queue *queue, ++ u64 info_val1, u64 info_val2) ++{ ++ unsigned long irqflags; ++ struct kbase_ktrace_msg *trace_msg; ++ struct kbase_context *kctx = queue->kctx; ++ ++ spin_lock_irqsave(&kbdev->ktrace.lock, irqflags); ++ ++ /* Reserve and update indices */ ++ trace_msg = kbasep_ktrace_reserve(&kbdev->ktrace); ++ ++ /* Fill the common part of the message */ ++ kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, 0, ++ info_val1); ++ ++ /* Indicate to the common code that backend-specific parts will be ++ * valid ++ */ ++ trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_BACKEND; ++ ++ /* Fill the KCPU-specific parts of the message */ ++ trace_msg->backend.kcpu.id = queue->id; ++ trace_msg->backend.kcpu.extra_info_val = info_val2; ++ trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_CSF_KCPU; ++ ++ WARN_ON((trace_msg->backend.gpu.flags & ~KBASE_KTRACE_FLAG_ALL)); ++ ++ /* Done */ ++ spin_unlock_irqrestore(&kbdev->ktrace.lock, irqflags); ++} ++ ++#endif /* KBASE_KTRACE_TARGET_RBUF */ +diff --git 
a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_csf.h b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_csf.h +new file mode 100644 +index 0000000..0593c30 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_csf.h +@@ -0,0 +1,203 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#ifndef _KBASE_DEBUG_KTRACE_CSF_H_ ++#define _KBASE_DEBUG_KTRACE_CSF_H_ ++ ++/* ++ * KTrace target for internal ringbuffer ++ */ ++#if KBASE_KTRACE_TARGET_RBUF ++/** ++ * kbasep_ktrace_add_csf - internal function to add trace about CSF ++ * @kbdev: kbase device ++ * @code: trace code ++ * @group: queue group, or NULL if no queue group ++ * @queue: queue, or NULL if no queue ++ * @flags: flags about the message ++ * @info_val: generic information about @code to add to the trace ++ * ++ * PRIVATE: do not use directly. Use KBASE_KTRACE_ADD_CSF() instead. 
++ */ ++ ++void kbasep_ktrace_add_csf(struct kbase_device *kbdev, ++ enum kbase_ktrace_code code, struct kbase_queue_group *group, ++ struct kbase_queue *queue, kbase_ktrace_flag_t flags, ++ u64 info_val); ++ ++/** ++ * kbasep_ktrace_add_csf_kcpu - internal function to add trace about the CSF ++ * KCPU queues. ++ * @kbdev: kbase device ++ * @code: trace code ++ * @queue: KCPU command queue, must not be NULL ++ * @info_val1: Main infoval variable with information based on the KCPU ++ * ktrace call. Refer to mali_kbase_debug_ktrace_codes_csf.h ++ * for information on the infoval values. ++ * @info_val2: Extra infoval variable with information based on the KCPU ++ * ktrace call. Refer to mali_kbase_debug_ktrace_codes_csf.h ++ * for information on the infoval values. ++ * ++ * PRIVATE: do not use directly. Use KBASE_KTRACE_ADD_CSF_KCPU() instead. ++ */ ++void kbasep_ktrace_add_csf_kcpu(struct kbase_device *kbdev, ++ enum kbase_ktrace_code code, ++ struct kbase_kcpu_command_queue *queue, ++ u64 info_val1, u64 info_val2); ++ ++#define KBASE_KTRACE_RBUF_ADD_CSF(kbdev, code, group, queue, flags, info_val) \ ++ kbasep_ktrace_add_csf(kbdev, KBASE_KTRACE_CODE(code), group, queue, \ ++ flags, info_val) ++ ++#define KBASE_KTRACE_RBUF_ADD_CSF_KCPU(kbdev, code, queue, info_val1, \ ++ info_val2) kbasep_ktrace_add_csf_kcpu(kbdev, KBASE_KTRACE_CODE(code), \ ++ queue, info_val1, info_val2) ++ ++#else /* KBASE_KTRACE_TARGET_RBUF */ ++ ++#define KBASE_KTRACE_RBUF_ADD_CSF(kbdev, code, group, queue, flags, info_val) \ ++ do {\ ++ CSTD_UNUSED(kbdev);\ ++ CSTD_NOP(code);\ ++ CSTD_UNUSED(group);\ ++ CSTD_UNUSED(queue);\ ++ CSTD_UNUSED(flags);\ ++ CSTD_UNUSED(info_val);\ ++ CSTD_NOP(0);\ ++ } while (0) ++ ++#define KBASE_KTRACE_RBUF_ADD_CSF_KCPU(kbdev, code, queue, info_val1, info_val2) \ ++ do {\ ++ CSTD_UNUSED(kbdev);\ ++ CSTD_NOP(code);\ ++ CSTD_UNUSED(queue);\ ++ CSTD_UNUSED(info_val1);\ ++ CSTD_UNUSED(info_val2);\ ++ } while (0) ++ ++#endif /* KBASE_KTRACE_TARGET_RBUF */ ++ ++/* ++ * KTrace
target for Linux's ftrace ++ * ++ * Note: the header file(s) that define the trace_mali_<...> tracepoints are ++ * included by the parent header file ++ */ ++#if KBASE_KTRACE_TARGET_FTRACE ++ ++#define KBASE_KTRACE_FTRACE_ADD_CSF(kbdev, code, group, queue, info_val) \ ++ trace_mali_##code(kbdev, group, queue, info_val) ++ ++#define KBASE_KTRACE_FTRACE_ADD_KCPU(code, queue, info_val1, info_val2) \ ++ trace_mali_##code(queue, info_val1, info_val2) ++ ++#else /* KBASE_KTRACE_TARGET_FTRACE */ ++ ++#define KBASE_KTRACE_FTRACE_ADD_CSF(kbdev, code, group, queue, info_val) \ ++ do {\ ++ CSTD_UNUSED(kbdev);\ ++ CSTD_NOP(code);\ ++ CSTD_UNUSED(group);\ ++ CSTD_UNUSED(queue);\ ++ CSTD_UNUSED(info_val);\ ++ CSTD_NOP(0);\ ++ } while (0) ++ ++#define KBASE_KTRACE_FTRACE_ADD_KCPU(code, queue, info_val1, info_val2) \ ++ do {\ ++ CSTD_NOP(code);\ ++ CSTD_UNUSED(queue);\ ++ CSTD_UNUSED(info_val1);\ ++ CSTD_UNUSED(info_val2);\ ++ } while (0) ++ ++#endif /* KBASE_KTRACE_TARGET_FTRACE */ ++ ++/* ++ * Master set of macros to route KTrace to any of the targets ++ */ ++ ++/** ++ * KBASE_KTRACE_ADD_CSF_GRP - Add trace values about a group, with info ++ * @kbdev: kbase device ++ * @code: trace code ++ * @group: queue group, or NULL if no queue group ++ * @info_val: generic information about @code to add to the trace ++ * ++ * Note: Any functions called through this macro will still be evaluated in ++ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when ++ * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied ++ * to this macro must: ++ * a) be static or static inline, and ++ * b) just return 0 and have no other statements present in the body. 
++ */ ++#define KBASE_KTRACE_ADD_CSF_GRP(kbdev, code, group, info_val) \ ++ do { \ ++ /* capture values that could come from non-pure fn calls */ \ ++ struct kbase_queue_group *__group = group; \ ++ u64 __info_val = info_val; \ ++ KBASE_KTRACE_RBUF_ADD_CSF(kbdev, code, __group, NULL, 0u, \ ++ __info_val); \ ++ KBASE_KTRACE_FTRACE_ADD_CSF(kbdev, code, __group, NULL, \ ++ __info_val); \ ++ } while (0) ++ ++/** ++ * KBASE_KTRACE_ADD_CSF_GRP_Q - Add trace values about a group, queue, with info ++ * @kbdev: kbase device ++ * @code: trace code ++ * @group: queue group, or NULL if no queue group ++ * @queue: queue, or NULL if no queue ++ * @info_val: generic information about @code to add to the trace ++ * ++ * Note: Any functions called through this macro will still be evaluated in ++ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when ++ * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied ++ * to this macro must: ++ * a) be static or static inline, and ++ * b) just return 0 and have no other statements present in the body. 
++ */ ++#define KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, code, group, queue, info_val) \ ++ do { \ ++ /* capture values that could come from non-pure fn calls */ \ ++ struct kbase_queue_group *__group = group; \ ++ struct kbase_queue *__queue = queue; \ ++ u64 __info_val = info_val; \ ++ KBASE_KTRACE_RBUF_ADD_CSF(kbdev, code, __group, __queue, 0u, \ ++ __info_val); \ ++ KBASE_KTRACE_FTRACE_ADD_CSF(kbdev, code, __group, \ ++ __queue, __info_val); \ ++ } while (0) ++ ++ ++#define KBASE_KTRACE_ADD_CSF_KCPU(kbdev, code, queue, info_val1, info_val2) \ ++ do { \ ++ /* capture values that could come from non-pure fn calls */ \ ++ struct kbase_kcpu_command_queue *__queue = queue; \ ++ u64 __info_val1 = info_val1; \ ++ u64 __info_val2 = info_val2; \ ++ KBASE_KTRACE_RBUF_ADD_CSF_KCPU(kbdev, code, __queue, \ ++ __info_val1, __info_val2); \ ++ KBASE_KTRACE_FTRACE_ADD_KCPU(code, __queue, \ ++ __info_val1, __info_val2); \ ++ } while (0) ++ ++#endif /* _KBASE_DEBUG_KTRACE_CSF_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_defs_csf.h b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_defs_csf.h +new file mode 100644 +index 0000000..7f32cd2 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_defs_csf.h +@@ -0,0 +1,116 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#ifndef _KBASE_DEBUG_KTRACE_DEFS_CSF_H_ ++#define _KBASE_DEBUG_KTRACE_DEFS_CSF_H_ ++ ++#if KBASE_KTRACE_TARGET_RBUF ++/** ++ * DOC: KTrace version history, CSF variant ++ * ++ * 1.0: ++ * First version, with version information in the header. ++ * ++ * 1.1: ++ * kctx field is no longer a pointer, and is now an ID of the format %d_%u as ++ * used by kctx directories in mali debugfs entries: (tgid creating the kctx), ++ * (unique kctx id) ++ * ++ * ftrace backend now outputs kctx field (as %d_%u format). ++ * ++ * Add fields group, slot, prio, csi into backend-specific part. ++ * ++ * 1.2: ++ * There is a new class of KCPU traces; with this, a new KCPU column in the ++ * ringbuffer RBUF (mali_trace) between csi and info_val, which is empty ++ * for non-kcpu related traces, and usually displays the KCPU Queue ID and ++ * an extra information value. ftrace also displays these KCPU traces. ++ * ++ * 1.3: ++ * Add a lot of extra new traces. Tweak some existing scheduler related traces ++ * to contain extra information or happen at slightly different times. ++ * SCHEDULER_EXIT_PROTM now has group information ++ */ ++#define KBASE_KTRACE_VERSION_MAJOR 1 ++#define KBASE_KTRACE_VERSION_MINOR 3 ++ ++/* indicates if the trace message has valid queue-group related info. */ ++#define KBASE_KTRACE_FLAG_CSF_GROUP (((kbase_ktrace_flag_t)1) << 0) ++ ++/* indicates if the trace message has valid queue related info. */ ++#define KBASE_KTRACE_FLAG_CSF_QUEUE (((kbase_ktrace_flag_t)1) << 1) ++ ++/* indicates if the trace message has valid KCPU-queue related info.
*/ ++#define KBASE_KTRACE_FLAG_CSF_KCPU (((kbase_ktrace_flag_t)1) << 2) ++ ++/* Collect all the flags together for debug checking */ ++#define KBASE_KTRACE_FLAG_BACKEND_ALL \ ++ (KBASE_KTRACE_FLAG_CSF_GROUP | KBASE_KTRACE_FLAG_CSF_QUEUE | \ ++ KBASE_KTRACE_FLAG_CSF_KCPU) ++ ++/** ++ * union kbase_ktrace_backend - backend specific part of a trace message ++ * @kcpu: kcpu union member ++ * @kcpu.code: Identifies the event, refer to enum kbase_ktrace_code. ++ * @kcpu.flags: indicates information about the trace message itself. Used ++ * during dumping of the message. ++ * @kcpu.id: ID of the KCPU queue. ++ * @kcpu.extra_info_val: value specific to the type of KCPU event being traced. ++ * Refer to the KCPU specific code in enum kbase_ktrace_code in ++ * mali_kbase_debug_ktrace_codes_csf.h ++ * @gpu: gpu union member ++ * @gpu.code: Identifies the event, refer to enum kbase_ktrace_code. ++ * @gpu.flags: indicates information about the trace message itself. Used ++ * during dumping of the message. ++ * @gpu.group_handle: Handle identifying the associated queue group. Only valid ++ * when @flags contains KBASE_KTRACE_FLAG_CSF_GROUP. ++ * @gpu.csg_nr: Number/index of the associated queue group's CS group to ++ * which it is mapped, or negative if none associated. Only ++ * valid when @flags contains KBASE_KTRACE_FLAG_CSF_GROUP. ++ * @gpu.slot_prio: The priority of the slot for the associated group, if it ++ * was scheduled. Hence, only valid when @csg_nr >=0 and ++ * @flags contains KBASE_KTRACE_FLAG_CSF_GROUP. ++ * @gpu.csi_index: ID of the associated queue's CS HW interface. ++ * Only valid when @flags contains KBASE_KTRACE_FLAG_CSF_QUEUE.
++ */ ++ ++union kbase_ktrace_backend { ++ /* Place 64 and 32-bit members together */ ++ /* Pack smaller members together */ ++ struct { ++ kbase_ktrace_code_t code; ++ kbase_ktrace_flag_t flags; ++ u8 id; ++ u64 extra_info_val; ++ } kcpu; ++ ++ struct { ++ kbase_ktrace_code_t code; ++ kbase_ktrace_flag_t flags; ++ u8 group_handle; ++ s8 csg_nr; ++ u8 slot_prio; ++ s8 csi_index; ++ } gpu; ++}; ++ ++#endif /* KBASE_KTRACE_TARGET_RBUF */ ++#endif /* _KBASE_DEBUG_KTRACE_DEFS_CSF_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_defs_jm.h b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_defs_jm.h +index 55b66ad..c01f930 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_defs_jm.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_defs_jm.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,25 +17,44 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KBASE_DEBUG_KTRACE_DEFS_JM_H_ + #define _KBASE_DEBUG_KTRACE_DEFS_JM_H_ + ++#if KBASE_KTRACE_TARGET_RBUF + /** + * DOC: KTrace version history, JM variant ++ * + * 1.0: +- * - Original version (implicit, header did not carry version information) ++ * Original version (implicit, header did not carry version information). ++ * + * 2.0: +- * - Introduced version information into the header +- * - some changes of parameter names in header +- * - trace now uses all 64-bits of info_val +- * - Non-JM specific parts moved to using info_val instead of refcount/gpu_addr ++ * Introduced version information into the header. ++ * ++ * Some changes of parameter names in header. ++ * ++ * Trace now uses all 64-bits of info_val. ++ * ++ * Non-JM specific parts moved to using info_val instead of refcount/gpu_addr. ++ * ++ * 2.1: ++ * kctx field is no longer a pointer, and is now an ID of the format %d_%u as ++ * used by kctx directories in mali debugfs entries: (tgid creating the kctx), ++ * (unique kctx id). ++ * ++ * ftrace backend now outputs kctx field (as %d_%u format). ++ * + */ + #define KBASE_KTRACE_VERSION_MAJOR 2 +-#define KBASE_KTRACE_VERSION_MINOR 0 ++#define KBASE_KTRACE_VERSION_MINOR 1 ++#endif /* KBASE_KTRACE_TARGET_RBUF */ ++ ++/* ++ * Note: mali_kbase_debug_ktrace_jm.h needs these values even if the RBUF target ++ * is disabled (they get discarded with CSTD_UNUSED(), but they're still ++ * referenced) ++ */ + + /* indicates if the trace message has a valid refcount member */ + #define KBASE_KTRACE_FLAG_JM_REFCOUNT (((kbase_ktrace_flag_t)1) << 0) +@@ -43,33 +63,47 @@ + /* indicates if the trace message has valid atom related info.
*/ + #define KBASE_KTRACE_FLAG_JM_ATOM (((kbase_ktrace_flag_t)1) << 2) + ++#if KBASE_KTRACE_TARGET_RBUF ++/* Collect all the flags together for debug checking */ ++#define KBASE_KTRACE_FLAG_BACKEND_ALL \ ++ (KBASE_KTRACE_FLAG_JM_REFCOUNT | KBASE_KTRACE_FLAG_JM_JOBSLOT \ ++ | KBASE_KTRACE_FLAG_JM_ATOM) + + /** +- * struct kbase_ktrace_backend - backend specific part of a trace message ++ * union kbase_ktrace_backend - backend specific part of a trace message ++ * Contains only a struct but is a union such that it is compatible with ++ * generic JM and CSF KTrace calls. + * +- * @atom_udata: Copy of the user data sent for the atom in base_jd_submit. +- * Only valid if KBASE_KTRACE_FLAG_JM_ATOM is set in @flags +- * @gpu_addr: GPU address, usually of the job-chain represented by an atom. +- * @atom_number: id of the atom for which trace message was added. Only valid +- * if KBASE_KTRACE_FLAG_JM_ATOM is set in @flags +- * @code: Identifies the event, refer to enum kbase_ktrace_code. +- * @flags: indicates information about the trace message itself. Used +- * during dumping of the message. +- * @jobslot: job-slot for which trace message was added, valid only for +- * job-slot management events. +- * @refcount: reference count for the context, valid for certain events +- * related to scheduler core and policy. ++ * @gpu: gpu union member ++ * @gpu.atom_udata: Copy of the user data sent for the atom in base_jd_submit. ++ * Only valid if KBASE_KTRACE_FLAG_JM_ATOM is set in @flags ++ * @gpu.gpu_addr: GPU address, usually of the job-chain represented by an ++ * atom. ++ * @gpu.atom_number: id of the atom for which trace message was added. Only ++ * valid if KBASE_KTRACE_FLAG_JM_ATOM is set in @flags ++ * @gpu.code: Identifies the event, refer to enum kbase_ktrace_code. ++ * @gpu.flags: indicates information about the trace message itself. Used ++ * during dumping of the message. 
++ * @gpu.jobslot: job-slot for which trace message was added, valid only for ++ * job-slot management events. ++ * @gpu.refcount: reference count for the context, valid for certain events ++ * related to scheduler core and policy. + */ +-struct kbase_ktrace_backend { +- /* Place 64 and 32-bit members together */ +- u64 atom_udata[2]; /* Only valid for KBASE_KTRACE_FLAG_JM_ATOM */ +- u64 gpu_addr; +- int atom_number; /* Only valid for KBASE_KTRACE_FLAG_JM_ATOM */ +- /* Pack smaller members together */ +- kbase_ktrace_code_t code; +- kbase_ktrace_flag_t flags; +- u8 jobslot; +- u8 refcount; ++union kbase_ktrace_backend { ++ struct { ++ /* Place 64 and 32-bit members together */ ++ u64 atom_udata[2]; /* Only valid for ++ * KBASE_KTRACE_FLAG_JM_ATOM ++ */ ++ u64 gpu_addr; ++ int atom_number; /* Only valid for KBASE_KTRACE_FLAG_JM_ATOM */ ++ /* Pack smaller members together */ ++ kbase_ktrace_code_t code; ++ kbase_ktrace_flag_t flags; ++ u8 jobslot; ++ u8 refcount; ++ } gpu; + }; ++#endif /* KBASE_KTRACE_TARGET_RBUF */ + + #endif /* _KBASE_DEBUG_KTRACE_DEFS_JM_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_jm.c b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_jm.c +index e651a09..fed9c1f 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_jm.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_jm.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,9 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ ++ + #include + #include "debug/mali_kbase_debug_ktrace_internal.h" + #include "debug/backend/mali_kbase_debug_ktrace_jm.h" +@@ -35,38 +35,39 @@ void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, + char *buffer, int sz, s32 *written) + { + /* katom */ +- if (trace_msg->backend.flags & KBASE_KTRACE_FLAG_JM_ATOM) ++ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_JM_ATOM) + *written += MAX(snprintf(buffer + *written, + MAX(sz - *written, 0), + "atom %d (ud: 0x%llx 0x%llx)", +- trace_msg->backend.atom_number, +- trace_msg->backend.atom_udata[0], +- trace_msg->backend.atom_udata[1]), 0); ++ trace_msg->backend.gpu.atom_number, ++ trace_msg->backend.gpu.atom_udata[0], ++ trace_msg->backend.gpu.atom_udata[1]), 0); + + /* gpu_addr */ +- if (trace_msg->backend.flags & KBASE_KTRACE_FLAG_BACKEND) ++ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_BACKEND) + *written += MAX(snprintf(buffer + *written, + MAX(sz - *written, 0), +- ",%.8llx,", trace_msg->backend.gpu_addr), 0); ++ ",%.8llx,", trace_msg->backend.gpu.gpu_addr), ++ 0); + else + *written += MAX(snprintf(buffer + *written, + MAX(sz - *written, 0), + ",,"), 0); + + /* jobslot */ +- if (trace_msg->backend.flags & KBASE_KTRACE_FLAG_JM_JOBSLOT) ++ if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_JM_JOBSLOT) + *written += MAX(snprintf(buffer + *written, + MAX(sz - *written, 0), +- "%d", trace_msg->backend.jobslot), 0); ++ "%d", trace_msg->backend.gpu.jobslot), 0); + + *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), + ","), 0); + + /* refcount */ +- if (trace_msg->backend.flags & KBASE_KTRACE_FLAG_JM_REFCOUNT) ++ if (trace_msg->backend.gpu.flags & 
KBASE_KTRACE_FLAG_JM_REFCOUNT) + *written += MAX(snprintf(buffer + *written, + MAX(sz - *written, 0), +- "%d", trace_msg->backend.refcount), 0); ++ "%d", trace_msg->backend.gpu.refcount), 0); + } + + void kbasep_ktrace_add_jm(struct kbase_device *kbdev, +@@ -83,28 +84,31 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev, + /* Reserve and update indices */ + trace_msg = kbasep_ktrace_reserve(&kbdev->ktrace); + +- /* Fill the common part of the message (including backend.flags) */ ++ /* Fill the common part of the message (including backend.gpu.flags) */ + kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, flags, + info_val); + + /* Indicate to the common code that backend-specific parts will be + * valid + */ +- trace_msg->backend.flags |= KBASE_KTRACE_FLAG_BACKEND; ++ trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_BACKEND; + + /* Fill the JM-specific parts of the message */ + if (katom) { +- trace_msg->backend.flags |= KBASE_KTRACE_FLAG_JM_ATOM; ++ trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_JM_ATOM; + +- trace_msg->backend.atom_number = kbase_jd_atom_id(katom->kctx, katom); +- trace_msg->backend.atom_udata[0] = katom->udata.blob[0]; +- trace_msg->backend.atom_udata[1] = katom->udata.blob[1]; ++ trace_msg->backend.gpu.atom_number = ++ kbase_jd_atom_id(katom->kctx, katom); ++ trace_msg->backend.gpu.atom_udata[0] = katom->udata.blob[0]; ++ trace_msg->backend.gpu.atom_udata[1] = katom->udata.blob[1]; + } + +- trace_msg->backend.gpu_addr = gpu_addr; +- trace_msg->backend.jobslot = jobslot; ++ trace_msg->backend.gpu.gpu_addr = gpu_addr; ++ trace_msg->backend.gpu.jobslot = jobslot; + /* Clamp refcount */ +- trace_msg->backend.refcount = MIN((unsigned int)refcount, 0xFF); ++ trace_msg->backend.gpu.refcount = MIN((unsigned int)refcount, 0xFF); ++ ++ WARN_ON((trace_msg->backend.gpu.flags & ~KBASE_KTRACE_FLAG_ALL)); + + /* Done */ + spin_unlock_irqrestore(&kbdev->ktrace.lock, irqflags); +diff --git 
a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_jm.h b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_jm.h +index c1bacf9..8b09d05 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_jm.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_jm.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KBASE_DEBUG_KTRACE_JM_H_ +@@ -47,89 +46,24 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev, + kbase_ktrace_flag_t flags, int refcount, int jobslot, + u64 info_val); + +-#define KBASE_KTRACE_RBUF_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ +- jobslot) \ +- kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \ +- gpu_addr, KBASE_KTRACE_FLAG_JM_JOBSLOT, 0, jobslot, 0) +- +-#define KBASE_KTRACE_RBUF_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, gpu_addr, \ +- jobslot, info_val) \ ++#define KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, gpu_addr, flags, \ ++ refcount, jobslot, info_val) \ + kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \ +- gpu_addr, KBASE_KTRACE_FLAG_JM_JOBSLOT, 0, jobslot, \ +- info_val) +- +-#define KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, gpu_addr, \ +- refcount) \ +- kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \ +- gpu_addr, KBASE_KTRACE_FLAG_JM_REFCOUNT, refcount, 0, 0) +-#define KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \ +- gpu_addr, refcount, info_val) \ +- kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \ +- gpu_addr, KBASE_KTRACE_FLAG_JM_REFCOUNT, refcount, 0, \ +- info_val) +- +-#define KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, gpu_addr, info_val) \ +- kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \ +- gpu_addr, 0, 0, 0, info_val) ++ gpu_addr, flags, refcount, jobslot, info_val) + + #else /* KBASE_KTRACE_TARGET_RBUF */ +-#define KBASE_KTRACE_RBUF_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ +- jobslot) \ +- do {\ +- CSTD_UNUSED(kbdev);\ +- CSTD_NOP(code);\ +- CSTD_UNUSED(kctx);\ +- CSTD_UNUSED(katom);\ +- CSTD_UNUSED(gpu_addr);\ +- CSTD_UNUSED(jobslot);\ +- CSTD_NOP(0);\ +- } while (0) +- +-#define KBASE_KTRACE_RBUF_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, gpu_addr, \ +- jobslot, info_val) \ +- do {\ +- 
CSTD_UNUSED(kbdev);\ +- CSTD_NOP(code);\ +- CSTD_UNUSED(kctx);\ +- CSTD_UNUSED(katom);\ +- CSTD_UNUSED(gpu_addr);\ +- CSTD_UNUSED(jobslot);\ +- CSTD_UNUSED(info_val);\ +- CSTD_NOP(0);\ +- } while (0) + +-#define KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, gpu_addr, \ +- refcount) \ ++#define KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, gpu_addr, flags, \ ++ refcount, jobslot, info_val) \ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(kctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ ++ CSTD_UNUSED(flags);\ + CSTD_UNUSED(refcount);\ +- CSTD_NOP(0);\ +- } while (0) +- +-#define KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \ +- gpu_addr, refcount, info_val) \ +- do {\ +- CSTD_UNUSED(kbdev);\ +- CSTD_NOP(code);\ +- CSTD_UNUSED(kctx);\ +- CSTD_UNUSED(katom);\ +- CSTD_UNUSED(gpu_addr);\ +- CSTD_UNUSED(info_val);\ +- CSTD_NOP(0);\ +- } while (0) +- +-#define KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, gpu_addr, \ +- info_val)\ +- do {\ +- CSTD_UNUSED(kbdev);\ +- CSTD_NOP(code);\ +- CSTD_UNUSED(kctx);\ +- CSTD_UNUSED(katom);\ +- CSTD_UNUSED(gpu_addr);\ ++ CSTD_UNUSED(jobslot);\ + CSTD_UNUSED(info_val);\ + CSTD_NOP(0);\ + } while (0) +@@ -137,27 +71,30 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev, + + /* + * KTrace target for Linux's ftrace ++ * ++ * Note: the header file(s) that define the trace_mali_<...> tracepoints are ++ * included by the parent header file + */ + #if KBASE_KTRACE_TARGET_FTRACE + #define KBASE_KTRACE_FTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ + jobslot) \ +- trace_mali_##code(jobslot, 0) ++ trace_mali_##code(kctx, jobslot, 0) + + #define KBASE_KTRACE_FTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, \ + gpu_addr, jobslot, info_val) \ +- trace_mali_##code(jobslot, info_val) ++ trace_mali_##code(kctx, jobslot, info_val) + + #define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, \ + gpu_addr, refcount) \ +- trace_mali_##code(refcount, 0) ++ 
trace_mali_##code(kctx, refcount, 0) + + #define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \ + gpu_addr, refcount, info_val) \ +- trace_mali_##code(refcount, info_val) ++ trace_mali_##code(kctx, refcount, info_val) + + #define KBASE_KTRACE_FTRACE_ADD_JM(kbdev, code, kctx, katom, gpu_addr, \ + info_val) \ +- trace_mali_##code(gpu_addr, info_val) ++ trace_mali_##code(kctx, gpu_addr, info_val) + #else /* KBASE_KTRACE_TARGET_FTRACE */ + #define KBASE_KTRACE_FTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ + jobslot) \ +@@ -247,7 +184,9 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev, + /* capture values that could come from non-pure function calls */ \ + u64 __gpu_addr = gpu_addr; \ + int __jobslot = jobslot; \ +- KBASE_KTRACE_RBUF_ADD_JM_SLOT(kbdev, code, kctx, katom, __gpu_addr, __jobslot); \ ++ KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ ++ KBASE_KTRACE_FLAG_JM_JOBSLOT, 0, __jobslot, \ ++ 0); \ + KBASE_KTRACE_FTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, __gpu_addr, __jobslot); \ + } while (0) + +@@ -275,7 +214,9 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev, + u64 __gpu_addr = gpu_addr; \ + int __jobslot = jobslot; \ + u64 __info_val = info_val; \ +- KBASE_KTRACE_RBUF_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, __gpu_addr, __jobslot, __info_val); \ ++ KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ ++ KBASE_KTRACE_FLAG_JM_JOBSLOT, 0, __jobslot, \ ++ __info_val); \ + KBASE_KTRACE_FTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, __gpu_addr, __jobslot, __info_val); \ + } while (0) + +@@ -301,7 +242,9 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev, + /* capture values that could come from non-pure function calls */ \ + u64 __gpu_addr = gpu_addr; \ + int __refcount = refcount; \ +- KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, __gpu_addr, __refcount); \ ++ KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ ++ KBASE_KTRACE_FLAG_JM_REFCOUNT, 
__refcount, 0, \ ++ 0u); \ + KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, __gpu_addr, __refcount); \ + } while (0) + +@@ -330,7 +273,9 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev, + u64 __gpu_addr = gpu_addr; \ + int __refcount = refcount; \ + u64 __info_val = info_val; \ +- KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, __gpu_addr, __refcount, __info_val); \ ++ KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ ++ KBASE_KTRACE_FLAG_JM_REFCOUNT, __refcount, 0, \ ++ __info_val); \ + KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, __gpu_addr, __refcount, __info_val); \ + } while (0) + +@@ -355,7 +300,8 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev, + /* capture values that could come from non-pure function calls */ \ + u64 __gpu_addr = gpu_addr; \ + u64 __info_val = info_val; \ +- KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, __info_val); \ ++ KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ ++ 0u, 0, 0, __info_val); \ + KBASE_KTRACE_FTRACE_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, __info_val); \ + } while (0) + +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_linux_ktrace_csf.h b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_linux_ktrace_csf.h +new file mode 100644 +index 0000000..4b23fc9 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_linux_ktrace_csf.h +@@ -0,0 +1,241 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++/* ++ * NOTE: This must **only** be included through mali_linux_trace.h, ++ * otherwise it will fail to setup tracepoints correctly ++ */ ++ ++#if !defined(_KBASE_DEBUG_LINUX_KTRACE_CSF_H_) || defined(TRACE_HEADER_MULTI_READ) ++#define _KBASE_DEBUG_LINUX_KTRACE_CSF_H_ ++ ++/* ++ * Generic CSF events - using the common DEFINE_MALI_ADD_EVENT ++ */ ++DEFINE_MALI_ADD_EVENT(EVICT_CTX_SLOTS); ++DEFINE_MALI_ADD_EVENT(FIRMWARE_BOOT); ++DEFINE_MALI_ADD_EVENT(FIRMWARE_REBOOT); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK_END); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_END); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_RESET); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_WAIT_PROTM_QUIT); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_WAIT_PROTM_QUIT_DONE); ++DEFINE_MALI_ADD_EVENT(SYNC_UPDATE_EVENT); ++DEFINE_MALI_ADD_EVENT(SYNC_UPDATE_EVENT_NOTIFY_GPU); ++DEFINE_MALI_ADD_EVENT(CSF_INTERRUPT); ++DEFINE_MALI_ADD_EVENT(CSF_INTERRUPT_END); ++DEFINE_MALI_ADD_EVENT(CSG_INTERRUPT_PROCESS); ++DEFINE_MALI_ADD_EVENT(GLB_REQ_ACQ); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_CAN_IDLE); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_ADVANCE_TICK); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_NOADVANCE_TICK); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_INSERT_RUNNABLE); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_REMOVE_RUNNABLE); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_ROTATE_RUNNABLE); ++DEFINE_MALI_ADD_EVENT(SCHEDULER_HEAD_RUNNABLE); ++DEFINE_MALI_ADD_EVENT(IDLE_WORKER_BEGIN); ++DEFINE_MALI_ADD_EVENT(IDLE_WORKER_END); 
++DEFINE_MALI_ADD_EVENT(GROUP_SYNC_UPDATE_WORKER_BEGIN); ++DEFINE_MALI_ADD_EVENT(GROUP_SYNC_UPDATE_WORKER_END); ++DEFINE_MALI_ADD_EVENT(SLOTS_STATUS_UPDATE_ACK); ++ ++DECLARE_EVENT_CLASS(mali_csf_grp_q_template, ++ TP_PROTO(struct kbase_device *kbdev, struct kbase_queue_group *group, ++ struct kbase_queue *queue, u64 info_val), ++ TP_ARGS(kbdev, group, queue, info_val), ++ TP_STRUCT__entry( ++ __field(u64, info_val) ++ __field(pid_t, kctx_tgid) ++ __field(u32, kctx_id) ++ __field(u8, group_handle) ++ __field(s8, csg_nr) ++ __field(u8, slot_prio) ++ __field(s8, csi_index) ++ ), ++ TP_fast_assign( ++ { ++ struct kbase_context *kctx = NULL; ++ ++ __entry->info_val = info_val; ++ /* Note: if required in future, we could record some ++ * flags in __entry about whether the group/queue parts ++ * are valid, and add that to the trace message e.g. ++ * by using __print_flags()/__print_symbolic() ++ */ ++ if (queue) { ++ /* Note: kctx overridden by group->kctx later if group is valid */ ++ kctx = queue->kctx; ++ __entry->csi_index = queue->csi_index; ++ } else { ++ __entry->csi_index = -1; ++ } ++ ++ if (group) { ++ kctx = group->kctx; ++ __entry->group_handle = group->handle; ++ __entry->csg_nr = group->csg_nr; ++ if (group->csg_nr >= 0) ++ __entry->slot_prio = kbdev->csf.scheduler.csg_slots[group->csg_nr].priority; ++ else ++ __entry->slot_prio = 0u; ++ } else { ++ __entry->group_handle = 0u; ++ __entry->csg_nr = -1; ++ __entry->slot_prio = 0u; ++ } ++ __entry->kctx_id = (kctx) ? kctx->id : 0u; ++ __entry->kctx_tgid = (kctx) ? 
kctx->tgid : 0; ++ } ++ ++ ), ++ TP_printk("kctx=%d_%u group=%u slot=%d prio=%u csi=%d info=0x%llx", ++ __entry->kctx_tgid, __entry->kctx_id, ++ __entry->group_handle, __entry->csg_nr, ++ __entry->slot_prio, __entry->csi_index, ++ __entry->info_val) ++); ++ ++/* ++ * Group events ++ */ ++#define DEFINE_MALI_CSF_GRP_EVENT(name) \ ++ DEFINE_EVENT_PRINT(mali_csf_grp_q_template, mali_##name, \ ++ TP_PROTO(struct kbase_device *kbdev, struct kbase_queue_group *group, \ ++ struct kbase_queue *queue, u64 info_val), \ ++ TP_ARGS(kbdev, group, queue, info_val), \ ++ TP_printk("kctx=%d_%u group=%u slot=%d prio=%u info=0x%llx", \ ++ __entry->kctx_tgid, __entry->kctx_id, __entry->group_handle, \ ++ __entry->csg_nr, __entry->slot_prio, __entry->info_val)) ++ ++DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_START); ++DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STOP); ++DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STARTED); ++DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STOPPED); ++DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_CLEANED); ++DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STATUS_UPDATE); ++DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_IDLE_SET); ++DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_IDLE_CLEAR); ++DEFINE_MALI_CSF_GRP_EVENT(CSG_PRIO_UPDATE); ++DEFINE_MALI_CSF_GRP_EVENT(CSG_SYNC_UPDATE_INTERRUPT); ++DEFINE_MALI_CSF_GRP_EVENT(CSG_IDLE_INTERRUPT); ++DEFINE_MALI_CSF_GRP_EVENT(CSG_PROGRESS_TIMER_INTERRUPT); ++DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_PROCESS_END); ++DEFINE_MALI_CSF_GRP_EVENT(GROUP_SYNC_UPDATE_DONE); ++DEFINE_MALI_CSF_GRP_EVENT(GROUP_DESCHEDULE); ++DEFINE_MALI_CSF_GRP_EVENT(GROUP_SCHEDULE); ++DEFINE_MALI_CSF_GRP_EVENT(GROUP_EVICT_SCHED); ++DEFINE_MALI_CSF_GRP_EVENT(GROUP_INSERT_RUNNABLE); ++DEFINE_MALI_CSF_GRP_EVENT(GROUP_REMOVE_RUNNABLE); ++DEFINE_MALI_CSF_GRP_EVENT(GROUP_ROTATE_RUNNABLE); ++DEFINE_MALI_CSF_GRP_EVENT(GROUP_HEAD_RUNNABLE); ++DEFINE_MALI_CSF_GRP_EVENT(GROUP_INSERT_IDLE_WAIT); ++DEFINE_MALI_CSF_GRP_EVENT(GROUP_REMOVE_IDLE_WAIT); ++DEFINE_MALI_CSF_GRP_EVENT(GROUP_HEAD_IDLE_WAIT); 
++DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_CHECK_PROTM_ENTER); ++DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_ENTER_PROTM); ++DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_EXIT_PROTM); ++DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_TOP_GRP); ++DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_INC); ++DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_DEC); ++DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_BEGIN); ++DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_END); ++ ++#undef DEFINE_MALI_CSF_GRP_EVENT ++ ++/* ++ * Group + Queue events ++ */ ++#define DEFINE_MALI_CSF_GRP_Q_EVENT(name) \ ++ DEFINE_EVENT(mali_csf_grp_q_template, mali_##name, \ ++ TP_PROTO(struct kbase_device *kbdev, struct kbase_queue_group *group, \ ++ struct kbase_queue *queue, u64 info_val), \ ++ TP_ARGS(kbdev, group, queue, info_val)) ++ ++DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_START); ++DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_STOP); ++DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_STOP_REQUESTED); ++DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_IGNORED_INTERRUPTS_GROUP_SUSPEND); ++DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_FAULT_INTERRUPT); ++DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_TILER_OOM_INTERRUPT); ++DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_PROTM_PEND_INTERRUPT); ++DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_PROTM_ACK); ++DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_START); ++DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_STOP); ++DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE); ++DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_EVALUATED); ++DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_STATUS_WAIT); ++DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_CURRENT_VAL); ++DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_TEST_VAL); ++DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_BLOCKED_REASON); ++DEFINE_MALI_CSF_GRP_Q_EVENT(PROTM_PENDING_SET); ++DEFINE_MALI_CSF_GRP_Q_EVENT(PROTM_PENDING_CLEAR); ++ ++#undef DEFINE_MALI_CSF_GRP_Q_EVENT ++ ++/* ++ * KCPU queue events ++ */ ++DECLARE_EVENT_CLASS(mali_csf_kcpu_queue_template, ++ TP_PROTO(struct kbase_kcpu_command_queue *queue, ++ u64 info_val1, u64 info_val2), ++ TP_ARGS(queue, info_val1, info_val2), ++ TP_STRUCT__entry( ++ 
__field(u64, info_val1) ++ __field(u64, info_val2) ++ __field(pid_t, kctx_tgid) ++ __field(u32, kctx_id) ++ __field(u8, id) ++ ), ++ TP_fast_assign( ++ { ++ __entry->info_val1 = info_val1; ++ __entry->info_val2 = info_val2; ++ __entry->kctx_id = queue->kctx->id; ++ __entry->kctx_tgid = queue->kctx->tgid; ++ __entry->id = queue->id; ++ } ++ ++ ), ++ TP_printk("kctx=%d_%u id=%u info_val1=0x%llx info_val2=0x%llx", ++ __entry->kctx_tgid, __entry->kctx_id, __entry->id, ++ __entry->info_val1, __entry->info_val2) ++); ++ ++#define DEFINE_MALI_CSF_KCPU_EVENT(name) \ ++ DEFINE_EVENT(mali_csf_kcpu_queue_template, mali_##name, \ ++ TP_PROTO(struct kbase_kcpu_command_queue *queue, \ ++ u64 info_val1, u64 info_val2), \ ++ TP_ARGS(queue, info_val1, info_val2)) ++ ++DEFINE_MALI_CSF_KCPU_EVENT(KCPU_QUEUE_NEW); ++DEFINE_MALI_CSF_KCPU_EVENT(KCPU_QUEUE_DESTROY); ++DEFINE_MALI_CSF_KCPU_EVENT(CQS_SET); ++DEFINE_MALI_CSF_KCPU_EVENT(CQS_WAIT_START); ++DEFINE_MALI_CSF_KCPU_EVENT(CQS_WAIT_END); ++DEFINE_MALI_CSF_KCPU_EVENT(FENCE_SIGNAL); ++DEFINE_MALI_CSF_KCPU_EVENT(FENCE_WAIT_START); ++DEFINE_MALI_CSF_KCPU_EVENT(FENCE_WAIT_END); ++ ++#undef DEFINE_MALI_CSF_KCPU_EVENT ++ ++#endif /* !defined(_KBASE_DEBUG_LINUX_KTRACE_CSF_H_) || defined(TRACE_HEADER_MULTI_READ) */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_linux_ktrace_jm.h b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_linux_ktrace_jm.h +index d964e5a..2e88e69 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_linux_ktrace_jm.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_linux_ktrace_jm.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014,2018,2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014, 2018, 2020-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +@@ -29,23 +28,28 @@ + #define _KBASE_DEBUG_LINUX_KTRACE_JM_H_ + + DECLARE_EVENT_CLASS(mali_jm_slot_template, +- TP_PROTO(int jobslot, u64 info_val), +- TP_ARGS(jobslot, info_val), ++ TP_PROTO(struct kbase_context *kctx, int jobslot, u64 info_val), ++ TP_ARGS(kctx, jobslot, info_val), + TP_STRUCT__entry( ++ __field(pid_t, kctx_tgid) ++ __field(u32, kctx_id) + __field(unsigned int, jobslot) + __field(u64, info_val) + ), + TP_fast_assign( ++ __entry->kctx_id = (kctx) ? kctx->id : 0u; ++ __entry->kctx_tgid = (kctx) ? 
kctx->tgid : 0; + __entry->jobslot = jobslot; + __entry->info_val = info_val; + ), +- TP_printk("jobslot=%u info=0x%llx", __entry->jobslot, __entry->info_val) ++ TP_printk("kctx=%d_%u jobslot=%u info=0x%llx", __entry->kctx_tgid, ++ __entry->kctx_id, __entry->jobslot, __entry->info_val) + ); + + #define DEFINE_MALI_JM_SLOT_EVENT(name) \ + DEFINE_EVENT(mali_jm_slot_template, mali_##name, \ +- TP_PROTO(int jobslot, u64 info_val), \ +- TP_ARGS(jobslot, info_val)) ++ TP_PROTO(struct kbase_context *kctx, int jobslot, u64 info_val), \ ++ TP_ARGS(kctx, jobslot, info_val)) + DEFINE_MALI_JM_SLOT_EVENT(JM_SUBMIT); + DEFINE_MALI_JM_SLOT_EVENT(JM_JOB_DONE); + DEFINE_MALI_JM_SLOT_EVENT(JM_UPDATE_HEAD); +@@ -75,23 +79,28 @@ DEFINE_MALI_JM_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB_IRQ); + #undef DEFINE_MALI_JM_SLOT_EVENT + + DECLARE_EVENT_CLASS(mali_jm_refcount_template, +- TP_PROTO(int refcount, u64 info_val), +- TP_ARGS(refcount, info_val), ++ TP_PROTO(struct kbase_context *kctx, int refcount, u64 info_val), ++ TP_ARGS(kctx, refcount, info_val), + TP_STRUCT__entry( ++ __field(pid_t, kctx_tgid) ++ __field(u32, kctx_id) + __field(unsigned int, refcount) + __field(u64, info_val) + ), + TP_fast_assign( ++ __entry->kctx_id = (kctx) ? kctx->id : 0u; ++ __entry->kctx_tgid = (kctx) ? 
kctx->tgid : 0; + __entry->refcount = refcount; + __entry->info_val = info_val; + ), +- TP_printk("refcount=%u info=0x%llx", __entry->refcount, __entry->info_val) ++ TP_printk("kctx=%d_%u refcount=%u info=0x%llx", __entry->kctx_tgid, ++ __entry->kctx_id, __entry->refcount, __entry->info_val) + ); + + #define DEFINE_MALI_JM_REFCOUNT_EVENT(name) \ + DEFINE_EVENT(mali_jm_refcount_template, mali_##name, \ +- TP_PROTO(int refcount, u64 info_val), \ +- TP_ARGS(refcount, info_val)) ++ TP_PROTO(struct kbase_context *kctx, int refcount, u64 info_val), \ ++ TP_ARGS(kctx, refcount, info_val)) + DEFINE_MALI_JM_REFCOUNT_EVENT(JS_ADD_JOB); + DEFINE_MALI_JM_REFCOUNT_EVENT(JS_REMOVE_JOB); + DEFINE_MALI_JM_REFCOUNT_EVENT(JS_TRY_SCHEDULE_HEAD_CTX); +@@ -106,23 +115,28 @@ DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_FOREACH_CTX_JOBS); + #undef DEFINE_MALI_JM_REFCOUNT_EVENT + + DECLARE_EVENT_CLASS(mali_jm_add_template, +- TP_PROTO(u64 gpu_addr, u64 info_val), +- TP_ARGS(gpu_addr, info_val), ++ TP_PROTO(struct kbase_context *kctx, u64 gpu_addr, u64 info_val), ++ TP_ARGS(kctx, gpu_addr, info_val), + TP_STRUCT__entry( ++ __field(pid_t, kctx_tgid) ++ __field(u32, kctx_id) + __field(u64, gpu_addr) + __field(u64, info_val) + ), + TP_fast_assign( ++ __entry->kctx_id = (kctx) ? kctx->id : 0u; ++ __entry->kctx_tgid = (kctx) ? 
kctx->tgid : 0; + __entry->gpu_addr = gpu_addr; + __entry->info_val = info_val; + ), +- TP_printk("gpu_addr=0x%llx info=0x%llx", __entry->gpu_addr, __entry->info_val) ++ TP_printk("kctx=%d_%u gpu_addr=0x%llx info=0x%llx", __entry->kctx_tgid, ++ __entry->kctx_id, __entry->gpu_addr, __entry->info_val) + ); + + #define DEFINE_MALI_JM_ADD_EVENT(name) \ + DEFINE_EVENT(mali_jm_add_template, mali_##name, \ +- TP_PROTO(u64 gpu_addr, u64 info_val), \ +- TP_ARGS(gpu_addr, info_val)) ++ TP_PROTO(struct kbase_context *kctx, u64 gpu_addr, u64 info_val), \ ++ TP_ARGS(kctx, gpu_addr, info_val)) + DEFINE_MALI_JM_ADD_EVENT(JD_DONE_WORKER); + DEFINE_MALI_JM_ADD_EVENT(JD_DONE_WORKER_END); + DEFINE_MALI_JM_ADD_EVENT(JD_CANCEL_WORKER); +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace.c b/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace.c +index 6322abb..9bf8610 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,9 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ ++ + #include + #include "debug/mali_kbase_debug_ktrace_internal.h" + +@@ -27,11 +27,6 @@ int kbase_ktrace_init(struct kbase_device *kbdev) + #if KBASE_KTRACE_TARGET_RBUF + struct kbase_ktrace_msg *rbuf; + +- /* See also documentation of enum kbase_ktrace_code */ +- compiletime_assert(sizeof(kbase_ktrace_code_t) == sizeof(unsigned long long) || +- KBASE_KTRACE_CODE_COUNT <= (1ull << (sizeof(kbase_ktrace_code_t) * BITS_PER_BYTE)), +- "kbase_ktrace_code_t not wide enough for KBASE_KTRACE_CODE_COUNT"); +- + rbuf = kmalloc_array(KBASE_KTRACE_SIZE, sizeof(*rbuf), GFP_KERNEL); + + if (!rbuf) +@@ -91,15 +86,25 @@ static void kbasep_ktrace_format_msg(struct kbase_ktrace_msg *trace_msg, + + /* Initial part of message: + * +- * secs,thread_id,cpu,code,kctx, ++ * secs,thread_id,cpu,code, + */ + written += MAX(snprintf(buffer + written, MAX(sz - written, 0), +- "%d.%.6d,%d,%d,%s,%p,", ++ "%d.%.6d,%d,%d,%s,", + (int)trace_msg->timestamp.tv_sec, + (int)(trace_msg->timestamp.tv_nsec / 1000), + trace_msg->thread_id, trace_msg->cpu, +- kbasep_ktrace_code_string[trace_msg->backend.code], +- trace_msg->kctx), 0); ++ kbasep_ktrace_code_string[trace_msg->backend.gpu.code]), ++ 0); ++ ++ /* kctx part: */ ++ if (trace_msg->kctx_tgid) { ++ written += MAX(snprintf(buffer + written, MAX(sz - written, 0), ++ "%d_%u", ++ trace_msg->kctx_tgid, trace_msg->kctx_id), 0); ++ } ++ /* Trailing comma */ ++ written += MAX(snprintf(buffer + written, MAX(sz - written, 0), ++ ","), 0); + + /* Backend parts */ + kbasep_ktrace_backend_format_msg(trace_msg, buffer, sz, +@@ -156,11 +161,19 @@ void kbasep_ktrace_msg_init(struct kbase_ktrace *ktrace, + + ktime_get_real_ts64(&trace_msg->timestamp); + +- trace_msg->kctx = kctx; +- ++ /* No need to store a flag about whether there was a kctx, tgid==0 is ++ * sufficient ++ */ ++ if (kctx) { ++ trace_msg->kctx_tgid = kctx->tgid; ++ trace_msg->kctx_id = kctx->id; ++ } else { ++ trace_msg->kctx_tgid = 0; ++ 
trace_msg->kctx_id = 0; ++ } + trace_msg->info_val = info_val; +- trace_msg->backend.code = code; +- trace_msg->backend.flags = flags; ++ trace_msg->backend.gpu.code = code; ++ trace_msg->backend.gpu.flags = flags; + } + + void kbasep_ktrace_add(struct kbase_device *kbdev, enum kbase_ktrace_code code, +@@ -170,12 +183,14 @@ void kbasep_ktrace_add(struct kbase_device *kbdev, enum kbase_ktrace_code code, + unsigned long irqflags; + struct kbase_ktrace_msg *trace_msg; + ++ WARN_ON((flags & ~KBASE_KTRACE_FLAG_COMMON_ALL)); ++ + spin_lock_irqsave(&kbdev->ktrace.lock, irqflags); + + /* Reserve and update indices */ + trace_msg = kbasep_ktrace_reserve(&kbdev->ktrace); + +- /* Fill the common part of the message (including backend.flags) */ ++ /* Fill the common part of the message (including backend.gpu.flags) */ + kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, flags, + info_val); + +@@ -225,7 +240,7 @@ void kbasep_ktrace_dump(struct kbase_device *kbdev) + spin_unlock_irqrestore(&kbdev->ktrace.lock, flags); + } + +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + struct trace_seq_state { + struct kbase_ktrace_msg trace_buf[KBASE_KTRACE_SIZE]; + u32 start; +@@ -333,7 +348,7 @@ void kbase_ktrace_debugfs_init(struct kbase_device *kbdev) + + #else /* KBASE_KTRACE_TARGET_RBUF */ + +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + void kbase_ktrace_debugfs_init(struct kbase_device *kbdev) + { + CSTD_UNUSED(kbdev); +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace.h b/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace.h +index 0dd8b7a..f943696 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2020-2021 ARM Limited. 
All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +@@ -37,7 +36,15 @@ + #ifndef _KBASE_DEBUG_KTRACE_H_ + #define _KBASE_DEBUG_KTRACE_H_ + ++#if KBASE_KTRACE_TARGET_FTRACE ++#include "mali_linux_trace.h" ++#endif ++ ++#if MALI_USE_CSF ++#include "debug/backend/mali_kbase_debug_ktrace_csf.h" ++#else + #include "debug/backend/mali_kbase_debug_ktrace_jm.h" ++#endif + + /** + * kbase_ktrace_init - initialize kbase ktrace. +@@ -58,7 +65,7 @@ void kbase_ktrace_term(struct kbase_device *kbdev); + */ + void kbase_ktrace_hook_wrapper(void *param); + +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + /** + * kbase_ktrace_debugfs_init - initialize kbase ktrace for debugfs usage, if + * the selected targets support it. 
+@@ -140,10 +147,9 @@ void kbasep_ktrace_dump(struct kbase_device *kbdev); + * KTrace target for Linux's ftrace + */ + #if KBASE_KTRACE_TARGET_FTRACE +-#include "mali_linux_trace.h" + + #define KBASE_KTRACE_FTRACE_ADD(kbdev, code, kctx, info_val) \ +- trace_mali_##code(info_val) ++ trace_mali_##code(kctx, info_val) + + #else /* KBASE_KTRACE_TARGET_FTRACE */ + #define KBASE_KTRACE_FTRACE_ADD(kbdev, code, kctx, info_val) \ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_codes.h b/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_codes.h +index 364ed60..3309834 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_codes.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_codes.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2011-2015,2018-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2015, 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +@@ -30,6 +29,9 @@ + * The purpose of this header file is just to contain a list of trace code + * identifiers + * ++ * When updating this file, also remember to update ++ * mali_kbase_debug_linux_ktrace.h ++ * + * Each identifier is wrapped in a macro, so that its string form and enum form + * can be created + * +@@ -112,6 +114,7 @@ int dummy_array[] = { + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER), ++ KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_L2), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER), + KBASE_KTRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED), +@@ -142,9 +145,20 @@ int dummy_array[] = { + KBASE_KTRACE_CODE_MAKE_CODE(SCHED_RETAIN_CTX_NOLOCK), + /* info_val == kctx->refcount */ + KBASE_KTRACE_CODE_MAKE_CODE(SCHED_RELEASE_CTX), ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ /* ++ * Arbitration events ++ */ ++ KBASE_KTRACE_CODE_MAKE_CODE(ARB_GPU_LOST), ++ KBASE_KTRACE_CODE_MAKE_CODE(ARB_VM_STATE), ++ KBASE_KTRACE_CODE_MAKE_CODE(ARB_VM_EVT), ++#endif + +- ++#if MALI_USE_CSF ++#include "debug/backend/mali_kbase_debug_ktrace_codes_csf.h" ++#else + #include "debug/backend/mali_kbase_debug_ktrace_codes_jm.h" ++#endif + /* + * Unused code just to make it easier to not have a comma at the end. + * All other codes MUST come before this +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_defs.h b/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_defs.h +index d6baaf1..4694b78 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_defs.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_defs.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2020 ARM Limited. 
All rights reserved. ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KBASE_DEBUG_KTRACE_DEFS_H_ +@@ -58,6 +57,16 @@ + #define KBASE_KTRACE_TARGET_RBUF 0 + #endif /* KBASE_KTRACE_ENABLE */ + ++/* ++ * Note: Some backends define flags in this type even if the RBUF target is ++ * disabled (they get discarded with CSTD_UNUSED(), but they're still ++ * referenced) ++ */ ++typedef u8 kbase_ktrace_flag_t; ++ ++#if KBASE_KTRACE_TARGET_RBUF ++typedef u8 kbase_ktrace_code_t; ++ + /* + * NOTE: KBASE_KTRACE_VERSION_MAJOR, KBASE_KTRACE_VERSION_MINOR are kept in + * the backend, since updates can be made to one backend in a way that doesn't +@@ -67,20 +76,28 @@ + * updated. + */ + +-#if KBASE_KTRACE_TARGET_RBUF +-typedef u8 kbase_ktrace_flag_t; +-typedef u8 kbase_ktrace_code_t; +- + /* +- * struct kbase_ktrace_backend - backend specific part of a trace message +- * +- * At the very least, this must contain a kbase_ktrace_code_t 'code' member and +- * a kbase_ktrace_flag_t 'flags' member ++ * union kbase_ktrace_backend - backend specific part of a trace message. ++ * At the very least, this must contain a kbase_ktrace_code_t 'code' member ++ * and a kbase_ktrace_flag_t 'flags' inside a "gpu" sub-struct. 
Should a ++ * backend need several sub structs in its union to optimize the data storage ++ * for different message types, then it can use a "common initial sequence" to ++ * allow 'flags' and 'code' to pack optimally without corrupting them. ++ * Different backends need not share common initial sequences between them, they ++ * only need to ensure they have gpu.flags and gpu.code members, it ++ * is up to the backend then how to order these. + */ +-struct kbase_ktrace_backend; ++union kbase_ktrace_backend; ++ ++#endif /* KBASE_KTRACE_TARGET_RBUF */ + ++#if MALI_USE_CSF ++#include "debug/backend/mali_kbase_debug_ktrace_defs_csf.h" ++#else + #include "debug/backend/mali_kbase_debug_ktrace_defs_jm.h" ++#endif + ++#if KBASE_KTRACE_TARGET_RBUF + /* Indicates if the trace message has backend related info. + * + * If not set, consider the &kbase_ktrace_backend part of a &kbase_ktrace_msg +@@ -90,7 +107,14 @@ struct kbase_ktrace_backend; + */ + #define KBASE_KTRACE_FLAG_BACKEND (((kbase_ktrace_flag_t)1) << 7) + +-#define KBASE_KTRACE_SHIFT 8 /* 256 entries */ ++/* Collect all the common flags together for debug checking */ ++#define KBASE_KTRACE_FLAG_COMMON_ALL \ ++ (KBASE_KTRACE_FLAG_BACKEND) ++ ++#define KBASE_KTRACE_FLAG_ALL \ ++ (KBASE_KTRACE_FLAG_COMMON_ALL | KBASE_KTRACE_FLAG_BACKEND_ALL) ++ ++#define KBASE_KTRACE_SHIFT (9) /* 512 entries */ + #define KBASE_KTRACE_SIZE (1 << KBASE_KTRACE_SHIFT) + #define KBASE_KTRACE_MASK ((1 << KBASE_KTRACE_SHIFT)-1) + +@@ -121,24 +145,23 @@ enum kbase_ktrace_code { + * added. + * @cpu: indicates which CPU the @thread_id was scheduled on when the + * trace message was added. +- * @kctx: Pointer to the kbase context for which the trace message was +- * added. Will be NULL for certain trace messages associated with +- * the &kbase_device itself, such as power management events. +- * Will point to the appropriate context corresponding to +- * backend-specific events. 
++ * @kctx_tgid: Thread group ID of the &kbase_context associated with the ++ * message, or 0 if none associated. ++ * @kctx_id: Unique identifier of the &kbase_context associated with the ++ * message. Only valid if @kctx_tgid != 0. + * @info_val: value specific to the type of event being traced. Refer to the +- * specific code in enum kbase_ktrace_code ++ * specific code in enum kbase_ktrace_code. + * @backend: backend-specific trace information. All backends must implement +- * a minimum common set of members ++ * a minimum common set of members. + */ + struct kbase_ktrace_msg { + struct timespec64 timestamp; + u32 thread_id; + u32 cpu; +- void *kctx; ++ pid_t kctx_tgid; ++ u32 kctx_id; + u64 info_val; +- +- struct kbase_ktrace_backend backend; ++ union kbase_ktrace_backend backend; + }; + + struct kbase_ktrace { +@@ -148,5 +171,17 @@ struct kbase_ktrace { + struct kbase_ktrace_msg *rbuf; + }; + ++ ++static inline void kbase_ktrace_compiletime_asserts(void) ++{ ++ /* See also documentation of enum kbase_ktrace_code */ ++ compiletime_assert(sizeof(kbase_ktrace_code_t) == sizeof(unsigned long long) || ++ KBASE_KTRACE_CODE_COUNT <= (1ull << (sizeof(kbase_ktrace_code_t) * BITS_PER_BYTE)), ++ "kbase_ktrace_code_t not wide enough for KBASE_KTRACE_CODE_COUNT"); ++ compiletime_assert((KBASE_KTRACE_FLAG_BACKEND_ALL & KBASE_KTRACE_FLAG_COMMON_ALL) == 0, ++ "KTrace backend flags intersect with KTrace common flags"); ++ ++} ++ + #endif /* KBASE_KTRACE_TARGET_RBUF */ + #endif /* _KBASE_DEBUG_KTRACE_DEFS_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_internal.h b/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_internal.h +index e450760..d9bd351 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_internal.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_internal.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * 
(C) COPYRIGHT 2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KBASE_DEBUG_KTRACE_INTERNAL_H_ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_linux_ktrace.h b/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_linux_ktrace.h +index 18e4f7c..b56dec4 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_linux_ktrace.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_linux_ktrace.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014,2018,2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014, 2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +@@ -31,21 +30,29 @@ + #if KBASE_KTRACE_TARGET_FTRACE + + DECLARE_EVENT_CLASS(mali_add_template, +- TP_PROTO(u64 info_val), +- TP_ARGS(info_val), ++ TP_PROTO(struct kbase_context *kctx, u64 info_val), ++ TP_ARGS(kctx, info_val), + TP_STRUCT__entry( ++ __field(pid_t, kctx_tgid) ++ __field(u32, kctx_id) + __field(u64, info_val) + ), + TP_fast_assign( ++ __entry->kctx_id = (kctx) ? kctx->id : 0u; ++ __entry->kctx_tgid = (kctx) ? kctx->tgid : 0; + __entry->info_val = info_val; + ), +- TP_printk("info=0x%llx", __entry->info_val) ++ TP_printk("kctx=%d_%u info=0x%llx", __entry->kctx_tgid, ++ __entry->kctx_id, __entry->info_val) + ); + ++/* DEFINE_MALI_ADD_EVENT is available also to backends for backend-specific ++ * simple trace codes ++ */ + #define DEFINE_MALI_ADD_EVENT(name) \ + DEFINE_EVENT(mali_add_template, mali_##name, \ +- TP_PROTO(u64 info_val), \ +- TP_ARGS(info_val)) ++ TP_PROTO(struct kbase_context *kctx, u64 info_val), \ ++ TP_ARGS(kctx, info_val)) + DEFINE_MALI_ADD_EVENT(CORE_CTX_DESTROY); + DEFINE_MALI_ADD_EVENT(CORE_CTX_HWINSTR_TERM); + DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ); +@@ -78,6 +85,7 @@ DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE); + DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER); + DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE); + DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER); ++DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_L2); + DEFINE_MALI_ADD_EVENT(PM_GPU_ON); + DEFINE_MALI_ADD_EVENT(PM_GPU_OFF); + DEFINE_MALI_ADD_EVENT(PM_SET_POLICY); +@@ -89,10 +97,20 @@ DEFINE_MALI_ADD_EVENT(PM_CONTEXT_IDLE); + DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS); + DEFINE_MALI_ADD_EVENT(SCHED_RETAIN_CTX_NOLOCK); + DEFINE_MALI_ADD_EVENT(SCHED_RELEASE_CTX); ++#ifdef CONFIG_MALI_ARBITER_SUPPORT + +-#undef DEFINE_MALI_ADD_EVENT ++DEFINE_MALI_ADD_EVENT(ARB_GPU_LOST); ++DEFINE_MALI_ADD_EVENT(ARB_VM_STATE); ++DEFINE_MALI_ADD_EVENT(ARB_VM_EVT); + +-#include "mali_kbase_debug_linux_ktrace_jm.h" ++#endif ++#if 
MALI_USE_CSF ++#include "backend/mali_kbase_debug_linux_ktrace_csf.h" ++#else ++#include "backend/mali_kbase_debug_linux_ktrace_jm.h" ++#endif ++ ++#undef DEFINE_MALI_ADD_EVENT + + #endif /* KBASE_KTRACE_TARGET_FTRACE */ + +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/Makefile b/dvalin/kernel/drivers/gpu/arm/midgard/device/Kbuild +similarity index 56% +rename from dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/Makefile +rename to dvalin/kernel/drivers/gpu/arm/midgard/device/Kbuild +index d848e87..90e7024 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/Makefile ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/device/Kbuild +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,21 +16,18 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. 
+ # +-# SPDX-License-Identifier: GPL-2.0 + # +-# +- +-# linux build system bootstrap for out-of-tree module + +-# default to building for the host +-ARCH ?= $(shell uname -m) ++mali_kbase-y += \ ++ device/mali_kbase_device.o \ ++ device/mali_kbase_device_hw.o + +-ifeq ($(KDIR),) +-$(error Must specify KDIR to point to the kernel to target)) ++ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) ++ mali_kbase-y += \ ++ device/backend/mali_kbase_device_csf.o \ ++ device/backend/mali_kbase_device_hw_csf.o ++else ++ mali_kbase-y += \ ++ device/backend/mali_kbase_device_jm.o \ ++ device/backend/mali_kbase_device_hw_jm.o + endif +- +-all: +- $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) $(SCONS_CONFIGS) EXTRA_CFLAGS=-I$(CURDIR)/../include modules +- +-clean: +- $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_csf.c b/dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_csf.c +new file mode 100644 +index 0000000..0c5052b +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_csf.c +@@ -0,0 +1,464 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * kbase_device_firmware_hwcnt_term - Terminate CSF firmware and HWC ++ * ++ * @kbdev: An instance of the GPU platform device, allocated from the probe ++ * method of the driver. ++ * ++ * When a kbase driver is removed, terminate CSF firmware and hardware counter ++ * components. ++ */ ++static void kbase_device_firmware_hwcnt_term(struct kbase_device *kbdev) ++{ ++ if (kbdev->csf.firmware_inited) { ++ kbase_vinstr_term(kbdev->vinstr_ctx); ++ kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt); ++ kbase_hwcnt_backend_csf_metadata_term(&kbdev->hwcnt_gpu_iface); ++ kbase_csf_firmware_term(kbdev); ++ } ++} ++ ++/** ++ * kbase_backend_late_init - Perform any backend-specific initialization. ++ * @kbdev: Device pointer ++ * ++ * Return: 0 on success, or an error code on failure. ++ */ ++static int kbase_backend_late_init(struct kbase_device *kbdev) ++{ ++ int err; ++ ++ err = kbase_hwaccess_pm_init(kbdev); ++ if (err) ++ return err; ++ ++ err = kbase_reset_gpu_init(kbdev); ++ if (err) ++ goto fail_reset_gpu_init; ++ ++ err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT); ++ if (err) ++ goto fail_pm_powerup; ++ ++ err = kbase_backend_timer_init(kbdev); ++ if (err) ++ goto fail_timer; ++ ++#ifdef CONFIG_MALI_DEBUG ++#ifndef CONFIG_MALI_NO_MALI ++ if (kbasep_common_test_interrupt_handlers(kbdev) != 0) { ++ dev_err(kbdev->dev, "Interrupt assignment check failed.\n"); ++ err = -EINVAL; ++ goto fail_interrupt_test; ++ } ++#endif /* !CONFIG_MALI_NO_MALI */ ++#endif /* CONFIG_MALI_DEBUG */ ++ ++ kbase_ipa_control_init(kbdev); ++ ++ /* Initialise the metrics subsystem, it couldn't be initialized earlier ++ * due to dependency on kbase_ipa_control. 
++ */ ++ err = kbasep_pm_metrics_init(kbdev); ++ if (err) ++ goto fail_pm_metrics_init; ++ ++ /* Do the initialisation of devfreq. ++ * Devfreq needs backend_timer_init() for completion of its ++ * initialisation and it also needs to catch the first callback ++ * occurrence of the runtime_suspend event for maintaining state ++ * coherence with the backend power management, hence needs to be ++ * placed before the kbase_pm_context_idle(). ++ */ ++ err = kbase_backend_devfreq_init(kbdev); ++ if (err) ++ goto fail_devfreq_init; ++ ++ /* Update gpuprops with L2_FEATURES if applicable */ ++ err = kbase_gpuprops_update_l2_features(kbdev); ++ if (err) ++ goto fail_update_l2_features; ++ ++ init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait); ++ ++ kbase_pm_context_idle(kbdev); ++ ++ mutex_init(&kbdev->fw_load_lock); ++ ++ return 0; ++ ++fail_update_l2_features: ++ kbase_backend_devfreq_term(kbdev); ++fail_devfreq_init: ++ kbasep_pm_metrics_term(kbdev); ++fail_pm_metrics_init: ++ kbase_ipa_control_term(kbdev); ++ ++#ifdef CONFIG_MALI_DEBUG ++#ifndef CONFIG_MALI_NO_MALI ++fail_interrupt_test: ++#endif /* !CONFIG_MALI_NO_MALI */ ++#endif /* CONFIG_MALI_DEBUG */ ++ ++ kbase_backend_timer_term(kbdev); ++fail_timer: ++ kbase_pm_context_idle(kbdev); ++ kbase_hwaccess_pm_halt(kbdev); ++fail_pm_powerup: ++ kbase_reset_gpu_term(kbdev); ++fail_reset_gpu_init: ++ kbase_hwaccess_pm_term(kbdev); ++ ++ return err; ++} ++ ++/** ++ * kbase_backend_late_term - Perform any backend-specific termination. ++ * @kbdev: Device pointer ++ */ ++static void kbase_backend_late_term(struct kbase_device *kbdev) ++{ ++ kbase_backend_devfreq_term(kbdev); ++ kbasep_pm_metrics_term(kbdev); ++ kbase_ipa_control_term(kbdev); ++ kbase_hwaccess_pm_halt(kbdev); ++ kbase_reset_gpu_term(kbdev); ++ kbase_hwaccess_pm_term(kbdev); ++} ++ ++/** ++ * kbase_csf_early_init - Early initialization for firmware & scheduler. ++ * @kbdev: Device pointer ++ * ++ * Return: 0 on success, error code otherwise. 
++ */ ++static int kbase_csf_early_init(struct kbase_device *kbdev) ++{ ++ int err = kbase_csf_firmware_early_init(kbdev); ++ ++ if (err) ++ return err; ++ ++ err = kbase_csf_scheduler_early_init(kbdev); ++ ++ return err; ++} ++ ++/** ++ * kbase_csf_early_init - Early termination for firmware & scheduler. ++ * @kbdev: Device pointer ++ */ ++static void kbase_csf_early_term(struct kbase_device *kbdev) ++{ ++ kbase_csf_scheduler_early_term(kbdev); ++} ++ ++/** ++ * kbase_device_hwcnt_backend_csf_if_init - Create hardware counter backend ++ * firmware interface. ++ * @kbdev: Device pointer ++ */ ++static int kbase_device_hwcnt_backend_csf_if_init(struct kbase_device *kbdev) ++{ ++ return kbase_hwcnt_backend_csf_if_fw_create( ++ kbdev, &kbdev->hwcnt_backend_csf_if_fw); ++} ++ ++/** ++ * kbase_device_hwcnt_backend_csf_if_term - Terminate hardware counter backend ++ * firmware interface. ++ * @kbdev: Device pointer ++ */ ++static void kbase_device_hwcnt_backend_csf_if_term(struct kbase_device *kbdev) ++{ ++ kbase_hwcnt_backend_csf_if_fw_destroy(&kbdev->hwcnt_backend_csf_if_fw); ++} ++ ++/** ++ * kbase_device_hwcnt_backend_csf_init - Create hardware counter backend. ++ * @kbdev: Device pointer ++ */ ++ ++static int kbase_device_hwcnt_backend_csf_init(struct kbase_device *kbdev) ++{ ++ return kbase_hwcnt_backend_csf_create( ++ &kbdev->hwcnt_backend_csf_if_fw, ++ KBASE_HWCNT_BACKEND_CSF_RING_BUFFER_COUNT, ++ &kbdev->hwcnt_gpu_iface); ++} ++ ++/** ++ * kbase_device_hwcnt_backend_csf_term - Terminate hardware counter backend. 
++ * @kbdev: Device pointer ++ */ ++static void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev) ++{ ++ kbase_hwcnt_backend_csf_destroy(&kbdev->hwcnt_gpu_iface); ++} ++ ++static const struct kbase_device_init dev_init[] = { ++ { assign_irqs, NULL, "IRQ search failed" }, ++ { registers_map, registers_unmap, "Register map failed" }, ++ { power_control_init, power_control_term, ++ "Power control initialization failed" }, ++ { kbase_device_io_history_init, kbase_device_io_history_term, ++ "Register access history initialization failed" }, ++ { kbase_device_early_init, kbase_device_early_term, ++ "Early device initialization failed" }, ++ { kbase_device_populate_max_freq, NULL, ++ "Populating max frequency failed" }, ++ { kbase_device_misc_init, kbase_device_misc_term, ++ "Miscellaneous device initialization failed" }, ++ { kbase_device_pcm_dev_init, kbase_device_pcm_dev_term, ++ "Priority control manager initialization failed" }, ++ { kbase_ctx_sched_init, kbase_ctx_sched_term, ++ "Context scheduler initialization failed" }, ++ { kbase_mem_init, kbase_mem_term, ++ "Memory subsystem initialization failed" }, ++ { kbase_csf_protected_memory_init, kbase_csf_protected_memory_term, ++ "Protected memory allocator initialization failed" }, ++ { kbase_device_coherency_init, NULL, "Device coherency init failed" }, ++ { kbase_protected_mode_init, kbase_protected_mode_term, ++ "Protected mode subsystem initialization failed" }, ++ { kbase_device_list_init, kbase_device_list_term, ++ "Device list setup failed" }, ++ { kbase_device_timeline_init, kbase_device_timeline_term, ++ "Timeline stream initialization failed" }, ++ { kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term, ++ "Clock rate trace manager initialization failed" }, ++ { kbase_device_hwcnt_backend_csf_if_init, ++ kbase_device_hwcnt_backend_csf_if_term, ++ "GPU hwcnt backend CSF interface creation failed" }, ++ { kbase_device_hwcnt_backend_csf_init, ++ 
kbase_device_hwcnt_backend_csf_term, ++ "GPU hwcnt backend creation failed" }, ++ { kbase_device_hwcnt_context_init, kbase_device_hwcnt_context_term, ++ "GPU hwcnt context initialization failed" }, ++ { kbase_backend_late_init, kbase_backend_late_term, ++ "Late backend initialization failed" }, ++ { kbase_csf_early_init, kbase_csf_early_term, ++ "Early CSF initialization failed" }, ++ { NULL, kbase_device_firmware_hwcnt_term, NULL }, ++#ifdef MALI_KBASE_BUILD ++ { kbase_device_debugfs_init, kbase_device_debugfs_term, ++ "DebugFS initialization failed" }, ++ /* Sysfs init needs to happen before registering the device with ++ * misc_register(), otherwise it causes a race condition between ++ * registering the device and a uevent event being generated for ++ * userspace, causing udev rules to run which might expect certain ++ * sysfs attributes present. As a result of the race condition ++ * we avoid, some Mali sysfs entries may have appeared to udev ++ * to not exist. ++ * For more information, see ++ * https://www.kernel.org/doc/Documentation/driver-model/device.txt, the ++ * paragraph that starts with "Word of warning", currently the ++ * second-last paragraph. 
++ */ ++ { kbase_sysfs_init, kbase_sysfs_term, "SysFS group creation failed" }, ++ { kbase_device_misc_register, kbase_device_misc_deregister, ++ "Misc device registration failed" }, ++ { kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer, ++ "GPU property population failed" }, ++ { kbase_device_late_init, kbase_device_late_term, ++ "Late device initialization failed" }, ++#endif ++}; ++ ++static void kbase_device_term_partial(struct kbase_device *kbdev, ++ unsigned int i) ++{ ++ while (i-- > 0) { ++ if (dev_init[i].term) ++ dev_init[i].term(kbdev); ++ } ++} ++ ++void kbase_device_term(struct kbase_device *kbdev) ++{ ++ kbdev->csf.mali_file_inode = NULL; ++ kbase_device_term_partial(kbdev, ARRAY_SIZE(dev_init)); ++ kbase_mem_halt(kbdev); ++} ++ ++int kbase_device_init(struct kbase_device *kbdev) ++{ ++ int err = 0; ++ unsigned int i = 0; ++ ++ dev_info(kbdev->dev, "Kernel DDK version %s", MALI_RELEASE_NAME); ++ ++ kbase_device_id_init(kbdev); ++ kbase_disjoint_init(kbdev); ++ ++ for (i = 0; i < ARRAY_SIZE(dev_init); i++) { ++ if (dev_init[i].init) { ++ err = dev_init[i].init(kbdev); ++ if (err) { ++ dev_err(kbdev->dev, "%s error = %d\n", ++ dev_init[i].err_mes, err); ++ kbase_device_term_partial(kbdev, i); ++ break; ++ } ++ } ++ } ++ ++ return err; ++} ++ ++/** ++ * kbase_device_hwcnt_csf_deferred_init - Initialize CSF deferred HWC components ++ * ++ * @kbdev: An instance of the GPU platform device, allocated from the probe ++ * method of the driver. ++ * ++ * Hardware counter components depending on firmware are initialized after CSF ++ * firmware is loaded. ++ * ++ * @return 0 on success. An error code on failure. 
++ */ ++static int kbase_device_hwcnt_csf_deferred_init(struct kbase_device *kbdev) ++{ ++ int ret = 0; ++ ++ /* For CSF GPUs, HWC metadata needs to query information from CSF ++ * firmware, so the initialization of HWC metadata only can be called ++ * after firmware initialized, but firmware initialization depends on ++ * HWC backend initialization, so we need to separate HWC backend ++ * metadata initialization from HWC backend initialization. ++ */ ++ ret = kbase_hwcnt_backend_csf_metadata_init(&kbdev->hwcnt_gpu_iface); ++ if (ret) { ++ dev_err(kbdev->dev, ++ "GPU hwcnt backend metadata creation failed"); ++ return ret; ++ } ++ ++ ret = kbase_hwcnt_virtualizer_init( ++ kbdev->hwcnt_gpu_ctx, ++ KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS, ++ &kbdev->hwcnt_gpu_virt); ++ if (ret) { ++ dev_err(kbdev->dev, ++ "GPU hwcnt virtualizer initialization failed"); ++ goto virt_fail; ++ } ++ ++ ret = kbase_vinstr_init(kbdev->hwcnt_gpu_virt, &kbdev->vinstr_ctx); ++ if (ret) { ++ dev_err(kbdev->dev, ++ "Virtual instrumentation initialization failed"); ++ goto vinstr_fail; ++ } ++ ++ return ret; ++ ++vinstr_fail: ++ kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt); ++ ++virt_fail: ++ kbase_hwcnt_backend_csf_metadata_term(&kbdev->hwcnt_gpu_iface); ++ return ret; ++} ++ ++/** ++ * kbase_csf_firmware_deferred_init - Load and initialize CSF firmware ++ * ++ * @kbdev: An instance of the GPU platform device, allocated from the probe ++ * method of the driver. ++ * ++ * Called when a device file is opened for the first time. ++ * To meet Android GKI vendor guideline, firmware load is deferred at ++ * the time when @ref kbase_open is called for the first time. ++ * ++ * @return 0 on success. An error code on failure. 
++ */ ++static int kbase_csf_firmware_deferred_init(struct kbase_device *kbdev) ++{ ++ int err = 0; ++ ++ lockdep_assert_held(&kbdev->fw_load_lock); ++ ++ kbase_pm_context_active(kbdev); ++ ++ err = kbase_csf_firmware_init(kbdev); ++ if (!err) { ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->pm.backend.mcu_state = KBASE_MCU_ON; ++ kbdev->csf.firmware_inited = true; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } else { ++ dev_err(kbdev->dev, "Firmware initialization failed"); ++ } ++ ++ kbase_pm_context_idle(kbdev); ++ ++ return err; ++} ++ ++int kbase_device_firmware_init_once(struct kbase_device *kbdev) ++{ ++ int ret = 0; ++ ++ mutex_lock(&kbdev->fw_load_lock); ++ ++ if (!kbdev->csf.firmware_inited) { ++ ret = kbase_csf_firmware_deferred_init(kbdev); ++ if (ret) ++ goto out; ++ ++ ret = kbase_device_hwcnt_csf_deferred_init(kbdev); ++ if (ret) { ++ kbase_csf_firmware_term(kbdev); ++ goto out; ++ } ++ ++ kbase_csf_debugfs_init(kbdev); ++ } ++ ++out: ++ mutex_unlock(&kbdev->fw_load_lock); ++ ++ return ret; ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_hw_csf.c b/dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_hw_csf.c +new file mode 100644 +index 0000000..3fce637 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_hw_csf.c +@@ -0,0 +1,163 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * kbase_report_gpu_fault - Report a GPU fault of the device. ++ * ++ * @kbdev: Kbase device pointer ++ * @status: Fault status ++ * @as_nr: Faulty address space ++ * @as_valid: true if address space is valid ++ * ++ * This function is called from the interrupt handler when a GPU fault occurs. ++ */ ++static void kbase_report_gpu_fault(struct kbase_device *kbdev, u32 status, ++ u32 as_nr, bool as_valid) ++{ ++ u64 address = (u64) kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_FAULTADDRESS_HI)) << 32; ++ ++ address |= kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_FAULTADDRESS_LO)); ++ ++ /* Report GPU fault for all contexts in case either ++ * the address space is invalid or it's MCU address space. ++ */ ++ meson_gpu_fault ++; ++ kbase_mmu_gpu_fault_interrupt(kbdev, status, as_nr, address, as_valid); ++} ++ ++static void kbase_gpu_fault_interrupt(struct kbase_device *kbdev) ++{ ++ const u32 status = kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_FAULTSTATUS)); ++ const bool as_valid = status & GPU_FAULTSTATUS_JASID_VALID_FLAG; ++ const u32 as_nr = (status & GPU_FAULTSTATUS_JASID_MASK) >> ++ GPU_FAULTSTATUS_JASID_SHIFT; ++ bool bus_fault = (status & GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK) == ++ GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_BUS_FAULT; ++ ++ if (bus_fault) { ++ /* If as_valid, reset gpu when ASID is for MCU. 
*/ ++ if (!as_valid || (as_nr == MCU_AS_NR)) { ++ kbase_report_gpu_fault(kbdev, status, as_nr, as_valid); ++ ++ dev_err(kbdev->dev, "GPU bus fault triggering gpu-reset ...\n"); ++ if (kbase_prepare_to_reset_gpu( ++ kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) ++ kbase_reset_gpu(kbdev); ++ } else { ++ /* Handle Bus fault */ ++ if (kbase_mmu_bus_fault_interrupt(kbdev, status, as_nr)) ++ dev_warn(kbdev->dev, ++ "fail to handle GPU bus fault ...\n"); ++ } ++ } else ++ kbase_report_gpu_fault(kbdev, status, as_nr, as_valid); ++} ++ ++void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) ++{ ++ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, val); ++ if (val & GPU_FAULT) ++ kbase_gpu_fault_interrupt(kbdev); ++ ++ if (val & GPU_PROTECTED_FAULT) { ++ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; ++ unsigned long flags; ++ ++ dev_err_ratelimited(kbdev->dev, "GPU fault in protected mode"); ++ ++ /* Mask the protected fault interrupt to avoid the potential ++ * deluge of such interrupts. It will be unmasked on GPU reset. 
++ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), ++ GPU_IRQ_REG_ALL & ~GPU_PROTECTED_FAULT); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ if (!WARN_ON(!kbase_csf_scheduler_protected_mode_in_use( ++ kbdev))) { ++ struct base_gpu_queue_group_error const ++ err_payload = { .error_type = ++ BASE_GPU_QUEUE_GROUP_ERROR_FATAL, ++ .payload = { ++ .fatal_group = { ++ .status = ++ GPU_EXCEPTION_TYPE_SW_FAULT_0, ++ } } }; ++ ++ scheduler->active_protm_grp->faulted = true; ++ kbase_csf_add_group_fatal_error( ++ scheduler->active_protm_grp, &err_payload); ++ kbase_event_wakeup(scheduler->active_protm_grp->kctx); ++ } ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++ ++ if (kbase_prepare_to_reset_gpu( ++ kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) ++ kbase_reset_gpu(kbdev); ++ } ++ ++ if (val & RESET_COMPLETED) ++ kbase_pm_reset_done(kbdev); ++ ++ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val); ++ ++ /* kbase_pm_check_transitions (called by kbase_pm_power_changed) must ++ * be called after the IRQ has been cleared. This is because it might ++ * trigger further power transitions and we don't want to miss the ++ * interrupt raised to notify us that these further transitions have ++ * finished. The same applies to kbase_clean_caches_done() - if another ++ * clean was queued, it might trigger another clean, which might ++ * generate another interrupt which shouldn't be missed. ++ */ ++ ++ if (val & CLEAN_CACHES_COMPLETED) ++ kbase_clean_caches_done(kbdev); ++ ++ if (val & (POWER_CHANGED_ALL | MCU_STATUS_GPU_IRQ)) { ++ kbase_pm_power_changed(kbdev); ++ } else if (val & CLEAN_CACHES_COMPLETED) { ++ /* If cache line evict messages can be lost when shader cores ++ * power down then we need to flush the L2 cache before powering ++ * down cores. 
When the flush completes, the shaders' state ++ * machine needs to be re-invoked to proceed with powering down ++ * cores. ++ */ ++ if (kbdev->pm.backend.l2_always_on || ++ kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) ++ kbase_pm_power_changed(kbdev); ++ } ++ ++ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, val); ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_hw_jm.c b/dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_hw_jm.c +new file mode 100644 +index 0000000..384e385 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_hw_jm.c +@@ -0,0 +1,98 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * kbase_report_gpu_fault - Report a GPU fault. ++ * @kbdev: Kbase device pointer ++ * @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS ++ * was also set ++ * ++ * This function is called from the interrupt handler when a GPU fault occurs. ++ * It reports the details of the fault using dev_warn(). 
++ */ ++static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple) ++{ ++ u32 status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS)); ++ u64 address = (u64) kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_FAULTADDRESS_HI)) << 32; ++ ++ address |= kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_FAULTADDRESS_LO)); ++ meson_gpu_fault ++; ++ dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx", ++ status, ++ kbase_gpu_exception_name(status & 0xFF), ++ address); ++ if (multiple) ++ dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n"); ++} ++ ++void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) ++{ ++ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, val); ++ if (val & GPU_FAULT) ++ kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS); ++ ++ if (val & RESET_COMPLETED) ++ kbase_pm_reset_done(kbdev); ++ ++ if (val & PRFCNT_SAMPLE_COMPLETED) ++ kbase_instr_hwcnt_sample_done(kbdev); ++ ++ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val); ++ ++ /* kbase_pm_check_transitions (called by kbase_pm_power_changed) must ++ * be called after the IRQ has been cleared. This is because it might ++ * trigger further power transitions and we don't want to miss the ++ * interrupt raised to notify us that these further transitions have ++ * finished. The same applies to kbase_clean_caches_done() - if another ++ * clean was queued, it might trigger another clean, which might ++ * generate another interrupt which shouldn't be missed. ++ */ ++ ++ if (val & CLEAN_CACHES_COMPLETED) ++ kbase_clean_caches_done(kbdev); ++ ++ if (val & POWER_CHANGED_ALL) { ++ kbase_pm_power_changed(kbdev); ++ } else if (val & CLEAN_CACHES_COMPLETED) { ++ /* If cache line evict messages can be lost when shader cores ++ * power down then we need to flush the L2 cache before powering ++ * down cores. 
When the flush completes, the shaders' state ++ * machine needs to be re-invoked to proceed with powering down ++ * cores. ++ */ ++ if (kbdev->pm.backend.l2_always_on || ++ kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) ++ kbase_pm_power_changed(kbdev); ++ } ++ ++ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, val); ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_jm.c b/dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_jm.c +index fbba2e7..6a6ab60 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_jm.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_jm.c +@@ -1,12 +1,12 @@ +-// SPDX-License-Identifier: GPL-2.0 ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -17,21 +17,17 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +-#include "../mali_kbase_device_internal.h" +-#include "../mali_kbase_device.h" ++#include ++#include ++#include + + #include + #include + #include + #include + +-#ifdef CONFIG_MALI_NO_MALI +-#include +-#endif + + #ifdef CONFIG_MALI_ARBITER_SUPPORT + #include +@@ -43,6 +39,7 @@ + #include + #include + #include ++#include + + /** + * kbase_backend_late_init - Perform any backend-specific initialization. 
+@@ -95,9 +92,6 @@ static int kbase_backend_late_init(struct kbase_device *kbdev) + if (err) + goto fail_devfreq_init; + +- /* Idle the GPU and/or cores, if the policy wants it to */ +- kbase_pm_context_idle(kbdev); +- + /* Update gpuprops with L2_FEATURES if applicable */ + err = kbase_gpuprops_update_l2_features(kbdev); + if (err) +@@ -105,9 +99,15 @@ static int kbase_backend_late_init(struct kbase_device *kbdev) + + init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait); + ++ /* Idle the GPU and/or cores, if the policy wants it to */ ++ kbase_pm_context_idle(kbdev); ++ ++ mutex_init(&kbdev->fw_load_lock); ++ + return 0; + + fail_update_l2_features: ++ kbase_backend_devfreq_term(kbdev); + fail_devfreq_init: + kbase_job_slot_term(kbdev); + fail_job_slot: +@@ -120,6 +120,7 @@ fail_interrupt_test: + + kbase_backend_timer_term(kbdev); + fail_timer: ++ kbase_pm_context_idle(kbdev); + kbase_hwaccess_pm_halt(kbdev); + fail_pm_powerup: + kbase_reset_gpu_term(kbdev); +@@ -144,57 +145,65 @@ static void kbase_backend_late_term(struct kbase_device *kbdev) + kbase_hwaccess_pm_term(kbdev); + } + ++static int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev) ++{ ++ return kbase_hwcnt_backend_jm_create(kbdev, &kbdev->hwcnt_gpu_iface); ++} ++ ++static void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev) ++{ ++ kbase_hwcnt_backend_jm_destroy(&kbdev->hwcnt_gpu_iface); ++} ++ + static const struct kbase_device_init dev_init[] = { +-#ifdef CONFIG_MALI_NO_MALI +- {kbase_gpu_device_create, kbase_gpu_device_destroy, +- "Dummy model initialization failed"}, +-#else +- {assign_irqs, NULL, +- "IRQ search failed"}, +- {registers_map, registers_unmap, +- "Register map failed"}, +-#endif +- {kbase_device_io_history_init, kbase_device_io_history_term, +- "Register access history initialization failed"}, +- {kbase_device_pm_init, kbase_device_pm_term, +- "Power management initialization failed"}, +- {kbase_device_early_init, kbase_device_early_term, +- "Early 
device initialization failed"}, +- {kbase_device_populate_max_freq, NULL, +- "Populating max frequency failed"}, +- {kbase_device_misc_init, kbase_device_misc_term, +- "Miscellaneous device initialization failed"}, +- {kbase_ctx_sched_init, kbase_ctx_sched_term, +- "Context scheduler initialization failed"}, +- {kbase_mem_init, kbase_mem_term, +- "Memory subsystem initialization failed"}, +- {kbase_device_coherency_init, NULL, +- "Device coherency init failed"}, +- {kbase_protected_mode_init, kbase_protected_mode_term, +- "Protected mode subsystem initialization failed"}, +- {kbase_device_list_init, kbase_device_list_term, +- "Device list setup failed"}, +- {kbasep_js_devdata_init, kbasep_js_devdata_term, +- "Job JS devdata initialization failed"}, +- {kbase_device_timeline_init, kbase_device_timeline_term, +- "Timeline stream initialization failed"}, +- {kbase_device_hwcnt_backend_gpu_init, +- kbase_device_hwcnt_backend_gpu_term, +- "GPU hwcnt backend creation failed"}, +- {kbase_device_hwcnt_context_init, kbase_device_hwcnt_context_term, +- "GPU hwcnt context initialization failed"}, +- {kbase_device_hwcnt_virtualizer_init, +- kbase_device_hwcnt_virtualizer_term, +- "GPU hwcnt virtualizer initialization failed"}, +- {kbase_device_vinstr_init, kbase_device_vinstr_term, +- "Virtual instrumentation initialization failed"}, +- {kbase_backend_late_init, kbase_backend_late_term, +- "Late backend initialization failed"}, ++ { assign_irqs, NULL, "IRQ search failed" }, ++ { registers_map, registers_unmap, "Register map failed" }, ++ { kbase_device_io_history_init, kbase_device_io_history_term, ++ "Register access history initialization failed" }, ++ { kbase_device_pm_init, kbase_device_pm_term, ++ "Power management initialization failed" }, ++ { kbase_device_early_init, kbase_device_early_term, ++ "Early device initialization failed" }, ++ { kbase_device_populate_max_freq, NULL, ++ "Populating max frequency failed" }, ++ { kbase_device_misc_init, kbase_device_misc_term, 
++ "Miscellaneous device initialization failed" }, ++ { kbase_device_pcm_dev_init, kbase_device_pcm_dev_term, ++ "Priority control manager initialization failed" }, ++ { kbase_ctx_sched_init, kbase_ctx_sched_term, ++ "Context scheduler initialization failed" }, ++ { kbase_mem_init, kbase_mem_term, ++ "Memory subsystem initialization failed" }, ++ { kbase_device_coherency_init, NULL, "Device coherency init failed" }, ++ { kbase_protected_mode_init, kbase_protected_mode_term, ++ "Protected mode subsystem initialization failed" }, ++ { kbase_device_list_init, kbase_device_list_term, ++ "Device list setup failed" }, ++ { kbasep_js_devdata_init, kbasep_js_devdata_term, ++ "Job JS devdata initialization failed" }, ++ { kbase_device_timeline_init, kbase_device_timeline_term, ++ "Timeline stream initialization failed" }, ++ { kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term, ++ "Clock rate trace manager initialization failed" }, ++ { kbase_instr_backend_init, kbase_instr_backend_term, ++ "Instrumentation backend initialization failed" }, ++ { kbase_device_hwcnt_backend_jm_init, ++ kbase_device_hwcnt_backend_jm_term, ++ "GPU hwcnt backend creation failed" }, ++ { kbase_device_hwcnt_context_init, kbase_device_hwcnt_context_term, ++ "GPU hwcnt context initialization failed" }, ++ { kbase_device_hwcnt_virtualizer_init, ++ kbase_device_hwcnt_virtualizer_term, ++ "GPU hwcnt virtualizer initialization failed" }, ++ { kbase_device_vinstr_init, kbase_device_vinstr_term, ++ "Virtual instrumentation initialization failed" }, ++ { kbase_backend_late_init, kbase_backend_late_term, ++ "Late backend initialization failed" }, + #ifdef MALI_KBASE_BUILD +- {kbase_debug_job_fault_dev_init, kbase_debug_job_fault_dev_term, +- "Job fault debug initialization failed"}, +- {kbase_device_debugfs_init, kbase_device_debugfs_term, +- "DebugFS initialization failed"}, ++ { kbase_debug_job_fault_dev_init, kbase_debug_job_fault_dev_term, ++ "Job fault debug initialization failed" }, 
++ { kbase_device_debugfs_init, kbase_device_debugfs_term, ++ "DebugFS initialization failed" }, + /* Sysfs init needs to happen before registering the device with + * misc_register(), otherwise it causes a race condition between + * registering the device and a uevent event being generated for +@@ -207,17 +216,15 @@ static const struct kbase_device_init dev_init[] = { + * paragraph that starts with "Word of warning", currently the + * second-last paragraph. + */ +- {kbase_sysfs_init, kbase_sysfs_term, "SysFS group creation failed"}, +- {kbase_device_misc_register, kbase_device_misc_deregister, +- "Misc device registration failed"}, +-#ifdef CONFIG_MALI_BUSLOG +- {buslog_init, buslog_term, "Bus log client registration failed"}, ++ { kbase_sysfs_init, kbase_sysfs_term, "SysFS group creation failed" }, ++ { kbase_device_misc_register, kbase_device_misc_deregister, ++ "Misc device registration failed" }, ++ { kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer, ++ "GPU property population failed" }, + #endif +- {kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer, +- "GPU property population failed"}, +-#endif +- {kbase_dummy_job_wa_load, kbase_dummy_job_wa_cleanup, +- "Dummy job workaround load failed"}, ++ { NULL, kbase_dummy_job_wa_cleanup, NULL }, ++ { kbase_device_late_init, kbase_device_late_term, ++ "Late device initialization failed" }, + }; + + static void kbase_device_term_partial(struct kbase_device *kbdev, +@@ -247,14 +254,34 @@ int kbase_device_init(struct kbase_device *kbdev) + kbase_disjoint_init(kbdev); + + for (i = 0; i < ARRAY_SIZE(dev_init); i++) { +- err = dev_init[i].init(kbdev); +- if (err) { +- dev_err(kbdev->dev, "%s error = %d\n", ++ if (dev_init[i].init) { ++ err = dev_init[i].init(kbdev); ++ if (err) { ++ if (err != -EPROBE_DEFER) ++ dev_err(kbdev->dev, "%s error = %d\n", + dev_init[i].err_mes, err); +- kbase_device_term_partial(kbdev, i); +- break; ++ kbase_device_term_partial(kbdev, i); ++ break; ++ } + } 
+ } + + return err; + } ++ ++int kbase_device_firmware_init_once(struct kbase_device *kbdev) ++{ ++ int ret = 0; ++ ++ mutex_lock(&kbdev->fw_load_lock); ++ ++ if (!kbdev->dummy_job_wa_loaded) { ++ ret = kbase_dummy_job_wa_load(kbdev); ++ if (!ret) ++ kbdev->dummy_job_wa_loaded = true; ++ } ++ ++ mutex_unlock(&kbdev->fw_load_lock); ++ ++ return ret; ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device.c b/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device.c +index 76f14e5..1ebd8aa 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device.c +@@ -1,12 +1,12 @@ +-// SPDX-License-Identifier: GPL-2.0 ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -17,12 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /* + * Base kernel device APIs + */ +@@ -34,12 +30,14 @@ + #include + #include + #include ++#include + + #include + #include + #include + #include + #include ++#include + + #include + #include "mali_kbase_vinstr.h" +@@ -50,6 +48,7 @@ + #include "mali_kbase_device_internal.h" + #include "backend/gpu/mali_kbase_pm_internal.h" + #include "backend/gpu/mali_kbase_irq_internal.h" ++#include "mali_kbase_regs_history_debugfs.h" + + #ifdef CONFIG_MALI_ARBITER_SUPPORT + #include "arbiter/mali_kbase_arbiter_pm.h" +@@ -75,64 +74,152 @@ struct kbase_device *kbase_device_alloc(void) + return kzalloc(sizeof(struct kbase_device), GFP_KERNEL); + } + +-static int kbase_device_as_init(struct kbase_device *kbdev, int i) ++/** ++ * kbase_device_all_as_init() - Initialise address space objects of the device. ++ * ++ * @kbdev: Pointer to kbase device. ++ * ++ * Return: 0 on success otherwise non-zero. ++ */ ++static int kbase_device_all_as_init(struct kbase_device *kbdev) + { +- kbdev->as[i].number = i; +- kbdev->as[i].bf_data.addr = 0ULL; +- kbdev->as[i].pf_data.addr = 0ULL; ++ int i, err = 0; + +- kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", 0, 1, i); +- if (!kbdev->as[i].pf_wq) +- return -EINVAL; ++ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { ++ err = kbase_mmu_as_init(kbdev, i); ++ if (err) ++ break; ++ } + +- INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker); +- INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker); ++ if (err) { ++ while (i-- > 0) ++ kbase_mmu_as_term(kbdev, i); ++ } + +- return 0; ++ return err; + } + +-static void kbase_device_as_term(struct kbase_device *kbdev, int i) ++static void kbase_device_all_as_term(struct kbase_device *kbdev) + { +- destroy_workqueue(kbdev->as[i].pf_wq); ++ int i; ++ ++ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) ++ kbase_mmu_as_term(kbdev, i); + } + +-static int kbase_device_all_as_init(struct kbase_device *kbdev) ++int 
kbase_device_pcm_dev_init(struct kbase_device *const kbdev) + { +- int i, err; ++ int err = 0; + +- for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { +- err = kbase_device_as_init(kbdev, i); +- if (err) +- goto free_workqs; +- } ++#if IS_ENABLED(CONFIG_OF) ++ struct device_node *prio_ctrl_node; + +- return 0; +- +-free_workqs: +- for (; i > 0; i--) +- kbase_device_as_term(kbdev, i); ++ /* Check to see whether or not a platform specific priority control manager ++ * is available. ++ */ ++ prio_ctrl_node = of_parse_phandle(kbdev->dev->of_node, ++ "priority-control-manager", 0); ++ if (!prio_ctrl_node) { ++ dev_info(kbdev->dev, ++ "No priority control manager is configured"); ++ } else { ++ struct platform_device *const pdev = ++ of_find_device_by_node(prio_ctrl_node); ++ ++ if (!pdev) { ++ dev_err(kbdev->dev, ++ "The configured priority control manager was not found"); ++ } else { ++ struct priority_control_manager_device *pcm_dev = ++ platform_get_drvdata(pdev); ++ if (!pcm_dev) { ++ dev_info(kbdev->dev, "Priority control manager is not ready"); ++ err = -EPROBE_DEFER; ++ } else if (!try_module_get(pcm_dev->owner)) { ++ dev_err(kbdev->dev, "Failed to get priority control manager module"); ++ err = -ENODEV; ++ } else { ++ dev_info(kbdev->dev, "Priority control manager successfully loaded"); ++ kbdev->pcm_dev = pcm_dev; ++ } ++ } ++ of_node_put(prio_ctrl_node); ++ } ++#endif /* CONFIG_OF */ + + return err; + } + +-static void kbase_device_all_as_term(struct kbase_device *kbdev) ++void kbase_device_pcm_dev_term(struct kbase_device *const kbdev) + { +- int i; ++ if (kbdev->pcm_dev) ++ module_put(kbdev->pcm_dev->owner); ++} + +- for (i = 0; i < kbdev->nr_hw_address_spaces; i++) +- kbase_device_as_term(kbdev, i); ++#define KBASE_PAGES_TO_KIB(pages) (((unsigned int)pages) << (PAGE_SHIFT - 10)) ++ ++/** ++ * mali_oom_notifier_handler - Mali driver out-of-memory handler ++ * ++ * @nb - notifier block - used to retrieve kbdev pointer ++ * @action - action (unused) ++ * 
@data - data pointer (unused) ++ * This function simply lists memory usage by the Mali driver, per GPU device, ++ * for diagnostic purposes. ++ */ ++static int mali_oom_notifier_handler(struct notifier_block *nb, ++ unsigned long action, void *data) ++{ ++ struct kbase_device *kbdev; ++ struct kbase_context *kctx = NULL; ++ unsigned long kbdev_alloc_total; ++ ++ if (WARN_ON(nb == NULL)) ++ return NOTIFY_BAD; ++ ++ kbdev = container_of(nb, struct kbase_device, oom_notifier_block); ++ ++ kbdev_alloc_total = ++ KBASE_PAGES_TO_KIB(atomic_read(&(kbdev->memdev.used_pages))); ++ ++ dev_err(kbdev->dev, "OOM notifier: dev %s %lu kB\n", kbdev->devname, ++ kbdev_alloc_total); ++ ++ mutex_lock(&kbdev->kctx_list_lock); ++ ++ list_for_each_entry (kctx, &kbdev->kctx_list, kctx_list_link) { ++ struct pid *pid_struct; ++ struct task_struct *task; ++ unsigned long task_alloc_total = ++ KBASE_PAGES_TO_KIB(atomic_read(&(kctx->used_pages))); ++ ++ rcu_read_lock(); ++ pid_struct = find_get_pid(kctx->pid); ++ task = pid_task(pid_struct, PIDTYPE_PID); ++ ++ dev_err(kbdev->dev, ++ "OOM notifier: tsk %s tgid (%u) pid (%u) %lu kB\n", ++ task ? 
task->comm : "[null task]", kctx->tgid, ++ kctx->pid, task_alloc_total); ++ ++ put_pid(pid_struct); ++ rcu_read_unlock(); ++ } ++ ++ mutex_unlock(&kbdev->kctx_list_lock); ++ return NOTIFY_OK; + } + + int kbase_device_misc_init(struct kbase_device * const kbdev) + { + int err; +-#ifdef CONFIG_ARM64 ++#if IS_ENABLED(CONFIG_ARM64) + struct device_node *np = NULL; + #endif /* CONFIG_ARM64 */ + + spin_lock_init(&kbdev->mmu_mask_change); + mutex_init(&kbdev->mmu_hw_mutex); +-#ifdef CONFIG_ARM64 ++#if IS_ENABLED(CONFIG_ARM64) + kbdev->cci_snoop_enabled = false; + np = kbdev->dev->of_node; + if (np != NULL) { +@@ -153,6 +240,7 @@ int kbase_device_misc_init(struct kbase_device * const kbdev) + } + } + #endif /* CONFIG_ARM64 */ ++ + /* Get the list of workarounds for issues on the current HW + * (identified by the GPU_ID register) + */ +@@ -169,11 +257,6 @@ int kbase_device_misc_init(struct kbase_device * const kbdev) + if (err) + goto fail; + +- /* On Linux 4.0+, dma coherency is determined from device tree */ +-#if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) +- set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops); +-#endif +- + /* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our + * device structure was created by device-tree + */ +@@ -194,9 +277,7 @@ int kbase_device_misc_init(struct kbase_device * const kbdev) + + err = kbase_device_all_as_init(kbdev); + if (err) +- goto as_init_failed; +- +- spin_lock_init(&kbdev->hwcnt.lock); ++ goto dma_set_mask_failed; + + err = kbase_ktrace_init(kbdev); + if (err) +@@ -208,30 +289,28 @@ int kbase_device_misc_init(struct kbase_device * const kbdev) + + atomic_set(&kbdev->ctx_num, 0); + +- err = kbase_instr_backend_init(kbdev); +- if (err) +- goto term_trace; +- + kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD; + + kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS; + +- if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) +- kbdev->mmu_mode = kbase_mmu_mode_get_aarch64(); +- else 
+- kbdev->mmu_mode = kbase_mmu_mode_get_lpae(); ++ kbdev->mmu_mode = kbase_mmu_mode_get_aarch64(); + + mutex_init(&kbdev->kctx_list_lock); + INIT_LIST_HEAD(&kbdev->kctx_list); + +- spin_lock_init(&kbdev->hwaccess_lock); ++ dev_dbg(kbdev->dev, "Registering mali_oom_notifier_handlern"); ++ kbdev->oom_notifier_block.notifier_call = mali_oom_notifier_handler; ++ err = register_oom_notifier(&kbdev->oom_notifier_block); + ++ if (err) { ++ dev_err(kbdev->dev, ++ "Unable to register OOM notifier for Mali - but will continue\n"); ++ kbdev->oom_notifier_block.notifier_call = NULL; ++ } + return 0; +-term_trace: +- kbase_ktrace_term(kbdev); ++ + term_as: + kbase_device_all_as_term(kbdev); +-as_init_failed: + dma_set_mask_failed: + fail: + return err; +@@ -247,11 +326,12 @@ void kbase_device_misc_term(struct kbase_device *kbdev) + kbase_debug_assert_register_hook(NULL, NULL); + #endif + +- kbase_instr_backend_term(kbdev); +- + kbase_ktrace_term(kbdev); + + kbase_device_all_as_term(kbdev); ++ ++ if (kbdev->oom_notifier_block.notifier_call) ++ unregister_oom_notifier(&kbdev->oom_notifier_block); + } + + void kbase_device_free(struct kbase_device *kbdev) +@@ -271,16 +351,6 @@ void kbase_increment_device_id(void) + kbase_dev_nr++; + } + +-int kbase_device_hwcnt_backend_gpu_init(struct kbase_device *kbdev) +-{ +- return kbase_hwcnt_backend_gpu_create(kbdev, &kbdev->hwcnt_gpu_iface); +-} +- +-void kbase_device_hwcnt_backend_gpu_term(struct kbase_device *kbdev) +-{ +- kbase_hwcnt_backend_gpu_destroy(&kbdev->hwcnt_gpu_iface); +-} +- + int kbase_device_hwcnt_context_init(struct kbase_device *kbdev) + { + return kbase_hwcnt_context_init(&kbdev->hwcnt_gpu_iface, +@@ -400,7 +470,18 @@ int kbase_device_early_init(struct kbase_device *kbdev) + /* We're done accessing the GPU registers for now. */ + kbase_pm_register_access_disable(kbdev); + ++ /* This spinlock has to be initialized before installing interrupt ++ * handlers that require to hold it to process interrupts. 
++ */ ++ spin_lock_init(&kbdev->hwaccess_lock); ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ if (kbdev->arb.arb_if) ++ err = kbase_arbiter_pm_install_interrupts(kbdev); ++ else ++ err = kbase_install_interrupts(kbdev); ++#else + err = kbase_install_interrupts(kbdev); ++#endif + if (err) + goto fail_interrupts; + +@@ -427,3 +508,17 @@ void kbase_device_early_term(struct kbase_device *kbdev) + kbase_pm_runtime_term(kbdev); + kbasep_platform_device_term(kbdev); + } ++ ++int kbase_device_late_init(struct kbase_device *kbdev) ++{ ++ int err; ++ ++ err = kbasep_platform_device_late_init(kbdev); ++ ++ return err; ++} ++ ++void kbase_device_late_term(struct kbase_device *kbdev) ++{ ++ kbasep_platform_device_late_term(kbdev); ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device.h b/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device.h +index 16f1d70..517c16b 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include +@@ -47,6 +46,19 @@ void kbase_device_put_list(const struct list_head *dev_list); + */ + void kbase_increment_device_id(void); + ++/** ++ * kbase_device_firmware_init_once - Initialize firmware and HWC ++ * ++ * @kbdev: An instance of the GPU platform device, allocated from the probe ++ * method of the driver. ++ * ++ * When a device file is opened for the first time, ++ * load firmware and initialize hardware counter components. ++ * ++ * @return 0 on success. An error code on failure. ++ */ ++int kbase_device_firmware_init_once(struct kbase_device *kbdev); ++ + /** + * kbase_device_init - Device initialisation. + * +@@ -69,3 +81,109 @@ int kbase_device_init(struct kbase_device *kbdev); + * + */ + void kbase_device_term(struct kbase_device *kbdev); ++ ++/** ++ * kbase_reg_write - write to GPU register ++ * @kbdev: Kbase device pointer ++ * @offset: Offset of register ++ * @value: Value to write ++ * ++ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). ++ */ ++void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value); ++ ++/** ++ * kbase_reg_read - read from GPU register ++ * @kbdev: Kbase device pointer ++ * @offset: Offset of register ++ * ++ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). ++ * ++ * Return: Value in desired register ++ */ ++u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset); ++ ++/** ++ * kbase_is_gpu_removed() - Has the GPU been removed. ++ * @kbdev: Kbase device pointer ++ * ++ * When Kbase takes too long to give up the GPU, the Arbiter ++ * can remove it. This will then be followed by a GPU lost event. ++ * This function will return true if the GPU has been removed. ++ * When this happens register reads will be zero. A zero GPU_ID is ++ * invalid so this is used to detect when GPU is removed. 
++ * ++ * Return: True if GPU removed ++ */ ++bool kbase_is_gpu_removed(struct kbase_device *kbdev); ++ ++/** ++ * kbase_gpu_start_cache_clean - Start a cache clean ++ * @kbdev: Kbase device ++ * ++ * Issue a cache clean and invalidate command to hardware. This function will ++ * take hwaccess_lock. ++ */ ++void kbase_gpu_start_cache_clean(struct kbase_device *kbdev); ++ ++/** ++ * kbase_gpu_start_cache_clean_nolock - Start a cache clean ++ * @kbdev: Kbase device ++ * ++ * Issue a cache clean and invalidate command to hardware. hwaccess_lock ++ * must be held by the caller. ++ */ ++void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev); ++ ++/** ++ * kbase_gpu_wait_cache_clean - Wait for cache cleaning to finish ++ * @kbdev: Kbase device ++ * ++ * This function will take hwaccess_lock, and may sleep. ++ */ ++void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev); ++ ++/** ++ * kbase_gpu_wait_cache_clean_timeout - Wait for certain time for cache ++ * cleaning to finish ++ * @kbdev: Kbase device ++ * @wait_timeout_ms: Time in milliseconds, to wait for cache clean to complete. ++ * ++ * This function will take hwaccess_lock, and may sleep. This is supposed to be ++ * called from paths (like GPU reset) where an indefinite wait for the ++ * completion of cache clean operation can cause deadlock, as the operation may ++ * never complete. ++ * ++ * Return: 0 if successful or a negative error code on failure. ++ */ ++int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev, ++ unsigned int wait_timeout_ms); ++ ++/** ++ * kbase_gpu_cache_clean_wait_complete - Called after the cache cleaning is ++ * finished. Would also be called after ++ * the GPU reset. ++ * @kbdev: Kbase device ++ * ++ * Caller must hold the hwaccess_lock. ++ */ ++void kbase_gpu_cache_clean_wait_complete(struct kbase_device *kbdev); ++ ++/** ++ * kbase_clean_caches_done - Issue previously queued cache clean request or ++ * wake up the requester that issued cache clean. 
++ * @kbdev: Kbase device ++ * ++ * Caller must hold the hwaccess_lock. ++ */ ++void kbase_clean_caches_done(struct kbase_device *kbdev); ++ ++/** ++ * kbase_gpu_interrupt - GPU interrupt handler ++ * @kbdev: Kbase device pointer ++ * @val: The value of the GPU IRQ status register which triggered the call ++ * ++ * This function is called from the interrupt handler when a GPU irq is to be ++ * handled. ++ */ ++void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val); +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device_hw.c b/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device_hw.c +new file mode 100644 +index 0000000..e80559a +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device_hw.c +@@ -0,0 +1,182 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2014-2016, 2018-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#if !IS_ENABLED(CONFIG_MALI_NO_MALI) ++void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value) ++{ ++ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); ++ KBASE_DEBUG_ASSERT(kbdev->dev != NULL); ++ ++ writel(value, kbdev->reg + offset); ++ ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ if (unlikely(kbdev->io_history.enabled)) ++ kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, ++ value, 1); ++#endif /* CONFIG_DEBUG_FS */ ++ dev_dbg(kbdev->dev, "w: reg %08x val %08x", offset, value); ++} ++ ++KBASE_EXPORT_TEST_API(kbase_reg_write); ++ ++u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) ++{ ++ u32 val; ++ ++ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); ++ KBASE_DEBUG_ASSERT(kbdev->dev != NULL); ++ ++ val = readl(kbdev->reg + offset); ++ ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ if (unlikely(kbdev->io_history.enabled)) ++ kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, ++ val, 0); ++#endif /* CONFIG_DEBUG_FS */ ++ dev_dbg(kbdev->dev, "r: reg %08x val %08x", offset, val); ++ ++ return val; ++} ++ ++KBASE_EXPORT_TEST_API(kbase_reg_read); ++ ++bool kbase_is_gpu_removed(struct kbase_device *kbdev) ++{ ++ u32 val; ++ ++ val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)); ++ ++ return val == 0; ++} ++#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */ ++ ++void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev) ++{ ++ u32 irq_mask; ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ if (kbdev->cache_clean_in_progress) { ++ /* If this is called while another clean is in progress, we ++ * can't rely on the current one to flush any new changes in ++ * the cache. Instead, trigger another cache clean immediately ++ * after this one finishes. 
++ */ ++ kbdev->cache_clean_queued = true; ++ return; ++ } ++ ++ /* Enable interrupt */ ++ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), ++ irq_mask | CLEAN_CACHES_COMPLETED); ++ ++ KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, 0); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_CLEAN_INV_CACHES); ++ ++ kbdev->cache_clean_in_progress = true; ++} ++ ++void kbase_gpu_start_cache_clean(struct kbase_device *kbdev) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_gpu_start_cache_clean_nolock(kbdev); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++} ++ ++void kbase_gpu_cache_clean_wait_complete(struct kbase_device *kbdev) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ kbdev->cache_clean_queued = false; ++ kbdev->cache_clean_in_progress = false; ++ wake_up(&kbdev->cache_clean_wait); ++} ++ ++void kbase_clean_caches_done(struct kbase_device *kbdev) ++{ ++ u32 irq_mask; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ++ if (kbdev->cache_clean_queued) { ++ kbdev->cache_clean_queued = false; ++ ++ KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, 0); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_CLEAN_INV_CACHES); ++ } else { ++ /* Disable interrupt */ ++ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), ++ irq_mask & ~CLEAN_CACHES_COMPLETED); ++ ++ kbase_gpu_cache_clean_wait_complete(kbdev); ++ } ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++} ++ ++static inline bool get_cache_clean_flag(struct kbase_device *kbdev) ++{ ++ bool cache_clean_in_progress; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ cache_clean_in_progress = kbdev->cache_clean_in_progress; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ 
return cache_clean_in_progress; ++} ++ ++void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev) ++{ ++ while (get_cache_clean_flag(kbdev)) { ++ wait_event_interruptible(kbdev->cache_clean_wait, ++ !kbdev->cache_clean_in_progress); ++ } ++} ++ ++int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev, ++ unsigned int wait_timeout_ms) ++{ ++ long remaining = msecs_to_jiffies(wait_timeout_ms); ++ ++ while (remaining && get_cache_clean_flag(kbdev)) { ++ remaining = wait_event_timeout(kbdev->cache_clean_wait, ++ !kbdev->cache_clean_in_progress, ++ remaining); ++ } ++ ++ return (remaining ? 0 : -ETIMEDOUT); ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device_internal.h b/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device_internal.h +index 9f96db0..d422407 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device_internal.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device_internal.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include +@@ -43,9 +42,6 @@ void kbase_device_vinstr_term(struct kbase_device *kbdev); + int kbase_device_timeline_init(struct kbase_device *kbdev); + void kbase_device_timeline_term(struct kbase_device *kbdev); + +-int kbase_device_hwcnt_backend_gpu_init(struct kbase_device *kbdev); +-void kbase_device_hwcnt_backend_gpu_term(struct kbase_device *kbdev); +- + int kbase_device_hwcnt_context_init(struct kbase_device *kbdev); + void kbase_device_hwcnt_context_term(struct kbase_device *kbdev); + +@@ -76,3 +72,17 @@ int kbase_device_early_init(struct kbase_device *kbdev); + * @kbdev: Device pointer + */ + void kbase_device_early_term(struct kbase_device *kbdev); ++ ++/** ++ * kbase_device_late_init - Complete any device-specific initialization. ++ * @kbdev: Device pointer ++ * ++ * Return: 0 on success, or an error code on failure. ++ */ ++int kbase_device_late_init(struct kbase_device *kbdev); ++ ++/** ++ * kbase_device_late_term - Complete any device-specific termination. ++ * @kbdev: Device pointer ++ */ ++void kbase_device_late_term(struct kbase_device *kbdev); +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/Kconfig b/dvalin/kernel/drivers/gpu/arm/midgard/gpu/Kbuild +similarity index 65% +rename from dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/Kconfig +rename to dvalin/kernel/drivers/gpu/arm/midgard/gpu/Kbuild +index 0cdb474..c3ab811 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/Kconfig ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/gpu/Kbuild +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2012, 2020-2021 ARM Limited. All rights reserved. 
+ # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,14 +16,12 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. + # +-# SPDX-License-Identifier: GPL-2.0 +-# + # + ++mali_kbase-y += gpu/mali_kbase_gpu.o + +-config MALI_KUTF +- tristate "Mali Kernel Unit Test Framework" +- default m +- help +- Enables MALI testing framework. To compile it as a module, +- choose M here - this will generate a single module called kutf. ++ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) ++ mali_kbase-y += gpu/backend/mali_kbase_gpu_fault_csf.o ++else ++ mali_kbase-y += gpu/backend/mali_kbase_gpu_fault_jm.o ++endif +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_fault_csf.c b/dvalin/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_fault_csf.c +new file mode 100644 +index 0000000..f9d4c14 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_fault_csf.c +@@ -0,0 +1,104 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#include ++#include ++#include ++ ++const char *kbase_gpu_exception_name(u32 const exception_code) ++{ ++ const char *e; ++ ++ switch (exception_code) { ++ /* CS exceptions */ ++ case CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED: ++ e = "CS_RESOURCE_TERMINATED"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT: ++ e = "CS_INHERIT_FAULT"; ++ break; ++ /* CS fatal exceptions */ ++ case CS_FATAL_EXCEPTION_TYPE_CS_CONFIG_FAULT: ++ e = "CS_CONFIG_FAULT"; ++ break; ++ case CS_FATAL_EXCEPTION_TYPE_CS_ENDPOINT_FAULT: ++ e = "FATAL_CS_ENDPOINT_FAULT"; ++ break; ++ case CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT: ++ e = "FATAL_CS_BUS_FAULT"; ++ break; ++ case CS_FATAL_EXCEPTION_TYPE_CS_INVALID_INSTRUCTION: ++ e = "FATAL_CS_INVALID_INSTRUCTION"; ++ break; ++ case CS_FATAL_EXCEPTION_TYPE_CS_CALL_STACK_OVERFLOW: ++ e = "FATAL_CS_CALL_STACK_OVERFLOW"; ++ break; ++ /* Shader exceptions */ ++ case CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_PC: ++ e = "INSTR_INVALID_PC"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_ENC: ++ e = "INSTR_INVALID_ENC"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_INSTR_BARRIER_FAULT: ++ e = "INSTR_BARRIER_FAULT"; ++ break; ++ /* Misc exceptions */ ++ case CS_FAULT_EXCEPTION_TYPE_DATA_INVALID_FAULT: ++ e = "DATA_INVALID_FAULT"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_TILE_RANGE_FAULT: ++ e = "TILE_RANGE_FAULT"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_ADDR_RANGE_FAULT: ++ e = "ADDR_RANGE_FAULT"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_IMPRECISE_FAULT: ++ e = "IMPRECISE_FAULT"; ++ break; ++ /* FW exceptions */ ++ case CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR: ++ e = "FIRMWARE_INTERNAL_ERROR"; ++ break; ++ case CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT: ++ e = 
"RESOURCE_EVICTION_TIMEOUT"; ++ break; ++ /* GPU Fault */ ++ case GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_BUS_FAULT: ++ e = "GPU_BUS_FAULT"; ++ break; ++ case GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_SHAREABILITY_FAULT: ++ e = "GPU_SHAREABILITY_FAULT"; ++ break; ++ case GPU_FAULTSTATUS_EXCEPTION_TYPE_SYSTEM_SHAREABILITY_FAULT: ++ e = "SYSTEM_SHAREABILITY_FAULT"; ++ break; ++ case GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_CACHEABILITY_FAULT: ++ e = "GPU_CACHEABILITY_FAULT"; ++ break; ++ /* Any other exception code is unknown */ ++ default: ++ e = "UNKNOWN"; ++ break; ++ } ++ ++ return e; ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_fault_jm.c b/dvalin/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_fault_jm.c +index 63132dc..37015cc 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_fault_jm.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_fault_jm.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,13 +17,11 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include + +-#include "../mali_kbase_gpu_fault.h" ++#include + + const char *kbase_gpu_exception_name(u32 const exception_code) + { +@@ -119,8 +118,6 @@ const char *kbase_gpu_exception_name(u32 const exception_code) + e = "TRANSLATION_FAULT"; + break; + case 0xC8: +- e = "PERMISSION_FAULT"; +- break; + case 0xC9: + case 0xCA: + case 0xCB: +@@ -141,8 +138,6 @@ const char *kbase_gpu_exception_name(u32 const exception_code) + e = "TRANSTAB_BUS_FAULT"; + break; + case 0xD8: +- e = "ACCESS_FLAG"; +- break; + case 0xD9: + case 0xDA: + case 0xDB: +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu.c b/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu.c +index 3128db4..8a84ef5 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,11 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include + #include ++#include + + const char *kbase_gpu_access_type_name(u32 fault_status) + { +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_fault.h b/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_fault.h +index b59b9d1..d1e9f77 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_fault.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_fault.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,15 +17,14 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KBASE_GPU_FAULT_H_ + #define _KBASE_GPU_FAULT_H_ + +-/** Returns the name associated with a Mali exception code +- * ++/** ++ * kbase_gpu_exception_name() - ++ * Returns the name associated with a Mali exception code + * @exception_code: exception code + * + * This function is called from the interrupt handler when a GPU fault occurs. +@@ -33,17 +33,6 @@ + */ + const char *kbase_gpu_exception_name(u32 exception_code); + +-/** Returns the name associated with a Mali fatal exception code +- * +- * @fatal_exception_code: fatal exception code +- * +- * This function is called from the interrupt handler when a GPU fatal +- * exception occurs. 
+- * +- * Return: name associated with the fatal exception code +- */ +-const char *kbase_gpu_fatal_exception_name(u32 const fatal_exception_code); +- + /** + * kbase_gpu_access_type_name - Convert MMU_AS_CONTROL.FAULTSTATUS.ACCESS_TYPE + * into string. +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h b/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h +index 759f30d..47e7781 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,422 +17,17 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KBASE_GPU_REGMAP_H_ + #define _KBASE_GPU_REGMAP_H_ + +-#include "mali_kbase_gpu_coherency.h" +-#include "mali_kbase_gpu_id.h" +-#include "backend/mali_kbase_gpu_regmap_jm.h" +- +-/* Begin Register Offsets */ +-/* GPU control registers */ +- +-#define GPU_CONTROL_BASE 0x0000 +-#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r)) +-#define GPU_ID 0x000 /* (RO) GPU and revision identifier */ +-#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */ +-#define TILER_FEATURES 0x00C /* (RO) Tiler Features */ +-#define MEM_FEATURES 0x010 /* (RO) Memory system features */ +-#define MMU_FEATURES 0x014 /* (RO) MMU features */ +-#define AS_PRESENT 0x018 /* (RO) Address space slots present */ +-#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */ +-#define GPU_IRQ_CLEAR 0x024 /* (WO) */ +-#define GPU_IRQ_MASK 0x028 /* (RW) */ +-#define GPU_IRQ_STATUS 0x02C /* (RO) */ +- +-#define GPU_COMMAND 0x030 /* (WO) */ +-#define GPU_STATUS 0x034 /* (RO) */ +- +-#define GPU_DBGEN (1 << 8) /* DBGEN wire status */ +- +-#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */ +-#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */ +-#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */ +- +-#define L2_CONFIG 0x048 /* (RW) Level 2 cache configuration */ +- +-#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ +-#define SUPER_L2_COHERENT (1 << 1) /* Shader cores within a core +- * supergroup are l2 coherent +- */ +- +-#define PWR_KEY 0x050 /* (WO) Power manager key register */ +-#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */ +-#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */ +- +-#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */ +-#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */ +-#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */ +-#define 
TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, high word */ +- +-#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */ +-#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */ +-#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */ +-#define THREAD_FEATURES 0x0AC /* (RO) Thread features */ +-#define THREAD_TLS_ALLOC 0x310 /* (RO) Number of threads per core that TLS must be allocated for */ +- +-#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */ +-#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */ +-#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */ +-#define TEXTURE_FEATURES_3 0x0BC /* (RO) Support flags for texture order */ +- +-#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2)) +- +-#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ +-#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */ +- +-#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */ +-#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */ +- +-#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ +-#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ +- +-#define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */ +-#define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */ +- +-#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */ +-#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */ +- +-#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */ +-#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */ +- +-#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */ +-#define 
L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */ +- +-#define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */ +-#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */ +- +-#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */ +-#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */ +- +-#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */ +-#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */ +- +-#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */ +-#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */ +- +-#define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */ +-#define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */ +- +-#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */ +-#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */ +- +-#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */ +-#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */ +- +-#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */ +-#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */ +- +-#define STACK_PWROFF_LO 0xE30 /* (RO) Core stack power off bitmap, low word */ +-#define STACK_PWROFF_HI 0xE34 /* (RO) Core stack power off bitmap, high word */ +- +-#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */ +-#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */ +- +-#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */ +-#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */ +- +-#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low 
word */ +-#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */ +- +-#define STACK_PWRTRANS_LO 0xE40 /* (RO) Core stack power transition bitmap, low word */ +-#define STACK_PWRTRANS_HI 0xE44 /* (RO) Core stack power transition bitmap, high word */ +- +-#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */ +-#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */ +- +-#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */ +-#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */ +- +-#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */ +-#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */ +- +-#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ +-#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */ +- +-#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration (implementation-specific) */ +-#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration (implementation-specific) */ +-#define L2_MMU_CONFIG 0xF0C /* (RW) L2 cache and MMU configuration (implementation-specific) */ +- +-/* Job control registers */ +- +-#define JOB_CONTROL_BASE 0x1000 +- +-#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r)) +- +-#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */ +-#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */ +-#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */ +-#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */ +- +-/* MMU control registers */ +- +-#define MEMORY_MANAGEMENT_BASE 0x2000 +-#define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r)) +- +-#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */ +-#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */ +-#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */ +-#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */ +- 
+-#define MMU_AS0 0x400 /* Configuration registers for address space 0 */ +-#define MMU_AS1 0x440 /* Configuration registers for address space 1 */ +-#define MMU_AS2 0x480 /* Configuration registers for address space 2 */ +-#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */ +-#define MMU_AS4 0x500 /* Configuration registers for address space 4 */ +-#define MMU_AS5 0x540 /* Configuration registers for address space 5 */ +-#define MMU_AS6 0x580 /* Configuration registers for address space 6 */ +-#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */ +-#define MMU_AS8 0x600 /* Configuration registers for address space 8 */ +-#define MMU_AS9 0x640 /* Configuration registers for address space 9 */ +-#define MMU_AS10 0x680 /* Configuration registers for address space 10 */ +-#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */ +-#define MMU_AS12 0x700 /* Configuration registers for address space 12 */ +-#define MMU_AS13 0x740 /* Configuration registers for address space 13 */ +-#define MMU_AS14 0x780 /* Configuration registers for address space 14 */ +-#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */ +- +-/* MMU address space control registers */ +- +-#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r)) +- +-#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */ +-#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */ +-#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */ +-#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. 
*/ +-#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */ +-#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */ +-#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */ +-#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */ +-#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */ +-#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */ +-#define AS_STATUS 0x28 /* (RO) Status flags for address space n */ +- +-/* (RW) Translation table configuration for address space n, low word */ +-#define AS_TRANSCFG_LO 0x30 +-/* (RW) Translation table configuration for address space n, high word */ +-#define AS_TRANSCFG_HI 0x34 +-/* (RO) Secondary fault address for address space n, low word */ +-#define AS_FAULTEXTRA_LO 0x38 +-/* (RO) Secondary fault address for address space n, high word */ +-#define AS_FAULTEXTRA_HI 0x3C +- +-/* End Register Offsets */ +- +-/* IRQ flags */ +-#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ +-#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */ +-#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */ +-#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */ +-#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */ +- +-#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */ +-#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ +- +-/* Include POWER_CHANGED_SINGLE in debug builds for use in irq latency test. +- */ +-#define GPU_IRQ_REG_COMMON (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \ +- | POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED) ++#include + ++/* Include POWER_CHANGED_SINGLE in debug builds for use in irq latency test. 
*/ + #ifdef CONFIG_MALI_DEBUG ++#undef GPU_IRQ_REG_ALL + #define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE) +-#else /* CONFIG_MALI_DEBUG */ +-#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON) + #endif /* CONFIG_MALI_DEBUG */ + +-/* +- * MMU_IRQ_RAWSTAT register values. Values are valid also for +- * MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers. +- */ +- +-#define MMU_PAGE_FAULT_FLAGS 16 +- +-/* Macros returning a bitmask to retrieve page fault or bus error flags from +- * MMU registers */ +-#define MMU_PAGE_FAULT(n) (1UL << (n)) +-#define MMU_BUS_ERROR(n) (1UL << ((n) + MMU_PAGE_FAULT_FLAGS)) +- +-/* +- * Begin LPAE MMU TRANSTAB register values +- */ +-#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK 0xfffff000 +-#define AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED (0u << 0) +-#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY (1u << 1) +-#define AS_TRANSTAB_LPAE_ADRMODE_TABLE (3u << 0) +-#define AS_TRANSTAB_LPAE_READ_INNER (1u << 2) +-#define AS_TRANSTAB_LPAE_SHARE_OUTER (1u << 4) +- +-#define AS_TRANSTAB_LPAE_ADRMODE_MASK 0x00000003 +- +-/* +- * Begin AARCH64 MMU TRANSTAB register values +- */ +-#define MMU_HW_OUTA_BITS 40 +-#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4)) +- +-/* +- * Begin MMU STATUS register values +- */ +-#define AS_STATUS_AS_ACTIVE 0x01 +- +-#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK (0x7<<3) +-#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT (0x0<<3) +-#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT (0x1<<3) +-#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3) +-#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3) +-#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3) +-#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3) +- +-#define AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0 +-#define AS_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFF << AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) +-#define AS_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \ +- 
(((reg_val)&AS_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) +- +-#define AS_FAULTSTATUS_ACCESS_TYPE_SHIFT 8 +-#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) +-#define AS_FAULTSTATUS_ACCESS_TYPE_GET(reg_val) \ +- (((reg_val)&AS_FAULTSTATUS_ACCESS_TYPE_MASK) >> AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) +- +-#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0) +-#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1) +-#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2) +-#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3) +- +-#define AS_FAULTSTATUS_SOURCE_ID_SHIFT 16 +-#define AS_FAULTSTATUS_SOURCE_ID_MASK (0xFFFF << AS_FAULTSTATUS_SOURCE_ID_SHIFT) +-#define AS_FAULTSTATUS_SOURCE_ID_GET(reg_val) \ +- (((reg_val)&AS_FAULTSTATUS_SOURCE_ID_MASK) >> AS_FAULTSTATUS_SOURCE_ID_SHIFT) +- +-/* +- * Begin MMU TRANSCFG register values +- */ +-#define AS_TRANSCFG_ADRMODE_LEGACY 0 +-#define AS_TRANSCFG_ADRMODE_UNMAPPED 1 +-#define AS_TRANSCFG_ADRMODE_IDENTITY 2 +-#define AS_TRANSCFG_ADRMODE_AARCH64_4K 6 +-#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8 +- +-#define AS_TRANSCFG_ADRMODE_MASK 0xF +- +-/* +- * Begin TRANSCFG register values +- */ +-#define AS_TRANSCFG_PTW_MEMATTR_MASK (3ull << 24) +-#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1ull << 24) +-#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2ull << 24) +- +-#define AS_TRANSCFG_PTW_SH_MASK ((3ull << 28)) +-#define AS_TRANSCFG_PTW_SH_OS (2ull << 28) +-#define AS_TRANSCFG_PTW_SH_IS (3ull << 28) +-#define AS_TRANSCFG_R_ALLOCATE (1ull << 30) +- +-/* +- * Begin Command Values +- */ +- +-/* AS_COMMAND register commands */ +-#define AS_COMMAND_NOP 0x00 /* NOP Operation */ +-#define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */ +-#define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */ +-#define AS_COMMAND_UNLOCK 0x03 /* Issue a flush region command to all MMUs */ +-#define AS_COMMAND_FLUSH 0x04 /* Flush all L2 caches then issue a flush region 
command to all MMUs +- (deprecated - only for use with T60x) */ +-#define AS_COMMAND_FLUSH_PT 0x04 /* Flush all L2 caches then issue a flush region command to all MMUs */ +-#define AS_COMMAND_FLUSH_MEM 0x05 /* Wait for memory accesses to complete, flush all the L1s cache then +- flush all L2 caches then issue a flush region command to all MMUs */ +- +-/* GPU_STATUS values */ +-#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ +-#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */ +- +-/* PRFCNT_CONFIG register values */ +-#define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */ +-#define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. */ +-#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */ +- +-/* The performance counters are disabled. */ +-#define PRFCNT_CONFIG_MODE_OFF 0 +-/* The performance counters are enabled, but are only written out when a +- * PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. +- */ +-#define PRFCNT_CONFIG_MODE_MANUAL 1 +-/* The performance counters are enabled, and are written out each time a tile +- * finishes rendering. +- */ +-#define PRFCNT_CONFIG_MODE_TILE 2 +- +-/* AS_MEMATTR values from MMU_MEMATTR_STAGE1: */ +-/* Use GPU implementation-defined caching policy. */ +-#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull +-/* The attribute set to force all resources to be cached. */ +-#define AS_MEMATTR_FORCE_TO_CACHE_ALL 0x8Full +-/* Inner write-alloc cache setup, no outer caching */ +-#define AS_MEMATTR_WRITE_ALLOC 0x8Dull +- +-/* Use GPU implementation-defined caching policy. */ +-#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull +-/* The attribute set to force all resources to be cached. 
*/ +-#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL 0x4Full +-/* Inner write-alloc cache setup, no outer caching */ +-#define AS_MEMATTR_LPAE_WRITE_ALLOC 0x4Dull +-/* Set to implementation defined, outer caching */ +-#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF 0x88ull +-/* Set to write back memory, outer caching */ +-#define AS_MEMATTR_LPAE_OUTER_WA 0x8Dull +-/* There is no LPAE support for non-cacheable, since the memory type is always +- * write-back. +- * Marking this setting as reserved for LPAE +- */ +-#define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED +- +-/* L2_MMU_CONFIG register */ +-#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT (23) +-#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT) +- +-/* End L2_MMU_CONFIG register */ +- +-/* THREAD_* registers */ +- +-/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */ +-#define IMPLEMENTATION_UNSPECIFIED 0 +-#define IMPLEMENTATION_SILICON 1 +-#define IMPLEMENTATION_FPGA 2 +-#define IMPLEMENTATION_MODEL 3 +- +-/* Default values when registers are not supported by the implemented hardware */ +-#define THREAD_MT_DEFAULT 256 +-#define THREAD_MWS_DEFAULT 256 +-#define THREAD_MBS_DEFAULT 256 +-#define THREAD_MR_DEFAULT 1024 +-#define THREAD_MTQ_DEFAULT 4 +-#define THREAD_MTGS_DEFAULT 10 +- +-/* End THREAD_* registers */ +- +-/* SHADER_CONFIG register */ +-#define SC_LS_ALLOW_ATTR_TYPES (1ul << 16) +-#define SC_TLS_HASH_ENABLE (1ul << 17) +-#define SC_LS_ATTR_CHECK_DISABLE (1ul << 18) +-#define SC_VAR_ALGORITHM (1ul << 29) +-/* End SHADER_CONFIG register */ +- +-/* TILER_CONFIG register */ +-#define TC_CLOCK_GATE_OVERRIDE (1ul << 0) +-/* End TILER_CONFIG register */ +- +-/* L2_CONFIG register */ +-#define L2_CONFIG_SIZE_SHIFT 16 +-#define L2_CONFIG_SIZE_MASK (0xFFul << L2_CONFIG_SIZE_SHIFT) +-#define L2_CONFIG_HASH_SHIFT 24 +-#define L2_CONFIG_HASH_MASK (0xFFul << L2_CONFIG_HASH_SHIFT) +-/* End L2_CONFIG register */ +- +-/* IDVS_GROUP register */ +-#define IDVS_GROUP_SIZE_SHIFT (16) 
+-#define IDVS_GROUP_MAX_SIZE (0x3F) +- + #endif /* _KBASE_GPU_REGMAP_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/Kbuild +index 3d9cf80..96977e9 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/Kbuild ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/Kbuild +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2016-2018, 2020-2021 ARM Limited. All rights reserved. + # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,14 +16,20 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. 
+ # +-# SPDX-License-Identifier: GPL-2.0 +-# + # + + mali_kbase-y += \ +- ipa/mali_kbase_ipa_simple.o \ +- ipa/mali_kbase_ipa.o \ +- ipa/mali_kbase_ipa_vinstr_g7x.o \ +- ipa/mali_kbase_ipa_vinstr_common.o ++ ipa/mali_kbase_ipa_simple.o \ ++ ipa/mali_kbase_ipa.o ++ ++mali_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o + +-mali_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o +\ No newline at end of file ++ifeq ($(MALI_USE_CSF),1) ++ mali_kbase-y += \ ++ ipa/backend/mali_kbase_ipa_counter_csf.o \ ++ ipa/backend/mali_kbase_ipa_counter_common_csf.o ++else ++ mali_kbase-y += \ ++ ipa/backend/mali_kbase_ipa_counter_jm.o \ ++ ipa/backend/mali_kbase_ipa_counter_common_jm.o ++endif +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_csf.c b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_csf.c +new file mode 100644 +index 0000000..81dc56b +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_csf.c +@@ -0,0 +1,457 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ ++ ++#include "mali_kbase_ipa_counter_common_csf.h" ++#include "ipa/mali_kbase_ipa_debugfs.h" ++ ++#define DEFAULT_SCALING_FACTOR 5 ++ ++/* If the value of GPU_ACTIVE is below this, use the simple model ++ * instead, to avoid extrapolating small amounts of counter data across ++ * large sample periods. ++ */ ++#define DEFAULT_MIN_SAMPLE_CYCLES 10000 ++ ++/* Typical value for the sampling interval is expected to be less than 100ms, ++ * so 5 seconds is a reasonable upper limit for the time gap between the ++ * 2 samples. ++ */ ++#define MAX_SAMPLE_INTERVAL_MS ((s64)5000) ++ ++/* Maximum increment that is expected for a counter value during a sampling ++ * interval is derived assuming ++ * - max sampling interval of 1 second. ++ * - max GPU frequency of 2 GHz. ++ * - max number of cores as 32. ++ * - max increment of 4 in per core counter value at every clock cycle. ++ * ++ * So max increment = 2 * 10^9 * 32 * 4 = ~2^38. ++ * If a counter increases by an amount greater than this value, then an error ++ * will be returned and the simple power model will be used. ++ */ ++#define MAX_COUNTER_INCREMENT (((u64)1 << 38) - 1) ++ ++static inline s64 kbase_ipa_add_saturate(s64 a, s64 b) ++{ ++ s64 rtn; ++ ++ if (a > 0 && (S64_MAX - a) < b) ++ rtn = S64_MAX; ++ else if (a < 0 && (S64_MIN - a) > b) ++ rtn = S64_MIN; ++ else ++ rtn = a + b; ++ ++ return rtn; ++} ++ ++static s64 kbase_ipa_group_energy(s32 coeff, u64 counter_value) ++{ ++ /* Range: 0 < counter_value < 2^38 */ ++ ++ /* Range: -2^59 < ret < 2^59 (as -2^21 < coeff < 2^21) */ ++ return counter_value * (s64)coeff; ++} ++ ++/** ++ * kbase_ipa_attach_ipa_control() - register with kbase_ipa_control ++ * @model_data: Pointer to counter model data ++ * ++ * Register IPA counter model as a client of kbase_ipa_control, which ++ * provides an interface to retrieve the accumulated value of hardware ++ * counters to calculate energy consumption. ++ * ++ * Return: 0 on success, or an error code.
++ */ ++static int ++kbase_ipa_attach_ipa_control(struct kbase_ipa_counter_model_data *model_data) ++{ ++ struct kbase_device *kbdev = model_data->kbdev; ++ struct kbase_ipa_control_perf_counter *perf_counters; ++ u32 cnt_idx = 0; ++ int err; ++ size_t i; ++ ++ /* Value for GPU_ACTIVE counter also needs to be queried. It is required ++ * for the normalization of top-level and shader core counters. ++ */ ++ model_data->num_counters = 1 + model_data->num_top_level_cntrs + ++ model_data->num_shader_cores_cntrs; ++ ++ perf_counters = kcalloc(model_data->num_counters, ++ sizeof(*perf_counters), GFP_KERNEL); ++ ++ if (!perf_counters) { ++ dev_err(kbdev->dev, ++ "Failed to allocate memory for perf_counters array"); ++ return -ENOMEM; ++ } ++ ++ /* Fill in the description for GPU_ACTIVE counter which is always ++ * needed, as mentioned above, regardless of the energy model used ++ * by the CSF GPUs. ++ */ ++ perf_counters[cnt_idx].type = KBASE_IPA_CORE_TYPE_CSHW; ++ perf_counters[cnt_idx].idx = GPU_ACTIVE_CNT_IDX; ++ perf_counters[cnt_idx].gpu_norm = false; ++ perf_counters[cnt_idx].scaling_factor = 1; ++ cnt_idx++; ++ ++ for (i = 0; i < model_data->num_top_level_cntrs; ++i) { ++ const struct kbase_ipa_counter *counter = ++ &model_data->top_level_cntrs_def[i]; ++ ++ perf_counters[cnt_idx].type = counter->counter_block_type; ++ perf_counters[cnt_idx].idx = counter->counter_block_offset; ++ perf_counters[cnt_idx].gpu_norm = false; ++ perf_counters[cnt_idx].scaling_factor = 1; ++ cnt_idx++; ++ } ++ ++ for (i = 0; i < model_data->num_shader_cores_cntrs; ++i) { ++ const struct kbase_ipa_counter *counter = ++ &model_data->shader_cores_cntrs_def[i]; ++ ++ perf_counters[cnt_idx].type = counter->counter_block_type; ++ perf_counters[cnt_idx].idx = counter->counter_block_offset; ++ perf_counters[cnt_idx].gpu_norm = false; ++ perf_counters[cnt_idx].scaling_factor = 1; ++ cnt_idx++; ++ } ++ ++ err = kbase_ipa_control_register(kbdev, perf_counters, ++ model_data->num_counters, ++ 
&model_data->ipa_control_client); ++ if (err) ++ dev_err(kbdev->dev, ++ "Failed to register IPA with kbase_ipa_control"); ++ ++ kfree(perf_counters); ++ return err; ++} ++ ++/** ++ * kbase_ipa_detach_ipa_control() - De-register from kbase_ipa_control. ++ * @model_data: Pointer to counter model data ++ */ ++static void ++kbase_ipa_detach_ipa_control(struct kbase_ipa_counter_model_data *model_data) ++{ ++ if (model_data->ipa_control_client) { ++ kbase_ipa_control_unregister(model_data->kbdev, ++ model_data->ipa_control_client); ++ model_data->ipa_control_client = NULL; ++ } ++} ++ ++static int calculate_coeff(struct kbase_ipa_counter_model_data *model_data, ++ const struct kbase_ipa_counter *const cnt_defs, ++ size_t num_counters, s32 *counter_coeffs, ++ u64 *counter_values, u32 active_cycles, u32 *coeffp) ++{ ++ u64 coeff = 0, coeff_mul = 0; ++ s64 total_energy = 0; ++ size_t i; ++ ++ /* Range for the 'counter_value' is [0, 2^38) ++ * Range for the 'coeff' is [-2^21, 2^21] ++ * So range for the 'group_energy' is [-2^59, 2^59) and range for the ++ * 'total_energy' is +/- 2^59 * number of IPA groups (~16), i.e. ++ * [-2^63, 2^63). ++ */ ++ for (i = 0; i < num_counters; i++) { ++ s32 coeff = counter_coeffs[i]; ++ u64 counter_value = counter_values[i]; ++ s64 group_energy = kbase_ipa_group_energy(coeff, counter_value); ++ ++ if (counter_value > MAX_COUNTER_INCREMENT) { ++ dev_warn(model_data->kbdev->dev, ++ "Increment in counter %s more than expected", ++ cnt_defs[i].name); ++ return -ERANGE; ++ } ++ ++ total_energy = ++ kbase_ipa_add_saturate(total_energy, group_energy); ++ } ++ ++ /* Range: 0 <= coeff < 2^63 */ ++ if (total_energy >= 0) ++ coeff = total_energy; ++ else ++ dev_dbg(model_data->kbdev->dev, ++ "Energy value came negative as %lld", total_energy); ++ ++ /* Range: 0 <= coeff < 2^63 (because active_cycles >= 1). 
However, this ++ * can be constrained further: the value of counters that are being ++ * used for dynamic power estimation can only increment by about 128 ++ * maximum per clock cycle. This is because max number of shader ++ * cores is expected to be 32 (max number of L2 slices is expected to ++ * be 8) and some counters (per shader core) like SC_BEATS_RD_TEX_EXT & ++ * SC_EXEC_STARVE_ARITH can increment by 4 every clock cycle. ++ * Each "beat" is defined as 128 bits and each shader core can ++ * (currently) do 512 bits read and 512 bits write to/from the L2 ++ * cache per cycle, so the SC_BEATS_RD_TEX_EXT counter can increment ++ * [0, 4] per shader core per cycle. ++ * We can thus write the range of 'coeff' in terms of active_cycles: ++ * ++ * coeff = SUM(coeffN * counterN * num_cores_for_counterN) ++ * coeff <= SUM(coeffN * counterN) * max_cores ++ * coeff <= num_IPA_groups * max_coeff * max_counter * max_cores ++ * (substitute max_counter = 2^2 * active_cycles) ++ * coeff <= num_IPA_groups * max_coeff * 2^2 * active_cycles * max_cores ++ * coeff <= 2^4 * 2^21 * 2^2 * active_cycles * 2^5 ++ * coeff <= 2^32 * active_cycles ++ * ++ * So after the division: 0 <= coeff <= 2^32 ++ */ ++ coeff = div_u64(coeff, active_cycles); ++ ++ /* Not all models were derived at the same reference voltage. Voltage ++ * scaling is done by multiplying by V^2, so we need to *divide* by ++ * Vref^2 here. ++ * Range: 0 <= coeff <= 2^35 ++ */ ++ coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1)); ++ /* Range: 0 <= coeff <= 2^38 */ ++ coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1)); ++ ++ /* Scale by user-specified integer factor. ++ * Range: 0 <= coeff_mul < 2^43 ++ */ ++ coeff_mul = coeff * model_data->scaling_factor; ++ ++ /* The power models have results with units ++ * mW/(MHz V^2), i.e. nW/(Hz V^2). With precision of 1/1000000, this ++ * becomes fW/(Hz V^2), which are the units of coeff_mul. 
However, ++ * kbase_scale_dynamic_power() expects units of pW/(Hz V^2), so divide ++ * by 1000. ++ * Range: 0 <= coeff_mul < 2^33 ++ */ ++ coeff_mul = div_u64(coeff_mul, 1000u); ++ ++ /* Clamp to a sensible range - 2^16 gives about 14W at 400MHz/750mV */ ++ *coeffp = clamp(coeff_mul, (u64)0, (u64)1 << 16); ++ ++ return 0; ++} ++ ++int kbase_ipa_counter_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) ++{ ++ struct kbase_ipa_counter_model_data *model_data = ++ (struct kbase_ipa_counter_model_data *)model->model_data; ++ struct kbase_device *kbdev = model->kbdev; ++ s32 *counter_coeffs_p = model_data->counter_coeffs; ++ u64 *cnt_values_p = model_data->counter_values; ++ const u64 num_counters = model_data->num_counters; ++ u32 active_cycles; ++ ktime_t now, diff; ++ s64 diff_ms; ++ int ret; ++ ++ lockdep_assert_held(&kbdev->ipa.lock); ++ ++ /* The last argument is supposed to be a pointer to the location that ++ * will store the time for which GPU has been in protected mode since ++ * last query. This can be passed as NULL as counter model itself will ++ * not be used when GPU enters protected mode, as IPA is supposed to ++ * switch to the simple power model. ++ */ ++ ret = kbase_ipa_control_query(kbdev, ++ model_data->ipa_control_client, ++ cnt_values_p, num_counters, NULL); ++ if (WARN_ON(ret)) ++ return ret; ++ ++ now = ktime_get(); ++ diff = ktime_sub(now, kbdev->ipa.last_sample_time); ++ diff_ms = ktime_to_ms(diff); ++ ++ kbdev->ipa.last_sample_time = now; ++ ++ /* The counter values cannot be relied upon if the sampling interval was ++ * too long. Typically this will happen when the polling is started ++ * after the temperature has risen above a certain trip point. After ++ * that regular calls every 25-100 ms interval are expected. 
++ */ ++ if (diff_ms > MAX_SAMPLE_INTERVAL_MS) { ++ dev_dbg(kbdev->dev, ++ "Last sample was taken %lld milli seconds ago", ++ diff_ms); ++ return -EOVERFLOW; ++ } ++ ++ /* Range: 0 (GPU not used at all), to the max sampling interval, say ++ * 1 seconds, * max GPU frequency (GPU 100% utilized). ++ * 0 <= active_cycles <= 1 * ~2GHz ++ * 0 <= active_cycles < 2^31 ++ */ ++ if (*cnt_values_p > U32_MAX) { ++ dev_warn(kbdev->dev, ++ "Increment in GPU_ACTIVE counter more than expected"); ++ return -ERANGE; ++ } ++ ++ active_cycles = (u32)*cnt_values_p; ++ ++ /* If the value of the active_cycles is less than the threshold, then ++ * return an error so that IPA framework can approximate using the ++ * cached simple model results instead. This may be more accurate ++ * than extrapolating using a very small counter dump. ++ */ ++ if (active_cycles < (u32)max(model_data->min_sample_cycles, 0)) ++ return -ENODATA; ++ ++ /* Range: 1 <= active_cycles < 2^31 */ ++ active_cycles = max(1u, active_cycles); ++ ++ cnt_values_p++; ++ ret = calculate_coeff(model_data, model_data->top_level_cntrs_def, ++ model_data->num_top_level_cntrs, ++ counter_coeffs_p, cnt_values_p, active_cycles, ++ &coeffp[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]); ++ if (ret) ++ return ret; ++ ++ cnt_values_p += model_data->num_top_level_cntrs; ++ counter_coeffs_p += model_data->num_top_level_cntrs; ++ ret = calculate_coeff(model_data, model_data->shader_cores_cntrs_def, ++ model_data->num_shader_cores_cntrs, ++ counter_coeffs_p, cnt_values_p, active_cycles, ++ &coeffp[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]); ++ ++ return ret; ++} ++ ++void kbase_ipa_counter_reset_data(struct kbase_ipa_model *model) ++{ ++ struct kbase_ipa_counter_model_data *model_data = ++ (struct kbase_ipa_counter_model_data *)model->model_data; ++ u64 *cnt_values_p = model_data->counter_values; ++ const u64 num_counters = model_data->num_counters; ++ int ret; ++ ++ lockdep_assert_held(&model->kbdev->ipa.lock); ++ ++ ret = 
kbase_ipa_control_query(model->kbdev, ++ model_data->ipa_control_client, ++ cnt_values_p, num_counters, NULL); ++ WARN_ON(ret); ++} ++ ++int kbase_ipa_counter_common_model_init(struct kbase_ipa_model *model, ++ const struct kbase_ipa_counter *top_level_cntrs_def, ++ size_t num_top_level_cntrs, ++ const struct kbase_ipa_counter *shader_cores_cntrs_def, ++ size_t num_shader_cores_cntrs, ++ s32 reference_voltage) ++{ ++ struct kbase_ipa_counter_model_data *model_data; ++ s32 *counter_coeffs_p; ++ int err = 0; ++ size_t i; ++ ++ if (!model || !top_level_cntrs_def || !shader_cores_cntrs_def || ++ !num_top_level_cntrs || !num_shader_cores_cntrs) ++ return -EINVAL; ++ ++ model_data = kzalloc(sizeof(*model_data), GFP_KERNEL); ++ if (!model_data) ++ return -ENOMEM; ++ ++ model_data->kbdev = model->kbdev; ++ ++ model_data->top_level_cntrs_def = top_level_cntrs_def; ++ model_data->num_top_level_cntrs = num_top_level_cntrs; ++ ++ model_data->shader_cores_cntrs_def = shader_cores_cntrs_def; ++ model_data->num_shader_cores_cntrs = num_shader_cores_cntrs; ++ ++ model->model_data = (void *)model_data; ++ ++ counter_coeffs_p = model_data->counter_coeffs; ++ ++ for (i = 0; i < model_data->num_top_level_cntrs; ++i) { ++ const struct kbase_ipa_counter *counter = ++ &model_data->top_level_cntrs_def[i]; ++ ++ *counter_coeffs_p = counter->coeff_default_value; ++ ++ err = kbase_ipa_model_add_param_s32( ++ model, counter->name, counter_coeffs_p, 1, false); ++ if (err) ++ goto exit; ++ ++ counter_coeffs_p++; ++ } ++ ++ for (i = 0; i < model_data->num_shader_cores_cntrs; ++i) { ++ const struct kbase_ipa_counter *counter = ++ &model_data->shader_cores_cntrs_def[i]; ++ ++ *counter_coeffs_p = counter->coeff_default_value; ++ ++ err = kbase_ipa_model_add_param_s32( ++ model, counter->name, counter_coeffs_p, 1, false); ++ if (err) ++ goto exit; ++ ++ counter_coeffs_p++; ++ } ++ ++ model_data->scaling_factor = DEFAULT_SCALING_FACTOR; ++ err = kbase_ipa_model_add_param_s32( ++ model, "scale", 
&model_data->scaling_factor, 1, false); ++ if (err) ++ goto exit; ++ ++ model_data->min_sample_cycles = DEFAULT_MIN_SAMPLE_CYCLES; ++ err = kbase_ipa_model_add_param_s32(model, "min_sample_cycles", ++ &model_data->min_sample_cycles, 1, ++ false); ++ if (err) ++ goto exit; ++ ++ model_data->reference_voltage = reference_voltage; ++ err = kbase_ipa_model_add_param_s32(model, "reference_voltage", ++ &model_data->reference_voltage, 1, ++ false); ++ if (err) ++ goto exit; ++ ++ err = kbase_ipa_attach_ipa_control(model_data); ++ ++exit: ++ if (err) { ++ kbase_ipa_model_param_free_all(model); ++ kfree(model_data); ++ } ++ return err; ++} ++ ++void kbase_ipa_counter_common_model_term(struct kbase_ipa_model *model) ++{ ++ struct kbase_ipa_counter_model_data *model_data = ++ (struct kbase_ipa_counter_model_data *)model->model_data; ++ ++ kbase_ipa_detach_ipa_control(model_data); ++ kfree(model_data); ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_csf.h b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_csf.h +new file mode 100644 +index 0000000..37d2efc +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_csf.h +@@ -0,0 +1,159 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#ifndef _KBASE_IPA_COUNTER_COMMON_CSF_H_ ++#define _KBASE_IPA_COUNTER_COMMON_CSF_H_ ++ ++#include "mali_kbase.h" ++#include "csf/ipa_control/mali_kbase_csf_ipa_control.h" ++ ++/* Maximum number of HW counters used by the IPA counter model. */ ++#define KBASE_IPA_MAX_COUNTER_DEF_NUM 24 ++ ++struct kbase_ipa_counter_model_data; ++ ++/** ++ * struct kbase_ipa_counter_model_data - IPA counter model context per device ++ * @kbdev: Pointer to kbase device ++ * @ipa_control_client: Handle returned on registering IPA counter model as a ++ * client of kbase_ipa_control. ++ * @top_level_cntrs_def: Array of description of HW counters used by the IPA ++ * counter model for top-level. ++ * @num_top_level_cntrs: Number of elements in @top_level_cntrs_def array. ++ * @shader_cores_cntrs_def: Array of description of HW counters used by the IPA ++ * counter model for shader cores. ++ * @num_shader_cores_cntrs: Number of elements in @shader_cores_cntrs_def array. ++ * @counter_coeffs: Buffer to store coefficient value used for HW counters ++ * @counter_values: Buffer to store the accumulated value of HW counters ++ * retreived from kbase_ipa_control. ++ * @num_counters: Number of counters queried from kbase_ipa_control. ++ * @reference_voltage: voltage, in mV, of the operating point used when ++ * deriving the power model coefficients. Range approx ++ * 0.1V - 5V (~= 8V): 2^7 <= reference_voltage <= 2^13 ++ * @scaling_factor: User-specified power scaling factor. This is an ++ * integer, which is multiplied by the power coefficient ++ * just before OPP scaling. 
++ * Range approx 0-32: 0 < scaling_factor < 2^5 ++ * @min_sample_cycles: If the value of the GPU_ACTIVE counter (the number of ++ * cycles the GPU was working) is less than ++ * min_sample_cycles, the counter model will return an ++ * error, causing the IPA framework to approximate using ++ * the cached simple model results instead. This may be ++ * more accurate than extrapolating using a very small ++ * counter dump. ++ */ ++struct kbase_ipa_counter_model_data { ++ struct kbase_device *kbdev; ++ void *ipa_control_client; ++ const struct kbase_ipa_counter *top_level_cntrs_def; ++ size_t num_top_level_cntrs; ++ const struct kbase_ipa_counter *shader_cores_cntrs_def; ++ size_t num_shader_cores_cntrs; ++ s32 counter_coeffs[KBASE_IPA_MAX_COUNTER_DEF_NUM]; ++ u64 counter_values[KBASE_IPA_MAX_COUNTER_DEF_NUM]; ++ u64 num_counters; ++ s32 reference_voltage; ++ s32 scaling_factor; ++ s32 min_sample_cycles; ++}; ++ ++/** ++ * struct kbase_ipa_counter - represents a single HW counter used by IPA model ++ * @name: Name of the HW counter used by IPA counter model ++ * for energy estimation. ++ * @coeff_default_value: Default value of coefficient for the counter. ++ * Coefficients are interpreted as fractions where the ++ * denominator is 1000000. ++ * @counter_block_offset: Index to the counter within the counter block of ++ * type @counter_block_type. ++ * @counter_block_type: Type of the counter block. ++ */ ++struct kbase_ipa_counter { ++ const char *name; ++ s32 coeff_default_value; ++ u32 counter_block_offset; ++ enum kbase_ipa_core_type counter_block_type; ++}; ++ ++/** ++ * kbase_ipa_counter_dynamic_coeff() - calculate dynamic power based on HW counters ++ * @model: pointer to instantiated model ++ * @coeffp: pointer to location where calculated power, in ++ * pW/(Hz V^2), is stored for top level and shader cores. ++ * ++ * This is a GPU-agnostic implementation of the get_dynamic_coeff() ++ * function of an IPA model. 
It relies on the model being populated ++ * with GPU-specific attributes at initialization time. ++ * ++ * Return: 0 on success, or an error code. ++ */ ++int kbase_ipa_counter_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp); ++ ++/** ++ * kbase_ipa_counter_reset_data() - Reset the counters data used for dynamic ++ * power estimation ++ * @model: pointer to instantiated model ++ * ++ * Retrieve the accumulated value of HW counters from the kbase_ipa_control ++ * component, without doing any processing, which is effectively a reset as the ++ * next call to kbase_ipa_counter_dynamic_coeff() will see the increment in ++ * counter values from this point onwards. ++ */ ++void kbase_ipa_counter_reset_data(struct kbase_ipa_model *model); ++ ++/** ++ * kbase_ipa_counter_common_model_init() - initialize ipa power model ++ * @model: Pointer to the ipa power model to initialize ++ * @top_level_cntrs_def: Array corresponding to the HW counters used in the ++ * top level counter model, contains the counter index, ++ * default value of the coefficient. ++ * @num_top_level_cntrs: Number of elements in the array @top_level_cntrs_def ++ * @shader_cores_cntrs_def: Array corresponding to the HW counters used in the ++ * shader cores counter model, contains the counter index, ++ * default value of the coefficient. ++ * @num_shader_cores_cntrs: Number of elements in the array ++ * @shader_cores_cntrs_def. ++ * @reference_voltage: voltage, in mV, of the operating point used when ++ * deriving the power model coefficients. ++ * ++ * This function performs initialization steps common for ipa counter based ++ * model of all CSF GPUs. The set of counters and their respective weights ++ * could be different for each GPU. The tuple of counter index and weight ++ * is passed via @top_level_cntrs_def and @shader_cores_cntrs_def array. 
++ * ++ * Return: 0 on success, error code otherwise ++ */ ++int kbase_ipa_counter_common_model_init(struct kbase_ipa_model *model, ++ const struct kbase_ipa_counter *top_level_cntrs_def, ++ size_t num_top_level_cntrs, ++ const struct kbase_ipa_counter *shader_cores_cntrs_def, ++ size_t num_shader_cores_cntrs, ++ s32 reference_voltage); ++/** ++ * kbase_ipa_counter_common_model_term() - terminate ipa power model ++ * @model: ipa power model to terminate ++ * ++ * This function performs all necessary steps to terminate ipa power model ++ * including clean up of resources allocated to hold model data. ++ */ ++void kbase_ipa_counter_common_model_term(struct kbase_ipa_model *model); ++ ++#endif /* _KBASE_IPA_COUNTER_COMMON_CSF_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_jm.c +similarity index 95% +rename from dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c +rename to dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_jm.c +index 9fae8f1..4737b0e 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_jm.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2017-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2017-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,10 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +-#include "mali_kbase_ipa_vinstr_common.h" +-#include "mali_kbase_ipa_debugfs.h" ++#include "mali_kbase_ipa_counter_common_jm.h" ++#include "ipa/mali_kbase_ipa_debugfs.h" + + #define DEFAULT_SCALING_FACTOR 5 + +@@ -145,6 +144,9 @@ int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data) + + kbase_hwcnt_enable_map_enable_all(&enable_map); + ++ /* Disable cycle counter only. */ ++ enable_map.clk_enable_map = 0; ++ + errcode = kbase_hwcnt_virtualizer_client_create( + hvirt, &enable_map, &model_data->hvirt_cli); + kbase_hwcnt_enable_map_free(&enable_map); +@@ -270,6 +272,12 @@ err0: + return err; + } + ++void kbase_ipa_vinstr_reset_data(struct kbase_ipa_model *model) ++{ ++ /* Currently not implemented */ ++ WARN_ON_ONCE(1); ++} ++ + int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model, + const struct kbase_ipa_group *ipa_groups_def, + size_t ipa_group_size, +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_jm.h +similarity index 85% +rename from dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h +rename to dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_jm.h +index 46e3cd4..3486a9b 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_jm.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved. 
++ * (C) COPYRIGHT 2017-2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,10 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +-#ifndef _KBASE_IPA_VINSTR_COMMON_H_ +-#define _KBASE_IPA_VINSTR_COMMON_H_ ++#ifndef _KBASE_IPA_COUNTER_COMMON_JM_H_ ++#define _KBASE_IPA_COUNTER_COMMON_JM_H_ + + #include "mali_kbase.h" + #include "mali_kbase_hwcnt_virtualizer.h" +@@ -42,11 +41,13 @@ + + struct kbase_ipa_model_vinstr_data; + +-typedef u32 (*kbase_ipa_get_active_cycles_callback)(struct kbase_ipa_model_vinstr_data *); ++typedef u32 ++kbase_ipa_get_active_cycles_callback(struct kbase_ipa_model_vinstr_data *); + + /** + * struct kbase_ipa_model_vinstr_data - IPA context per device + * @kbdev: pointer to kbase device ++ * @group_values: values of coefficients for IPA groups + * @groups_def: Array of IPA groups. + * @groups_def_num: Number of elements in the array of IPA groups. 
+ * @get_active_cycles: Callback to return number of active cycles during +@@ -73,7 +74,7 @@ struct kbase_ipa_model_vinstr_data { + s32 group_values[KBASE_IPA_MAX_GROUP_DEF_NUM]; + const struct kbase_ipa_group *groups_def; + size_t groups_def_num; +- kbase_ipa_get_active_cycles_callback get_active_cycles; ++ kbase_ipa_get_active_cycles_callback *get_active_cycles; + struct kbase_hwcnt_virtualizer_client *hvirt_cli; + struct kbase_hwcnt_dump_buffer dump_buf; + s32 reference_voltage; +@@ -102,7 +103,7 @@ struct kbase_ipa_group { + * @model_data: pointer to model data + * @coeff: model coefficient. Unity is ~2^20, so range approx + * +/- 4.0: -2^22 < coeff < 2^22 +- * @counter offset in bytes of the counter used to calculate energy ++ * @counter: offset in bytes of the counter used to calculate energy + * for IPA group + * + * Calculate energy estimation based on hardware counter `counter' +@@ -149,7 +150,7 @@ s64 kbase_ipa_single_counter( + + /** + * attach_vinstr() - attach a vinstr_buffer to an IPA model. +- * @model_data pointer to model data ++ * @model_data: pointer to model data + * + * Attach a vinstr_buffer to an IPA model. The vinstr_buffer + * allows access to the hardware counters used to calculate +@@ -161,7 +162,7 @@ int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data); + + /** + * detach_vinstr() - detach a vinstr_buffer from an IPA model. +- * @model_data pointer to model data ++ * @model_data: pointer to model data + * + * Detach a vinstr_buffer from an IPA model. + */ +@@ -181,6 +182,19 @@ void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data); + */ + int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp); + ++/** ++ * kbase_ipa_vinstr_reset_data() - Reset the counters data used for dynamic ++ * power estimation ++ * @model: pointer to instantiated model ++ * ++ * Currently it is not implemented for JM GPUs. 
++ * When implemented it is expected to retrieve the accumulated value of HW ++ * counters from the Vinstr component, without doing any processing, which is ++ * effectively a reset as the next call to kbase_ipa_counter_dynamic_coeff() ++ * will see the increment in counter values from this point onwards. ++ */ ++void kbase_ipa_vinstr_reset_data(struct kbase_ipa_model *model); ++ + /** + * kbase_ipa_vinstr_common_model_init() - initialize ipa power model + * @model: ipa power model to initialize +@@ -202,7 +216,7 @@ int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp); + int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model, + const struct kbase_ipa_group *ipa_groups_def, + size_t ipa_group_size, +- kbase_ipa_get_active_cycles_callback get_active_cycles, ++ kbase_ipa_get_active_cycles_callback *get_active_cycles, + s32 reference_voltage); + + /** +@@ -214,4 +228,4 @@ int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model, + */ + void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model); + +-#endif /* _KBASE_IPA_VINSTR_COMMON_H_ */ ++#endif /* _KBASE_IPA_COUNTER_COMMON_JM_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_csf.c b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_csf.c +new file mode 100644 +index 0000000..1852c3c +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_csf.c +@@ -0,0 +1,171 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#include "mali_kbase_ipa_counter_common_csf.h" ++#include "mali_kbase.h" ++ ++/* MEMSYS counter block offsets */ ++#define L2_RD_MSG_IN (16) ++#define L2_WR_MSG_IN (18) ++#define L2_READ_LOOKUP (26) ++#define L2_EXT_WRITE_NOSNP_FULL (43) ++ ++/* SC counter block offsets */ ++#define FRAG_QUADS_EZS_UPDATE (13) ++#define FULL_QUAD_WARPS (21) ++#define EXEC_INSTR_FMA (27) ++#define EXEC_INSTR_CVT (28) ++#define TEX_FILT_NUM_OPS (39) ++#define LS_MEM_READ_SHORT (45) ++#define LS_MEM_WRITE_SHORT (47) ++#define VARY_SLOT_16 (51) ++ ++/* Tiler counter block offsets */ ++#define IDVS_POS_SHAD_STALL (23) ++#define PREFETCH_STALL (25) ++#define VFETCH_POS_READ_WAIT (29) ++#define VFETCH_VERTEX_WAIT (30) ++#define IDVS_VAR_SHAD_STALL (38) ++ ++#define COUNTER_DEF(cnt_name, coeff, cnt_idx, block_type) \ ++ { \ ++ .name = cnt_name, \ ++ .coeff_default_value = coeff, \ ++ .counter_block_offset = cnt_idx, \ ++ .counter_block_type = block_type, \ ++ } ++ ++#define CSHW_COUNTER_DEF(cnt_name, coeff, cnt_idx) \ ++ COUNTER_DEF(cnt_name, coeff, cnt_idx, KBASE_IPA_CORE_TYPE_CSHW) ++ ++#define MEMSYS_COUNTER_DEF(cnt_name, coeff, cnt_idx) \ ++ COUNTER_DEF(cnt_name, coeff, cnt_idx, KBASE_IPA_CORE_TYPE_MEMSYS) ++ ++#define SC_COUNTER_DEF(cnt_name, coeff, cnt_idx) \ ++ COUNTER_DEF(cnt_name, coeff, cnt_idx, KBASE_IPA_CORE_TYPE_SHADER) ++ ++#define TILER_COUNTER_DEF(cnt_name, coeff, cnt_idx) \ ++ COUNTER_DEF(cnt_name, coeff, cnt_idx, KBASE_IPA_CORE_TYPE_TILER) ++ ++/* Tables of description of HW counters used by IPA counter model. 
++ * ++ * These tables provide a description of each performance counter ++ * used by the top level counter model for energy estimation. ++ */ ++static const struct kbase_ipa_counter ipa_top_level_cntrs_def_todx[] = { ++ MEMSYS_COUNTER_DEF("l2_rd_msg_in", 295631, L2_RD_MSG_IN), ++ MEMSYS_COUNTER_DEF("l2_ext_write_nosnp_ull", 325168, L2_EXT_WRITE_NOSNP_FULL), ++ ++ TILER_COUNTER_DEF("prefetch_stall", 145435, PREFETCH_STALL), ++ TILER_COUNTER_DEF("idvs_var_shad_stall", -171917, IDVS_VAR_SHAD_STALL), ++ TILER_COUNTER_DEF("idvs_pos_shad_stall", 109980, IDVS_POS_SHAD_STALL), ++ TILER_COUNTER_DEF("vfetch_pos_read_wait", -119118, VFETCH_POS_READ_WAIT), ++}; ++ ++ ++/* These tables provide a description of each performance counter ++ * used by the shader cores counter model for energy estimation. ++ */ ++static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_todx[] = { ++ SC_COUNTER_DEF("exec_instr_fma", 505449, EXEC_INSTR_FMA), ++ SC_COUNTER_DEF("tex_filt_num_operations", 574869, TEX_FILT_NUM_OPS), ++ SC_COUNTER_DEF("ls_mem_read_short", 60917, LS_MEM_READ_SHORT), ++ SC_COUNTER_DEF("frag_quads_ezs_update", 694555, FRAG_QUADS_EZS_UPDATE), ++ SC_COUNTER_DEF("ls_mem_write_short", 698290, LS_MEM_WRITE_SHORT), ++ SC_COUNTER_DEF("vary_slot_16", 181069, VARY_SLOT_16), ++}; ++ ++ ++#define IPA_POWER_MODEL_OPS(gpu, init_token) \ ++ const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \ ++ .name = "mali-" #gpu "-power-model", \ ++ .init = kbase_ ## init_token ## _power_model_init, \ ++ .term = kbase_ipa_counter_common_model_term, \ ++ .get_dynamic_coeff = kbase_ipa_counter_dynamic_coeff, \ ++ .reset_counter_data = kbase_ipa_counter_reset_data, \ ++ }; \ ++ KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops) ++ ++#define STANDARD_POWER_MODEL(gpu, reference_voltage) \ ++ static int kbase_ ## gpu ## _power_model_init(\ ++ struct kbase_ipa_model *model) \ ++ { \ ++ BUILD_BUG_ON((1 + \ ++ ARRAY_SIZE(ipa_top_level_cntrs_def_ ## gpu) +\ ++ 
ARRAY_SIZE(ipa_shader_core_cntrs_def_ ## gpu)) > \ ++ KBASE_IPA_MAX_COUNTER_DEF_NUM); \ ++ return kbase_ipa_counter_common_model_init(model, \ ++ ipa_top_level_cntrs_def_ ## gpu, \ ++ ARRAY_SIZE(ipa_top_level_cntrs_def_ ## gpu), \ ++ ipa_shader_core_cntrs_def_ ## gpu, \ ++ ARRAY_SIZE(ipa_shader_core_cntrs_def_ ## gpu), \ ++ (reference_voltage)); \ ++ } \ ++ IPA_POWER_MODEL_OPS(gpu, gpu) ++ ++ ++#define ALIAS_POWER_MODEL(gpu, as_gpu) \ ++ IPA_POWER_MODEL_OPS(gpu, as_gpu) ++ ++/* Reference voltage value is 750 mV. ++ */ ++STANDARD_POWER_MODEL(todx, 750); ++ ++ ++/* Assuming LODX is an alias of TODX for IPA */ ++ALIAS_POWER_MODEL(lodx, todx); ++ ++static const struct kbase_ipa_model_ops *ipa_counter_model_ops[] = { ++ &kbase_todx_ipa_model_ops, &kbase_lodx_ipa_model_ops, ++}; ++ ++const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find( ++ struct kbase_device *kbdev, const char *name) ++{ ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(ipa_counter_model_ops); ++i) { ++ const struct kbase_ipa_model_ops *ops = ++ ipa_counter_model_ops[i]; ++ ++ if (!strcmp(ops->name, name)) ++ return ops; ++ } ++ ++ dev_err(kbdev->dev, "power model \'%s\' not found\n", name); ++ ++ return NULL; ++} ++ ++const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id) ++{ ++ const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> ++ GPU_ID_VERSION_PRODUCT_ID_SHIFT; ++ ++ switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) { ++ case GPU_ID2_PRODUCT_TODX: ++ return "mali-todx-power-model"; ++ case GPU_ID2_PRODUCT_LODX: ++ return "mali-lodx-power-model"; ++ default: ++ return NULL; ++ } ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_jm.c +similarity index 83% +rename from dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c +rename to dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_jm.c +index 270b75e..2f4c9d9 100644 +--- 
a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_jm.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2016-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,11 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ ++ + #include + +-#include "mali_kbase_ipa_vinstr_common.h" ++#include "mali_kbase_ipa_counter_common_jm.h" + #include "mali_kbase.h" + + +@@ -97,7 +97,6 @@ static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_da + const u32 sc_base = MEMSYS_BASE + + (model_data->kbdev->gpu_props.props.l2_props.num_l2_slices * + KBASE_IPA_NR_BYTES_PER_BLOCK); +- + return sc_base + counter_block_offset; + } + +@@ -178,7 +177,7 @@ static u32 kbase_g7x_get_active_cycles( + return kbase_ipa_single_counter(model_data, 1, counter); + } + +-/** Table of IPA group definitions. ++/* Table of IPA group definitions. + * + * For each IPA group, this table defines a function to access the given performance block counter (or counters, + * if the operation needs to be iterated on multiple blocks) and calculate energy estimation. 
+@@ -415,6 +414,39 @@ static const struct kbase_ipa_group ipa_groups_def_tbex[] = { + }, + }; + ++static const struct kbase_ipa_group ipa_groups_def_tbax[] = { ++ { ++ .name = "l2_access", ++ .default_value = 599800, ++ .op = kbase_g7x_sum_all_memsys_blocks, ++ .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, ++ }, ++ { ++ .name = "exec_instr_msg", ++ .default_value = 1830200, ++ .op = kbase_g7x_sum_all_shader_cores, ++ .counter_block_offset = SC_EXEC_INSTR_MSG, ++ }, ++ { ++ .name = "exec_instr_fma", ++ .default_value = 407300, ++ .op = kbase_g7x_sum_all_shader_cores, ++ .counter_block_offset = SC_EXEC_INSTR_FMA, ++ }, ++ { ++ .name = "tex_filt_num_operations", ++ .default_value = 224500, ++ .op = kbase_g7x_sum_all_shader_cores, ++ .counter_block_offset = SC_TEX_FILT_NUM_OPERATIONS, ++ }, ++ { ++ .name = "gpu_active", ++ .default_value = 153800, ++ .op = kbase_g7x_jm_single_counter, ++ .counter_block_offset = JM_GPU_ACTIVE, ++ }, ++}; ++ + + #define IPA_POWER_MODEL_OPS(gpu, init_token) \ + const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \ +@@ -422,6 +454,7 @@ static const struct kbase_ipa_group ipa_groups_def_tbex[] = { + .init = kbase_ ## init_token ## _power_model_init, \ + .term = kbase_ipa_vinstr_common_model_term, \ + .get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \ ++ .reset_counter_data = kbase_ipa_vinstr_reset_data, \ + }; \ + KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops) + +@@ -449,8 +482,74 @@ STANDARD_POWER_MODEL(g52_r1, 1000); + STANDARD_POWER_MODEL(g51, 1000); + STANDARD_POWER_MODEL(g77, 1000); + STANDARD_POWER_MODEL(tbex, 1000); ++STANDARD_POWER_MODEL(tbax, 1000); + + /* g52 is an alias of g76 (TNOX) for IPA */ + ALIAS_POWER_MODEL(g52, g76); + /* tnax is an alias of g77 (TTRX) for IPA */ + ALIAS_POWER_MODEL(tnax, g77); ++ ++static const struct kbase_ipa_model_ops *ipa_counter_model_ops[] = { ++ &kbase_g71_ipa_model_ops, ++ &kbase_g72_ipa_model_ops, ++ &kbase_g76_ipa_model_ops, ++ &kbase_g52_ipa_model_ops, ++ 
&kbase_g52_r1_ipa_model_ops, ++ &kbase_g51_ipa_model_ops, ++ &kbase_g77_ipa_model_ops, ++ &kbase_tnax_ipa_model_ops, ++ &kbase_tbex_ipa_model_ops, ++ &kbase_tbax_ipa_model_ops ++}; ++ ++const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find( ++ struct kbase_device *kbdev, const char *name) ++{ ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(ipa_counter_model_ops); ++i) { ++ const struct kbase_ipa_model_ops *ops = ++ ipa_counter_model_ops[i]; ++ ++ if (!strcmp(ops->name, name)) ++ return ops; ++ } ++ ++ dev_err(kbdev->dev, "power model \'%s\' not found\n", name); ++ ++ return NULL; ++} ++ ++const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id) ++{ ++ const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> ++ GPU_ID_VERSION_PRODUCT_ID_SHIFT; ++ ++ switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) { ++ case GPU_ID2_PRODUCT_TMIX: ++ return "mali-g71-power-model"; ++ case GPU_ID2_PRODUCT_THEX: ++ return "mali-g72-power-model"; ++ case GPU_ID2_PRODUCT_TNOX: ++ return "mali-g76-power-model"; ++ case GPU_ID2_PRODUCT_TSIX: ++ return "mali-g51-power-model"; ++ case GPU_ID2_PRODUCT_TGOX: ++ if ((gpu_id & GPU_ID2_VERSION_MAJOR) == ++ (0 << GPU_ID2_VERSION_MAJOR_SHIFT)) ++ /* g52 aliased to g76 power-model's ops */ ++ return "mali-g52-power-model"; ++ else ++ return "mali-g52_r1-power-model"; ++ case GPU_ID2_PRODUCT_TNAX: ++ return "mali-tnax-power-model"; ++ case GPU_ID2_PRODUCT_TTRX: ++ return "mali-g77-power-model"; ++ case GPU_ID2_PRODUCT_TBEX: ++ return "mali-tbex-power-model"; ++ case GPU_ID2_PRODUCT_TBAX: ++ return "mali-tbax-power-model"; ++ default: ++ return NULL; ++ } ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c +old mode 100644 +new mode 100755 +index d663ccb..24d7b06 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH 
Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2016-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,9 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ ++ + #include + #include + #include +@@ -27,30 +27,19 @@ + #include "mali_kbase_ipa_debugfs.h" + #include "mali_kbase_ipa_simple.h" + #include "backend/gpu/mali_kbase_pm_internal.h" +- +-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) ++#include "backend/gpu/mali_kbase_devfreq.h" + #include +-#else +-#include +-#define dev_pm_opp_find_freq_exact opp_find_freq_exact +-#define dev_pm_opp_get_voltage opp_get_voltage +-#define dev_pm_opp opp +-#endif + + #define KBASE_IPA_FALLBACK_MODEL_NAME "mali-simple-power-model" + +-static const struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = { +- &kbase_simple_ipa_model_ops, +- &kbase_g71_ipa_model_ops, +- &kbase_g72_ipa_model_ops, +- &kbase_g76_ipa_model_ops, +- &kbase_g52_ipa_model_ops, +- &kbase_g52_r1_ipa_model_ops, +- &kbase_g51_ipa_model_ops, +- &kbase_g77_ipa_model_ops, +- &kbase_tnax_ipa_model_ops, +- &kbase_tbex_ipa_model_ops +-}; ++/* Polling by thermal governor starts when the temperature exceeds the certain ++ * trip point. In order to have meaningful value for the counters, when the ++ * polling starts and first call to kbase_get_real_power() is made, it is ++ * required to reset the counter values every now and then. 
++ * It is reasonable to do the reset every second if no polling is being done, ++ * the counter model implementation also assumes max sampling interval of 1 sec. ++ */ ++#define RESET_INTERVAL_MS ((s64)1000) + + int kbase_ipa_model_recalculate(struct kbase_ipa_model *model) + { +@@ -71,53 +60,24 @@ int kbase_ipa_model_recalculate(struct kbase_ipa_model *model) + } + + const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev, +- const char *name) ++ const char *name) + { +- int i; ++ if (!strcmp(name, kbase_simple_ipa_model_ops.name)) ++ return &kbase_simple_ipa_model_ops; + +- for (i = 0; i < ARRAY_SIZE(kbase_ipa_all_model_ops); ++i) { +- const struct kbase_ipa_model_ops *ops = kbase_ipa_all_model_ops[i]; +- +- if (!strcmp(ops->name, name)) +- return ops; +- } +- +- dev_err(kbdev->dev, "power model \'%s\' not found\n", name); +- +- return NULL; ++ return kbase_ipa_counter_model_ops_find(kbdev, name); + } + KBASE_EXPORT_TEST_API(kbase_ipa_model_ops_find); + + const char *kbase_ipa_model_name_from_id(u32 gpu_id) + { +- const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> +- GPU_ID_VERSION_PRODUCT_ID_SHIFT; +- +- switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) { +- case GPU_ID2_PRODUCT_TMIX: +- return "mali-g71-power-model"; +- case GPU_ID2_PRODUCT_THEX: +- return "mali-g72-power-model"; +- case GPU_ID2_PRODUCT_TNOX: +- return "mali-g76-power-model"; +- case GPU_ID2_PRODUCT_TSIX: +- return "mali-g51-power-model"; +- case GPU_ID2_PRODUCT_TGOX: +- if ((gpu_id & GPU_ID2_VERSION_MAJOR) == +- (0 << GPU_ID2_VERSION_MAJOR_SHIFT)) +- /* g52 aliased to g76 power-model's ops */ +- return "mali-g52-power-model"; +- else +- return "mali-g52_r1-power-model"; +- case GPU_ID2_PRODUCT_TNAX: +- return "mali-tnax-power-model"; +- case GPU_ID2_PRODUCT_TTRX: +- return "mali-g77-power-model"; +- case GPU_ID2_PRODUCT_TBEX: +- return "mali-tbex-power-model"; +- default: ++ const char* model_name = ++ kbase_ipa_counter_model_name_from_id(gpu_id); ++ ++ if 
(!model_name) + return KBASE_IPA_FALLBACK_MODEL_NAME; +- } ++ else ++ return model_name; + } + KBASE_EXPORT_TEST_API(kbase_ipa_model_name_from_id); + +@@ -364,6 +324,8 @@ int kbase_ipa_init(struct kbase_device *kbdev) + kbdev->ipa.configured_model = default_model; + } + ++ kbdev->ipa.last_sample_time = ktime_get(); ++ + end: + if (err) + kbase_ipa_term_locked(kbdev); +@@ -418,7 +380,8 @@ static u32 kbase_scale_dynamic_power(const u32 c, const u32 freq, + const u32 v2f = v2f_big / 1000; + + /* Range (working backwards from next line): 0 < v2fc < 2^23 uW. +- * Must be < 2^42 to avoid overflowing the return value. */ ++ * Must be < 2^42 to avoid overflowing the return value. ++ */ + const u64 v2fc = (u64) c * (u64) v2f; + + /* Range: 0 < v2fc / 1000 < 2^13 mW */ +@@ -514,8 +477,9 @@ static u32 get_static_power_locked(struct kbase_device *kbdev, + return power; + } + +-#if defined(CONFIG_MALI_PWRSOFT_765) || \ +- LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++#if KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE ++#if defined(CONFIG_MALI_PWRSOFT_765) || \ ++ KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE + static unsigned long kbase_get_static_power(struct devfreq *df, + unsigned long voltage) + #else +@@ -524,8 +488,8 @@ static unsigned long kbase_get_static_power(unsigned long voltage) + { + struct kbase_ipa_model *model; + u32 power = 0; +-#if defined(CONFIG_MALI_PWRSOFT_765) || \ +- LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++#if defined(CONFIG_MALI_PWRSOFT_765) || \ ++ KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE + struct kbase_device *kbdev = dev_get_drvdata(&df->dev); + #else + struct kbase_device *kbdev = kbase_find_device(-1); +@@ -541,16 +505,55 @@ static unsigned long kbase_get_static_power(unsigned long voltage) + + mutex_unlock(&kbdev->ipa.lock); + +-#if !(defined(CONFIG_MALI_PWRSOFT_765) || \ +- LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)) ++#if !(defined(CONFIG_MALI_PWRSOFT_765) || \ ++ KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) + 
kbase_release_device(kbdev); + #endif + + return power; + } ++#endif /* KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE */ ++ ++/** ++ * opp_translate_freq_voltage() - Translate nominal OPP frequency from ++ * devicetree into the real frequency for ++ * top-level and shader cores. ++ * @kbdev: Device pointer ++ * @nominal_freq: Nominal frequency in Hz. ++ * @nominal_voltage: Nominal voltage, in mV. ++ * @freqs: Pointer to array of real frequency values. ++ * @volts: Pointer to array of voltages. ++ * ++ * If there are 2 clock domains, then top-level and shader cores can operate ++ * at different frequency and voltage level. The nominal frequency ("opp-hz") ++ * used by devfreq from the devicetree may not be same as the real frequency ++ * at which top-level and shader cores are operating, so a translation is ++ * needed. ++ * Nominal voltage shall always be same as the real voltage for top-level. ++ */ ++static void opp_translate_freq_voltage(struct kbase_device *kbdev, ++ unsigned long nominal_freq, ++ unsigned long nominal_voltage, ++ unsigned long *freqs, ++ unsigned long *volts) ++{ ++ u64 core_mask; ++ ++ kbase_devfreq_opp_translate(kbdev, nominal_freq, &core_mask, ++ freqs, volts); ++ CSTD_UNUSED(core_mask); ++ ++ if (kbdev->nr_clocks == 1) { ++ freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = ++ freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]; ++ volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = ++ volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]; ++ } ++} + +-#if defined(CONFIG_MALI_PWRSOFT_765) || \ +- LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++#if KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE ++#if defined(CONFIG_MALI_PWRSOFT_765) || \ ++ KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE + static unsigned long kbase_get_dynamic_power(struct devfreq *df, + unsigned long freq, + unsigned long voltage) +@@ -560,10 +563,13 @@ static unsigned long kbase_get_dynamic_power(unsigned long freq, + #endif + { + struct kbase_ipa_model *model; +- u32 power_coeff = 0, power = 0; ++ unsigned long 
freqs[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; ++ unsigned long volts[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; ++ u32 power_coeffs[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; ++ u32 power = 0; + int err = 0; +-#if defined(CONFIG_MALI_PWRSOFT_765) || \ +- LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++#if defined(CONFIG_MALI_PWRSOFT_765) || \ ++ KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE + struct kbase_device *kbdev = dev_get_drvdata(&df->dev); + #else + struct kbase_device *kbdev = kbase_find_device(-1); +@@ -576,34 +582,53 @@ static unsigned long kbase_get_dynamic_power(unsigned long freq, + + model = kbdev->ipa.fallback_model; + +- err = model->ops->get_dynamic_coeff(model, &power_coeff); +- +- if (!err) +- power = kbase_scale_dynamic_power(power_coeff, freq, voltage); +- else ++ err = model->ops->get_dynamic_coeff(model, power_coeffs); ++ ++ if (!err) { ++ opp_translate_freq_voltage(kbdev, freq, voltage, freqs, volts); ++ ++ power = kbase_scale_dynamic_power( ++ power_coeffs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL], ++ freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL], ++ volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]); ++ ++ /* Here unlike kbase_get_real_power(), shader core frequency is ++ * used for the scaling as simple power model is used to obtain ++ * the value of dynamic coefficient (which is a fixed value ++ * retrieved from the device tree).
++ */ ++ power += kbase_scale_dynamic_power( ++ power_coeffs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES], ++ freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES], ++ volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]); ++ } else + dev_err_ratelimited(kbdev->dev, + "Model %s returned error code %d\n", + model->ops->name, err); + + mutex_unlock(&kbdev->ipa.lock); + +-#if !(defined(CONFIG_MALI_PWRSOFT_765) || \ +- LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)) ++#if !(defined(CONFIG_MALI_PWRSOFT_765) || \ ++ KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) + kbase_release_device(kbdev); + #endif + + return power; + } ++#endif /* KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE */ + + int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power, + unsigned long freq, + unsigned long voltage) + { + struct kbase_ipa_model *model; +- u32 power_coeff = 0; +- int err = 0; ++ unsigned long freqs[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; ++ unsigned long volts[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; ++ u32 power_coeffs[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; + struct kbasep_pm_metrics diff; + u64 total_time; ++ bool skip_utilization_scaling = false; ++ int err = 0; + + lockdep_assert_held(&kbdev->ipa.lock); + +@@ -611,30 +636,62 @@ int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power, + + model = get_current_model(kbdev); + +- err = model->ops->get_dynamic_coeff(model, &power_coeff); ++ err = model->ops->get_dynamic_coeff(model, power_coeffs); + + /* If the counter model returns an error (e.g. switching back to + * protected mode and failing to read counters, or a counter sample + * with too few cycles), revert to the fallback model. + */ + if (err && model != kbdev->ipa.fallback_model) { ++ /* No meaningful scaling for GPU utilization can be done if ++ * the sampling interval was too long. This is equivalent to ++ * assuming GPU was busy throughout (similar to what is done ++ * during protected mode). 
++ */ ++ if (err == -EOVERFLOW) ++ skip_utilization_scaling = true; ++ + model = kbdev->ipa.fallback_model; +- err = model->ops->get_dynamic_coeff(model, &power_coeff); ++ err = model->ops->get_dynamic_coeff(model, power_coeffs); + } + +- if (err) ++ if (WARN_ON(err)) + return err; + +- *power = kbase_scale_dynamic_power(power_coeff, freq, voltage); ++ opp_translate_freq_voltage(kbdev, freq, voltage, freqs, volts); + +- /* time_busy / total_time cannot be >1, so assigning the 64-bit +- * result of div_u64 to *power cannot overflow. +- */ +- total_time = diff.time_busy + (u64) diff.time_idle; +- *power = div_u64(*power * (u64) diff.time_busy, +- max(total_time, 1ull)); ++ *power = kbase_scale_dynamic_power( ++ power_coeffs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL], ++ freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL], ++ volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]); ++ ++ if (power_coeffs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]) { ++ unsigned long freq = freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]; + +- *power += get_static_power_locked(kbdev, model, voltage); ++ /* As per the HW team, the top-level frequency needs to be used ++ * for the scaling if the counter based model was used as ++ * counter values are normalized with the GPU_ACTIVE counter ++ * value, which increments at the rate of top-level frequency. ++ */ ++ if (model != kbdev->ipa.fallback_model) ++ freq = freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]; ++ ++ *power += kbase_scale_dynamic_power( ++ power_coeffs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES], ++ freq, volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]); ++ } ++ ++ if (!skip_utilization_scaling) { ++ /* time_busy / total_time cannot be >1, so assigning the 64-bit ++ * result of div_u64 to *power cannot overflow. 
++ */ ++ total_time = diff.time_busy + (u64) diff.time_idle; ++ *power = div_u64(*power * (u64) diff.time_busy, ++ max(total_time, 1ull)); ++ } ++ ++ *power += get_static_power_locked(kbdev, model, ++ volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]); + + return err; + } +@@ -658,18 +715,42 @@ int kbase_get_real_power(struct devfreq *df, u32 *power, + } + KBASE_EXPORT_TEST_API(kbase_get_real_power); + +-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) +-struct devfreq_cooling_ops kbase_ipa_power_model_ops = { +-#else + struct devfreq_cooling_power kbase_ipa_power_model_ops = { +-#endif +-#ifdef CONFIG_MALI_DEVFREQ ++#if KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE + .get_static_power = &kbase_get_static_power, + .get_dynamic_power = &kbase_get_dynamic_power, +-#endif +-#if defined(CONFIG_MALI_PWRSOFT_765) || \ +- LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) ++#endif /* KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE */ ++#if defined(CONFIG_MALI_PWRSOFT_765) || \ ++ KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE + .get_real_power = &kbase_get_real_power, + #endif + }; + KBASE_EXPORT_TEST_API(kbase_ipa_power_model_ops); ++ ++void kbase_ipa_reset_data(struct kbase_device *kbdev) ++{ ++ ktime_t now, diff; ++ s64 elapsed_time; ++ ++ mutex_lock(&kbdev->ipa.lock); ++ ++ now = ktime_get(); ++ diff = ktime_sub(now, kbdev->ipa.last_sample_time); ++ elapsed_time = ktime_to_ms(diff); ++ ++ if (elapsed_time > RESET_INTERVAL_MS) { ++ struct kbasep_pm_metrics diff; ++ struct kbase_ipa_model *model; ++ ++ kbase_pm_get_dvfs_metrics( ++ kbdev, &kbdev->ipa.last_metrics, &diff); ++ ++ model = get_current_model(kbdev); ++ if (model != kbdev->ipa.fallback_model) ++ model->ops->reset_counter_data(model); ++ ++ kbdev->ipa.last_sample_time = ktime_get(); ++ } ++ ++ mutex_unlock(&kbdev->ipa.lock); ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h +index 92aace9..c668af9 100644 +--- 
a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2016-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KBASE_IPA_H_ +@@ -27,6 +26,20 @@ + + struct devfreq; + ++/** ++ * enum kbase_ipa_block_type - Type of block for which power estimation is done. ++ * ++ * @KBASE_IPA_BLOCK_TYPE_TOP_LEVEL: Top-level block, that covers CSHW, ++ * MEMSYS, Tiler. ++ * @KBASE_IPA_BLOCK_TYPE_SHADER_CORES: All Shader cores. ++ * @KBASE_IPA_BLOCK_TYPE_NUM: Number of blocks. ++ */ ++enum kbase_ipa_block_type { ++ KBASE_IPA_BLOCK_TYPE_TOP_LEVEL, ++ KBASE_IPA_BLOCK_TYPE_SHADER_CORES, ++ KBASE_IPA_BLOCK_TYPE_NUM ++}; ++ + /** + * struct kbase_ipa_model - Object describing a particular IPA model. + * @kbdev: pointer to kbase device +@@ -89,7 +102,8 @@ struct kbase_ipa_model_ops { + int (*init)(struct kbase_ipa_model *model); + /* Called immediately after init(), or when a parameter is changed, so + * that any coefficients derived from model parameters can be +- * recalculated. 
*/ ++ * recalculated ++ */ + int (*recalculate)(struct kbase_ipa_model *model); + void (*term)(struct kbase_ipa_model *model); + /* +@@ -101,7 +115,9 @@ struct kbase_ipa_model_ops { + * is then scaled by the IPA framework according to the current OPP's + * frequency and voltage. + * +- * Return: 0 on success, or an error code. ++ * Return: 0 on success, or an error code. -EOVERFLOW error code will ++ * indicate that sampling interval was too large and no meaningful ++ * scaling for GPU utilization can be done. + */ + int (*get_dynamic_coeff)(struct kbase_ipa_model *model, u32 *coeffp); + /* +@@ -115,6 +131,18 @@ struct kbase_ipa_model_ops { + * Return: 0 on success, or an error code. + */ + int (*get_static_coeff)(struct kbase_ipa_model *model, u32 *coeffp); ++ ++ /* ++ * reset_counter_data() - Reset the HW counter data used for calculating ++ * dynamic power coefficient ++ * @model: pointer to model ++ * ++ * This method is currently applicable only to the counter based model. ++ * The next call to get_dynamic_coeff() will have to calculate the ++ * dynamic power coefficient based on the HW counter data generated ++ * from this point onwards. ++ */ ++ void (*reset_counter_data)(struct kbase_ipa_model *model); + }; + + /** +@@ -163,6 +191,17 @@ int kbase_ipa_model_recalculate(struct kbase_ipa_model *model); + const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev, + const char *name); + ++/** ++ * kbase_ipa_counter_model_ops_find - Lookup an IPA counter model using its name ++ * @kbdev: pointer to kbase device ++ * @name: name of counter model to lookup ++ * ++ * Return: Pointer to counter model's 'ops' structure, or NULL if the lookup ++ * failed.
++ */ ++const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find( ++ struct kbase_device *kbdev, const char *name); ++ + /** + * kbase_ipa_model_name_from_id - Find the best model for a given GPU ID + * @gpu_id: GPU ID of GPU the model will be used for +@@ -172,6 +211,16 @@ const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device * + */ + const char *kbase_ipa_model_name_from_id(u32 gpu_id); + ++/** ++ * kbase_ipa_counter_model_name_from_id - Find the best counter model for a ++ * given GPU ID ++ * @gpu_id: GPU ID of GPU the counter model will be used for ++ * ++ * Return: The name of the appropriate counter-based model, or NULL if ++ * no counter model exists. ++ */ ++const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id); ++ + /** + * kbase_ipa_init_model - Initilaize the particular IPA model + * @kbdev: pointer to kbase device +@@ -183,7 +232,7 @@ const char *kbase_ipa_model_name_from_id(u32 gpu_id); + * Return: pointer to kbase_ipa_model on success, NULL on error + */ + struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev, +- const struct kbase_ipa_model_ops *ops); ++ const struct kbase_ipa_model_ops *ops); + /** + * kbase_ipa_term_model - Terminate the particular IPA model + * @model: pointer to the IPA model object, already initialized +@@ -202,16 +251,6 @@ void kbase_ipa_term_model(struct kbase_ipa_model *model); + */ + void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev); + +-extern const struct kbase_ipa_model_ops kbase_g71_ipa_model_ops; +-extern const struct kbase_ipa_model_ops kbase_g72_ipa_model_ops; +-extern const struct kbase_ipa_model_ops kbase_g76_ipa_model_ops; +-extern const struct kbase_ipa_model_ops kbase_g52_ipa_model_ops; +-extern const struct kbase_ipa_model_ops kbase_g52_r1_ipa_model_ops; +-extern const struct kbase_ipa_model_ops kbase_g51_ipa_model_ops; +-extern const struct kbase_ipa_model_ops kbase_g77_ipa_model_ops; +-extern const struct kbase_ipa_model_ops
kbase_tnax_ipa_model_ops; +-extern const struct kbase_ipa_model_ops kbase_tbex_ipa_model_ops; +- + /** + * kbase_get_real_power() - get the real power consumption of the GPU + * @df: dynamic voltage and frequency scaling information for the GPU. +@@ -237,11 +276,20 @@ int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power, + unsigned long voltage); + #endif /* MALI_UNIT_TEST */ + +-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) +-extern struct devfreq_cooling_ops kbase_ipa_power_model_ops; +-#else + extern struct devfreq_cooling_power kbase_ipa_power_model_ops; +-#endif ++ ++/** ++ * kbase_ipa_reset_data() - Reset the data required for power estimation. ++ * @kbdev: Pointer to kbase device. ++ * ++ * This function is called to ensure a meaningful baseline for ++ * kbase_get_real_power(), when thermal governor starts the polling, and ++ * that is achieved by updating the GPU utilization metrics and retrieving ++ * the accumulated value of HW counters. ++ * Basically this function collects all the data required for power estimation ++ * but does not process it. ++ */ ++void kbase_ipa_reset_data(struct kbase_device *kbdev); + + #else /* !(defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */ + +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c +index 30a3b7d..5976389 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2017-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2017-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include +@@ -28,7 +27,7 @@ + #include "mali_kbase_ipa.h" + #include "mali_kbase_ipa_debugfs.h" + +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)) ++#if (KERNEL_VERSION(4, 7, 0) > LINUX_VERSION_CODE) + #define DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SIMPLE_ATTRIBUTE + #endif + +@@ -160,7 +159,8 @@ int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name, + return -ENOMEM; + + /* 'name' is stack-allocated for array elements, so copy it into +- * heap-allocated storage */ ++ * heap-allocated storage ++ */ + param->name = kstrdup(name, GFP_KERNEL); + + if (!param->name) { +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h +index a983d9c..f690367 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KBASE_IPA_DEBUGFS_H_ +@@ -28,7 +27,7 @@ enum kbase_ipa_model_param_type { + PARAM_TYPE_STRING, + }; + +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + + void kbase_ipa_debugfs_init(struct kbase_device *kbdev); + int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name, +@@ -63,6 +62,9 @@ static inline int kbase_ipa_model_param_add(struct kbase_ipa_model *model, + static inline void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model) + { } + ++static inline void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model, ++ const char *name, s32 val) ++{ } + #endif /* CONFIG_DEBUG_FS */ + + #endif /* _KBASE_IPA_DEBUGFS_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c +index 852559e..55f1d1c 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2016-2018, 2020-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,13 +17,11 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include + #include +-#ifdef CONFIG_DEVFREQ_THERMAL ++#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) + #include + #endif + #include +@@ -34,20 +33,18 @@ + #include "mali_kbase_ipa_simple.h" + #include "mali_kbase_ipa_debugfs.h" + +-#if MALI_UNIT_TEST +- +-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) +-static unsigned long dummy_temp; ++/* This is used if the dynamic power for top-level is estimated separately ++ * through the counter model. To roughly match the contribution of top-level ++ * power in the total dynamic power, when calculated through counter model, ++ * this scalar is used for the dynamic coefficient specified in the device tree ++ * for simple power model. This value was provided by the HW team after ++ * taking all the power data collected and dividing top level power by shader ++ * core power and then averaging it across all samples. 
++ */ ++#define TOP_LEVEL_DYN_COEFF_SCALER (3) + +-static int kbase_simple_power_model_get_dummy_temp( +- struct thermal_zone_device *tz, +- unsigned long *temp) +-{ +- *temp = READ_ONCE(dummy_temp); +- return 0; +-} ++#if MALI_UNIT_TEST + +-#else + static int dummy_temp; + + static int kbase_simple_power_model_get_dummy_temp( +@@ -57,7 +54,6 @@ static int kbase_simple_power_model_get_dummy_temp( + *temp = READ_ONCE(dummy_temp); + return 0; + } +-#endif + + /* Intercept calls to the kernel function using a macro */ + #ifdef thermal_zone_get_temp +@@ -143,16 +139,13 @@ static u32 calculate_temp_scaling_factor(s32 ts[4], s64 t) + + /* We can't call thermal_zone_get_temp() directly in model_static_coeff(), + * because we don't know if tz->lock is held in the same thread. So poll it in +- * a separate thread to get around this. */ ++ * a separate thread to get around this. ++ */ + static int poll_temperature(void *data) + { + struct kbase_ipa_model_simple_data *model_data = + (struct kbase_ipa_model_simple_data *) data; +-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) +- unsigned long temp; +-#else + int temp; +-#endif + + while (!kthread_should_stop()) { + struct thermal_zone_device *tz = READ_ONCE(model_data->gpu_tz); +@@ -208,7 +201,21 @@ static int model_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) + struct kbase_ipa_model_simple_data *model_data = + (struct kbase_ipa_model_simple_data *) model->model_data; + ++#if MALI_USE_CSF ++ /* On CSF GPUs, the dynamic power for top-level and shader cores is ++ * estimated separately. Currently there is a single dynamic ++ * coefficient value provided in the device tree for simple model. ++ * As per the discussion with HW team the coefficient value needs to ++ * be scaled down for top-level to limit its contribution in the ++ * total dynamic power.
++ */ ++ coeffp[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL] = ++ model_data->dynamic_coefficient / TOP_LEVEL_DYN_COEFF_SCALER; ++ coeffp[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = ++ model_data->dynamic_coefficient; ++#else + *coeffp = model_data->dynamic_coefficient; ++#endif + + return 0; + } +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.h b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.h +index fed67d5..fb174e2 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KBASE_IPA_SIMPLE_H_ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_defs.h b/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_defs.h +index aac561b..c490f1c 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_defs.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_defs.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /* + * Definitions (types, defines, etcs) specific to Job Manager Kbase. + * They are placed here to allow the hierarchy of header files to work. +@@ -129,7 +126,7 @@ + /* Reset the GPU after each atom completion */ + #define KBASE_SERIALIZE_RESET (1 << 2) + +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + /** + * struct base_job_fault_event - keeps track of the atom which faulted or which + * completed after the faulty atom but before the +@@ -409,6 +406,16 @@ struct kbase_ext_res { + * sync through soft jobs and for the implicit + * synchronization required on access to external + * resources. ++ * @dma_fence.fence_in: Input fence ++ * @dma_fence.fence: Points to the dma-buf output fence for this atom. ++ * @dma_fence.context: The dma-buf fence context number for this atom. A ++ * unique context number is allocated to each katom in ++ * the context on context creation. ++ * @dma_fence.seqno: The dma-buf fence sequence number for this atom. This ++ * is increased every time this katom uses dma-buf fence ++ * @dma_fence.callbacks: List of all callbacks set up to wait on other fences ++ * @dma_fence.dep_count: Atomic counter of number of outstanding dma-buf fence ++ * dependencies for this atom. + * @event_code: Event code for the job chain represented by the atom, + * both HW and low-level SW events are represented by + * event codes. 
+@@ -443,6 +450,8 @@ struct kbase_ext_res { + * @blocked: flag indicating that atom's resubmission to GPU is + * blocked till the work item is scheduled to return the + * atom to JS. ++ * @seq_nr: user-space sequence number, to order atoms in some ++ * temporal order + * @pre_dep: Pointer to atom that this atom has same-slot + * dependency on + * @post_dep: Pointer to atom that has same-slot dependency on +@@ -477,11 +486,19 @@ struct kbase_ext_res { + * when transitioning into or out of protected mode. + * Atom will be either entering or exiting the + * protected mode. ++ * @protected_state.enter: entering the protected mode. ++ * @protected_state.exit: exiting the protected mode. + * @runnable_tree_node: The node added to context's job slot specific rb tree + * when the atom becomes runnable. + * @age: Age of atom relative to other atoms in the context, + * is snapshot of the age_count counter in kbase + * context. ++ * @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified. ++ * @renderpass_id:Renderpass identifier used to associate an atom that has ++ * BASE_JD_REQ_START_RENDERPASS set in its core requirements ++ * with an atom that has BASE_JD_REQ_END_RENDERPASS set. 
++ * @jc_fragment: Set of GPU fragment job chains ++ * @retry_count: TODO: Not used, to be removed + */ + struct kbase_jd_atom { + struct work_struct work; +@@ -496,9 +513,9 @@ struct kbase_jd_atom { + struct list_head jd_item; + bool in_jd_list; + +-#if MALI_JIT_PRESSURE_LIMIT ++#if MALI_JIT_PRESSURE_LIMIT_BASE + u8 jit_ids[2]; +-#endif /* MALI_JIT_PRESSURE_LIMIT */ ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + u16 nr_extres; + struct kbase_ext_res *extres; +@@ -516,7 +533,6 @@ struct kbase_jd_atom { + * when working with this sub struct + */ + #if defined(CONFIG_SYNC_FILE) +- /* Input fence */ + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence_in; + #else +@@ -539,14 +555,7 @@ struct kbase_jd_atom { + #else + struct dma_fence *fence; + #endif +- /* The dma-buf fence context number for this atom. A unique +- * context number is allocated to each katom in the context on +- * context creation. +- */ + unsigned int context; +- /* The dma-buf fence sequence number for this atom. This is +- * increased every time this katom uses dma-buf fence. +- */ + atomic_t seqno; + /* This contains a list of all callbacks set up to wait on + * other fences. 
This atom must be held back from JS until all +@@ -593,7 +602,7 @@ struct kbase_jd_atom { + + wait_queue_head_t completed; + enum kbase_jd_atom_state status; +-#ifdef CONFIG_GPU_TRACEPOINTS ++#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) + int work_id; + #endif + int slot_nr; +@@ -608,6 +617,8 @@ struct kbase_jd_atom { + + atomic_t blocked; + ++ u64 seq_nr; ++ + struct kbase_jd_atom *pre_dep; + struct kbase_jd_atom *post_dep; + +@@ -616,7 +627,7 @@ struct kbase_jd_atom { + + u32 flush_id; + +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + struct base_job_fault_event fault_event; + #endif + struct list_head queue; +@@ -778,6 +789,7 @@ struct kbase_jd_renderpass { + * @jit_pending_alloc: A list of just-in-time memory allocation + * soft-jobs which will be reattempted after the + * impending free of other active allocations. ++ * @max_priority: Max priority level allowed for this context. + */ + struct kbase_jd_context { + struct mutex lock; +@@ -792,12 +804,13 @@ struct kbase_jd_context { + u32 job_nr; + size_t tb_wrap_offset; + +-#ifdef CONFIG_GPU_TRACEPOINTS ++#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) + atomic_t work_id; + #endif + + struct list_head jit_atoms_head; + struct list_head jit_pending_alloc; ++ int max_priority; + }; + + /** +@@ -815,4 +828,27 @@ struct jsctx_queue { + struct list_head x_dep_head; + }; + ++/** ++ * struct kbase_as - Object representing an address space of GPU. ++ * @number: Index at which this address space structure is present ++ * in an array of address space structures embedded inside ++ * the &struct kbase_device. ++ * @pf_wq: Workqueue for processing work items related to ++ * Page fault and Bus fault handling. ++ * @work_pagefault: Work item for the Page fault handling. ++ * @work_busfault: Work item for the Bus fault handling. ++ * @pf_data: Data relating to Page fault. ++ * @bf_data: Data relating to Bus fault. ++ * @current_setup: Stores the MMU configuration for this address space. 
++ */ ++struct kbase_as { ++ int number; ++ struct workqueue_struct *pf_wq; ++ struct work_struct work_pagefault; ++ struct work_struct work_busfault; ++ struct kbase_fault pf_data; ++ struct kbase_fault bf_data; ++ struct kbase_mmu_setup current_setup; ++}; ++ + #endif /* _KBASE_JM_DEFS_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_js.h b/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_js.h +index 6c222ce..5e0c4bc 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_js.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_js.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +@@ -32,6 +31,7 @@ + + /** + * kbasep_js_devdata_init - Initialize the Job Scheduler ++ * @kbdev: The kbase_device to operate on + * + * The struct kbasep_js_device_data sub-structure of kbdev must be zero + * initialized before passing to the kbasep_js_devdata_init() function. This is +@@ -41,6 +41,7 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev); + + /** + * kbasep_js_devdata_halt - Halt the Job Scheduler. 
++ * @kbdev: The kbase_device to operate on + * + * It is safe to call this on kbdev even if it the kbasep_js_device_data + * sub-structure was never initialized/failed initialization, to give efficient +@@ -58,6 +59,7 @@ void kbasep_js_devdata_halt(struct kbase_device *kbdev); + + /** + * kbasep_js_devdata_term - Terminate the Job Scheduler ++ * @kbdev: The kbase_device to operate on + * + * It is safe to call this on kbdev even if it the kbasep_js_device_data + * sub-structure was never initialized/failed initialization, to give efficient +@@ -75,6 +77,7 @@ void kbasep_js_devdata_term(struct kbase_device *kbdev); + /** + * kbasep_js_kctx_init - Initialize the Scheduling Component of a + * struct kbase_context on the Job Scheduler. ++ * @kctx: The kbase_context to operate on + * + * This effectively registers a struct kbase_context with a Job Scheduler. + * +@@ -89,6 +92,7 @@ int kbasep_js_kctx_init(struct kbase_context *const kctx); + /** + * kbasep_js_kctx_term - Terminate the Scheduling Component of a + * struct kbase_context on the Job Scheduler ++ * @kctx: The kbase_context to operate on + * + * This effectively de-registers a struct kbase_context from its Job Scheduler + * +@@ -108,6 +112,8 @@ void kbasep_js_kctx_term(struct kbase_context *kctx); + * kbasep_js_add_job - Add a job chain to the Job Scheduler, + * and take necessary actions to + * schedule the context/run the job. ++ * @kctx: The kbase_context to operate on ++ * @atom: Atom to add + * + * This atomically does the following: + * * Update the numbers of jobs information +@@ -151,7 +157,10 @@ bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom); + /** + * kbasep_js_remove_job - Remove a job chain from the Job Scheduler, + * except for its 'retained state'. 
+- * ++ * @kbdev: The kbase_device to operate on ++ * @kctx: The kbase_context to operate on ++ * @atom: Atom to remove ++* + * Completely removing a job requires several calls: + * * kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of + * the atom +@@ -185,6 +194,9 @@ void kbasep_js_remove_job(struct kbase_device *kbdev, + * kbasep_js_remove_cancelled_job - Completely remove a job chain from the + * Job Scheduler, in the case + * where the job chain was cancelled. ++ * @kbdev: The kbase_device to operate on ++ * @kctx: The kbase_context to operate on ++ * @katom: Atom to remove + * + * This is a variant of kbasep_js_remove_job() that takes care of removing all + * of the retained state too. This is generally useful for cancelled atoms, +@@ -215,6 +227,9 @@ bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, + * kbasep_js_runpool_requeue_or_kill_ctx - Handling the requeuing/killing of a + * context that was evicted from the + * policy queue or runpool. ++ * @kbdev: The kbase_device to operate on ++ * @kctx: The kbase_context to operate on ++ * @has_pm_ref: tells whether to release Power Manager active reference + * + * This should be used whenever handing off a context that has been evicted + * from the policy queue or the runpool: +@@ -242,6 +257,8 @@ void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, + /** + * kbasep_js_runpool_release_ctx - Release a refcount of a context being busy, + * allowing it to be scheduled out. ++ * @kbdev: The kbase_device to operate on ++ * @kctx: The kbase_context to operate on + * + * When the refcount reaches zero and the context might be scheduled out + * (depending on whether the Scheduling Policy has deemed it so, or if it has +@@ -296,6 +313,9 @@ void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, + * kbasep_js_runpool_release_ctx_and_katom_retained_state - Variant of + * kbasep_js_runpool_release_ctx() that handles additional + * actions from completing an atom. 
++ * @kbdev: KBase device ++ * @kctx: KBase context ++ * @katom_retained_state: Retained state from the atom + * + * This is usually called as part of completing an atom and releasing the + * refcount on the context held by the atom. +@@ -315,8 +335,12 @@ void kbasep_js_runpool_release_ctx_and_katom_retained_state( + struct kbasep_js_atom_retained_state *katom_retained_state); + + /** +- * kbasep_js_runpool_release_ctx_nolock - Variant of +- * kbase_js_runpool_release_ctx() that assumes that ++ * kbasep_js_runpool_release_ctx_nolock - ++ * Variant of kbase_js_runpool_release_ctx() w/out locks ++ * @kbdev: KBase device ++ * @kctx: KBase context ++ * ++ * Variant of kbase_js_runpool_release_ctx() that assumes that + * kbasep_js_device_data::runpool_mutex and + * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not + * attempt to schedule new contexts. +@@ -326,6 +350,8 @@ void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev, + + /** + * kbasep_js_schedule_privileged_ctx - Schedule in a privileged context ++ * @kbdev: KBase device ++ * @kctx: KBase context + * + * This schedules a context in regardless of the context priority. + * If the runpool is full, a context will be forced out of the runpool and the +@@ -351,6 +377,8 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, + /** + * kbasep_js_release_privileged_ctx - Release a privileged context, + * allowing it to be scheduled out. ++ * @kbdev: KBase device ++ * @kctx: KBase context + * + * See kbasep_js_runpool_release_ctx for potential side effects. 
+ * +@@ -368,6 +396,7 @@ void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, + + /** + * kbase_js_try_run_jobs - Try to submit the next job on each slot ++ * @kbdev: KBase device + * + * The following locks may be used: + * * kbasep_js_device_data::runpool_mutex +@@ -378,6 +407,7 @@ void kbase_js_try_run_jobs(struct kbase_device *kbdev); + /** + * kbasep_js_suspend - Suspend the job scheduler during a Power Management + * Suspend event. ++ * @kbdev: KBase device + * + * Causes all contexts to be removed from the runpool, and prevents any + * contexts from (re)entering the runpool. +@@ -401,6 +431,7 @@ void kbasep_js_suspend(struct kbase_device *kbdev); + /** + * kbasep_js_resume - Resume the Job Scheduler after a Power Management + * Resume event. ++ * @kbdev: KBase device + * + * This restores the actions from kbasep_js_suspend(): + * * Schedules contexts back into the runpool +@@ -412,7 +443,7 @@ void kbasep_js_resume(struct kbase_device *kbdev); + * kbase_js_dep_resolved_submit - Submit an atom to the job scheduler. + * + * @kctx: Context pointer +- * @atom: Pointer to the atom to submit ++ * @katom: Pointer to the atom to submit + * + * The atom is enqueued on the context's ringbuffer. The caller must have + * ensured that all dependencies can be represented in the ringbuffer. +@@ -457,7 +488,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js); + * kbase_js_unpull - Return an atom to the job scheduler ringbuffer. + * + * @kctx: Context pointer +- * @atom: Pointer to the atom to unpull ++ * @katom: Pointer to the atom to unpull + * + * An atom is 'unpulled' if execution is stopped but intended to be returned to + * later. 
The most common reason for this is that the atom has been +@@ -584,7 +615,6 @@ void kbase_js_set_timeouts(struct kbase_device *kbdev); + */ + void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority); + +- + /** + * kbase_js_update_ctx_priority - update the context priority + * +@@ -603,6 +633,8 @@ void kbase_js_update_ctx_priority(struct kbase_context *kctx); + /** + * kbasep_js_is_submit_allowed - Check that a context is allowed to submit + * jobs on this policy ++ * @js_devdata: KBase Job Scheduler Device Data ++ * @kctx: KBase context + * + * The purpose of this abstraction is to hide the underlying data size, + * and wrap up the long repeated line of code. +@@ -625,13 +657,15 @@ static inline bool kbasep_js_is_submit_allowed( + test_bit = (u16) (1u << kctx->as_nr); + + is_allowed = (bool) (js_devdata->runpool_irq.submit_allowed & test_bit); +- dev_dbg(kctx->kbdev->dev, "JS: submit %s allowed on %p (as=%d)", ++ dev_dbg(kctx->kbdev->dev, "JS: submit %s allowed on %pK (as=%d)", + is_allowed ? "is" : "isn't", (void *)kctx, kctx->as_nr); + return is_allowed; + } + + /** + * kbasep_js_set_submit_allowed - Allow a context to submit jobs on this policy ++ * @js_devdata: KBase Job Scheduler Device Data ++ * @kctx: KBase context + * + * The purpose of this abstraction is to hide the underlying data size, + * and wrap up the long repeated line of code. 
+@@ -650,7 +684,7 @@ static inline void kbasep_js_set_submit_allowed( + + set_bit = (u16) (1u << kctx->as_nr); + +- dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", ++ dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %pK (as=%d)", + kctx, kctx->as_nr); + + js_devdata->runpool_irq.submit_allowed |= set_bit; +@@ -659,6 +693,8 @@ static inline void kbasep_js_set_submit_allowed( + /** + * kbasep_js_clear_submit_allowed - Prevent a context from submitting more + * jobs on this policy ++ * @js_devdata: KBase Job Scheduler Device Data ++ * @kctx: KBase context + * + * The purpose of this abstraction is to hide the underlying data size, + * and wrap up the long repeated line of code. +@@ -679,13 +715,17 @@ static inline void kbasep_js_clear_submit_allowed( + clear_bit = (u16) (1u << kctx->as_nr); + clear_mask = ~clear_bit; + +- dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", ++ dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %pK (as=%d)", + kctx, kctx->as_nr); + + js_devdata->runpool_irq.submit_allowed &= clear_mask; + } + + /** ++ * kbasep_js_atom_retained_state_init_invalid - ++ * Create an initial 'invalid' atom retained state ++ * @retained_state: pointer where to create and initialize the state ++ * + * Create an initial 'invalid' atom retained state, that requires no + * atom-related work to be done on releasing with + * kbasep_js_runpool_release_ctx_and_katom_retained_state() +@@ -699,6 +739,10 @@ static inline void kbasep_js_atom_retained_state_init_invalid( + } + + /** ++ * kbasep_js_atom_retained_state_copy() - Copy atom state ++ * @retained_state: where to copy ++ * @katom: where to copy from ++ * + * Copy atom state that can be made available after jd_done_nolock() is called + * on that atom. 
+ */ +@@ -743,7 +787,7 @@ static inline bool kbasep_js_has_atom_finished( + * kbasep_js_atom_retained_state_is_valid - Determine whether a struct + * kbasep_js_atom_retained_state + * is valid +- * @katom_retained_state the atom's retained state to check ++ * @katom_retained_state: the atom's retained state to check + * + * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates + * that the code should just ignore it. +@@ -759,6 +803,8 @@ static inline bool kbasep_js_atom_retained_state_is_valid( + + /** + * kbase_js_runpool_inc_context_count - Increment number of running contexts. ++ * @kbdev: KBase device ++ * @kctx: KBase context + * + * The following locking conditions are made on the caller: + * * The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. +@@ -795,6 +841,8 @@ static inline void kbase_js_runpool_inc_context_count( + /** + * kbase_js_runpool_dec_context_count - decrement number of running contexts. + * ++ * @kbdev: KBase device ++ * @kctx: KBase context + * The following locking conditions are made on the caller: + * * The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. + * * The caller must hold the kbasep_js_device_data::runpool_mutex +@@ -889,4 +937,17 @@ static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio) + return kbasep_js_relative_priority_to_atom[prio_idx]; + } + ++/** ++ * kbase_js_priority_check - Check the priority requested ++ * ++ * @kbdev: Device pointer ++ * @priority: Requested priority ++ * ++ * This will determine whether the requested priority can be satisfied. ++ * ++ * Return: The same or lower priority than requested. 
++ */ ++ ++base_jd_prio kbase_js_priority_check(struct kbase_device *kbdev, base_jd_prio priority); ++ + #endif /* _KBASE_JM_JS_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_js_defs.h b/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_js_defs.h +index 0b48615..75152fb 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_js_defs.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_js_defs.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2011-2018, 2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,33 +17,15 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- + /** +- * @file mali_kbase_js.h +- * Job Scheduler Type Definitions ++ * DOC: Job Scheduler Type Definitions + */ + + #ifndef _KBASE_JS_DEFS_H_ + #define _KBASE_JS_DEFS_H_ + +-/** +- * @addtogroup base_api +- * @{ +- */ +- +-/** +- * @addtogroup base_kbase_api +- * @{ +- */ +- +-/** +- * @addtogroup kbase_js +- * @{ +- */ + /* Forward decls */ + struct kbase_device; + struct kbase_jd_atom; +@@ -50,11 +33,14 @@ struct kbase_jd_atom; + + typedef u32 kbase_context_flags; + +-/** Callback function run on all of a context's jobs registered with the Job +- * Scheduler */ +-typedef void (*kbasep_js_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom); ++/* ++ * typedef kbasep_js_ctx_job_cb - Callback function run on all of a context's ++ * jobs registered with the Job Scheduler ++ */ ++typedef void kbasep_js_ctx_job_cb(struct kbase_device *kbdev, ++ struct kbase_jd_atom *katom); + +-/** ++/* + * @brief Maximum number of jobs that can be submitted to a job slot whilst + * inside the IRQ handler. + * +@@ -65,7 +51,15 @@ typedef void (*kbasep_js_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd + #define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2 + + /** +- * @brief Context attributes ++ * enum kbasep_js_ctx_attr - Context attributes ++ * @KBASEP_JS_CTX_ATTR_COMPUTE: Attribute indicating a context that contains ++ * Compute jobs. ++ * @KBASEP_JS_CTX_ATTR_NON_COMPUTE: Attribute indicating a context that contains ++ * Non-Compute jobs. ++ * @KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES: Attribute indicating that a context ++ * contains compute-job atoms that aren't restricted to a coherent group, ++ * and can run on all cores. 
++ * @KBASEP_JS_CTX_ATTR_COUNT: Must be the last in the enum + * + * Each context attribute can be thought of as a boolean value that caches some + * state information about either the runpool, or the context: +@@ -82,61 +76,70 @@ typedef void (*kbasep_js_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd + * - The runpool holds a refcount of how many contexts in the runpool have this + * attribute. + * - The context holds a refcount of how many atoms have this attribute. ++ * ++ * KBASEP_JS_CTX_ATTR_COMPUTE: ++ * Attribute indicating a context that contains Compute jobs. That is, ++ * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE ++ * ++ * @note A context can be both 'Compute' and 'Non Compute' if it contains ++ * both types of jobs. ++ * ++ * KBASEP_JS_CTX_ATTR_NON_COMPUTE: ++ * Attribute indicating a context that contains Non-Compute jobs. That is, ++ * the context has some jobs that are \b not of type @ref ++ * BASE_JD_REQ_ONLY_COMPUTE. ++ * ++ * @note A context can be both 'Compute' and 'Non Compute' if it contains ++ * both types of jobs. ++ * ++ * KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES: ++ * Attribute indicating that a context contains compute-job atoms that ++ * aren't restricted to a coherent group, and can run on all cores. ++ * ++ * Specifically, this is when the atom's \a core_req satisfy: ++ * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2 ++ * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups ++ * ++ * Such atoms could be blocked from running if one of the coherent groups ++ * is being used by another job slot, so tracking this context attribute ++ * allows us to prevent such situations. ++ * ++ * @note This doesn't take into account the 1-coregroup case, where all ++ * compute atoms would effectively be able to run on 'all cores', but ++ * contexts will still not always get marked with this attribute. 
Instead, ++ * it is the caller's responsibility to take into account the number of ++ * coregroups when interpreting this attribute. ++ * ++ * @note Whilst Tiler atoms are normally combined with ++ * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without ++ * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy ++ * enough to handle anyway. ++ * ++ * + */ + enum kbasep_js_ctx_attr { +- /** Attribute indicating a context that contains Compute jobs. That is, +- * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE +- * +- * @note A context can be both 'Compute' and 'Non Compute' if it contains +- * both types of jobs. +- */ + KBASEP_JS_CTX_ATTR_COMPUTE, +- +- /** Attribute indicating a context that contains Non-Compute jobs. That is, +- * the context has some jobs that are \b not of type @ref +- * BASE_JD_REQ_ONLY_COMPUTE. +- * +- * @note A context can be both 'Compute' and 'Non Compute' if it contains +- * both types of jobs. +- */ + KBASEP_JS_CTX_ATTR_NON_COMPUTE, +- +- /** Attribute indicating that a context contains compute-job atoms that +- * aren't restricted to a coherent group, and can run on all cores. +- * +- * Specifically, this is when the atom's \a core_req satisfy: +- * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2 +- * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups +- * +- * Such atoms could be blocked from running if one of the coherent groups +- * is being used by another job slot, so tracking this context attribute +- * allows us to prevent such situations. +- * +- * @note This doesn't take into account the 1-coregroup case, where all +- * compute atoms would effectively be able to run on 'all cores', but +- * contexts will still not always get marked with this attribute. Instead, +- * it is the caller's responsibility to take into account the number of +- * coregroups when interpreting this attribute. 
+- * +- * @note Whilst Tiler atoms are normally combined with +- * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without +- * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy +- * enough to handle anyway. +- */ + KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, +- +- /** Must be the last in the enum */ + KBASEP_JS_CTX_ATTR_COUNT + }; + + enum { +- /** Bit indicating that new atom should be started because this atom completed */ ++ /* ++ * Bit indicating that new atom should be started because this atom ++ * completed ++ */ + KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0), +- /** Bit indicating that the atom was evicted from the JS_NEXT registers */ ++ /* ++ * Bit indicating that the atom was evicted from the JS_NEXT registers ++ */ + KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1) + }; + +-/** Combination of KBASE_JS_ATOM_DONE_<...> bits */ ++/** ++ * typedef kbasep_js_atom_done_code - Combination of KBASE_JS_ATOM_DONE_<...> ++ * bits ++ */ + typedef u32 kbasep_js_atom_done_code; + + /* +@@ -168,7 +171,9 @@ enum { + * Internal atom priority defines for kbase_jd_atom::sched_prio + */ + enum { +- KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0, ++ KBASE_JS_ATOM_SCHED_PRIO_FIRST = 0, ++ KBASE_JS_ATOM_SCHED_PRIO_REALTIME = KBASE_JS_ATOM_SCHED_PRIO_FIRST, ++ KBASE_JS_ATOM_SCHED_PRIO_HIGH, + KBASE_JS_ATOM_SCHED_PRIO_MED, + KBASE_JS_ATOM_SCHED_PRIO_LOW, + KBASE_JS_ATOM_SCHED_PRIO_COUNT, +@@ -183,7 +188,70 @@ enum { + #define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED + + /** +- * @brief KBase Device Data Job Scheduler sub-structure ++ * struct kbasep_js_device_data - KBase Device Data Job Scheduler sub-structure ++ * @runpool_irq: Sub-structure to collect together Job Scheduling data used in ++ * IRQ context. The hwaccess_lock must be held when accessing. ++ * @runpool_irq.submit_allowed: Bitvector indicating whether a currently ++ * scheduled context is allowed to submit jobs. 
When bit 'N' is set in ++ * this, it indicates whether the context bound to address space 'N' is ++ * allowed to submit jobs. ++ * @runpool_irq.ctx_attr_ref_count: Array of Context Attributes Ref_counters: ++ * Each is large enough to hold a refcount of the number of contexts ++ * that can fit into the runpool. This is currently BASE_MAX_NR_AS. ++ * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store ++ * the refcount. Hence, it's not worthwhile reducing this to ++ * bit-manipulation on u32s to save space (where in contrast, 4 bit ++ * sub-fields would be easy to do and would save space). ++ * Whilst this must not become negative, the sign bit is used for: ++ * - error detection in debug builds ++ * - Optimization: it is undefined for a signed int to overflow, and so ++ * the compiler can optimize for that never happening (thus, no masking ++ * is required on updating the variable) ++ * @runpool_irq.slot_affinities: Affinity management and tracking. Bitvector ++ * to aid affinity checking. Element 'n' bit 'i' indicates that slot 'n' ++ * is using core i (i.e. slot_affinity_refcount[n][i] > 0) ++ * @runpool_irq.slot_affinity_refcount: Array of refcounts for each core owned ++ * by each slot. Used to generate the slot_affinities array of bitvectors. ++ * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS, ++ * because it is refcounted only when a job is definitely about to be ++ * submitted to a slot, and is de-refcounted immediately after a job ++ * finishes ++ * @schedule_sem: Scheduling semaphore. This must be held when calling ++ * kbase_jm_kick() ++ * @ctx_list_pullable: List of contexts that can currently be pulled from ++ * @ctx_list_unpullable: List of contexts that can not currently be pulled ++ * from, but have jobs currently running. 
++ * @nr_user_contexts_running: Number of currently scheduled user contexts ++ * (excluding ones that are not submitting jobs) ++ * @nr_all_contexts_running: Number of currently scheduled contexts (including ++ * ones that are not submitting jobs) ++ * @js_reqs: Core Requirements to match up with base_js_atom's core_req member ++ * @note This is a write-once member, and so no locking is required to ++ * read ++ * @scheduling_period_ns: Value for JS_SCHEDULING_PERIOD_NS ++ * @soft_stop_ticks: Value for JS_SOFT_STOP_TICKS ++ * @soft_stop_ticks_cl: Value for JS_SOFT_STOP_TICKS_CL ++ * @hard_stop_ticks_ss: Value for JS_HARD_STOP_TICKS_SS ++ * @hard_stop_ticks_cl: Value for JS_HARD_STOP_TICKS_CL ++ * @hard_stop_ticks_dumping: Value for JS_HARD_STOP_TICKS_DUMPING ++ * @gpu_reset_ticks_ss: Value for JS_RESET_TICKS_SS ++ * @gpu_reset_ticks_cl: Value for JS_RESET_TICKS_CL ++ * @gpu_reset_ticks_dumping: Value for JS_RESET_TICKS_DUMPING ++ * @ctx_timeslice_ns: Value for JS_CTX_TIMESLICE_NS ++ * @suspended_soft_jobs_list: List of suspended soft jobs ++ * @softstop_always: Support soft-stop on a single context ++ * @init_status: The initialized-flag is placed at the end, to avoid ++ * cache-pollution (we should only be using this during init/term paths). ++ * @note This is a write-once member, and so no locking is required to ++ * read ++ * @nr_contexts_pullable: Number of contexts that can currently be pulled from ++ * @nr_contexts_runnable: Number of contexts that can either be pulled from or ++ * are currently running ++ * @soft_job_timeout_ms: Value for JS_SOFT_JOB_TIMEOUT ++ * @queue_mutex: Queue Lock, used to access the Policy's queue of contexts ++ * independently of the Run Pool. ++ * Of course, you don't need the Run Pool lock to access this. ++ * @runpool_mutex: Run Pool mutex, for managing contexts within the runpool. + * + * This encapsulates the current context of the Job Scheduler on a particular + * device.
This context is global to the device, and is not tied to any +@@ -191,121 +259,49 @@ enum { + * + * nr_contexts_running and as_free are optimized for packing together (by making + * them smaller types than u32). The operations on them should rarely involve +- * masking. The use of signed types for arithmetic indicates to the compiler that +- * the value will not rollover (which would be undefined behavior), and so under +- * the Total License model, it is free to make optimizations based on that (i.e. +- * to remove masking). ++ * masking. The use of signed types for arithmetic indicates to the compiler ++ * that the value will not rollover (which would be undefined behavior), and so ++ * under the Total License model, it is free to make optimizations based on ++ * that (i.e. to remove masking). + */ + struct kbasep_js_device_data { +- /* Sub-structure to collect together Job Scheduling data used in IRQ +- * context. The hwaccess_lock must be held when accessing. */ + struct runpool_irq { +- /** Bitvector indicating whether a currently scheduled context is allowed to submit jobs. +- * When bit 'N' is set in this, it indicates whether the context bound to address space +- * 'N' is allowed to submit jobs. +- */ + u16 submit_allowed; +- +- /** Context Attributes: +- * Each is large enough to hold a refcount of the number of contexts +- * that can fit into the runpool. This is currently BASE_MAX_NR_AS +- * +- * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store +- * the refcount. Hence, it's not worthwhile reducing this to +- * bit-manipulation on u32s to save space (where in contrast, 4 bit +- * sub-fields would be easy to do and would save space). 
+- * +- * Whilst this must not become negative, the sign bit is used for: +- * - error detection in debug builds +- * - Optimization: it is undefined for a signed int to overflow, and so +- * the compiler can optimize for that never happening (thus, no masking +- * is required on updating the variable) */ + s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT]; +- +- /* +- * Affinity management and tracking +- */ +- /** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates +- * that slot 'n' is using core i (i.e. slot_affinity_refcount[n][i] > 0) */ + u64 slot_affinities[BASE_JM_MAX_NR_SLOTS]; +- /** Refcount for each core owned by each slot. Used to generate the +- * slot_affinities array of bitvectors +- * +- * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS, +- * because it is refcounted only when a job is definitely about to be +- * submitted to a slot, and is de-refcounted immediately after a job +- * finishes */ + s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64]; + } runpool_irq; +- +- /** +- * Scheduling semaphore. This must be held when calling +- * kbase_jm_kick() +- */ + struct semaphore schedule_sem; +- +- /** +- * List of contexts that can currently be pulled from +- */ +- struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT]; +- /** +- * List of contexts that can not currently be pulled from, but have +- * jobs currently running. 
+- */ +- struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT]; +- +- /** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */ ++ struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS] ++ [KBASE_JS_ATOM_SCHED_PRIO_COUNT]; ++ struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS] ++ [KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + s8 nr_user_contexts_running; +- /** Number of currently scheduled contexts (including ones that are not submitting jobs) */ + s8 nr_all_contexts_running; +- +- /** Core Requirements to match up with base_js_atom's core_req memeber +- * @note This is a write-once member, and so no locking is required to read */ + base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS]; + +- u32 scheduling_period_ns; /*< Value for JS_SCHEDULING_PERIOD_NS */ +- u32 soft_stop_ticks; /*< Value for JS_SOFT_STOP_TICKS */ +- u32 soft_stop_ticks_cl; /*< Value for JS_SOFT_STOP_TICKS_CL */ +- u32 hard_stop_ticks_ss; /*< Value for JS_HARD_STOP_TICKS_SS */ +- u32 hard_stop_ticks_cl; /*< Value for JS_HARD_STOP_TICKS_CL */ +- u32 hard_stop_ticks_dumping; /*< Value for JS_HARD_STOP_TICKS_DUMPING */ +- u32 gpu_reset_ticks_ss; /*< Value for JS_RESET_TICKS_SS */ +- u32 gpu_reset_ticks_cl; /*< Value for JS_RESET_TICKS_CL */ +- u32 gpu_reset_ticks_dumping; /*< Value for JS_RESET_TICKS_DUMPING */ +- u32 ctx_timeslice_ns; /**< Value for JS_CTX_TIMESLICE_NS */ ++ u32 scheduling_period_ns; ++ u32 soft_stop_ticks; ++ u32 soft_stop_ticks_cl; ++ u32 hard_stop_ticks_ss; ++ u32 hard_stop_ticks_cl; ++ u32 hard_stop_ticks_dumping; ++ u32 gpu_reset_ticks_ss; ++ u32 gpu_reset_ticks_cl; ++ u32 gpu_reset_ticks_dumping; ++ u32 ctx_timeslice_ns; + +- /** List of suspended soft jobs */ + struct list_head suspended_soft_jobs_list; + + #ifdef CONFIG_MALI_DEBUG +- /* Support soft-stop on a single context */ + bool softstop_always; + #endif /* CONFIG_MALI_DEBUG */ +- +- /** The initalized-flag is placed at the end, to avoid 
cache-pollution (we should +- * only be using this during init/term paths). +- * @note This is a write-once member, and so no locking is required to read */ + int init_status; +- +- /* Number of contexts that can currently be pulled from */ + u32 nr_contexts_pullable; +- +- /* Number of contexts that can either be pulled from or are currently +- * running */ + atomic_t nr_contexts_runnable; +- +- /** Value for JS_SOFT_JOB_TIMEOUT */ + atomic_t soft_job_timeout_ms; +- +- /** +- * Queue Lock, used to access the Policy's queue of contexts +- * independently of the Run Pool. +- * +- * Of course, you don't need the Run Pool lock to access this. +- */ + struct mutex queue_mutex; +- +- /** ++ /* + * Run Pool mutex, for managing contexts within the runpool. + * Unless otherwise specified, you must hold this lock whilst accessing + * any members that follow +@@ -317,61 +313,59 @@ struct kbasep_js_device_data { + }; + + /** +- * @brief KBase Context Job Scheduling information structure ++ * struct kbasep_js_kctx_info - KBase Context Job Scheduling information ++ * structure ++ * @ctx: Job Scheduler Context information sub-structure. Its members are ++ * accessed regardless of whether the context is: ++ * - In the Policy's Run Pool ++ * - In the Policy's Queue ++ * - Not queued nor in the Run Pool. ++ * You must obtain the @ctx.jsctx_mutex before accessing any other members ++ * of this substructure. ++ * You may not access any of its members from IRQ context. ++ * @ctx.jsctx_mutex: Job Scheduler Context lock ++ * @ctx.nr_jobs: Number of jobs ready to run - does \em not include ++ * the jobs waiting in the dispatcher, and dependency-only ++ * jobs. See kbase_jd_context::job_nr for such jobs ++ * @ctx.ctx_attr_ref_count: Context Attributes ref count. Each is large enough ++ * to hold a refcount of the number of atoms on the context. ++ * @ctx.is_scheduled_wait: Wait queue to wait for KCTX_SCHEDULED flag state ++ * changes. ++ * @ctx.ctx_list_entry: Link implementing JS queues.
Context can be present on ++ * one list per job slot. ++ * @init_status: The initialized-flag is placed at the end, to avoid ++ * cache-pollution (we should only be using this during init/term paths) + * + * This is a substructure in the struct kbase_context that encapsulates all the + * scheduling information. + */ + struct kbasep_js_kctx_info { +- +- /** +- * Job Scheduler Context information sub-structure. These members are +- * accessed regardless of whether the context is: +- * - In the Policy's Run Pool +- * - In the Policy's Queue +- * - Not queued nor in the Run Pool. +- * +- * You must obtain the jsctx_mutex before accessing any other members of +- * this substructure. +- * +- * You may not access any of these members from IRQ context. +- */ + struct kbase_jsctx { +- struct mutex jsctx_mutex; /**< Job Scheduler Context lock */ ++ struct mutex jsctx_mutex; + +- /** Number of jobs ready to run - does \em not include the jobs waiting in +- * the dispatcher, and dependency-only jobs. See kbase_jd_context::job_nr +- * for such jobs*/ + u32 nr_jobs; +- +- /** Context Attributes: +- * Each is large enough to hold a refcount of the number of atoms on +- * the context. **/ + u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT]; +- +- /** +- * Wait queue to wait for KCTX_SHEDULED flag state changes. +- * */ + wait_queue_head_t is_scheduled_wait; +- +- /** Link implementing JS queues. Context can be present on one +- * list per job slot +- */ + struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS]; + } ctx; +- +- /* The initalized-flag is placed at the end, to avoid cache-pollution (we should +- * only be using this during init/term paths) */ + int init_status; + }; + +-/** Subset of atom state that can be available after jd_done_nolock() is called ++/** ++ * struct kbasep_js_atom_retained_state - Subset of atom state.
++ * @event_code: to determine whether the atom has finished ++ * @core_req: core requirements ++ * @sched_priority: priority ++ * @device_nr: Core group atom was executed on ++ * ++ * Subset of atom state that can be available after jd_done_nolock() is called + * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(), +- * because the original atom could disappear. */ ++ * because the original atom could disappear. ++ */ + struct kbasep_js_atom_retained_state { +- /** Event code - to determine whether the atom has finished */ ++ /* Event code - to determine whether the atom has finished */ + enum base_jd_event_code event_code; +- /** core requirements */ ++ /* core requirements */ + base_jd_core_req core_req; + /* priority */ + int sched_priority; +@@ -380,30 +374,23 @@ struct kbasep_js_atom_retained_state { + + }; + +-/** ++/* + * Value signifying 'no retry on a slot required' for: + * - kbase_js_atom_retained_state::retry_submit_on_slot + * - kbase_jd_atom::retry_submit_on_slot + */ + #define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1) + +-/** +- * base_jd_core_req value signifying 'invalid' for a kbase_jd_atom_retained_state. +- * +- * @see kbase_atom_retained_state_is_valid() ++/* ++ * base_jd_core_req value signifying 'invalid' for a ++ * kbase_jd_atom_retained_state. See kbase_atom_retained_state_is_valid() + */ + #define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP + +-/** +- * @brief The JS timer resolution, in microseconds +- * ++/* ++ * The JS timer resolution, in microseconds + * Any non-zero difference in time will be at least this size. 
+ */ + #define KBASEP_JS_TICK_RESOLUTION_US 1 + +- +- /** @} *//* end group kbase_js */ +- /** @} *//* end group base_kbase_api */ +- /** @} *//* end group base_api */ +- +-#endif /* _KBASE_JS_DEFS_H_ */ ++#endif /* _KBASE_JS_DEFS_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h +index 6885f8d..93cd05f 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* AUTOMATICALLY GENERATED FILE. 
If you want to amend the issues/features, +@@ -50,12 +49,12 @@ enum base_hw_feature { + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, +- BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_TLS_HASHING, + BASE_HW_FEATURE_THREAD_GROUP_SPLIT, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_L2_CONFIG, ++ BASE_HW_FEATURE_ASN_HASH, + BASE_HW_FEATURE_END + }; + +@@ -85,7 +84,6 @@ static const enum base_hw_feature base_hw_features_tMIx[] = { + BASE_HW_FEATURE_THREAD_GROUP_SPLIT, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_COHERENCY_REG, +- BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_END + }; + +@@ -112,7 +110,6 @@ static const enum base_hw_feature base_hw_features_tHEx[] = { + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, +- BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_END + }; + +@@ -139,7 +136,6 @@ static const enum base_hw_feature base_hw_features_tSIx[] = { + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, +- BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_END + }; + +@@ -166,7 +162,6 @@ static const enum base_hw_feature base_hw_features_tDVx[] = { + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, +- BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_END + }; + +@@ -193,7 +188,6 @@ static const enum base_hw_feature base_hw_features_tNOx[] = { + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, +- BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_TLS_HASHING, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_END +@@ -222,7 +216,6 @@ static const enum base_hw_feature base_hw_features_tGOx[] = { + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, +- BASE_HW_FEATURE_AARCH64_MMU, + 
BASE_HW_FEATURE_TLS_HASHING, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_END +@@ -250,7 +243,6 @@ static const enum base_hw_feature base_hw_features_tTRx[] = { + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, +- BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END +@@ -278,7 +270,6 @@ static const enum base_hw_feature base_hw_features_tNAx[] = { + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, +- BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END +@@ -306,14 +297,13 @@ static const enum base_hw_feature base_hw_features_tBEx[] = { + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, +- BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_L2_CONFIG, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END + }; + +-static const enum base_hw_feature base_hw_features_tDUx[] = { ++static const enum base_hw_feature base_hw_features_tBAx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, +@@ -335,14 +325,13 @@ static const enum base_hw_feature base_hw_features_tDUx[] = { + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, +- BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_L2_CONFIG, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END + }; + +-static const enum base_hw_feature base_hw_features_tODx[] = { ++static const enum base_hw_feature base_hw_features_tDUx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, +@@ -364,13 +353,13 @@ static const enum base_hw_feature base_hw_features_tODx[] = { + 
BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, +- BASE_HW_FEATURE_AARCH64_MMU, ++ BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_L2_CONFIG, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END + }; + +-static const enum base_hw_feature base_hw_features_tGRx[] = { ++static const enum base_hw_feature base_hw_features_tODx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, +@@ -392,13 +381,12 @@ static const enum base_hw_feature base_hw_features_tGRx[] = { + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, +- BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_L2_CONFIG, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END + }; + +-static const enum base_hw_feature base_hw_features_tVAx[] = { ++static const enum base_hw_feature base_hw_features_tGRx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, +@@ -420,13 +408,12 @@ static const enum base_hw_feature base_hw_features_tVAx[] = { + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, +- BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_L2_CONFIG, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END + }; + +-static const enum base_hw_feature base_hw_features_tTUx[] = { ++static const enum base_hw_feature base_hw_features_tVAx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, +@@ -448,39 +435,10 @@ static const enum base_hw_feature base_hw_features_tTUx[] = { + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, +- BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_L2_CONFIG, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END + }; + +-static const enum base_hw_feature 
base_hw_features_tE2x[] = { +- BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, +- BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, +- BASE_HW_FEATURE_XAFFINITY, +- BASE_HW_FEATURE_WARPING, +- BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, +- BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, +- BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, +- BASE_HW_FEATURE_BRNDOUT_CC, +- BASE_HW_FEATURE_BRNDOUT_KILL, +- BASE_HW_FEATURE_LD_ST_LEA_TEX, +- BASE_HW_FEATURE_LD_ST_TILEBUFFER, +- BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, +- BASE_HW_FEATURE_MRT, +- BASE_HW_FEATURE_MSAA_16X, +- BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, +- BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, +- BASE_HW_FEATURE_T7XX_PAIRING_RULES, +- BASE_HW_FEATURE_TEST4_DATUM_MODE, +- BASE_HW_FEATURE_FLUSH_REDUCTION, +- BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, +- BASE_HW_FEATURE_COHERENCY_REG, +- BASE_HW_FEATURE_AARCH64_MMU, +- BASE_HW_FEATURE_IDVS_GROUP_SIZE, +- BASE_HW_FEATURE_L2_CONFIG, +- BASE_HW_FEATURE_CLEAN_ONLY_SAFE, +- BASE_HW_FEATURE_END +-}; + + #endif /* _BASE_HWCONFIG_FEATURES_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h +index 3966069..beda1e4 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features, +@@ -59,6 +58,7 @@ enum base_hw_issue { + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TTRX_3485, ++ BASE_HW_ISSUE_GPU2019_3212, + BASE_HW_ISSUE_END + }; + +@@ -532,79 +532,89 @@ static const enum base_hw_issue base_hw_issues_lBEx_r1p1[] = { + BASE_HW_ISSUE_END + }; + +-static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = { ++static const enum base_hw_issue base_hw_issues_tBAx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, ++ BASE_HW_ISSUE_TTRX_3470, ++ BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END + }; + +-static const enum base_hw_issue base_hw_issues_model_tDUx[] = { +- BASE_HW_ISSUE_5736, ++static const enum base_hw_issue base_hw_issues_tBAx_r1p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_TTRX_2968_TTRX_3162, ++ BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, ++ BASE_HW_ISSUE_TTRX_3470, ++ BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END + }; + +-static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = { +- BASE_HW_ISSUE_9435, +- BASE_HW_ISSUE_TSIX_2033, +- BASE_HW_ISSUE_TTRX_1337, +- BASE_HW_ISSUE_END +-}; +- +-static const enum base_hw_issue base_hw_issues_model_tODx[] = { ++static const enum base_hw_issue base_hw_issues_model_tBAx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_TTRX_3414, ++ BASE_HW_ISSUE_TTRX_3083, ++ 
BASE_HW_ISSUE_TTRX_3470, ++ BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END + }; + +-static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = { ++static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_TTRX_921, ++ BASE_HW_ISSUE_TTRX_3414, ++ BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_END + }; + +-static const enum base_hw_issue base_hw_issues_model_tGRx[] = { ++static const enum base_hw_issue base_hw_issues_model_tDUx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_TTRX_3414, ++ BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_END + }; + +-static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = { ++static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_GPU2019_3212, + BASE_HW_ISSUE_END + }; + +-static const enum base_hw_issue base_hw_issues_model_tVAx[] = { ++static const enum base_hw_issue base_hw_issues_model_tODx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, ++ BASE_HW_ISSUE_GPU2019_3212, + BASE_HW_ISSUE_END + }; + +-static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = { ++static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END + }; + +-static const enum base_hw_issue base_hw_issues_model_tTUx[] = { ++static const enum base_hw_issue base_hw_issues_model_tGRx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, +@@ -612,24 +622,20 @@ static const enum base_hw_issue base_hw_issues_model_tTUx[] = { + BASE_HW_ISSUE_END + }; + +-static const enum base_hw_issue base_hw_issues_tE2x_r0p0[] = { ++static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + 
BASE_HW_ISSUE_TTRX_1337, +- BASE_HW_ISSUE_TTRX_921, +- BASE_HW_ISSUE_TTRX_3414, +- BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_END + }; + +-static const enum base_hw_issue base_hw_issues_model_tE2x[] = { ++static const enum base_hw_issue base_hw_issues_model_tVAx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, +- BASE_HW_ISSUE_TTRX_3414, +- BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_END + }; + ++ + #endif /* _BASE_HWCONFIG_ISSUES_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase.h +index 907142d..56db8ca 100755 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + #ifndef _KBASE_H_ + #define _KBASE_H_ + +@@ -38,7 +35,7 @@ + #include + #include + #include +-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)) ++#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE) + #include + #endif + #include +@@ -46,8 +43,9 @@ + #include + #include + #include ++#include + +-#include "mali_base_kernel.h" ++#include + #include + + /* +@@ -66,26 +64,40 @@ + #include "mali_kbase_gpu_memory_debugfs.h" + #include "mali_kbase_mem_profile_debugfs.h" + #include "mali_kbase_gpuprops.h" +-#include "mali_kbase_ioctl.h" ++#include ++#if !MALI_USE_CSF + #include "mali_kbase_debug_job_fault.h" + #include "mali_kbase_jd_debugfs.h" + #include "mali_kbase_jm.h" + #include "mali_kbase_js.h" ++#endif /* !MALI_USE_CSF */ + + #include "ipa/mali_kbase_ipa.h" + +-#ifdef CONFIG_GPU_TRACEPOINTS ++#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) + #include + #endif + + #include "mali_linux_trace.h" + ++#if MALI_USE_CSF ++#include "csf/mali_kbase_csf.h" ++#endif + + #ifndef u64_to_user_ptr + /* Introduced in Linux v4.6 */ + #define u64_to_user_ptr(x) ((void __user *)(uintptr_t)x) + #endif + ++#if MALI_USE_CSF ++/* Physical memory group ID for CSF user I/O. ++ */ ++#define KBASE_MEM_GROUP_CSF_IO BASE_MEM_GROUP_DEFAULT ++ ++/* Physical memory group ID for CSF firmware. ++ */ ++#define KBASE_MEM_GROUP_CSF_FW BASE_MEM_GROUP_DEFAULT ++#endif + + /* Physical memory group ID for a special page which can alias several regions. 
+ */ +@@ -140,9 +152,9 @@ void kbase_release_device(struct kbase_device *kbdev); + * the flag @ref KBASE_REG_TILER_ALIGN_TOP (check the flags of the kbase + * region): + * - alignment offset is set to the difference between the kbase region +- * extent (converted from the original value in pages to bytes) and the kbase ++ * extension (converted from the original value in pages to bytes) and the kbase + * region initial_commit (also converted from the original value in pages to +- * bytes); alignment mask is set to the kbase region extent in bytes and ++ * bytes); alignment mask is set to the kbase region extension in bytes and + * decremented by 1. + * + * Return: if successful, address of the unmapped area aligned as required; +@@ -184,7 +196,7 @@ void kbase_device_pm_term(struct kbase_device *kbdev); + int power_control_init(struct kbase_device *kbdev); + void power_control_term(struct kbase_device *kbdev); + +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + void kbase_device_debugfs_term(struct kbase_device *kbdev); + int kbase_device_debugfs_init(struct kbase_device *kbdev); + #else /* CONFIG_DEBUG_FS */ +@@ -201,11 +213,8 @@ void registers_unmap(struct kbase_device *kbdev); + + int kbase_device_coherency_init(struct kbase_device *kbdev); + +-#ifdef CONFIG_MALI_BUSLOG +-int buslog_init(struct kbase_device *kbdev); +-void buslog_term(struct kbase_device *kbdev); +-#endif + ++#if !MALI_USE_CSF + int kbase_jd_init(struct kbase_context *kctx); + void kbase_jd_exit(struct kbase_context *kctx); + +@@ -213,9 +222,9 @@ void kbase_jd_exit(struct kbase_context *kctx); + * kbase_jd_submit - Submit atoms to the job dispatcher + * + * @kctx: The kbase context to submit to +- * @user_addr: The address in user space of the struct base_jd_atom_v2 array ++ * @user_addr: The address in user space of the struct base_jd_atom array + * @nr_atoms: The number of atoms in the array +- * @stride: sizeof(struct base_jd_atom_v2) ++ * @stride: sizeof(struct base_jd_atom) + * 
@uk6_atom: true if the atoms are legacy atoms (struct base_jd_atom_v2_uk6) + * + * Return: 0 on success or error code +@@ -306,9 +315,12 @@ void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, + void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, + struct kbase_jd_atom *target_katom); + ++#endif /* !MALI_USE_CSF */ + + void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event); ++#if !MALI_USE_CSF + int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent); ++#endif /* !MALI_USE_CSF */ + int kbase_event_pending(struct kbase_context *ctx); + int kbase_event_init(struct kbase_context *kctx); + void kbase_event_close(struct kbase_context *kctx); +@@ -372,6 +384,7 @@ static inline void kbase_free_user_buffer( + */ + int kbase_mem_copy_from_extres(struct kbase_context *kctx, + struct kbase_debug_copy_buffer *buf_data); ++#if !MALI_USE_CSF + int kbase_process_soft_job(struct kbase_jd_atom *katom); + int kbase_prepare_soft_job(struct kbase_jd_atom *katom); + void kbase_finish_soft_job(struct kbase_jd_atom *katom); +@@ -387,18 +400,21 @@ int kbase_soft_event_update(struct kbase_context *kctx, + + void kbasep_soft_job_timeout_worker(struct timer_list *timer); + void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt); ++#endif /* !MALI_USE_CSF */ + + void kbasep_as_do_poke(struct work_struct *work); + + /** + * Check whether a system suspend is in progress, or has already been suspended ++ * @kbdev: The kbase device structure for the device + * + * The caller should ensure that either kbdev->pm.active_count_lock is held, or + * a dmb was executed recently (to ensure the value is most + * up-to-date). However, without a lock the value could change afterwards. 
+ * +- * @return false if a suspend is not in progress +- * @return !=false otherwise ++ * Return: ++ * * false if a suspend is not in progress ++ * * !=false otherwise + */ + static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev) + { +@@ -419,7 +435,27 @@ static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev) + */ + static inline bool kbase_pm_is_gpu_lost(struct kbase_device *kbdev) + { +- return kbdev->pm.gpu_lost; ++ return (atomic_read(&kbdev->pm.gpu_lost) == 0 ? false : true); ++} ++ ++/* ++ * Set or clear gpu lost state ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @gpu_lost: true to activate GPU lost state, FALSE is deactive it ++ * ++ * Puts power management code into gpu lost state or takes it out of the ++ * state. Once in gpu lost state new GPU jobs will no longer be ++ * scheduled. ++ */ ++static inline void kbase_pm_set_gpu_lost(struct kbase_device *kbdev, ++ bool gpu_lost) ++{ ++ const int new_val = (gpu_lost ? 
1 : 0); ++ const int cur_val = atomic_xchg(&kbdev->pm.gpu_lost, new_val); ++ ++ if (new_val != cur_val) ++ KBASE_KTRACE_ADD(kbdev, ARB_GPU_LOST, NULL, new_val); + } + #endif + +@@ -455,9 +491,12 @@ void kbase_pm_metrics_start(struct kbase_device *kbdev); + */ + void kbase_pm_metrics_stop(struct kbase_device *kbdev); + ++#if !MALI_USE_CSF + /** + * Return the atom's ID, as was originally supplied by userspace in +- * base_jd_atom_v2::atom_number ++ * base_jd_atom::atom_number ++ * @kctx: KBase context pointer ++ * @katom: Atome for which to return ID + */ + static inline int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom) + { +@@ -484,6 +523,7 @@ static inline struct kbase_jd_atom *kbase_jd_atom_from_id( + { + return &kctx->jctx.atoms[id]; + } ++#endif /* !MALI_USE_CSF */ + + /** + * Initialize the disjoint state +@@ -508,7 +548,7 @@ static inline struct kbase_jd_atom *kbase_jd_atom_from_id( + * The disjoint event counter is also incremented immediately whenever a job is soft stopped + * and during context creation. + * +- * @param kbdev The kbase device ++ * @kbdev: The kbase device + * + * Return: 0 on success and non-zero value on failure. + */ +@@ -518,7 +558,7 @@ void kbase_disjoint_init(struct kbase_device *kbdev); + * Increase the count of disjoint events + * called when a disjoint event has happened + * +- * @param kbdev The kbase device ++ * @kbdev: The kbase device + */ + void kbase_disjoint_event(struct kbase_device *kbdev); + +@@ -528,14 +568,14 @@ void kbase_disjoint_event(struct kbase_device *kbdev); + * This should be called when something happens which could be disjoint if the GPU + * is in a disjoint state. The state refcount keeps track of this. 
+ * +- * @param kbdev The kbase device ++ * @kbdev: The kbase device + */ + void kbase_disjoint_event_potential(struct kbase_device *kbdev); + + /** + * Returns the count of disjoint events + * +- * @param kbdev The kbase device ++ * @kbdev: The kbase device + * @return the count of disjoint events + */ + u32 kbase_disjoint_event_get(struct kbase_device *kbdev); +@@ -547,7 +587,7 @@ u32 kbase_disjoint_event_get(struct kbase_device *kbdev); + * eventually after the disjoint state has completed @ref kbase_disjoint_state_down + * should be called + * +- * @param kbdev The kbase device ++ * @kbdev: The kbase device + */ + void kbase_disjoint_state_up(struct kbase_device *kbdev); + +@@ -558,68 +598,43 @@ void kbase_disjoint_state_up(struct kbase_device *kbdev); + * + * Called after @ref kbase_disjoint_state_up once the disjoint state is over + * +- * @param kbdev The kbase device ++ * @kbdev: The kbase device + */ + void kbase_disjoint_state_down(struct kbase_device *kbdev); + + /** +- * If a job is soft stopped and the number of contexts is >= this value +- * it is reported as a disjoint event +- */ +-#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2 +- +-#if !defined(UINT64_MAX) +- #define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL) +-#endif +- +-#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) +- +-/* kbase_io_history_init - initialize data struct for register access history +- * +- * @kbdev The register history to initialize +- * @n The number of register accesses that the buffer could hold ++ * kbase_device_pcm_dev_init() - Initialize the priority control manager device + * +- * @return 0 if successfully initialized, failure otherwise +- */ +-int kbase_io_history_init(struct kbase_io_history *h, u16 n); +- +-/* kbase_io_history_term - uninit all resources for the register access history ++ * @kbdev: Pointer to the structure for the kbase device + * +- * @h The register history to terminate +- */ +-void kbase_io_history_term(struct 
kbase_io_history *h); +- +-/* kbase_io_history_dump - print the register history to the kernel ring buffer ++ * Pointer to the priority control manager device is retrieved from the device ++ * tree and a reference is taken on the module implementing the callbacks for ++ * priority control manager operations. + * +- * @kbdev Pointer to kbase_device containing the register history to dump ++ * Return: 0 if successful, or an error code on failure + */ +-void kbase_io_history_dump(struct kbase_device *kbdev); ++int kbase_device_pcm_dev_init(struct kbase_device *const kbdev); + + /** +- * kbase_io_history_resize - resize the register access history buffer. ++ * kbase_device_pcm_dev_term() - Performs priority control manager device ++ * deinitialization. + * +- * @h: Pointer to a valid register history to resize +- * @new_size: Number of accesses the buffer could hold ++ * @kbdev: Pointer to the structure for the kbase device + * +- * A successful resize will clear all recent register accesses. +- * If resizing fails for any reason (e.g., could not allocate memory, invalid +- * buffer size) then the original buffer will be kept intact. +- * +- * @return 0 if the buffer was resized, failure otherwise ++ * Reference is released on the module implementing the callbacks for priority ++ * control manager operations. + */ +-int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size); +- +-#else /* CONFIG_DEBUG_FS */ ++void kbase_device_pcm_dev_term(struct kbase_device *const kbdev); + +-#define kbase_io_history_init(...) 
((int)0) +- +-#define kbase_io_history_term CSTD_NOP +- +-#define kbase_io_history_dump CSTD_NOP +- +-#define kbase_io_history_resize CSTD_NOP ++/** ++ * If a job is soft stopped and the number of contexts is >= this value ++ * it is reported as a disjoint event ++ */ ++#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2 + +-#endif /* CONFIG_DEBUG_FS */ ++#if !defined(UINT64_MAX) ++ #define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL) ++#endif + + /*meson graphics start */ + extern int meson_gpu_data_invalid_count; +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c +index 2e2e394..027eb8c 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2016-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include +@@ -26,7 +25,7 @@ + #include + #include + +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + #ifdef CONFIG_MALI_DEBUG + + static int kbase_as_fault_read(struct seq_file *sfile, void *data) +@@ -80,7 +79,7 @@ static const struct file_operations as_fault_fops = { + */ + void kbase_as_fault_debugfs_init(struct kbase_device *kbdev) + { +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + #ifdef CONFIG_MALI_DEBUG + uint i; + char as_name[64]; +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h +index 496d8b1..919fbc1 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2016, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KBASE_AS_FAULT_DEBUG_FS_H +@@ -39,7 +38,7 @@ void kbase_as_fault_debugfs_init(struct kbase_device *kbdev); + static inline void + kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no) + { +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + #ifdef CONFIG_MALI_DEBUG + kbdev->debugfs_as_read_bitmap |= (1ULL << as_no); + #endif /* CONFIG_DEBUG_FS */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_bits.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_bits.h +index 2c11093..a085fd8 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_bits.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_bits.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,17 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * +- *//* SPDX-License-Identifier: GPL-2.0 */ +-/* +- * +- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. +- * +- * This program is free software and is provided to you under the terms of the +- * GNU General Public License version 2 as published by the Free Software +- * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. 
+ */ + + #ifndef _KBASE_BITS_H_ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c +index 27a03cf..af51ed8 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /* + * Cache Policy API. 
+ */ +@@ -58,10 +55,11 @@ void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, + { + dma_sync_single_for_device(kbdev->dev, handle, size, dir); + } +- ++KBASE_EXPORT_TEST_API(kbase_sync_single_for_device); + + void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, + size_t size, enum dma_data_direction dir) + { + dma_sync_single_for_cpu(kbdev->dev, handle, size, dir); + } ++KBASE_EXPORT_TEST_API(kbase_sync_single_for_cpu); +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h +index 8a1e529..7da33a6 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2013, 2015, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /* + * Cache Policy API. 
+ */ +@@ -30,7 +27,7 @@ + #define _KBASE_CACHE_POLICY_H_ + + #include "mali_kbase.h" +-#include "mali_base_kernel.h" ++#include + + /** + * kbase_cache_enabled - Choose the cache policy for a specific region +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_caps.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_caps.h +new file mode 100644 +index 0000000..c232e21 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_caps.h +@@ -0,0 +1,61 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++/** ++ * DOC: Driver Capability Queries. 
++ */ ++ ++#ifndef _KBASE_CAPS_H_ ++#define _KBASE_CAPS_H_ ++ ++#include ++ ++typedef enum mali_kbase_cap { ++ MALI_KBASE_CAP_SYSTEM_MONITOR = 0, ++ MALI_KBASE_CAP_JIT_PRESSURE_LIMIT, ++ MALI_KBASE_CAP_MEM_GROW_ON_GPF, ++ MALI_KBASE_CAP_MEM_PROTECTED, ++ MALI_KBASE_NUM_CAPS ++} mali_kbase_cap; ++ ++extern bool mali_kbase_supports_cap(unsigned long api_version, mali_kbase_cap cap); ++ ++static inline bool mali_kbase_supports_system_monitor(unsigned long api_version) ++{ ++ return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_SYSTEM_MONITOR); ++} ++ ++static inline bool mali_kbase_supports_jit_pressure_limit(unsigned long api_version) ++{ ++ return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_JIT_PRESSURE_LIMIT); ++} ++ ++static inline bool mali_kbase_supports_mem_grow_on_gpf(unsigned long api_version) ++{ ++ return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_GROW_ON_GPF); ++} ++ ++static inline bool mali_kbase_supports_mem_protected(unsigned long api_version) ++{ ++ return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_PROTECTED); ++} ++ ++#endif /* __KBASE_CAPS_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ccswe.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ccswe.c +new file mode 100644 +index 0000000..6a1e7e4 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ccswe.c +@@ -0,0 +1,100 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#include "mali_kbase_ccswe.h" ++#include "mali_kbase_linux.h" ++ ++#include ++#include ++ ++static u64 kbasep_ccswe_cycle_at_no_lock( ++ struct kbase_ccswe *self, u64 timestamp_ns) ++{ ++ s64 diff_s, diff_ns; ++ u32 gpu_freq; ++ ++ lockdep_assert_held(&self->access); ++ ++ diff_ns = timestamp_ns - self->timestamp_ns; ++ gpu_freq = diff_ns > 0 ? self->gpu_freq : self->prev_gpu_freq; ++ ++ diff_s = div_s64(diff_ns, NSEC_PER_SEC); ++ diff_ns -= diff_s * NSEC_PER_SEC; ++ ++ return self->cycles_elapsed + diff_s * gpu_freq ++ + div_s64(diff_ns * gpu_freq, NSEC_PER_SEC); ++} ++ ++void kbase_ccswe_init(struct kbase_ccswe *self) ++{ ++ memset(self, 0, sizeof(*self)); ++ ++ spin_lock_init(&self->access); ++} ++ ++u64 kbase_ccswe_cycle_at(struct kbase_ccswe *self, u64 timestamp_ns) ++{ ++ unsigned long flags; ++ u64 result; ++ ++ spin_lock_irqsave(&self->access, flags); ++ result = kbasep_ccswe_cycle_at_no_lock(self, timestamp_ns); ++ spin_unlock_irqrestore(&self->access, flags); ++ ++ return result; ++} ++ ++void kbase_ccswe_freq_change( ++ struct kbase_ccswe *self, u64 timestamp_ns, u32 gpu_freq) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&self->access, flags); ++ ++ /* The time must go only forward. */ ++ if (WARN_ON(timestamp_ns < self->timestamp_ns)) ++ goto exit; ++ ++ /* If this is the first frequency change, cycles_elapsed is zero. 
*/ ++ if (self->timestamp_ns) ++ self->cycles_elapsed = kbasep_ccswe_cycle_at_no_lock( ++ self, timestamp_ns); ++ ++ self->timestamp_ns = timestamp_ns; ++ self->prev_gpu_freq = self->gpu_freq; ++ self->gpu_freq = gpu_freq; ++exit: ++ spin_unlock_irqrestore(&self->access, flags); ++} ++ ++void kbase_ccswe_reset(struct kbase_ccswe *self) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&self->access, flags); ++ ++ self->timestamp_ns = 0; ++ self->cycles_elapsed = 0; ++ self->gpu_freq = 0; ++ self->prev_gpu_freq = 0; ++ ++ spin_unlock_irqrestore(&self->access, flags); ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ccswe.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ccswe.h +new file mode 100644 +index 0000000..8e55ffc +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ccswe.h +@@ -0,0 +1,96 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#ifndef _KBASE_CCSWE_H_ ++#define _KBASE_CCSWE_H_ ++ ++#include ++ ++/** ++ * struct kbase_ccswe - Cycle count software estimator. ++ * ++ * @access: Spinlock protecting this structure access. ++ * @timestamp_ns: Timestamp(ns) when the last frequency change ++ * occurred. 
++ * @cycles_elapsed: Number of cycles elapsed before the last frequency ++ * change ++ * @gpu_freq: Current GPU frequency(Hz) value. ++ * @prev_gpu_freq: Previous GPU frequency(Hz) before the last frequency ++ * change. ++ */ ++struct kbase_ccswe { ++ spinlock_t access; ++ u64 timestamp_ns; ++ u64 cycles_elapsed; ++ u32 gpu_freq; ++ u32 prev_gpu_freq; ++}; ++ ++/** ++ * kbase_ccswe_init() - initialize the cycle count estimator. ++ * ++ * @self: Cycles count software estimator instance. ++ */ ++void kbase_ccswe_init(struct kbase_ccswe *self); ++ ++ ++/** ++ * kbase_ccswe_cycle_at() - Estimate cycle count at given timestamp. ++ * ++ * @self: Cycles count software estimator instance. ++ * @timestamp_ns: The timestamp(ns) for cycle count estimation. ++ * ++ * The timestamp must be bigger than the timestamp of the penultimate ++ * frequency change. If only one frequency change occurred, the ++ * timestamp must be bigger than the timestamp of the frequency change. ++ * This is to allow the following code to be executed w/o synchronization. ++ * If lines below executed atomically, it is safe to assume that only ++ * one frequency change may happen in between. ++ * ++ * u64 ts = ktime_get_raw_ns(); ++ * u64 cycle = kbase_ccswe_cycle_at(&ccswe, ts) ++ * ++ * Returns: estimated value of cycle count at a given time. ++ */ ++u64 kbase_ccswe_cycle_at(struct kbase_ccswe *self, u64 timestamp_ns); ++ ++/** ++ * kbase_ccswe_freq_change() - update GPU frequency. ++ * ++ * @self: Cycles count software estimator instance. ++ * @timestamp_ns: Timestamp(ns) when frequency change occurred. ++ * @gpu_freq: New GPU frequency value. ++ * ++ * The timestamp must be bigger than the timestamp of the previous ++ * frequency change. The function is to be called at the frequency ++ * change moment (not later). 
++ */ ++void kbase_ccswe_freq_change( ++ struct kbase_ccswe *self, u64 timestamp_ns, u32 gpu_freq); ++ ++/** ++ * kbase_ccswe_reset() - reset estimator state ++ * ++ * @self: Cycles count software estimator instance. ++ */ ++void kbase_ccswe_reset(struct kbase_ccswe *self); ++ ++#endif /* _KBASE_CCSWE_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c +index ce7070d..37dbca1 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2011-2015,2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2015, 2017, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + #include + #include + #include +@@ -46,3 +43,62 @@ void kbasep_platform_device_term(struct kbase_device *kbdev) + platform_funcs_p->platform_term_func(kbdev); + } + ++int kbasep_platform_device_late_init(struct kbase_device *kbdev) ++{ ++ struct kbase_platform_funcs_conf *platform_funcs_p; ++ ++ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; ++ if (platform_funcs_p && platform_funcs_p->platform_late_init_func) ++ platform_funcs_p->platform_late_init_func(kbdev); ++ ++ return 0; ++} ++ ++void kbasep_platform_device_late_term(struct kbase_device *kbdev) ++{ ++ struct kbase_platform_funcs_conf *platform_funcs_p; ++ ++ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; ++ if (platform_funcs_p && platform_funcs_p->platform_late_term_func) ++ platform_funcs_p->platform_late_term_func(kbdev); ++} ++ ++#if !MALI_USE_CSF ++int kbasep_platform_context_init(struct kbase_context *kctx) ++{ ++ struct kbase_platform_funcs_conf *platform_funcs_p; ++ ++ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; ++ if (platform_funcs_p && platform_funcs_p->platform_handler_context_init_func) ++ return platform_funcs_p->platform_handler_context_init_func(kctx); ++ ++ return 0; ++} ++ ++void kbasep_platform_context_term(struct kbase_context *kctx) ++{ ++ struct kbase_platform_funcs_conf *platform_funcs_p; ++ ++ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; ++ if (platform_funcs_p && platform_funcs_p->platform_handler_context_term_func) ++ platform_funcs_p->platform_handler_context_term_func(kctx); ++} ++ ++void kbasep_platform_event_atom_submit(struct kbase_jd_atom *katom) ++{ ++ struct kbase_platform_funcs_conf *platform_funcs_p; ++ ++ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; ++ if (platform_funcs_p && platform_funcs_p->platform_handler_atom_submit_func) ++ 
platform_funcs_p->platform_handler_atom_submit_func(katom); ++} ++ ++void kbasep_platform_event_atom_complete(struct kbase_jd_atom *katom) ++{ ++ struct kbase_platform_funcs_conf *platform_funcs_p; ++ ++ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; ++ if (platform_funcs_p && platform_funcs_p->platform_handler_atom_complete_func) ++ platform_funcs_p->platform_handler_atom_complete_func(katom); ++} ++#endif +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h +index 69723ea..e7eb334 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2010-2017, 2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2017, 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,15 +17,10 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /** +- * @file mali_kbase_config.h +- * Configuration API and Attributes for KBase ++ * DOC: Configuration API and Attributes for KBase + */ + + #ifndef _KBASE_CONFIG_H_ +@@ -32,37 +28,31 @@ + + #include + #include +-#include ++#include + #include + +-/** +- * @addtogroup base_api +- * @{ +- */ +- +-/** +- * @addtogroup base_kbase_api +- * @{ +- */ +- +-/** +- * @addtogroup kbase_config Configuration API and Attributes +- * @{ +- */ +- + /* Forward declaration of struct kbase_device */ + struct kbase_device; + ++#if !MALI_USE_CSF ++/* Forward declaration of struct kbase_context */ ++struct kbase_context; ++ ++/* Forward declaration of struct kbase_atom */ ++struct kbase_jd_atom; ++#endif ++ + /** +- * kbase_platform_funcs_conf - Specifies platform init/term function pointers ++ * struct kbase_platform_funcs_conf - Specifies platform integration function ++ * pointers for DDK events such as device init and term. + * + * Specifies the functions pointers for platform specific initialization and +- * termination. By default no functions are required. No additional platform +- * specific control is necessary. ++ * termination as well as other events. By default no functions are required. ++ * No additional platform specific control is necessary. + */ + struct kbase_platform_funcs_conf { + /** +- * platform_init_func - platform specific init function pointer ++ * @platform_init_func: platform specific init function pointer + * @kbdev - kbase_device pointer + * + * Returns 0 on success, negative error code otherwise. 
+@@ -77,7 +67,7 @@ struct kbase_platform_funcs_conf { + */ + int (*platform_init_func)(struct kbase_device *kbdev); + /** +- * platform_term_func - platform specific termination function pointer ++ * @platform_term_func: platform specific termination function pointer + * @kbdev - kbase_device pointer + * + * Function pointer for platform specific termination or NULL if no +@@ -88,6 +78,84 @@ struct kbase_platform_funcs_conf { + * can be accessed (and possibly terminated) in here. + */ + void (*platform_term_func)(struct kbase_device *kbdev); ++ ++ /** ++ * @platform_late_init_func: platform specific late init function pointer ++ * @kbdev - kbase_device pointer ++ * ++ * Function pointer to inform that the kbase driver initialization completed ++ * or NULL if no such function is required. At this point the GPU driver will be ++ * fully initialized. ++ * ++ * The platform specific private pointer kbase_device::platform_context ++ * can be accessed (and possibly terminated) in here. ++ */ ++ int (*platform_late_init_func)(struct kbase_device *kbdev); ++ ++ /** ++ * @platform_late_term_func: platform specific late termination function pointer ++ * @kbdev - kbase_device pointer ++ * ++ * Function pointer for platform specific termination or NULL if no ++ * termination function is required. At this point the GPU driver will complete ++ * termination process ++ * ++ * The platform specific private pointer kbase_device::platform_context ++ * can be accessed (and possibly terminated) in here. ++ */ ++ void (*platform_late_term_func)(struct kbase_device *kbdev); ++ ++#if !MALI_USE_CSF ++ /** ++ * @platform_handler_context_init_func: platform specific handler for ++ * when a new kbase_context is created. ++ * @kctx - kbase_context pointer ++ * ++ * Returns 0 on success, negative error code otherwise. ++ * ++ * Function pointer for platform specific initialization of a kernel ++ * context or NULL if not required. Called at the last stage of kernel ++ * context initialization. 
++ */ ++ int (*platform_handler_context_init_func)(struct kbase_context *kctx); ++ /** ++ * @platform_handler_context_term_func: platform specific handler for ++ * when a kbase_context is terminated. ++ * @kctx - kbase_context pointer ++ * ++ * Function pointer for platform specific termination of a kernel ++ * context or NULL if not required. Called at the first stage of kernel ++ * context termination. ++ */ ++ void (*platform_handler_context_term_func)(struct kbase_context *kctx); ++ /** ++ * @platform_handler_atom_submit_func: platform specific handler for ++ * when a kbase_jd_atom is submitted. ++ * @katom - kbase_jd_atom pointer ++ * ++ * Function pointer for platform specific handling at the point when an ++ * atom is submitted to the GPU or set to NULL if not required. The ++ * function cannot assume that it is running in a process context. ++ * ++ * Context: The caller must hold the hwaccess_lock. Function must be ++ * runnable in an interrupt context. ++ */ ++ void (*platform_handler_atom_submit_func)(struct kbase_jd_atom *katom); ++ /** ++ * @platform_handler_atom_complete_func: platform specific handler for ++ * when a kbase_jd_atom completes. ++ * @katom - kbase_jd_atom pointer ++ * ++ * Function pointer for platform specific handling at the point when an ++ * atom stops running on the GPU or set to NULL if not required. The ++ * function cannot assume that it is running in a process context. ++ * ++ * Context: The caller must hold the hwaccess_lock. Function must be ++ * runnable in an interrupt context. ++ */ ++ void (*platform_handler_atom_complete_func)( ++ struct kbase_jd_atom *katom); ++#endif + }; + + /* +@@ -223,7 +291,90 @@ struct kbase_pm_callback_conf { + int (*soft_reset_callback)(struct kbase_device *kbdev); + }; + +-#ifdef CONFIG_OF ++/* struct kbase_gpu_clk_notifier_data - Data for clock rate change notifier. ++ * ++ * Pointer to this structure is supposed to be passed to the gpu clock rate ++ * change notifier function. 
This structure is deliberately aligned with the ++ * common clock framework notification structure 'struct clk_notifier_data' ++ * and such alignment should be maintained. ++ * ++ * @gpu_clk_handle: Handle of the GPU clock for which notifier was registered. ++ * @old_rate: Previous rate of this GPU clock in Hz. ++ * @new_rate: New rate of this GPU clock in Hz. ++ */ ++struct kbase_gpu_clk_notifier_data { ++ void *gpu_clk_handle; ++ unsigned long old_rate; ++ unsigned long new_rate; ++}; ++ ++/** ++ * struct kbase_clk_rate_trace_op_conf - Specifies GPU clock rate trace ++ * operations. ++ * ++ * Specifies the functions pointers for platform specific GPU clock rate trace ++ * operations. By default no functions are required. ++ */ ++struct kbase_clk_rate_trace_op_conf { ++ /** ++ * @enumerate_gpu_clk: Enumerate a GPU clock on the given index ++ * @kbdev - kbase_device pointer ++ * @index - GPU clock index ++ * ++ * Returns a handle unique to the given GPU clock, or NULL if the clock ++ * array has been exhausted at the given index value. ++ * ++ * Kbase will use this function pointer to enumerate the existence of a ++ * GPU clock on the given index. ++ */ ++ void *(*enumerate_gpu_clk)(struct kbase_device *kbdev, ++ unsigned int index); ++ ++ /** ++ * @get_gpu_clk_rate: Get the current rate for an enumerated clock. ++ * @kbdev - kbase_device pointer ++ * @gpu_clk_handle - Handle unique to the enumerated GPU clock ++ * ++ * Returns current rate of the GPU clock in unit of Hz. ++ */ ++ unsigned long (*get_gpu_clk_rate)(struct kbase_device *kbdev, ++ void *gpu_clk_handle); ++ ++ /** ++ * @gpu_clk_notifier_register: Register a clock rate change notifier. ++ * @kbdev - kbase_device pointer ++ * @gpu_clk_handle - Handle unique to the enumerated GPU clock ++ * @nb - notifier block containing the callback function ++ * pointer ++ * ++ * Returns 0 on success, negative error code otherwise. 
++ * ++ * This function pointer is used to register a callback function that ++ * is supposed to be invoked whenever the rate of clock corresponding ++ * to @gpu_clk_handle changes. ++ * @nb contains the pointer to callback function. ++ * The callback function expects the pointer of type ++ * 'struct kbase_gpu_clk_notifier_data' as the third argument. ++ */ ++ int (*gpu_clk_notifier_register)(struct kbase_device *kbdev, ++ void *gpu_clk_handle, struct notifier_block *nb); ++ ++ /** ++ * @gpu_clk_notifier_unregister: Unregister clock rate change notifier ++ * @kbdev - kbase_device pointer ++ * @gpu_clk_handle - Handle unique to the enumerated GPU clock ++ * @nb - notifier block containing the callback function ++ * pointer ++ * ++ * This function pointer is used to unregister a callback function that ++ * was previously registered to get notified of the change in rate ++ * of clock corresponding to @gpu_clk_handle. ++ */ ++ void (*gpu_clk_notifier_unregister)(struct kbase_device *kbdev, ++ void *gpu_clk_handle, struct notifier_block *nb); ++}; ++ ++#if IS_ENABLED(CONFIG_OF) + struct kbase_platform_config { + }; + #else +@@ -253,7 +404,7 @@ struct kbase_platform_config { + #endif /* CONFIG_OF */ + + /** +- * @brief Gets the pointer to platform config. ++ * kbase_get_platform_config - Gets the pointer to platform config. + * + * @return Pointer to the platform config + */ +@@ -284,6 +435,83 @@ int kbasep_platform_device_init(struct kbase_device *kbdev); + */ + void kbasep_platform_device_term(struct kbase_device *kbdev); + ++/** ++ * kbasep_platform_device_late_init: - Platform specific call to finish hardware ++ * initialization ++ * @kbdev: kbase device pointer ++ * ++ * Function calls a platform defined routine if specified in the configuration ++ * attributes. The routine can initialize any hardware and context state that ++ * is required for the GPU block to function. ++ * ++ * Return: 0 if no errors have been found in the config. 
++ * Negative error code otherwise. ++ */ ++int kbasep_platform_device_late_init(struct kbase_device *kbdev); ++ ++/** ++ * kbasep_platform_device_late_term - Platform specific call to finish hardware ++ * termination ++ * @kbdev: Kbase device pointer ++ * ++ * Function calls a platform defined routine if specified in the configuration ++ * attributes. The routine can destroy any platform specific context state and ++ * shut down any hardware functionality that are outside of the Power Management ++ * callbacks. ++ * ++ */ ++void kbasep_platform_device_late_term(struct kbase_device *kbdev); ++ ++#if !MALI_USE_CSF ++/** ++ * kbasep_platform_context_init - Platform specific callback when a kernel ++ * context is created ++ * @kctx: kbase_context pointer ++ * ++ * Function calls a platform defined routine if specified in the configuration ++ * attributes. The routine can initialize any per kernel context structures ++ * that are required for the GPU block to function. ++ * ++ * Return: 0 if no errors were encountered. Negative error code otherwise. ++ */ ++int kbasep_platform_context_init(struct kbase_context *kctx); ++ ++/** ++ * kbasep_platform_context_term - Platform specific callback when a kernel ++ * context is terminated ++ * @kctx: kbase_context pointer ++ * ++ * Function calls a platform defined routine if specified in the configuration ++ * attributes. The routine should terminate any per kernel context structures ++ * created as part of &kbasep_platform_context_init. ++ * ++ */ ++void kbasep_platform_context_term(struct kbase_context *kctx); ++ ++/** ++ * kbasep_platform_event_atom_submit - Platform specific callback when an atom ++ * is submitted to the GPU ++ * @katom: kbase_jd_atom pointer ++ * ++ * Function calls a platform defined routine if specified in the configuration ++ * attributes. The routine should not assume that it is in a process context. ++ * ++ * Return: 0 if no errors were encountered. Negative error code otherwise. 
++ */ ++void kbasep_platform_event_atom_submit(struct kbase_jd_atom *katom); ++ ++/** ++ * kbasep_platform_event_atom_complete - Platform specific callback when an atom ++ * has stopped running on the GPU ++ * @katom: kbase_jd_atom pointer ++ * ++ * Function calls a platform defined routine if specified in the configuration ++ * attributes. The routine should not assume that it is in a process context. ++ * ++ */ ++void kbasep_platform_event_atom_complete(struct kbase_jd_atom *katom); ++#endif ++ + #ifndef CONFIG_OF + /** + * kbase_platform_register - Register a platform device for the GPU +@@ -304,8 +532,4 @@ int kbase_platform_register(void); + void kbase_platform_unregister(void); + #endif + +- /** @} *//* end group kbase_config */ +- /** @} *//* end group base_kbase_api */ +- /** @} *//* end group base_api */ +- + #endif /* _KBASE_CONFIG_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h +index e079281..63c36e2 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2013-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,14 +17,10 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /** +- * @file mali_kbase_config_defaults.h +- * +- * Default values for configuration settings ++ * DOC: Default values for configuration settings + * + */ + +@@ -88,29 +85,38 @@ enum { + }; + + /** +- * Default period for DVFS sampling ++ * Default period for DVFS sampling (can be overridden by platform header) + */ ++#ifndef DEFAULT_PM_DVFS_PERIOD + #define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */ ++#endif + + /** + * Power Management poweroff tick granuality. This is in nanoseconds to +- * allow HR timer support. ++ * allow HR timer support (can be overridden by platform header). + * + * On each scheduling tick, the power manager core may decide to: + * -# Power off one or more shader cores + * -# Power off the entire GPU + */ ++#ifndef DEFAULT_PM_GPU_POWEROFF_TICK_NS + #define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */ ++#endif + + /** + * Power Manager number of ticks before shader cores are powered off ++ * (can be overridden by platform header). + */ ++#ifndef DEFAULT_PM_POWEROFF_TICK_SHADER + #define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */ ++#endif + + /** +- * Default scheduling tick granuality ++ * Default scheduling tick granuality (can be overridden by platform header) + */ ++#ifndef DEFAULT_JS_SCHEDULING_PERIOD_NS + #define DEFAULT_JS_SCHEDULING_PERIOD_NS (100000000u) /* 100ms */ ++#endif + + /** + * Default minimum number of scheduling ticks before jobs are soft-stopped. +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c +index fb2353e..ce84219 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2021 ARM Limited. 
All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include +@@ -28,37 +27,39 @@ + #ifdef CONFIG_MALI_DEVFREQ + #include + #include +-#ifdef CONFIG_DEVFREQ_THERMAL ++#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) + #include + #endif /* CONFIG_DEVFREQ_THERMAL */ + #endif /* CONFIG_MALI_DEVFREQ */ +-#ifdef CONFIG_MALI_NO_MALI +-#include "mali_kbase_model_linux.h" +-#include +-#endif /* CONFIG_MALI_NO_MALI */ + #include "mali_kbase_mem_profile_debugfs_buf_size.h" +-#include "mali_kbase_debug_mem_view.h" + #include "mali_kbase_mem.h" + #include "mali_kbase_mem_pool_debugfs.h" ++#include "mali_kbase_mem_pool_group.h" + #include "mali_kbase_debugfs_helper.h" +-#if !MALI_CUSTOMER_RELEASE +-#include "mali_kbase_regs_dump_debugfs.h" +-#endif /* !MALI_CUSTOMER_RELEASE */ + #include "mali_kbase_regs_history_debugfs.h" + #include + #include ++#if !MALI_USE_CSF + #include +-#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS ++#endif /* !MALI_USE_CSF */ ++#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS + #include + #endif +-#include + #include +-#include +-#include "mali_kbase_ioctl.h" ++#include ++#if !MALI_USE_CSF ++#include "mali_kbase_kinstr_jm.h" ++#endif + #include "mali_kbase_hwcnt_context.h" + #include "mali_kbase_hwcnt_virtualizer.h" + #include "mali_kbase_hwcnt_legacy.h" + #include "mali_kbase_vinstr.h" ++#if MALI_USE_CSF ++#include "csf/mali_kbase_csf_firmware.h" ++#include 
"csf/mali_kbase_csf_tiler_heap.h" ++#include "csf/mali_kbase_csf_csg_debugfs.h" ++#include "csf/mali_kbase_csf_cpu_queue_debugfs.h" ++#endif + #ifdef CONFIG_MALI_ARBITER_SUPPORT + #include "arbiter/mali_kbase_arbiter_pm.h" + #endif +@@ -68,7 +69,8 @@ + #ifdef CONFIG_MALI_CINSTR_GWT + #include "mali_kbase_gwt.h" + #endif +-#include "mali_kbase_pm_internal.h" ++#include "backend/gpu/mali_kbase_pm_internal.h" ++#include "mali_kbase_dvfs_debugfs.h" + + #include + #include +@@ -99,13 +101,7 @@ + + #include + +- +-#if (KERNEL_VERSION(3, 13, 0) <= LINUX_VERSION_CODE) + #include +-#else +-#include +-#endif +- + #include + + #include +@@ -114,6 +110,8 @@ + #include + #include + ++#include ++ + /* GPU IRQ Tags */ + #define JOB_IRQ_TAG 0 + #define MMU_IRQ_TAG 1 +@@ -121,6 +119,72 @@ + + #define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)" + ++/** ++ * KBASE_API_VERSION - KBase API Version ++ * @major: Kernel major version ++ * @minor: Kernel minor version ++ */ ++#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \ ++ (((minor) & 0xFFF) << 8) | \ ++ ((0 & 0xFF) << 0)) ++ ++#define KBASE_API_MIN(api_version) ((api_version >> 8) & 0xFFF) ++#define KBASE_API_MAJ(api_version) ((api_version >> 20) & 0xFFF) ++ ++/** ++ * typedef mali_kbase_capability_def - kbase capabilities table ++ */ ++typedef struct mali_kbase_capability_def { ++ u16 required_major; ++ u16 required_minor; ++} mali_kbase_capability_def; ++ ++/* ++ * This must be kept in-sync with mali_kbase_cap ++ * ++ * TODO: The alternative approach would be to embed the cap enum values ++ * in the table. Less efficient but potentially safer. 
++ */ ++static mali_kbase_capability_def kbase_caps_table[MALI_KBASE_NUM_CAPS] = { ++#if MALI_USE_CSF ++ { 1, 0 }, /* SYSTEM_MONITOR */ ++ { 1, 0 }, /* JIT_PRESSURE_LIMIT */ ++ { 1, 0 }, /* MEM_GROW_ON_GPF */ ++ { 1, 0 } /* MEM_PROTECTED */ ++#else ++ { 11, 15 }, /* SYSTEM_MONITOR */ ++ { 11, 25 }, /* JIT_PRESSURE_LIMIT */ ++ { 11, 2 }, /* MEM_GROW_ON_GPF */ ++ { 11, 2 } /* MEM_PROTECTED */ ++#endif ++}; ++ ++/** ++ * mali_kbase_supports_cap - Query whether a kbase capability is supported ++ * ++ * @api_version: API version to convert ++ * @cap: Capability to query for - see mali_kbase_caps.h ++ */ ++bool mali_kbase_supports_cap(unsigned long api_version, mali_kbase_cap cap) ++{ ++ bool supported = false; ++ unsigned long required_ver; ++ ++ mali_kbase_capability_def const *cap_def; ++ ++ if (WARN_ON(cap < 0)) ++ return false; ++ ++ if (WARN_ON(cap >= MALI_KBASE_NUM_CAPS)) ++ return false; ++ ++ cap_def = &kbase_caps_table[(int)cap]; ++ required_ver = KBASE_API_VERSION(cap_def->required_major, cap_def->required_minor); ++ supported = (api_version >= required_ver); ++ ++ return supported; ++} ++ + /** + * kbase_file_new - Create an object representing a device file + * +@@ -152,7 +216,7 @@ static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev, + } + + /** +- * kbase_file_get_api_version - Set the application programmer interface version ++ * kbase_file_set_api_version - Set the application programmer interface version + * + * @kfile: A device file created by kbase_file_new() + * @major: Major version number (must not exceed 12 bits) +@@ -271,7 +335,7 @@ static void kbase_file_delete(struct kbase_file *const kfile) + if (atomic_read(&kfile->setup_state) == KBASE_FILE_COMPLETE) { + struct kbase_context *kctx = kfile->kctx; + +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + kbasep_mem_profile_debugfs_remove(kctx); + #endif + +@@ -326,31 +390,18 @@ static int kbase_api_handshake(struct kbase_file *kfile, + * the flags have been set. 
Originally it was created on file open + * (with job submission disabled) but we don't support that usage. + */ +- if (kbase_file_get_api_version(kfile) < KBASE_API_VERSION(11, 15)) ++ if (!mali_kbase_supports_system_monitor(kbase_file_get_api_version(kfile))) + err = kbase_file_create_kctx(kfile, + BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED); + + return err; + } + +-/** +- * enum mali_error - Mali error codes shared with userspace +- * +- * This is subset of those common Mali errors that can be returned to userspace. +- * Values of matching user and kernel space enumerators MUST be the same. +- * MALI_ERROR_NONE is guaranteed to be 0. +- * +- * @MALI_ERROR_NONE: Success +- * @MALI_ERROR_OUT_OF_GPU_MEMORY: Not used in the kernel driver +- * @MALI_ERROR_OUT_OF_MEMORY: Memory allocation failure +- * @MALI_ERROR_FUNCTION_FAILED: Generic error code +- */ +-enum mali_error { +- MALI_ERROR_NONE = 0, +- MALI_ERROR_OUT_OF_GPU_MEMORY, +- MALI_ERROR_OUT_OF_MEMORY, +- MALI_ERROR_FUNCTION_FAILED, +-}; ++static int kbase_api_handshake_dummy(struct kbase_file *kfile, ++ struct kbase_ioctl_version_check *version) ++{ ++ return -EPERM; ++} + + static struct kbase_device *to_kbase_device(struct device *dev) + { +@@ -377,7 +428,7 @@ int assign_irqs(struct kbase_device *kbdev) + return -ENOENT; + } + +-#ifdef CONFIG_OF ++#if IS_ENABLED(CONFIG_OF) + if (!strncasecmp(irq_res->name, "JOB", 4)) { + irqtag = JOB_IRQ_TAG; + } else if (!strncasecmp(irq_res->name, "MMU", 4)) { +@@ -428,10 +479,10 @@ void kbase_release_device(struct kbase_device *kbdev) + } + EXPORT_SYMBOL(kbase_release_device); + +-#ifdef CONFIG_DEBUG_FS +-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && \ +- !(LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 28) && \ +- LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0)) ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && \ ++ !(KERNEL_VERSION(4, 4, 28) <= LINUX_VERSION_CODE && \ ++ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE) + /* + * Older 
versions, before v4.6, of the kernel doesn't have + * kstrtobool_from_user(), except longterm 4.4.y which had it added in 4.4.28 +@@ -545,7 +596,7 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile, + { + struct kbase_device *kbdev = NULL; + struct kbase_context *kctx = NULL; +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + char kctx_name[64]; + #endif + +@@ -576,9 +627,11 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile, + if (kbdev->infinite_cache_active_default) + kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE); + +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id); + ++ mutex_init(&kctx->mem_profile_lock); ++ + kctx->kctx_dentry = debugfs_create_dir(kctx_name, + kbdev->debugfs_ctx_directory); + +@@ -599,8 +652,6 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile, + debugfs_create_file("force_same_va", 0600, kctx->kctx_dentry, + kctx, &kbase_force_same_va_fops); + +- mutex_init(&kctx->mem_profile_lock); +- + kbase_context_debugfs_init(kctx); + } + #endif /* CONFIG_DEBUG_FS */ +@@ -612,7 +663,6 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile, + + return 0; + } +- + static int kbase_open(struct inode *inode, struct file *filp) + { + struct kbase_device *kbdev = NULL; +@@ -624,6 +674,13 @@ static int kbase_open(struct inode *inode, struct file *filp) + if (!kbdev) + return -ENODEV; + ++ /* Device-wide firmware load is moved here from probing to comply with ++ * Android GKI vendor guideline. 
++ */ ++ ret = kbase_device_firmware_init_once(kbdev); ++ if (ret) ++ goto out; ++ + kfile = kbase_file_new(kbdev, filp); + if (!kfile) { + ret = -ENOMEM; +@@ -635,7 +692,7 @@ static int kbase_open(struct inode *inode, struct file *filp) + + return 0; + +- out: ++out: + kbase_release_device(kbdev); + return ret; + } +@@ -663,11 +720,13 @@ static int kbase_api_set_flags(struct kbase_file *kfile, + /* For backward compatibility, the context may have been created before + * the flags were set. + */ +- if (api_version >= KBASE_API_VERSION(11, 15)) { ++ if (mali_kbase_supports_system_monitor(api_version)) { + err = kbase_file_create_kctx(kfile, flags->create_flags); + } else { ++#if !MALI_USE_CSF + struct kbasep_js_kctx_info *js_kctx_info = NULL; + unsigned long irq_flags = 0; ++#endif + + /* If setup is incomplete (e.g. because the API version + * wasn't set) then we have to give up. +@@ -676,6 +735,12 @@ static int kbase_api_set_flags(struct kbase_file *kfile, + if (unlikely(!kctx)) + return -EPERM; + ++#if MALI_USE_CSF ++ /* On CSF GPUs Job Manager interface isn't used to submit jobs ++ * (there are no job slots). So the legacy job manager path to ++ * submit jobs needs to remain disabled for CSF GPUs. 
++ */ ++#else + js_kctx_info = &kctx->jctx.sched_info; + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); +@@ -687,11 +752,13 @@ static int kbase_api_set_flags(struct kbase_file *kfile, + + spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); ++#endif + } + + return err; + } + ++#if !MALI_USE_CSF + static int kbase_api_job_submit(struct kbase_context *kctx, + struct kbase_ioctl_job_submit *submit) + { +@@ -699,6 +766,7 @@ static int kbase_api_job_submit(struct kbase_context *kctx, + submit->nr_atoms, + submit->stride, false); + } ++#endif /* !MALI_USE_CSF */ + + static int kbase_api_get_gpuprops(struct kbase_context *kctx, + struct kbase_ioctl_get_gpuprops *get_props) +@@ -724,11 +792,13 @@ static int kbase_api_get_gpuprops(struct kbase_context *kctx, + return kprops->prop_buffer_size; + } + ++#if !MALI_USE_CSF + static int kbase_api_post_term(struct kbase_context *kctx) + { + kbase_event_close(kctx); + return 0; + } ++#endif /* !MALI_USE_CSF */ + + static int kbase_api_mem_alloc(struct kbase_context *kctx, + union kbase_ioctl_mem_alloc *alloc) +@@ -762,11 +832,23 @@ static int kbase_api_mem_alloc(struct kbase_context *kctx, + flags |= BASE_MEM_SAME_VA; + } + ++#if MALI_USE_CSF ++ /* If CSF event memory allocation, need to force certain flags. ++ * SAME_VA - GPU address needs to be used as a CPU address, explicit ++ * mmap has to be avoided. ++ * CACHED_CPU - Frequent access to the event memory by CPU. ++ * COHERENT_SYSTEM - No explicit cache maintenance around the access ++ * to event memory so need to leverage the coherency support. 
++ */ ++ if (flags & BASE_MEM_CSF_EVENT) { ++ flags |= (BASE_MEM_SAME_VA | ++ BASE_MEM_CACHED_CPU | ++ BASE_MEM_COHERENT_SYSTEM); ++ } ++#endif + +- reg = kbase_mem_alloc(kctx, alloc->in.va_pages, +- alloc->in.commit_pages, +- alloc->in.extent, +- &flags, &gpu_va); ++ reg = kbase_mem_alloc(kctx, alloc->in.va_pages, alloc->in.commit_pages, ++ alloc->in.extension, &flags, &gpu_va); + + if (!reg) + return -ENOMEM; +@@ -790,6 +872,14 @@ static int kbase_api_mem_free(struct kbase_context *kctx, + return kbase_mem_free(kctx, free->gpu_addr); + } + ++#if !MALI_USE_CSF ++static int kbase_api_kinstr_jm_fd(struct kbase_context *kctx, ++ union kbase_kinstr_jm_fd *arg) ++{ ++ return kbase_kinstr_jm_get_fd(kctx->kinstr_jm, arg); ++} ++#endif ++ + static int kbase_api_hwcnt_reader_setup(struct kbase_context *kctx, + struct kbase_ioctl_hwcnt_reader_setup *setup) + { +@@ -883,17 +973,6 @@ static int kbase_api_get_cpu_gpu_timeinfo(struct kbase_context *kctx, + return 0; + } + +-#ifdef CONFIG_MALI_NO_MALI +-static int kbase_api_hwcnt_set(struct kbase_context *kctx, +- struct kbase_ioctl_hwcnt_values *values) +-{ +- gpu_model_set_dummy_prfcnt_sample( +- (u32 __user *)(uintptr_t)values->data, +- values->size); +- +- return 0; +-} +-#endif + + static int kbase_api_disjoint_query(struct kbase_context *kctx, + struct kbase_ioctl_disjoint_query *query) +@@ -1058,10 +1137,7 @@ static int kbase_api_mem_alias(struct kbase_context *kctx, + u64 flags; + int err; + +- if (alias->in.nents == 0 || alias->in.nents > 2048) +- return -EINVAL; +- +- if (alias->in.stride > (U64_MAX / 2048)) ++ if (alias->in.nents == 0 || alias->in.nents > BASE_MEM_ALIAS_MAX_ENTS) + return -EINVAL; + + ai = vmalloc(sizeof(*ai) * alias->in.nents); +@@ -1185,6 +1261,7 @@ static int kbase_api_mem_profile_add(struct kbase_context *kctx, + return kbasep_mem_profile_debugfs_insert(kctx, buf, data->len); + } + ++#if !MALI_USE_CSF + static int kbase_api_soft_event_update(struct kbase_context *kctx, + struct 
kbase_ioctl_soft_event_update *update) + { +@@ -1193,6 +1270,7 @@ static int kbase_api_soft_event_update(struct kbase_context *kctx, + + return kbase_soft_event_update(kctx, update->event, update->new_status); + } ++#endif /* !MALI_USE_CSF */ + + static int kbase_api_sticky_resource_map(struct kbase_context *kctx, + struct kbase_ioctl_sticky_resource_map *map) +@@ -1263,18 +1341,6 @@ static int kbase_api_sticky_resource_unmap(struct kbase_context *kctx, + } + + #if MALI_UNIT_TEST +-static int kbase_api_tlstream_test(struct kbase_context *kctx, +- struct kbase_ioctl_tlstream_test *test) +-{ +- kbase_timeline_test( +- kctx->kbdev, +- test->tpw_count, +- test->msg_delay, +- test->msg_count, +- test->aux_msg); +- +- return 0; +-} + + static int kbase_api_tlstream_stats(struct kbase_context *kctx, + struct kbase_ioctl_tlstream_stats *stats) +@@ -1287,55 +1353,266 @@ static int kbase_api_tlstream_stats(struct kbase_context *kctx, + } + #endif /* MALI_UNIT_TEST */ + ++#if MALI_USE_CSF ++static int kbasep_cs_event_signal(struct kbase_context *kctx) ++{ ++ kbase_csf_event_signal_notify_gpu(kctx); ++ return 0; ++} ++ ++static int kbasep_cs_queue_register(struct kbase_context *kctx, ++ struct kbase_ioctl_cs_queue_register *reg) ++{ ++ kctx->jit_group_id = BASE_MEM_GROUP_DEFAULT; ++ ++ return kbase_csf_queue_register(kctx, reg); ++} ++ ++static int kbasep_cs_queue_register_ex(struct kbase_context *kctx, ++ struct kbase_ioctl_cs_queue_register_ex *reg) ++{ ++ kctx->jit_group_id = BASE_MEM_GROUP_DEFAULT; ++ ++ return kbase_csf_queue_register_ex(kctx, reg); ++} ++ ++static int kbasep_cs_queue_terminate(struct kbase_context *kctx, ++ struct kbase_ioctl_cs_queue_terminate *term) ++{ ++ kbase_csf_queue_terminate(kctx, term); ++ ++ return 0; ++} ++ ++static int kbasep_cs_queue_bind(struct kbase_context *kctx, ++ union kbase_ioctl_cs_queue_bind *bind) ++{ ++ return kbase_csf_queue_bind(kctx, bind); ++} ++ ++static int kbasep_cs_queue_kick(struct kbase_context *kctx, ++ struct 
kbase_ioctl_cs_queue_kick *kick) ++{ ++ return kbase_csf_queue_kick(kctx, kick); ++} ++ ++static int kbasep_cs_queue_group_create(struct kbase_context *kctx, ++ union kbase_ioctl_cs_queue_group_create *create) ++{ ++ return kbase_csf_queue_group_create(kctx, create); ++} ++ ++static int kbasep_cs_queue_group_terminate(struct kbase_context *kctx, ++ struct kbase_ioctl_cs_queue_group_term *term) ++{ ++ kbase_csf_queue_group_terminate(kctx, term->group_handle); ++ ++ return 0; ++} ++ ++static int kbasep_kcpu_queue_new(struct kbase_context *kctx, ++ struct kbase_ioctl_kcpu_queue_new *new) ++{ ++ return kbase_csf_kcpu_queue_new(kctx, new); ++} ++ ++static int kbasep_kcpu_queue_delete(struct kbase_context *kctx, ++ struct kbase_ioctl_kcpu_queue_delete *delete) ++{ ++ return kbase_csf_kcpu_queue_delete(kctx, delete); ++} ++ ++static int kbasep_kcpu_queue_enqueue(struct kbase_context *kctx, ++ struct kbase_ioctl_kcpu_queue_enqueue *enqueue) ++{ ++ return kbase_csf_kcpu_queue_enqueue(kctx, enqueue); ++} ++ ++static int kbasep_cs_tiler_heap_init(struct kbase_context *kctx, ++ union kbase_ioctl_cs_tiler_heap_init *heap_init) ++{ ++ kctx->jit_group_id = heap_init->in.group_id; ++ ++ return kbase_csf_tiler_heap_init(kctx, heap_init->in.chunk_size, ++ heap_init->in.initial_chunks, heap_init->in.max_chunks, ++ heap_init->in.target_in_flight, ++ &heap_init->out.gpu_heap_va, &heap_init->out.first_chunk_va); ++} ++ ++static int kbasep_cs_tiler_heap_term(struct kbase_context *kctx, ++ struct kbase_ioctl_cs_tiler_heap_term *heap_term) ++{ ++ return kbase_csf_tiler_heap_term(kctx, heap_term->gpu_heap_va); ++} ++ ++static int kbase_ioctl_cs_get_glb_iface(struct kbase_context *kctx, ++ union kbase_ioctl_cs_get_glb_iface *param) ++{ ++ struct basep_cs_stream_control *stream_data = NULL; ++ struct basep_cs_group_control *group_data = NULL; ++ void __user *user_groups, *user_streams; ++ int err = 0; ++ u32 const max_group_num = param->in.max_group_num; ++ u32 const max_total_stream_num = 
param->in.max_total_stream_num; ++ ++ if (max_group_num > MAX_SUPPORTED_CSGS) ++ return -EINVAL; ++ ++ if (max_total_stream_num > ++ MAX_SUPPORTED_CSGS * MAX_SUPPORTED_STREAMS_PER_GROUP) ++ return -EINVAL; ++ ++ user_groups = u64_to_user_ptr(param->in.groups_ptr); ++ user_streams = u64_to_user_ptr(param->in.streams_ptr); ++ ++ if (max_group_num > 0) { ++ if (!user_groups) ++ err = -EINVAL; ++ else { ++ group_data = kcalloc(max_group_num, ++ sizeof(*group_data), GFP_KERNEL); ++ if (!group_data) ++ err = -ENOMEM; ++ } ++ } ++ ++ if (max_total_stream_num > 0) { ++ if (!user_streams) ++ err = -EINVAL; ++ else { ++ stream_data = kcalloc(max_total_stream_num, ++ sizeof(*stream_data), GFP_KERNEL); ++ if (!stream_data) ++ err = -ENOMEM; ++ } ++ } ++ ++ if (!err) { ++ param->out.total_stream_num = kbase_csf_firmware_get_glb_iface( ++ kctx->kbdev, group_data, max_group_num, stream_data, ++ max_total_stream_num, &param->out.glb_version, ++ &param->out.features, &param->out.group_num, ++ &param->out.prfcnt_size, &param->out.instr_features); ++ ++ if (copy_to_user(user_groups, group_data, ++ MIN(max_group_num, param->out.group_num) * ++ sizeof(*group_data))) ++ err = -EFAULT; ++ } ++ ++ if (!err) ++ if (copy_to_user(user_streams, stream_data, ++ MIN(max_total_stream_num, param->out.total_stream_num) * ++ sizeof(*stream_data))) ++ err = -EFAULT; ++ ++ kfree(group_data); ++ kfree(stream_data); ++ return err; ++} ++ ++static int kbasep_ioctl_cs_cpu_queue_dump(struct kbase_context *kctx, ++ struct kbase_ioctl_cs_cpu_queue_info *cpu_queue_info) ++{ ++ return kbase_csf_cpu_queue_dump(kctx, cpu_queue_info->buffer, ++ cpu_queue_info->size); ++} ++ ++#endif /* MALI_USE_CSF */ ++ ++static int kbasep_ioctl_context_priority_check(struct kbase_context *kctx, ++ struct kbase_ioctl_context_priority_check *priority_check) ++{ ++#if MALI_USE_CSF ++ priority_check->priority = kbase_csf_priority_check(kctx->kbdev, priority_check->priority); ++#else ++ base_jd_prio req_priority =
(base_jd_prio)priority_check->priority; ++ ++ priority_check->priority = (u8)kbase_js_priority_check(kctx->kbdev, req_priority); ++#endif ++ return 0; ++} + +-#define KBASE_HANDLE_IOCTL(cmd, function, arg) \ +- do { \ +- BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE); \ +- return function(arg); \ ++#define KBASE_HANDLE_IOCTL(cmd, function, arg) \ ++ do { \ ++ int ret; \ ++ BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE); \ ++ dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ ++ ret = function(arg); \ ++ dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, \ ++ #function); \ ++ return ret; \ + } while (0) + +-#define KBASE_HANDLE_IOCTL_IN(cmd, function, type, arg) \ +- do { \ +- type param; \ +- int err; \ +- BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_WRITE); \ +- BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ +- err = copy_from_user(&param, uarg, sizeof(param)); \ +- if (err) \ +- return -EFAULT; \ +- return function(arg, &param); \ ++#define KBASE_HANDLE_IOCTL_IN(cmd, function, type, arg) \ ++ do { \ ++ type param; \ ++ int ret, err; \ ++ dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ ++ BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_WRITE); \ ++ BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ ++ err = copy_from_user(&param, uarg, sizeof(param)); \ ++ if (err) \ ++ return -EFAULT; \ ++ ret = function(arg, &param); \ ++ dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, \ ++ #function); \ ++ return ret; \ + } while (0) + +-#define KBASE_HANDLE_IOCTL_OUT(cmd, function, type, arg) \ +- do { \ +- type param; \ +- int ret, err; \ +- BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_READ); \ +- BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ +- memset(&param, 0, sizeof(param)); \ +- ret = function(arg, &param); \ +- err = copy_to_user(uarg, &param, sizeof(param)); \ +- if (err) \ +- return -EFAULT; \ +- return ret; \ ++#define KBASE_HANDLE_IOCTL_OUT(cmd, function, type, arg) \ ++ do { \ ++ type param; \ ++ int ret, err; \ ++ dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ ++
BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_READ); \ ++ BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ ++ memset(&param, 0, sizeof(param)); \ ++ ret = function(arg, &param); \ ++ err = copy_to_user(uarg, &param, sizeof(param)); \ ++ if (err) \ ++ return -EFAULT; \ ++ dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, \ ++ #function); \ ++ return ret; \ + } while (0) + +-#define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type, arg) \ +- do { \ +- type param; \ +- int ret, err; \ +- BUILD_BUG_ON(_IOC_DIR(cmd) != (_IOC_WRITE|_IOC_READ)); \ +- BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ +- err = copy_from_user(&param, uarg, sizeof(param)); \ +- if (err) \ +- return -EFAULT; \ +- ret = function(arg, &param); \ +- err = copy_to_user(uarg, &param, sizeof(param)); \ +- if (err) \ +- return -EFAULT; \ +- return ret; \ ++#define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type, arg) \ ++ do { \ ++ type param; \ ++ int ret, err; \ ++ dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ ++ BUILD_BUG_ON(_IOC_DIR(cmd) != (_IOC_WRITE | _IOC_READ)); \ ++ BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ ++ err = copy_from_user(&param, uarg, sizeof(param)); \ ++ if (err) \ ++ return -EFAULT; \ ++ ret = function(arg, &param); \ ++ err = copy_to_user(uarg, &param, sizeof(param)); \ ++ if (err) \ ++ return -EFAULT; \ ++ dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, \ ++ #function); \ ++ return ret; \ + } while (0) + ++static int kbasep_ioctl_set_limited_core_count(struct kbase_context *kctx, ++ struct kbase_ioctl_set_limited_core_count *set_limited_core_count) ++{ ++ const u64 shader_core_mask = ++ kbase_pm_get_present_cores(kctx->kbdev, KBASE_PM_CORE_SHADER); ++ const u64 limited_core_mask = ++ ((u64)1 << (set_limited_core_count->max_core_count)) - 1; ++ ++ if ((shader_core_mask & limited_core_mask) == 0) { ++ /* At least one shader core must be available after applying the mask */ ++ return -EINVAL; ++ } ++ ++ kctx->limited_core_mask = limited_core_mask; ++ return 0; ++} ++ + static long 
kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + { + struct kbase_file *const kfile = filp->private_data; +@@ -1352,6 +1629,13 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + kfile); + break; + ++ case KBASE_IOCTL_VERSION_CHECK_RESERVED: ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_VERSION_CHECK_RESERVED, ++ kbase_api_handshake_dummy, ++ struct kbase_ioctl_version_check, ++ kfile); ++ break; ++ + case KBASE_IOCTL_SET_FLAGS: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_FLAGS, + kbase_api_set_flags, +@@ -1366,23 +1650,27 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + + /* Normal ioctls */ + switch (cmd) { ++#if !MALI_USE_CSF + case KBASE_IOCTL_JOB_SUBMIT: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_JOB_SUBMIT, + kbase_api_job_submit, + struct kbase_ioctl_job_submit, + kctx); + break; ++#endif /* !MALI_USE_CSF */ + case KBASE_IOCTL_GET_GPUPROPS: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_GPUPROPS, + kbase_api_get_gpuprops, + struct kbase_ioctl_get_gpuprops, + kctx); + break; ++#if !MALI_USE_CSF + case KBASE_IOCTL_POST_TERM: + KBASE_HANDLE_IOCTL(KBASE_IOCTL_POST_TERM, + kbase_api_post_term, + kctx); + break; ++#endif /* !MALI_USE_CSF */ + case KBASE_IOCTL_MEM_ALLOC: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALLOC, + kbase_api_mem_alloc, +@@ -1515,12 +1803,14 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + kctx); + break; + ++#if !MALI_USE_CSF + case KBASE_IOCTL_SOFT_EVENT_UPDATE: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SOFT_EVENT_UPDATE, + kbase_api_soft_event_update, + struct kbase_ioctl_soft_event_update, + kctx); + break; ++#endif /* !MALI_USE_CSF */ + + case KBASE_IOCTL_STICKY_RESOURCE_MAP: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_MAP, +@@ -1536,6 +1826,14 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + break; + + /* Instrumentation. 
*/ ++#if !MALI_USE_CSF ++ case KBASE_IOCTL_KINSTR_JM_FD: ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_KINSTR_JM_FD, ++ kbase_api_kinstr_jm_fd, ++ union kbase_kinstr_jm_fd, ++ kctx); ++ break; ++#endif + case KBASE_IOCTL_HWCNT_READER_SETUP: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_READER_SETUP, + kbase_api_hwcnt_reader_setup, +@@ -1564,14 +1862,6 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + union kbase_ioctl_get_cpu_gpu_timeinfo, + kctx); + break; +-#ifdef CONFIG_MALI_NO_MALI +- case KBASE_IOCTL_HWCNT_SET: +- KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_SET, +- kbase_api_hwcnt_set, +- struct kbase_ioctl_hwcnt_values, +- kctx); +- break; +-#endif + #ifdef CONFIG_MALI_CINSTR_GWT + case KBASE_IOCTL_CINSTR_GWT_START: + KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_START, +@@ -1590,13 +1880,98 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + kctx); + break; + #endif +-#if MALI_UNIT_TEST +- case KBASE_IOCTL_TLSTREAM_TEST: +- KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_TEST, +- kbase_api_tlstream_test, +- struct kbase_ioctl_tlstream_test, ++#if MALI_USE_CSF ++ case KBASE_IOCTL_CS_EVENT_SIGNAL: ++ KBASE_HANDLE_IOCTL(KBASE_IOCTL_CS_EVENT_SIGNAL, ++ kbasep_cs_event_signal, ++ kctx); ++ break; ++ case KBASE_IOCTL_CS_QUEUE_REGISTER: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_REGISTER, ++ kbasep_cs_queue_register, ++ struct kbase_ioctl_cs_queue_register, ++ kctx); ++ break; ++ case KBASE_IOCTL_CS_QUEUE_REGISTER_EX: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_REGISTER_EX, ++ kbasep_cs_queue_register_ex, ++ struct kbase_ioctl_cs_queue_register_ex, ++ kctx); ++ break; ++ case KBASE_IOCTL_CS_QUEUE_TERMINATE: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_TERMINATE, ++ kbasep_cs_queue_terminate, ++ struct kbase_ioctl_cs_queue_terminate, ++ kctx); ++ break; ++ case KBASE_IOCTL_CS_QUEUE_BIND: ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_QUEUE_BIND, ++ kbasep_cs_queue_bind, ++ union kbase_ioctl_cs_queue_bind, ++ kctx); ++ 
break; ++ case KBASE_IOCTL_CS_QUEUE_KICK: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_KICK, ++ kbasep_cs_queue_kick, ++ struct kbase_ioctl_cs_queue_kick, ++ kctx); ++ break; ++ case KBASE_IOCTL_CS_QUEUE_GROUP_CREATE: ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_QUEUE_GROUP_CREATE, ++ kbasep_cs_queue_group_create, ++ union kbase_ioctl_cs_queue_group_create, ++ kctx); ++ break; ++ case KBASE_IOCTL_CS_QUEUE_GROUP_TERMINATE: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_GROUP_TERMINATE, ++ kbasep_cs_queue_group_terminate, ++ struct kbase_ioctl_cs_queue_group_term, ++ kctx); ++ break; ++ case KBASE_IOCTL_KCPU_QUEUE_CREATE: ++ KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_KCPU_QUEUE_CREATE, ++ kbasep_kcpu_queue_new, ++ struct kbase_ioctl_kcpu_queue_new, ++ kctx); ++ break; ++ case KBASE_IOCTL_KCPU_QUEUE_DELETE: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_KCPU_QUEUE_DELETE, ++ kbasep_kcpu_queue_delete, ++ struct kbase_ioctl_kcpu_queue_delete, ++ kctx); ++ break; ++ case KBASE_IOCTL_KCPU_QUEUE_ENQUEUE: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_KCPU_QUEUE_ENQUEUE, ++ kbasep_kcpu_queue_enqueue, ++ struct kbase_ioctl_kcpu_queue_enqueue, ++ kctx); ++ break; ++ case KBASE_IOCTL_CS_TILER_HEAP_INIT: ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_TILER_HEAP_INIT, ++ kbasep_cs_tiler_heap_init, ++ union kbase_ioctl_cs_tiler_heap_init, ++ kctx); ++ break; ++ case KBASE_IOCTL_CS_TILER_HEAP_TERM: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_TILER_HEAP_TERM, ++ kbasep_cs_tiler_heap_term, ++ struct kbase_ioctl_cs_tiler_heap_term, ++ kctx); ++ break; ++ case KBASE_IOCTL_CS_GET_GLB_IFACE: ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_GET_GLB_IFACE, ++ kbase_ioctl_cs_get_glb_iface, ++ union kbase_ioctl_cs_get_glb_iface, ++ kctx); ++ break; ++ case KBASE_IOCTL_CS_CPU_QUEUE_DUMP: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_CPU_QUEUE_DUMP, ++ kbasep_ioctl_cs_cpu_queue_dump, ++ struct kbase_ioctl_cs_cpu_queue_info, + kctx); + break; ++#endif /* MALI_USE_CSF */ ++#if MALI_UNIT_TEST + case KBASE_IOCTL_TLSTREAM_STATS: + 
KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_TLSTREAM_STATS, + kbase_api_tlstream_stats, +@@ -1604,6 +1979,18 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + kctx); + break; + #endif /* MALI_UNIT_TEST */ ++ case KBASE_IOCTL_CONTEXT_PRIORITY_CHECK: ++ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CONTEXT_PRIORITY_CHECK, ++ kbasep_ioctl_context_priority_check, ++ struct kbase_ioctl_context_priority_check, ++ kctx); ++ break; ++ case KBASE_IOCTL_SET_LIMITED_CORE_COUNT: ++ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_LIMITED_CORE_COUNT, ++ kbasep_ioctl_set_limited_core_count, ++ struct kbase_ioctl_set_limited_core_count, ++ kctx); ++ break; + } + + dev_warn(kbdev->dev, "Unknown ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd)); +@@ -1611,6 +1998,51 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + return -ENOIOCTLCMD; + } + ++#if MALI_USE_CSF ++static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) ++{ ++ struct kbase_file *const kfile = filp->private_data; ++ struct kbase_context *const kctx = ++ kbase_file_get_kctx_if_setup_complete(kfile); ++ struct base_csf_notification event_data = { ++ .type = BASE_CSF_NOTIFICATION_EVENT }; ++ const size_t data_size = sizeof(event_data); ++ bool read_event = false, read_error = false; ++ ++ if (unlikely(!kctx)) ++ return -EPERM; ++ ++ if (atomic_read(&kctx->event_count)) ++ read_event = true; ++ else ++ read_error = kbase_csf_read_error(kctx, &event_data); ++ ++ if (!read_event && !read_error) { ++ bool dump = kbase_csf_cpu_queue_read_dump_req(kctx, ++ &event_data); ++ /* This condition is not treated as an error. ++ * It is possible that event handling thread was woken up due ++ * to a fault/error that occurred for a queue group, but before ++ * the corresponding fault data was read by the thread the ++ * queue group was already terminated by the userspace. 
++ */ ++ if (!dump) ++ dev_dbg(kctx->kbdev->dev, ++ "Neither event nor error signaled"); ++ } ++ ++ if (copy_to_user(buf, &event_data, data_size) != 0) { ++ dev_warn(kctx->kbdev->dev, ++ "Failed to copy data\n"); ++ return -EFAULT; ++ } ++ ++ if (read_event) ++ atomic_set(&kctx->event_count, 0); ++ ++ return data_size; ++} ++#else /* MALI_USE_CSF */ + static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) + { + struct kbase_file *const kfile = filp->private_data; +@@ -1654,6 +2086,7 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof + out: + return out_count * sizeof(uevent); + } ++#endif /* MALI_USE_CSF */ + + static unsigned int kbase_poll(struct file *filp, poll_table *wait) + { +@@ -1674,19 +2107,31 @@ static unsigned int kbase_poll(struct file *filp, poll_table *wait) + void kbase_event_wakeup(struct kbase_context *kctx) + { + KBASE_DEBUG_ASSERT(kctx); +- ++ dev_dbg(kctx->kbdev->dev, "Waking event queue for context %pK\n", ++ (void *)kctx); + wake_up_interruptible(&kctx->event_queue); + } + + KBASE_EXPORT_TEST_API(kbase_event_wakeup); + ++#if MALI_USE_CSF ++int kbase_event_pending(struct kbase_context *ctx) ++{ ++ WARN_ON_ONCE(!ctx); ++ ++ return (atomic_read(&ctx->event_count) != 0) || ++ kbase_csf_error_pending(ctx) || ++ kbase_csf_cpu_queue_dump_needed(ctx); ++} ++#else + int kbase_event_pending(struct kbase_context *ctx) + { + KBASE_DEBUG_ASSERT(ctx); + + return (atomic_read(&ctx->event_count) != 0) || +- (atomic_read(&ctx->event_closed) != 0); ++ (atomic_read(&ctx->event_closed) != 0); + } ++#endif + + KBASE_EXPORT_TEST_API(kbase_event_pending); + +@@ -1740,42 +2185,166 @@ static const struct file_operations kbase_fops = { + .get_unmapped_area = kbase_get_unmapped_area, + }; + +-/** +- * show_policy - Show callback for the power_policy sysfs file. +- * +- * This function is called to get the contents of the power_policy sysfs +- * file. 
This is a list of the available policies with the currently active one +- * surrounded by square brackets. +- * +- * @dev: The device this sysfs file is for +- * @attr: The attributes of the sysfs file +- * @buf: The output buffer for the sysfs file contents +- * +- * Return: The number of bytes output to @buf. +- */ +-static ssize_t show_policy(struct device *dev, struct device_attribute *attr, char *const buf) ++static ssize_t show_gpu_memory(struct device *dev, struct device_attribute *attr, char * const buf) + { + struct kbase_device *kbdev; +- const struct kbase_pm_policy *current_policy; +- const struct kbase_pm_policy *const *policy_list; +- int policy_count; +- int i; + ssize_t ret = 0; ++ struct list_head *entry; ++ const struct list_head *kbdev_list; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + +- current_policy = kbase_pm_get_policy(kbdev); ++ kbdev_list = kbase_device_get_list(); ++ list_for_each(entry, kbdev_list) { ++ struct kbase_device *kbdev = NULL; ++ struct kbase_context *kctx; ++ ++ kbdev = list_entry(entry, struct kbase_device, entry); ++ /* output the total memory usage and cap for this device */ ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, ++ "%-16s %-16s %10u\n", ++ kbdev->devname, ++ "total used_pages", ++ atomic_read(&(kbdev->memdev.used_pages))); ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, ++ "----------------------------------------------------\n"); ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, ++ "%-16s %-16s %-16s\n", ++ "kctx", "pid", "used_pages"); ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, ++ "----------------------------------------------------\n"); ++ mutex_lock(&kbdev->kctx_list_lock); ++ list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { ++ /* output the memory usage and cap for each kctx ++ * opened on this device */ ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, ++ "%p %10u %10u\n", ++ kctx, ++ kctx->tgid, ++ atomic_read(&(kctx->used_pages))); ++ } ++ 
mutex_unlock(&kbdev->kctx_list_lock); ++ } + +- policy_count = kbase_pm_list_policies(kbdev, &policy_list); ++ kbase_device_put_list(kbdev_list); + +- for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) { +- if (policy_list[i] == current_policy) +- ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name); +- else +- ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name); ++ ++ return ret; ++} ++ ++static ssize_t set_gpu_memory(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) ++{ ++ struct kbase_device *kbdev; ++ ssize_t err = count; ++ ++ kbdev = to_kbase_device(dev); ++ ++ if (!kbdev) ++ return -ENODEV; ++ ++ return err; ++} ++ ++static DEVICE_ATTR(gpu_memory, S_IRUGO | S_IWUSR, show_gpu_memory, set_gpu_memory); ++ ++static ssize_t show_ctx_mem_pool_size(struct device *dev, struct device_attribute *attr, char * const buf) ++{ ++ struct list_head *entry; ++ const struct list_head *kbdev_list; ++ ssize_t ret = 0; ++ int i = 0; ++ struct kbase_device *const kbdev = to_kbase_device(dev); ++ ++ if (!kbdev) ++ return -ENODEV; ++ ++ kbdev_list = kbase_device_get_list(); ++ list_for_each(entry, kbdev_list) { ++ struct kbase_device *kbdev = NULL; ++ struct kbase_context *kctx; ++ ++ kbdev = list_entry(entry, struct kbase_device, entry); ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, ++ "%-16s %-16s %-16s\n", ++ "kctx", "pid", "cached_pages"); ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, ++ "----------------------------------------------------\n"); ++ mutex_lock(&kbdev->kctx_list_lock); ++ list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { ++ /* output the memory cached and cap for each kctx ++ * opened on this device */ ++ unsigned long cached_mem = 0; ++ for (i = 0; i < MEMORY_GROUP_MANAGER_NR_GROUPS; i++) ++ //pr_info("[%d]:kctx->mem_pools.small[%d] = %d", kctx->tgid, i, kctx->mem_pools.small[i].cur_size); ++ cached_mem += kctx->mem_pools.small[i].cur_size; ++ ret += scnprintf(buf + 
ret, PAGE_SIZE - ret, ++ "%p %10u %10lu\n", ++ kctx, ++ kctx->tgid, ++ cached_mem); ++ } ++ mutex_unlock(&kbdev->kctx_list_lock); ++ } ++ ++ kbase_device_put_list(kbdev_list); ++ ++ return ret; ++} ++ ++static ssize_t set_ctx_mem_pool_size(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) ++{ ++ struct kbase_device *kbdev; ++ ssize_t err = count; ++ ++ kbdev = to_kbase_device(dev); ++ ++ if (!kbdev) ++ return -ENODEV; ++ ++ return err; ++} ++ ++static DEVICE_ATTR(ctx_mem_pool_size, S_IRUGO | S_IWUSR, show_ctx_mem_pool_size, set_ctx_mem_pool_size); ++ ++/** ++ * show_policy - Show callback for the power_policy sysfs file. ++ * ++ * This function is called to get the contents of the power_policy sysfs ++ * file. This is a list of the available policies with the currently active one ++ * surrounded by square brackets. ++ * ++ * @dev: The device this sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The output buffer for the sysfs file contents ++ * ++ * Return: The number of bytes output to @buf. 
++ */ ++static ssize_t show_policy(struct device *dev, struct device_attribute *attr, char *const buf) ++{ ++ struct kbase_device *kbdev; ++ const struct kbase_pm_policy *current_policy; ++ const struct kbase_pm_policy *const *policy_list; ++ int policy_count; ++ int i; ++ ssize_t ret = 0; ++ ++ kbdev = to_kbase_device(dev); ++ ++ if (!kbdev) ++ return -ENODEV; ++ ++ current_policy = kbase_pm_get_policy(kbdev); ++ ++ policy_count = kbase_pm_list_policies(kbdev, &policy_list); ++ ++ for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) { ++ if (policy_list[i] == current_policy) ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name); ++ else ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name); + } + + if (ret < PAGE_SIZE - 1) { +@@ -1800,7 +2369,7 @@ static ssize_t show_policy(struct device *dev, struct device_attribute *attr, ch + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file +- * @count: The number of bytes written to the sysfs file ++ * @count: The number of bytes to write to the sysfs file + * + * Return: @count if the function succeeded. An error code on failure. 
+ */ +@@ -1859,6 +2428,7 @@ static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy); + static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, char * const buf) + { + struct kbase_device *kbdev; ++ unsigned long flags; + ssize_t ret = 0; + + kbdev = to_kbase_device(dev); +@@ -1866,6 +2436,19 @@ static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, + if (!kbdev) + return -ENODEV; + ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ++#if MALI_USE_CSF ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, ++ "Current debug core mask : 0x%llX\n", ++ kbdev->pm.debug_core_mask); ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, ++ "Current desired core mask : 0x%llX\n", ++ kbase_pm_ca_get_core_mask(kbdev)); ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, ++ "Current in use core mask : 0x%llX\n", ++ kbdev->pm.backend.shaders_avail); ++#else + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "Current core mask (JS0) : 0x%llX\n", + kbdev->pm.debug_core_mask[0]); +@@ -1875,10 +2458,14 @@ static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "Current core mask (JS2) : 0x%llX\n", + kbdev->pm.debug_core_mask[2]); ++#endif /* MALI_USE_CSF */ ++ + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "Available core mask : 0x%llX\n", + kbdev->gpu_props.props.raw_props.shader_present); + ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ + return ret; + } + +@@ -1890,24 +2477,42 @@ static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file +- * @count: The number of bytes written to the sysfs file ++ * @count: The number of bytes to write to the sysfs file + * + * Return: @count if the function succeeded. An error code on failure. 
+ */ + static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) + { + struct kbase_device *kbdev; ++#if MALI_USE_CSF ++ u64 new_core_mask; ++#else + u64 new_core_mask[3]; +- int items, i; ++ u64 group0_core_mask; ++ int i; ++#endif /* MALI_USE_CSF */ ++ ++ int items; + ssize_t err = count; + unsigned long flags; +- u64 shader_present, group0_core_mask; ++ u64 shader_present; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + ++#if MALI_USE_CSF ++ items = sscanf(buf, "%llx", &new_core_mask); ++ ++ if (items != 1) { ++ dev_err(kbdev->dev, ++ "Couldn't process core mask write operation.\n" ++ "Use format \n"); ++ err = -EINVAL; ++ goto end; ++ } ++#else + items = sscanf(buf, "%llx %llx %llx", + &new_core_mask[0], &new_core_mask[1], + &new_core_mask[2]); +@@ -1922,11 +2527,35 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, + + if (items == 1) + new_core_mask[1] = new_core_mask[2] = new_core_mask[0]; ++#endif + + mutex_lock(&kbdev->pm.lock); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + shader_present = kbdev->gpu_props.props.raw_props.shader_present; ++ ++#if MALI_USE_CSF ++ if ((new_core_mask & shader_present) != new_core_mask) { ++ dev_err(dev, ++ "Invalid core mask 0x%llX: Includes non-existent cores (present = 0x%llX)", ++ new_core_mask, shader_present); ++ err = -EINVAL; ++ goto unlock; ++ ++ } else if (!(new_core_mask & shader_present & ++ kbdev->pm.backend.ca_cores_enabled)) { ++ dev_err(dev, ++ "Invalid core mask 0x%llX: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX\n", ++ new_core_mask, ++ kbdev->gpu_props.props.raw_props.shader_present, ++ kbdev->pm.backend.ca_cores_enabled); ++ err = -EINVAL; ++ goto unlock; ++ } ++ ++ if (kbdev->pm.debug_core_mask != new_core_mask) ++ kbase_pm_set_debug_core_mask(kbdev, new_core_mask); ++#else + group0_core_mask = 
kbdev->gpu_props.props.coherency_info.group[0].core_mask; + + for (i = 0; i < 3; ++i) { +@@ -1949,6 +2578,11 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, + new_core_mask[i], i, group0_core_mask); + err = -EINVAL; + goto unlock; ++ } else if (!(new_core_mask[i] & kbdev->gpu_props.curr_config.shader_present)) { ++ dev_err(dev, "Invalid core mask 0x%llX for JS %d: No intersection with current core mask 0x%llX\n", ++ new_core_mask[i], i, kbdev->gpu_props.curr_config.shader_present); ++ err = -EINVAL; ++ goto unlock; + } + } + +@@ -1961,6 +2595,7 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, + kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0], + new_core_mask[1], new_core_mask[2]); + } ++#endif /* MALI_USE_CSF */ + + unlock: + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +@@ -1978,6 +2613,7 @@ end: + */ + static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask); + ++#if !MALI_USE_CSF + /** + * set_soft_job_timeout - Store callback for the soft_job_timeout sysfs + * file. +@@ -1985,7 +2621,7 @@ static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask); + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The value written to the sysfs file. +- * @count: The number of bytes written to the sysfs file. ++ * @count: The number of bytes to write to the sysfs file. + * + * This allows setting the timeout for software jobs. 
Waiting soft event wait + * jobs will be cancelled after this period expires, while soft fence wait jobs +@@ -2078,7 +2714,7 @@ static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms, + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file +- * @count: The number of bytes written to the sysfs file ++ * @count: The number of bytes to write to the sysfs file + * + * Return: @count if the function succeeded. An error code on failure. + */ +@@ -2255,7 +2891,7 @@ static u32 get_new_js_timeout( + * @dev: The device the sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file +- * @count: The number of bytes written to the sysfs file ++ * @count: The number of bytes to write to the sysfs file + * + * This function is called when the js_scheduling_period sysfs file is written + * to. It checks the data written, and if valid updates the js_scheduling_period +@@ -2295,7 +2931,8 @@ static ssize_t set_js_scheduling_period(struct device *dev, + + /* If no contexts have been scheduled since js_timeouts was last written + * to, the new timeouts might not have been latched yet. So check if an +- * update is pending and use the new values if necessary. */ ++ * update is pending and use the new values if necessary. ++ */ + + /* Use previous 'new' scheduling period as a base if present. 
*/ + old_period = js_data->scheduling_period_ns; +@@ -2420,9 +3057,10 @@ static ssize_t show_js_softstop_always(struct device *dev, + */ + static DEVICE_ATTR(js_softstop_always, S_IRUGO | S_IWUSR, show_js_softstop_always, set_js_softstop_always); + #endif /* CONFIG_MALI_DEBUG */ ++#endif /* !MALI_USE_CSF */ + + #ifdef CONFIG_MALI_DEBUG +-typedef void (kbasep_debug_command_func) (struct kbase_device *); ++typedef void kbasep_debug_command_func(struct kbase_device *); + + enum kbasep_debug_command_code { + KBASEP_DEBUG_COMMAND_DUMPTRACE, +@@ -2568,24 +3206,20 @@ static ssize_t kbase_show_gpuinfo(struct device *dev, + .name = "Mali-G77" }, + { .id = GPU_ID2_PRODUCT_TBEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-G78" }, ++ { .id = GPU_ID2_PRODUCT_TBAX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, ++ .name = "Mali-G78AE" }, + { .id = GPU_ID2_PRODUCT_LBEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-G68" }, + { .id = GPU_ID2_PRODUCT_TNAX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-G57" }, + { .id = GPU_ID2_PRODUCT_TODX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, +- .name = "Mali-TODX" }, ++ .name = "Mali-G710" }, ++ { .id = GPU_ID2_PRODUCT_LODX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, ++ .name = "Mali-G610" }, + { .id = GPU_ID2_PRODUCT_TGRX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, +- .name = "Mali-TGRX" }, ++ .name = "Mali-G510" }, + { .id = GPU_ID2_PRODUCT_TVAX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, +- .name = "Mali-TVAX" }, +- { .id = GPU_ID2_PRODUCT_LODX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, +- .name = "Mali-LODX" }, +- { .id = GPU_ID2_PRODUCT_TTUX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, +- .name = "Mali-TTUX" }, +- { .id = GPU_ID2_PRODUCT_LTUX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, +- .name = "Mali-LTUX" }, +- { .id = GPU_ID2_PRODUCT_TE2X >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, +- .name = "Mali-TE2X" }, ++ .name = "Mali-G310" }, + }; + const char *product_name = "(Unknown Mali GPU)"; + struct kbase_device *kbdev; +@@ -2728,7 +3362,8 @@ static ssize_t set_pm_poweroff(struct device *dev, 
+ + stt = &kbdev->pm.backend.shader_tick_timer; + stt->configured_interval = HR_TIMER_DELAY_NSEC(gpu_poweroff_time); +- stt->configured_ticks = poweroff_shader_ticks; ++ stt->default_ticks = poweroff_shader_ticks; ++ stt->configured_ticks = stt->default_ticks; + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + +@@ -2766,7 +3401,7 @@ static ssize_t show_pm_poweroff(struct device *dev, + stt = &kbdev->pm.backend.shader_tick_timer; + ret = scnprintf(buf, PAGE_SIZE, "%llu %u 0\n", + ktime_to_ns(stt->configured_interval), +- stt->configured_ticks); ++ stt->default_ticks); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + +@@ -2776,6 +3411,75 @@ static ssize_t show_pm_poweroff(struct device *dev, + static DEVICE_ATTR(pm_poweroff, S_IRUGO | S_IWUSR, show_pm_poweroff, + set_pm_poweroff); + ++#if MALI_USE_CSF ++/** ++ * set_idle_hysteresis_time - Store callback for CSF idle_hysteresis_time ++ * sysfs file. ++ * @dev: The device with sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes written to the sysfs file ++ * ++ * This function is called when the idle_hysteresis_time sysfs file is ++ * written to. ++ * ++ * This file contains values of the idle hysteresis duration. ++ * ++ * Return: @count if the function succeeded. An error code on failure. ++ */ ++static ssize_t set_idle_hysteresis_time(struct device *dev, ++ struct device_attribute *attr, const char *buf, size_t count) ++{ ++ struct kbase_device *kbdev; ++ u32 dur; ++ ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; ++ ++ if (kstrtou32(buf, 0, &dur)) { ++ dev_err(kbdev->dev, "Couldn't process idle_hysteresis_time write operation.\n" ++ "Use format \n"); ++ return -EINVAL; ++ } ++ ++ kbase_csf_firmware_set_gpu_idle_hysteresis_time(kbdev, dur); ++ ++ return count; ++} ++ ++/** ++ * show_idle_hysteresis_time - Show callback for CSF idle_hysteresis_time ++ * sysfs entry. 
++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer to receive the GPU information. ++ * ++ * This function is called to get the current idle hysteresis duration in ms. ++ * ++ * Return: The number of bytes output to @buf. ++ */ ++static ssize_t show_idle_hysteresis_time(struct device *dev, ++ struct device_attribute *attr, char * const buf) ++{ ++ struct kbase_device *kbdev; ++ ssize_t ret; ++ u32 dur; ++ ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; ++ ++ dur = kbase_csf_firmware_get_gpu_idle_hysteresis_time(kbdev); ++ ret = scnprintf(buf, PAGE_SIZE, "%u\n", dur); ++ ++ return ret; ++} ++ ++static DEVICE_ATTR(idle_hysteresis_time, S_IRUGO | S_IWUSR, ++ show_idle_hysteresis_time, set_idle_hysteresis_time); ++#endif ++ + /** + * set_reset_timeout - Store callback for the reset_timeout sysfs file. + * @dev: The device with sysfs file is for +@@ -3013,122 +3717,359 @@ static DEVICE_ATTR(lp_mem_pool_max_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_max + set_lp_mem_pool_max_size); + + /** +- * show_js_ctx_scheduling_mode - Show callback for js_ctx_scheduling_mode sysfs +- * entry. ++ * show_simplified_mem_pool_max_size - Show the maximum size for the memory ++ * pool 0 of small (4KiB) pages. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. +- * @buf: The output buffer to receive the context scheduling mode information. ++ * @buf: The output buffer to receive the max size. + * +- * This function is called to get the context scheduling mode being used by JS. ++ * This function is called to get the maximum size for the memory pool 0 of ++ * small (4KiB) pages. It is assumed that the maximum size value is same for ++ * all the pools. + * + * Return: The number of bytes output to @buf. 
+ */ +-static ssize_t show_js_ctx_scheduling_mode(struct device *dev, ++static ssize_t show_simplified_mem_pool_max_size(struct device *dev, + struct device_attribute *attr, char * const buf) + { +- struct kbase_device *kbdev; ++ struct kbase_device *const kbdev = to_kbase_device(dev); + +- kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + +- return scnprintf(buf, PAGE_SIZE, "%u\n", kbdev->js_ctx_scheduling_mode); ++ return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, ++ kbdev->mem_pools.small, 1, kbase_mem_pool_debugfs_max_size); + } + + /** +- * set_js_ctx_scheduling_mode - Set callback for js_ctx_scheduling_mode sysfs +- * entry. +- * @dev: The device this sysfs file is for. +- * @attr: The attributes of the sysfs file. +- * @buf: The value written to the sysfs file. +- * @count: The number of bytes written to the sysfs file. ++ * set_simplified_mem_pool_max_size - Set the same maximum size for all the ++ * memory pools of small (4KiB) pages. ++ * @dev: The device with sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes written to the sysfs file + * +- * This function is called when the js_ctx_scheduling_mode sysfs file is written +- * to. It checks the data written, and if valid updates the ctx scheduling mode +- * being by JS. ++ * This function is called to set the same maximum size for all the memory ++ * pools of small (4KiB) pages. + * +- * Return: @count if the function succeeded. An error code on failure. ++ * Return: @count if the function succeeded. An error code on failure. 
+ */ +-static ssize_t set_js_ctx_scheduling_mode(struct device *dev, ++static ssize_t set_simplified_mem_pool_max_size(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) + { +- struct kbase_context *kctx; +- u32 new_js_ctx_scheduling_mode; +- struct kbase_device *kbdev; +- unsigned long flags; +- int ret; ++ struct kbase_device *const kbdev = to_kbase_device(dev); ++ unsigned long new_size; ++ int gid; ++ int err; + +- kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + +- ret = kstrtouint(buf, 0, &new_js_ctx_scheduling_mode); +- if (ret || new_js_ctx_scheduling_mode >= KBASE_JS_PRIORITY_MODE_COUNT) { +- dev_err(kbdev->dev, "Couldn't process js_ctx_scheduling_mode" +- " write operation.\n" +- "Use format \n"); ++ err = kstrtoul(buf, 0, &new_size); ++ if (err) + return -EINVAL; +- } +- +- if (new_js_ctx_scheduling_mode == kbdev->js_ctx_scheduling_mode) +- return count; +- +- mutex_lock(&kbdev->kctx_list_lock); +- spin_lock_irqsave(&kbdev->hwaccess_lock, flags); +- +- /* Update the context priority mode */ +- kbdev->js_ctx_scheduling_mode = new_js_ctx_scheduling_mode; +- +- /* Adjust priority of all the contexts as per the new mode */ +- list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) +- kbase_js_update_ctx_priority(kctx); +- +- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +- mutex_unlock(&kbdev->kctx_list_lock); + +- dev_dbg(kbdev->dev, "JS ctx scheduling mode: %u\n", new_js_ctx_scheduling_mode); ++ for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) ++ kbase_mem_pool_debugfs_set_max_size( ++ kbdev->mem_pools.small, gid, (size_t)new_size); + + return count; + } + +-static DEVICE_ATTR(js_ctx_scheduling_mode, S_IRUGO | S_IWUSR, +- show_js_ctx_scheduling_mode, +- set_js_ctx_scheduling_mode); +- +-#ifdef MALI_KBASE_BUILD +-#ifdef CONFIG_DEBUG_FS +- +-/* Number of entries in serialize_jobs_settings[] */ +-#define NR_SERIALIZE_JOBS_SETTINGS 5 +-/* Maximum string length in 
serialize_jobs_settings[].name */ +-#define MAX_SERIALIZE_JOBS_NAME_LEN 16 +- +-static struct +-{ +- char *name; +- u8 setting; +-} serialize_jobs_settings[NR_SERIALIZE_JOBS_SETTINGS] = { +- {"none", 0}, +- {"intra-slot", KBASE_SERIALIZE_INTRA_SLOT}, +- {"inter-slot", KBASE_SERIALIZE_INTER_SLOT}, +- {"full", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT}, +- {"full-reset", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT | +- KBASE_SERIALIZE_RESET} +-}; ++static DEVICE_ATTR(max_size, 0600, show_simplified_mem_pool_max_size, ++ set_simplified_mem_pool_max_size); + + /** +- * kbasep_serialize_jobs_seq_show - Show callback for the serialize_jobs debugfs +- * file +- * @sfile: seq_file pointer +- * @data: Private callback data ++ * show_simplified_lp_mem_pool_max_size - Show the maximum size for the memory ++ * pool 0 of large (2MiB) pages. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer to receive the max size. + * +- * This function is called to get the contents of the serialize_jobs debugfs +- * file. This is a list of the available settings with the currently active one +- * surrounded by square brackets. ++ * This function is called to get the maximum size for the memory pool 0 of ++ * large (2MiB) pages. It is assumed that the maximum size value is same for ++ * all the pools. + * +- * Return: 0 on success, or an error code on error ++ * Return: The number of bytes output to @buf.
+ */ +-static int kbasep_serialize_jobs_seq_show(struct seq_file *sfile, void *data) ++static ssize_t show_simplified_lp_mem_pool_max_size(struct device *dev, ++ struct device_attribute *attr, char * const buf) ++{ ++ struct kbase_device *const kbdev = to_kbase_device(dev); ++ ++ if (!kbdev) ++ return -ENODEV; ++ ++ return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, ++ kbdev->mem_pools.large, 1, kbase_mem_pool_debugfs_max_size); ++} ++ ++/** ++ * set_simplified_lp_mem_pool_max_size - Set the same maximum size for all the ++ * memory pools of large (2MiB) pages. ++ * @dev: The device this sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes written to the sysfs file ++ * ++ * This function is called to set the same maximum size for all the memory ++ * pools of large (2MiB) pages. ++ * ++ * Return: @count if the function succeeded. An error code on failure. ++ */ ++static ssize_t set_simplified_lp_mem_pool_max_size(struct device *dev, ++ struct device_attribute *attr, const char *buf, size_t count) ++{ ++ struct kbase_device *const kbdev = to_kbase_device(dev); ++ unsigned long new_size; ++ int gid; ++ int err; ++ ++ if (!kbdev) ++ return -ENODEV; ++ ++ err = kstrtoul(buf, 0, &new_size); ++ if (err) ++ return -EINVAL; ++ ++ for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) ++ kbase_mem_pool_debugfs_set_max_size( ++ kbdev->mem_pools.large, gid, (size_t)new_size); ++ ++ return count; ++} ++ ++static DEVICE_ATTR(lp_max_size, 0600, show_simplified_lp_mem_pool_max_size, ++ set_simplified_lp_mem_pool_max_size); ++ ++/** ++ * show_simplified_ctx_default_max_size - Show the default maximum size for the ++ * memory pool 0 of small (4KiB) pages. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer to receive the pool size.
++ * ++ * This function is called to get the default ctx maximum size for the memory ++ * pool 0 of small (4KiB) pages. It is assumed that maximum size value is same ++ * for all the pools. The maximum size for the pool of large (2MiB) pages will ++ * be same as max size of the pool of small (4KiB) pages in terms of bytes. ++ * ++ * Return: The number of bytes output to @buf. ++ */ ++static ssize_t show_simplified_ctx_default_max_size(struct device *dev, ++ struct device_attribute *attr, char * const buf) ++{ ++ struct kbase_device *kbdev = to_kbase_device(dev); ++ size_t max_size; ++ ++ if (!kbdev) ++ return -ENODEV; ++ ++ max_size = kbase_mem_pool_config_debugfs_max_size( ++ kbdev->mem_pool_defaults.small, 0); ++ ++ return scnprintf(buf, PAGE_SIZE, "%zu\n", max_size); ++} ++ ++/** ++ * set_simplified_ctx_default_max_size - Set the same default maximum size for ++ * all the pools created for new ++ * contexts. This covers the pool of ++ * large pages as well and its max size ++ * will be same as max size of the pool ++ * of small pages in terms of bytes. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The value written to the sysfs file. ++ * @count: The number of bytes written to the sysfs file. ++ * ++ * This function is called to set the same maximum size for all pools created ++ * for new contexts. ++ * ++ * Return: @count if the function succeeded. An error code on failure. 
++ */ ++static ssize_t set_simplified_ctx_default_max_size(struct device *dev, ++ struct device_attribute *attr, const char *buf, size_t count) ++{ ++ struct kbase_device *kbdev; ++ unsigned long new_size; ++ int err; ++ ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; ++ ++ err = kstrtoul(buf, 0, &new_size); ++ if (err) ++ return -EINVAL; ++ ++ kbase_mem_pool_group_config_set_max_size( ++ &kbdev->mem_pool_defaults, (size_t)new_size); ++ ++ return count; ++} ++ ++static DEVICE_ATTR(ctx_default_max_size, 0600, ++ show_simplified_ctx_default_max_size, ++ set_simplified_ctx_default_max_size); ++ ++#if !MALI_USE_CSF ++/** ++ * show_js_ctx_scheduling_mode - Show callback for js_ctx_scheduling_mode sysfs ++ * entry. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer to receive the context scheduling mode information. ++ * ++ * This function is called to get the context scheduling mode being used by JS. ++ * ++ * Return: The number of bytes output to @buf. ++ */ ++static ssize_t show_js_ctx_scheduling_mode(struct device *dev, ++ struct device_attribute *attr, char * const buf) ++{ ++ struct kbase_device *kbdev; ++ ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; ++ ++ return scnprintf(buf, PAGE_SIZE, "%u\n", kbdev->js_ctx_scheduling_mode); ++} ++ ++/** ++ * set_js_ctx_scheduling_mode - Set callback for js_ctx_scheduling_mode sysfs ++ * entry. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The value written to the sysfs file. ++ * @count: The number of bytes written to the sysfs file. ++ * ++ * This function is called when the js_ctx_scheduling_mode sysfs file is written ++ * to. It checks the data written, and if valid updates the ctx scheduling mode ++ * being used by JS. ++ * ++ * Return: @count if the function succeeded. An error code on failure.
++ */ ++static ssize_t set_js_ctx_scheduling_mode(struct device *dev, ++ struct device_attribute *attr, const char *buf, size_t count) ++{ ++ struct kbase_context *kctx; ++ u32 new_js_ctx_scheduling_mode; ++ struct kbase_device *kbdev; ++ unsigned long flags; ++ int ret; ++ ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; ++ ++ ret = kstrtouint(buf, 0, &new_js_ctx_scheduling_mode); ++ if (ret || new_js_ctx_scheduling_mode >= KBASE_JS_PRIORITY_MODE_COUNT) { ++ dev_err(kbdev->dev, "Couldn't process js_ctx_scheduling_mode" ++ " write operation.\n" ++ "Use format \n"); ++ return -EINVAL; ++ } ++ ++ if (new_js_ctx_scheduling_mode == kbdev->js_ctx_scheduling_mode) ++ return count; ++ ++ mutex_lock(&kbdev->kctx_list_lock); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ++ /* Update the context priority mode */ ++ kbdev->js_ctx_scheduling_mode = new_js_ctx_scheduling_mode; ++ ++ /* Adjust priority of all the contexts as per the new mode */ ++ list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) ++ kbase_js_update_ctx_priority(kctx); ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->kctx_list_lock); ++ ++ dev_dbg(kbdev->dev, "JS ctx scheduling mode: %u\n", new_js_ctx_scheduling_mode); ++ ++ return count; ++} ++ ++static DEVICE_ATTR(js_ctx_scheduling_mode, S_IRUGO | S_IWUSR, ++ show_js_ctx_scheduling_mode, ++ set_js_ctx_scheduling_mode); ++ ++#ifdef MALI_KBASE_BUILD ++ ++/* Number of entries in serialize_jobs_settings[] */ ++#define NR_SERIALIZE_JOBS_SETTINGS 5 ++/* Maximum string length in serialize_jobs_settings[].name */ ++#define MAX_SERIALIZE_JOBS_NAME_LEN 16 ++ ++static struct ++{ ++ char *name; ++ u8 setting; ++} serialize_jobs_settings[NR_SERIALIZE_JOBS_SETTINGS] = { ++ {"none", 0}, ++ {"intra-slot", KBASE_SERIALIZE_INTRA_SLOT}, ++ {"inter-slot", KBASE_SERIALIZE_INTER_SLOT}, ++ {"full", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT}, ++ {"full-reset", KBASE_SERIALIZE_INTRA_SLOT | 
KBASE_SERIALIZE_INTER_SLOT | ++ KBASE_SERIALIZE_RESET} ++}; ++ ++/** ++ * update_serialize_jobs_setting - Update the serialization setting for the ++ * submission of GPU jobs. ++ * ++ * This function is called when the serialize_jobs sysfs/debugfs file is ++ * written to. It matches the requested setting against the available settings ++ * and if a matching setting is found updates kbdev->serialize_jobs. ++ * ++ * @kbdev: An instance of the GPU platform device, allocated from the probe ++ * method of the driver. ++ * @buf: Buffer containing the value written to the sysfs/debugfs file. ++ * @count: The number of bytes to write to the sysfs/debugfs file. ++ * ++ * Return: @count if the function succeeded. An error code on failure. ++ */ ++static ssize_t update_serialize_jobs_setting(struct kbase_device *kbdev, ++ const char *buf, size_t count) ++{ ++ int i; ++ bool valid = false; ++ ++ for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { ++ if (sysfs_streq(serialize_jobs_settings[i].name, buf)) { ++ kbdev->serialize_jobs = ++ serialize_jobs_settings[i].setting; ++ valid = true; ++ break; ++ } ++ } ++ ++ if (!valid) { ++ dev_err(kbdev->dev, "serialize_jobs: invalid setting"); ++ return -EINVAL; ++ } ++ ++ return count; ++} ++ ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++/** ++ * kbasep_serialize_jobs_seq_debugfs_show - Show callback for the serialize_jobs ++ * debugfs file ++ * @sfile: seq_file pointer ++ * @data: Private callback data ++ * ++ * This function is called to get the contents of the serialize_jobs debugfs ++ * file. This is a list of the available settings with the currently active one ++ * surrounded by square brackets. 
++ * ++ * Return: 0 on success, or an error code on error ++ */ ++static int kbasep_serialize_jobs_seq_debugfs_show(struct seq_file *sfile, ++ void *data) + { + struct kbase_device *kbdev = sfile->private; + int i; +@@ -3169,8 +4110,6 @@ static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file, + struct seq_file *s = file->private_data; + struct kbase_device *kbdev = s->private; + char buf[MAX_SERIALIZE_JOBS_NAME_LEN]; +- int i; +- bool valid = false; + + CSTD_UNUSED(ppos); + +@@ -3180,21 +4119,7 @@ static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file, + + buf[count] = 0; + +- for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { +- if (sysfs_streq(serialize_jobs_settings[i].name, buf)) { +- kbdev->serialize_jobs = +- serialize_jobs_settings[i].setting; +- valid = true; +- break; +- } +- } +- +- if (!valid) { +- dev_err(kbdev->dev, "serialize_jobs: invalid setting\n"); +- return -EINVAL; +- } +- +- return count; ++ return update_serialize_jobs_setting(kbdev, buf, count); + } + + /** +@@ -3208,7 +4133,8 @@ static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file, + static int kbasep_serialize_jobs_debugfs_open(struct inode *in, + struct file *file) + { +- return single_open(file, kbasep_serialize_jobs_seq_show, in->i_private); ++ return single_open(file, kbasep_serialize_jobs_seq_debugfs_show, ++ in->i_private); + } + + static const struct file_operations kbasep_serialize_jobs_debugfs_fops = { +@@ -3221,27 +4147,101 @@ static const struct file_operations kbasep_serialize_jobs_debugfs_fops = { + }; + + #endif /* CONFIG_DEBUG_FS */ ++ ++/** ++ * show_serialize_jobs_sysfs - Show callback for serialize_jobs sysfs file. ++ * ++ * This function is called to get the contents of the serialize_jobs sysfs ++ * file. This is a list of the available settings with the currently active ++ * one surrounded by square brackets. 
++ * ++ * @dev: The device this sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The output buffer for the sysfs file contents ++ * ++ * Return: The number of bytes output to @buf. ++ */ ++static ssize_t show_serialize_jobs_sysfs(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct kbase_device *kbdev = to_kbase_device(dev); ++ ssize_t ret = 0; ++ int i; ++ ++ for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { ++ if (kbdev->serialize_jobs == ++ serialize_jobs_settings[i].setting) ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s]", ++ serialize_jobs_settings[i].name); ++ else ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", ++ serialize_jobs_settings[i].name); ++ } ++ ++ if (ret < PAGE_SIZE - 1) { ++ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); ++ } else { ++ buf[PAGE_SIZE - 2] = '\n'; ++ buf[PAGE_SIZE - 1] = '\0'; ++ ret = PAGE_SIZE - 1; ++ } ++ ++ return ret; ++} ++ ++/** ++ * store_serialize_jobs_sysfs - Store callback for serialize_jobs sysfs file. ++ * ++ * This function is called when the serialize_jobs sysfs file is written to. ++ * It matches the requested setting against the available settings and if a ++ * matching setting is found updates kbdev->serialize_jobs. ++ * ++ * @dev: The device this sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes to write to the sysfs file ++ * ++ * Return: @count if the function succeeded. An error code on failure. 
++ */ ++static ssize_t store_serialize_jobs_sysfs(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t count) ++{ ++ return update_serialize_jobs_setting(to_kbase_device(dev), buf, count); ++} ++ ++static DEVICE_ATTR(serialize_jobs, 0600, show_serialize_jobs_sysfs, ++ store_serialize_jobs_sysfs); + #endif /* MALI_KBASE_BUILD */ ++#endif /* !MALI_USE_CSF */ + + static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data) + { + struct kbase_device *kbdev = container_of(data, struct kbase_device, + protected_mode_hwcnt_disable_work); ++ spinlock_t *backend_lock; + unsigned long flags; + + bool do_disable; + +- spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++#if MALI_USE_CSF ++ backend_lock = &kbdev->csf.scheduler.interrupt_lock; ++#else ++ backend_lock = &kbdev->hwaccess_lock; ++#endif ++ ++ spin_lock_irqsave(backend_lock, flags); + do_disable = !kbdev->protected_mode_hwcnt_desired && + !kbdev->protected_mode_hwcnt_disabled; +- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ spin_unlock_irqrestore(backend_lock, flags); + + if (!do_disable) + return; + + kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); + +- spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ spin_lock_irqsave(backend_lock, flags); + do_disable = !kbdev->protected_mode_hwcnt_desired && + !kbdev->protected_mode_hwcnt_disabled; + +@@ -3251,7 +4251,9 @@ static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data) + * the state machine. + */ + kbdev->protected_mode_hwcnt_disabled = true; ++#if !MALI_USE_CSF + kbase_backend_slot_update(kbdev); ++#endif /* !MALI_USE_CSF */ + } else { + /* Protected mode state was updated while we were doing the + * disable, so we need to undo the disable we just performed. 
+@@ -3259,9 +4261,10 @@ static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data) + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + } + +- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ spin_unlock_irqrestore(backend_lock, flags); + } + ++#ifndef PLATFORM_PROTECTED_CALLBACKS + static int kbasep_protected_mode_enable(struct protected_mode_device *pdev) + { + struct kbase_device *kbdev = pdev->data; +@@ -3281,6 +4284,9 @@ static const struct protected_mode_ops kbasep_native_protected_ops = { + .protected_mode_disable = kbasep_protected_mode_disable + }; + ++#define PLATFORM_PROTECTED_CALLBACKS (&kbasep_native_protected_ops) ++#endif /* PLATFORM_PROTECTED_CALLBACKS */ ++ + int kbase_protected_mode_init(struct kbase_device *kbdev) + { + /* Use native protected ops */ +@@ -3289,7 +4295,7 @@ int kbase_protected_mode_init(struct kbase_device *kbdev) + if (!kbdev->protected_dev) + return -ENOMEM; + kbdev->protected_dev->data = kbdev; +- kbdev->protected_ops = &kbasep_native_protected_ops; ++ kbdev->protected_ops = PLATFORM_PROTECTED_CALLBACKS; + INIT_WORK(&kbdev->protected_mode_hwcnt_disable_work, + kbasep_protected_mode_hwcnt_disable_worker); + kbdev->protected_mode_hwcnt_desired = true; +@@ -3303,15 +4309,6 @@ void kbase_protected_mode_term(struct kbase_device *kbdev) + kfree(kbdev->protected_dev); + } + +-#ifdef CONFIG_MALI_NO_MALI +-static int kbase_common_reg_map(struct kbase_device *kbdev) +-{ +- return 0; +-} +-static void kbase_common_reg_unmap(struct kbase_device * const kbdev) +-{ +-} +-#else /* CONFIG_MALI_NO_MALI */ + static int kbase_common_reg_map(struct kbase_device *kbdev) + { + int err = 0; +@@ -3347,7 +4344,6 @@ static void kbase_common_reg_unmap(struct kbase_device * const kbdev) + kbdev->reg_size = 0; + } + } +-#endif /* CONFIG_MALI_NO_MALI */ + + int registers_map(struct kbase_device * const kbdev) + { +@@ -3367,6 +4363,15 @@ int registers_map(struct kbase_device * const kbdev) + kbdev->reg_start = reg_res->start; + 
kbdev->reg_size = resource_size(reg_res); + ++#if MALI_USE_CSF ++ if (kbdev->reg_size < ++ (CSF_HW_DOORBELL_PAGE_OFFSET + ++ CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE)) { ++ dev_err(kbdev->dev, "Insufficient register space, will override to the required size\n"); ++ kbdev->reg_size = CSF_HW_DOORBELL_PAGE_OFFSET + ++ CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE; ++ } ++#endif + + err = kbase_common_reg_map(kbdev); + if (err) { +@@ -3448,6 +4453,7 @@ int kbase_device_pm_init(struct kbase_device *kbdev) + u32 gpu_model_id; + + if (kbase_is_pv_enabled(kbdev->dev->of_node)) { ++ dev_info(kbdev->dev, "Arbitration interface enabled\n"); + if (kbase_is_pm_enabled(kbdev->dev->of_node)) { + /* Arbitration AND power management invalid */ + dev_err(kbdev->dev, "Invalid combination of arbitration AND power management\n"); +@@ -3471,13 +4477,16 @@ int kbase_device_pm_init(struct kbase_device *kbdev) + gpu_model_id = GPU_ID2_MODEL_MATCH_VALUE(product_id); + + if (gpu_model_id != GPU_ID2_PRODUCT_TGOX +- && gpu_model_id != GPU_ID2_PRODUCT_TNOX) { ++ && gpu_model_id != GPU_ID2_PRODUCT_TNOX ++ && gpu_model_id != GPU_ID2_PRODUCT_TBAX) { + kbase_arbiter_pm_early_term(kbdev); + dev_err(kbdev->dev, "GPU platform not suitable for arbitration\n"); + return -EPERM; + } + } + } else { ++ kbdev->arb.arb_if = NULL; ++ kbdev->arb.arb_dev = NULL; + err = power_control_init(kbdev); + } + #else +@@ -3489,7 +4498,7 @@ int kbase_device_pm_init(struct kbase_device *kbdev) + void kbase_device_pm_term(struct kbase_device *kbdev) + { + #ifdef CONFIG_MALI_ARBITER_SUPPORT +-#ifdef CONFIG_OF ++#if IS_ENABLED(CONFIG_OF) + if (kbase_is_pv_enabled(kbdev->dev->of_node)) + kbase_arbiter_pm_early_term(kbdev); + else +@@ -3502,7 +4511,7 @@ void kbase_device_pm_term(struct kbase_device *kbdev) + + int power_control_init(struct kbase_device *kbdev) + { +-#if KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE || !defined(CONFIG_OF) ++#ifndef CONFIG_OF + /* Power control initialization requires at least the capability 
to get + * regulators and clocks from the device tree, as well as parsing + * arrays of unsigned integer values. +@@ -3597,12 +4606,6 @@ int power_control_init(struct kbase_device *kbdev) + * on the device tree of the platform shouldn't prevent the driver + * from completing its initialization. + */ +-#if (KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE && \ +- !defined(LSK_OPPV2_BACKPORT)) +- err = of_init_opp_table(kbdev->dev); +- CSTD_UNUSED(err); +-#else +- + #if defined(CONFIG_PM_OPP) + #if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \ + defined(CONFIG_REGULATOR)) +@@ -3614,8 +4617,6 @@ int power_control_init(struct kbase_device *kbdev) + err = dev_pm_opp_of_add_table(kbdev->dev); + CSTD_UNUSED(err); + #endif /* CONFIG_PM_OPP */ +- +-#endif /* KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE */ + return 0; + + clocks_probe_defer: +@@ -3624,20 +4625,13 @@ clocks_probe_defer: + regulator_put(kbdev->regulators[i]); + #endif + return err; +-#endif /* KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE */ ++#endif /* CONFIG_OF */ + } + + void power_control_term(struct kbase_device *kbdev) + { + unsigned int i; + +-#if (KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE && \ +- !defined(LSK_OPPV2_BACKPORT)) +-#if KERNEL_VERSION(3, 19, 0) <= LINUX_VERSION_CODE +- of_free_opp_table(kbdev->dev); +-#endif +-#else +- + #if defined(CONFIG_PM_OPP) + dev_pm_opp_of_remove_table(kbdev->dev); + #if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \ +@@ -3647,8 +4641,6 @@ void power_control_term(struct kbase_device *kbdev) + #endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */ + #endif /* CONFIG_PM_OPP */ + +-#endif /* KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE */ +- + for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { + if (kbdev->clocks[i]) { + if (__clk_is_enabled(kbdev->clocks[i])) +@@ -3659,24 +4651,23 @@ void power_control_term(struct kbase_device *kbdev) + break; + } + +-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ +- && 
defined(CONFIG_REGULATOR) ++#if defined(CONFIG_OF) && defined(CONFIG_REGULATOR) + for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { + if (kbdev->regulators[i]) { + regulator_put(kbdev->regulators[i]); + kbdev->regulators[i] = NULL; + } + } +-#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ ++#endif + } + + #ifdef MALI_KBASE_BUILD +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + + static void trigger_reset(struct kbase_device *kbdev) + { + kbase_pm_context_active(kbdev); +- if (kbase_prepare_to_reset_gpu(kbdev)) ++ if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) + kbase_reset_gpu(kbdev); + kbase_pm_context_idle(kbdev); + } +@@ -3704,7 +4695,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get,\ + MAKE_QUIRK_ACCESSORS(sc); + MAKE_QUIRK_ACCESSORS(tiler); + MAKE_QUIRK_ACCESSORS(mmu); +-MAKE_QUIRK_ACCESSORS(jm); ++MAKE_QUIRK_ACCESSORS(gpu); + + static ssize_t kbase_device_debugfs_reset_write(struct file *file, + const char __user *ubuf, size_t count, loff_t *ppos) +@@ -3825,7 +4816,9 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev) + kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname, + NULL); + if (!kbdev->mali_debugfs_directory) { +- dev_err(kbdev->dev, "Couldn't create mali debugfs directory\n"); ++ dev_err(kbdev->dev, ++ "Couldn't create mali debugfs directory: %s\n", ++ kbdev->devname); + err = -ENOMEM; + goto out; + } +@@ -3838,6 +4831,14 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev) + goto out; + } + ++ kbdev->debugfs_instr_directory = debugfs_create_dir("instrumentation", ++ kbdev->mali_debugfs_directory); ++ if (!kbdev->debugfs_instr_directory) { ++ dev_err(kbdev->dev, "Couldn't create mali debugfs instrumentation directory\n"); ++ err = -ENOMEM; ++ goto out; ++ } ++ + debugfs_ctx_defaults_directory = debugfs_create_dir("defaults", + kbdev->debugfs_ctx_directory); + if (!debugfs_ctx_defaults_directory) { +@@ -3846,20 +4847,20 @@ int kbase_device_debugfs_init(struct kbase_device 
*kbdev) + goto out; + } + +-#if !MALI_CUSTOMER_RELEASE +- kbasep_regs_dump_debugfs_init(kbdev); +-#endif /* !MALI_CUSTOMER_RELEASE */ + kbasep_regs_history_debugfs_init(kbdev); + ++#if !MALI_USE_CSF + kbase_debug_job_fault_debugfs_init(kbdev); ++#endif /* !MALI_USE_CSF */ + + kbasep_gpu_memory_debugfs_init(kbdev); + kbase_as_fault_debugfs_init(kbdev); +-#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS ++#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS + kbase_instr_backend_debugfs_init(kbdev); + #endif + /* fops_* variables created by invocations of macro +- * MAKE_QUIRK_ACCESSORS() above. */ ++ * MAKE_QUIRK_ACCESSORS() above. ++ */ + debugfs_create_file("quirks_sc", 0644, + kbdev->mali_debugfs_directory, kbdev, + &fops_sc_quirks); +@@ -3869,9 +4870,8 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev) + debugfs_create_file("quirks_mmu", 0644, + kbdev->mali_debugfs_directory, kbdev, + &fops_mmu_quirks); +- debugfs_create_file("quirks_jm", 0644, +- kbdev->mali_debugfs_directory, kbdev, +- &fops_jm_quirks); ++ debugfs_create_file("quirks_gpu", 0644, kbdev->mali_debugfs_directory, ++ kbdev, &fops_gpu_quirks); + + debugfs_create_bool("infinite_cache", mode, + debugfs_ctx_defaults_directory, +@@ -3900,16 +4900,20 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev) + kbase_ktrace_debugfs_init(kbdev); + + #ifdef CONFIG_MALI_DEVFREQ +-#ifdef CONFIG_DEVFREQ_THERMAL ++#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) + if (kbdev->devfreq) + kbase_ipa_debugfs_init(kbdev); + #endif /* CONFIG_DEVFREQ_THERMAL */ + #endif /* CONFIG_MALI_DEVFREQ */ + ++#if !MALI_USE_CSF + debugfs_create_file("serialize_jobs", S_IRUGO | S_IWUSR, + kbdev->mali_debugfs_directory, kbdev, + &kbasep_serialize_jobs_debugfs_fops); + ++#endif ++ kbase_dvfs_status_debugfs_init(kbdev); ++ + return 0; + + out: +@@ -3926,7 +4930,7 @@ void kbase_device_debugfs_term(struct kbase_device *kbdev) + + int kbase_device_coherency_init(struct kbase_device *kbdev) + { +-#ifdef CONFIG_OF ++#if 
IS_ENABLED(CONFIG_OF) + u32 supported_coherency_bitmap = + kbdev->gpu_props.props.raw_props.coherency_mode; + const void *coherency_override_dts; +@@ -3953,7 +4957,7 @@ int kbase_device_coherency_init(struct kbase_device *kbdev) + kbdev->system_coherency = COHERENCY_NONE; + + /* device tree may override the coherency */ +-#ifdef CONFIG_OF ++#if IS_ENABLED(CONFIG_OF) + coherency_override_dts = of_get_property(kbdev->dev->of_node, + "system-coherency", + NULL); +@@ -3961,6 +4965,17 @@ int kbase_device_coherency_init(struct kbase_device *kbdev) + + override_coherency = be32_to_cpup(coherency_override_dts); + ++#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_NO_MALI) ++ /* ACE coherency mode is not supported by Driver on CSF GPUs. ++ * Return an error to signal the invalid device tree configuration. ++ */ ++ if (override_coherency == COHERENCY_ACE) { ++ dev_err(kbdev->dev, ++ "ACE coherency not supported, wrong DT configuration"); ++ return -EINVAL; ++ } ++#endif ++ + if ((override_coherency <= COHERENCY_NONE) && + (supported_coherency_bitmap & + COHERENCY_FEATURE_BIT(override_coherency))) { +@@ -3984,67 +4999,226 @@ int kbase_device_coherency_init(struct kbase_device *kbdev) + return 0; + } + +-#ifdef CONFIG_MALI_BUSLOG + +-/* Callback used by the kbase bus logger client, to initiate a GPU reset +- * when the bus log is restarted. GPU reset is used as reference point +- * in HW bus log analyses. ++#if MALI_USE_CSF ++/** ++ * csg_scheduling_period_store - Store callback for the csg_scheduling_period ++ * sysfs file. ++ * @dev: The device this sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes written to the sysfs file ++ * ++ * This function is called when the csg_scheduling_period sysfs file is written ++ * to. It checks the data written, and if valid updates the scheduling period. ++ * ++ * Return: @count if the function succeeded. An error code on failure.
+ */ +-static void kbase_logging_started_cb(void *data) ++static ssize_t csg_scheduling_period_store(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t count) + { +- struct kbase_device *kbdev = (struct kbase_device *)data; ++ struct kbase_device *kbdev; ++ int ret; ++ unsigned int csg_scheduling_period; + +- if (kbase_prepare_to_reset_gpu(kbdev)) +- kbase_reset_gpu(kbdev); +- dev_info(kbdev->dev, "KBASE - Bus logger restarted\n"); ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; ++ ++ ret = kstrtouint(buf, 0, &csg_scheduling_period); ++ if (ret || csg_scheduling_period == 0) { ++ dev_err(kbdev->dev, ++ "Couldn't process csg_scheduling_period write operation.\n" ++ "Use format 'csg_scheduling_period_ms', and csg_scheduling_period_ms > 0\n"); ++ return -EINVAL; ++ } ++ ++ kbase_csf_scheduler_lock(kbdev); ++ kbdev->csf.scheduler.csg_scheduling_period_ms = csg_scheduling_period; ++ dev_dbg(kbdev->dev, "CSG scheduling period: %ums\n", ++ csg_scheduling_period); ++ kbase_csf_scheduler_unlock(kbdev); ++ ++ return count; + } + +-int buslog_init(struct kbase_device *kbdev) ++/** ++ * csg_scheduling_period_show - Show callback for the csg_scheduling_period ++ * sysfs entry. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer to receive the CSG scheduling period in ms. ++ * ++ * This function is called to get the current CSG scheduling period. ++ * ++ * Return: The number of bytes output to @buf.
++ */ ++static ssize_t csg_scheduling_period_show(struct device *dev, ++ struct device_attribute *attr, ++ char *const buf) + { +- int err = 0; ++ struct kbase_device *kbdev; ++ ssize_t ret; + +- err = bl_core_client_register(kbdev->devname, +- kbase_logging_started_cb, +- kbdev, &kbdev->buslogger, +- THIS_MODULE, NULL); +- if (err == 0) +- bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024); ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; + +- return err; ++ ret = scnprintf(buf, PAGE_SIZE, "%u\n", ++ kbdev->csf.scheduler.csg_scheduling_period_ms); ++ ++ return ret; + } + +-void buslog_term(struct kbase_device *kbdev) ++static DEVICE_ATTR(csg_scheduling_period, 0644, csg_scheduling_period_show, ++ csg_scheduling_period_store); ++ ++/** ++ * fw_timeout_store - Store callback for the fw_timeout sysfs file. ++ * @dev: The device this sysfs file is for ++ * @attr: The attributes of the sysfs file ++ * @buf: The value written to the sysfs file ++ * @count: The number of bytes written to the sysfs file ++ * ++ * This function is called when the fw_timeout sysfs file is written to. It ++ * checks the data written, and if valid updates the firmware timeout. ++ * ++ * Return: @count if the function succeeded. An error code on failure.
++ */ ++static ssize_t fw_timeout_store(struct device *dev, ++ struct device_attribute *attr, const char *buf, ++ size_t count) + { +- bl_core_client_unregister(kbdev->buslogger); ++ struct kbase_device *kbdev; ++ int ret; ++ unsigned int fw_timeout; ++ ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; ++ ++ ret = kstrtouint(buf, 0, &fw_timeout); ++ if (ret || fw_timeout == 0) { ++ dev_err(kbdev->dev, "%s\n%s\n%u", ++ "Couldn't process fw_timeout write operation.", ++ "Use format 'fw_timeout_ms', and fw_timeout_ms > 0", ++ FIRMWARE_PING_INTERVAL_MS); ++ return -EINVAL; ++ } ++ ++ kbase_csf_scheduler_lock(kbdev); ++ kbdev->csf.fw_timeout_ms = fw_timeout; ++ kbase_csf_scheduler_unlock(kbdev); ++ dev_dbg(kbdev->dev, "Firmware timeout: %ums\n", fw_timeout); ++ ++ return count; + } +-#endif ++ ++/** ++ * fw_timeout_show - Show callback for the firmware timeout sysfs entry. ++ * @dev: The device this sysfs file is for. ++ * @attr: The attributes of the sysfs file. ++ * @buf: The output buffer to receive the GPU information. ++ * ++ * This function is called to get the current firmware timeout. ++ * ++ * Return: The number of bytes output to @buf.
++ */ ++static ssize_t fw_timeout_show(struct device *dev, ++ struct device_attribute *attr, char *const buf) ++{ ++ struct kbase_device *kbdev; ++ ssize_t ret; ++ ++ kbdev = to_kbase_device(dev); ++ if (!kbdev) ++ return -ENODEV; ++ ++ ret = scnprintf(buf, PAGE_SIZE, "%u\n", kbdev->csf.fw_timeout_ms); ++ ++ return ret; ++} ++ ++static DEVICE_ATTR(fw_timeout, 0644, fw_timeout_show, fw_timeout_store); ++#endif /* MALI_USE_CSF */ ++ ++static struct attribute *kbase_scheduling_attrs[] = { ++#if !MALI_USE_CSF ++ &dev_attr_serialize_jobs.attr, ++#endif /* !MALI_USE_CSF */ ++ NULL ++}; + + static struct attribute *kbase_attrs[] = { + #ifdef CONFIG_MALI_DEBUG + &dev_attr_debug_command.attr, ++#if !MALI_USE_CSF + &dev_attr_js_softstop_always.attr, ++#endif /* !MALI_USE_CSF */ + #endif ++#if !MALI_USE_CSF + &dev_attr_js_timeouts.attr, + &dev_attr_soft_job_timeout.attr, ++#endif /* !MALI_USE_CSF */ + &dev_attr_gpuinfo.attr, + &dev_attr_dvfs_period.attr, + &dev_attr_pm_poweroff.attr, ++#if MALI_USE_CSF ++ &dev_attr_idle_hysteresis_time.attr, ++#endif + &dev_attr_reset_timeout.attr, ++#if !MALI_USE_CSF + &dev_attr_js_scheduling_period.attr, ++#else ++ &dev_attr_csg_scheduling_period.attr, ++ &dev_attr_fw_timeout.attr, ++#endif /* !MALI_USE_CSF */ + &dev_attr_power_policy.attr, + &dev_attr_core_mask.attr, ++ &dev_attr_gpu_memory.attr, + &dev_attr_mem_pool_size.attr, + &dev_attr_mem_pool_max_size.attr, + &dev_attr_lp_mem_pool_size.attr, + &dev_attr_lp_mem_pool_max_size.attr, ++#if !MALI_USE_CSF + &dev_attr_js_ctx_scheduling_mode.attr, ++#endif /* !MALI_USE_CSF */ ++ NULL ++}; ++ ++static struct attribute *kbase_mempool_attrs[] = { ++ &dev_attr_max_size.attr, ++ &dev_attr_lp_max_size.attr, ++ &dev_attr_ctx_default_max_size.attr, + NULL + }; + ++#define SYSFS_SCHEDULING_GROUP "scheduling" ++static const struct attribute_group kbase_scheduling_attr_group = { ++ .name = SYSFS_SCHEDULING_GROUP, ++ .attrs = kbase_scheduling_attrs, ++}; ++ ++#define SYSFS_MEMPOOL_GROUP "mempool" 
++static const struct attribute_group kbase_mempool_attr_group = { ++ .name = SYSFS_MEMPOOL_GROUP, ++ .attrs = kbase_mempool_attrs, ++}; ++ + static const struct attribute_group kbase_attr_group = { + .attrs = kbase_attrs, + }; + ++static struct attribute *ctx_attrs[] = { ++ &dev_attr_ctx_mem_pool_size.attr, ++ NULL ++}; ++ ++static const struct attribute_group kbase_ctx_attr_group = { ++ .attrs = ctx_attrs, ++}; ++ + int kbase_sysfs_init(struct kbase_device *kbdev) + { + int err = 0; +@@ -4056,11 +5230,39 @@ int kbase_sysfs_init(struct kbase_device *kbdev) + kbdev->mdev.mode = 0666; + + err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group); ++ err += sysfs_create_group(&kbdev->dev->kobj, &kbase_ctx_attr_group); ++ if (err) ++ return err; ++ ++ err = sysfs_create_group(&kbdev->dev->kobj, ++ &kbase_scheduling_attr_group); ++ if (err) { ++ dev_err(kbdev->dev, "Creation of %s sysfs group failed", ++ SYSFS_SCHEDULING_GROUP); ++ sysfs_remove_group(&kbdev->dev->kobj, ++ &kbase_attr_group); ++ return err; ++ } ++ ++ err = sysfs_create_group(&kbdev->dev->kobj, ++ &kbase_mempool_attr_group); ++ if (err) { ++ dev_err(kbdev->dev, "Creation of %s sysfs group failed", ++ SYSFS_MEMPOOL_GROUP); ++ sysfs_remove_group(&kbdev->dev->kobj, ++ &kbase_scheduling_attr_group); ++ sysfs_remove_group(&kbdev->dev->kobj, ++ &kbase_attr_group); ++ } ++ + return err; + } + + void kbase_sysfs_term(struct kbase_device *kbdev) + { ++ sysfs_remove_group(&kbdev->dev->kobj, &kbase_ctx_attr_group); ++ sysfs_remove_group(&kbdev->dev->kobj, &kbase_mempool_attr_group); ++ sysfs_remove_group(&kbdev->dev->kobj, &kbase_scheduling_attr_group); + sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); + put_device(kbdev->dev); + } +@@ -4119,7 +5321,8 @@ static int kbase_platform_device_probe(struct platform_device *pdev) + + if (err) { + if (err == -EPROBE_DEFER) +- dev_err(kbdev->dev, "Device initialization Deferred\n"); ++ dev_info(kbdev->dev, ++ "Device initialization Deferred\n"); + else + 
dev_err(kbdev->dev, "Device initialization failed\n"); + +@@ -4161,8 +5364,11 @@ static int kbase_device_suspend(struct device *dev) + + kbase_pm_suspend(kbdev); + +-#if defined(CONFIG_MALI_DEVFREQ) && \ +- (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) ++#ifdef CONFIG_MALI_MIDGARD_DVFS ++ kbase_pm_metrics_stop(kbdev); ++#endif ++ ++#ifdef CONFIG_MALI_DEVFREQ + dev_dbg(dev, "Callback %s\n", __func__); + if (kbdev->devfreq) { + kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_SUSPEND); +@@ -4190,8 +5396,11 @@ static int kbase_device_resume(struct device *dev) + + kbase_pm_resume(kbdev); + +-#if defined(CONFIG_MALI_DEVFREQ) && \ +- (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) ++#ifdef CONFIG_MALI_MIDGARD_DVFS ++ kbase_pm_metrics_start(kbdev); ++#endif ++ ++#ifdef CONFIG_MALI_DEVFREQ + dev_dbg(dev, "Callback %s\n", __func__); + if (kbdev->devfreq) { + mutex_lock(&kbdev->pm.lock); +@@ -4224,8 +5433,12 @@ static int kbase_device_runtime_suspend(struct device *dev) + return -ENODEV; + + dev_dbg(dev, "Callback %s\n", __func__); +-#if defined(CONFIG_MALI_DEVFREQ) && \ +- (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) ++ ++#ifdef CONFIG_MALI_MIDGARD_DVFS ++ kbase_pm_metrics_stop(kbdev); ++#endif ++ ++#ifdef CONFIG_MALI_DEVFREQ + if (kbdev->devfreq) + kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_SUSPEND); + #endif +@@ -4263,8 +5476,11 @@ static int kbase_device_runtime_resume(struct device *dev) + dev_dbg(dev, "runtime resume\n"); + } + +-#if defined(CONFIG_MALI_DEVFREQ) && \ +- (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) ++#ifdef CONFIG_MALI_MIDGARD_DVFS ++ kbase_pm_metrics_start(kbdev); ++#endif ++ ++#ifdef CONFIG_MALI_DEVFREQ + if (kbdev->devfreq) + kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_RESUME); + #endif +@@ -4317,7 +5533,7 @@ static const struct dev_pm_ops kbase_pm_ops = { + #endif /* KBASE_PM_RUNTIME */ + }; + +-#ifdef CONFIG_OF ++#if IS_ENABLED(CONFIG_OF) + static const struct of_device_id kbase_dt_ids[] = { + { .compatible = "arm,malit6xx" }, + { 
.compatible = "arm,mali-midgard" }, +@@ -4332,9 +5548,9 @@ static struct platform_driver kbase_platform_driver = { + .remove = kbase_platform_device_remove, + .driver = { + .name = kbase_drv_name, +- .owner = THIS_MODULE, + .pm = &kbase_pm_ops, + .of_match_table = of_match_ptr(kbase_dt_ids), ++ .probe_type = PROBE_PREFER_ASYNCHRONOUS, + }, + }; + +@@ -4342,7 +5558,7 @@ static struct platform_driver kbase_platform_driver = { + * The driver will not provide a shortcut to create the Mali platform device + * anymore when using Device Tree. + */ +-#ifdef CONFIG_OF ++#if IS_ENABLED(CONFIG_OF) + module_platform_driver(kbase_platform_driver); + #else + +@@ -4377,6 +5593,7 @@ MODULE_LICENSE("GPL"); + MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \ + __stringify(BASE_UK_VERSION_MAJOR) "." \ + __stringify(BASE_UK_VERSION_MINOR) ")"); ++MODULE_SOFTDEP("pre: memory_group_manager"); + + #define CREATE_TRACE_POINTS + /* Create the trace points (otherwise we just get code to call a tracepoint) */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_cs_experimental.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_cs_experimental.h +index e1fffc3..4dc09e4 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_cs_experimental.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_cs_experimental.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,18 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * +- *//* SPDX-License-Identifier: GPL-2.0 */ +- +-/* +- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. +- * +- * This program is free software and is provided to you under the terms of the +- * GNU General Public License version 2 as published by the Free Software +- * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. +- * + */ + + #ifndef _KBASE_CS_EXPERIMENTAL_H_ +@@ -41,9 +30,6 @@ + */ + static inline void mali_kbase_print_cs_experimental(void) + { +-#if MALI_JIT_PRESSURE_LIMIT +- pr_info("mali_kbase: JIT_PRESSURE_LIMIT (experimental) enabled"); +-#endif /* MALI_JIT_PRESSURE_LIMIT */ + #if MALI_INCREMENTAL_RENDERING + pr_info("mali_kbase: INCREMENTAL_RENDERING (experimental) enabled"); + #endif /* MALI_INCREMENTAL_RENDERING */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c +index cea91bc..d06380d 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2017-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2017-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,13 +17,9 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include +-#include +- + #include + #include "mali_kbase_ctx_sched.h" + #include "tl/mali_kbase_tracepoints.h" +@@ -46,7 +43,8 @@ int kbase_ctx_sched_init(struct kbase_device *kbdev) + int as_present = (1U << kbdev->nr_hw_address_spaces) - 1; + + /* These two must be recalculated if nr_hw_address_spaces changes +- * (e.g. for HW workarounds) */ ++ * (e.g. for HW workarounds) ++ */ + kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces; + kbdev->as_free = as_present; /* All ASs initially free */ + +@@ -212,6 +210,13 @@ void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev) + for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) { + struct kbase_context *kctx; + ++#if MALI_USE_CSF ++ if ((i == MCU_AS_NR) && kbdev->csf.firmware_inited) { ++ kbase_mmu_update(kbdev, &kbdev->csf.mcu_mmu, ++ MCU_AS_NR); ++ continue; ++ } ++#endif + kctx = kbdev->as_to_kctx[i]; + if (kctx) { + if (atomic_read(&kctx->refcount)) { +@@ -254,7 +259,7 @@ struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount( + + found_kctx = kbdev->as_to_kctx[as_nr]; + +- if (found_kctx != NULL) ++ if (!WARN_ON(found_kctx == NULL)) + kbase_ctx_sched_retain_ctx_refcount(found_kctx); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +@@ -264,6 +269,21 @@ struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount( + + struct kbase_context *kbase_ctx_sched_as_to_ctx(struct kbase_device *kbdev, + size_t as_nr) ++{ ++ unsigned long flags; ++ struct kbase_context *found_kctx; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ++ found_kctx = kbase_ctx_sched_as_to_ctx_nolock(kbdev, as_nr); ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ return 
found_kctx; ++} ++ ++struct kbase_context *kbase_ctx_sched_as_to_ctx_nolock( ++ struct kbase_device *kbdev, size_t as_nr) + { + struct kbase_context *found_kctx; + +@@ -273,13 +293,14 @@ struct kbase_context *kbase_ctx_sched_as_to_ctx(struct kbase_device *kbdev, + if (WARN_ON(as_nr >= BASE_MAX_NR_AS)) + return NULL; + +- found_kctx = kbdev->as_to_kctx[as_nr]; ++ lockdep_assert_held(&kbdev->hwaccess_lock); + +- if (WARN_ON(!found_kctx)) +- return NULL; ++ found_kctx = kbdev->as_to_kctx[as_nr]; + +- if (WARN_ON(atomic_read(&found_kctx->refcount) <= 0)) +- return NULL; ++ if (found_kctx) { ++ if (atomic_read(&found_kctx->refcount) <= 0) ++ found_kctx = NULL; ++ } + + return found_kctx; + } +@@ -342,3 +363,40 @@ void kbase_ctx_sched_release_ctx_lock(struct kbase_context *kctx) + + spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); + } ++ ++#if MALI_USE_CSF ++bool kbase_ctx_sched_inc_refcount_if_as_valid(struct kbase_context *kctx) ++{ ++ struct kbase_device *kbdev; ++ bool added_ref = false; ++ unsigned long flags; ++ ++ if (WARN_ON(kctx == NULL)) ++ return added_ref; ++ ++ kbdev = kctx->kbdev; ++ ++ if (WARN_ON(kbdev == NULL)) ++ return added_ref; ++ ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ++ if ((kctx->as_nr != KBASEP_AS_NR_INVALID) && ++ (kctx == kbdev->as_to_kctx[kctx->as_nr])) { ++ atomic_inc(&kctx->refcount); ++ ++ if (kbdev->as_free & (1u << kctx->as_nr)) ++ kbdev->as_free &= ~(1u << kctx->as_nr); ++ ++ KBASE_KTRACE_ADD(kbdev, SCHED_RETAIN_CTX_NOLOCK, kctx, ++ kbase_ktrace_get_ctx_refcnt(kctx)); ++ added_ref = true; ++ } ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ ++ return added_ref; ++} ++#endif +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h +index 1affa71..334724f 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h ++++ 
b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2017-2018, 2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2017-2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KBASE_CTX_SCHED_H_ +@@ -26,7 +25,7 @@ + #include + + /** +- * The Context Scheduler manages address space assignment and reference ++ * DOC: The Context Scheduler manages address space assignment and reference + * counting to kbase_context. The interface has been designed to minimise + * interactions between the Job Scheduler and Power Management/MMU to support + * the existing Job Scheduler interface. +@@ -41,7 +40,7 @@ + */ + + /** +- * kbase_ctx_sched_init - Initialise the context scheduler ++ * kbase_ctx_sched_init() - Initialise the context scheduler + * @kbdev: The device for which the context scheduler needs to be initialised + * + * This must be called during device initialisation. The number of hardware +@@ -167,6 +166,21 @@ struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount( + struct kbase_context *kbase_ctx_sched_as_to_ctx(struct kbase_device *kbdev, + size_t as_nr); + ++/** ++ * kbase_ctx_sched_as_to_ctx_nolock - Lookup a context based on its current ++ * address space. 
++ * @kbdev: The device for which the returned context must belong ++ * @as_nr: address space assigned to the context of interest ++ * ++ * The following lock must be held by the caller: ++ * * kbase_device::hwaccess_lock ++ * ++ * Return: a valid struct kbase_context on success or NULL on failure, ++ * indicating that no context was found in as_nr. ++ */ ++struct kbase_context *kbase_ctx_sched_as_to_ctx_nolock( ++ struct kbase_device *kbdev, size_t as_nr); ++ + /** + * kbase_ctx_sched_inc_refcount_nolock - Refcount a context as being busy, + * preventing it from being scheduled out. +@@ -206,4 +220,22 @@ bool kbase_ctx_sched_inc_refcount(struct kbase_context *kctx); + */ + void kbase_ctx_sched_release_ctx_lock(struct kbase_context *kctx); + ++#if MALI_USE_CSF ++/** ++ * kbase_ctx_sched_inc_refcount_if_as_valid - Refcount the context if it has GPU ++ * address space slot assigned to it. ++ * ++ * @kctx: Context to be refcounted ++ * ++ * This function takes a reference on the context if it has a GPU address space ++ * slot assigned to it. The address space slot will not be available for ++ * re-assignment until the reference is released. ++ * ++ * Return: true if refcount succeeded and the address space slot will not be ++ * reassigned, false if the refcount failed (because the address space slot ++ * was not assigned). ++ */ ++bool kbase_ctx_sched_inc_refcount_if_as_valid(struct kbase_context *kctx); ++#endif ++ + #endif /* _KBASE_CTX_SCHED_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c +index 118f787..6d3b109 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2014, 2020-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + #include + + static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = { +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h +index 2fdb72d..10a3c85 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2012-2015, 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2015, 2017, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + #ifndef _KBASE_DEBUG_H + #define _KBASE_DEBUG_H + +@@ -43,7 +40,7 @@ + #endif /* KBASE_DEBUG_DISABLE_ASSERTS */ + + /** Function type that is called on an KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */ +-typedef void (kbase_debug_assert_hook) (void *); ++typedef void kbase_debug_assert_hook(void *); + + struct kbasep_debug_assert_cb { + kbase_debug_assert_hook *func; +@@ -51,9 +48,9 @@ struct kbasep_debug_assert_cb { + }; + + /** +- * @def KBASEP_DEBUG_PRINT_TRACE +- * @brief Private macro containing the format of the trace to display before every message +- * @sa KBASE_DEBUG_SKIP_TRACE, KBASE_DEBUG_SKIP_FUNCTION_NAME ++ * KBASEP_DEBUG_PRINT_TRACE - Private macro containing the format of the trace ++ * to display before every message @sa KBASE_DEBUG_SKIP_TRACE, ++ * KBASE_DEBUG_SKIP_FUNCTION_NAME + */ + #if !KBASE_DEBUG_SKIP_TRACE + #define KBASEP_DEBUG_PRINT_TRACE \ +@@ -68,21 +65,22 @@ struct kbasep_debug_assert_cb { + #endif + + /** +- * @def KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) +- * @brief (Private) system printing function associated to the @ref KBASE_DEBUG_ASSERT_MSG event. +- * @param trace location in the code from where the message is printed +- * @param function function from where the message is printed +- * @param ... Format string followed by format arguments. ++ * KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) - (Private) system printing ++ * function associated to the @ref KBASE_DEBUG_ASSERT_MSG event. ++ * @trace: location in the code from where the message is printed ++ * @function: function from where the message is printed ++ * @...: Format string followed by format arguments. 
++ * + * @note function parameter cannot be concatenated with other strings + */ + /* Select the correct system output function*/ + #ifdef CONFIG_MALI_DEBUG +-#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)\ +- do { \ +- pr_err("Mali: %s function:%s ", trace, function);\ +- pr_err(__VA_ARGS__);\ +- pr_err("\n");\ +- } while (false) ++#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) \ ++ do { \ ++ pr_err("Mali: %s function:%s ", trace, function); \ ++ pr_err(__VA_ARGS__); \ ++ pr_err("\n"); \ ++ } while (false) + #else + #define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP() + #endif +@@ -94,12 +92,12 @@ struct kbasep_debug_assert_cb { + #endif + + /** +- * @def KBASE_DEBUG_ASSERT(expr) +- * @brief Calls @ref KBASE_PRINT_ASSERT and prints the expression @a expr if @a expr is false ++ * KBASE_DEBUG_ASSERT(expr) - Calls @ref KBASE_PRINT_ASSERT and prints the ++ * expression @a expr if @a expr is false ++ * @expr: Boolean expression + * + * @note This macro does nothing if the flag @ref KBASE_DEBUG_DISABLE_ASSERTS is set to 1 + * +- * @param expr Boolean expression + */ + #define KBASE_DEBUG_ASSERT(expr) \ + KBASE_DEBUG_ASSERT_MSG(expr, #expr) +@@ -107,15 +105,15 @@ struct kbasep_debug_assert_cb { + #if KBASE_DEBUG_DISABLE_ASSERTS + #define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP() + #else +- /** +- * @def KBASE_DEBUG_ASSERT_MSG(expr, ...) +- * @brief Calls @ref KBASEP_DEBUG_ASSERT_OUT and prints the given message if @a expr is false +- * +- * @note This macro does nothing if the flag @ref KBASE_DEBUG_DISABLE_ASSERTS is set to 1 +- * +- * @param expr Boolean expression +- * @param ... Message to display when @a expr is false, as a format string followed by format arguments. +- */ ++/** ++ * KBASE_DEBUG_ASSERT_MSG() - Calls @ref KBASEP_DEBUG_ASSERT_OUT and prints the ++ * given message if @a expr is false ++ * @expr: Boolean expression ++ * @...: Message to display when @a expr is false, as a format string followed ++ * by format arguments. 
++ * ++ * This macro does nothing if the flag KBASE_DEBUG_DISABLE_ASSERTS is set to 1 ++ */ + #define KBASE_DEBUG_ASSERT_MSG(expr, ...) \ + do { \ + if (!(expr)) { \ +@@ -127,10 +125,8 @@ struct kbasep_debug_assert_cb { + #endif /* KBASE_DEBUG_DISABLE_ASSERTS */ + + /** +- * @def KBASE_DEBUG_CODE( X ) +- * @brief Executes the code inside the macro only in debug mode +- * +- * @param X Code to compile only in debug mode. ++ * KBASE_DEBUG_CODE( X ) - Executes the code inside the macro only in debug mode ++ * @X: Code to compile only in debug mode. + */ + #ifdef CONFIG_MALI_DEBUG + #define KBASE_DEBUG_CODE(X) X +@@ -141,7 +137,9 @@ struct kbasep_debug_assert_cb { + /** @} */ + + /** +- * @brief Register a function to call on ASSERT ++ * kbase_debug_assert_register_hook - Register a function to call on ASSERT ++ * @func: the function to call when an assert is triggered. ++ * @param: the parameter to pass to \a func when calling it + * + * Such functions will \b only be called during Debug mode, and for debugging + * features \b only. Do not rely on them to be called in general use. +@@ -151,13 +149,12 @@ struct kbasep_debug_assert_cb { + * @note This function is not thread-safe, and should only be used to + * register/deregister once in the module's lifetime. + * +- * @param[in] func the function to call when an assert is triggered. 
+- * @param[in] param the parameter to pass to \a func when calling it + */ + void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param); + + /** +- * @brief Call a debug assert hook previously registered with kbase_debug_assert_register_hook() ++ * kbasep_debug_assert_call_hook - Call a debug assert hook previously ++ * registered with kbase_debug_assert_register_hook() + * + * @note This function is not thread-safe with respect to multiple threads + * registering functions and parameters with +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c +index dbc774d..4f021b3 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2012-2016, 2018-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,15 +17,13 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include + #include + #include + +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + + static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev) + { +@@ -518,23 +517,24 @@ void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev) + /* + * Initialize the relevant data structure per context + */ +-void kbase_debug_job_fault_context_init(struct kbase_context *kctx) ++int kbase_debug_job_fault_context_init(struct kbase_context *kctx) + { + + /* We need allocate double size register range + * Because this memory will keep the register address and value + */ + kctx->reg_dump = vmalloc(0x4000 * 2); +- if (kctx->reg_dump == NULL) +- return; +- +- if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) { +- vfree(kctx->reg_dump); +- kctx->reg_dump = NULL; ++ if (kctx->reg_dump != NULL) { ++ if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == ++ false) { ++ vfree(kctx->reg_dump); ++ kctx->reg_dump = NULL; ++ } ++ INIT_LIST_HEAD(&kctx->job_fault_resume_event_list); ++ atomic_set(&kctx->job_fault_count, 0); + } +- INIT_LIST_HEAD(&kctx->job_fault_resume_event_list); +- atomic_set(&kctx->job_fault_count, 0); + ++ return 0; + } + + /* +@@ -549,6 +549,14 @@ void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx) + { + WARN_ON(!kbase_ctx_flag(kctx, KCTX_DYING)); + ++ /* Return early if the job fault part of the kbase_device is not ++ * initialized yet. An error can happen during the device probe after ++ * the privileged Kbase context was created for the HW counter dumping ++ * but before the job fault part is initialized. 
++ */ ++ if (!kctx->kbdev->job_fault_resume_workq) ++ return; ++ + kbase_ctx_remove_pending_event(kctx); + } + +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h +index ef69627..39aeed0 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2012-2016, 2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2016, 2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KBASE_DEBUG_JOB_FAULT_H +@@ -54,8 +53,9 @@ void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev); + * kbase_debug_job_fault_context_init - Initialize the relevant + * data structure per context + * @kctx: KBase context pointer ++ * @return 0 on success + */ +-void kbase_debug_job_fault_context_init(struct kbase_context *kctx); ++int kbase_debug_job_fault_context_init(struct kbase_context *kctx); + + /** + * kbase_debug_job_fault_context_term - Release the relevant +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c +index 4788137..5a99b5e 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2013-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +@@ -30,11 +29,7 @@ + #include + #include + +-#ifdef CONFIG_DEBUG_FS +- +-#if (KERNEL_VERSION(4, 1, 0) > LINUX_VERSION_CODE) +-#define get_file_rcu(x) atomic_long_inc_not_zero(&(x)->f_count) +-#endif ++#if IS_ENABLED(CONFIG_DEBUG_FS) + + struct debug_mem_mapping { + struct list_head node; +@@ -179,6 +174,13 @@ static int debug_mem_zone_open(struct rb_root *rbtree, + /* Empty region - ignore */ + continue; + ++ if (reg->flags & KBASE_REG_PROTECTED) { ++ /* CPU access to protected memory is forbidden - so ++ * skip this GPU virtual region. ++ */ ++ continue; ++ } ++ + mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); + if (!mapping) { + ret = -ENOMEM; +@@ -222,19 +224,19 @@ static int debug_mem_open(struct inode *i, struct file *file) + kbase_gpu_vm_lock(kctx); + + ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data); +- if (0 != ret) { ++ if (ret != 0) { + kbase_gpu_vm_unlock(kctx); + goto out; + } + + ret = debug_mem_zone_open(&kctx->reg_rbtree_custom, mem_data); +- if (0 != ret) { ++ if (ret != 0) { + kbase_gpu_vm_unlock(kctx); + goto out; + } + + ret = debug_mem_zone_open(&kctx->reg_rbtree_exec, mem_data); +- if (0 != ret) { ++ if (ret != 0) { + kbase_gpu_vm_unlock(kctx); + goto out; + } +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h +index b948b7c..d034832 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2013-2015, 2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2013-2015, 2019-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KBASE_DEBUG_MEM_VIEW_H +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.c +index 37e507b..973739f 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include +@@ -46,10 +45,9 @@ + * + * Return: 0 if success, negative error code otherwise. 
+ */ +-static int set_attr_from_string( +- char *const buf, +- void *const array, size_t const nelems, +- kbase_debugfs_helper_set_attr_fn const set_attr_fn) ++static int ++set_attr_from_string(char *const buf, void *const array, size_t const nelems, ++ kbase_debugfs_helper_set_attr_fn *const set_attr_fn) + { + size_t index, err = 0; + char *ptr = buf; +@@ -90,9 +88,62 @@ static int set_attr_from_string( + return err; + } + ++int kbase_debugfs_string_validator(char *const buf) ++{ ++ size_t index; ++ int err = 0; ++ char *ptr = buf; ++ ++ for (index = 0; *ptr; ++index) { ++ unsigned long test_number; ++ size_t len; ++ ++ /* Drop leading spaces */ ++ while (*ptr == ' ') ++ ptr++; ++ ++ /* Strings passed into the validator will be NULL terminated ++ * by nature, so here strcspn only needs to delimit by ++ * newlines, spaces and NULL terminator (delimited natively). ++ */ ++ len = strcspn(ptr, "\n "); ++ if (len == 0) { ++ /* No more values (allow this) */ ++ break; ++ } ++ ++ /* Substitute a nul terminator for a space character to make ++ * the substring valid for kstrtoul, and then replace it back. ++ */ ++ if (ptr[len] == ' ') { ++ ptr[len] = '\0'; ++ err = kstrtoul(ptr, 0, &test_number); ++ ptr[len] = ' '; ++ ++ /* len should only be incremented if there is a valid ++ * number to follow - otherwise this will skip over ++ * the NULL terminator in cases with no ending newline ++ */ ++ len++; ++ } else { ++ /* This would occur at the last element before a space ++ * or a NULL terminator. 
++ */ ++ err = kstrtoul(ptr, 0, &test_number); ++ } ++ ++ if (err) ++ break; ++ /* Skip the substring (including any premature nul terminator) ++ */ ++ ptr += len; ++ } ++ return err; ++} ++ + int kbase_debugfs_helper_set_attr_from_string( + const char *const buf, void *const array, size_t const nelems, +- kbase_debugfs_helper_set_attr_fn const set_attr_fn) ++ kbase_debugfs_helper_set_attr_fn *const set_attr_fn) + { + char *const wbuf = kstrdup(buf, GFP_KERNEL); + int err = 0; +@@ -100,6 +151,13 @@ int kbase_debugfs_helper_set_attr_from_string( + if (!wbuf) + return -ENOMEM; + ++ /* validate string before actually writing values */ ++ err = kbase_debugfs_string_validator(wbuf); ++ if (err) { ++ kfree(wbuf); ++ return err; ++ } ++ + err = set_attr_from_string(wbuf, array, nelems, + set_attr_fn); + +@@ -108,9 +166,9 @@ int kbase_debugfs_helper_set_attr_from_string( + } + + ssize_t kbase_debugfs_helper_get_attr_to_string( +- char *const buf, size_t const size, +- void *const array, size_t const nelems, +- kbase_debugfs_helper_get_attr_fn const get_attr_fn) ++ char *const buf, size_t const size, void *const array, ++ size_t const nelems, ++ kbase_debugfs_helper_get_attr_fn *const get_attr_fn) + { + ssize_t total = 0; + size_t index; +@@ -128,10 +186,10 @@ ssize_t kbase_debugfs_helper_get_attr_to_string( + return total; + } + +-int kbase_debugfs_helper_seq_write(struct file *const file, +- const char __user *const ubuf, size_t const count, +- size_t const nelems, +- kbase_debugfs_helper_set_attr_fn const set_attr_fn) ++int kbase_debugfs_helper_seq_write( ++ struct file *const file, const char __user *const ubuf, ++ size_t const count, size_t const nelems, ++ kbase_debugfs_helper_set_attr_fn *const set_attr_fn) + { + const struct seq_file *const sfile = file->private_data; + void *const array = sfile->private; +@@ -154,6 +212,14 @@ int kbase_debugfs_helper_seq_write(struct file *const file, + } + + buf[count] = '\0'; ++ ++ /* validate string before actually writing 
values */ ++ err = kbase_debugfs_string_validator(buf); ++ if (err) { ++ kfree(buf); ++ return err; ++ } ++ + err = set_attr_from_string(buf, + array, nelems, set_attr_fn); + kfree(buf); +@@ -161,9 +227,9 @@ int kbase_debugfs_helper_seq_write(struct file *const file, + return err; + } + +-int kbase_debugfs_helper_seq_read(struct seq_file *const sfile, +- size_t const nelems, +- kbase_debugfs_helper_get_attr_fn const get_attr_fn) ++int kbase_debugfs_helper_seq_read( ++ struct seq_file *const sfile, size_t const nelems, ++ kbase_debugfs_helper_get_attr_fn *const get_attr_fn) + { + void *const array = sfile->private; + size_t index; +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.h +index c3c9efa..4c69d8b 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KBASE_DEBUGFS_HELPER_H_ +@@ -31,8 +30,8 @@ + * @index: An element index. The valid range depends on the use-case. + * @value: Attribute value to be set. 
+ */ +-typedef void (*kbase_debugfs_helper_set_attr_fn)( +- void *array, size_t index, size_t value); ++typedef void kbase_debugfs_helper_set_attr_fn(void *array, size_t index, ++ size_t value); + + /** + * kbase_debugfs_helper_set_attr_from_string - Parse a string to reconfigure an +@@ -56,7 +55,30 @@ typedef void (*kbase_debugfs_helper_set_attr_fn)( + */ + int kbase_debugfs_helper_set_attr_from_string( + const char *buf, void *array, size_t nelems, +- kbase_debugfs_helper_set_attr_fn set_attr_fn); ++ kbase_debugfs_helper_set_attr_fn *set_attr_fn); ++ ++/** ++ * kbase_debugfs_string_validator - Validate a string to be written to a ++ * debugfs file for any incorrect formats ++ * or wrong values. ++ * ++ * This function is to be used before any writes to debugfs values are done ++ * such that any strings with erroneous values (such as octal 09 or ++ * hexadecimal 0xGH are fully ignored) - without this validation, any correct ++ * values before the first incorrect one will still be entered into the ++ * debugfs file. This essentially iterates the values through kstrtoul to see ++ * if it is valid. ++ * ++ * It is largely similar to set_attr_from_string to iterate through the values ++ * of the input string. This function also requires the input string to be ++ * writable. ++ * ++ * @buf: Null-terminated string to validate. ++ * ++ * Return: 0 with no error, else -22 (the invalid return value of kstrtoul) if ++ * any value in the string was wrong or with an incorrect format. ++ */ ++int kbase_debugfs_string_validator(char *const buf); + + /** + * typedef kbase_debugfs_helper_get_attr_fn - Type of function to get an +@@ -67,8 +89,7 @@ int kbase_debugfs_helper_set_attr_from_string( + * + * Return: Value of attribute. 
+ */ +-typedef size_t (*kbase_debugfs_helper_get_attr_fn)( +- void *array, size_t index); ++typedef size_t kbase_debugfs_helper_get_attr_fn(void *array, size_t index); + + /** + * kbase_debugfs_helper_get_attr_to_string - Construct a formatted string +@@ -89,7 +110,7 @@ typedef size_t (*kbase_debugfs_helper_get_attr_fn)( + */ + ssize_t kbase_debugfs_helper_get_attr_to_string( + char *buf, size_t size, void *array, size_t nelems, +- kbase_debugfs_helper_get_attr_fn get_attr_fn); ++ kbase_debugfs_helper_get_attr_fn *get_attr_fn); + + /** + * kbase_debugfs_helper_seq_read - Implements reads from a virtual file for an +@@ -110,8 +131,8 @@ ssize_t kbase_debugfs_helper_get_attr_to_string( + * Return: 0 if success, negative error code otherwise. + */ + int kbase_debugfs_helper_seq_read( +- struct seq_file *const sfile, size_t const nelems, +- kbase_debugfs_helper_get_attr_fn const get_attr_fn); ++ struct seq_file *sfile, size_t nelems, ++ kbase_debugfs_helper_get_attr_fn *get_attr_fn); + + /** + * kbase_debugfs_helper_seq_write - Implements writes to a virtual file for an +@@ -132,10 +153,10 @@ int kbase_debugfs_helper_seq_read( + * + * Return: 0 if success, negative error code otherwise. 
+ */ +-int kbase_debugfs_helper_seq_write(struct file *const file, +- const char __user *const ubuf, size_t const count, +- size_t const nelems, +- kbase_debugfs_helper_set_attr_fn const set_attr_fn); ++int kbase_debugfs_helper_seq_write(struct file *file, ++ const char __user *ubuf, size_t count, ++ size_t nelems, ++ kbase_debugfs_helper_set_attr_fn *set_attr_fn); + + #endif /*_KBASE_DEBUGFS_HELPER_H_ */ + +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h +index 7056d80..146695c 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,17 +17,11 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /** +- * @file mali_kbase_defs.h +- * +- * Defintions (types, defines, etcs) common to Kbase. They are placed here to +- * allow the hierarchy of header files to work. ++ * DOC: Defintions (types, defines, etcs) common to Kbase. They are placed here ++ * to allow the hierarchy of header files to work. 
+ */ + + #ifndef _KBASE_DEFS_H_ +@@ -37,10 +32,14 @@ + #include + #include + #include +-#include ++#include + #include + #include +-#include ++#if MALI_USE_CSF ++#include ++#else ++#include ++#endif + #include + + #include +@@ -49,9 +48,6 @@ + #include + #include + +-#ifdef CONFIG_MALI_BUSLOG +-#include +-#endif + + #if defined(CONFIG_SYNC) + #include +@@ -59,7 +55,7 @@ + #include "mali_kbase_fence_defs.h" + #endif + +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + #include + #endif /* CONFIG_DEBUG_FS */ + +@@ -75,8 +71,7 @@ + #include + #include + +-#if defined(CONFIG_PM_RUNTIME) || \ +- (defined(CONFIG_PM) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) ++#if defined(CONFIG_PM_RUNTIME) || defined(CONFIG_PM) + #define KBASE_PM_RUNTIME 1 + #endif + +@@ -123,6 +118,11 @@ + */ + #define KBASE_LOCK_REGION_MIN_SIZE_LOG2 (15) + ++/** ++ * Maximum number of GPU memory region zones ++ */ ++#define KBASE_REG_ZONE_MAX 4ul ++ + #include "mali_kbase_hwaccess_defs.h" + + /* Maximum number of pages of memory that require a permanent mapping, per +@@ -138,24 +138,29 @@ + */ + #define KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS (200 * NSEC_PER_USEC) + ++#if MALI_USE_CSF ++/* The buffer count of CSF hwcnt backend ring buffer, which is used when CSF ++ * hwcnt backend allocate the ring buffer to communicate with CSF firmware for ++ * HWC dump samples. ++ * To meet the hardware requirement, this number MUST be power of 2, otherwise, ++ * CSF hwcnt backend creation will be failed. ++ */ ++#define KBASE_HWCNT_BACKEND_CSF_RING_BUFFER_COUNT (128) ++#endif ++ + /* Maximum number of clock/regulator pairs that may be referenced by + * the device node. + * This is dependent on support for of_property_read_u64_array() in the + * kernel. 
+ */ +-#if (KERNEL_VERSION(4, 0, 0) <= LINUX_VERSION_CODE) || \ +- defined(LSK_OPPV2_BACKPORT) + #define BASE_MAX_NR_CLOCKS_REGULATORS (2) +-#else +-#define BASE_MAX_NR_CLOCKS_REGULATORS (1) +-#endif + + /* Forward declarations */ + struct kbase_context; + struct kbase_device; + struct kbase_as; + struct kbase_mmu_setup; +-struct kbase_ipa_model_vinstr_data; ++struct kbase_kinstr_jm; + + /** + * struct kbase_io_access - holds information about 1 register access +@@ -178,11 +183,7 @@ struct kbase_io_access { + * @buf: array of kbase_io_access + */ + struct kbase_io_history { +-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) + bool enabled; +-#else +- u32 enabled; +-#endif + + spinlock_t lock; + size_t count; +@@ -242,29 +243,6 @@ struct kbase_fault { + bool protected_mode; + }; + +-/** +- * struct kbase_as - object representing an address space of GPU. +- * @number: Index at which this address space structure is present +- * in an array of address space structures embedded inside the +- * struct kbase_device. +- * @pf_wq: Workqueue for processing work items related to Bus fault +- * and Page fault handling. +- * @work_pagefault: Work item for the Page fault handling. +- * @work_busfault: Work item for the Bus fault handling. +- * @pf_data: Data relating to page fault. +- * @bf_data: Data relating to bus fault. +- * @current_setup: Stores the MMU configuration for this address space. 
+- */ +-struct kbase_as { +- int number; +- struct workqueue_struct *pf_wq; +- struct work_struct work_pagefault; +- struct work_struct work_busfault; +- struct kbase_fault pf_data; +- struct kbase_fault bf_data; +- struct kbase_mmu_setup current_setup; +-}; +- + /** + * struct kbase_mmu_table - object representing a set of GPU page tables + * @mmu_teardown_pages: Buffer of 4 Pages in size, used to cache the entries +@@ -291,7 +269,11 @@ struct kbase_mmu_table { + struct kbase_context *kctx; + }; + ++#if MALI_USE_CSF ++#include "csf/mali_kbase_csf_defs.h" ++#else + #include "jm/mali_kbase_jm_defs.h" ++#endif + + static inline int kbase_as_has_bus_fault(struct kbase_as *as, + struct kbase_fault *fault) +@@ -320,71 +302,115 @@ struct kbasep_mem_device { + atomic_t ir_threshold; + }; + ++struct kbase_clk_rate_listener; ++ ++/** ++ * typedef kbase_clk_rate_listener_on_change_t() - Frequency change callback ++ * ++ * @listener: Clock frequency change listener. ++ * @clk_index: Index of the clock for which the change has occurred. ++ * @clk_rate_hz: Clock frequency(Hz). ++ * ++ * A callback to call when clock rate changes. The function must not ++ * sleep. No clock rate manager functions must be called from here, as ++ * its lock is taken. ++ */ ++typedef void ++kbase_clk_rate_listener_on_change_t(struct kbase_clk_rate_listener *listener, ++ u32 clk_index, u32 clk_rate_hz); ++ ++/** ++ * struct kbase_clk_rate_listener - Clock frequency listener ++ * ++ * @node: List node. ++ * @notify: Callback to be called when GPU frequency changes. ++ */ ++struct kbase_clk_rate_listener { ++ struct list_head node; ++ kbase_clk_rate_listener_on_change_t *notify; ++}; ++ + /** +- * Data stored per device for power management. ++ * struct kbase_clk_rate_trace_manager - Data stored per device for GPU clock ++ * rate trace manager. + * +- * This structure contains data for the power management framework. There is one +- * instance of this structure per device in the system. 
++ * @gpu_idle: Tracks the idle state of GPU. ++ * @clks: Array of pointer to structures storing data for every ++ * enumerated GPU clock. ++ * @clk_rate_trace_ops: Pointer to the platform specific GPU clock rate trace ++ * operations. ++ * @gpu_clk_rate_trace_write: Pointer to the function that would emit the ++ * tracepoint for the clock rate change. ++ * @listeners: List of listener attached. ++ * @lock: Lock to serialize the actions of GPU clock rate trace ++ * manager. ++ */ ++struct kbase_clk_rate_trace_manager { ++ bool gpu_idle; ++ struct kbase_clk_data *clks[BASE_MAX_NR_CLOCKS_REGULATORS]; ++ struct kbase_clk_rate_trace_op_conf *clk_rate_trace_ops; ++ struct list_head listeners; ++ spinlock_t lock; ++}; ++ ++/** ++ * struct kbase_pm_device_data - Data stored per device for power management. ++ * @lock: The lock protecting Power Management structures accessed outside of ++ * IRQ. ++ * This lock must also be held whenever the GPU is being powered on or ++ * off. ++ * @active_count: The reference count of active contexts on this device. Note ++ * that some code paths keep shaders/the tiler powered whilst this is 0. ++ * Use kbase_pm_is_active() instead to check for such cases. ++ * @suspending: Flag indicating suspending/suspended ++ * @gpu_lost: Flag indicating gpu lost ++ * This structure contains data for the power management framework. There ++ * is one instance of this structure per device in the system. ++ * @zero_active_count_wait: Wait queue set when active_count == 0 ++ * @resume_wait: system resume of GPU device. ++ * @debug_core_mask: Bit masks identifying the available shader cores that are ++ * specified via sysfs. One mask per job slot. ++ * @debug_core_mask_all: Bit masks identifying the available shader cores that ++ * are specified via sysfs. ++ * @callback_power_runtime_init: Callback for initializing the runtime power ++ * management. 
Return 0 on success, else error code ++ * @callback_power_runtime_term: Callback for terminating the runtime power ++ * management. ++ * @dvfs_period: Time in milliseconds between each dvfs sample ++ * @backend: KBase PM backend data ++ * @arb_vm_state: The state of the arbiter VM machine ++ * @gpu_users_waiting: Used by virtualization to notify the arbiter that there ++ * are users waiting for the GPU so that it can request and resume the ++ * driver. ++ * @clk_rtm: The state of the GPU clock rate trace manager + */ + struct kbase_pm_device_data { +- /** +- * The lock protecting Power Management structures accessed outside of +- * IRQ. +- * +- * This lock must also be held whenever the GPU is being powered on or +- * off. +- */ + struct mutex lock; +- +- /** +- * The reference count of active contexts on this device. Note that +- * some code paths keep shaders/the tiler powered whilst this is 0. Use +- * kbase_pm_is_active() instead to check for such cases. +- */ + int active_count; +- /** Flag indicating suspending/suspended */ + bool suspending; + #ifdef CONFIG_MALI_ARBITER_SUPPORT +- /* Flag indicating gpu lost */ +- bool gpu_lost; ++ atomic_t gpu_lost; + #endif /* CONFIG_MALI_ARBITER_SUPPORT */ +- /* Wait queue set when active_count == 0 */ + wait_queue_head_t zero_active_count_wait; ++ wait_queue_head_t resume_wait; + +- /** +- * Bit masks identifying the available shader cores that are specified +- * via sysfs. One mask per job slot. +- */ ++#if MALI_USE_CSF ++ u64 debug_core_mask; ++#else ++ /* One mask per job slot. */ + u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS]; + u64 debug_core_mask_all; ++#endif /* MALI_USE_CSF */ + +- /** +- * Callback for initializing the runtime power management. +- * +- * @param kbdev The kbase device +- * +- * @return 0 on success, else error code +- */ +- int (*callback_power_runtime_init)(struct kbase_device *kbdev); +- +- /** +- * Callback for terminating the runtime power management. 
+- * +- * @param kbdev The kbase device +- */ ++ int (*callback_power_runtime_init)(struct kbase_device *kbdev); + void (*callback_power_runtime_term)(struct kbase_device *kbdev); +- +- /* Time in milliseconds between each dvfs sample */ + u32 dvfs_period; +- + struct kbase_pm_backend_data backend; +- + #ifdef CONFIG_MALI_ARBITER_SUPPORT +- /** +- * The state of the arbiter VM machine +- */ + struct kbase_arbiter_vm_state *arb_vm_state; ++ atomic_t gpu_users_waiting; + #endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ struct kbase_clk_rate_trace_manager clk_rtm; + }; + + /** +@@ -524,7 +550,6 @@ struct kbase_mmu_mode { + unsigned long flags; + }; + +-struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void); + struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); + + #define DEVNAME_SIZE 16 +@@ -559,6 +584,32 @@ struct kbase_devfreq_queue_info { + enum kbase_devfreq_work_type acted_type; + }; + ++/** ++ * struct kbase_process - Representing an object of a kbase process instantiated ++ * when the first kbase context is created under it. ++ * @tgid: Thread group ID. ++ * @total_gpu_pages: Total gpu pages allocated across all the contexts ++ * of this process, it accounts for both native allocations ++ * and dma_buf imported allocations. ++ * @kctx_list: List of kbase contexts created for the process. ++ * @kprcs_node: Node to a rb_tree, kbase_device will maintain a rb_tree ++ * based on key tgid, kprcs_node is the node link to ++ * &struct_kbase_device.process_root. ++ * @dma_buf_root: RB tree of the dma-buf imported allocations, imported ++ * across all the contexts created for this process. ++ * Used to ensure that pages of allocation are accounted ++ * only once for the process, even if the allocation gets ++ * imported multiple times for the process. 
++ */ ++struct kbase_process { ++ pid_t tgid; ++ size_t total_gpu_pages; ++ struct list_head kctx_list; ++ ++ struct rb_node kprcs_node; ++ struct rb_root dma_buf_root; ++}; ++ + /** + * struct kbase_device - Object representing an instance of GPU platform device, + * allocated from the probe method of mali driver. +@@ -568,8 +619,8 @@ struct kbase_devfreq_queue_info { + * issues present in the GPU. + * @hw_quirks_mmu: Configuration to be used for the MMU as per the HW + * issues present in the GPU. +- * @hw_quirks_jm: Configuration to be used for the Job Manager as per +- * the HW issues present in the GPU. ++ * @hw_quirks_gpu: Configuration to be used for the Job Manager or CSF/MCU ++ * subsystems as per the HW issues present in the GPU. + * @entry: Links the device instance to the global list of GPU + * devices. The list would have as many entries as there + * are GPU device instances. +@@ -586,6 +637,8 @@ struct kbase_devfreq_queue_info { + * @irqs: Array containing IRQ resource info for 3 types of + * interrupts : Job scheduling, MMU & GPU events (like + * power management, cache etc.) ++ * @irqs.irq: irq number ++ * @irqs.flags: irq flags + * @clocks: Pointer to the input clock resources referenced by + * the GPU device node. + * @nr_clocks: Number of clocks set in the clocks array. +@@ -619,6 +672,8 @@ struct kbase_devfreq_queue_info { + * accesses made by the driver. + * @pm: Per device object for storing data for power management + * framework. ++ * @fw_load_lock: Mutex to protect firmware loading in @ref kbase_open. ++ * @csf: CSF object for the GPU device. 
+ * @js_data: Per device object encapsulating the current context of + * Job Scheduler, which is global to the device and is not + * tied to any particular struct kbase_context running on +@@ -646,11 +701,21 @@ struct kbase_devfreq_queue_info { + * @disjoint_event: struct for keeping track of the disjoint information, + * that whether the GPU is in a disjoint state and the + * number of disjoint events that have occurred on GPU. ++ * @disjoint_event.count: disjoint event count ++ * @disjoint_event.state: disjoint event state + * @nr_hw_address_spaces: Number of address spaces actually available in the + * GPU, remains constant after driver initialisation. + * @nr_user_address_spaces: Number of address spaces available to user contexts ++ * @hwcnt_backend_csf_if_fw: Firmware interface to access CSF GPU performance ++ * counters. + * @hwcnt: Structure used for instrumentation and HW counters + * dumping ++ * @hwcnt.lock: The lock should be used when accessing any of the ++ * following members ++ * @hwcnt.kctx: kbase context ++ * @hwcnt.addr: HW counter address ++ * @hwcnt.addr_bytes: HW counter size in bytes ++ * @hwcnt.backend: Kbase instrumentation backend + * @hwcnt_gpu_iface: Backend interface for GPU hardware counter access. + * @hwcnt_gpu_ctx: Context for GPU hardware counter access. + * @hwaccess_lock must be held when calling +@@ -661,6 +726,7 @@ struct kbase_devfreq_queue_info { + * are enabled. If zero, there is no timeline client and + * therefore timeline is disabled. + * @timeline: Timeline context created per device. ++ * @ktrace: kbase device's ktrace + * @trace_lock: Lock to serialize the access to trace buffer. + * @trace_first_out: Index/offset in the trace buffer at which the first + * unread message is present. +@@ -686,6 +752,8 @@ struct kbase_devfreq_queue_info { + * including any contexts that might be created for + * hardware counters. + * @kctx_list_lock: Lock protecting concurrent accesses to @kctx_list. 
++ * @group_max_uid_in_devices: Max value of any queue group UID in any kernel ++ * context in the kbase device. + * @devfreq_profile: Describes devfreq profile for the Mali GPU device, passed + * to devfreq_add_device() to add devfreq feature to Mali + * GPU device. +@@ -714,6 +782,7 @@ struct kbase_devfreq_queue_info { + * table in devicetree. + * @num_opps: Number of operating performance points available for the Mali + * GPU device. ++ * @last_devfreq_metrics: last PM metrics + * @devfreq_queue: Per device object for storing data that manages devfreq + * suspend & resume request queue and the related items. + * @devfreq_cooling: Pointer returned on registering devfreq cooling device +@@ -724,6 +793,17 @@ struct kbase_devfreq_queue_info { + * previously entered protected mode. + * @ipa: Top level structure for IPA, containing pointers to both + * configured & fallback models. ++ * @ipa.lock: Access to this struct must be with ipa.lock held ++ * @ipa.configured_model: ipa model to use ++ * @ipa.fallback_model: ipa fallback model ++ * @ipa.last_metrics: Values of the PM utilization metrics from last time ++ * the power model was invoked. The utilization is ++ * calculated as the difference between last_metrics ++ * and the current values. ++ * @ipa.force_fallback_model: true if use of fallback model has been forced by ++ * the User ++ * @ipa.last_sample_time: Records the time when counters, used for dynamic ++ * energy estimation, were last sampled. + * @previous_frequency: Previous frequency of GPU clock used for + * BASE_HW_ISSUE_GPU2017_1336 workaround, This clock is + * restored when L2 is powered on. +@@ -732,6 +812,7 @@ struct kbase_devfreq_queue_info { + * @mali_debugfs_directory: Root directory for the debugfs files created by the driver + * @debugfs_ctx_directory: Directory inside the @mali_debugfs_directory containing + * a sub-directory for every context. 
++ * @debugfs_instr_directory: Instrumentation debugfs directory + * @debugfs_as_read_bitmap: bitmap of address spaces for which the bus or page fault + * has occurred. + * @job_fault_wq: Waitqueue to block the job fault dumping daemon till the +@@ -748,6 +829,8 @@ struct kbase_devfreq_queue_info { + * @job_fault_event_lock: Lock to protect concurrent accesses to @job_fault_event_list + * @regs_dump_debugfs_data: Contains the offset of register to be read through debugfs + * file "read_register". ++ * @regs_dump_debugfs_data.reg_offset: Contains the offset of register to be ++ * read through debugfs file "read_register". + * @ctx_num: Total number of contexts created for the device. + * @io_history: Pointer to an object keeping a track of all recent + * register accesses. The history of register accesses +@@ -806,12 +889,40 @@ struct kbase_devfreq_queue_info { + * Job Scheduler + * @l2_size_override: Used to set L2 cache size via device tree blob + * @l2_hash_override: Used to set L2 cache hash via device tree blob ++ * @l2_hash_values_override: true if @l2_hash_values is valid. ++ * @l2_hash_values: Used to set L2 asn_hash via device tree blob ++ * @process_root: rb_tree root node for maintaining a rb_tree of ++ * kbase_process based on key tgid(thread group ID). ++ * @dma_buf_root: rb_tree root node for maintaining a rb_tree of ++ * &struct kbase_dma_buf based on key dma_buf. ++ * We maintain a rb_tree of dma_buf mappings under ++ * kbase_device and kbase_process, one indicates a ++ * mapping and gpu memory usage at device level and ++ * other one at process level. ++ * @total_gpu_pages: Total GPU pages used for the complete GPU device. ++ * @dma_buf_lock: This mutex should be held while accounting for ++ * @total_gpu_pages from imported dma buffers. ++ * @gpu_mem_usage_lock: This spinlock should be held while accounting ++ * @total_gpu_pages for both native and dma-buf imported ++ * allocations. 
++ * @dummy_job_wa: struct for dummy job execution workaround for the ++ * GPU hang issue ++ * @dummy_job_wa.ctx: dummy job workaround context ++ * @dummy_job_wa.jc: dummy job workaround job ++ * @dummy_job_wa.slot: dummy job workaround slot ++ * @dummy_job_wa.flags: dummy job workaround flags ++ * @dummy_job_wa_loaded: Flag for indicating that the workaround blob has ++ * been loaded. Protected by @fw_load_lock. ++ * @arb: Pointer to the arbiter device ++ * @pcm_dev: The priority control manager device. ++ * @oom_notifier_block: notifier_block containing kernel-registered out-of- ++ * memory handler. + */ + struct kbase_device { + u32 hw_quirks_sc; + u32 hw_quirks_tiler; + u32 hw_quirks_mmu; +- u32 hw_quirks_jm; ++ u32 hw_quirks_gpu; + + struct list_head entry; + struct device *dev; +@@ -827,7 +938,7 @@ struct kbase_device { + + struct clk *clocks[BASE_MAX_NR_CLOCKS_REGULATORS]; + unsigned int nr_clocks; +-#ifdef CONFIG_REGULATOR ++#if IS_ENABLED(CONFIG_REGULATOR) + struct regulator *regulators[BASE_MAX_NR_CLOCKS_REGULATORS]; + unsigned int nr_regulators; + #if (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) +@@ -837,16 +948,6 @@ struct kbase_device { + char devname[DEVNAME_SIZE]; + u32 id; + +-#ifdef CONFIG_MALI_NO_MALI +- void *model; +- struct kmem_cache *irq_slab; +- struct workqueue_struct *irq_workq; +- atomic_t serving_job_irq; +- atomic_t serving_gpu_irq; +- atomic_t serving_mmu_irq; +- spinlock_t reg_op_lock; +-#endif /* CONFIG_MALI_NO_MALI */ +- + struct kbase_pm_device_data pm; + + struct kbase_mem_pool_group mem_pools; +@@ -874,8 +975,10 @@ struct kbase_device { + s8 nr_hw_address_spaces; + s8 nr_user_address_spaces; + ++#if MALI_USE_CSF ++ struct kbase_hwcnt_backend_csf_if hwcnt_backend_csf_if_fw; ++#else + struct kbase_hwcnt { +- /* The lock should be used when accessing any of the following members */ + spinlock_t lock; + + struct kbase_context *kctx; +@@ -884,6 +987,7 @@ struct kbase_device { + + struct kbase_instr_backend backend; + } hwcnt; 
++#endif + + struct kbase_hwcnt_backend_interface hwcnt_gpu_iface; + struct kbase_hwcnt_context *hwcnt_gpu_ctx; +@@ -906,6 +1010,7 @@ struct kbase_device { + + struct list_head kctx_list; + struct mutex kctx_list_lock; ++ atomic_t group_max_uid_in_devices; + + #ifdef CONFIG_MALI_DEVFREQ + struct devfreq_dev_profile devfreq_profile; +@@ -917,17 +1022,10 @@ struct kbase_device { + struct kbase_devfreq_opp *devfreq_table; + int num_opps; + struct kbasep_pm_metrics last_devfreq_metrics; +- +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) + struct kbase_devfreq_queue_info devfreq_queue; +-#endif + +-#ifdef CONFIG_DEVFREQ_THERMAL +-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) +- struct devfreq_cooling_device *devfreq_cooling; +-#else ++#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) + struct thermal_cooling_device *devfreq_cooling; +-#endif + bool ipa_protection_mode_switched; + struct { + /* Access to this struct must be with ipa.lock held */ +@@ -940,11 +1038,13 @@ struct kbase_device { + * the difference between last_metrics and the current values. + */ + struct kbasep_pm_metrics last_metrics; +- /* Model data to pass to ipa_gpu_active/idle() */ +- struct kbase_ipa_model_vinstr_data *model_data; + + /* true if use of fallback model has been forced by the User */ + bool force_fallback_model; ++ /* Records the time when counters, used for dynamic energy ++ * estimation, were last sampled. 
++ */ ++ ktime_t last_sample_time; + } ipa; + #endif /* CONFIG_DEVFREQ_THERMAL */ + #endif /* CONFIG_MALI_DEVFREQ */ +@@ -952,9 +1052,10 @@ struct kbase_device { + + atomic_t job_fault_debug; + +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + struct dentry *mali_debugfs_directory; + struct dentry *debugfs_ctx_directory; ++ struct dentry *debugfs_instr_directory; + + #ifdef CONFIG_MALI_DEBUG + u64 debugfs_as_read_bitmap; +@@ -968,14 +1069,14 @@ struct kbase_device { + + #if !MALI_CUSTOMER_RELEASE + struct { +- u16 reg_offset; ++ u32 reg_offset; + } regs_dump_debugfs_data; + #endif /* !MALI_CUSTOMER_RELEASE */ + #endif /* CONFIG_DEBUG_FS */ + + atomic_t ctx_num; + +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + struct kbase_io_history io_history; + #endif /* CONFIG_DEBUG_FS */ + +@@ -985,8 +1086,7 @@ struct kbase_device { + + bool poweroff_pending; + +- +-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) ++#if (KERNEL_VERSION(4, 4, 0) <= LINUX_VERSION_CODE) + bool infinite_cache_active_default; + #else + u32 infinite_cache_active_default; +@@ -1015,9 +1115,6 @@ struct kbase_device { + + struct work_struct protected_mode_hwcnt_disable_work; + +-#ifdef CONFIG_MALI_BUSLOG +- struct bus_logger_client *buslogger; +-#endif + + bool irq_reset_flush; + +@@ -1029,7 +1126,14 @@ struct kbase_device { + + u8 l2_size_override; + u8 l2_hash_override; ++ bool l2_hash_values_override; ++ u32 l2_hash_values[ASN_HASH_COUNT]; + ++ struct mutex fw_load_lock; ++#if MALI_USE_CSF ++ /* CSF object for the GPU device. */ ++ struct kbase_csf_device csf; ++#else + struct kbasep_js_device_data js_data; + + /* See KBASE_JS_*_PRIORITY_MODE for details. 
*/ +@@ -1042,6 +1146,14 @@ struct kbase_device { + u8 backup_serialize_jobs; + #endif /* CONFIG_MALI_CINSTR_GWT */ + ++#endif /* MALI_USE_CSF */ ++ ++ struct rb_root process_root; ++ struct rb_root dma_buf_root; ++ ++ size_t total_gpu_pages; ++ struct mutex dma_buf_lock; ++ spinlock_t gpu_mem_usage_lock; + + struct { + struct kbase_context *ctx; +@@ -1049,16 +1161,16 @@ struct kbase_device { + int slot; + u64 flags; + } dummy_job_wa; ++ bool dummy_job_wa_loaded; + + #ifdef CONFIG_MALI_ARBITER_SUPPORT +- /* Pointer to the arbiter device */ + struct kbase_arbiter_device arb; + #endif +-}; ++ /* Priority Control Manager device */ ++ struct priority_control_manager_device *pcm_dev; + +-#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \ +- (((minor) & 0xFFF) << 8) | \ +- ((0 & 0xFF) << 0)) ++ struct notifier_block oom_notifier_block; ++}; + + /** + * enum kbase_file_state - Initialization state of a file opened by @kbase_open +@@ -1109,7 +1221,92 @@ struct kbase_file { + unsigned long api_version; + atomic_t setup_state; + }; +- ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++/** ++ * enum kbase_context_flags - Flags for kbase contexts ++ * ++ * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit ++ * process on a 64-bit kernel. ++ * ++ * @KCTX_RUNNABLE_REF: Set when context is counted in ++ * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing. ++ * ++ * @KCTX_ACTIVE: Set when the context is active. ++ * ++ * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this ++ * context. ++ * ++ * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been ++ * initialized. ++ * ++ * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new ++ * allocations. Existing allocations will not change. ++ * ++ * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs. ++ * ++ * @KCTX_PRIVILEGED:Set if the context uses an address space and should be kept ++ * scheduled in. 
++ * ++ * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool. ++ * This is only ever updated whilst the jsctx_mutex is held. ++ * ++ * @KCTX_DYING: Set when the context process is in the process of being evicted. ++ * ++ * @KCTX_NO_IMPLICIT_SYNC: Set when explicit Android fences are in use on this ++ * context, to disable use of implicit dma-buf fences. This is used to avoid ++ * potential synchronization deadlocks. ++ * ++ * @KCTX_FORCE_SAME_VA: Set when BASE_MEM_SAME_VA should be forced on memory ++ * allocations. For 64-bit clients it is enabled by default, and disabled by ++ * default on 32-bit clients. Being able to clear this flag is only used for ++ * testing purposes of the custom zone allocation on 64-bit user-space builds, ++ * where we also require more control than is available through e.g. the JIT ++ * allocation mechanism. However, the 64-bit user-space client must still ++ * reserve a JIT region using KBASE_IOCTL_MEM_JIT_INIT. ++ * ++ * @KCTX_PULLED_SINCE_ACTIVE_JS0: Set when the context has had an atom pulled ++ * from it for job slot 0. This is reset when the context first goes active or ++ * is re-activated on that slot. ++ * ++ * @KCTX_PULLED_SINCE_ACTIVE_JS1: Set when the context has had an atom pulled ++ * from it for job slot 1. This is reset when the context first goes active or ++ * is re-activated on that slot. ++ * ++ * @KCTX_PULLED_SINCE_ACTIVE_JS2: Set when the context has had an atom pulled ++ * from it for job slot 2. This is reset when the context first goes active or ++ * is re-activated on that slot. ++ * ++ * @KCTX_AS_DISABLED_ON_FAULT: Set when the GPU address space is disabled for ++ * the context due to unhandled page (or bus) fault. It is cleared when the ++ * refcount for the context drops to 0 or when the address spaces are ++ * re-enabled on GPU reset or power cycle.
++ * ++ * @KCTX_JPL_ENABLED: Set when JIT physical page limit is less than JIT virtual ++ * address page limit, so we must take care to not exceed the physical limit ++ * ++ * All members need to be separate bits. This enum is intended for use in a ++ * bitmask where multiple values get OR-ed together. ++ */ ++enum kbase_context_flags { ++ KCTX_COMPAT = 1U << 0, ++ KCTX_RUNNABLE_REF = 1U << 1, ++ KCTX_ACTIVE = 1U << 2, ++ KCTX_PULLED = 1U << 3, ++ KCTX_MEM_PROFILE_INITIALIZED = 1U << 4, ++ KCTX_INFINITE_CACHE = 1U << 5, ++ KCTX_SUBMIT_DISABLED = 1U << 6, ++ KCTX_PRIVILEGED = 1U << 7, ++ KCTX_SCHEDULED = 1U << 8, ++ KCTX_DYING = 1U << 9, ++ KCTX_NO_IMPLICIT_SYNC = 1U << 10, ++ KCTX_FORCE_SAME_VA = 1U << 11, ++ KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12, ++ KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13, ++ KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14, ++ KCTX_AS_DISABLED_ON_FAULT = 1U << 15, ++ KCTX_JPL_ENABLED = 1U << 16, ++}; ++#else + /** + * enum kbase_context_flags - Flags for kbase contexts + * +@@ -1190,6 +1387,7 @@ enum kbase_context_flags { + KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14, + KCTX_AS_DISABLED_ON_FAULT = 1U << 15, + }; ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + struct kbase_sub_alloc { + struct list_head link; +@@ -1197,6 +1395,21 @@ struct kbase_sub_alloc { + DECLARE_BITMAP(sub_pages, SZ_2M / SZ_4K); + }; + ++/** ++ * struct kbase_reg_zone - Information about GPU memory region zones ++ * @base_pfn: Page Frame Number in GPU virtual address space for the start of ++ * the Zone ++ * @va_size_pages: Size of the Zone in pages ++ * ++ * Track information about a zone KBASE_REG_ZONE() and related macros. 
++ * In future, this could also store the &rb_root that are currently in ++ * &kbase_context ++ */ ++struct kbase_reg_zone { ++ u64 base_pfn; ++ u64 va_size_pages; ++}; ++ + /** + * struct kbase_context - Kernel base context + * +@@ -1247,6 +1460,7 @@ struct kbase_sub_alloc { + * @reg_rbtree_exec: RB tree of the memory regions allocated from the EXEC_VA + * zone of the GPU virtual address space. Used for GPU-executable + * allocations which don't need the SAME_VA property. ++ * @reg_zone: Zone information for the reg_rbtree_<...> members. + * @cookies: Bitmask containing of BITS_PER_LONG bits, used mainly for + * SAME_VA allocations to defer the reservation of memory region + * (from the GPU virtual address space) from base_mem_alloc +@@ -1271,6 +1485,7 @@ struct kbase_sub_alloc { + * which actually created the context. This is usually, + * but not necessarily, the same as the thread which + * opened the device file /dev/malixx instance. ++ * @csf: kbase csf context + * @jctx: object encapsulating all the Job dispatcher related state, + * including the array of atoms. + * @used_pages: Keeps a track of the number of 4KB physical pages in use +@@ -1287,6 +1502,8 @@ struct kbase_sub_alloc { + * evictable/reclaimable. + * @evict_list: List head for the list containing the allocations which + * can be evicted or freed up in the shrinker callback. ++ * @evict_nents: Total number of pages allocated by the allocations within ++ * @evict_list (atomic). + * @waiting_soft_jobs: List head for the list containing softjob atoms, which + * are either waiting for the event set operation, or waiting + * for the signaling of input fence or waiting for the GPU +@@ -1297,6 +1514,8 @@ struct kbase_sub_alloc { + * waiting atoms and the waitqueue to process the work item + * queued for the atoms blocked on the signaling of dma-buf + * fences. 
++ * @dma_fence.waiting_resource: list head for the list of dma-buf fence ++ * @dma_fence.wq: waitqueue to process the work item queued + * @as_nr: id of the address space being used for the scheduled in + * context. This is effectively part of the Run Pool, because + * it only has a valid setting (!=KBASEP_AS_NR_INVALID) whilst +@@ -1315,15 +1534,13 @@ struct kbase_sub_alloc { + * at any point. + * Generally the reference count is incremented when the context + * is scheduled in and an atom is pulled from the context's per +- * slot runnable tree. ++ * slot runnable tree in JM GPU or GPU command queue ++ * group is programmed on CSG slot in CSF GPU. + * @mm_update_lock: lock used for handling of special tracking page. + * @process_mm: Pointer to the memory descriptor of the process which + * created the context. Used for accounting the physical + * pages used for GPU allocations, done for the context, + * to the memory consumed by the process. +- * @same_va_end: End address of the SAME_VA zone (in 4KB page units) +- * @exec_va_start: Start address of the EXEC_VA zone (in 4KB page units) +- * or U64_MAX if the EXEC_VA zone is uninitialized. + * @gpu_va_end: End address of the GPU va space (in 4KB page units) + * @jit_va: Indicates if a JIT_VA zone has been created. + * @mem_profile_data: Buffer containing the profiling information provided by +@@ -1399,6 +1616,16 @@ struct kbase_sub_alloc { + * that were used (i.e. the + * &struct_kbase_va_region.used_pages for regions + * that have had a usage report). ++ * @jit_phys_pages_to_be_allocated: Count of the physical pages that are being ++ * now allocated for just-in-time memory ++ * allocations of a context (across all the ++ * threads). This is supposed to be updated ++ * with @reg_lock held before allocating ++ * the backing pages. This helps ensure that ++ * total physical memory usage for just in ++ * time memory allocation remains within the ++ * @jit_phys_pages_limit in multi-threaded ++ * scenarios. 
+ * @jit_active_head: List containing the just-in-time memory allocations + * which are in use. + * @jit_pool_head: List containing the just-in-time memory allocations +@@ -1425,6 +1652,10 @@ struct kbase_sub_alloc { + * is used to determine the atom's age when it is added to + * the runnable RB-tree. + * @trim_level: Level of JIT allocation trimming to perform on free (0-100%) ++ * @kprcs: Reference to @struct kbase_process that the current ++ * kbase_context belongs to. ++ * @kprcs_link: List link for the list of kbase context maintained ++ * under kbase_process. + * @gwt_enabled: Indicates if tracking of GPU writes is enabled, protected by + * kbase_context.reg_lock. + * @gwt_was_enabled: Simple sticky bit flag to know if GWT was ever enabled. +@@ -1435,6 +1666,12 @@ struct kbase_sub_alloc { + * for context scheduling, protected by hwaccess_lock. + * @atoms_count: Number of GPU atoms currently in use, per priority + * @create_flags: Flags used in context creation. ++ * @kinstr_jm: Kernel job manager instrumentation context handle ++ * @tl_kctx_list_node: List item into the device timeline's list of ++ * contexts, for timeline summarization. ++ * @limited_core_mask: The mask that is applied to the affinity in case of atoms ++ * marked with BASE_JD_REQ_LIMITED_CORE_MASK. ++ * @platform_data: Pointer to platform specific per-context data. + * + * A kernel base context is an entity among which the GPU is scheduled. + * Each context has its own GPU address space. 
+@@ -1453,7 +1690,9 @@ struct kbase_context { + struct list_head event_list; + struct list_head event_coalesce_list; + struct mutex event_mutex; ++#if !MALI_USE_CSF + atomic_t event_closed; ++#endif + struct workqueue_struct *event_workq; + atomic_t event_count; + int event_coalesce_count; +@@ -1470,7 +1709,11 @@ struct kbase_context { + struct rb_root reg_rbtree_same; + struct rb_root reg_rbtree_custom; + struct rb_root reg_rbtree_exec; ++ struct kbase_reg_zone reg_zone[KBASE_REG_ZONE_MAX]; + ++#if MALI_USE_CSF ++ struct kbase_csf_context csf; ++#else + struct kbase_jd_context jctx; + struct jsctx_queue jsctx_queue + [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS]; +@@ -1488,6 +1731,7 @@ struct kbase_context { + s16 atoms_count[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + u32 slots_pullable; + u32 age_count; ++#endif /* MALI_USE_CSF */ + + DECLARE_BITMAP(cookies, BITS_PER_LONG); + struct kbase_va_region *pending_regions[BITS_PER_LONG]; +@@ -1503,6 +1747,7 @@ struct kbase_context { + + struct shrinker reclaim; + struct list_head evict_list; ++ atomic_t evict_nents; + + struct list_head waiting_soft_jobs; + spinlock_t waiting_soft_jobs_lock; +@@ -1519,12 +1764,10 @@ struct kbase_context { + + spinlock_t mm_update_lock; + struct mm_struct __rcu *process_mm; +- u64 same_va_end; +- u64 exec_va_start; + u64 gpu_va_end; + bool jit_va; + +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + char *mem_profile_data; + size_t mem_profile_size; + struct mutex mem_profile_lock; +@@ -1545,10 +1788,11 @@ struct kbase_context { + u8 jit_current_allocations_per_bin[256]; + u8 jit_version; + u8 jit_group_id; +-#if MALI_JIT_PRESSURE_LIMIT ++#if MALI_JIT_PRESSURE_LIMIT_BASE + u64 jit_phys_pages_limit; + u64 jit_current_phys_pressure; +-#endif /* MALI_JIT_PRESSURE_LIMIT */ ++ u64 jit_phys_pages_to_be_allocated; ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + struct list_head jit_active_head; + struct list_head jit_pool_head; + struct list_head jit_destroy_head; +@@ -1559,6 +1803,9 
@@ struct kbase_context { + + u8 trim_level; + ++ struct kbase_process *kprcs; ++ struct list_head kprcs_link; ++ + #ifdef CONFIG_MALI_CINSTR_GWT + bool gwt_enabled; + bool gwt_was_enabled; +@@ -1567,6 +1814,17 @@ struct kbase_context { + #endif + + base_context_create_flags create_flags; ++ ++#if !MALI_USE_CSF ++ struct kbase_kinstr_jm *kinstr_jm; ++#endif ++ struct list_head tl_kctx_list_node; ++ ++ u64 limited_core_mask; ++ ++#if !MALI_USE_CSF ++ void *platform_data; ++#endif + }; + + #ifdef CONFIG_MALI_CINSTR_GWT +@@ -1653,29 +1911,4 @@ static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev) + /* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */ + #define KBASE_AS_INACTIVE_MAX_LOOPS 100000000 + +-/* JobDescriptorHeader - taken from the architecture specifications, the layout +- * is currently identical for all GPU archs. */ +-struct job_descriptor_header { +- u32 exception_status; +- u32 first_incomplete_task; +- u64 fault_pointer; +- u8 job_descriptor_size : 1; +- u8 job_type : 7; +- u8 job_barrier : 1; +- u8 _reserved_01 : 1; +- u8 _reserved_1 : 1; +- u8 _reserved_02 : 1; +- u8 _reserved_03 : 1; +- u8 _reserved_2 : 1; +- u8 _reserved_04 : 1; +- u8 _reserved_05 : 1; +- u16 job_index; +- u16 job_dependency_index_1; +- u16 job_dependency_index_2; +- union { +- u64 _64; +- u32 _32; +- } next_job; +-}; +- + #endif /* _KBASE_DEFS_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c +index b5ac414..7d6e475 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2014, 2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014, 2020-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c +index 25acbcb..69ff8cc 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2011-2017,2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2016, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,26 +17,21 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- + /* Include mali_kbase_dma_fence.h before checking for CONFIG_MALI_DMA_FENCE as + * it will be set there. 
+ */ + #include "mali_kbase_dma_fence.h" +- + #include + #include + #include + #include +-#include ++#include + #include + #include + #include + #include +- + #include + + static void +@@ -59,7 +55,11 @@ static int + kbase_dma_fence_lock_reservations(struct kbase_dma_fence_resv_info *info, + struct ww_acquire_ctx *ctx) + { ++#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + struct reservation_object *content_res = NULL; ++#else ++ struct dma_resv *content_res = NULL; ++#endif + unsigned int content_res_idx = 0; + unsigned int r; + int err = 0; +@@ -205,7 +205,7 @@ out: + } + + static void +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb) + #else + kbase_dma_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) +@@ -225,12 +225,19 @@ kbase_dma_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) + kbase_dma_fence_queue_work(katom); + } + ++#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + static int + kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom, + struct reservation_object *resv, + bool exclusive) ++#else ++static int ++kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom, ++ struct dma_resv *resv, ++ bool exclusive) ++#endif + { +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *excl_fence = NULL; + struct fence **shared_fences = NULL; + #else +@@ -240,7 +247,12 @@ kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom, + unsigned int shared_count = 0; + int err, i; + +- err = reservation_object_get_fences_rcu(resv, ++#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) ++ err = reservation_object_get_fences_rcu( ++#else ++ err = dma_resv_get_fences_rcu( ++#endif ++ resv, + &excl_fence, + &shared_count, + &shared_fences); +@@ -294,9 +306,15 @@ out: + return err; + } + ++#if (KERNEL_VERSION(5, 4, 0) > 
LINUX_VERSION_CODE) + void kbase_dma_fence_add_reservation(struct reservation_object *resv, + struct kbase_dma_fence_resv_info *info, + bool exclusive) ++#else ++void kbase_dma_fence_add_reservation(struct dma_resv *resv, ++ struct kbase_dma_fence_resv_info *info, ++ bool exclusive) ++#endif + { + unsigned int i; + +@@ -317,7 +335,7 @@ int kbase_dma_fence_wait(struct kbase_jd_atom *katom, + struct kbase_dma_fence_resv_info *info) + { + int err, i; +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence; + #else + struct dma_fence *fence; +@@ -346,10 +364,17 @@ int kbase_dma_fence_wait(struct kbase_jd_atom *katom, + } + + for (i = 0; i < info->dma_fence_resv_count; i++) { ++#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + struct reservation_object *obj = info->resv_objs[i]; +- ++#else ++ struct dma_resv *obj = info->resv_objs[i]; ++#endif + if (!test_bit(i, info->dma_fence_excl_bitmap)) { ++#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + err = reservation_object_reserve_shared(obj); ++#else ++ err = dma_resv_reserve_shared(obj, 0); ++#endif + if (err) { + dev_err(katom->kctx->kbdev->dev, + "Error %d reserving space for shared fence.\n", err); +@@ -363,7 +388,11 @@ int kbase_dma_fence_wait(struct kbase_jd_atom *katom, + goto end; + } + ++#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + reservation_object_add_shared_fence(obj, fence); ++#else ++ dma_resv_add_shared_fence(obj, fence); ++#endif + } else { + err = kbase_dma_fence_add_reservation_callback(katom, obj, true); + if (err) { +@@ -372,7 +401,11 @@ int kbase_dma_fence_wait(struct kbase_jd_atom *katom, + goto end; + } + ++#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + reservation_object_add_excl_fence(obj, fence); ++#else ++ dma_resv_add_excl_fence(obj, fence); ++#endif + } + } + +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h +index 
2a4d6fc..38d3581 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2016, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KBASE_DMA_FENCE_H_ +@@ -26,10 +25,14 @@ + #ifdef CONFIG_MALI_DMA_FENCE + + #include ++#include ++#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + #include ++#else ++#include ++#endif + #include + +- + /* Forward declaration from mali_kbase_defs.h */ + struct kbase_jd_atom; + struct kbase_context; +@@ -45,7 +48,11 @@ struct kbase_context; + * reservation objects. + */ + struct kbase_dma_fence_resv_info { ++#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + struct reservation_object **resv_objs; ++#else ++ struct dma_resv **resv_objs; ++#endif + unsigned int dma_fence_resv_count; + unsigned long *dma_fence_excl_bitmap; + }; +@@ -60,9 +67,15 @@ struct kbase_dma_fence_resv_info { + * reservation_objects. At the same time keeps track of which objects require + * exclusive access in dma_fence_excl_bitmap. 
+ */ ++#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + void kbase_dma_fence_add_reservation(struct reservation_object *resv, + struct kbase_dma_fence_resv_info *info, + bool exclusive); ++#else ++void kbase_dma_fence_add_reservation(struct dma_resv *resv, ++ struct kbase_dma_fence_resv_info *info, ++ bool exclusive); ++#endif + + /** + * kbase_dma_fence_wait() - Creates a new fence and attaches it to the resv_objs +@@ -122,8 +135,7 @@ void kbase_dma_fence_term(struct kbase_context *kctx); + */ + int kbase_dma_fence_init(struct kbase_context *kctx); + +- +-#else /* CONFIG_MALI_DMA_FENCE */ ++#else /* !CONFIG_MALI_DMA_FENCE */ + /* Dummy functions for when dma-buf fence isn't enabled. */ + + static inline int kbase_dma_fence_init(struct kbase_context *kctx) +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.c +index 188e53b..1e91ba0 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +@@ -25,7 +24,7 @@ + */ + + #include +-#include ++#include + #include + + #include +@@ -282,6 +281,8 @@ int kbase_dummy_job_wa_load(struct kbase_device *kbdev) + int err; + struct kbase_context *kctx; + ++ lockdep_assert_held(&kbdev->fw_load_lock); ++ + if (!wa_blob_load_needed(kbdev)) + return 0; + +@@ -427,6 +428,10 @@ void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev) + { + struct kbase_context *wa_ctx; + ++ /* return if the dummy job has not been loaded */ ++ if (kbdev->dummy_job_wa_loaded == false) ++ return; ++ + /* Can be safely called even if the file wasn't created on probe */ + sysfs_remove_file(&kbdev->dev->kobj, &dev_attr_dummy_job_wa_info.attr); + +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.h +index 5bbe37d..8713ba1 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KBASE_DUMMY_JOB_WORKAROUND_ +@@ -31,6 +30,34 @@ + KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP | \ + KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) + ++#if MALI_USE_CSF ++ ++static inline int kbase_dummy_job_wa_load(struct kbase_device *kbdev) ++{ ++ CSTD_UNUSED(kbdev); ++ return 0; ++} ++ ++static inline void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev) ++{ ++ CSTD_UNUSED(kbdev); ++} ++ ++static inline int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, ++ u64 cores) ++{ ++ CSTD_UNUSED(kbdev); ++ CSTD_UNUSED(cores); ++ return 0; ++} ++ ++static inline bool kbase_dummy_job_wa_enabled(struct kbase_device *kbdev) ++{ ++ CSTD_UNUSED(kbdev); ++ return false; ++} ++ ++#else + + int kbase_dummy_job_wa_load(struct kbase_device *kbdev); + void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev); +@@ -41,5 +68,6 @@ static inline bool kbase_dummy_job_wa_enabled(struct kbase_device *kbdev) + return (kbdev->dummy_job_wa.ctx != NULL); + } + ++#endif /* MALI_USE_CSF */ + + #endif /* _KBASE_DUMMY_JOB_WORKAROUND_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dvfs_debugfs.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dvfs_debugfs.c +new file mode 100644 +index 0000000..1e584de +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dvfs_debugfs.c +@@ -0,0 +1,98 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#include "mali_kbase_dvfs_debugfs.h" ++#include ++#include ++ ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++ ++/** ++ * kbasep_dvfs_utilization_debugfs_show() - Print the DVFS utilization info ++ * ++ * @file: The seq_file for printing to ++ * @data: Unused; the kbase_device pointer is taken from @file's private data ++ * ++ * Return: Negative error code or 0 on success. ++ */ ++static int kbasep_dvfs_utilization_debugfs_show(struct seq_file *file, void *data) ++{ ++ struct kbase_device *kbdev = file->private; ++ ++#if MALI_USE_CSF ++ seq_printf(file, "busy_time: %u idle_time: %u protm_time: %u\n", ++ kbdev->pm.backend.metrics.values.time_busy, ++ kbdev->pm.backend.metrics.values.time_idle, ++ kbdev->pm.backend.metrics.values.time_in_protm); ++#else ++ seq_printf(file, "busy_time: %u idle_time: %u\n", ++ kbdev->pm.backend.metrics.values.time_busy, ++ kbdev->pm.backend.metrics.values.time_idle); ++#endif ++ ++ return 0; ++} ++ ++static int kbasep_dvfs_utilization_debugfs_open(struct inode *in, ++ struct file *file) ++{ ++ return single_open(file, kbasep_dvfs_utilization_debugfs_show, ++ in->i_private); ++} ++ ++static const struct file_operations kbasep_dvfs_utilization_debugfs_fops = { ++ .open = kbasep_dvfs_utilization_debugfs_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++void kbase_dvfs_status_debugfs_init(struct kbase_device *kbdev) ++{ ++ struct dentry *file; ++#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) ++ const mode_t mode = 0444; ++#else ++ const mode_t mode = 0400; ++#endif ++ ++ if (WARN_ON(!kbdev || IS_ERR_OR_NULL(kbdev->mali_debugfs_directory))) ++ return; ++ ++ file = debugfs_create_file("dvfs_utilization", mode, ++ kbdev->mali_debugfs_directory, kbdev, ++
&kbasep_dvfs_utilization_debugfs_fops); ++ ++ if (IS_ERR_OR_NULL(file)) { ++ dev_warn(kbdev->dev, ++ "Unable to create dvfs debugfs entry\n"); ++ } ++} ++ ++#else ++/* ++ * Stub functions for when debugfs is disabled ++ */ ++void kbase_dvfs_status_debugfs_init(struct kbase_device *kbdev) ++{ ++} ++ ++#endif /* CONFIG_DEBUG_FS */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dvfs_debugfs.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dvfs_debugfs.h +new file mode 100644 +index 0000000..8334db7 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dvfs_debugfs.h +@@ -0,0 +1,35 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html.
++ * ++ */ ++ ++#ifndef _KBASE_DVFS_DEBUGFS_H_ ++#define _KBASE_DVFS_DEBUGFS_H_ ++ ++/* Forward declaration */ ++struct kbase_device; ++ ++/** ++ * kbase_dvfs_status_debugfs_init() - Create a debugfs entry for DVFS queries ++ * ++ * @kbdev: Pointer to the GPU device for which to create the debugfs entry ++ */ ++void kbase_dvfs_status_debugfs_init(struct kbase_device *kbdev); ++ ++#endif /* _KBASE_DVFS_DEBUGFS_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c +index c8b8f22..910c511 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2010-2016,2018-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2016, 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + #include + #include + #include +@@ -45,7 +42,7 @@ static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, stru + KBASE_TLSTREAM_TL_DEL_ATOM(kbdev, katom); + + katom->status = KBASE_JD_ATOM_STATE_UNUSED; +- dev_dbg(kbdev->dev, "Atom %p status to unused\n", (void *)katom); ++ dev_dbg(kbdev->dev, "Atom %pK status to unused\n", (void *)katom); + wake_up(&katom->completed); + + return data; +@@ -82,7 +79,7 @@ int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *ueve + + mutex_unlock(&ctx->event_mutex); + +- dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom); ++ dev_dbg(ctx->kbdev->dev, "event dequeuing %pK\n", (void *)atom); + uevent->event_code = atom->event_code; + + uevent->atom_number = (atom - ctx->jctx.atoms); +@@ -154,7 +151,8 @@ static int kbase_event_coalesce(struct kbase_context *kctx) + const int event_count = kctx->event_coalesce_count; + + /* Join the list of pending events onto the tail of the main list +- and reset it */ ++ * and reset it ++ */ + list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list); + kctx->event_coalesce_count = 0; + +@@ -166,7 +164,17 @@ void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) + { + struct kbase_device *kbdev = ctx->kbdev; + +- dev_dbg(kbdev->dev, "Posting event for atom %p\n", (void *)atom); ++ dev_dbg(kbdev->dev, "Posting event for atom %pK\n", (void *)atom); ++ ++ if (WARN_ON(atom->status != KBASE_JD_ATOM_STATE_COMPLETED)) { ++ dev_warn(kbdev->dev, ++ "%s: Atom %d (%pK) not completed (status %d)\n", ++ __func__, ++ kbase_jd_atom_id(atom->kctx, atom), ++ atom->kctx, ++ atom->status); ++ return; ++ } + + if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) { + if (atom->event_code == BASE_JD_EVENT_DONE) { +@@ -227,7 +235,7 @@ int kbase_event_init(struct kbase_context *kctx) + kctx->event_coalesce_count = 0; + kctx->event_workq = alloc_workqueue("kbase_event", 
WQ_MEM_RECLAIM, 1); + +- if (NULL == kctx->event_workq) ++ if (kctx->event_workq == NULL) + return -EINVAL; + + return 0; +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.c +index 7a715b3..01557cd 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,79 +17,18 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include + #include + #include +-#include + #include + #include + + /* Spin lock protecting all Mali fences as fence->lock. 
*/ + static DEFINE_SPINLOCK(kbase_fence_lock); + +-static const char * +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +-kbase_fence_get_driver_name(struct fence *fence) +-#else +-kbase_fence_get_driver_name(struct dma_fence *fence) +-#endif +-{ +- return kbase_drv_name; +-} +- +-static const char * +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +-kbase_fence_get_timeline_name(struct fence *fence) +-#else +-kbase_fence_get_timeline_name(struct dma_fence *fence) +-#endif +-{ +- return kbase_timeline_name; +-} +- +-static bool +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +-kbase_fence_enable_signaling(struct fence *fence) +-#else +-kbase_fence_enable_signaling(struct dma_fence *fence) +-#endif +-{ +- return true; +-} +- +-static void +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +-kbase_fence_fence_value_str(struct fence *fence, char *str, int size) +-#else +-kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size) +-#endif +-{ +-#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) +- snprintf(str, size, "%u", fence->seqno); +-#else +- snprintf(str, size, "%llu", fence->seqno); +-#endif +-} +- +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +-const struct fence_ops kbase_fence_ops = { +- .wait = fence_default_wait, +-#else +-const struct dma_fence_ops kbase_fence_ops = { +- .wait = dma_fence_default_wait, +-#endif +- .get_driver_name = kbase_fence_get_driver_name, +- .get_timeline_name = kbase_fence_get_timeline_name, +- .enable_signaling = kbase_fence_enable_signaling, +- .fence_value_str = kbase_fence_fence_value_str +-}; +- +- +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence * + kbase_fence_out_new(struct kbase_jd_atom *katom) + #else +@@ -96,7 +36,7 @@ struct dma_fence * + kbase_fence_out_new(struct kbase_jd_atom *katom) + #endif + { +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) 
+ struct fence *fence; + #else + struct dma_fence *fence; +@@ -157,7 +97,7 @@ kbase_fence_free_callbacks(struct kbase_jd_atom *katom) + return res; + } + +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + int + kbase_fence_add_callback(struct kbase_jd_atom *katom, + struct fence *fence, +@@ -211,4 +151,3 @@ kbase_fence_add_callback(struct kbase_jd_atom *katom, + + return err; + } +- +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.h +index 8e7024e..37823d5 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2010-2018, 2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KBASE_FENCE_H_ +@@ -35,7 +34,7 @@ + #include "mali_kbase_fence_defs.h" + #include "mali_kbase.h" + +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + extern const struct fence_ops kbase_fence_ops; + #else + extern const struct dma_fence_ops kbase_fence_ops; +@@ -49,7 +48,7 @@ extern const struct dma_fence_ops kbase_fence_ops; + * @node: List head for linking this callback to the katom + */ + struct kbase_fence_cb { +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence_cb fence_cb; + struct fence *fence; + #else +@@ -66,7 +65,7 @@ struct kbase_fence_cb { + * + * return: A new fence object on success, NULL on failure. + */ +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *kbase_fence_out_new(struct kbase_jd_atom *katom); + #else + struct dma_fence *kbase_fence_out_new(struct kbase_jd_atom *katom); +@@ -88,6 +87,7 @@ struct dma_fence *kbase_fence_out_new(struct kbase_jd_atom *katom); + #endif + + ++#if !MALI_USE_CSF + /** + * kbase_fence_out_remove() - Removes the output fence from atom + * @katom: Atom to remove output fence for +@@ -168,7 +168,7 @@ static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom, + * Return: 0 on success: fence was either already signaled, or callback was + * set up. Negative error code is returned on error. 
+ */ +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + int kbase_fence_add_callback(struct kbase_jd_atom *katom, + struct fence *fence, + fence_func_t callback); +@@ -269,6 +269,7 @@ bool kbase_fence_free_callbacks(struct kbase_jd_atom *katom); + */ + #define kbase_fence_out_get(katom) dma_fence_get((katom)->dma_fence.fence) + ++#endif /* !MALI_USE_CSF */ + + /** + * kbase_fence_put() - Releases a reference to a fence +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h +index 607a95c..7a150bd 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KBASE_FENCE_DEFS_H_ +@@ -28,11 +27,9 @@ + * This file hides the compatibility issues with this for the rest the driver + */ + +-#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE) +- + #include + +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + + #include + +@@ -55,7 +52,7 @@ + + #include + +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)) ++#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) + #define dma_fence_get_status(a) (dma_fence_is_signaled(a) ? \ + (a)->status ?: 1 \ + : 0) +@@ -63,6 +60,4 @@ + + #endif /* < 4.10.0 */ + +-#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE */ +- + #endif /* _KBASE_FENCE_DEFS_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_ops.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_ops.c +new file mode 100644 +index 0000000..14ddf03 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_ops.c +@@ -0,0 +1,83 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++ ++static const char * ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++kbase_fence_get_driver_name(struct fence *fence) ++#else ++kbase_fence_get_driver_name(struct dma_fence *fence) ++#endif ++{ ++ return kbase_drv_name; ++} ++ ++static const char * ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++kbase_fence_get_timeline_name(struct fence *fence) ++#else ++kbase_fence_get_timeline_name(struct dma_fence *fence) ++#endif ++{ ++ return kbase_timeline_name; ++} ++ ++static bool ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++kbase_fence_enable_signaling(struct fence *fence) ++#else ++kbase_fence_enable_signaling(struct dma_fence *fence) ++#endif ++{ ++ return true; ++} ++ ++static void ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++kbase_fence_fence_value_str(struct fence *fence, char *str, int size) ++#else ++kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size) ++#endif ++{ ++#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) ++ snprintf(str, size, "%u", fence->seqno); ++#else ++ snprintf(str, size, "%llu", fence->seqno); ++#endif ++} ++ ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) ++const struct fence_ops kbase_fence_ops = { ++ .wait = fence_default_wait, ++#else ++const struct dma_fence_ops kbase_fence_ops = { ++ .wait = dma_fence_default_wait, ++#endif ++ .get_driver_name = kbase_fence_get_driver_name, ++ .get_timeline_name = kbase_fence_get_timeline_name, ++ .enable_signaling = kbase_fence_enable_signaling, ++ .fence_value_str = kbase_fence_fence_value_str ++}; ++ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h +index 6428f08..88c96e0 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- 
* (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* NB taken from gator */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c +index 93f1565..6eaae83 100755 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2012-2017, 2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2017, 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,24 +17,23 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include + #include + +-#ifdef CONFIG_DEBUG_FS +-/** Show callback for the @c gpu_memory debugfs file. ++#if IS_ENABLED(CONFIG_DEBUG_FS) ++/** ++ * kbasep_gpu_memory_seq_show - Show callback for the @c gpu_memory debugfs file ++ * @sfile: The debugfs entry ++ * @data: Data associated with the entry + * + * This function is called to get the contents of the @c gpu_memory debugfs + * file. This is a report of current gpu memory usage. + * +- * @param sfile The debugfs entry +- * @param data Data associated with the entry +- * +- * @return 0 if successfully prints data in debugfs entry file +- * -1 if it encountered an error ++ * Return: ++ * * 0 if successfully prints data in debugfs entry file ++ * * -1 if it encountered an error + */ + + static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data) +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h +index 28a871a..6d5423f 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2012-2014, 2016 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2014, 2016, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,15 +17,10 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /** +- * @file mali_kbase_gpu_memory_debugfs.h +- * Header file for gpu_memory entry in debugfs ++ * DOC: Header file for gpu_memory entry in debugfs + * + */ + +@@ -34,8 +30,20 @@ + #include + #include + ++/* kbase_io_history_add - add new entry to the register access history ++ * ++ * @h: Pointer to the history data structure ++ * @addr: Register address ++ * @value: The value that is either read from or written to the register ++ * @write: 1 if it's a register write, 0 if it's a read ++ */ ++void kbase_io_history_add(struct kbase_io_history *h, void __iomem const *addr, ++ u32 value, u8 write); ++ + /** +- * @brief Initialize gpu_memory debugfs entry ++ * kbasep_gpu_memory_debugfs_init - Initialize gpu_memory debugfs entry ++ * ++ * @kbdev: Device pointer + */ + void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev); + +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c +index ae2458f..e4d52c9 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c +@@ -1,12 +1,12 @@ +-// SPDX-License-Identifier: GPL-2.0 ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. 
++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -17,12 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /* + * Base kernel property query APIs + */ +@@ -32,9 +28,9 @@ + #include + #include + #include +-#include "mali_kbase_ioctl.h" ++#include + #include +-#include ++#include + #include + #include + +@@ -48,7 +44,7 @@ static void kbase_gpuprops_construct_coherent_groups( + u64 first_set, first_set_prev; + u32 num_groups = 0; + +- KBASE_DEBUG_ASSERT(NULL != props); ++ KBASE_DEBUG_ASSERT(props != NULL); + + props->coherency_info.coherency = props->raw_props.mem_features; + props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present); +@@ -107,6 +103,71 @@ static void kbase_gpuprops_construct_coherent_groups( + props->coherency_info.num_groups = num_groups; + } + ++/** ++ * kbase_gpuprops_get_curr_config_props - Get the current allocated resources ++ * @kbdev: The &struct kbase_device structure for the device ++ * @curr_config: The &struct curr_config_props structure to receive the result ++ * ++ * Fill the &struct curr_config_props structure with values from the GPU ++ * configuration registers. ++ * ++ * Return: Zero on success, Linux error code on failure ++ */ ++int kbase_gpuprops_get_curr_config_props(struct kbase_device *kbdev, ++ struct curr_config_props * const curr_config) ++{ ++ struct kbase_current_config_regdump curr_config_regdump; ++ int err; ++ ++ if (WARN_ON(!kbdev) || WARN_ON(!curr_config)) ++ return -EINVAL; ++ ++ /* If update not needed just return. 
*/ ++ if (!curr_config->update_needed) ++ return 0; ++ ++ /* Dump relevant registers */ ++ err = kbase_backend_gpuprops_get_curr_config(kbdev, ++ &curr_config_regdump); ++ if (err) ++ return err; ++ ++ curr_config->l2_slices = ++ KBASE_UBFX32(curr_config_regdump.mem_features, 8U, 4) + 1; ++ ++ curr_config->l2_present = ++ ((u64) curr_config_regdump.l2_present_hi << 32) + ++ curr_config_regdump.l2_present_lo; ++ ++ curr_config->shader_present = ++ ((u64) curr_config_regdump.shader_present_hi << 32) + ++ curr_config_regdump.shader_present_lo; ++ ++ curr_config->num_cores = hweight64(curr_config->shader_present); ++ ++ curr_config->update_needed = false; ++ ++ return 0; ++} ++ ++/** ++ * kbase_gpuprops_req_curr_config_update - Request Current Config Update ++ * @kbdev: The &struct kbase_device structure for the device ++ * ++ * Requests the current configuration to be updated next time the ++ * kbase_gpuprops_get_curr_config_props() is called. ++ * ++ * Return: Zero on success, Linux error code on failure ++ */ ++int kbase_gpuprops_req_curr_config_update(struct kbase_device *kbdev) ++{ ++ if (WARN_ON(!kbdev)) ++ return -EINVAL; ++ ++ kbdev->gpu_props.curr_config.update_needed = true; ++ return 0; ++} ++ + /** + * kbase_gpuprops_get_props - Get the GPU configuration + * @gpu_props: The &struct base_gpu_props structure +@@ -124,8 +185,8 @@ static int kbase_gpuprops_get_props(struct base_gpu_props * const gpu_props, + int i; + int err; + +- KBASE_DEBUG_ASSERT(NULL != kbdev); +- KBASE_DEBUG_ASSERT(NULL != gpu_props); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(gpu_props != NULL); + + /* Dump relevant registers */ + err = kbase_backend_gpuprops_get(kbdev, ®dump); +@@ -166,6 +227,10 @@ static int kbase_gpuprops_get_props(struct base_gpu_props * const gpu_props, + gpu_props->raw_props.thread_features = regdump.thread_features; + gpu_props->raw_props.thread_tls_alloc = regdump.thread_tls_alloc; + ++ gpu_props->raw_props.gpu_features = ++ ((u64) 
regdump.gpu_features_hi << 32) + ++ regdump.gpu_features_lo; ++ + return 0; + } + +@@ -182,6 +247,59 @@ void kbase_gpuprops_update_core_props_gpu_id( + KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16); + } + ++/** ++ * kbase_gpuprops_update_max_config_props - Updates the max config properties in ++ * the base_gpu_props. ++ * @base_props: The &struct base_gpu_props structure ++ * @kbdev: The &struct kbase_device structure for the device ++ * ++ * Updates the &struct base_gpu_props structure with the max config properties. ++ */ ++static void kbase_gpuprops_update_max_config_props( ++ struct base_gpu_props * const base_props, struct kbase_device *kbdev) ++{ ++ int l2_n = 0; ++ ++ if (WARN_ON(!kbdev) || WARN_ON(!base_props)) ++ return; ++ ++ /* return if the max_config is not set during arbif initialization */ ++ if (kbdev->gpu_props.max_config.core_mask == 0) ++ return; ++ ++ /* ++ * Set the base_props with the maximum config values to ensure that the ++ * user space will always be based on the maximum resources available. ++ */ ++ base_props->l2_props.num_l2_slices = ++ kbdev->gpu_props.max_config.l2_slices; ++ base_props->raw_props.shader_present = ++ kbdev->gpu_props.max_config.core_mask; ++ /* ++ * Update l2_present in the raw data to be consistent with the ++ * max_config.l2_slices number. ++ */ ++ base_props->raw_props.l2_present = 0; ++ for (l2_n = 0; l2_n < base_props->l2_props.num_l2_slices; l2_n++) { ++ base_props->raw_props.l2_present <<= 1; ++ base_props->raw_props.l2_present |= 0x1; ++ } ++ /* ++ * Update the coherency_info data using just one core group. For ++ * architectures where the max_config is provided by the arbiter it is ++ * not necessary to split the shader core groups in different coherent ++ * groups. 
++ */ ++ base_props->coherency_info.coherency = ++ base_props->raw_props.mem_features; ++ base_props->coherency_info.num_core_groups = 1; ++ base_props->coherency_info.num_groups = 1; ++ base_props->coherency_info.group[0].core_mask = ++ kbdev->gpu_props.max_config.core_mask; ++ base_props->coherency_info.group[0].num_cores = ++ hweight32(kbdev->gpu_props.max_config.core_mask); ++} ++ + /** + * kbase_gpuprops_calculate_props - Calculate the derived properties + * @gpu_props: The &struct base_gpu_props structure +@@ -195,7 +313,6 @@ static void kbase_gpuprops_calculate_props( + { + int i; + u32 gpu_id; +- u32 product_id; + + /* Populate the base_gpu_props structure */ + kbase_gpuprops_update_core_props_gpu_id(gpu_props); +@@ -218,7 +335,8 @@ static void kbase_gpuprops_calculate_props( + + /* Field with number of l2 slices is added to MEM_FEATURES register + * since t76x. Below code assumes that for older GPU reserved bits will +- * be read as zero. */ ++ * be read as zero. ++ */ + gpu_props->l2_props.num_l2_slices = + KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1; + +@@ -251,9 +369,19 @@ static void kbase_gpuprops_calculate_props( + * Workaround for the incorrectly applied THREAD_FEATURES to tDUx. 
+ */ + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; +- product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID; +- product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT; + ++#if MALI_USE_CSF ++ gpu_props->thread_props.max_registers = ++ KBASE_UBFX32(gpu_props->raw_props.thread_features, ++ 0U, 22); ++ gpu_props->thread_props.impl_tech = ++ KBASE_UBFX32(gpu_props->raw_props.thread_features, ++ 22U, 2); ++ gpu_props->thread_props.max_task_queue = ++ KBASE_UBFX32(gpu_props->raw_props.thread_features, ++ 24U, 8); ++ gpu_props->thread_props.max_thread_group_split = 0; ++#else + if ((gpu_id & GPU_ID2_PRODUCT_MODEL) == GPU_ID2_PRODUCT_TDUX) { + gpu_props->thread_props.max_registers = + KBASE_UBFX32(gpu_props->raw_props.thread_features, +@@ -279,6 +407,7 @@ static void kbase_gpuprops_calculate_props( + KBASE_UBFX32(gpu_props->raw_props.thread_features, + 30U, 2); + } ++#endif + + /* If values are not specified, then use defaults */ + if (gpu_props->thread_props.max_registers == 0) { +@@ -286,8 +415,30 @@ static void kbase_gpuprops_calculate_props( + gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT; + gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT; + } +- /* Initialize the coherent_group structure for each group */ +- kbase_gpuprops_construct_coherent_groups(gpu_props); ++ ++ /* ++ * If the maximum resources allocated information is available it is ++ * necessary to update the base_gpu_props with the max_config info to ++ * the userspace. This is applicable to systems that receive this ++ * information from the arbiter. 
++ */ ++ if (kbdev->gpu_props.max_config.core_mask) ++ /* Update the max config properties in the base_gpu_props */ ++ kbase_gpuprops_update_max_config_props(gpu_props, ++ kbdev); ++ else ++ /* Initialize the coherent_group structure for each group */ ++ kbase_gpuprops_construct_coherent_groups(gpu_props); ++} ++ ++void kbase_gpuprops_set_max_config(struct kbase_device *kbdev, ++ const struct max_config_props *max_config) ++{ ++ if (WARN_ON(!kbdev) || WARN_ON(!max_config)) ++ return; ++ ++ kbdev->gpu_props.max_config.l2_slices = max_config->l2_slices; ++ kbdev->gpu_props.max_config.core_mask = max_config->core_mask; + } + + void kbase_gpuprops_set(struct kbase_device *kbdev) +@@ -295,7 +446,8 @@ void kbase_gpuprops_set(struct kbase_device *kbdev) + struct kbase_gpu_props *gpu_props; + struct gpu_raw_gpu_props *raw; + +- KBASE_DEBUG_ASSERT(NULL != kbdev); ++ if (WARN_ON(!kbdev)) ++ return; + gpu_props = &kbdev->gpu_props; + raw = &gpu_props->props.raw_props; + +@@ -315,9 +467,19 @@ void kbase_gpuprops_set(struct kbase_device *kbdev) + gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8); + + gpu_props->num_cores = hweight64(raw->shader_present); +- gpu_props->num_core_groups = hweight64(raw->l2_present); ++ gpu_props->num_core_groups = ++ gpu_props->props.coherency_info.num_core_groups; + gpu_props->num_address_spaces = hweight32(raw->as_present); + gpu_props->num_job_slots = hweight32(raw->js_present); ++ ++ /* ++ * Current configuration is used on HW interactions so that the maximum ++ * config is just used for user space avoiding interactions with parts ++ * of the hardware that might not be allocated to the kbase instance at ++ * that moment. ++ */ ++ kbase_gpuprops_req_curr_config_update(kbdev); ++ kbase_gpuprops_get_curr_config_props(kbdev, &gpu_props->curr_config); + } + + int kbase_gpuprops_set_features(struct kbase_device *kbdev) +@@ -355,13 +517,34 @@ int kbase_gpuprops_set_features(struct kbase_device *kbdev) + * in sysfs. 
+ */ + static u8 override_l2_size; +-module_param(override_l2_size, byte, 0); ++module_param(override_l2_size, byte, 0000); + MODULE_PARM_DESC(override_l2_size, "Override L2 size config for testing"); + + static u8 override_l2_hash; +-module_param(override_l2_hash, byte, 0); ++module_param(override_l2_hash, byte, 0000); + MODULE_PARM_DESC(override_l2_hash, "Override L2 hash config for testing"); + ++static u32 l2_hash_values[ASN_HASH_COUNT] = { ++ 0, ++}; ++static int num_override_l2_hash_values; ++module_param_array(l2_hash_values, uint, &num_override_l2_hash_values, 0000); ++MODULE_PARM_DESC(l2_hash_values, "Override L2 hash values config for testing"); ++ ++/* Definitions for range of supported user defined hash functions for GPUs ++ * that support L2_CONFIG and not ASN_HASH features. Supported hash function ++ * range from 0b1000-0b1111 inclusive. Selection of any other values will ++ * lead to undefined behavior. ++ */ ++#define USER_DEFINED_HASH_LO ((u8)0x08) ++#define USER_DEFINED_HASH_HI ((u8)0x0F) ++ ++enum l2_config_override_result { ++ L2_CONFIG_OVERRIDE_FAIL = -1, ++ L2_CONFIG_OVERRIDE_NONE, ++ L2_CONFIG_OVERRIDE_OK, ++}; ++ + /** + * kbase_read_l2_config_from_dt - Read L2 configuration + * @kbdev: The kbase device for which to get the L2 configuration. +@@ -370,30 +553,67 @@ MODULE_PARM_DESC(override_l2_hash, "Override L2 hash config for testing"); + * Override values in module parameters take priority over override values in + * device tree. + * +- * Return: true if either size or hash was overridden, false if no overrides +- * were found. ++ * Return: L2_CONFIG_OVERRIDE_OK if either size or hash, or both was properly ++ * overridden, L2_CONFIG_OVERRIDE_NONE if no overrides are provided. ++ * L2_CONFIG_OVERRIDE_FAIL otherwise. 
+ */ +-static bool kbase_read_l2_config_from_dt(struct kbase_device * const kbdev) ++static enum l2_config_override_result ++kbase_read_l2_config_from_dt(struct kbase_device *const kbdev) + { + struct device_node *np = kbdev->dev->of_node; + + if (!np) +- return false; ++ return L2_CONFIG_OVERRIDE_NONE; + + if (override_l2_size) + kbdev->l2_size_override = override_l2_size; + else if (of_property_read_u8(np, "l2-size", &kbdev->l2_size_override)) + kbdev->l2_size_override = 0; + +- if (override_l2_hash) ++ /* Check overriding value is supported, if not will result in ++ * undefined behavior. ++ */ ++ if (override_l2_hash >= USER_DEFINED_HASH_LO && ++ override_l2_hash <= USER_DEFINED_HASH_HI) + kbdev->l2_hash_override = override_l2_hash; + else if (of_property_read_u8(np, "l2-hash", &kbdev->l2_hash_override)) + kbdev->l2_hash_override = 0; + +- if (kbdev->l2_size_override || kbdev->l2_hash_override) +- return true; ++ kbdev->l2_hash_values_override = false; ++ if (num_override_l2_hash_values) { ++ int i; ++ ++ kbdev->l2_hash_values_override = true; ++ for (i = 0; i < num_override_l2_hash_values; i++) ++ kbdev->l2_hash_values[i] = l2_hash_values[i]; ++ } else if (!of_property_read_u32_array(np, "l2-hash-values", ++ kbdev->l2_hash_values, ++ ASN_HASH_COUNT)) ++ kbdev->l2_hash_values_override = true; ++ ++ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH) && ++ (kbdev->l2_hash_override)) { ++ dev_err(kbdev->dev, "l2-hash not supported\n"); ++ return L2_CONFIG_OVERRIDE_FAIL; ++ } ++ ++ if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH) && ++ (kbdev->l2_hash_values_override)) { ++ dev_err(kbdev->dev, "l2-hash-values not supported\n"); ++ return L2_CONFIG_OVERRIDE_FAIL; ++ } ++ ++ if (kbdev->l2_hash_override && kbdev->l2_hash_values_override) { ++ dev_err(kbdev->dev, ++ "both l2-hash & l2-hash-values not supported\n"); ++ return L2_CONFIG_OVERRIDE_FAIL; ++ } ++ ++ if (kbdev->l2_size_override || kbdev->l2_hash_override || ++ kbdev->l2_hash_values_override) 
++ return L2_CONFIG_OVERRIDE_OK; + +- return false; ++ return L2_CONFIG_OVERRIDE_NONE; + } + + int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev) +@@ -405,8 +625,25 @@ int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev) + struct base_gpu_props *gpu_props = &kbdev->gpu_props.props; + + /* Check for L2 cache size & hash overrides */ +- if (!kbase_read_l2_config_from_dt(kbdev)) +- return 0; ++ switch (kbase_read_l2_config_from_dt(kbdev)) { ++ case L2_CONFIG_OVERRIDE_FAIL: ++ err = -EIO; ++ goto exit; ++ case L2_CONFIG_OVERRIDE_NONE: ++ goto exit; ++ default: ++ break; ++ } ++ ++ /* pm.active_count is expected to be 1 here, which is set in ++ * kbase_hwaccess_pm_powerup(). ++ */ ++ WARN_ON(kbdev->pm.active_count != 1); ++ /* The new settings for L2 cache can only be applied when it is ++ * off, so first do the power down. ++ */ ++ kbase_pm_context_idle(kbdev); ++ kbase_pm_wait_for_desired_state(kbdev); + + /* Need L2 to get powered to reflect to L2_FEATURES */ + kbase_pm_context_active(kbdev); +@@ -417,21 +654,21 @@ int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev) + /* Dump L2_FEATURES register */ + err = kbase_backend_gpuprops_get_l2_features(kbdev, &regdump); + if (err) +- goto idle_gpu; ++ goto exit; + + dev_info(kbdev->dev, "Reflected L2_FEATURES is 0x%x\n", +- regdump.l2_features); ++ regdump.l2_features); ++ dev_info(kbdev->dev, "Reflected L2_CONFIG is 0x%08x\n", ++ regdump.l2_config); ++ + + /* Update gpuprops with reflected L2_FEATURES */ + gpu_props->raw_props.l2_features = regdump.l2_features; + gpu_props->l2_props.log2_cache_size = + KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8); +- +-idle_gpu: +- /* Let GPU idle */ +- kbase_pm_context_idle(kbdev); + } + ++exit: + return err; + } + +@@ -511,7 +748,7 @@ static struct { + PROP(RAW_THREAD_FEATURES, raw_props.thread_features), + PROP(RAW_THREAD_TLS_ALLOC, raw_props.thread_tls_alloc), + PROP(RAW_COHERENCY_MODE, raw_props.coherency_mode), +- ++ 
PROP(RAW_GPU_FEATURES, raw_props.gpu_features), + PROP(COHERENCY_NUM_GROUPS, coherency_info.num_groups), + PROP(COHERENCY_NUM_CORE_GROUPS, coherency_info.num_core_groups), + PROP(COHERENCY_COHERENCY, coherency_info.coherency), +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h +index 5eee794..b20b99b 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2015, 2017, 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,29 +17,10 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * +- *//* SPDX-License-Identifier: GPL-2.0 */ +-/* +- * +- * (C) COPYRIGHT 2011-2015, 2017, 2019-2020 ARM Limited. All rights reserved. +- * +- * This program is free software and is provided to you under the terms of the +- * GNU General Public License version 2 as published by the Free Software +- * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. +- * +- * A copy of the licence is included with the program, and can also be obtained +- * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +- * Boston, MA 02110-1301, USA. 
+- * + */ + +- +- + /** +- * @file mali_kbase_gpuprops.h +- * Base kernel property query APIs ++ * DOC: Base kernel property query APIs + */ + + #ifndef _KBASE_GPUPROPS_H_ +@@ -64,11 +46,10 @@ struct kbase_device; + (((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1)) + + /** +- * @brief Set up Kbase GPU properties. ++ * kbase_gpuprops_set - Set up Kbase GPU properties. ++ * @kbdev: The struct kbase_device structure for the device + * + * Set up Kbase GPU properties with information from the GPU registers +- * +- * @param kbdev The struct kbase_device structure for the device + */ + void kbase_gpuprops_set(struct kbase_device *kbdev); + +@@ -89,6 +70,8 @@ int kbase_gpuprops_set_features(struct kbase_device *kbdev); + * @kbdev: Device pointer + * + * This function updates l2_features and the log2 cache size. ++ * The function expects GPU to be powered up and value of pm.active_count ++ * to be 1. + * + * Return: Zero on success, Linux error code for failure + */ +@@ -132,4 +115,38 @@ int kbase_device_populate_max_freq(struct kbase_device *kbdev); + void kbase_gpuprops_update_core_props_gpu_id( + struct base_gpu_props * const gpu_props); + ++/** ++ * kbase_gpuprops_set_max_config - Set the max config information ++ * @kbdev: Device pointer ++ * @max_config: Maximum configuration data to be updated ++ * ++ * This function sets max_config in the kbase_gpu_props. ++ */ ++void kbase_gpuprops_set_max_config(struct kbase_device *kbdev, ++ const struct max_config_props *max_config); ++ ++/** ++ * kbase_gpuprops_get_curr_config_props - Get the current allocated resources ++ * @kbdev: The &struct kbase_device structure for the device ++ * @curr_config: The &struct curr_config_props structure to receive the result ++ * ++ * Fill the &struct curr_config_props structure with values from the GPU ++ * configuration registers. 
++ * ++ * Return: Zero on success, Linux error code on failure ++ */ ++int kbase_gpuprops_get_curr_config_props(struct kbase_device *kbdev, ++ struct curr_config_props * const curr_config); ++ ++/** ++ * kbase_gpuprops_req_curr_config_update - Request Current Config Update ++ * @kbdev: The &struct kbase_device structure for the device ++ * ++ * Requests the current configuration to be updated next time the ++ * kbase_gpuprops_get_curr_config_props() is called. ++ * ++ * Return: Zero on success, Linux error code on failure ++ */ ++int kbase_gpuprops_req_curr_config_update(struct kbase_device *kbdev); ++ + #endif /* _KBASE_GPUPROPS_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h +index ec6f1c3..02705a0 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2011-2018, 2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,21 +17,16 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /** +- * @file mali_kbase_gpuprops_types.h +- * Base kernel property query APIs ++ * DOC: Base kernel property query APIs + */ + + #ifndef _KBASE_GPUPROPS_TYPES_H_ + #define _KBASE_GPUPROPS_TYPES_H_ + +-#include "mali_base_kernel.h" ++#include + + #define KBASE_GPU_SPEED_MHZ 123 + #define KBASE_GPU_PC_SIZE_LOG2 24U +@@ -38,6 +34,7 @@ + struct kbase_gpuprops_regdump { + u32 gpu_id; + u32 l2_features; ++ u32 l2_config; + u32 core_features; + u32 tiler_features; + u32 mem_features; +@@ -60,6 +57,30 @@ struct kbase_gpuprops_regdump { + u32 stack_present_lo; + u32 stack_present_hi; + u32 coherency_features; ++ u32 gpu_features_lo; ++ u32 gpu_features_hi; ++}; ++ ++/** ++ * struct kbase_current_config_regdump - Register dump for current resources ++ * allocated to the GPU. ++ * @mem_features: Memory system features. Contains information about the ++ * features of the memory system. Used here to get the L2 slice ++ * count. ++ * @shader_present_lo: Shader core present bitmap. Low word. ++ * @shader_present_hi: Shader core present bitmap. High word. ++ * @l2_present_lo: L2 cache present bitmap. Low word. ++ * @l2_present_hi: L2 cache present bitmap. High word. ++ * ++ * Register dump structure used to store the registers data related to the ++ * current resources allocated to the GPU. ++ */ ++struct kbase_current_config_regdump { ++ u32 mem_features; ++ u32 shader_present_lo; ++ u32 shader_present_hi; ++ u32 l2_present_lo; ++ u32 l2_present_hi; + }; + + struct kbase_gpu_cache_props { +@@ -76,6 +97,50 @@ struct kbase_gpu_mmu_props { + u8 pa_bits; + }; + ++/** ++ * struct max_config_props - Properties based on the maximum resources ++ * available. ++ * @l2_slices: Maximum number of L2 slices that can be assigned to the GPU ++ * during runtime. ++ * @padding: Padding to a multiple of 64 bits. ++ * @core_mask: Largest core mask bitmap that can be assigned to the GPU during ++ * runtime. 
++ * ++ * Properties based on the maximum resources available (not necessarily ++ * allocated at that moment). Used to provide the maximum configuration to the ++ * userspace allowing the applications to allocate enough resources in case the ++ * real allocated resources change. ++ */ ++struct max_config_props { ++ u8 l2_slices; ++ u8 padding[3]; ++ u32 core_mask; ++}; ++ ++/** ++ * struct curr_config_props - Properties based on the current resources ++ * allocated to the GPU. ++ * @l2_present: Current L2 present bitmap that is allocated to the GPU. ++ * @shader_present: Current shader present bitmap that is allocated to the GPU. ++ * @num_cores: Current number of shader cores allocated to the GPU. ++ * @l2_slices: Current number of L2 slices allocated to the GPU. ++ * @update_needed: Defines if it is necessary to re-read the registers to ++ * update the current allocated resources. ++ * @padding: Padding to a multiple of 64 bits. ++ * ++ * Properties based on the current resource available. Used for operations with ++ * hardware interactions to avoid using userspace data that can be based on ++ * the maximum resource available. 
++ */ ++struct curr_config_props { ++ u64 l2_present; ++ u64 shader_present; ++ u16 num_cores; ++ u8 l2_slices; ++ bool update_needed; ++ u8 padding[4]; ++}; ++ + struct kbase_gpu_props { + /* kernel-only properties */ + u8 num_cores; +@@ -88,6 +153,12 @@ struct kbase_gpu_props { + struct kbase_gpu_mem_props mem; + struct kbase_gpu_mmu_props mmu; + ++ /* Properties based on the current resource available */ ++ struct curr_config_props curr_config; ++ ++ /* Properties based on the maximum resource available */ ++ struct max_config_props max_config; ++ + /* Properties shared with userspace */ + struct base_gpu_props props; + +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.c +index 6a47c9d..2a20a3d 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include "mali_kbase_gwt.h" +@@ -71,6 +70,7 @@ int kbase_gpu_gwt_start(struct kbase_context *kctx) + INIT_LIST_HEAD(&kctx->gwt_current_list); + INIT_LIST_HEAD(&kctx->gwt_snapshot_list); + ++#if !MALI_USE_CSF + /* If GWT is enabled using new vector dumping format + * from user space, back up status of the job serialization flag and + * use full serialisation of jobs for dumping. +@@ -80,6 +80,7 @@ int kbase_gpu_gwt_start(struct kbase_context *kctx) + kctx->kbdev->serialize_jobs = KBASE_SERIALIZE_INTRA_SLOT | + KBASE_SERIALIZE_INTER_SLOT; + ++#endif + /* Mark gwt enabled before making pages read only in case a + write page fault is triggered while we're still in this loop. + (kbase_gpu_vm_lock() doesn't prevent this!) +@@ -113,7 +114,9 @@ int kbase_gpu_gwt_stop(struct kbase_context *kctx) + kfree(pos); + } + ++#if !MALI_USE_CSF + kctx->kbdev->serialize_jobs = kctx->kbdev->backup_serialize_jobs; ++#endif + + kbase_gpu_gwt_setup_pages(kctx, ~0UL); + +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.h +index 7e7746e..30de43d 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2017, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,15 +17,13 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #if !defined(_KBASE_GWT_H) + #define _KBASE_GWT_H + + #include +-#include ++#include + + /** + * kbase_gpu_gwt_start - Start the GPU write tracking +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c +index f8a9248..7ad583c 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /* + * Run-time work-arounds helpers + */ +@@ -68,6 +65,9 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) + case GPU_ID2_PRODUCT_TBEX: + features = base_hw_features_tBEx; + break; ++ case GPU_ID2_PRODUCT_TBAX: ++ features = base_hw_features_tBAx; ++ break; + case GPU_ID2_PRODUCT_TDUX: + features = base_hw_features_tDUx; + break; +@@ -81,14 +81,6 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) + case GPU_ID2_PRODUCT_TVAX: + features = base_hw_features_tVAx; + break; +- case GPU_ID2_PRODUCT_TTUX: +- /* Fallthrough */ +- case GPU_ID2_PRODUCT_LTUX: +- features = base_hw_features_tTUx; +- break; +- case GPU_ID2_PRODUCT_TE2X: +- features = base_hw_features_tE2x; +- break; + default: + features = base_hw_features_generic; + break; +@@ -140,103 +132,99 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( + }; + + static const struct base_hw_product base_hw_products[] = { +- {GPU_ID2_PRODUCT_TMIX, +- {{GPU_ID2_VERSION_MAKE(0, 0, 1), +- base_hw_issues_tMIx_r0p0_05dev0}, +- {GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0}, +- {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tMIx_r0p1}, +- {U32_MAX /* sentinel value */, NULL} } }, +- +- {GPU_ID2_PRODUCT_THEX, +- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tHEx_r0p0}, +- {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tHEx_r0p0}, +- {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tHEx_r0p1}, +- {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tHEx_r0p1}, +- {GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tHEx_r0p2}, +- {GPU_ID2_VERSION_MAKE(0, 3, 0), base_hw_issues_tHEx_r0p3}, +- {U32_MAX, NULL} } }, +- +- {GPU_ID2_PRODUCT_TSIX, +- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tSIx_r0p0}, +- {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tSIx_r0p0}, +- {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tSIx_r0p1}, +- {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tSIx_r1p0}, +- {GPU_ID2_VERSION_MAKE(1, 1, 0), 
base_hw_issues_tSIx_r1p1}, +- {U32_MAX, NULL} } }, +- +- {GPU_ID2_PRODUCT_TDVX, +- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDVx_r0p0}, +- {U32_MAX, NULL} } }, +- +- {GPU_ID2_PRODUCT_TNOX, +- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNOx_r0p0}, +- {U32_MAX, NULL} } }, +- +- {GPU_ID2_PRODUCT_TGOX, +- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGOx_r0p0}, +- {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tGOx_r1p0}, +- {U32_MAX, NULL} } }, +- +- {GPU_ID2_PRODUCT_TTRX, +- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0}, +- {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tTRx_r0p0}, +- {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTRx_r0p1}, +- {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tTRx_r0p1}, +- {GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tTRx_r0p2}, +- {U32_MAX, NULL} } }, +- +- {GPU_ID2_PRODUCT_TNAX, +- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNAx_r0p0}, +- {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tNAx_r0p0}, +- {GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tNAx_r0p0}, +- {GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tNAx_r0p0}, +- {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tNAx_r0p1}, +- {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tNAx_r0p1}, +- {U32_MAX, NULL} } }, +- +- {GPU_ID2_PRODUCT_LBEX, +- {{GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_lBEx_r1p0}, +- {GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_lBEx_r1p1}, +- {U32_MAX, NULL} } }, +- +- {GPU_ID2_PRODUCT_TBEX, +- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBEx_r0p0}, +- {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tBEx_r0p0}, +- {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tBEx_r0p1}, +- {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBEx_r1p0}, +- {U32_MAX, NULL} } }, +- +- {GPU_ID2_PRODUCT_TDUX, +- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDUx_r0p0}, +- {U32_MAX, NULL} } }, +- +- {GPU_ID2_PRODUCT_TODX, +- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0}, +- {U32_MAX, NULL} } }, +- +- {GPU_ID2_PRODUCT_LODX, +- 
{{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0}, +- {U32_MAX, NULL} } }, +- +- {GPU_ID2_PRODUCT_TGRX, +- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGRx_r0p0}, +- {U32_MAX, NULL} } }, +- +- {GPU_ID2_PRODUCT_TVAX, +- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tVAx_r0p0}, +- {U32_MAX, NULL} } }, +- +- {GPU_ID2_PRODUCT_TTUX, +- {{GPU_ID2_VERSION_MAKE(2, 0, 0), base_hw_issues_tTUx_r0p0}, +- {U32_MAX, NULL} } }, +- +- {GPU_ID2_PRODUCT_LTUX, +- {{GPU_ID2_VERSION_MAKE(3, 0, 0), base_hw_issues_tTUx_r0p0}, +- {U32_MAX, NULL} } }, +- +- {GPU_ID2_PRODUCT_TE2X, +- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tE2x_r0p0}, +- {U32_MAX, NULL} } }, ++ { GPU_ID2_PRODUCT_TMIX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 1), ++ base_hw_issues_tMIx_r0p0_05dev0 }, ++ { GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tMIx_r0p1 }, ++ { U32_MAX /* sentinel value */, NULL } } }, ++ ++ { GPU_ID2_PRODUCT_THEX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tHEx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tHEx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tHEx_r0p1 }, ++ { GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tHEx_r0p1 }, ++ { GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tHEx_r0p2 }, ++ { GPU_ID2_VERSION_MAKE(0, 3, 0), base_hw_issues_tHEx_r0p3 }, ++ { U32_MAX, NULL } } }, ++ ++ { GPU_ID2_PRODUCT_TSIX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tSIx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tSIx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tSIx_r0p1 }, ++ { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tSIx_r1p0 }, ++ { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tSIx_r1p1 }, ++ { U32_MAX, NULL } } }, ++ ++ { GPU_ID2_PRODUCT_TDVX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDVx_r0p0 }, ++ { U32_MAX, NULL } } }, ++ ++ { GPU_ID2_PRODUCT_TNOX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNOx_r0p0 }, ++ { U32_MAX, NULL 
} } }, ++ ++ { GPU_ID2_PRODUCT_TGOX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGOx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tGOx_r1p0 }, ++ { U32_MAX, NULL } } }, ++ ++ { GPU_ID2_PRODUCT_TTRX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tTRx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTRx_r0p1 }, ++ { GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tTRx_r0p1 }, ++ { GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tTRx_r0p2 }, ++ { U32_MAX, NULL } } }, ++ ++ { GPU_ID2_PRODUCT_TNAX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNAx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tNAx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tNAx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tNAx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tNAx_r0p1 }, ++ { GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tNAx_r0p1 }, ++ { U32_MAX, NULL } } }, ++ ++ { GPU_ID2_PRODUCT_LBEX, ++ { { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_lBEx_r1p0 }, ++ { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_lBEx_r1p1 }, ++ { U32_MAX, NULL } } }, ++ ++ { GPU_ID2_PRODUCT_TBEX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBEx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tBEx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tBEx_r0p1 }, ++ { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBEx_r1p0 }, ++ { U32_MAX, NULL } } }, ++ ++ { GPU_ID2_PRODUCT_TBAX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBAx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tBAx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tBAx_r0p0 }, ++ { U32_MAX, NULL } } }, ++ ++ { GPU_ID2_PRODUCT_TDUX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDUx_r0p0 }, ++ { U32_MAX, NULL } } }, ++ ++ { GPU_ID2_PRODUCT_TODX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0 }, ++ { 
GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tODx_r0p0 }, ++ { GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tODx_r0p0 }, ++ { U32_MAX, NULL } } }, ++ ++ { GPU_ID2_PRODUCT_LODX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0 }, ++ { U32_MAX, NULL } } }, ++ ++ { GPU_ID2_PRODUCT_TGRX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGRx_r0p0 }, ++ { U32_MAX, NULL } } }, ++ ++ { GPU_ID2_PRODUCT_TVAX, ++ { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tVAx_r0p0 }, ++ { U32_MAX, NULL } } }, + }; + + u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; +@@ -269,8 +257,8 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( + } + + /* Check whether this is a candidate for most recent +- known version not later than the actual +- version. */ ++ * known version not later than the actual version. ++ */ + if ((version > product->map[v].version) && + (product->map[v].version >= fallback_version)) { + #if MALI_CUSTOMER_RELEASE +@@ -287,7 +275,8 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( + + if ((issues == NULL) && (fallback_issues != NULL)) { + /* Fall back to the issue set of the most recent known +- version not later than the actual version. */ ++ * version not later than the actual version. ++ */ + issues = fallback_issues; + + #if MALI_CUSTOMER_RELEASE +@@ -340,7 +329,8 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) + + #if !MALI_CUSTOMER_RELEASE + /* The GPU ID might have been replaced with the last +- known version of the same GPU. */ ++ * known version of the same GPU. 
++ */ + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + #endif + } else { +@@ -374,6 +364,9 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) + case GPU_ID2_PRODUCT_TBEX: + issues = base_hw_issues_model_tBEx; + break; ++ case GPU_ID2_PRODUCT_TBAX: ++ issues = base_hw_issues_model_tBAx; ++ break; + case GPU_ID2_PRODUCT_TDUX: + issues = base_hw_issues_model_tDUx; + break; +@@ -387,13 +380,6 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) + case GPU_ID2_PRODUCT_TVAX: + issues = base_hw_issues_model_tVAx; + break; +- case GPU_ID2_PRODUCT_TTUX: +- case GPU_ID2_PRODUCT_LTUX: +- issues = base_hw_issues_model_tTUx; +- break; +- case GPU_ID2_PRODUCT_TE2X: +- issues = base_hw_issues_model_tE2x; +- break; + default: + dev_err(kbdev->dev, + "Unknown GPU ID %x", gpu_id); +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h +index f386b16..6c04a23 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2017, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,15 +17,10 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /** +- * @file +- * Run-time work-arounds helpers ++ * DOC: Run-time work-arounds helpers + */ + + #ifndef _KBASE_HW_H_ +@@ -33,13 +29,17 @@ + #include "mali_kbase_defs.h" + + /** +- * @brief Tell whether a work-around should be enabled ++ * Tell whether a work-around should be enabled ++ * @kbdev: Device pointer ++ * @issue: issue to be checked + */ + #define kbase_hw_has_issue(kbdev, issue)\ + test_bit(issue, &(kbdev)->hw_issues_mask[0]) + + /** +- * @brief Tell whether a feature is supported ++ * Tell whether a feature is supported ++ * @kbdev: Device pointer ++ * @feature: feature to be checked + */ + #define kbase_hw_has_feature(kbdev, feature)\ + test_bit(feature, &(kbdev)->hw_features_mask[0]) +@@ -63,7 +63,8 @@ + int kbase_hw_set_issues_mask(struct kbase_device *kbdev); + + /** +- * @brief Set the features mask depending on the GPU ID ++ * Set the features mask depending on the GPU ID ++ * @kbdev: Device pointer + */ + void kbase_hw_set_features_mask(struct kbase_device *kbdev); + +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h +index 89df251..0da4eb2 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014-2015, 2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2015, 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,11 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- + /* + * HW access backend common APIs + */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h +index 124a2d9..62a6ec5 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014, 2016, 2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014, 2016-2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,20 +17,16 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- + /** +- * @file mali_kbase_hwaccess_gpu_defs.h +- * HW access common definitions ++ * DOC: HW access common definitions + */ + + #ifndef _KBASE_HWACCESS_DEFS_H_ + #define _KBASE_HWACCESS_DEFS_H_ + +-#include ++#include + + /** + * struct kbase_hwaccess_data - object encapsulating the GPU backend specific +@@ -43,7 +40,9 @@ + * @backend: GPU backend specific data for HW access layer + */ + struct kbase_hwaccess_data { ++#if !MALI_USE_CSF + struct kbase_context *active_kctx[BASE_JM_MAX_NR_SLOTS]; ++#endif + + struct kbase_backend_data backend; + }; +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h +index 3ae0dbe..71ccc91 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2015, 2018, 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,25 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * +- *//* SPDX-License-Identifier: GPL-2.0 */ +-/* +- * +- * (C) COPYRIGHT 2014-2015, 2018, 2019-2020 ARM Limited. All rights reserved. 
+- * +- * This program is free software and is provided to you under the terms of the +- * GNU General Public License version 2 as published by the Free Software +- * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. +- * +- * A copy of the licence is included with the program, and can also be obtained +- * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +- * Boston, MA 02110-1301, USA. +- * + */ + +- + /** + * Base kernel property query backend APIs + */ +@@ -55,6 +39,23 @@ + int kbase_backend_gpuprops_get(struct kbase_device *kbdev, + struct kbase_gpuprops_regdump *regdump); + ++/** ++ * kbase_backend_gpuprops_get_curr_config() - Fill @curr_config_regdump with ++ * relevant GPU properties read from ++ * the GPU registers. ++ * @kbdev: Device pointer. ++ * @curr_config_regdump: Pointer to struct kbase_current_config_regdump ++ * structure. ++ * ++ * The caller should ensure that GPU remains powered-on during this function and ++ * the caller must ensure this function returns success before using the values ++ * returned in the curr_config_regdump in any part of the kernel. ++ * ++ * Return: Zero for success or a Linux error code ++ */ ++int kbase_backend_gpuprops_get_curr_config(struct kbase_device *kbdev, ++ struct kbase_current_config_regdump *curr_config_regdump); ++ + /** + * kbase_backend_gpuprops_get_features - Fill @regdump with GPU properties read + * from GPU +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h +index be85491..f836953 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014-2015, 2017-2018, 2020 ARM Limited. All rights reserved.
++ * (C) COPYRIGHT 2014-2015, 2017-2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /* + * HW Access instrumentation common APIs + */ +@@ -29,27 +26,26 @@ + #ifndef _KBASE_HWACCESS_INSTR_H_ + #define _KBASE_HWACCESS_INSTR_H_ + +-#include ++#include + + /** + * struct kbase_instr_hwcnt_enable - Enable hardware counter collection. + * @dump_buffer: GPU address to write counters to. + * @dump_buffer_bytes: Size in bytes of the buffer pointed to by dump_buffer. +- * @jm_bm: counters selection bitmask (JM). ++ * @fe_bm: counters selection bitmask (Front End). + * @shader_bm: counters selection bitmask (Shader). + * @tiler_bm: counters selection bitmask (Tiler). + * @mmu_l2_bm: counters selection bitmask (MMU_L2). +- * @use_secondary: use secondary performance counters set for applicable +- * counter blocks. ++ * @counter_set: the performance counter set to use. 
+ */ + struct kbase_instr_hwcnt_enable { + u64 dump_buffer; + u64 dump_buffer_bytes; +- u32 jm_bm; ++ u32 fe_bm; + u32 shader_bm; + u32 tiler_bm; + u32 mmu_l2_bm; +- bool use_secondary; ++ u8 counter_set; + }; + + /** +@@ -139,7 +135,7 @@ int kbase_instr_backend_init(struct kbase_device *kbdev); + */ + void kbase_instr_backend_term(struct kbase_device *kbdev); + +-#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS ++#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS + /** + * kbase_instr_backend_debugfs_init() - Add a debugfs entry for the + * hardware counter set. +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h +index 3d5934e..8689647 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,11 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- + /* + * HW access job manager common APIs + */ +@@ -31,7 +29,7 @@ + /** + * kbase_backend_run_atom() - Run an atom on the GPU + * @kbdev: Device pointer +- * @atom: Atom to run ++ * @katom: Atom to run + * + * Caller must hold the HW access lock + */ +@@ -154,6 +152,7 @@ void kbase_backend_cache_clean(struct kbase_device *kbdev, + void kbase_backend_complete_wq(struct kbase_device *kbdev, + struct kbase_jd_atom *katom); + ++#if !MALI_USE_CSF + /** + * kbase_backend_complete_wq_post_sched - Perform backend-specific actions + * required on completing an atom, after +@@ -166,6 +165,7 @@ void kbase_backend_complete_wq(struct kbase_device *kbdev, + */ + void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, + base_jd_core_req core_req); ++#endif /* !MALI_USE_CSF */ + + /** + * kbase_backend_reset() - The GPU is being reset. Cancel all jobs on the GPU +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h +index bbaf6ea..36bbe2d 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014-2015, 2018-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2015, 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,14 +17,10 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- + /** +- * @file mali_kbase_hwaccess_pm.h +- * HW access power manager common APIs ++ * DOC: HW access power manager common APIs + */ + + #ifndef _KBASE_HWACCESS_PM_H_ +@@ -32,7 +29,7 @@ + #include + #include + +-#include ++#include + + /* Forward definition - see mali_kbase.h */ + struct kbase_device; +@@ -80,24 +77,21 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, + * the time this function returns, regardless of whether or not the active power + * policy asks for the GPU to be powered off. + * +- * @param kbdev The kbase device structure for the device (must be a valid +- * pointer) ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ + void kbase_hwaccess_pm_halt(struct kbase_device *kbdev); + + /** + * Perform any backend-specific actions to suspend the GPU + * +- * @param kbdev The kbase device structure for the device (must be a valid +- * pointer) ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ + void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev); + + /** + * Perform any backend-specific actions to resume the GPU from a suspend + * +- * @param kbdev The kbase device structure for the device (must be a valid +- * pointer) ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ + void kbase_hwaccess_pm_resume(struct kbase_device *kbdev); + +@@ -105,8 +99,7 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev); + * Perform any required actions for activating the GPU. Called when the first + * context goes active. 
+ * +- * @param kbdev The kbase device structure for the device (must be a valid +- * pointer) ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ + void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev); + +@@ -114,35 +107,43 @@ void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev); + * Perform any required actions for idling the GPU. Called when the last + * context goes idle. + * +- * @param kbdev The kbase device structure for the device (must be a valid +- * pointer) ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ + void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev); + +- ++#if MALI_USE_CSF ++/** ++ * Set the debug core mask. ++ * ++ * This determines which cores the power manager is allowed to use. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @new_core_mask: The core mask to use ++ */ ++void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, ++ u64 new_core_mask); ++#else + /** + * Set the debug core mask. + * + * This determines which cores the power manager is allowed to use. + * +- * @param kbdev The kbase device structure for the device (must be a +- * valid pointer) +- * @param new_core_mask_js0 The core mask to use for job slot 0 +- * @param new_core_mask_js0 The core mask to use for job slot 1 +- * @param new_core_mask_js0 The core mask to use for job slot 2 ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @new_core_mask_js0: The core mask to use for job slot 0 ++ * @new_core_mask_js1: The core mask to use for job slot 1 ++ * @new_core_mask_js2: The core mask to use for job slot 2 + */ + void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, + u64 new_core_mask_js0, u64 new_core_mask_js1, + u64 new_core_mask_js2); +- ++#endif /* MALI_USE_CSF */ + + /** + * Get the current policy. + * + * Returns the policy that is currently active. 
+ * +- * @param kbdev The kbase device structure for the device (must be a valid +- * pointer) ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * @return The current policy + */ +@@ -152,10 +153,9 @@ const struct kbase_pm_ca_policy + /** + * Change the policy to the one specified. + * +- * @param kbdev The kbase device structure for the device (must be a valid +- * pointer) +- * @param policy The policy to change to (valid pointer returned from +- * @ref kbase_pm_ca_list_policies) ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @policy: The policy to change to (valid pointer returned from ++ * @ref kbase_pm_ca_list_policies) + */ + void kbase_pm_ca_set_policy(struct kbase_device *kbdev, + const struct kbase_pm_ca_policy *policy); +@@ -163,23 +163,20 @@ void kbase_pm_ca_set_policy(struct kbase_device *kbdev, + /** + * Retrieve a static list of the available policies. + * +- * @param[out] policies An array pointer to take the list of policies. This may +- * be NULL. The contents of this array must not be +- * modified. ++ * @policies: An array pointer to take the list of policies. This may be NULL. ++ * The contents of this array must not be modified. + * + * @return The number of policies + */ + int + kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **policies); + +- + /** + * Get the current policy. + * + * Returns the policy that is currently active. + * +- * @param kbdev The kbase device structure for the device (must be a valid +- * pointer) ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * @return The current policy + */ +@@ -188,9 +185,9 @@ const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev); + /** + * Change the policy to the one specified. 
+ * +- * @param kbdev The kbase device structure for the device (must be a valid ++ * @kbdev: The kbase device structure for the device (must be a valid + * pointer) +- * @param policy The policy to change to (valid pointer returned from ++ * @policy: The policy to change to (valid pointer returned from + * @ref kbase_pm_list_policies) + */ + void kbase_pm_set_policy(struct kbase_device *kbdev, +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h +index a61e5b9..8a4ece4 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014,2018-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014, 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,13 +16,6 @@ + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. +- * +- * SPDX-License-Identifier: GPL-2.0 +- * +- */ +- +- +-/** + * + */ + +@@ -30,13 +24,27 @@ + + /** + * kbase_backend_get_gpu_time() - Get current GPU time ++ * @kbdev: Device pointer ++ * @cycle_counter: Pointer to u64 to store cycle counter in. 
++ * @system_time: Pointer to u64 to store system time in ++ * @ts: Pointer to struct timespec to store current monotonic ++ * time in ++ */ ++void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, ++ u64 *system_time, struct timespec64 *ts); ++ ++/** ++ * kbase_backend_get_gpu_time_norequest() - Get current GPU time without ++ * request/release cycle counter + * @kbdev: Device pointer + * @cycle_counter: Pointer to u64 to store cycle counter in + * @system_time: Pointer to u64 to store system time in + * @ts: Pointer to struct timespec to store current monotonic + * time in + */ +-void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, +- u64 *system_time, struct timespec64 *ts); ++void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, ++ u64 *cycle_counter, ++ u64 *system_time, ++ struct timespec64 *ts); + + #endif /* _KBASE_BACKEND_TIME_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt.c +index 14ec5cb..ea4893d 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +@@ -28,9 +27,6 @@ + #include "mali_kbase_hwcnt_accumulator.h" + #include "mali_kbase_hwcnt_backend.h" + #include "mali_kbase_hwcnt_types.h" +-#include "mali_malisw.h" +-#include "mali_kbase_debug.h" +-#include "mali_kbase_linux.h" + + #include + #include +@@ -51,6 +47,7 @@ enum kbase_hwcnt_accum_state { + + /** + * struct kbase_hwcnt_accumulator - Hardware counter accumulator structure. ++ * @metadata: Pointer to immutable hwcnt metadata. + * @backend: Pointer to created counter backend. + * @state: The current state of the accumulator. + * - State transition from disabled->enabled or +@@ -89,6 +86,7 @@ enum kbase_hwcnt_accum_state { + * accum_lock. + */ + struct kbase_hwcnt_accumulator { ++ const struct kbase_hwcnt_metadata *metadata; + struct kbase_hwcnt_backend *backend; + enum kbase_hwcnt_accum_state state; + struct kbase_hwcnt_enable_map enable_map; +@@ -117,6 +115,10 @@ struct kbase_hwcnt_accumulator { + * state_lock. + * - Can be read while holding either lock. + * @accum: Hardware counter accumulator structure. ++ * @wq: Centralized workqueue for users of hardware counters to ++ * submit async hardware counter related work. Never directly ++ * called, but it's expected that a lot of the functions in this ++ * API will end up called from the enqueued async work. 
+ */ + struct kbase_hwcnt_context { + const struct kbase_hwcnt_backend_interface *iface; +@@ -125,6 +127,7 @@ struct kbase_hwcnt_context { + struct mutex accum_lock; + bool accum_inited; + struct kbase_hwcnt_accumulator accum; ++ struct workqueue_struct *wq; + }; + + int kbase_hwcnt_context_init( +@@ -138,7 +141,7 @@ int kbase_hwcnt_context_init( + + hctx = kzalloc(sizeof(*hctx), GFP_KERNEL); + if (!hctx) +- return -ENOMEM; ++ goto err_alloc_hctx; + + hctx->iface = iface; + spin_lock_init(&hctx->state_lock); +@@ -146,11 +149,21 @@ int kbase_hwcnt_context_init( + mutex_init(&hctx->accum_lock); + hctx->accum_inited = false; + ++ hctx->wq = ++ alloc_workqueue("mali_kbase_hwcnt", WQ_HIGHPRI | WQ_UNBOUND, 0); ++ if (!hctx->wq) ++ goto err_alloc_workqueue; ++ + *out_hctx = hctx; + + return 0; ++ ++ destroy_workqueue(hctx->wq); ++err_alloc_workqueue: ++ kfree(hctx); ++err_alloc_hctx: ++ return -ENOMEM; + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_context_init); + + void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx) + { +@@ -159,9 +172,13 @@ void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx) + + /* Make sure we didn't leak the accumulator */ + WARN_ON(hctx->accum_inited); ++ ++ /* We don't expect any work to be pending on this workqueue. ++ * Regardless, this will safely drain and complete the work. ++ */ ++ destroy_workqueue(hctx->wq); + kfree(hctx); + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_context_term); + + /** + * kbasep_hwcnt_accumulator_term() - Terminate the accumulator for the context. 
+@@ -197,22 +214,23 @@ static int kbasep_hwcnt_accumulator_init(struct kbase_hwcnt_context *hctx) + if (errcode) + goto error; + ++ hctx->accum.metadata = hctx->iface->metadata(hctx->iface->info); + hctx->accum.state = ACCUM_STATE_ERROR; + +- errcode = kbase_hwcnt_enable_map_alloc( +- hctx->iface->metadata, &hctx->accum.enable_map); ++ errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata, ++ &hctx->accum.enable_map); + if (errcode) + goto error; + + hctx->accum.enable_map_any_enabled = false; + +- errcode = kbase_hwcnt_dump_buffer_alloc( +- hctx->iface->metadata, &hctx->accum.accum_buf); ++ errcode = kbase_hwcnt_dump_buffer_alloc(hctx->accum.metadata, ++ &hctx->accum.accum_buf); + if (errcode) + goto error; + +- errcode = kbase_hwcnt_enable_map_alloc( +- hctx->iface->metadata, &hctx->accum.scratch_map); ++ errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata, ++ &hctx->accum.scratch_map); + if (errcode) + goto error; + +@@ -242,6 +260,7 @@ static void kbasep_hwcnt_accumulator_disable( + bool backend_enabled = false; + struct kbase_hwcnt_accumulator *accum; + unsigned long flags; ++ u64 dump_time_ns; + + WARN_ON(!hctx); + lockdep_assert_held(&hctx->accum_lock); +@@ -271,7 +290,7 @@ static void kbasep_hwcnt_accumulator_disable( + goto disable; + + /* Try and accumulate before disabling */ +- errcode = hctx->iface->dump_request(accum->backend); ++ errcode = hctx->iface->dump_request(accum->backend, &dump_time_ns); + if (errcode) + goto disable; + +@@ -365,8 +384,8 @@ static int kbasep_hwcnt_accumulator_dump( + WARN_ON(!hctx); + WARN_ON(!ts_start_ns); + WARN_ON(!ts_end_ns); +- WARN_ON(dump_buf && (dump_buf->metadata != hctx->iface->metadata)); +- WARN_ON(new_map && (new_map->metadata != hctx->iface->metadata)); ++ WARN_ON(dump_buf && (dump_buf->metadata != hctx->accum.metadata)); ++ WARN_ON(new_map && (new_map->metadata != hctx->accum.metadata)); + WARN_ON(!hctx->accum_inited); + lockdep_assert_held(&hctx->accum_lock); + +@@ -419,23 +438,16 @@ static int 
kbasep_hwcnt_accumulator_dump( + + /* Initiate the dump if the backend is enabled. */ + if ((state == ACCUM_STATE_ENABLED) && cur_map_any_enabled) { +- /* Disable pre-emption, to make the timestamp as accurate as +- * possible. +- */ +- preempt_disable(); +- { ++ if (dump_buf) { ++ errcode = hctx->iface->dump_request( ++ accum->backend, &dump_time_ns); ++ dump_requested = true; ++ } else { + dump_time_ns = hctx->iface->timestamp_ns( +- accum->backend); +- if (dump_buf) { +- errcode = hctx->iface->dump_request( + accum->backend); +- dump_requested = true; +- } else { +- errcode = hctx->iface->dump_clear( +- accum->backend); +- } ++ errcode = hctx->iface->dump_clear(accum->backend); + } +- preempt_enable(); ++ + if (errcode) + goto error; + } else { +@@ -615,7 +627,6 @@ int kbase_hwcnt_accumulator_acquire( + + return 0; + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_acquire); + + void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum) + { +@@ -650,7 +661,6 @@ void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum) + spin_unlock_irqrestore(&hctx->state_lock, flags); + mutex_unlock(&hctx->accum_lock); + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_release); + + void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx) + { +@@ -669,7 +679,6 @@ void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx) + + mutex_unlock(&hctx->accum_lock); + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_context_disable); + + bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx) + { +@@ -698,7 +707,6 @@ bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx) + + return atomic_disabled; + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_context_disable_atomic); + + void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx) + { +@@ -718,7 +726,6 @@ void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx) + + spin_unlock_irqrestore(&hctx->state_lock, flags); + } 
+-KBASE_EXPORT_TEST_API(kbase_hwcnt_context_enable); + + const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata( + struct kbase_hwcnt_context *hctx) +@@ -726,9 +733,17 @@ const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata( + if (!hctx) + return NULL; + +- return hctx->iface->metadata; ++ return hctx->iface->metadata(hctx->iface->info); ++} ++ ++bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx, ++ struct work_struct *work) ++{ ++ if (WARN_ON(!hctx) || WARN_ON(!work)) ++ return false; ++ ++ return queue_work(hctx->wq, work); + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_context_metadata); + + int kbase_hwcnt_accumulator_set_counters( + struct kbase_hwcnt_accumulator *accum, +@@ -745,8 +760,8 @@ int kbase_hwcnt_accumulator_set_counters( + + hctx = container_of(accum, struct kbase_hwcnt_context, accum); + +- if ((new_map->metadata != hctx->iface->metadata) || +- (dump_buf && (dump_buf->metadata != hctx->iface->metadata))) ++ if ((new_map->metadata != hctx->accum.metadata) || ++ (dump_buf && (dump_buf->metadata != hctx->accum.metadata))) + return -EINVAL; + + mutex_lock(&hctx->accum_lock); +@@ -758,7 +773,6 @@ int kbase_hwcnt_accumulator_set_counters( + + return errcode; + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_set_counters); + + int kbase_hwcnt_accumulator_dump( + struct kbase_hwcnt_accumulator *accum, +@@ -774,7 +788,7 @@ int kbase_hwcnt_accumulator_dump( + + hctx = container_of(accum, struct kbase_hwcnt_context, accum); + +- if (dump_buf && (dump_buf->metadata != hctx->iface->metadata)) ++ if (dump_buf && (dump_buf->metadata != hctx->accum.metadata)) + return -EINVAL; + + mutex_lock(&hctx->accum_lock); +@@ -786,7 +800,6 @@ int kbase_hwcnt_accumulator_dump( + + return errcode; + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_dump); + + u64 kbase_hwcnt_accumulator_timestamp_ns(struct kbase_hwcnt_accumulator *accum) + { +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_accumulator.h 
b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_accumulator.h +index eb82ea4..4887eaa 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_accumulator.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_accumulator.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /** +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend.h +index b7aa0e1..0b5a188 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +@@ -41,11 +40,25 @@ struct kbase_hwcnt_dump_buffer; + struct kbase_hwcnt_backend_info; + + /* +- * struct kbase_hwcnt_backend_info - Opaque pointer to a hardware counter +- * backend, used to perform dumps. ++ * struct kbase_hwcnt_backend - Opaque pointer to a hardware counter ++ * backend, used to perform dumps. + */ + struct kbase_hwcnt_backend; + ++/* ++ * typedef kbase_hwcnt_backend_metadata_fn - Get the immutable hardware counter ++ * metadata that describes the layout ++ * of the counter data structures. ++ * @info: Non-NULL pointer to backend info. ++ * ++ * Multiple calls to this function with the same info are guaranteed to return ++ * the same metadata object each time. ++ * ++ * Return: Non-NULL pointer to immutable hardware counter metadata. ++ */ ++typedef const struct kbase_hwcnt_metadata * ++kbase_hwcnt_backend_metadata_fn(const struct kbase_hwcnt_backend_info *info); ++ + /** + * typedef kbase_hwcnt_backend_init_fn - Initialise a counter backend. + * @info: Non-NULL pointer to backend info. +@@ -56,16 +69,15 @@ struct kbase_hwcnt_backend; + * + * Return: 0 on success, else error code. + */ +-typedef int (*kbase_hwcnt_backend_init_fn)( +- const struct kbase_hwcnt_backend_info *info, +- struct kbase_hwcnt_backend **out_backend); ++typedef int ++kbase_hwcnt_backend_init_fn(const struct kbase_hwcnt_backend_info *info, ++ struct kbase_hwcnt_backend **out_backend); + + /** + * typedef kbase_hwcnt_backend_term_fn - Terminate a counter backend. + * @backend: Pointer to backend to be terminated. 
+ */ +-typedef void (*kbase_hwcnt_backend_term_fn)( +- struct kbase_hwcnt_backend *backend); ++typedef void kbase_hwcnt_backend_term_fn(struct kbase_hwcnt_backend *backend); + + /** + * typedef kbase_hwcnt_backend_timestamp_ns_fn - Get the current backend +@@ -74,8 +86,8 @@ typedef void (*kbase_hwcnt_backend_term_fn)( + * + * Return: Backend timestamp in nanoseconds. + */ +-typedef u64 (*kbase_hwcnt_backend_timestamp_ns_fn)( +- struct kbase_hwcnt_backend *backend); ++typedef u64 ++kbase_hwcnt_backend_timestamp_ns_fn(struct kbase_hwcnt_backend *backend); + + /** + * typedef kbase_hwcnt_backend_dump_enable_fn - Start counter dumping with the +@@ -90,7 +102,7 @@ typedef u64 (*kbase_hwcnt_backend_timestamp_ns_fn)( + * + * Return: 0 on success, else error code. + */ +-typedef int (*kbase_hwcnt_backend_dump_enable_fn)( ++typedef int kbase_hwcnt_backend_dump_enable_fn( + struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map); + +@@ -106,7 +118,7 @@ typedef int (*kbase_hwcnt_backend_dump_enable_fn)( + * + * Return: 0 on success, else error code. + */ +-typedef int (*kbase_hwcnt_backend_dump_enable_nolock_fn)( ++typedef int kbase_hwcnt_backend_dump_enable_nolock_fn( + struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map); + +@@ -118,8 +130,8 @@ typedef int (*kbase_hwcnt_backend_dump_enable_nolock_fn)( + * If the backend is already disabled, does nothing. + * Any undumped counter values since the last dump get will be lost. + */ +-typedef void (*kbase_hwcnt_backend_dump_disable_fn)( +- struct kbase_hwcnt_backend *backend); ++typedef void ++kbase_hwcnt_backend_dump_disable_fn(struct kbase_hwcnt_backend *backend); + + /** + * typedef kbase_hwcnt_backend_dump_clear_fn - Reset all the current undumped +@@ -130,21 +142,24 @@ typedef void (*kbase_hwcnt_backend_dump_disable_fn)( + * + * Return: 0 on success, else error code. 
+ */ +-typedef int (*kbase_hwcnt_backend_dump_clear_fn)( +- struct kbase_hwcnt_backend *backend); ++typedef int ++kbase_hwcnt_backend_dump_clear_fn(struct kbase_hwcnt_backend *backend); + + /** + * typedef kbase_hwcnt_backend_dump_request_fn - Request an asynchronous counter + * dump. + * @backend: Non-NULL pointer to backend. ++ * @dump_time_ns: Non-NULL pointer where the timestamp of when the dump was ++ * requested will be written out to on success. + * + * If the backend is not enabled or another dump is already in progress, + * returns an error. + * + * Return: 0 on success, else error code. + */ +-typedef int (*kbase_hwcnt_backend_dump_request_fn)( +- struct kbase_hwcnt_backend *backend); ++typedef int ++kbase_hwcnt_backend_dump_request_fn(struct kbase_hwcnt_backend *backend, ++ u64 *dump_time_ns); + + /** + * typedef kbase_hwcnt_backend_dump_wait_fn - Wait until the last requested +@@ -155,8 +170,8 @@ typedef int (*kbase_hwcnt_backend_dump_request_fn)( + * + * Return: 0 on success, else error code. + */ +-typedef int (*kbase_hwcnt_backend_dump_wait_fn)( +- struct kbase_hwcnt_backend *backend); ++typedef int ++kbase_hwcnt_backend_dump_wait_fn(struct kbase_hwcnt_backend *backend); + + /** + * typedef kbase_hwcnt_backend_dump_get_fn - Copy or accumulate enable the +@@ -168,24 +183,25 @@ typedef int (*kbase_hwcnt_backend_dump_wait_fn)( + * @accumulate: True if counters should be accumulated into dump_buffer, rather + * than copied. + * +- * If the backend is not enabled, returns an error. +- * If a dump is in progress (i.e. dump_wait has not yet returned successfully) +- * then the resultant contents of the dump buffer will be undefined. ++ * The resultant contents of the dump buffer are only well defined if a prior ++ * call to dump_wait returned successfully, and a new dump has not yet been ++ * requested by a call to dump_request. + * + * Return: 0 on success, else error code. 
+ */ +-typedef int (*kbase_hwcnt_backend_dump_get_fn)( +- struct kbase_hwcnt_backend *backend, +- struct kbase_hwcnt_dump_buffer *dump_buffer, +- const struct kbase_hwcnt_enable_map *enable_map, +- bool accumulate); ++typedef int ++kbase_hwcnt_backend_dump_get_fn(struct kbase_hwcnt_backend *backend, ++ struct kbase_hwcnt_dump_buffer *dump_buffer, ++ const struct kbase_hwcnt_enable_map *enable_map, ++ bool accumulate); + + /** + * struct kbase_hwcnt_backend_interface - Hardware counter backend virtual + * interface. +- * @metadata: Immutable hardware counter metadata. + * @info: Immutable info used to initialise an instance of the + * backend. ++ * @metadata: Function ptr to get the immutable hardware counter ++ * metadata. + * @init: Function ptr to initialise an instance of the backend. + * @term: Function ptr to terminate an instance of the backend. + * @timestamp_ns: Function ptr to get the current backend timestamp. +@@ -200,18 +216,18 @@ typedef int (*kbase_hwcnt_backend_dump_get_fn)( + * buffer. 
+ */ + struct kbase_hwcnt_backend_interface { +- const struct kbase_hwcnt_metadata *metadata; + const struct kbase_hwcnt_backend_info *info; +- kbase_hwcnt_backend_init_fn init; +- kbase_hwcnt_backend_term_fn term; +- kbase_hwcnt_backend_timestamp_ns_fn timestamp_ns; +- kbase_hwcnt_backend_dump_enable_fn dump_enable; +- kbase_hwcnt_backend_dump_enable_nolock_fn dump_enable_nolock; +- kbase_hwcnt_backend_dump_disable_fn dump_disable; +- kbase_hwcnt_backend_dump_clear_fn dump_clear; +- kbase_hwcnt_backend_dump_request_fn dump_request; +- kbase_hwcnt_backend_dump_wait_fn dump_wait; +- kbase_hwcnt_backend_dump_get_fn dump_get; ++ kbase_hwcnt_backend_metadata_fn *metadata; ++ kbase_hwcnt_backend_init_fn *init; ++ kbase_hwcnt_backend_term_fn *term; ++ kbase_hwcnt_backend_timestamp_ns_fn *timestamp_ns; ++ kbase_hwcnt_backend_dump_enable_fn *dump_enable; ++ kbase_hwcnt_backend_dump_enable_nolock_fn *dump_enable_nolock; ++ kbase_hwcnt_backend_dump_disable_fn *dump_disable; ++ kbase_hwcnt_backend_dump_clear_fn *dump_clear; ++ kbase_hwcnt_backend_dump_request_fn *dump_request; ++ kbase_hwcnt_backend_dump_wait_fn *dump_wait; ++ kbase_hwcnt_backend_dump_get_fn *dump_get; + }; + + #endif /* _KBASE_HWCNT_BACKEND_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf.c +new file mode 100644 +index 0000000..58b5e72 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf.c +@@ -0,0 +1,1864 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#include "mali_kbase_hwcnt_backend_csf.h" ++#include "mali_kbase_hwcnt_gpu.h" ++#include "mali_kbase_hwcnt_types.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifndef BASE_MAX_NR_CLOCKS_REGULATORS ++#define BASE_MAX_NR_CLOCKS_REGULATORS 2 ++#endif ++ ++/** ++ * enum kbase_hwcnt_backend_csf_dump_state - HWC CSF backend dumping states. ++ * ++ * @KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE: Initial state, or the state if there is ++ * an error. ++ * ++ * @KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED: A dump has been requested and we are ++ * waiting for an ACK, this ACK could come from either PRFCNT_ACK, ++ * PROTMODE_ENTER_ACK, or if an error occurs. ++ * ++ * @KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT: Checking the insert ++ * immediately after receiving the ACK, so we know which index corresponds to ++ * the buffer we requested. ++ * ++ * @KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED: The insert has been saved and ++ * now we have kicked off the worker. ++ * ++ * @KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING: The insert has been saved and now ++ * we have kicked off the worker to accumulate up to that insert and then copy ++ * the delta to the user buffer to prepare for dump_get(). ++ * ++ * @KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED: The dump completed successfully. 
++ * ++ * Valid state transitions: ++ * IDLE -> REQUESTED (on dump request) ++ * REQUESTED -> QUERYING_INSERT (on dump ack) ++ * QUERYING_INSERT -> WORKER_LAUNCHED (on worker submission) ++ * WORKER_LAUNCHED -> ACCUMULATING (while the worker is accumulating) ++ * ACCUMULATING -> COMPLETED (on accumulation completion) ++ * COMPLETED -> REQUESTED (on dump request) ++ * COMPLETED -> IDLE (on disable) ++ * ANY -> IDLE (on error) ++ */ ++enum kbase_hwcnt_backend_csf_dump_state { ++ KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE, ++ KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED, ++ KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT, ++ KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED, ++ KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING, ++ KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED, ++}; ++ ++/** ++ * enum kbase_hwcnt_backend_csf_enable_state - HWC CSF backend enable states. ++ * ++ * @KBASE_HWCNT_BACKEND_CSF_DISABLED: Initial state, and the state when backend ++ * is disabled. ++ * ++ * @KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED: Enable request is in ++ * progress, waiting for firmware acknowledgment. ++ * ++ * @KBASE_HWCNT_BACKEND_CSF_ENABLED: Enable request has been acknowledged, ++ * enable is done. ++ * ++ * @KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED: Disable request is in ++ * progress, waiting for firmware acknowledgment. ++ * ++ * @KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER: Disable request has been ++ * acknowledged, waiting for dump workers to be finished. ++ * ++ * @KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER: An ++ * unrecoverable error happened, waiting for dump workers to be finished. ++ * ++ * @KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR: An unrecoverable error ++ * happened, and dump workers have finished, waiting for reset. 
++ * ++ * Valid state transitions: ++ * DISABLED -> TRANSITIONING_TO_ENABLED (on enable) ++ * TRANSITIONING_TO_ENABLED -> ENABLED (on enable ack) ++ * ENABLED -> TRANSITIONING_TO_DISABLED (on disable) ++ * TRANSITIONING_TO_DISABLED -> DISABLED_WAIT_FOR_WORKER (on disable ack) ++ * DISABLED_WAIT_FOR_WORKER -> DISABLED (after workers are flushed) ++ * DISABLED -> UNRECOVERABLE_ERROR (on unrecoverable error) ++ * ANY but DISABLED -> UNRECOVERABLE_ERROR_WAIT_FOR_WORKER (on unrecoverable ++ * error) ++ * UNRECOVERABLE_ERROR -> DISABLED (on before reset) ++ */ ++enum kbase_hwcnt_backend_csf_enable_state { ++ KBASE_HWCNT_BACKEND_CSF_DISABLED, ++ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED, ++ KBASE_HWCNT_BACKEND_CSF_ENABLED, ++ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED, ++ KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER, ++ KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER, ++ KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR, ++}; ++ ++/** ++ * struct kbase_hwcnt_backend_csf_info - Information used to create an instance ++ * of a CSF hardware counter backend. ++ * @backend: Pointer to access CSF backend. ++ * @fw_in_protected_mode: True if FW is running in protected mode, else ++ * false. ++ * @unrecoverable_error_happened: True if an unrecoverable error happened, else ++ * false. ++ * @csf_if: CSF interface object pointer. ++ * @ring_buf_cnt: Dump buffer count in the ring buffer. ++ * @counter_set: The performance counter set to use. ++ * @metadata: Hardware counter metadata. ++ * @prfcnt_info: Performance counter information. 
++ */ ++struct kbase_hwcnt_backend_csf_info { ++ struct kbase_hwcnt_backend_csf *backend; ++ bool fw_in_protected_mode; ++ bool unrecoverable_error_happened; ++ struct kbase_hwcnt_backend_csf_if *csf_if; ++ u32 ring_buf_cnt; ++ enum kbase_hwcnt_set counter_set; ++ const struct kbase_hwcnt_metadata *metadata; ++ struct kbase_hwcnt_backend_csf_if_prfcnt_info prfcnt_info; ++}; ++ ++/** ++ * struct kbase_hwcnt_csf_physical_layout - HWC sample memory physical layout ++ * information. ++ * @fe_cnt: Front end block count. ++ * @tiler_cnt: Tiler block count. ++ * @mmu_l2_cnt: Memory system(MMU and L2 cache) block count. ++ * @shader_cnt: Shader Core block count. ++ * @block_cnt: Total block count (sum of all other block counts). ++ * @shader_avail_mask: Bitmap of all shader cores in the system. ++ * @offset_enable_mask: Offset of enable mask in the block. ++ * @headers_per_block: Header size per block. ++ * @counters_per_block: Counters size per block. ++ * @values_per_block: Total size per block. ++ */ ++struct kbase_hwcnt_csf_physical_layout { ++ size_t fe_cnt; ++ size_t tiler_cnt; ++ size_t mmu_l2_cnt; ++ size_t shader_cnt; ++ size_t block_cnt; ++ u64 shader_avail_mask; ++ size_t offset_enable_mask; ++ size_t headers_per_block; ++ size_t counters_per_block; ++ size_t values_per_block; ++}; ++ ++/** ++ * struct kbase_hwcnt_backend_csf - Instance of a CSF hardware counter backend. ++ * @info: CSF Info used to create the backend. ++ * @dump_state: The dumping state of the backend. ++ * @enable_state: The CSF backend internal enabled state. ++ * @insert_index_to_accumulate: The insert index in the ring buffer which need ++ * to accumulate up to. ++ * @enable_state_waitq: Wait queue object used to notify the enable ++ * changing flag is done. ++ * @to_user_buf: HWC sample buffer for client user. ++ * @accum_buf: HWC sample buffer used as an internal ++ * accumulator. ++ * @old_sample_buf: HWC sample buffer to save the previous values ++ * for delta calculation. 
++ * @ring_buf: Opaque pointer for ring buffer object. ++ * @ring_buf_cpu_base: CPU base address of the allocated ring buffer. ++ * @clk_enable_map: The enable map specifying enabled clock domains. ++ * @cycle_count_elapsed: Cycle count elapsed for a given sample period. ++ * @prev_cycle_count: Previous cycle count to calculate the cycle ++ * count for sample period. ++ * @phys_layout: Physical memory layout information of HWC ++ * sample buffer. ++ * @dump_completed: Completion signaled by the dump worker when ++ * it is completed accumulating up to the ++ * insert_index_to_accumulate. ++ * Should be initialized to the "complete" state. ++ * @hwc_dump_workq: Single threaded work queue for HWC workers ++ * execution. ++ * @hwc_dump_work: Worker to accumulate samples. ++ * @hwc_threshold_work: Worker for consuming available samples when ++ * threshold interrupt raised. ++ */ ++struct kbase_hwcnt_backend_csf { ++ struct kbase_hwcnt_backend_csf_info *info; ++ enum kbase_hwcnt_backend_csf_dump_state dump_state; ++ enum kbase_hwcnt_backend_csf_enable_state enable_state; ++ u32 insert_index_to_accumulate; ++ wait_queue_head_t enable_state_waitq; ++ u32 *to_user_buf; ++ u32 *accum_buf; ++ u32 *old_sample_buf; ++ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf; ++ void *ring_buf_cpu_base; ++ u64 clk_enable_map; ++ u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS]; ++ u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS]; ++ struct kbase_hwcnt_csf_physical_layout phys_layout; ++ struct completion dump_completed; ++ struct workqueue_struct *hwc_dump_workq; ++ struct work_struct hwc_dump_work; ++ struct work_struct hwc_threshold_work; ++}; ++ ++static bool kbasep_hwcnt_backend_csf_backend_exists( ++ struct kbase_hwcnt_backend_csf_info *csf_info) ++{ ++ WARN_ON(!csf_info); ++ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); ++ return (csf_info->backend != NULL); ++} ++ ++/** ++ * kbasep_hwcnt_backend_csf_cc_initial_sample() - Initialize cycle count ++ * 
tracking. ++ * ++ * @backend_csf: Non-NULL pointer to backend. ++ * @enable_map: Non-NULL pointer to enable map specifying enabled counters. ++ */ ++static void kbasep_hwcnt_backend_csf_cc_initial_sample( ++ struct kbase_hwcnt_backend_csf *backend_csf, ++ const struct kbase_hwcnt_enable_map *enable_map) ++{ ++ u64 clk_enable_map = enable_map->clk_enable_map; ++ u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS]; ++ size_t clk; ++ ++ /* Read cycle count from CSF interface for both clock domains. */ ++ backend_csf->info->csf_if->get_gpu_cycle_count( ++ backend_csf->info->csf_if->ctx, cycle_counts, clk_enable_map); ++ ++ kbase_hwcnt_metadata_for_each_clock(enable_map->metadata, clk) { ++ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, clk)) ++ backend_csf->prev_cycle_count[clk] = cycle_counts[clk]; ++ } ++ ++ /* Keep clk_enable_map for dump_request. */ ++ backend_csf->clk_enable_map = clk_enable_map; ++} ++ ++static void ++kbasep_hwcnt_backend_csf_cc_update(struct kbase_hwcnt_backend_csf *backend_csf) ++{ ++ u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS]; ++ size_t clk; ++ ++ backend_csf->info->csf_if->assert_lock_held( ++ backend_csf->info->csf_if->ctx); ++ ++ backend_csf->info->csf_if->get_gpu_cycle_count( ++ backend_csf->info->csf_if->ctx, cycle_counts, ++ backend_csf->clk_enable_map); ++ ++ kbase_hwcnt_metadata_for_each_clock(backend_csf->info->metadata, clk) { ++ if (kbase_hwcnt_clk_enable_map_enabled( ++ backend_csf->clk_enable_map, clk)) { ++ backend_csf->cycle_count_elapsed[clk] = ++ cycle_counts[clk] - ++ backend_csf->prev_cycle_count[clk]; ++ backend_csf->prev_cycle_count[clk] = cycle_counts[clk]; ++ } ++ } ++} ++ ++/* CSF backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */ ++static u64 ++kbasep_hwcnt_backend_csf_timestamp_ns(struct kbase_hwcnt_backend *backend) ++{ ++ struct kbase_hwcnt_backend_csf *backend_csf = ++ (struct kbase_hwcnt_backend_csf *)backend; ++ ++ if (!backend_csf || !backend_csf->info || !backend_csf->info->csf_if) ++ 
return 0; ++ ++ return backend_csf->info->csf_if->timestamp_ns( ++ backend_csf->info->csf_if->ctx); ++} ++ ++/** kbasep_hwcnt_backend_csf_process_enable_map() - Process the enable_map to ++ * guarantee headers are ++ * enabled if any counter is ++ * required. ++ * @phys_enable_map: HWC physical enable map to be processed. ++ */ ++static void kbasep_hwcnt_backend_csf_process_enable_map( ++ struct kbase_hwcnt_physical_enable_map *phys_enable_map) ++{ ++ WARN_ON(!phys_enable_map); ++ ++ /* Enable header if any counter is required from user, the header is ++ * controlled by bit 0 of the enable mask. ++ */ ++ if (phys_enable_map->fe_bm) ++ phys_enable_map->fe_bm |= 1; ++ ++ if (phys_enable_map->tiler_bm) ++ phys_enable_map->tiler_bm |= 1; ++ ++ if (phys_enable_map->mmu_l2_bm) ++ phys_enable_map->mmu_l2_bm |= 1; ++ ++ if (phys_enable_map->shader_bm) ++ phys_enable_map->shader_bm |= 1; ++} ++ ++static void kbasep_hwcnt_backend_csf_init_layout( ++ const struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info, ++ struct kbase_hwcnt_csf_physical_layout *phys_layout) ++{ ++ WARN_ON(!prfcnt_info); ++ WARN_ON(!phys_layout); ++ ++ phys_layout->fe_cnt = 1; ++ phys_layout->tiler_cnt = 1; ++ phys_layout->mmu_l2_cnt = prfcnt_info->l2_count; ++ phys_layout->shader_cnt = fls64(prfcnt_info->core_mask); ++ phys_layout->block_cnt = phys_layout->fe_cnt + phys_layout->tiler_cnt + ++ phys_layout->mmu_l2_cnt + ++ phys_layout->shader_cnt; ++ ++ phys_layout->shader_avail_mask = prfcnt_info->core_mask; ++ ++ phys_layout->headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; ++ phys_layout->values_per_block = ++ prfcnt_info->prfcnt_block_size / KBASE_HWCNT_VALUE_BYTES; ++ phys_layout->counters_per_block = ++ phys_layout->values_per_block - phys_layout->headers_per_block; ++ phys_layout->offset_enable_mask = KBASE_HWCNT_V5_PRFCNT_EN_HEADER; ++} ++ ++static void kbasep_hwcnt_backend_csf_reset_internal_buffers( ++ struct kbase_hwcnt_backend_csf *backend_csf) ++{ ++ 
memset(backend_csf->to_user_buf, 0, ++ backend_csf->info->prfcnt_info.dump_bytes); ++ memset(backend_csf->accum_buf, 0, ++ backend_csf->info->prfcnt_info.dump_bytes); ++ memset(backend_csf->old_sample_buf, 0, ++ backend_csf->info->prfcnt_info.dump_bytes); ++} ++ ++static void kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header( ++ struct kbase_hwcnt_backend_csf *backend_csf, u32 *sample) ++{ ++ u32 block_idx; ++ const struct kbase_hwcnt_csf_physical_layout *phys_layout; ++ u32 *block_buf; ++ ++ phys_layout = &backend_csf->phys_layout; ++ ++ for (block_idx = 0; block_idx < phys_layout->block_cnt; block_idx++) { ++ block_buf = sample + block_idx * phys_layout->values_per_block; ++ block_buf[phys_layout->offset_enable_mask] = 0; ++ } ++} ++ ++static void kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header( ++ struct kbase_hwcnt_backend_csf *backend_csf) ++{ ++ u32 idx; ++ u32 *sample; ++ char *cpu_dump_base; ++ size_t dump_bytes = backend_csf->info->prfcnt_info.dump_bytes; ++ ++ cpu_dump_base = (char *)backend_csf->ring_buf_cpu_base; ++ ++ for (idx = 0; idx < backend_csf->info->ring_buf_cnt; idx++) { ++ sample = (u32 *)&cpu_dump_base[idx * dump_bytes]; ++ kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header( ++ backend_csf, sample); ++ } ++} ++ ++static void kbasep_hwcnt_backend_csf_update_user_sample( ++ struct kbase_hwcnt_backend_csf *backend_csf) ++{ ++ /* Copy the data into the sample and wait for the user to get it. */ ++ memcpy(backend_csf->to_user_buf, backend_csf->accum_buf, ++ backend_csf->info->prfcnt_info.dump_bytes); ++ ++ /* After copied data into user sample, clear the accumulator values to ++ * prepare for the next accumulator, such as the next request or ++ * threshold. 
++ */ ++ memset(backend_csf->accum_buf, 0, ++ backend_csf->info->prfcnt_info.dump_bytes); ++} ++ ++static void kbasep_hwcnt_backend_csf_accumulate_sample( ++ const struct kbase_hwcnt_csf_physical_layout *phys_layout, ++ size_t dump_bytes, u32 *accum_buf, const u32 *old_sample_buf, ++ const u32 *new_sample_buf, bool clearing_samples) ++{ ++ size_t block_idx, ctr_idx; ++ const u32 *old_block = old_sample_buf; ++ const u32 *new_block = new_sample_buf; ++ u32 *acc_block = accum_buf; ++ ++ for (block_idx = 0; block_idx < phys_layout->block_cnt; block_idx++) { ++ const u32 old_enable_mask = ++ old_block[phys_layout->offset_enable_mask]; ++ const u32 new_enable_mask = ++ new_block[phys_layout->offset_enable_mask]; ++ ++ if (new_enable_mask == 0) { ++ /* Hardware block was unavailable or we didn't turn on ++ * any counters. Do nothing. ++ */ ++ } else { ++ /* Hardware block was available and it had some counters ++ * enabled. We need to update the accumulation buffer. ++ */ ++ ++ /* Unconditionally copy the headers. */ ++ memcpy(acc_block, new_block, ++ phys_layout->headers_per_block * ++ KBASE_HWCNT_VALUE_BYTES); ++ ++ /* Accumulate counter samples ++ * ++ * When accumulating samples we need to take into ++ * account whether the counter sampling method involves ++ * clearing counters back to zero after each sample is ++ * taken. ++ * ++ * The intention for CSF was that all HW should use ++ * counters which wrap to zero when their maximum value ++ * is reached. This, combined with non-clearing ++ * sampling, enables multiple concurrent users to ++ * request samples without interfering with each other. ++ * ++ * However some early HW may not support wrapping ++ * counters, for these GPUs counters must be cleared on ++ * sample to avoid loss of data due to counters ++ * saturating at their maximum value. ++ */ ++ if (!clearing_samples) { ++ if (old_enable_mask == 0) { ++ /* Hardware block was previously ++ * unavailable. 
Accumulate the new ++ * counters only, as we know previous ++ * values are zeroes. ++ */ ++ for (ctr_idx = ++ phys_layout ++ ->headers_per_block; ++ ctr_idx < ++ phys_layout->values_per_block; ++ ctr_idx++) { ++ acc_block[ctr_idx] += ++ new_block[ctr_idx]; ++ } ++ } else { ++ /* Hardware block was previously ++ * available. Accumulate the delta ++ * between old and new counter values. ++ */ ++ for (ctr_idx = ++ phys_layout ++ ->headers_per_block; ++ ctr_idx < ++ phys_layout->values_per_block; ++ ctr_idx++) { ++ acc_block[ctr_idx] += ++ new_block[ctr_idx] - ++ old_block[ctr_idx]; ++ } ++ } ++ } else { ++ for (ctr_idx = phys_layout->headers_per_block; ++ ctr_idx < phys_layout->values_per_block; ++ ctr_idx++) { ++ acc_block[ctr_idx] += ++ new_block[ctr_idx]; ++ } ++ } ++ } ++ old_block += phys_layout->values_per_block; ++ new_block += phys_layout->values_per_block; ++ acc_block += phys_layout->values_per_block; ++ } ++ ++ WARN_ON(old_block != ++ old_sample_buf + dump_bytes / KBASE_HWCNT_VALUE_BYTES); ++ WARN_ON(new_block != ++ new_sample_buf + dump_bytes / KBASE_HWCNT_VALUE_BYTES); ++ WARN_ON(acc_block != accum_buf + dump_bytes / KBASE_HWCNT_VALUE_BYTES); ++ (void)dump_bytes; ++} ++ ++static void kbasep_hwcnt_backend_csf_accumulate_samples( ++ struct kbase_hwcnt_backend_csf *backend_csf, u32 extract_index_to_start, ++ u32 insert_index_to_stop) ++{ ++ u32 raw_idx; ++ unsigned long flags; ++ u8 *cpu_dump_base = (u8 *)backend_csf->ring_buf_cpu_base; ++ const size_t ring_buf_cnt = backend_csf->info->ring_buf_cnt; ++ const size_t buf_dump_bytes = backend_csf->info->prfcnt_info.dump_bytes; ++ bool clearing_samples = backend_csf->info->prfcnt_info.clearing_samples; ++ u32 *old_sample_buf = backend_csf->old_sample_buf; ++ u32 *new_sample_buf; ++ ++ if (extract_index_to_start == insert_index_to_stop) ++ /* No samples to accumulate. Early out. */ ++ return; ++ ++ /* Sync all the buffers to CPU side before read the data. 
*/ ++ backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx, ++ backend_csf->ring_buf, ++ extract_index_to_start, ++ insert_index_to_stop, true); ++ ++ /* Consider u32 wrap case, '!=' is used here instead of '<' operator */ ++ for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop; ++ raw_idx++) { ++ /* The bitwise "&" acts as a modulo operation since ring_buf_cnt ++ * must be a power of two. ++ */ ++ const u32 buf_idx = raw_idx & (ring_buf_cnt - 1); ++ ++ new_sample_buf = ++ (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes]; ++ ++ kbasep_hwcnt_backend_csf_accumulate_sample( ++ &backend_csf->phys_layout, buf_dump_bytes, ++ backend_csf->accum_buf, old_sample_buf, new_sample_buf, ++ clearing_samples); ++ ++ old_sample_buf = new_sample_buf; ++ } ++ ++ /* Save the newest buffer as the old buffer for next time. */ ++ memcpy(backend_csf->old_sample_buf, new_sample_buf, buf_dump_bytes); ++ ++ /* Reset the prfcnt_en header on each sample before releasing them. */ ++ for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop; ++ raw_idx++) { ++ const u32 buf_idx = raw_idx & (ring_buf_cnt - 1); ++ u32 *sample = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes]; ++ ++ kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header( ++ backend_csf, sample); ++ } ++ ++ /* Sync zeroed buffers to avoid coherency issues on future use. */ ++ backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx, ++ backend_csf->ring_buf, ++ extract_index_to_start, ++ insert_index_to_stop, false); ++ ++ /* After consuming all samples between extract_idx and insert_idx, ++ * set the raw extract index to insert_idx so that the sample buffers ++ * can be released back to the ring buffer pool. 
++ */ ++ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); ++ backend_csf->info->csf_if->set_extract_index( ++ backend_csf->info->csf_if->ctx, insert_index_to_stop); ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, ++ flags); ++} ++ ++static void kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( ++ struct kbase_hwcnt_backend_csf *backend_csf, ++ enum kbase_hwcnt_backend_csf_enable_state new_state) ++{ ++ backend_csf->info->csf_if->assert_lock_held( ++ backend_csf->info->csf_if->ctx); ++ ++ if (backend_csf->enable_state != new_state) { ++ backend_csf->enable_state = new_state; ++ ++ wake_up(&backend_csf->enable_state_waitq); ++ } ++} ++ ++/** ++ * kbasep_hwcnt_backend_csf_dump_worker() - HWC dump worker. ++ * @work: Work structure. ++ * ++ * To accumulate all available samples in the ring buffer when a request has ++ * been done. ++ * ++ */ ++static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work) ++{ ++ unsigned long flags; ++ struct kbase_hwcnt_backend_csf *backend_csf; ++ u32 insert_index_to_acc; ++ u32 extract_index; ++ u32 insert_index; ++ ++ WARN_ON(!work); ++ backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, ++ hwc_dump_work); ++ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); ++ /* Assert the backend is not destroyed. */ ++ WARN_ON(backend_csf != backend_csf->info->backend); ++ ++ /* The backend was disabled or had an error while the worker was being ++ * launched. 
++ */ ++ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { ++ WARN_ON(backend_csf->dump_state != ++ KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE); ++ WARN_ON(!completion_done(&backend_csf->dump_completed)); ++ backend_csf->info->csf_if->unlock( ++ backend_csf->info->csf_if->ctx, flags); ++ return; ++ } ++ ++ WARN_ON(backend_csf->dump_state != ++ KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED); ++ ++ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING; ++ insert_index_to_acc = backend_csf->insert_index_to_accumulate; ++ ++ /* Read the raw extract and insert indexes from the CSF interface. */ ++ backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx, ++ &extract_index, &insert_index); ++ ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, ++ flags); ++ ++ /* Accumulate up to the insert we grabbed at the prfcnt request ++ * interrupt. ++ */ ++ kbasep_hwcnt_backend_csf_accumulate_samples(backend_csf, extract_index, ++ insert_index_to_acc); ++ ++ /* Copy to the user buffer so if a threshold interrupt fires ++ * between now and get(), the accumulations are untouched. ++ */ ++ kbasep_hwcnt_backend_csf_update_user_sample(backend_csf); ++ ++ /* Dump done, set state back to COMPLETED for next request. */ ++ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); ++ /* Assert the backend is not destroyed. */ ++ WARN_ON(backend_csf != backend_csf->info->backend); ++ ++ /* The backend was disabled or had an error while we were accumulating. ++ */ ++ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { ++ WARN_ON(backend_csf->dump_state != ++ KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE); ++ WARN_ON(!completion_done(&backend_csf->dump_completed)); ++ backend_csf->info->csf_if->unlock( ++ backend_csf->info->csf_if->ctx, flags); ++ return; ++ } ++ ++ WARN_ON(backend_csf->dump_state != ++ KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING); ++ ++ /* Our work here is done - set the wait object and unblock waiters. 
*/ ++ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; ++ complete_all(&backend_csf->dump_completed); ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, ++ flags); ++} ++ ++/** ++ * kbasep_hwcnt_backend_csf_threshold_worker() - Threshold worker. ++ * ++ * @work: Work structure. ++ * ++ * Called when a HWC threshold interrupt is raised, to consume all available ++ * samples in the ring buffer. ++ */ ++static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work) ++{ ++ unsigned long flags; ++ struct kbase_hwcnt_backend_csf *backend_csf; ++ u32 extract_index; ++ u32 insert_index; ++ ++ WARN_ON(!work); ++ ++ backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, ++ hwc_threshold_work); ++ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); ++ ++ /* Assert the backend is not destroyed. */ ++ WARN_ON(backend_csf != backend_csf->info->backend); ++ ++ /* Read the raw extract and insert indexes from the CSF interface. */ ++ backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx, ++ &extract_index, &insert_index); ++ ++ /* The backend was disabled or had an error while the worker was being ++ * launched. ++ */ ++ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { ++ backend_csf->info->csf_if->unlock( ++ backend_csf->info->csf_if->ctx, flags); ++ return; ++ } ++ ++ /* Early out if we are not in the IDLE state or COMPLETED state, as this ++ * means a concurrent dump is in progress and we don't want to ++ * interfere. ++ */ ++ if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) && ++ (backend_csf->dump_state != ++ KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED)) { ++ backend_csf->info->csf_if->unlock( ++ backend_csf->info->csf_if->ctx, flags); ++ return; ++ } ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, ++ flags); ++ ++ /* Accumulate everything we possibly can. 
We grabbed the insert index ++ * immediately after we acquired the lock but before we checked whether ++ * a concurrent dump was triggered. This ensures that if a concurrent ++ * dump was triggered between releasing the lock and now, we know for a ++ * fact that our insert will not exceed the concurrent dump's ++ * insert_to_accumulate, so we don't risk accumulating too much data. ++ */ ++ kbasep_hwcnt_backend_csf_accumulate_samples(backend_csf, extract_index, ++ insert_index); ++ ++ /* No need to wake up anything since it is not a user dump request. */ ++} ++ ++static void kbase_hwcnt_backend_csf_submit_dump_worker( ++ struct kbase_hwcnt_backend_csf_info *csf_info) ++{ ++ u32 extract_index; ++ ++ WARN_ON(!csf_info); ++ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); ++ ++ WARN_ON(!kbasep_hwcnt_backend_csf_backend_exists(csf_info)); ++ WARN_ON(csf_info->backend->enable_state != ++ KBASE_HWCNT_BACKEND_CSF_ENABLED); ++ WARN_ON(csf_info->backend->dump_state != ++ KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT); ++ ++ /* Save insert index now so that the dump worker only accumulates the ++ * HWC data associated with this request. Extract index is not stored ++ * as that needs to be checked when accumulating to prevent re-reading ++ * buffers that have already been read and returned to the GPU. ++ */ ++ csf_info->csf_if->get_indexes( ++ csf_info->csf_if->ctx, &extract_index, ++ &csf_info->backend->insert_index_to_accumulate); ++ csf_info->backend->dump_state = ++ KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED; ++ ++ /* Submit the accumulator task into the work queue. 
*/ ++ queue_work(csf_info->backend->hwc_dump_workq, ++ &csf_info->backend->hwc_dump_work); ++} ++ ++static void kbasep_hwcnt_backend_csf_get_physical_enable( ++ struct kbase_hwcnt_backend_csf *backend_csf, ++ const struct kbase_hwcnt_enable_map *enable_map, ++ struct kbase_hwcnt_backend_csf_if_enable *enable) ++{ ++ enum kbase_hwcnt_physical_set phys_counter_set; ++ struct kbase_hwcnt_physical_enable_map phys_enable_map; ++ ++ kbase_hwcnt_gpu_enable_map_to_physical(&phys_enable_map, enable_map); ++ ++ /* process the enable_map to guarantee the block header is enabled which ++ * is needed for delta calculation. ++ */ ++ kbasep_hwcnt_backend_csf_process_enable_map(&phys_enable_map); ++ ++ kbase_hwcnt_gpu_set_to_physical(&phys_counter_set, ++ backend_csf->info->counter_set); ++ ++ /* Use processed enable_map to enable HWC in HW level. */ ++ enable->fe_bm = phys_enable_map.fe_bm; ++ enable->shader_bm = phys_enable_map.shader_bm; ++ enable->tiler_bm = phys_enable_map.tiler_bm; ++ enable->mmu_l2_bm = phys_enable_map.mmu_l2_bm; ++ enable->counter_set = phys_counter_set; ++ enable->clk_enable_map = enable_map->clk_enable_map; ++} ++ ++/* CSF backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */ ++static int kbasep_hwcnt_backend_csf_dump_enable_nolock( ++ struct kbase_hwcnt_backend *backend, ++ const struct kbase_hwcnt_enable_map *enable_map) ++{ ++ struct kbase_hwcnt_backend_csf *backend_csf = ++ (struct kbase_hwcnt_backend_csf *)backend; ++ struct kbase_hwcnt_backend_csf_if_enable enable; ++ ++ if (!backend_csf || !enable_map || ++ (enable_map->metadata != backend_csf->info->metadata)) ++ return -EINVAL; ++ ++ backend_csf->info->csf_if->assert_lock_held( ++ backend_csf->info->csf_if->ctx); ++ ++ kbasep_hwcnt_backend_csf_get_physical_enable(backend_csf, enable_map, ++ &enable); ++ ++ /* enable_state should be DISABLED before we transfer it to enabled */ ++ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED) ++ return -EIO; ++ ++ 
backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; ++ WARN_ON(!completion_done(&backend_csf->dump_completed)); ++ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( ++ backend_csf, KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED); ++ ++ backend_csf->info->csf_if->dump_enable(backend_csf->info->csf_if->ctx, ++ backend_csf->ring_buf, &enable); ++ ++ kbasep_hwcnt_backend_csf_cc_initial_sample(backend_csf, enable_map); ++ ++ return 0; ++} ++ ++/* CSF backend implementation of kbase_hwcnt_backend_dump_enable_fn */ ++static int kbasep_hwcnt_backend_csf_dump_enable( ++ struct kbase_hwcnt_backend *backend, ++ const struct kbase_hwcnt_enable_map *enable_map) ++{ ++ int errcode; ++ unsigned long flags; ++ struct kbase_hwcnt_backend_csf *backend_csf = ++ (struct kbase_hwcnt_backend_csf *)backend; ++ ++ if (!backend_csf) ++ return -EINVAL; ++ ++ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); ++ errcode = kbasep_hwcnt_backend_csf_dump_enable_nolock(backend, ++ enable_map); ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, ++ flags); ++ return errcode; ++} ++ ++static void kbasep_hwcnt_backend_csf_wait_enable_transition_complete( ++ struct kbase_hwcnt_backend_csf *backend_csf, unsigned long *lock_flags) ++{ ++ backend_csf->info->csf_if->assert_lock_held( ++ backend_csf->info->csf_if->ctx); ++ ++ while ((backend_csf->enable_state == ++ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) || ++ (backend_csf->enable_state == ++ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED)) { ++ backend_csf->info->csf_if->unlock( ++ backend_csf->info->csf_if->ctx, *lock_flags); ++ ++ wait_event( ++ backend_csf->enable_state_waitq, ++ (backend_csf->enable_state != ++ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) && ++ (backend_csf->enable_state != ++ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED)); ++ ++ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, ++ lock_flags); ++ } ++} ++ ++/* CSF backend implementation of 
kbase_hwcnt_backend_dump_disable_fn */ ++static void ++kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) ++{ ++ unsigned long flags; ++ struct kbase_hwcnt_backend_csf *backend_csf = ++ (struct kbase_hwcnt_backend_csf *)backend; ++ bool do_disable = false; ++ ++ WARN_ON(!backend_csf); ++ ++ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); ++ ++ /* Make sure we wait until any previous enable or disable have completed ++ * before doing anything. ++ */ ++ kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf, ++ &flags); ++ ++ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED || ++ backend_csf->enable_state == ++ KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) { ++ /* If we are already disabled or in an unrecoverable error ++ * state, there is nothing for us to do. ++ */ ++ backend_csf->info->csf_if->unlock( ++ backend_csf->info->csf_if->ctx, flags); ++ return; ++ } ++ ++ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) { ++ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( ++ backend_csf, ++ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED); ++ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; ++ complete_all(&backend_csf->dump_completed); ++ /* Only disable if we were previously enabled - in all other ++ * cases the call to disable will have already been made. ++ */ ++ do_disable = true; ++ } ++ ++ WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE); ++ WARN_ON(!completion_done(&backend_csf->dump_completed)); ++ ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, ++ flags); ++ ++ /* Block until any async work has completed. We have transitioned out of ++ * the ENABLED state so we can guarantee no new work will concurrently ++ * be submitted. 
++ */ ++ flush_workqueue(backend_csf->hwc_dump_workq); ++ ++ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); ++ ++ if (do_disable) ++ backend_csf->info->csf_if->dump_disable( ++ backend_csf->info->csf_if->ctx); ++ ++ kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf, ++ &flags); ++ ++ switch (backend_csf->enable_state) { ++ case KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER: ++ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( ++ backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED); ++ break; ++ case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER: ++ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( ++ backend_csf, ++ KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR); ++ break; ++ default: ++ WARN_ON(true); ++ break; ++ } ++ ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, ++ flags); ++ ++ /* After disable, zero the header of all buffers in the ring buffer back ++ * to 0 to prepare for the next enable. ++ */ ++ kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(backend_csf); ++ ++ /* Sync zeroed buffers to avoid coherency issues on future use. */ ++ backend_csf->info->csf_if->ring_buf_sync( ++ backend_csf->info->csf_if->ctx, backend_csf->ring_buf, 0, ++ backend_csf->info->ring_buf_cnt, false); ++ ++ /* Reset accumulator, old_sample_buf and user_sample to all-0 to prepare ++ * for next enable. 
++ */ ++ kbasep_hwcnt_backend_csf_reset_internal_buffers(backend_csf); ++} ++ ++/* CSF backend implementation of kbase_hwcnt_backend_dump_request_fn */ ++static int ++kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend, ++ u64 *dump_time_ns) ++{ ++ unsigned long flags; ++ struct kbase_hwcnt_backend_csf *backend_csf = ++ (struct kbase_hwcnt_backend_csf *)backend; ++ bool do_request = false; ++ ++ if (!backend_csf) ++ return -EINVAL; ++ ++ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); ++ ++ /* If we're transitioning to enabled there's nothing to accumulate, and ++ * the user dump buffer is already zeroed. We can just short circuit to ++ * the DUMP_COMPLETED state. ++ */ ++ if (backend_csf->enable_state == ++ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) { ++ backend_csf->dump_state = ++ KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; ++ *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend); ++ kbasep_hwcnt_backend_csf_cc_update(backend_csf); ++ backend_csf->info->csf_if->unlock( ++ backend_csf->info->csf_if->ctx, flags); ++ return 0; ++ } ++ ++ /* Otherwise, make sure we're already enabled. */ ++ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { ++ backend_csf->info->csf_if->unlock( ++ backend_csf->info->csf_if->ctx, flags); ++ return -EIO; ++ } ++ ++ /* Make sure that this is either the first request since enable or the ++ * previous dump has completed, so we can avoid midway through a dump. ++ */ ++ if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) && ++ (backend_csf->dump_state != ++ KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED)) { ++ backend_csf->info->csf_if->unlock( ++ backend_csf->info->csf_if->ctx, flags); ++ /* HWC is disabled or another dump is ongoing, or we are on ++ * fault. ++ */ ++ return -EIO; ++ } ++ ++ /* Reset the completion so dump_wait() has something to wait on. 
*/ ++ reinit_completion(&backend_csf->dump_completed); ++ ++ if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) && ++ !backend_csf->info->fw_in_protected_mode) { ++ /* Only do the request if we are fully enabled and not in ++ * protected mode. ++ */ ++ backend_csf->dump_state = ++ KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED; ++ do_request = true; ++ } else { ++ /* Skip the request and waiting for ack and go straight to ++ * checking the insert and kicking off the worker to do the dump ++ */ ++ backend_csf->dump_state = ++ KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT; ++ } ++ ++ /* CSF firmware might enter protected mode now, but still call request. ++ * That is fine, as we changed state while holding the lock, so the ++ * protected mode enter function will query the insert and launch the ++ * dumping worker. ++ * At some point we will get the dump request ACK saying a dump is done, ++ * but we can ignore it if we are not in the REQUESTED state and process ++ * it in next round dumping worker. ++ */ ++ ++ *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend); ++ kbasep_hwcnt_backend_csf_cc_update(backend_csf); ++ ++ if (do_request) ++ backend_csf->info->csf_if->dump_request( ++ backend_csf->info->csf_if->ctx); ++ else ++ kbase_hwcnt_backend_csf_submit_dump_worker(backend_csf->info); ++ ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, ++ flags); ++ return 0; ++} ++ ++/* CSF backend implementation of kbase_hwcnt_backend_dump_wait_fn */ ++static int ++kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backend) ++{ ++ unsigned long flags; ++ struct kbase_hwcnt_backend_csf *backend_csf = ++ (struct kbase_hwcnt_backend_csf *)backend; ++ int errcode; ++ ++ if (!backend_csf) ++ return -EINVAL; ++ ++ wait_for_completion(&backend_csf->dump_completed); ++ ++ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); ++ /* Make sure the last dump actually succeeded. 
*/ ++ errcode = (backend_csf->dump_state == ++ KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) ? ++ 0 : ++ -EIO; ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, ++ flags); ++ ++ return errcode; ++} ++ ++/* CSF backend implementation of kbase_hwcnt_backend_dump_clear_fn */ ++static int ++kbasep_hwcnt_backend_csf_dump_clear(struct kbase_hwcnt_backend *backend) ++{ ++ struct kbase_hwcnt_backend_csf *backend_csf = ++ (struct kbase_hwcnt_backend_csf *)backend; ++ int errcode; ++ u64 ts; ++ ++ if (!backend_csf) ++ return -EINVAL; ++ ++ /* Request a dump so we can clear all current counters. */ ++ errcode = kbasep_hwcnt_backend_csf_dump_request(backend, &ts); ++ if (!errcode) ++ /* Wait for the manual dump or auto dump to be done and ++ * accumulator to be updated. ++ */ ++ errcode = kbasep_hwcnt_backend_csf_dump_wait(backend); ++ ++ return errcode; ++} ++ ++/* CSF backend implementation of kbase_hwcnt_backend_dump_get_fn */ ++static int kbasep_hwcnt_backend_csf_dump_get( ++ struct kbase_hwcnt_backend *backend, ++ struct kbase_hwcnt_dump_buffer *dst, ++ const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate) ++{ ++ struct kbase_hwcnt_backend_csf *backend_csf = ++ (struct kbase_hwcnt_backend_csf *)backend; ++ int ret; ++ size_t clk; ++ ++ if (!backend_csf || !dst || !dst_enable_map || ++ (backend_csf->info->metadata != dst->metadata) || ++ (dst_enable_map->metadata != dst->metadata)) ++ return -EINVAL; ++ ++ kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) { ++ if (!kbase_hwcnt_clk_enable_map_enabled( ++ dst_enable_map->clk_enable_map, clk)) ++ continue; ++ ++ /* Extract elapsed cycle count for each clock domain. */ ++ dst->clk_cnt_buf[clk] = backend_csf->cycle_count_elapsed[clk]; ++ } ++ ++ /* We just return the user buffer without checking the current state, ++ * as it is undefined to call this function without a prior succeeding ++ * one to dump_wait(). 
++	 */
++	ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf,
++				       dst_enable_map, accumulate);
++
++	return ret;
++}
++
++/**
++ * kbasep_hwcnt_backend_csf_destroy() - Destroy CSF backend.
++ * @backend_csf: Pointer to CSF backend to destroy. May be NULL, in which case
++ *               nothing is done.
++ *
++ * Destroys the dump workqueue (if one was allocated), frees the ring buffer
++ * through the CSF interface, frees the accumulation, old-sample and
++ * user-sample buffers, and finally frees the backend itself.
++ *
++ * Intended to be callable on a backend in any state of partial construction.
++ * NOTE(review): @backend_csf->info and the ring buffer are still used
++ * unconditionally - confirm ring_buf_free() tolerates a ring buffer that was
++ * never allocated before relying on that guarantee.
++ */
++static void
++kbasep_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_csf *backend_csf)
++{
++	if (!backend_csf)
++		return;
++
++	/* The workqueue is the last resource a backend acquires when it is
++	 * created, so it can still be NULL on a partially constructed backend.
++	 */
++	if (backend_csf->hwc_dump_workq)
++		destroy_workqueue(backend_csf->hwc_dump_workq);
++
++	backend_csf->info->csf_if->ring_buf_free(backend_csf->info->csf_if->ctx,
++						 backend_csf->ring_buf);
++
++	/* kfree(NULL) is a no-op, so buffers that were never allocated are
++	 * harmless here.
++	 */
++	kfree(backend_csf->accum_buf);
++	backend_csf->accum_buf = NULL;
++
++	kfree(backend_csf->old_sample_buf);
++	backend_csf->old_sample_buf = NULL;
++
++	kfree(backend_csf->to_user_buf);
++	backend_csf->to_user_buf = NULL;
++
++	kfree(backend_csf);
++}
++
++/**
++ * kbasep_hwcnt_backend_csf_create() - Create a CSF backend instance.
++ *
++ * @csf_info: Non-NULL pointer to backend info.
++ * @out_backend: Non-NULL pointer to where backend is stored on success.
++ * Return: 0 on success, else error code.
++ */
++static int
++kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info,
++				struct kbase_hwcnt_backend_csf **out_backend)
++{
++	struct kbase_hwcnt_backend_csf *backend_csf = NULL;
++	/* Default for the kzalloc() failures below, which jump to the unwind
++	 * labels without assigning errcode themselves.
++	 */
++	int errcode = -ENOMEM;
++
++	WARN_ON(!csf_info);
++	WARN_ON(!out_backend);
++
++	backend_csf = kzalloc(sizeof(*backend_csf), GFP_KERNEL);
++	if (!backend_csf)
++		goto alloc_error;
++
++	backend_csf->info = csf_info;
++	kbasep_hwcnt_backend_csf_init_layout(&csf_info->prfcnt_info,
++					     &backend_csf->phys_layout);
++
++	/* Three sample buffers, each prfcnt_info.dump_bytes long and zeroed. */
++	backend_csf->accum_buf =
++		kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL);
++	if (!backend_csf->accum_buf)
++		goto err_alloc_acc_buf;
++
++	backend_csf->old_sample_buf =
++		kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL);
++	if (!backend_csf->old_sample_buf)
++		goto err_alloc_pre_sample_buf;
++
++	backend_csf->to_user_buf =
++		kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL);
++	if (!backend_csf->to_user_buf)
++		goto err_alloc_user_sample_buf;
++
++	/* From here on errcode holds the ring_buf_alloc() result, i.e. 0 once
++	 * the ring buffer has been allocated.
++	 */
++	errcode = csf_info->csf_if->ring_buf_alloc(
++		csf_info->csf_if->ctx, csf_info->ring_buf_cnt,
++		&backend_csf->ring_buf_cpu_base, &backend_csf->ring_buf);
++	if (errcode)
++		goto err_ring_buf_alloc;
++
++	/* Zero all performance enable header to prepare for first enable. */
++	kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(backend_csf);
++
++	/* Sync zeroed buffers to avoid coherency issues on use. */
++	backend_csf->info->csf_if->ring_buf_sync(
++		backend_csf->info->csf_if->ctx, backend_csf->ring_buf, 0,
++		backend_csf->info->ring_buf_cnt, false);
++
++	init_completion(&backend_csf->dump_completed);
++
++	init_waitqueue_head(&backend_csf->enable_state_waitq);
++
++	/* Allocate a single threaded work queue for dump worker and threshold
++	 * worker.
++	 */
++	backend_csf->hwc_dump_workq =
++		alloc_workqueue("mali_hwc_dump_wq", WQ_HIGHPRI | WQ_UNBOUND, 1);
++	if (!backend_csf->hwc_dump_workq) {
++		/* ring_buf_alloc() succeeded and left errcode at 0. Set a real
++		 * error code, otherwise the caller is told the create
++		 * succeeded while *out_backend has not been written.
++		 */
++		errcode = -ENOMEM;
++		goto err_alloc_workqueue;
++	}
++
++	INIT_WORK(&backend_csf->hwc_dump_work,
++		  kbasep_hwcnt_backend_csf_dump_worker);
++	INIT_WORK(&backend_csf->hwc_threshold_work,
++		  kbasep_hwcnt_backend_csf_threshold_worker);
++
++	/* A new backend starts disabled and idle, with the dump completion
++	 * already signalled.
++	 */
++	backend_csf->enable_state = KBASE_HWCNT_BACKEND_CSF_DISABLED;
++	backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
++	complete_all(&backend_csf->dump_completed);
++
++	*out_backend = backend_csf;
++	return 0;
++
++	/* Error unwind: release resources in reverse order of acquisition. */
++err_alloc_workqueue:
++	backend_csf->info->csf_if->ring_buf_free(backend_csf->info->csf_if->ctx,
++						 backend_csf->ring_buf);
++err_ring_buf_alloc:
++	kfree(backend_csf->to_user_buf);
++	backend_csf->to_user_buf = NULL;
++err_alloc_user_sample_buf:
++	kfree(backend_csf->old_sample_buf);
++	backend_csf->old_sample_buf = NULL;
++err_alloc_pre_sample_buf:
++	kfree(backend_csf->accum_buf);
++	backend_csf->accum_buf = NULL;
++err_alloc_acc_buf:
++	kfree(backend_csf);
++alloc_error:
++	return errcode;
++}
++
++/* CSF backend implementation of kbase_hwcnt_backend_init_fn */
++static int
++kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *info,
++			      struct kbase_hwcnt_backend **out_backend)
++{
++	unsigned long flags;
++	struct kbase_hwcnt_backend_csf *backend_csf = NULL;
++	struct kbase_hwcnt_backend_csf_info *csf_info =
++		(struct kbase_hwcnt_backend_csf_info *)info;
++	int errcode;
++	bool success = false;
++
++	if (!info || !out_backend)
++		return -EINVAL;
++
++	/* Create the backend. */
++	errcode = kbasep_hwcnt_backend_csf_create(csf_info, &backend_csf);
++	if (errcode)
++		return errcode;
++
++	/* If it was not created before, attach it to csf_info.
++	 * Use spin lock to avoid concurrent initialization.
++ */ ++ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); ++ if (csf_info->backend == NULL) { ++ csf_info->backend = backend_csf; ++ *out_backend = (struct kbase_hwcnt_backend *)backend_csf; ++ success = true; ++ if (csf_info->unrecoverable_error_happened) ++ backend_csf->enable_state = ++ KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR; ++ } ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, ++ flags); ++ ++ /* Destroy the new created backend if the backend has already created ++ * before. In normal case, this won't happen if the client call init() ++ * function properly. ++ */ ++ if (!success) { ++ kbasep_hwcnt_backend_csf_destroy(backend_csf); ++ return -EBUSY; ++ } ++ ++ return 0; ++} ++ ++/* CSF backend implementation of kbase_hwcnt_backend_term_fn */ ++static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend) ++{ ++ unsigned long flags; ++ struct kbase_hwcnt_backend_csf *backend_csf = ++ (struct kbase_hwcnt_backend_csf *)backend; ++ ++ if (!backend) ++ return; ++ ++ kbasep_hwcnt_backend_csf_dump_disable(backend); ++ ++ /* Set the backend in csf_info to NULL so we won't handle any external ++ * notification anymore since we are terminating. ++ */ ++ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); ++ backend_csf->info->backend = NULL; ++ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, ++ flags); ++ ++ kbasep_hwcnt_backend_csf_destroy(backend_csf); ++} ++ ++/** ++ * kbasep_hwcnt_backend_csf_info_destroy() - Destroy a CSF backend info. ++ * @info: Pointer to info to destroy. ++ * ++ * Can be safely called on a backend info in any state of partial construction. ++ * ++ */ ++static void kbasep_hwcnt_backend_csf_info_destroy( ++ const struct kbase_hwcnt_backend_csf_info *info) ++{ ++ if (!info) ++ return; ++ ++ /* The backend should be destroyed before the info object destroy. 
*/ ++ WARN_ON(info->backend != NULL); ++ ++ /* The metadata should be destroyed before the info object destroy. */ ++ WARN_ON(info->metadata != NULL); ++ ++ kfree(info); ++} ++ ++/** ++ * kbasep_hwcnt_backend_csf_info_create() - Create a CSF backend info. ++ * ++ * @csf_if: Non-NULL pointer to a hwcnt backend CSF interface structure ++ * used to create backend interface. ++ * @ring_buf_cnt: The buffer count of the CSF hwcnt backend ring buffer. ++ * MUST be power of 2. ++ * @out_info: Non-NULL pointer to where info is stored on success. ++ * @return 0 on success, else error code. ++ */ ++static int kbasep_hwcnt_backend_csf_info_create( ++ struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt, ++ const struct kbase_hwcnt_backend_csf_info **out_info) ++{ ++ struct kbase_hwcnt_backend_csf_info *info = NULL; ++ ++ WARN_ON(!csf_if); ++ WARN_ON(!out_info); ++ WARN_ON(!is_power_of_2(ring_buf_cnt)); ++ ++ info = kzalloc(sizeof(*info), GFP_KERNEL); ++ if (!info) ++ return -ENOMEM; ++ ++#if defined(CONFIG_MALI_PRFCNT_SET_SECONDARY) ++ info->counter_set = KBASE_HWCNT_SET_SECONDARY; ++#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY) ++ info->counter_set = KBASE_HWCNT_SET_TERTIARY; ++#else ++ /* Default to primary */ ++ info->counter_set = KBASE_HWCNT_SET_PRIMARY; ++#endif ++ ++ info->backend = NULL; ++ info->csf_if = csf_if; ++ info->ring_buf_cnt = ring_buf_cnt; ++ info->fw_in_protected_mode = false; ++ info->unrecoverable_error_happened = false; ++ ++ *out_info = info; ++ ++ return 0; ++} ++ ++/* CSF backend implementation of kbase_hwcnt_backend_metadata_fn */ ++static const struct kbase_hwcnt_metadata * ++kbasep_hwcnt_backend_csf_metadata(const struct kbase_hwcnt_backend_info *info) ++{ ++ if (!info) ++ return NULL; ++ ++ WARN_ON(!((const struct kbase_hwcnt_backend_csf_info *)info)->metadata); ++ ++ return ((const struct kbase_hwcnt_backend_csf_info *)info)->metadata; ++} ++ ++static void kbasep_hwcnt_backend_csf_handle_unrecoverable_error( ++ struct 
kbase_hwcnt_backend_csf *backend_csf) ++{ ++ bool do_disable = false; ++ ++ backend_csf->info->csf_if->assert_lock_held( ++ backend_csf->info->csf_if->ctx); ++ ++ /* We are already in or transitioning to the unrecoverable error state. ++ * Early out. ++ */ ++ if ((backend_csf->enable_state == ++ KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) || ++ (backend_csf->enable_state == ++ KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER)) ++ return; ++ ++ /* If we are disabled, we know we have no pending workers, so skip the ++ * waiting state. ++ */ ++ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED) { ++ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( ++ backend_csf, ++ KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR); ++ return; ++ } ++ ++ /* Trigger a disable only if we are not already transitioning to ++ * disabled, we don't want to disable twice if an unrecoverable error ++ * happens while we are disabling. ++ */ ++ do_disable = (backend_csf->enable_state != ++ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED); ++ ++ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( ++ backend_csf, ++ KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER); ++ ++ /* Transition the dump to the IDLE state and unblock any waiters. The ++ * IDLE state signifies an error. ++ */ ++ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; ++ complete_all(&backend_csf->dump_completed); ++ ++ /* Trigger a disable only if we are not already transitioning to ++ * disabled, - we don't want to disable twice if an unrecoverable error ++ * happens while we are disabling. 
++ */ ++ if (do_disable) ++ backend_csf->info->csf_if->dump_disable( ++ backend_csf->info->csf_if->ctx); ++} ++ ++static void kbasep_hwcnt_backend_csf_handle_recoverable_error( ++ struct kbase_hwcnt_backend_csf *backend_csf) ++{ ++ backend_csf->info->csf_if->assert_lock_held( ++ backend_csf->info->csf_if->ctx); ++ ++ switch (backend_csf->enable_state) { ++ case KBASE_HWCNT_BACKEND_CSF_DISABLED: ++ case KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER: ++ case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED: ++ case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR: ++ case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER: ++ /* Already disabled or disabling, or in an unrecoverable error. ++ * Nothing to be done to handle the error. ++ */ ++ return; ++ case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED: ++ /* A seemingly recoverable error that occurs while we are ++ * transitioning to enabled is probably unrecoverable. ++ */ ++ kbasep_hwcnt_backend_csf_handle_unrecoverable_error( ++ backend_csf); ++ return; ++ case KBASE_HWCNT_BACKEND_CSF_ENABLED: ++ /* Start transitioning to the disabled state. We can't wait for ++ * it as this recoverable error might be triggered from an ++ * interrupt. The wait will be done in the eventual call to ++ * disable(). ++ */ ++ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( ++ backend_csf, ++ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED); ++ /* Transition the dump to the IDLE state and unblock any ++ * waiters. The IDLE state signifies an error. 
++ */ ++ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; ++ complete_all(&backend_csf->dump_completed); ++ ++ backend_csf->info->csf_if->dump_disable( ++ backend_csf->info->csf_if->ctx); ++ return; ++ } ++} ++ ++void kbase_hwcnt_backend_csf_protm_entered( ++ struct kbase_hwcnt_backend_interface *iface) ++{ ++ struct kbase_hwcnt_backend_csf_info *csf_info = ++ (struct kbase_hwcnt_backend_csf_info *)iface->info; ++ ++ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); ++ csf_info->fw_in_protected_mode = true; ++ ++ /* Call on_prfcnt_sample() to trigger collection of the protected mode ++ * entry auto-sample if there is currently a pending dump request. ++ */ ++ kbase_hwcnt_backend_csf_on_prfcnt_sample(iface); ++} ++ ++void kbase_hwcnt_backend_csf_protm_exited( ++ struct kbase_hwcnt_backend_interface *iface) ++{ ++ struct kbase_hwcnt_backend_csf_info *csf_info; ++ ++ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; ++ ++ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); ++ csf_info->fw_in_protected_mode = false; ++} ++ ++void kbase_hwcnt_backend_csf_on_unrecoverable_error( ++ struct kbase_hwcnt_backend_interface *iface) ++{ ++ unsigned long flags; ++ struct kbase_hwcnt_backend_csf_info *csf_info; ++ ++ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; ++ ++ csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags); ++ csf_info->unrecoverable_error_happened = true; ++ /* Early out if the backend does not exist. 
*/ ++ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) { ++ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); ++ return; ++ } ++ ++ kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend); ++ ++ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); ++} ++ ++void kbase_hwcnt_backend_csf_on_before_reset( ++ struct kbase_hwcnt_backend_interface *iface) ++{ ++ unsigned long flags; ++ struct kbase_hwcnt_backend_csf_info *csf_info; ++ struct kbase_hwcnt_backend_csf *backend_csf; ++ ++ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; ++ ++ csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags); ++ csf_info->unrecoverable_error_happened = false; ++ /* Early out if the backend does not exist. */ ++ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) { ++ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); ++ return; ++ } ++ backend_csf = csf_info->backend; ++ ++ if ((backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED) && ++ (backend_csf->enable_state != ++ KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR)) { ++ /* Before a reset occurs, we must either have been disabled ++ * (else we lose data) or we should have encountered an ++ * unrecoverable error. Either way, we will have disabled the ++ * interface and waited for any workers that might have still ++ * been in flight. ++ * If not in these states, fire off one more disable to make ++ * sure everything is turned off before the power is pulled. ++ * We can't wait for this disable to complete, but it doesn't ++ * really matter, the power is being pulled. 
++ */ ++ kbasep_hwcnt_backend_csf_handle_unrecoverable_error( ++ csf_info->backend); ++ } ++ ++ /* A reset is the only way to exit the unrecoverable error state */ ++ if (backend_csf->enable_state == ++ KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) { ++ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( ++ backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED); ++ } ++ ++ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); ++} ++ ++void kbase_hwcnt_backend_csf_on_prfcnt_sample( ++ struct kbase_hwcnt_backend_interface *iface) ++{ ++ struct kbase_hwcnt_backend_csf_info *csf_info; ++ struct kbase_hwcnt_backend_csf *backend_csf; ++ ++ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; ++ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); ++ ++ /* Early out if the backend does not exist. */ ++ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) ++ return; ++ backend_csf = csf_info->backend; ++ ++ /* If the current state is not REQUESTED, this HWC sample will be ++ * skipped and processed in next dump_request. ++ */ ++ if (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED) ++ return; ++ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT; ++ ++ kbase_hwcnt_backend_csf_submit_dump_worker(csf_info); ++} ++ ++void kbase_hwcnt_backend_csf_on_prfcnt_threshold( ++ struct kbase_hwcnt_backend_interface *iface) ++{ ++ struct kbase_hwcnt_backend_csf_info *csf_info; ++ struct kbase_hwcnt_backend_csf *backend_csf; ++ ++ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; ++ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); ++ ++ /* Early out if the backend does not exist. */ ++ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) ++ return; ++ backend_csf = csf_info->backend; ++ ++ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) ++ /* Submit the threshold work into the work queue to consume the ++ * available samples. 
++ */ ++ queue_work(backend_csf->hwc_dump_workq, ++ &backend_csf->hwc_threshold_work); ++} ++ ++void kbase_hwcnt_backend_csf_on_prfcnt_overflow( ++ struct kbase_hwcnt_backend_interface *iface) ++{ ++ struct kbase_hwcnt_backend_csf_info *csf_info; ++ ++ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; ++ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); ++ ++ /* Early out if the backend does not exist. */ ++ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) ++ return; ++ ++ /* Called when an overflow occurs. We treat this as a recoverable error, ++ * so we start transitioning to the disabled state. ++ * We could try and handle it while enabled, but in a real system we ++ * never expect an overflow to occur so there is no point implementing ++ * complex recovery code when we can just turn ourselves off instead for ++ * a while. ++ */ ++ kbasep_hwcnt_backend_csf_handle_recoverable_error(csf_info->backend); ++} ++ ++void kbase_hwcnt_backend_csf_on_prfcnt_enable( ++ struct kbase_hwcnt_backend_interface *iface) ++{ ++ struct kbase_hwcnt_backend_csf_info *csf_info; ++ struct kbase_hwcnt_backend_csf *backend_csf; ++ ++ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; ++ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); ++ ++ /* Early out if the backend does not exist. */ ++ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) ++ return; ++ backend_csf = csf_info->backend; ++ ++ if (backend_csf->enable_state == ++ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) { ++ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( ++ backend_csf, KBASE_HWCNT_BACKEND_CSF_ENABLED); ++ } else if (backend_csf->enable_state == ++ KBASE_HWCNT_BACKEND_CSF_ENABLED) { ++ /* Unexpected, but we are already in the right state so just ++ * ignore it. ++ */ ++ } else { ++ /* Unexpected state change, assume everything is broken until ++ * we reset. 
++ */ ++ kbasep_hwcnt_backend_csf_handle_unrecoverable_error( ++ csf_info->backend); ++ } ++} ++ ++void kbase_hwcnt_backend_csf_on_prfcnt_disable( ++ struct kbase_hwcnt_backend_interface *iface) ++{ ++ struct kbase_hwcnt_backend_csf_info *csf_info; ++ struct kbase_hwcnt_backend_csf *backend_csf; ++ ++ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; ++ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); ++ ++ /* Early out if the backend does not exist. */ ++ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) ++ return; ++ backend_csf = csf_info->backend; ++ ++ if (backend_csf->enable_state == ++ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED) { ++ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( ++ backend_csf, ++ KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER); ++ } else if (backend_csf->enable_state == ++ KBASE_HWCNT_BACKEND_CSF_DISABLED) { ++ /* Unexpected, but we are already in the right state so just ++ * ignore it. ++ */ ++ } else { ++ /* Unexpected state change, assume everything is broken until ++ * we reset. ++ */ ++ kbasep_hwcnt_backend_csf_handle_unrecoverable_error( ++ csf_info->backend); ++ } ++} ++ ++int kbase_hwcnt_backend_csf_metadata_init( ++ struct kbase_hwcnt_backend_interface *iface) ++{ ++ int errcode; ++ struct kbase_hwcnt_backend_csf_info *csf_info; ++ struct kbase_hwcnt_gpu_info gpu_info; ++ ++ if (!iface) ++ return -EINVAL; ++ ++ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; ++ ++ WARN_ON(!csf_info->csf_if->get_prfcnt_info); ++ ++ csf_info->csf_if->get_prfcnt_info(csf_info->csf_if->ctx, ++ &csf_info->prfcnt_info); ++ ++ /* The clock domain count must not exceed the maximum ++ * number of clock regulators.
++ */ ++ if (csf_info->prfcnt_info.clk_cnt > BASE_MAX_NR_CLOCKS_REGULATORS) ++ return -EIO; ++ ++ gpu_info.l2_count = csf_info->prfcnt_info.l2_count; ++ gpu_info.core_mask = csf_info->prfcnt_info.core_mask; ++ gpu_info.clk_cnt = csf_info->prfcnt_info.clk_cnt; ++ gpu_info.prfcnt_values_per_block = ++ csf_info->prfcnt_info.prfcnt_block_size / ++ KBASE_HWCNT_VALUE_BYTES; ++ errcode = kbase_hwcnt_csf_metadata_create( ++ &gpu_info, csf_info->counter_set, &csf_info->metadata); ++ if (errcode) ++ return errcode; ++ ++ /* ++ * Dump abstraction size should be exactly the same size and layout as ++ * the physical dump size, for backwards compatibility. ++ */ ++ WARN_ON(csf_info->prfcnt_info.dump_bytes != ++ csf_info->metadata->dump_buf_bytes); ++ ++ return 0; ++} ++ ++void kbase_hwcnt_backend_csf_metadata_term( ++ struct kbase_hwcnt_backend_interface *iface) ++{ ++ struct kbase_hwcnt_backend_csf_info *csf_info; ++ ++ if (!iface) ++ return; ++ ++ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; ++ if (csf_info->metadata) { ++ kbase_hwcnt_csf_metadata_destroy(csf_info->metadata); ++ csf_info->metadata = NULL; ++ } ++} ++ ++int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if, ++ u32 ring_buf_cnt, ++ struct kbase_hwcnt_backend_interface *iface) ++{ ++ int errcode; ++ const struct kbase_hwcnt_backend_csf_info *info = NULL; ++ ++ if (!iface || !csf_if) ++ return -EINVAL; ++ ++ /* The buffer count must be a power of 2 */ ++ if (!is_power_of_2(ring_buf_cnt)) ++ return -EINVAL; ++ ++ errcode = kbasep_hwcnt_backend_csf_info_create(csf_if, ring_buf_cnt, ++ &info); ++ if (errcode) ++ return errcode; ++ ++ iface->info = (struct kbase_hwcnt_backend_info *)info; ++ iface->metadata = kbasep_hwcnt_backend_csf_metadata; ++ iface->init = kbasep_hwcnt_backend_csf_init; ++ iface->term = kbasep_hwcnt_backend_csf_term; ++ iface->timestamp_ns = kbasep_hwcnt_backend_csf_timestamp_ns; ++ iface->dump_enable = kbasep_hwcnt_backend_csf_dump_enable; ++
iface->dump_enable_nolock = kbasep_hwcnt_backend_csf_dump_enable_nolock; ++ iface->dump_disable = kbasep_hwcnt_backend_csf_dump_disable; ++ iface->dump_clear = kbasep_hwcnt_backend_csf_dump_clear; ++ iface->dump_request = kbasep_hwcnt_backend_csf_dump_request; ++ iface->dump_wait = kbasep_hwcnt_backend_csf_dump_wait; ++ iface->dump_get = kbasep_hwcnt_backend_csf_dump_get; ++ ++ return 0; ++} ++ ++void kbase_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_interface *iface) ++{ ++ if (!iface) ++ return; ++ ++ kbasep_hwcnt_backend_csf_info_destroy( ++ (const struct kbase_hwcnt_backend_csf_info *)iface->info); ++ memset(iface, 0, sizeof(*iface)); ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf.h +new file mode 100644 +index 0000000..ce1af9a +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf.h +@@ -0,0 +1,162 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++/** ++ * Concrete implementation of mali_kbase_hwcnt_backend interface for CSF ++ * backend. 
++ */ ++ ++#ifndef _KBASE_HWCNT_BACKEND_CSF_H_ ++#define _KBASE_HWCNT_BACKEND_CSF_H_ ++ ++#include "mali_kbase_hwcnt_backend.h" ++#include "mali_kbase_hwcnt_backend_csf_if.h" ++ ++/** ++ * kbase_hwcnt_backend_csf_create() - Create a CSF hardware counter backend ++ * interface. ++ * @csf_if: Non-NULL pointer to a hwcnt backend CSF interface structure ++ * used to create backend interface. ++ * @ring_buf_cnt: The buffer count of CSF hwcnt backend, used when allocate ring ++ * buffer, MUST be power of 2. ++ * @iface: Non-NULL pointer to backend interface structure that is filled ++ * in on creation success. ++ * ++ * Calls to iface->dump_enable_nolock() require the CSF Scheduler IRQ lock. ++ * ++ * Return: 0 on success, else error code. ++ */ ++int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if, ++ u32 ring_buf_cnt, ++ struct kbase_hwcnt_backend_interface *iface); ++ ++/** ++ * kbase_hwcnt_backend_csf_metadata_init() - Initialize the metadata for a CSF ++ * hardware counter backend. ++ * @iface: Non-NULL pointer to backend interface structure ++ * Return: 0 on success, else error code. ++ */ ++int kbase_hwcnt_backend_csf_metadata_init( ++ struct kbase_hwcnt_backend_interface *iface); ++ ++/** ++ * kbase_hwcnt_backend_csf_metadata_term() - Terminate the metadata for a CSF ++ * hardware counter backend. ++ * @iface: Non-NULL pointer to backend interface structure. ++ */ ++void kbase_hwcnt_backend_csf_metadata_term( ++ struct kbase_hwcnt_backend_interface *iface); ++ ++/** ++ * kbase_hwcnt_backend_csf_destroy() - Destroy a CSF hardware counter backend ++ * interface. ++ * @iface: Pointer to interface to destroy. ++ * ++ * Can be safely called on an all-zeroed interface, or on an already destroyed ++ * interface. 
++ */ ++void kbase_hwcnt_backend_csf_destroy( ++ struct kbase_hwcnt_backend_interface *iface); ++ ++/** ++ * kbase_hwcnt_backend_csf_protm_entered() - CSF HWC backend function to receive ++ * notification that protected mode ++ * has been entered. ++ * @iface: Non-NULL pointer to HWC backend interface. ++ */ ++void kbase_hwcnt_backend_csf_protm_entered( ++ struct kbase_hwcnt_backend_interface *iface); ++ ++/** ++ * kbase_hwcnt_backend_csf_protm_exited() - CSF HWC backend function to receive ++ * notification that protected mode has ++ * been exited. ++ * @iface: Non-NULL pointer to HWC backend interface. ++ */ ++void kbase_hwcnt_backend_csf_protm_exited( ++ struct kbase_hwcnt_backend_interface *iface); ++ ++/** ++ * kbase_hwcnt_backend_csf_on_unrecoverable_error() - CSF HWC backend function ++ * called when unrecoverable ++ * errors are detected. ++ * @iface: Non-NULL pointer to HWC backend interface. ++ * ++ * This should be called on encountering errors that can only be recovered from ++ * with reset, or that may put HWC logic in state that could result in hang. For ++ * example, on bus error, or when FW becomes unresponsive. ++ */ ++void kbase_hwcnt_backend_csf_on_unrecoverable_error( ++ struct kbase_hwcnt_backend_interface *iface); ++ ++/** ++ * kbase_hwcnt_backend_csf_on_before_reset() - CSF HWC backend function to be ++ * called immediately before a ++ * reset. Takes us out of the ++ * unrecoverable error state, if we ++ * were in it. ++ * @iface: Non-NULL pointer to HWC backend interface. ++ */ ++void kbase_hwcnt_backend_csf_on_before_reset( ++ struct kbase_hwcnt_backend_interface *iface); ++ ++/** ++ * kbase_hwcnt_backend_csf_on_prfcnt_sample() - CSF performance counter sample ++ * complete interrupt handler. ++ * @iface: Non-NULL pointer to HWC backend interface. 
++ */ ++void kbase_hwcnt_backend_csf_on_prfcnt_sample( ++ struct kbase_hwcnt_backend_interface *iface); ++ ++/** ++ * kbase_hwcnt_backend_csf_on_prfcnt_threshold() - CSF performance counter ++ * buffer reach threshold ++ * interrupt handler. ++ * @iface: Non-NULL pointer to HWC backend interface. ++ */ ++void kbase_hwcnt_backend_csf_on_prfcnt_threshold( ++ struct kbase_hwcnt_backend_interface *iface); ++ ++/** ++ * kbase_hwcnt_backend_csf_on_prfcnt_overflow() - CSF performance counter buffer ++ * overflow interrupt handler. ++ * @iface: Non-NULL pointer to HWC backend interface. ++ */ ++void kbase_hwcnt_backend_csf_on_prfcnt_overflow( ++ struct kbase_hwcnt_backend_interface *iface); ++ ++/** ++ * kbase_hwcnt_backend_csf_on_prfcnt_enable() - CSF performance counter enabled ++ * interrupt handler. ++ * @iface: Non-NULL pointer to HWC backend interface. ++ */ ++void kbase_hwcnt_backend_csf_on_prfcnt_enable( ++ struct kbase_hwcnt_backend_interface *iface); ++ ++/** ++ * kbase_hwcnt_backend_csf_on_prfcnt_disable() - CSF performance counter ++ * disabled interrupt handler. ++ * @iface: Non-NULL pointer to HWC backend interface. ++ */ ++void kbase_hwcnt_backend_csf_on_prfcnt_disable( ++ struct kbase_hwcnt_backend_interface *iface); ++ ++#endif /* _KBASE_HWCNT_BACKEND_CSF_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf_if.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf_if.h +new file mode 100644 +index 0000000..f6387c2 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf_if.h +@@ -0,0 +1,311 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++/* ++ * Virtual interface for CSF hardware counter backend. ++ */ ++ ++#ifndef _KBASE_HWCNT_BACKEND_CSF_IF_H_ ++#define _KBASE_HWCNT_BACKEND_CSF_IF_H_ ++ ++#include ++ ++/** ++ * struct kbase_hwcnt_backend_csf_if_ctx - Opaque pointer to a CSF interface ++ * context. ++ */ ++struct kbase_hwcnt_backend_csf_if_ctx; ++ ++/** ++ * struct kbase_hwcnt_backend_csf_if_ring_buf - Opaque pointer to a CSF ++ * interface ring buffer. ++ */ ++struct kbase_hwcnt_backend_csf_if_ring_buf; ++ ++/** ++ * struct kbase_hwcnt_backend_csf_if_enable - enable hardware counter collection ++ * structure. ++ * @fe_bm: Front End counters selection bitmask. ++ * @shader_bm: Shader counters selection bitmask. ++ * @tiler_bm: Tiler counters selection bitmask. ++ * @mmu_l2_bm: MMU_L2 counters selection bitmask. ++ * @counter_set: The performance counter set to enable. ++ * @clk_enable_map: A u64 bitfield, each bit of which enables the cycle ++ * counter for a given clock domain. ++ */ ++struct kbase_hwcnt_backend_csf_if_enable { ++ u32 fe_bm; ++ u32 shader_bm; ++ u32 tiler_bm; ++ u32 mmu_l2_bm; ++ u8 counter_set; ++ u64 clk_enable_map; ++}; ++ ++/** ++ * struct kbase_hwcnt_backend_csf_if_prfcnt_info - Performance counter ++ * information.
++ * @dump_bytes: Bytes of GPU memory required to perform a performance ++ * counter dump. ++ * @prfcnt_block_size: Bytes of each performance counter block. ++ * @l2_count: The MMU L2 cache count. ++ * @core_mask: Shader core mask. ++ * @clk_cnt: Clock domain count in the system. ++ * @clearing_samples: Indicates whether counters are cleared after each sample ++ * is taken. ++ */ ++struct kbase_hwcnt_backend_csf_if_prfcnt_info { ++ size_t dump_bytes; ++ size_t prfcnt_block_size; ++ size_t l2_count; ++ u64 core_mask; ++ u8 clk_cnt; ++ bool clearing_samples; ++}; ++ ++/** ++ * typedef kbase_hwcnt_backend_csf_if_assert_lock_held_fn - Assert that the ++ * backend spinlock is ++ * held. ++ * @ctx: Non-NULL pointer to a CSF context. ++ */ ++typedef void kbase_hwcnt_backend_csf_if_assert_lock_held_fn( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx); ++ ++/** ++ * typedef kbase_hwcnt_backend_csf_if_lock_fn - Acquire backend spinlock. ++ * ++ * @ctx: Non-NULL pointer to a CSF context. ++ * @flags: Pointer to the memory location that would store the previous ++ * interrupt state. ++ */ ++typedef void ++kbase_hwcnt_backend_csf_if_lock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ unsigned long *flags); ++ ++/** ++ * typedef kbase_hwcnt_backend_csf_if_unlock_fn - Release backend spinlock. ++ * ++ * @ctx: Non-NULL pointer to a CSF context. ++ * @flags: Previously stored interrupt state when Scheduler interrupt ++ * spinlock was acquired. ++ */ ++typedef void ++kbase_hwcnt_backend_csf_if_unlock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ unsigned long flags); ++ ++/** ++ * typedef kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn - Get performance ++ * counter information. ++ * @ctx: Non-NULL pointer to a CSF context. ++ * @prfcnt_info: Non-NULL pointer to struct where performance counter ++ * information should be stored.
++ */ ++typedef void kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info); ++ ++/** ++ * typedef kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn - Allocate a ring buffer ++ * for CSF interface. ++ * @ctx: Non-NULL pointer to a CSF context. ++ * @buf_count: The buffer count in the ring buffer to be allocated, ++ * MUST be power of 2. ++ * @cpu_dump_base: Non-NULL pointer to where ring buffer CPU base address is ++ * stored when success. ++ * @ring_buf: Non-NULL pointer to where ring buffer is stored when success. ++ * ++ * A ring buffer is needed by the CSF interface to do manual HWC sample and ++ * automatic HWC samples, the buffer count in the ring buffer MUST be power ++ * of 2 to meet the hardware requirement. ++ * ++ * Return: 0 on success, else error code. ++ */ ++typedef int kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count, ++ void **cpu_dump_base, ++ struct kbase_hwcnt_backend_csf_if_ring_buf **ring_buf); ++ ++/** ++ * typedef kbase_hwcnt_backend_csf_if_ring_buf_sync_fn - Sync HWC dump buffers ++ * memory. ++ * @ctx: Non-NULL pointer to a CSF context. ++ * @ring_buf: Non-NULL pointer to the ring buffer. ++ * @buf_index_first: The first buffer index in the ring buffer to be synced, ++ * inclusive. ++ * @buf_index_last: The last buffer index in the ring buffer to be synced, ++ * exclusive. ++ * @for_cpu: The direction of sync to be applied, set to true when CPU ++ * cache needs invalidating before reading the buffer, and set ++ * to false after CPU writes to flush these before this memory ++ * is overwritten by the GPU. ++ * ++ * Flush cached HWC dump buffer data to ensure that all writes from GPU and CPU ++ * are correctly observed. 
++ */ ++typedef void kbase_hwcnt_backend_csf_if_ring_buf_sync_fn( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, ++ u32 buf_index_first, u32 buf_index_last, bool for_cpu); ++ ++/** ++ * typedef kbase_hwcnt_backend_csf_if_ring_buf_free_fn - Free a ring buffer for ++ * the CSF interface. ++ * ++ * @ctx: Non-NULL pointer to a CSF interface context. ++ * @ring_buf: Non-NULL pointer to the ring buffer which to be freed. ++ */ ++typedef void kbase_hwcnt_backend_csf_if_ring_buf_free_fn( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf); ++ ++/** ++ * typedef kbase_hwcnt_backend_csf_if_timestamp_ns_fn - Get the current ++ * timestamp of the CSF ++ * interface. ++ * @ctx: Non-NULL pointer to a CSF interface context. ++ * ++ * Return: CSF interface timestamp in nanoseconds. ++ */ ++typedef u64 kbase_hwcnt_backend_csf_if_timestamp_ns_fn( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx); ++ ++/** ++ * typedef kbase_hwcnt_backend_csf_if_dump_enable_fn - Setup and enable hardware ++ * counter in CSF interface. ++ * @ctx: Non-NULL pointer to a CSF interface context. ++ * @ring_buf: Non-NULL pointer to the ring buffer which used to setup the HWC. ++ * @enable: Non-NULL pointer to the enable map of HWC. ++ * ++ * Requires lock to be taken before calling. ++ */ ++typedef void kbase_hwcnt_backend_csf_if_dump_enable_fn( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, ++ struct kbase_hwcnt_backend_csf_if_enable *enable); ++ ++/** ++ * typedef kbase_hwcnt_backend_csf_if_dump_disable_fn - Disable hardware counter ++ * in CSF interface. ++ * @ctx: Non-NULL pointer to a CSF interface context. ++ * ++ * Requires lock to be taken before calling. 
++ */ ++typedef void kbase_hwcnt_backend_csf_if_dump_disable_fn( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx); ++ ++/** ++ * typedef kbase_hwcnt_backend_csf_if_dump_request_fn - Request a HWC dump. ++ * ++ * @ctx: Non-NULL pointer to the interface context. ++ * ++ * Requires lock to be taken before calling. ++ */ ++typedef void kbase_hwcnt_backend_csf_if_dump_request_fn( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx); ++ ++/** ++ * typedef kbase_hwcnt_backend_csf_if_get_indexes_fn - Get current extract and ++ * insert indexes of the ++ * ring buffer. ++ * ++ * @ctx: Non-NULL pointer to a CSF interface context. ++ * @extract_index: Non-NULL pointer where the current extract index is saved. ++ * @insert_index: Non-NULL pointer where the current insert index is saved. ++ * ++ * Requires lock to be taken before calling. ++ */ ++typedef void kbase_hwcnt_backend_csf_if_get_indexes_fn( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index, ++ u32 *insert_index); ++ ++/** ++ * typedef kbase_hwcnt_backend_csf_if_set_extract_index_fn - Update the extract ++ * index of the ring ++ * buffer. ++ * ++ * @ctx: Non-NULL pointer to a CSF interface context. ++ * @extract_index: New extract index to be set. ++ * ++ * Requires lock to be taken before calling. ++ */ ++typedef void kbase_hwcnt_backend_csf_if_set_extract_index_fn( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 extract_index); ++ ++/** ++ * typedef kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn - Get the current ++ * GPU cycle count. ++ * @ctx: Non-NULL pointer to a CSF interface context. ++ * @cycle_counts: Non-NULL pointer to an array where the cycle counts are saved, ++ * the array size should be at least as big as the number of ++ * clock domains returned by get_prfcnt_info interface. ++ * @clk_enable_map: A u64 bitfield, each bit of which specifies an enabled clock ++ * domain. ++ * ++ * Requires lock to be taken before calling.
++ */ ++typedef void kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx, u64 *cycle_counts, ++ u64 clk_enable_map); ++ ++/** ++ * struct kbase_hwcnt_backend_csf_if - Hardware counter backend CSF virtual ++ * interface. ++ * @ctx: CSF interface context. ++ * @assert_lock_held: Function ptr to assert backend spinlock is held. ++ * @lock: Function ptr to acquire backend spinlock. ++ * @unlock: Function ptr to release backend spinlock. ++ * @get_prfcnt_info: Function ptr to get performance counter related ++ * information. ++ * @ring_buf_alloc: Function ptr to allocate ring buffer for CSF HWC. ++ * @ring_buf_sync: Function ptr to sync ring buffer to CPU. ++ * @ring_buf_free: Function ptr to free ring buffer for CSF HWC. ++ * @timestamp_ns: Function ptr to get the current CSF interface ++ * timestamp. ++ * @dump_enable: Function ptr to enable dumping. ++ * @dump_enable_nolock: Function ptr to enable dumping while the ++ * backend-specific spinlock is already held. ++ * @dump_disable: Function ptr to disable dumping. ++ * @dump_request: Function ptr to request a dump. ++ * @get_indexes: Function ptr to get extract and insert indexes of the ++ * ring buffer. ++ * @set_extract_index: Function ptr to set extract index of ring buffer. ++ * @get_gpu_cycle_count: Function ptr to get the GPU cycle count.
++ */ ++struct kbase_hwcnt_backend_csf_if { ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx; ++ kbase_hwcnt_backend_csf_if_assert_lock_held_fn *assert_lock_held; ++ kbase_hwcnt_backend_csf_if_lock_fn *lock; ++ kbase_hwcnt_backend_csf_if_unlock_fn *unlock; ++ kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn *get_prfcnt_info; ++ kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn *ring_buf_alloc; ++ kbase_hwcnt_backend_csf_if_ring_buf_sync_fn *ring_buf_sync; ++ kbase_hwcnt_backend_csf_if_ring_buf_free_fn *ring_buf_free; ++ kbase_hwcnt_backend_csf_if_timestamp_ns_fn *timestamp_ns; ++ kbase_hwcnt_backend_csf_if_dump_enable_fn *dump_enable; ++ kbase_hwcnt_backend_csf_if_dump_disable_fn *dump_disable; ++ kbase_hwcnt_backend_csf_if_dump_request_fn *dump_request; ++ kbase_hwcnt_backend_csf_if_get_indexes_fn *get_indexes; ++ kbase_hwcnt_backend_csf_if_set_extract_index_fn *set_extract_index; ++ kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn *get_gpu_cycle_count; ++}; ++ ++#endif /* _KBASE_HWCNT_BACKEND_CSF_IF_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf_if_fw.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf_if_fw.c +new file mode 100644 +index 0000000..979299f +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf_if_fw.c +@@ -0,0 +1,787 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++/* ++ * CSF GPU HWC backend firmware interface APIs. ++ */ ++ ++#include ++#include ++#include ++#include "mali_kbase_hwcnt_gpu.h" ++#include "mali_kbase_hwcnt_types.h" ++#include ++ ++#include "csf/mali_kbase_csf_firmware.h" ++#include "mali_kbase_hwcnt_backend_csf_if_fw.h" ++#include "mali_kbase_hwaccess_time.h" ++#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" ++ ++#include ++#include "mali_kbase_ccswe.h" ++ ++ ++/** The number of nanoseconds in a second. */ ++#define NSECS_IN_SEC 1000000000ull /* ns */ ++ ++/* Ring buffer virtual address start at 4GB */ ++#define KBASE_HWC_CSF_RING_BUFFER_VA_START (1ull << 32) ++ ++/** ++ * struct kbase_hwcnt_backend_csf_if_fw_ring_buf - ring buffer for CSF interface ++ * used to save the manual and ++ * auto HWC samples from ++ * firmware. ++ * @gpu_dump_base: Starting GPU base address of the ring buffer. ++ * @cpu_dump_base: Starting CPU address for the mapping. ++ * @buf_count: Buffer count in the ring buffer, MUST be power of 2. ++ * @as_nr: Address space number for the memory mapping. ++ * @phys: Physical memory allocation used by the mapping. ++ * @num_pages: Size of the mapping, in memory pages. ++ */ ++struct kbase_hwcnt_backend_csf_if_fw_ring_buf { ++ u64 gpu_dump_base; ++ void *cpu_dump_base; ++ size_t buf_count; ++ u32 as_nr; ++ struct tagged_addr *phys; ++ size_t num_pages; ++}; ++ ++/** ++ * struct kbase_hwcnt_backend_csf_if_fw_ctx - Firmware context for the CSF ++ * interface, used to communicate ++ * with firmware. ++ * @kbdev: KBase device. ++ * @buf_bytes: The size in bytes for each buffer in the ring buffer. ++ * @clk_cnt: The number of clock domains in the system. ++ * The maximum is 64. 
++ * @clk_enable_map: Bitmask of enabled clocks ++ * @rate_listener: Clock rate listener callback state. ++ * @ccswe_shader_cores: Shader cores cycle count software estimator. ++ */ ++struct kbase_hwcnt_backend_csf_if_fw_ctx { ++ struct kbase_device *kbdev; ++ size_t buf_bytes; ++ u8 clk_cnt; ++ u64 clk_enable_map; ++ struct kbase_clk_rate_listener rate_listener; ++ struct kbase_ccswe ccswe_shader_cores; ++}; ++ ++static void kbasep_hwcnt_backend_csf_if_fw_assert_lock_held( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx) ++{ ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; ++ struct kbase_device *kbdev; ++ ++ WARN_ON(!ctx); ++ ++ fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; ++ kbdev = fw_ctx->kbdev; ++ ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++} ++ ++static void ++kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ unsigned long *flags) ++{ ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; ++ struct kbase_device *kbdev; ++ ++ WARN_ON(!ctx); ++ ++ fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; ++ kbdev = fw_ctx->kbdev; ++ ++ kbase_csf_scheduler_spin_lock(kbdev, flags); ++} ++ ++static void kbasep_hwcnt_backend_csf_if_fw_unlock( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long flags) ++{ ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; ++ struct kbase_device *kbdev; ++ ++ WARN_ON(!ctx); ++ ++ fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; ++ kbdev = fw_ctx->kbdev; ++ ++ kbase_csf_scheduler_spin_lock_assert_held(kbdev); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++} ++ ++/** ++ * kbasep_hwcnt_backend_csf_if_fw_on_freq_change() - On freq change callback ++ * ++ * @rate_listener: Callback state ++ * @clk_index: Clock index ++ * @clk_rate_hz: Clock frequency(hz) ++ */ ++static void kbasep_hwcnt_backend_csf_if_fw_on_freq_change( ++ struct kbase_clk_rate_listener *rate_listener, u32 clk_index, ++ u32 clk_rate_hz) ++{ ++ struct kbase_hwcnt_backend_csf_if_fw_ctx 
*fw_ctx = ++ container_of(rate_listener, ++ struct kbase_hwcnt_backend_csf_if_fw_ctx, ++ rate_listener); ++ u64 timestamp_ns; ++ ++ if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES) ++ return; ++ ++ timestamp_ns = ktime_get_raw_ns(); ++ kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns, ++ clk_rate_hz); ++} ++ ++/** ++ * kbasep_hwcnt_backend_csf_if_fw_cc_enable() - Enable cycle count tracking ++ * ++ * @fw_ctx: Non-NULL pointer to CSF firmware interface context. ++ * @clk_enable_map: u64 bitmask specifying the enabled clock domains. ++ */ ++static void kbasep_hwcnt_backend_csf_if_fw_cc_enable( ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx, u64 clk_enable_map) ++{ ++ struct kbase_device *kbdev = fw_ctx->kbdev; ++ ++ if (kbase_hwcnt_clk_enable_map_enabled( ++ clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) { ++ /* software estimation for non-top clock domains */ ++ struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; ++ const struct kbase_clk_data *clk_data = ++ rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES]; ++ u32 cur_freq; ++ unsigned long flags; ++ u64 timestamp_ns; ++ ++ timestamp_ns = ktime_get_raw_ns(); ++ ++ spin_lock_irqsave(&rtm->lock, flags); ++ ++ cur_freq = (u32)clk_data->clock_val; ++ kbase_ccswe_reset(&fw_ctx->ccswe_shader_cores); ++ kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, ++ timestamp_ns, cur_freq); ++ ++ kbase_clk_rate_trace_manager_subscribe_no_lock( ++ rtm, &fw_ctx->rate_listener); ++ ++ spin_unlock_irqrestore(&rtm->lock, flags); ++ } ++ ++ fw_ctx->clk_enable_map = clk_enable_map; ++} ++ ++/** ++ * kbasep_hwcnt_backend_csf_if_fw_cc_disable() - Disable cycle count tracking ++ * ++ * @fw_ctx: Non-NULL pointer to CSF firmware interface context.
++ */ ++static void kbasep_hwcnt_backend_csf_if_fw_cc_disable( ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx) ++{ ++ struct kbase_device *kbdev = fw_ctx->kbdev; ++ struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; ++ u64 clk_enable_map = fw_ctx->clk_enable_map; ++ ++ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, ++ KBASE_CLOCK_DOMAIN_SHADER_CORES)) ++ kbase_clk_rate_trace_manager_unsubscribe( ++ rtm, &fw_ctx->rate_listener); ++} ++ ++static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info) ++{ ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; ++ struct kbase_device *kbdev; ++ u32 prfcnt_size; ++ u32 prfcnt_hw_size = 0; ++ u32 prfcnt_fw_size = 0; ++ u32 prfcnt_block_size = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK * ++ KBASE_HWCNT_VALUE_BYTES; ++ ++ WARN_ON(!ctx); ++ WARN_ON(!prfcnt_info); ++ ++ fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; ++ kbdev = fw_ctx->kbdev; ++ prfcnt_size = kbdev->csf.global_iface.prfcnt_size; ++ prfcnt_hw_size = (prfcnt_size & 0xFF) << 8; ++ prfcnt_fw_size = (prfcnt_size >> 16) << 8; ++ fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size; ++ ++ ++ prfcnt_info->dump_bytes = fw_ctx->buf_bytes; ++ prfcnt_info->prfcnt_block_size = prfcnt_block_size; ++ prfcnt_info->l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices; ++ prfcnt_info->core_mask = ++ kbdev->gpu_props.props.coherency_info.group[0].core_mask; ++ ++ prfcnt_info->clk_cnt = fw_ctx->clk_cnt; ++ prfcnt_info->clearing_samples = true; ++ ++ /* Block size must be multiple of counter size. */ ++ WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_BYTES) != ++ 0); ++ /* Total size must be multiple of block size. 
*/ ++ WARN_ON((prfcnt_info->dump_bytes % prfcnt_info->prfcnt_block_size) != ++ 0); ++} ++ ++static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count, ++ void **cpu_dump_base, ++ struct kbase_hwcnt_backend_csf_if_ring_buf **out_ring_buf) ++{ ++ struct kbase_device *kbdev; ++ struct tagged_addr *phys; ++ struct page **page_list; ++ void *cpu_addr; ++ int ret; ++ int i; ++ size_t num_pages; ++ u64 flags; ++ struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf; ++ ++ pgprot_t cpu_map_prot = PAGE_KERNEL; ++ u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START; ++ ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = ++ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; ++ ++ WARN_ON(!ctx); ++ WARN_ON(!cpu_dump_base); ++ WARN_ON(!out_ring_buf); ++ ++ kbdev = fw_ctx->kbdev; ++ ++ /* The buffer count must be power of 2 */ ++ if (!is_power_of_2(buf_count)) ++ return -EINVAL; ++ ++ /* alignment failure */ ++ if (gpu_va_base & (2048 - 1)) ++ return -EINVAL; ++ ++ fw_ring_buf = kzalloc(sizeof(*fw_ring_buf), GFP_KERNEL); ++ if (!fw_ring_buf) ++ return -ENOMEM; ++ ++ num_pages = PFN_UP(fw_ctx->buf_bytes * buf_count); ++ phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL); ++ if (!phys) ++ goto phys_alloc_error; ++ ++ page_list = kmalloc_array(num_pages, sizeof(*page_list), GFP_KERNEL); ++ if (!page_list) ++ goto page_list_alloc_error; ++ ++ /* Get physical page for the buffer */ ++ ret = kbase_mem_pool_alloc_pages( ++ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, ++ phys, false); ++ if (ret != num_pages) ++ goto phys_mem_pool_alloc_error; ++ ++ /* Get the CPU virtual address */ ++ for (i = 0; i < num_pages; i++) ++ page_list[i] = as_page(phys[i]); ++ ++ cpu_addr = vmap(page_list, num_pages, VM_MAP, cpu_map_prot); ++ if (!cpu_addr) ++ goto vmap_error; ++ ++ flags = KBASE_REG_GPU_WR | KBASE_REG_GPU_NX | ++ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); ++ ++ /* Update MMU table */ ++ 
ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, ++ gpu_va_base >> PAGE_SHIFT, phys, num_pages, ++ flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW); ++ if (ret) ++ goto mmu_insert_failed; ++ ++ kfree(page_list); ++ ++ fw_ring_buf->gpu_dump_base = gpu_va_base; ++ fw_ring_buf->cpu_dump_base = cpu_addr; ++ fw_ring_buf->phys = phys; ++ fw_ring_buf->num_pages = num_pages; ++ fw_ring_buf->buf_count = buf_count; ++ fw_ring_buf->as_nr = MCU_AS_NR; ++ ++ *cpu_dump_base = fw_ring_buf->cpu_dump_base; ++ *out_ring_buf = ++ (struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf; ++ ++ ++ return 0; ++ ++mmu_insert_failed: ++ vunmap(cpu_addr); ++vmap_error: ++ kbase_mem_pool_free_pages( ++ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, ++ phys, false, false); ++phys_mem_pool_alloc_error: ++ kfree(page_list); ++page_list_alloc_error: ++ kfree(phys); ++phys_alloc_error: ++ kfree(fw_ring_buf); ++ return -ENOMEM; ++} ++ ++static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, ++ u32 buf_index_first, u32 buf_index_last, bool for_cpu) ++{ ++ struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf = ++ (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf; ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = ++ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; ++ size_t i; ++ size_t pg_first; ++ size_t pg_last; ++ u64 start_address; ++ u64 stop_address; ++ u32 ring_buf_index_first; ++ u32 ring_buf_index_last; ++ ++ WARN_ON(!ctx); ++ WARN_ON(!ring_buf); ++ ++ /* The index arguments for this function form an inclusive, exclusive ++ * range. ++ * However, when masking back to the available buffers we will make this ++ * inclusive at both ends so full flushes are not 0 -> 0. 
++ */ ++ ring_buf_index_first = buf_index_first & (fw_ring_buf->buf_count - 1); ++ ring_buf_index_last = ++ (buf_index_last - 1) & (fw_ring_buf->buf_count - 1); ++ ++ /* The start address is the offset of the first buffer. */ ++ start_address = fw_ctx->buf_bytes * ring_buf_index_first; ++ pg_first = start_address >> PAGE_SHIFT; ++ ++ /* The stop address is the last byte in the final buffer. */ ++ stop_address = (fw_ctx->buf_bytes * (ring_buf_index_last + 1)) - 1; ++ pg_last = stop_address >> PAGE_SHIFT; ++ ++ /* Check whether the buffer range wraps. */ ++ if (start_address > stop_address) { ++ /* sync the first part to the end of ring buffer. */ ++ for (i = pg_first; i < fw_ring_buf->num_pages; i++) { ++ struct page *pg = as_page(fw_ring_buf->phys[i]); ++ ++ if (for_cpu) { ++ kbase_sync_single_for_cpu(fw_ctx->kbdev, ++ kbase_dma_addr(pg), ++ PAGE_SIZE, ++ DMA_BIDIRECTIONAL); ++ } else { ++ kbase_sync_single_for_device(fw_ctx->kbdev, ++ kbase_dma_addr(pg), ++ PAGE_SIZE, ++ DMA_BIDIRECTIONAL); ++ } ++ } ++ ++ /* second part starts from page 0. 
*/ ++ pg_first = 0; ++ } ++ ++ for (i = pg_first; i <= pg_last; i++) { ++ struct page *pg = as_page(fw_ring_buf->phys[i]); ++ ++ if (for_cpu) { ++ kbase_sync_single_for_cpu(fw_ctx->kbdev, ++ kbase_dma_addr(pg), PAGE_SIZE, ++ DMA_BIDIRECTIONAL); ++ } else { ++ kbase_sync_single_for_device(fw_ctx->kbdev, ++ kbase_dma_addr(pg), ++ PAGE_SIZE, ++ DMA_BIDIRECTIONAL); ++ } ++ } ++} ++ ++static u64 kbasep_hwcnt_backend_csf_if_fw_timestamp_ns( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx) ++{ ++ CSTD_UNUSED(ctx); ++ return ktime_get_raw_ns(); ++} ++ ++static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_free( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf) ++{ ++ struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf = ++ (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf; ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = ++ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; ++ ++ if (!fw_ring_buf) ++ return; ++ ++ if (fw_ring_buf->phys) { ++ u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START; ++ ++ WARN_ON(kbase_mmu_teardown_pages( ++ fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu, ++ gpu_va_base >> PAGE_SHIFT, fw_ring_buf->num_pages, ++ MCU_AS_NR)); ++ ++ vunmap(fw_ring_buf->cpu_dump_base); ++ ++ kbase_mem_pool_free_pages( ++ &fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], ++ fw_ring_buf->num_pages, fw_ring_buf->phys, false, ++ false); ++ ++ kfree(fw_ring_buf->phys); ++ ++ kfree(fw_ring_buf); ++ } ++} ++ ++static void kbasep_hwcnt_backend_csf_if_fw_dump_enable( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx, ++ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, ++ struct kbase_hwcnt_backend_csf_if_enable *enable) ++{ ++ u32 prfcnt_config; ++ struct kbase_device *kbdev; ++ struct kbase_csf_global_iface *global_iface; ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = ++ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; ++ struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf = ++ (struct 
kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf; ++ ++ WARN_ON(!ctx); ++ WARN_ON(!ring_buf); ++ WARN_ON(!enable); ++ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); ++ ++ kbdev = fw_ctx->kbdev; ++ global_iface = &kbdev->csf.global_iface; ++ ++ /* Configure */ ++ prfcnt_config = fw_ring_buf->buf_count; ++ prfcnt_config |= enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT; ++ ++ /* Configure the ring buffer base address */ ++ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID, ++ fw_ring_buf->as_nr); ++ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_LO, ++ fw_ring_buf->gpu_dump_base & U32_MAX); ++ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_HI, ++ fw_ring_buf->gpu_dump_base >> 32); ++ ++ /* Set extract position to 0 */ ++ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_EXTRACT, 0); ++ ++ /* Configure the enable bitmap */ ++ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSF_EN, ++ enable->fe_bm); ++ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_SHADER_EN, ++ enable->shader_bm); ++ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_MMU_L2_EN, ++ enable->mmu_l2_bm); ++ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_TILER_EN, ++ enable->tiler_bm); ++ ++ /* Configure the HWC set and buffer size */ ++ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CONFIG, ++ prfcnt_config); ++ ++ kbdev->csf.hwcnt.enable_pending = true; ++ ++ /* Unmask the interrupts */ ++ kbase_csf_firmware_global_input_mask( ++ global_iface, GLB_ACK_IRQ_MASK, ++ GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK, ++ GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK); ++ kbase_csf_firmware_global_input_mask( ++ global_iface, GLB_ACK_IRQ_MASK, ++ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK, ++ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK); ++ kbase_csf_firmware_global_input_mask( ++ global_iface, GLB_ACK_IRQ_MASK, ++ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK, ++ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK); ++ 
kbase_csf_firmware_global_input_mask( ++ global_iface, GLB_ACK_IRQ_MASK, ++ GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK, ++ GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK); ++ ++ /* Enable the HWC */ ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, ++ (1 << GLB_REQ_PRFCNT_ENABLE_SHIFT), ++ GLB_REQ_PRFCNT_ENABLE_MASK); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ ++ prfcnt_config = kbase_csf_firmware_global_input_read(global_iface, ++ GLB_PRFCNT_CONFIG); ++ ++ kbasep_hwcnt_backend_csf_if_fw_cc_enable(fw_ctx, ++ enable->clk_enable_map); ++} ++ ++static void kbasep_hwcnt_backend_csf_if_fw_dump_disable( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx) ++{ ++ struct kbase_device *kbdev; ++ struct kbase_csf_global_iface *global_iface; ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = ++ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; ++ ++ WARN_ON(!ctx); ++ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); ++ ++ kbdev = fw_ctx->kbdev; ++ global_iface = &kbdev->csf.global_iface; ++ ++ /* Disable the HWC */ ++ kbdev->csf.hwcnt.enable_pending = true; ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, 0, ++ GLB_REQ_PRFCNT_ENABLE_MASK); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++ ++ /* mask the interrupts */ ++ kbase_csf_firmware_global_input_mask( ++ global_iface, GLB_ACK_IRQ_MASK, 0, ++ GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK); ++ kbase_csf_firmware_global_input_mask( ++ global_iface, GLB_ACK_IRQ_MASK, 0, ++ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK); ++ kbase_csf_firmware_global_input_mask( ++ global_iface, GLB_ACK_IRQ_MASK, 0, ++ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK); ++ ++ /* In case we have a previous request in flight when the disable ++ * happens. 
++ */ ++ kbdev->csf.hwcnt.request_pending = false; ++ ++ kbasep_hwcnt_backend_csf_if_fw_cc_disable(fw_ctx); ++} ++ ++static void kbasep_hwcnt_backend_csf_if_fw_dump_request( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx) ++{ ++ u32 glb_req; ++ struct kbase_device *kbdev; ++ struct kbase_csf_global_iface *global_iface; ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = ++ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; ++ ++ WARN_ON(!ctx); ++ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); ++ ++ kbdev = fw_ctx->kbdev; ++ global_iface = &kbdev->csf.global_iface; ++ ++ /* Trigger dumping */ ++ kbdev->csf.hwcnt.request_pending = true; ++ glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ); ++ glb_req ^= GLB_REQ_PRFCNT_SAMPLE_MASK; ++ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req, ++ GLB_REQ_PRFCNT_SAMPLE_MASK); ++ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); ++} ++ ++static void kbasep_hwcnt_backend_csf_if_fw_get_indexes( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index, ++ u32 *insert_index) ++{ ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = ++ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; ++ ++ WARN_ON(!ctx); ++ WARN_ON(!extract_index); ++ WARN_ON(!insert_index); ++ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); ++ ++ *extract_index = kbase_csf_firmware_global_input_read( ++ &fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT); ++ *insert_index = kbase_csf_firmware_global_output( ++ &fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_INSERT); ++} ++ ++static void kbasep_hwcnt_backend_csf_if_fw_set_extract_index( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 extract_idx) ++{ ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = ++ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; ++ ++ WARN_ON(!ctx); ++ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); ++ ++ /* Set the raw extract index to release the buffer back to the ring ++ * buffer. 
++ */ ++ kbase_csf_firmware_global_input(&fw_ctx->kbdev->csf.global_iface, ++ GLB_PRFCNT_EXTRACT, extract_idx); ++} ++ ++static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count( ++ struct kbase_hwcnt_backend_csf_if_ctx *ctx, u64 *cycle_counts, ++ u64 clk_enable_map) ++{ ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = ++ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; ++ u8 clk; ++ u64 timestamp_ns = ktime_get_raw_ns(); ++ ++ WARN_ON(!ctx); ++ WARN_ON(!cycle_counts); ++ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); ++ ++ for (clk = 0; clk < fw_ctx->clk_cnt; clk++) { ++ if (!(clk_enable_map & (1ull << clk))) ++ continue; ++ ++ if (clk == KBASE_CLOCK_DOMAIN_TOP) { ++ /* Read cycle count for top clock domain. */ ++ kbase_backend_get_gpu_time_norequest( ++ fw_ctx->kbdev, &cycle_counts[clk], NULL, NULL); ++ } else { ++ /* Estimate cycle count for non-top clock domain. */ ++ cycle_counts[clk] = kbase_ccswe_cycle_at( ++ &fw_ctx->ccswe_shader_cores, timestamp_ns); ++ } ++ } ++} ++ ++/** ++ * kbasep_hwcnt_backend_csf_if_fw_ctx_destroy() - Destroy a CSF FW interface context. ++ * ++ * @fw_ctx: Pointer to context to destroy. ++ */ ++static void kbasep_hwcnt_backend_csf_if_fw_ctx_destroy( ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx) ++{ ++ if (!fw_ctx) ++ return; ++ ++ kfree(fw_ctx); ++} ++ ++/** ++ * kbasep_hwcnt_backend_csf_if_fw_ctx_create() - Create a CSF Firmware context. ++ * ++ * @kbdev: Non_NULL pointer to kbase device. ++ * @out_ctx: Non-NULL pointer to where info is stored on success. ++ * Return: 0 on success, else error code.
++ */ ++static int kbasep_hwcnt_backend_csf_if_fw_ctx_create( ++ struct kbase_device *kbdev, ++ struct kbase_hwcnt_backend_csf_if_fw_ctx **out_ctx) ++{ ++ u8 clk; ++ int errcode = -ENOMEM; ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL; ++ ++ WARN_ON(!kbdev); ++ WARN_ON(!out_ctx); ++ ++ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); ++ if (!ctx) ++ goto error; ++ ++ ctx->kbdev = kbdev; ++ ++ /* Determine the number of available clock domains. */ ++ for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) { ++ if (kbdev->pm.clk_rtm.clks[clk] == NULL) ++ break; ++ } ++ ctx->clk_cnt = clk; ++ ++ ctx->clk_enable_map = 0; ++ kbase_ccswe_init(&ctx->ccswe_shader_cores); ++ ctx->rate_listener.notify = ++ kbasep_hwcnt_backend_csf_if_fw_on_freq_change; ++ ++ *out_ctx = ctx; ++ ++ return 0; ++error: ++ kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(ctx); ++ return errcode; ++} ++ ++void kbase_hwcnt_backend_csf_if_fw_destroy( ++ struct kbase_hwcnt_backend_csf_if *if_fw) ++{ ++ if (!if_fw) ++ return; ++ ++ kbasep_hwcnt_backend_csf_if_fw_ctx_destroy( ++ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)if_fw->ctx); ++ memset(if_fw, 0, sizeof(*if_fw)); ++} ++ ++int kbase_hwcnt_backend_csf_if_fw_create( ++ struct kbase_device *kbdev, struct kbase_hwcnt_backend_csf_if *if_fw) ++{ ++ int errcode; ++ struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL; ++ ++ if (!kbdev || !if_fw) ++ return -EINVAL; ++ ++ errcode = kbasep_hwcnt_backend_csf_if_fw_ctx_create(kbdev, &ctx); ++ if (errcode) ++ return errcode; ++ ++ if_fw->ctx = (struct kbase_hwcnt_backend_csf_if_ctx *)ctx; ++ if_fw->assert_lock_held = ++ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held; ++ if_fw->lock = kbasep_hwcnt_backend_csf_if_fw_lock; ++ if_fw->unlock = kbasep_hwcnt_backend_csf_if_fw_unlock; ++ if_fw->get_prfcnt_info = kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info; ++ if_fw->ring_buf_alloc = kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc; ++ if_fw->ring_buf_sync = kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync; ++ 
if_fw->ring_buf_free = kbasep_hwcnt_backend_csf_if_fw_ring_buf_free; ++ if_fw->timestamp_ns = kbasep_hwcnt_backend_csf_if_fw_timestamp_ns; ++ if_fw->dump_enable = kbasep_hwcnt_backend_csf_if_fw_dump_enable; ++ if_fw->dump_disable = kbasep_hwcnt_backend_csf_if_fw_dump_disable; ++ if_fw->dump_request = kbasep_hwcnt_backend_csf_if_fw_dump_request; ++ if_fw->get_gpu_cycle_count = ++ kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count; ++ if_fw->get_indexes = kbasep_hwcnt_backend_csf_if_fw_get_indexes; ++ if_fw->set_extract_index = ++ kbasep_hwcnt_backend_csf_if_fw_set_extract_index; ++ ++ return 0; ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf_if_fw.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf_if_fw.h +new file mode 100644 +index 0000000..b69668b +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf_if_fw.h +@@ -0,0 +1,50 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ ++ ++/* ++ * Concrete implementation of kbase_hwcnt_backend_csf_if interface for CSF FW ++ */ ++ ++#ifndef _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_ ++#define _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_ ++ ++#include "mali_kbase_hwcnt_backend_csf_if.h" ++ ++/** ++ * kbase_hwcnt_backend_csf_if_fw_create() - Create a firmware CSF interface ++ * of hardware counter backend. ++ * @kbdev: Non-NULL pointer to Kbase device. ++ * @if_fw: Non-NULL pointer to backend interface structure that is filled in on ++ * creation success. ++ * Return: 0 on success, else error code. ++ */ ++int kbase_hwcnt_backend_csf_if_fw_create( ++ struct kbase_device *kbdev, struct kbase_hwcnt_backend_csf_if *if_fw); ++ ++/** ++ * kbase_hwcnt_backend_csf_if_fw_destroy() - Destroy a firmware CSF interface of ++ * hardware counter backend. ++ * @if_fw: Pointer to a CSF interface to destroy. ++ */ ++void kbase_hwcnt_backend_csf_if_fw_destroy( ++ struct kbase_hwcnt_backend_csf_if *if_fw); ++ ++#endif /* _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.c +deleted file mode 100644 +index 407c768..0000000 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.c ++++ /dev/null +@@ -1,510 +0,0 @@ +-/* +- * +- * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. +- * +- * This program is free software and is provided to you under the terms of the +- * GNU General Public License version 2 as published by the Free Software +- * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. 
+- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, you can access it online at +- * http://www.gnu.org/licenses/gpl-2.0.html. +- * +- * SPDX-License-Identifier: GPL-2.0 +- * +- */ +- +-#include "mali_kbase_hwcnt_backend_gpu.h" +-#include "mali_kbase_hwcnt_gpu.h" +-#include "mali_kbase_hwcnt_types.h" +-#include "mali_kbase.h" +-#include "mali_kbase_pm_ca.h" +-#include "mali_kbase_hwaccess_instr.h" +-#ifdef CONFIG_MALI_NO_MALI +-#include "backend/gpu/mali_kbase_model_dummy.h" +-#endif +- +- +-/** +- * struct kbase_hwcnt_backend_gpu_info - Information used to create an instance +- * of a GPU hardware counter backend. +- * @kbdev: KBase device. +- * @use_secondary: True if secondary performance counters should be used, +- * else false. Ignored if secondary counters are not supported. +- * @metadata: Hardware counter metadata. +- * @dump_bytes: Bytes of GPU memory required to perform a +- * hardware counter dump. +- */ +-struct kbase_hwcnt_backend_gpu_info { +- struct kbase_device *kbdev; +- bool use_secondary; +- const struct kbase_hwcnt_metadata *metadata; +- size_t dump_bytes; +-}; +- +-/** +- * struct kbase_hwcnt_backend_gpu - Instance of a GPU hardware counter backend. +- * @info: Info used to create the backend. +- * @kctx: KBase context used for GPU memory allocation and +- * counter dumping. +- * @gpu_dump_va: GPU hardware counter dump buffer virtual address. +- * @cpu_dump_va: CPU mapping of gpu_dump_va. +- * @vmap: Dump buffer vmap. +- * @enabled: True if dumping has been enabled, else false. +- * @pm_core_mask: PM state sync-ed shaders core mask for the enabled dumping. 
+- */ +-struct kbase_hwcnt_backend_gpu { +- const struct kbase_hwcnt_backend_gpu_info *info; +- struct kbase_context *kctx; +- u64 gpu_dump_va; +- void *cpu_dump_va; +- struct kbase_vmap_struct *vmap; +- bool enabled; +- u64 pm_core_mask; +-}; +- +-/* GPU backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */ +-static u64 kbasep_hwcnt_backend_gpu_timestamp_ns( +- struct kbase_hwcnt_backend *backend) +-{ +- (void)backend; +- return ktime_get_raw_ns(); +-} +- +-/* GPU backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */ +-static int kbasep_hwcnt_backend_gpu_dump_enable_nolock( +- struct kbase_hwcnt_backend *backend, +- const struct kbase_hwcnt_enable_map *enable_map) +-{ +- int errcode; +- struct kbase_hwcnt_backend_gpu *backend_gpu = +- (struct kbase_hwcnt_backend_gpu *)backend; +- struct kbase_context *kctx; +- struct kbase_device *kbdev; +- struct kbase_hwcnt_physical_enable_map phys; +- struct kbase_instr_hwcnt_enable enable; +- +- if (!backend_gpu || !enable_map || backend_gpu->enabled || +- (enable_map->metadata != backend_gpu->info->metadata)) +- return -EINVAL; +- +- kctx = backend_gpu->kctx; +- kbdev = backend_gpu->kctx->kbdev; +- +- lockdep_assert_held(&kbdev->hwaccess_lock); +- +- kbase_hwcnt_gpu_enable_map_to_physical(&phys, enable_map); +- +- enable.jm_bm = phys.jm_bm; +- enable.shader_bm = phys.shader_bm; +- enable.tiler_bm = phys.tiler_bm; +- enable.mmu_l2_bm = phys.mmu_l2_bm; +- enable.use_secondary = backend_gpu->info->use_secondary; +- enable.dump_buffer = backend_gpu->gpu_dump_va; +- enable.dump_buffer_bytes = backend_gpu->info->dump_bytes; +- +- errcode = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable); +- if (errcode) +- goto error; +- +- backend_gpu->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev); +- backend_gpu->enabled = true; +- +- return 0; +-error: +- return errcode; +-} +- +-/* GPU backend implementation of kbase_hwcnt_backend_dump_enable_fn */ +-static int kbasep_hwcnt_backend_gpu_dump_enable( +- 
struct kbase_hwcnt_backend *backend, +- const struct kbase_hwcnt_enable_map *enable_map) +-{ +- unsigned long flags; +- int errcode; +- struct kbase_hwcnt_backend_gpu *backend_gpu = +- (struct kbase_hwcnt_backend_gpu *)backend; +- struct kbase_device *kbdev; +- +- if (!backend_gpu) +- return -EINVAL; +- +- kbdev = backend_gpu->kctx->kbdev; +- +- spin_lock_irqsave(&kbdev->hwaccess_lock, flags); +- +- errcode = kbasep_hwcnt_backend_gpu_dump_enable_nolock( +- backend, enable_map); +- +- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +- +- return errcode; +-} +- +-/* GPU backend implementation of kbase_hwcnt_backend_dump_disable_fn */ +-static void kbasep_hwcnt_backend_gpu_dump_disable( +- struct kbase_hwcnt_backend *backend) +-{ +- int errcode; +- struct kbase_hwcnt_backend_gpu *backend_gpu = +- (struct kbase_hwcnt_backend_gpu *)backend; +- +- if (WARN_ON(!backend_gpu) || !backend_gpu->enabled) +- return; +- +- errcode = kbase_instr_hwcnt_disable_internal(backend_gpu->kctx); +- WARN_ON(errcode); +- +- backend_gpu->enabled = false; +-} +- +-/* GPU backend implementation of kbase_hwcnt_backend_dump_clear_fn */ +-static int kbasep_hwcnt_backend_gpu_dump_clear( +- struct kbase_hwcnt_backend *backend) +-{ +- struct kbase_hwcnt_backend_gpu *backend_gpu = +- (struct kbase_hwcnt_backend_gpu *)backend; +- +- if (!backend_gpu || !backend_gpu->enabled) +- return -EINVAL; +- +- return kbase_instr_hwcnt_clear(backend_gpu->kctx); +-} +- +-/* GPU backend implementation of kbase_hwcnt_backend_dump_request_fn */ +-static int kbasep_hwcnt_backend_gpu_dump_request( +- struct kbase_hwcnt_backend *backend) +-{ +- struct kbase_hwcnt_backend_gpu *backend_gpu = +- (struct kbase_hwcnt_backend_gpu *)backend; +- +- if (!backend_gpu || !backend_gpu->enabled) +- return -EINVAL; +- +- return kbase_instr_hwcnt_request_dump(backend_gpu->kctx); +-} +- +-/* GPU backend implementation of kbase_hwcnt_backend_dump_wait_fn */ +-static int kbasep_hwcnt_backend_gpu_dump_wait( +- struct 
kbase_hwcnt_backend *backend) +-{ +- struct kbase_hwcnt_backend_gpu *backend_gpu = +- (struct kbase_hwcnt_backend_gpu *)backend; +- +- if (!backend_gpu || !backend_gpu->enabled) +- return -EINVAL; +- +- return kbase_instr_hwcnt_wait_for_dump(backend_gpu->kctx); +-} +- +-/* GPU backend implementation of kbase_hwcnt_backend_dump_get_fn */ +-static int kbasep_hwcnt_backend_gpu_dump_get( +- struct kbase_hwcnt_backend *backend, +- struct kbase_hwcnt_dump_buffer *dst, +- const struct kbase_hwcnt_enable_map *dst_enable_map, +- bool accumulate) +-{ +- struct kbase_hwcnt_backend_gpu *backend_gpu = +- (struct kbase_hwcnt_backend_gpu *)backend; +- +- if (!backend_gpu || !dst || !dst_enable_map || +- (backend_gpu->info->metadata != dst->metadata) || +- (dst_enable_map->metadata != dst->metadata)) +- return -EINVAL; +- +- /* Invalidate the kernel buffer before reading from it. */ +- kbase_sync_mem_regions( +- backend_gpu->kctx, backend_gpu->vmap, KBASE_SYNC_TO_CPU); +- +- return kbase_hwcnt_gpu_dump_get( +- dst, backend_gpu->cpu_dump_va, dst_enable_map, +- backend_gpu->pm_core_mask, accumulate); +-} +- +-/** +- * kbasep_hwcnt_backend_gpu_dump_alloc() - Allocate a GPU dump buffer. +- * @info: Non-NULL pointer to GPU backend info. +- * @kctx: Non-NULL pointer to kbase context. +- * @gpu_dump_va: Non-NULL pointer to where GPU dump buffer virtual address +- * is stored on success. +- * +- * Return: 0 on success, else error code. 
+- */ +-static int kbasep_hwcnt_backend_gpu_dump_alloc( +- const struct kbase_hwcnt_backend_gpu_info *info, +- struct kbase_context *kctx, +- u64 *gpu_dump_va) +-{ +- struct kbase_va_region *reg; +- u64 flags; +- u64 nr_pages; +- +- WARN_ON(!info); +- WARN_ON(!kctx); +- WARN_ON(!gpu_dump_va); +- +- flags = BASE_MEM_PROT_CPU_RD | +- BASE_MEM_PROT_GPU_WR | +- BASEP_MEM_PERMANENT_KERNEL_MAPPING | +- BASE_MEM_CACHED_CPU; +- +- if (kctx->kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE) +- flags |= BASE_MEM_UNCACHED_GPU; +- +- nr_pages = PFN_UP(info->dump_bytes); +- +- reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va); +- +- if (!reg) +- return -ENOMEM; +- +- return 0; +-} +- +-/** +- * kbasep_hwcnt_backend_gpu_dump_free() - Free an allocated GPU dump buffer. +- * @kctx: Non-NULL pointer to kbase context. +- * @gpu_dump_va: GPU dump buffer virtual address. +- */ +-static void kbasep_hwcnt_backend_gpu_dump_free( +- struct kbase_context *kctx, +- u64 gpu_dump_va) +-{ +- WARN_ON(!kctx); +- if (gpu_dump_va) +- kbase_mem_free(kctx, gpu_dump_va); +-} +- +-/** +- * kbasep_hwcnt_backend_gpu_destroy() - Destroy a GPU backend. +- * @backend: Pointer to GPU backend to destroy. +- * +- * Can be safely called on a backend in any state of partial construction. +- */ +-static void kbasep_hwcnt_backend_gpu_destroy( +- struct kbase_hwcnt_backend_gpu *backend) +-{ +- if (!backend) +- return; +- +- if (backend->kctx) { +- struct kbase_context *kctx = backend->kctx; +- struct kbase_device *kbdev = kctx->kbdev; +- +- if (backend->cpu_dump_va) +- kbase_phy_alloc_mapping_put(kctx, backend->vmap); +- +- if (backend->gpu_dump_va) +- kbasep_hwcnt_backend_gpu_dump_free( +- kctx, backend->gpu_dump_va); +- +- kbasep_js_release_privileged_ctx(kbdev, kctx); +- kbase_destroy_context(kctx); +- } +- +- kfree(backend); +-} +- +-/** +- * kbasep_hwcnt_backend_gpu_create() - Create a GPU backend. +- * @info: Non-NULL pointer to backend info. 
+- * @out_backend: Non-NULL pointer to where backend is stored on success. +- * +- * Return: 0 on success, else error code. +- */ +-static int kbasep_hwcnt_backend_gpu_create( +- const struct kbase_hwcnt_backend_gpu_info *info, +- struct kbase_hwcnt_backend_gpu **out_backend) +-{ +- +- int errcode; +- struct kbase_device *kbdev; +- struct kbase_hwcnt_backend_gpu *backend = NULL; +- +- WARN_ON(!info); +- WARN_ON(!out_backend); +- +- kbdev = info->kbdev; +- +- backend = kzalloc(sizeof(*backend), GFP_KERNEL); +- if (!backend) +- goto alloc_error; +- +- backend->info = info; +- +- backend->kctx = kbase_create_context(kbdev, true, +- BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED, 0, NULL); +- if (!backend->kctx) +- goto alloc_error; +- +- kbasep_js_schedule_privileged_ctx(kbdev, backend->kctx); +- +- errcode = kbasep_hwcnt_backend_gpu_dump_alloc( +- info, backend->kctx, &backend->gpu_dump_va); +- if (errcode) +- goto error; +- +- backend->cpu_dump_va = kbase_phy_alloc_mapping_get(backend->kctx, +- backend->gpu_dump_va, &backend->vmap); +- if (!backend->cpu_dump_va) +- goto alloc_error; +- +-#ifdef CONFIG_MALI_NO_MALI +- /* The dummy model needs the CPU mapping. 
*/ +- gpu_model_set_dummy_prfcnt_base_cpu(backend->cpu_dump_va); +-#endif +- +- *out_backend = backend; +- return 0; +- +-alloc_error: +- errcode = -ENOMEM; +-error: +- kbasep_hwcnt_backend_gpu_destroy(backend); +- return errcode; +-} +- +-/* GPU backend implementation of kbase_hwcnt_backend_init_fn */ +-static int kbasep_hwcnt_backend_gpu_init( +- const struct kbase_hwcnt_backend_info *info, +- struct kbase_hwcnt_backend **out_backend) +-{ +- int errcode; +- struct kbase_hwcnt_backend_gpu *backend = NULL; +- +- if (!info || !out_backend) +- return -EINVAL; +- +- errcode = kbasep_hwcnt_backend_gpu_create( +- (const struct kbase_hwcnt_backend_gpu_info *) info, &backend); +- if (errcode) +- return errcode; +- +- *out_backend = (struct kbase_hwcnt_backend *)backend; +- +- return 0; +-} +- +-/* GPU backend implementation of kbase_hwcnt_backend_term_fn */ +-static void kbasep_hwcnt_backend_gpu_term(struct kbase_hwcnt_backend *backend) +-{ +- if (!backend) +- return; +- +- kbasep_hwcnt_backend_gpu_dump_disable(backend); +- kbasep_hwcnt_backend_gpu_destroy( +- (struct kbase_hwcnt_backend_gpu *)backend); +-} +- +-/** +- * kbasep_hwcnt_backend_gpu_info_destroy() - Destroy a GPU backend info. +- * @info: Pointer to info to destroy. +- * +- * Can be safely called on a backend info in any state of partial construction. +- */ +-static void kbasep_hwcnt_backend_gpu_info_destroy( +- const struct kbase_hwcnt_backend_gpu_info *info) +-{ +- if (!info) +- return; +- +- kbase_hwcnt_gpu_metadata_destroy(info->metadata); +- kfree(info); +-} +- +-/** +- * kbasep_hwcnt_backend_gpu_info_create() - Create a GPU backend info. +- * @kbdev: Non_NULL pointer to kbase device. +- * @out_info: Non-NULL pointer to where info is stored on success. +- * +- * Return 0 on success, else error code. 
+- */ +-static int kbasep_hwcnt_backend_gpu_info_create( +- struct kbase_device *kbdev, +- const struct kbase_hwcnt_backend_gpu_info **out_info) +-{ +- int errcode = -ENOMEM; +- struct kbase_hwcnt_gpu_info hwcnt_gpu_info; +- struct kbase_hwcnt_backend_gpu_info *info = NULL; +- +- WARN_ON(!kbdev); +- WARN_ON(!out_info); +- +- errcode = kbase_hwcnt_gpu_info_init(kbdev, &hwcnt_gpu_info); +- if (errcode) +- return errcode; +- +- info = kzalloc(sizeof(*info), GFP_KERNEL); +- if (!info) +- goto error; +- +- info->kbdev = kbdev; +- +-#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY +- info->use_secondary = true; +-#else +- info->use_secondary = false; +-#endif +- +- errcode = kbase_hwcnt_gpu_metadata_create( +- &hwcnt_gpu_info, info->use_secondary, +- &info->metadata, +- &info->dump_bytes); +- if (errcode) +- goto error; +- +- *out_info = info; +- +- return 0; +-error: +- kbasep_hwcnt_backend_gpu_info_destroy(info); +- return errcode; +-} +- +-int kbase_hwcnt_backend_gpu_create( +- struct kbase_device *kbdev, +- struct kbase_hwcnt_backend_interface *iface) +-{ +- int errcode; +- const struct kbase_hwcnt_backend_gpu_info *info = NULL; +- +- if (!kbdev || !iface) +- return -EINVAL; +- +- errcode = kbasep_hwcnt_backend_gpu_info_create(kbdev, &info); +- +- if (errcode) +- return errcode; +- +- iface->metadata = info->metadata; +- iface->info = (struct kbase_hwcnt_backend_info *)info; +- iface->init = kbasep_hwcnt_backend_gpu_init; +- iface->term = kbasep_hwcnt_backend_gpu_term; +- iface->timestamp_ns = kbasep_hwcnt_backend_gpu_timestamp_ns; +- iface->dump_enable = kbasep_hwcnt_backend_gpu_dump_enable; +- iface->dump_enable_nolock = kbasep_hwcnt_backend_gpu_dump_enable_nolock; +- iface->dump_disable = kbasep_hwcnt_backend_gpu_dump_disable; +- iface->dump_clear = kbasep_hwcnt_backend_gpu_dump_clear; +- iface->dump_request = kbasep_hwcnt_backend_gpu_dump_request; +- iface->dump_wait = kbasep_hwcnt_backend_gpu_dump_wait; +- iface->dump_get = kbasep_hwcnt_backend_gpu_dump_get; +- +- return 
0; +-} +- +-void kbase_hwcnt_backend_gpu_destroy( +- struct kbase_hwcnt_backend_interface *iface) +-{ +- if (!iface) +- return; +- +- kbasep_hwcnt_backend_gpu_info_destroy( +- (const struct kbase_hwcnt_backend_gpu_info *)iface->info); +- memset(iface, 0, sizeof(*iface)); +-} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_jm.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_jm.c +new file mode 100644 +index 0000000..64001b1 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_jm.c +@@ -0,0 +1,793 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#include "mali_kbase_hwcnt_backend_jm.h" ++#include "mali_kbase_hwcnt_gpu.h" ++#include "mali_kbase_hwcnt_types.h" ++#include "mali_kbase.h" ++#include "backend/gpu/mali_kbase_pm_ca.h" ++#include "mali_kbase_hwaccess_instr.h" ++#include "mali_kbase_hwaccess_time.h" ++#include "mali_kbase_ccswe.h" ++ ++#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" ++ ++#include "backend/gpu/mali_kbase_pm_internal.h" ++ ++/** ++ * struct kbase_hwcnt_backend_jm_info - Information used to create an instance ++ * of a JM hardware counter backend. 
++ * @kbdev: KBase device. ++ * @counter_set: The performance counter set to use. ++ * @metadata: Hardware counter metadata. ++ * @dump_bytes: Bytes of GPU memory required to perform a ++ * hardware counter dump. ++ */ ++struct kbase_hwcnt_backend_jm_info { ++ struct kbase_device *kbdev; ++ enum kbase_hwcnt_set counter_set; ++ const struct kbase_hwcnt_metadata *metadata; ++ size_t dump_bytes; ++}; ++ ++/** ++ * struct kbase_hwcnt_backend_jm - Instance of a JM hardware counter backend. ++ * @info: Info used to create the backend. ++ * @kctx: KBase context used for GPU memory allocation and ++ * counter dumping. ++ * @gpu_dump_va: GPU hardware counter dump buffer virtual address. ++ * @cpu_dump_va: CPU mapping of gpu_dump_va. ++ * @vmap: Dump buffer vmap. ++ * @enabled: True if dumping has been enabled, else false. ++ * @pm_core_mask: PM state sync-ed shaders core mask for the enabled ++ * dumping. ++ * @curr_config: Current allocated hardware resources to correctly map the src ++ * raw dump buffer to the dst dump buffer. ++ * @clk_enable_map: The enable map specifying enabled clock domains. ++ * @cycle_count_elapsed: ++ * Cycle count elapsed for a given sample period. ++ * The top clock cycle, index 0, is read directly from ++ * hardware, but the other clock domains need to be ++ * calculated with software estimation. ++ * @prev_cycle_count: Previous cycle count to calculate the cycle count for ++ * sample period. ++ * @rate_listener: Clock rate listener callback state. ++ * @ccswe_shader_cores: Shader cores cycle count software estimator. 
++ */ ++struct kbase_hwcnt_backend_jm { ++ const struct kbase_hwcnt_backend_jm_info *info; ++ struct kbase_context *kctx; ++ u64 gpu_dump_va; ++ void *cpu_dump_va; ++ struct kbase_vmap_struct *vmap; ++ bool enabled; ++ u64 pm_core_mask; ++ struct kbase_hwcnt_curr_config curr_config; ++ u64 clk_enable_map; ++ u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS]; ++ u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS]; ++ struct kbase_clk_rate_listener rate_listener; ++ struct kbase_ccswe ccswe_shader_cores; ++}; ++ ++/** ++ * kbasep_hwcnt_backend_jm_gpu_info_init() - Initialise an info structure used ++ * to create the hwcnt metadata. ++ * @kbdev: Non-NULL pointer to kbase device. ++ * @info: Non-NULL pointer to data structure to be filled in. ++ * ++ * The initialised info struct will only be valid for use while kbdev is valid. ++ */ ++static int ++kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev, ++ struct kbase_hwcnt_gpu_info *info) ++{ ++ size_t clk; ++ ++ if (!kbdev || !info) ++ return -EINVAL; ++ ++ { ++ const struct base_gpu_props *props = &kbdev->gpu_props.props; ++ const size_t l2_count = props->l2_props.num_l2_slices; ++ const size_t core_mask = ++ props->coherency_info.group[0].core_mask; ++ ++ info->l2_count = l2_count; ++ info->core_mask = core_mask; ++ info->prfcnt_values_per_block = ++ KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK; ++ } ++ ++ /* Determine the number of available clock domains. 
*/ ++ for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) { ++ if (kbdev->pm.clk_rtm.clks[clk] == NULL) ++ break; ++ } ++ info->clk_cnt = clk; ++ ++ return 0; ++} ++ ++/** ++ * kbasep_hwcnt_backend_jm_on_freq_change() - On freq change callback ++ * ++ * @rate_listener: Callback state ++ * @clk_index: Clock index ++ * @clk_rate_hz: Clock frequency(hz) ++ */ ++static void kbasep_hwcnt_backend_jm_on_freq_change( ++ struct kbase_clk_rate_listener *rate_listener, ++ u32 clk_index, ++ u32 clk_rate_hz) ++{ ++ struct kbase_hwcnt_backend_jm *backend_jm = container_of( ++ rate_listener, struct kbase_hwcnt_backend_jm, rate_listener); ++ u64 timestamp_ns; ++ ++ if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES) ++ return; ++ ++ timestamp_ns = ktime_get_raw_ns(); ++ kbase_ccswe_freq_change( ++ &backend_jm->ccswe_shader_cores, timestamp_ns, clk_rate_hz); ++} ++ ++/** ++ * kbasep_hwcnt_backend_jm_cc_enable() - Enable cycle count tracking ++ * ++ * @backend_jm: Non-NULL pointer to backend. ++ * @enable_map: Non-NULL pointer to enable map specifying enabled counters. ++ * @timestamp_ns: Timestamp(ns) when HWCNT were enabled. ++ */ ++static void kbasep_hwcnt_backend_jm_cc_enable( ++ struct kbase_hwcnt_backend_jm *backend_jm, ++ const struct kbase_hwcnt_enable_map *enable_map, ++ u64 timestamp_ns) ++{ ++ struct kbase_device *kbdev = backend_jm->kctx->kbdev; ++ u64 clk_enable_map = enable_map->clk_enable_map; ++ u64 cycle_count; ++ ++ if (kbase_hwcnt_clk_enable_map_enabled( ++ clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) { ++ /* turn on the cycle counter */ ++ kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev); ++ /* Read cycle count for top clock domain. 
*/ ++ kbase_backend_get_gpu_time_norequest( ++ kbdev, &cycle_count, NULL, NULL); ++ ++ backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_TOP] = ++ cycle_count; ++ } ++ ++ if (kbase_hwcnt_clk_enable_map_enabled( ++ clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) { ++ /* software estimation for non-top clock domains */ ++ struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; ++ const struct kbase_clk_data *clk_data = ++ rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES]; ++ u32 cur_freq; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&rtm->lock, flags); ++ ++ cur_freq = (u32) clk_data->clock_val; ++ kbase_ccswe_reset(&backend_jm->ccswe_shader_cores); ++ kbase_ccswe_freq_change( ++ &backend_jm->ccswe_shader_cores, ++ timestamp_ns, ++ cur_freq); ++ ++ kbase_clk_rate_trace_manager_subscribe_no_lock( ++ rtm, &backend_jm->rate_listener); ++ ++ spin_unlock_irqrestore(&rtm->lock, flags); ++ ++ /* ccswe was reset. The estimated cycle is zero. */ ++ backend_jm->prev_cycle_count[ ++ KBASE_CLOCK_DOMAIN_SHADER_CORES] = 0; ++ } ++ ++ /* Keep clk_enable_map for dump_request. */ ++ backend_jm->clk_enable_map = clk_enable_map; ++} ++ ++/** ++ * kbasep_hwcnt_backend_jm_cc_disable() - Disable cycle count tracking ++ * ++ * @backend_jm: Non-NULL pointer to backend. 
++ */ ++static void kbasep_hwcnt_backend_jm_cc_disable( ++ struct kbase_hwcnt_backend_jm *backend_jm) ++{ ++ struct kbase_device *kbdev = backend_jm->kctx->kbdev; ++ struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; ++ u64 clk_enable_map = backend_jm->clk_enable_map; ++ ++ if (kbase_hwcnt_clk_enable_map_enabled( ++ clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) { ++ /* turn off the cycle counter */ ++ kbase_pm_release_gpu_cycle_counter(kbdev); ++ } ++ ++ if (kbase_hwcnt_clk_enable_map_enabled( ++ clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) { ++ ++ kbase_clk_rate_trace_manager_unsubscribe( ++ rtm, &backend_jm->rate_listener); ++ } ++} ++ ++ ++/** ++ * kbasep_hwcnt_gpu_update_curr_config() - Update the destination buffer with ++ * current config information. ++ * @kbdev: Non-NULL pointer to kbase device. ++ * @curr_config: Non-NULL pointer to return the current configuration of ++ * hardware allocated to the GPU. ++ * ++ * The current configuration information is used for architectures where the ++ * max_config interface is available from the Arbiter. In this case the current ++ * allocated hardware is not always the same, so the current config information ++ * is used to correctly map the current allocated resources to the memory layout ++ * that is copied to the user space. ++ * ++ * Return: 0 on success, else error code. 
++ */ ++static int kbasep_hwcnt_gpu_update_curr_config( ++ struct kbase_device *kbdev, ++ struct kbase_hwcnt_curr_config *curr_config) ++{ ++ if (WARN_ON(!kbdev) || WARN_ON(!curr_config)) ++ return -EINVAL; ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ curr_config->num_l2_slices = ++ kbdev->gpu_props.curr_config.l2_slices; ++ curr_config->shader_present = ++ kbdev->gpu_props.curr_config.shader_present; ++ return 0; ++} ++ ++/* JM backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */ ++static u64 kbasep_hwcnt_backend_jm_timestamp_ns( ++ struct kbase_hwcnt_backend *backend) ++{ ++ (void)backend; ++ return ktime_get_raw_ns(); ++} ++ ++/* JM backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */ ++static int kbasep_hwcnt_backend_jm_dump_enable_nolock( ++ struct kbase_hwcnt_backend *backend, ++ const struct kbase_hwcnt_enable_map *enable_map) ++{ ++ int errcode; ++ struct kbase_hwcnt_backend_jm *backend_jm = ++ (struct kbase_hwcnt_backend_jm *)backend; ++ struct kbase_context *kctx; ++ struct kbase_device *kbdev; ++ struct kbase_hwcnt_physical_enable_map phys_enable_map; ++ enum kbase_hwcnt_physical_set phys_counter_set; ++ struct kbase_instr_hwcnt_enable enable; ++ u64 timestamp_ns; ++ ++ if (!backend_jm || !enable_map || backend_jm->enabled || ++ (enable_map->metadata != backend_jm->info->metadata)) ++ return -EINVAL; ++ ++ kctx = backend_jm->kctx; ++ kbdev = backend_jm->kctx->kbdev; ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ kbase_hwcnt_gpu_enable_map_to_physical(&phys_enable_map, enable_map); ++ ++ kbase_hwcnt_gpu_set_to_physical(&phys_counter_set, ++ backend_jm->info->counter_set); ++ ++ enable.fe_bm = phys_enable_map.fe_bm; ++ enable.shader_bm = phys_enable_map.shader_bm; ++ enable.tiler_bm = phys_enable_map.tiler_bm; ++ enable.mmu_l2_bm = phys_enable_map.mmu_l2_bm; ++ enable.counter_set = phys_counter_set; ++ enable.dump_buffer = backend_jm->gpu_dump_va; ++ enable.dump_buffer_bytes = backend_jm->info->dump_bytes; 
++ ++ timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend); ++ ++ /* Update the current configuration information. */ ++ errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, ++ &backend_jm->curr_config); ++ if (errcode) ++ goto error; ++ ++ errcode = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable); ++ if (errcode) ++ goto error; ++ ++ backend_jm->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev); ++ ++ backend_jm->enabled = true; ++ ++ kbasep_hwcnt_backend_jm_cc_enable(backend_jm, enable_map, timestamp_ns); ++ ++ return 0; ++error: ++ return errcode; ++} ++ ++/* JM backend implementation of kbase_hwcnt_backend_dump_enable_fn */ ++static int kbasep_hwcnt_backend_jm_dump_enable( ++ struct kbase_hwcnt_backend *backend, ++ const struct kbase_hwcnt_enable_map *enable_map) ++{ ++ unsigned long flags; ++ int errcode; ++ struct kbase_hwcnt_backend_jm *backend_jm = ++ (struct kbase_hwcnt_backend_jm *)backend; ++ struct kbase_device *kbdev; ++ ++ if (!backend_jm) ++ return -EINVAL; ++ ++ kbdev = backend_jm->kctx->kbdev; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ ++ errcode = kbasep_hwcnt_backend_jm_dump_enable_nolock( ++ backend, enable_map); ++ ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ return errcode; ++} ++ ++/* JM backend implementation of kbase_hwcnt_backend_dump_disable_fn */ ++static void kbasep_hwcnt_backend_jm_dump_disable( ++ struct kbase_hwcnt_backend *backend) ++{ ++ int errcode; ++ struct kbase_hwcnt_backend_jm *backend_jm = ++ (struct kbase_hwcnt_backend_jm *)backend; ++ ++ if (WARN_ON(!backend_jm) || !backend_jm->enabled) ++ return; ++ ++ kbasep_hwcnt_backend_jm_cc_disable(backend_jm); ++ ++ errcode = kbase_instr_hwcnt_disable_internal(backend_jm->kctx); ++ WARN_ON(errcode); ++ ++ backend_jm->enabled = false; ++} ++ ++/* JM backend implementation of kbase_hwcnt_backend_dump_clear_fn */ ++static int kbasep_hwcnt_backend_jm_dump_clear( ++ struct kbase_hwcnt_backend *backend) ++{ ++ struct 
kbase_hwcnt_backend_jm *backend_jm = ++ (struct kbase_hwcnt_backend_jm *)backend; ++ ++ if (!backend_jm || !backend_jm->enabled) ++ return -EINVAL; ++ ++ return kbase_instr_hwcnt_clear(backend_jm->kctx); ++} ++ ++/* JM backend implementation of kbase_hwcnt_backend_dump_request_fn */ ++static int kbasep_hwcnt_backend_jm_dump_request( ++ struct kbase_hwcnt_backend *backend, ++ u64 *dump_time_ns) ++{ ++ struct kbase_hwcnt_backend_jm *backend_jm = ++ (struct kbase_hwcnt_backend_jm *)backend; ++ struct kbase_device *kbdev; ++ const struct kbase_hwcnt_metadata *metadata; ++ u64 current_cycle_count; ++ size_t clk; ++ int ret; ++ ++ if (!backend_jm || !backend_jm->enabled || !dump_time_ns) ++ return -EINVAL; ++ ++ kbdev = backend_jm->kctx->kbdev; ++ metadata = backend_jm->info->metadata; ++ ++ /* Disable pre-emption, to make the timestamp as accurate as possible */ ++ preempt_disable(); ++ { ++ *dump_time_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend); ++ ret = kbase_instr_hwcnt_request_dump(backend_jm->kctx); ++ ++ kbase_hwcnt_metadata_for_each_clock(metadata, clk) { ++ if (!kbase_hwcnt_clk_enable_map_enabled( ++ backend_jm->clk_enable_map, clk)) ++ continue; ++ ++ if (clk == KBASE_CLOCK_DOMAIN_TOP) { ++ /* Read cycle count for top clock domain. */ ++ kbase_backend_get_gpu_time_norequest( ++ kbdev, &current_cycle_count, ++ NULL, NULL); ++ } else { ++ /* ++ * Estimate cycle count for non-top clock ++ * domain. ++ */ ++ current_cycle_count = kbase_ccswe_cycle_at( ++ &backend_jm->ccswe_shader_cores, ++ *dump_time_ns); ++ } ++ backend_jm->cycle_count_elapsed[clk] = ++ current_cycle_count - ++ backend_jm->prev_cycle_count[clk]; ++ ++ /* ++ * Keep the current cycle count for later calculation.
++ */ ++ backend_jm->prev_cycle_count[clk] = current_cycle_count; ++ } ++ } ++ preempt_enable(); ++ ++ return ret; ++} ++ ++/* JM backend implementation of kbase_hwcnt_backend_dump_wait_fn */ ++static int kbasep_hwcnt_backend_jm_dump_wait( ++ struct kbase_hwcnt_backend *backend) ++{ ++ struct kbase_hwcnt_backend_jm *backend_jm = ++ (struct kbase_hwcnt_backend_jm *)backend; ++ ++ if (!backend_jm || !backend_jm->enabled) ++ return -EINVAL; ++ ++ return kbase_instr_hwcnt_wait_for_dump(backend_jm->kctx); ++} ++ ++/* JM backend implementation of kbase_hwcnt_backend_dump_get_fn */ ++static int kbasep_hwcnt_backend_jm_dump_get( ++ struct kbase_hwcnt_backend *backend, ++ struct kbase_hwcnt_dump_buffer *dst, ++ const struct kbase_hwcnt_enable_map *dst_enable_map, ++ bool accumulate) ++{ ++ struct kbase_hwcnt_backend_jm *backend_jm = ++ (struct kbase_hwcnt_backend_jm *)backend; ++ size_t clk; ++ ++ if (!backend_jm || !dst || !dst_enable_map || ++ (backend_jm->info->metadata != dst->metadata) || ++ (dst_enable_map->metadata != dst->metadata)) ++ return -EINVAL; ++ ++ /* Invalidate the kernel buffer before reading from it. */ ++ kbase_sync_mem_regions( ++ backend_jm->kctx, backend_jm->vmap, KBASE_SYNC_TO_CPU); ++ ++ kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) { ++ if (!kbase_hwcnt_clk_enable_map_enabled( ++ dst_enable_map->clk_enable_map, clk)) ++ continue; ++ ++ /* Extract elapsed cycle count for each clock domain. */ ++ dst->clk_cnt_buf[clk] = backend_jm->cycle_count_elapsed[clk]; ++ } ++ ++ return kbase_hwcnt_jm_dump_get(dst, backend_jm->cpu_dump_va, ++ dst_enable_map, backend_jm->pm_core_mask, ++ &backend_jm->curr_config, accumulate); ++} ++ ++/** ++ * kbasep_hwcnt_backend_jm_dump_alloc() - Allocate a GPU dump buffer. ++ * @info: Non-NULL pointer to JM backend info. ++ * @kctx: Non-NULL pointer to kbase context. ++ * @gpu_dump_va: Non-NULL pointer to where GPU dump buffer virtual address ++ * is stored on success. 
++ * ++ * Return: 0 on success, else error code. ++ */ ++static int kbasep_hwcnt_backend_jm_dump_alloc( ++ const struct kbase_hwcnt_backend_jm_info *info, ++ struct kbase_context *kctx, ++ u64 *gpu_dump_va) ++{ ++ struct kbase_va_region *reg; ++ u64 flags; ++ u64 nr_pages; ++ ++ WARN_ON(!info); ++ WARN_ON(!kctx); ++ WARN_ON(!gpu_dump_va); ++ ++ flags = BASE_MEM_PROT_CPU_RD | ++ BASE_MEM_PROT_GPU_WR | ++ BASEP_MEM_PERMANENT_KERNEL_MAPPING | ++ BASE_MEM_CACHED_CPU | ++ BASE_MEM_UNCACHED_GPU; ++ ++ nr_pages = PFN_UP(info->dump_bytes); ++ ++ reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va); ++ ++ if (!reg) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++/** ++ * kbasep_hwcnt_backend_jm_dump_free() - Free an allocated GPU dump buffer. ++ * @kctx: Non-NULL pointer to kbase context. ++ * @gpu_dump_va: GPU dump buffer virtual address. ++ */ ++static void kbasep_hwcnt_backend_jm_dump_free( ++ struct kbase_context *kctx, ++ u64 gpu_dump_va) ++{ ++ WARN_ON(!kctx); ++ if (gpu_dump_va) ++ kbase_mem_free(kctx, gpu_dump_va); ++} ++ ++/** ++ * kbasep_hwcnt_backend_jm_destroy() - Destroy a JM backend. ++ * @backend: Pointer to JM backend to destroy. ++ * ++ * Can be safely called on a backend in any state of partial construction. ++ */ ++static void kbasep_hwcnt_backend_jm_destroy( ++ struct kbase_hwcnt_backend_jm *backend) ++{ ++ if (!backend) ++ return; ++ ++ if (backend->kctx) { ++ struct kbase_context *kctx = backend->kctx; ++ struct kbase_device *kbdev = kctx->kbdev; ++ ++ if (backend->cpu_dump_va) ++ kbase_phy_alloc_mapping_put(kctx, backend->vmap); ++ ++ if (backend->gpu_dump_va) ++ kbasep_hwcnt_backend_jm_dump_free( ++ kctx, backend->gpu_dump_va); ++ ++ kbasep_js_release_privileged_ctx(kbdev, kctx); ++ kbase_destroy_context(kctx); ++ } ++ ++ kfree(backend); ++} ++ ++/** ++ * kbasep_hwcnt_backend_jm_create() - Create a JM backend. ++ * @info: Non-NULL pointer to backend info. ++ * @out_backend: Non-NULL pointer to where backend is stored on success. 
++ * ++ * Return: 0 on success, else error code. ++ */ ++static int kbasep_hwcnt_backend_jm_create( ++ const struct kbase_hwcnt_backend_jm_info *info, ++ struct kbase_hwcnt_backend_jm **out_backend) ++{ ++ int errcode; ++ struct kbase_device *kbdev; ++ struct kbase_hwcnt_backend_jm *backend = NULL; ++ ++ WARN_ON(!info); ++ WARN_ON(!out_backend); ++ ++ kbdev = info->kbdev; ++ ++ backend = kzalloc(sizeof(*backend), GFP_KERNEL); ++ if (!backend) ++ goto alloc_error; ++ ++ backend->info = info; ++ ++ backend->kctx = kbase_create_context(kbdev, true, ++ BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED, 0, NULL); ++ if (!backend->kctx) ++ goto alloc_error; ++ ++ kbasep_js_schedule_privileged_ctx(kbdev, backend->kctx); ++ ++ errcode = kbasep_hwcnt_backend_jm_dump_alloc( ++ info, backend->kctx, &backend->gpu_dump_va); ++ if (errcode) ++ goto error; ++ ++ backend->cpu_dump_va = kbase_phy_alloc_mapping_get(backend->kctx, ++ backend->gpu_dump_va, &backend->vmap); ++ if (!backend->cpu_dump_va) ++ goto alloc_error; ++ ++ kbase_ccswe_init(&backend->ccswe_shader_cores); ++ backend->rate_listener.notify = kbasep_hwcnt_backend_jm_on_freq_change; ++ ++ ++ *out_backend = backend; ++ return 0; ++ ++alloc_error: ++ errcode = -ENOMEM; ++error: ++ kbasep_hwcnt_backend_jm_destroy(backend); ++ return errcode; ++} ++ ++/* JM backend implementation of kbase_hwcnt_backend_metadata_fn */ ++static const struct kbase_hwcnt_metadata * ++kbasep_hwcnt_backend_jm_metadata(const struct kbase_hwcnt_backend_info *info) ++{ ++ if (!info) ++ return NULL; ++ ++ return ((const struct kbase_hwcnt_backend_jm_info *)info)->metadata; ++} ++ ++/* JM backend implementation of kbase_hwcnt_backend_init_fn */ ++static int kbasep_hwcnt_backend_jm_init( ++ const struct kbase_hwcnt_backend_info *info, ++ struct kbase_hwcnt_backend **out_backend) ++{ ++ int errcode; ++ struct kbase_hwcnt_backend_jm *backend = NULL; ++ ++ if (!info || !out_backend) ++ return -EINVAL; ++ ++ errcode = kbasep_hwcnt_backend_jm_create( ++ (const 
struct kbase_hwcnt_backend_jm_info *) info, &backend); ++ if (errcode) ++ return errcode; ++ ++ *out_backend = (struct kbase_hwcnt_backend *)backend; ++ ++ return 0; ++} ++ ++/* JM backend implementation of kbase_hwcnt_backend_term_fn */ ++static void kbasep_hwcnt_backend_jm_term(struct kbase_hwcnt_backend *backend) ++{ ++ if (!backend) ++ return; ++ ++ kbasep_hwcnt_backend_jm_dump_disable(backend); ++ kbasep_hwcnt_backend_jm_destroy( ++ (struct kbase_hwcnt_backend_jm *)backend); ++} ++ ++/** ++ * kbasep_hwcnt_backend_jm_info_destroy() - Destroy a JM backend info. ++ * @info: Pointer to info to destroy. ++ * ++ * Can be safely called on a backend info in any state of partial construction. ++ */ ++static void kbasep_hwcnt_backend_jm_info_destroy( ++ const struct kbase_hwcnt_backend_jm_info *info) ++{ ++ if (!info) ++ return; ++ ++ kbase_hwcnt_jm_metadata_destroy(info->metadata); ++ kfree(info); ++} ++ ++/** ++ * kbasep_hwcnt_backend_jm_info_create() - Create a JM backend info. ++ * @kbdev: Non-NULL pointer to kbase device. ++ * @out_info: Non-NULL pointer to where info is stored on success. ++ * ++ * Return: 0 on success, else error code.
++ */ ++static int kbasep_hwcnt_backend_jm_info_create( ++ struct kbase_device *kbdev, ++ const struct kbase_hwcnt_backend_jm_info **out_info) ++{ ++ int errcode = -ENOMEM; ++ struct kbase_hwcnt_gpu_info hwcnt_gpu_info; ++ struct kbase_hwcnt_backend_jm_info *info = NULL; ++ ++ WARN_ON(!kbdev); ++ WARN_ON(!out_info); ++ ++ errcode = kbasep_hwcnt_backend_jm_gpu_info_init(kbdev, &hwcnt_gpu_info); ++ if (errcode) ++ return errcode; ++ ++ info = kzalloc(sizeof(*info), GFP_KERNEL); ++ if (!info) ++ goto error; ++ ++ info->kbdev = kbdev; ++ ++#if defined(CONFIG_MALI_PRFCNT_SET_SECONDARY) ++ info->counter_set = KBASE_HWCNT_SET_SECONDARY; ++#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY) ++ info->counter_set = KBASE_HWCNT_SET_TERTIARY; ++#else ++ /* Default to primary */ ++ info->counter_set = KBASE_HWCNT_SET_PRIMARY; ++#endif ++ ++ errcode = kbase_hwcnt_jm_metadata_create(&hwcnt_gpu_info, ++ info->counter_set, ++ &info->metadata, ++ &info->dump_bytes); ++ if (errcode) ++ goto error; ++ ++ *out_info = info; ++ ++ return 0; ++error: ++ kbasep_hwcnt_backend_jm_info_destroy(info); ++ return errcode; ++} ++ ++int kbase_hwcnt_backend_jm_create( ++ struct kbase_device *kbdev, ++ struct kbase_hwcnt_backend_interface *iface) ++{ ++ int errcode; ++ const struct kbase_hwcnt_backend_jm_info *info = NULL; ++ ++ if (!kbdev || !iface) ++ return -EINVAL; ++ ++ errcode = kbasep_hwcnt_backend_jm_info_create(kbdev, &info); ++ ++ if (errcode) ++ return errcode; ++ ++ iface->info = (struct kbase_hwcnt_backend_info *)info; ++ iface->metadata = kbasep_hwcnt_backend_jm_metadata; ++ iface->init = kbasep_hwcnt_backend_jm_init; ++ iface->term = kbasep_hwcnt_backend_jm_term; ++ iface->timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns; ++ iface->dump_enable = kbasep_hwcnt_backend_jm_dump_enable; ++ iface->dump_enable_nolock = kbasep_hwcnt_backend_jm_dump_enable_nolock; ++ iface->dump_disable = kbasep_hwcnt_backend_jm_dump_disable; ++ iface->dump_clear = kbasep_hwcnt_backend_jm_dump_clear; ++ 
iface->dump_request = kbasep_hwcnt_backend_jm_dump_request; ++ iface->dump_wait = kbasep_hwcnt_backend_jm_dump_wait; ++ iface->dump_get = kbasep_hwcnt_backend_jm_dump_get; ++ ++ return 0; ++} ++ ++void kbase_hwcnt_backend_jm_destroy( ++ struct kbase_hwcnt_backend_interface *iface) ++{ ++ if (!iface) ++ return; ++ ++ kbasep_hwcnt_backend_jm_info_destroy( ++ (const struct kbase_hwcnt_backend_jm_info *)iface->info); ++ memset(iface, 0, sizeof(*iface)); ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_jm.h +similarity index 75% +rename from dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.h +rename to dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_jm.h +index 7712f14..5319516 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_jm.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,24 +17,22 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /** +- * Concrete implementation of mali_kbase_hwcnt_backend interface for GPU ++ * Concrete implementation of mali_kbase_hwcnt_backend interface for JM + * backend. 
+ */ + +-#ifndef _KBASE_HWCNT_BACKEND_GPU_H_ +-#define _KBASE_HWCNT_BACKEND_GPU_H_ ++#ifndef _KBASE_HWCNT_BACKEND_JM_H_ ++#define _KBASE_HWCNT_BACKEND_JM_H_ + + #include "mali_kbase_hwcnt_backend.h" + + struct kbase_device; + + /** +- * kbase_hwcnt_backend_gpu_create() - Create a GPU hardware counter backend ++ * kbase_hwcnt_backend_jm_create() - Create a JM hardware counter backend + * interface. + * @kbdev: Non-NULL pointer to kbase device. + * @iface: Non-NULL pointer to backend interface structure that is filled in +@@ -43,19 +42,19 @@ struct kbase_device; + * + * Return: 0 on success, else error code. + */ +-int kbase_hwcnt_backend_gpu_create( ++int kbase_hwcnt_backend_jm_create( + struct kbase_device *kbdev, + struct kbase_hwcnt_backend_interface *iface); + + /** +- * kbase_hwcnt_backend_gpu_destroy() - Destroy a GPU hardware counter backend ++ * kbase_hwcnt_backend_jm_destroy() - Destroy a JM hardware counter backend + * interface. + * @iface: Pointer to interface to destroy. + * + * Can be safely called on an all-zeroed interface, or on an already destroyed + * interface. + */ +-void kbase_hwcnt_backend_gpu_destroy( ++void kbase_hwcnt_backend_jm_destroy( + struct kbase_hwcnt_backend_interface *iface); + +-#endif /* _KBASE_HWCNT_BACKEND_GPU_H_ */ ++#endif /* _KBASE_HWCNT_BACKEND_JM_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_context.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_context.h +index bc50ad1..1adf2ef 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_context.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_context.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /** +@@ -28,6 +27,7 @@ + #define _KBASE_HWCNT_CONTEXT_H_ + + #include <linux/types.h> ++#include <linux/workqueue.h> + + struct kbase_hwcnt_backend_interface; + struct kbase_hwcnt_context; +@@ -66,7 +66,7 @@ const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata( + + /** + * kbase_hwcnt_context_disable() - Increment the disable count of the context. +- * @hctx: Pointer to the hardware counter context. ++ * @hctx: Non-NULL pointer to the hardware counter context. + * + * If a call to this function increments the disable count from 0 to 1, and + * an accumulator has been acquired, then a counter dump will be performed +@@ -84,7 +84,7 @@ void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx); + * kbase_hwcnt_context_disable_atomic() - Increment the disable count of the + * context if possible in an atomic + * context. +- * @hctx: Pointer to the hardware counter context. ++ * @hctx: Non-NULL pointer to the hardware counter context. + * + * This function will only succeed if hardware counters are effectively already + * disabled, i.e. there is no accumulator, the disable count is already +@@ -99,7 +99,7 @@ bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx); + + /** + * kbase_hwcnt_context_enable() - Decrement the disable count of the context. +- * @hctx: Pointer to the hardware counter context. ++ * @hctx: Non-NULL pointer to the hardware counter context.
+ * + * If a call to this function decrements the disable count from 1 to 0, and + * an accumulator has been acquired, then counters will be re-enabled via the +@@ -116,4 +116,36 @@ bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx); + */ + void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx); + ++/** ++ * kbase_hwcnt_context_queue_work() - Queue hardware counter related async ++ * work on a workqueue specialized for ++ * hardware counters. ++ * @hctx: Non-NULL pointer to the hardware counter context. ++ * @work: Non-NULL pointer to work to queue. ++ * ++ * Return: false if work was already on a queue, true otherwise. ++ * ++ * Performance counter related work is high priority, short running, and ++ * generally CPU locality is unimportant. There is no standard workqueue that ++ * can service this flavor of work. ++ * ++ * Rather than have each user of counters define their own workqueue, we have ++ * a centralized one in here that anybody using this hardware counter API ++ * should use. ++ * ++ * Before the context is destroyed, all work submitted must have been completed. ++ * Given that the work enqueued via this function is likely to be hardware ++ * counter related and will therefore use the context object, this is likely ++ * to be behavior that will occur naturally. ++ * ++ * Historical note: prior to this centralized workqueue, the system_highpri_wq ++ * was used. This was generally fine, except when a particularly long running, ++ * higher priority thread ended up scheduled on the enqueuing CPU core. Given ++ * that hardware counters requires tight integration with power management, ++ * this meant progress through the power management states could be stalled ++ * for however long that higher priority thread took. 
++ */ ++bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx, ++ struct work_struct *work); ++ + #endif /* _KBASE_HWCNT_CONTEXT_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.c +index 095c765..2975269 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2018-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,170 +17,111 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include "mali_kbase_hwcnt_gpu.h" + #include "mali_kbase_hwcnt_types.h" +-#include "mali_kbase.h" +-#ifdef CONFIG_MALI_NO_MALI +-#include "backend/gpu/mali_kbase_model_dummy.h" +-#endif +- +-#define KBASE_HWCNT_V4_BLOCKS_PER_GROUP 8 +-#define KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP 4 +-#define KBASE_HWCNT_V4_MAX_GROUPS \ +- (KBASE_HWCNT_AVAIL_MASK_BITS / KBASE_HWCNT_V4_BLOCKS_PER_GROUP) +-#define KBASE_HWCNT_V4_HEADERS_PER_BLOCK 4 +-#define KBASE_HWCNT_V4_COUNTERS_PER_BLOCK 60 +-#define KBASE_HWCNT_V4_VALUES_PER_BLOCK \ +- (KBASE_HWCNT_V4_HEADERS_PER_BLOCK + KBASE_HWCNT_V4_COUNTERS_PER_BLOCK) +-/* Index of the PRFCNT_EN header into a V4 counter block */ +-#define KBASE_HWCNT_V4_PRFCNT_EN_HEADER 2 +- +-#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4 +-#define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4 +-#define KBASE_HWCNT_V5_COUNTERS_PER_BLOCK 60 +-#define KBASE_HWCNT_V5_VALUES_PER_BLOCK \ +- (KBASE_HWCNT_V5_HEADERS_PER_BLOCK + KBASE_HWCNT_V5_COUNTERS_PER_BLOCK) +-/* Index of the PRFCNT_EN header into a V5 counter block */ +-#define KBASE_HWCNT_V5_PRFCNT_EN_HEADER 2 +- +-/** +- * kbasep_hwcnt_backend_gpu_metadata_v4_create() - Create hardware counter +- * metadata for a v4 GPU. +- * @v4_info: Non-NULL pointer to hwcnt info for a v4 GPU. +- * @metadata: Non-NULL pointer to where created metadata is stored on success. +- * +- * Return: 0 on success, else error code. +- */ +-static int kbasep_hwcnt_backend_gpu_metadata_v4_create( +- const struct kbase_hwcnt_gpu_v4_info *v4_info, +- const struct kbase_hwcnt_metadata **metadata) +-{ +- size_t grp; +- int errcode = -ENOMEM; +- struct kbase_hwcnt_description desc; +- struct kbase_hwcnt_group_description *grps; +- size_t avail_mask_bit; + +- WARN_ON(!v4_info); +- WARN_ON(!metadata); ++#include ++#include + +- /* Check if there are enough bits in the availability mask to represent +- * all the hardware counter blocks in the system. 
+- */ +- if (v4_info->cg_count > KBASE_HWCNT_V4_MAX_GROUPS) +- return -EINVAL; + +- grps = kcalloc(v4_info->cg_count, sizeof(*grps), GFP_KERNEL); +- if (!grps) +- goto clean_up; +- +- desc.grp_cnt = v4_info->cg_count; +- desc.grps = grps; +- +- for (grp = 0; grp < v4_info->cg_count; grp++) { +- size_t blk; +- size_t sc; +- const u64 core_mask = v4_info->cgs[grp].core_mask; +- struct kbase_hwcnt_block_description *blks = kcalloc( +- KBASE_HWCNT_V4_BLOCKS_PER_GROUP, +- sizeof(*blks), +- GFP_KERNEL); +- +- if (!blks) +- goto clean_up; +- +- grps[grp].type = KBASE_HWCNT_GPU_GROUP_TYPE_V4; +- grps[grp].blk_cnt = KBASE_HWCNT_V4_BLOCKS_PER_GROUP; +- grps[grp].blks = blks; +- +- for (blk = 0; blk < KBASE_HWCNT_V4_BLOCKS_PER_GROUP; blk++) { +- blks[blk].inst_cnt = 1; +- blks[blk].hdr_cnt = +- KBASE_HWCNT_V4_HEADERS_PER_BLOCK; +- blks[blk].ctr_cnt = +- KBASE_HWCNT_V4_COUNTERS_PER_BLOCK; ++static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, ++ bool is_csf) ++{ ++ switch (counter_set) { ++ case KBASE_HWCNT_SET_PRIMARY: ++ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE; ++ break; ++ case KBASE_HWCNT_SET_SECONDARY: ++ if (is_csf) { ++ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2; ++ } else { ++ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED; + } +- +- for (sc = 0; sc < KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP; sc++) { +- blks[sc].type = core_mask & (1ull << sc) ? +- KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER : +- KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED; ++ break; ++ case KBASE_HWCNT_SET_TERTIARY: ++ if (is_csf) { ++ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3; ++ } else { ++ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED; + } +- +- blks[4].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER; +- blks[5].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2; +- blks[6].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED; +- blks[7].type = (grp == 0) ? 
+- KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM : +- KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED; +- +- WARN_ON(KBASE_HWCNT_V4_BLOCKS_PER_GROUP != 8); ++ break; ++ default: ++ WARN_ON(true); + } ++} + +- /* Initialise the availability mask */ +- desc.avail_mask = 0; +- avail_mask_bit = 0; +- +- for (grp = 0; grp < desc.grp_cnt; grp++) { +- size_t blk; +- const struct kbase_hwcnt_block_description *blks = +- desc.grps[grp].blks; +- for (blk = 0; blk < desc.grps[grp].blk_cnt; blk++) { +- WARN_ON(blks[blk].inst_cnt != 1); +- if (blks[blk].type != +- KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED) +- desc.avail_mask |= (1ull << avail_mask_bit); +- +- avail_mask_bit++; +- } ++static void kbasep_get_tiler_block_type(u64 *dst, ++ enum kbase_hwcnt_set counter_set) ++{ ++ switch (counter_set) { ++ case KBASE_HWCNT_SET_PRIMARY: ++ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER; ++ break; ++ case KBASE_HWCNT_SET_SECONDARY: ++ case KBASE_HWCNT_SET_TERTIARY: ++ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED; ++ break; ++ default: ++ WARN_ON(true); + } ++} + +- errcode = kbase_hwcnt_metadata_create(&desc, metadata); +- +- /* Always clean up, as metadata will make a copy of the input args */ +-clean_up: +- if (grps) { +- for (grp = 0; grp < v4_info->cg_count; grp++) +- kfree(grps[grp].blks); +- kfree(grps); ++static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, ++ bool is_csf) ++{ ++ switch (counter_set) { ++ case KBASE_HWCNT_SET_PRIMARY: ++ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC; ++ break; ++ case KBASE_HWCNT_SET_SECONDARY: ++ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2; ++ break; ++ case KBASE_HWCNT_SET_TERTIARY: ++ if (is_csf) { ++ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3; ++ } else { ++ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED; ++ } ++ break; ++ default: ++ WARN_ON(true); + } +- return errcode; + } + +-/** +- * kbasep_hwcnt_backend_gpu_v4_dump_bytes() - Get the raw dump buffer size for a +- * V4 GPU. 
+- * @v4_info: Non-NULL pointer to hwcnt info for a v4 GPU. +- * +- * Return: Size of buffer the V4 GPU needs to perform a counter dump. +- */ +-static size_t kbasep_hwcnt_backend_gpu_v4_dump_bytes( +- const struct kbase_hwcnt_gpu_v4_info *v4_info) ++static void kbasep_get_memsys_block_type(u64 *dst, ++ enum kbase_hwcnt_set counter_set) + { +- return v4_info->cg_count * +- KBASE_HWCNT_V4_BLOCKS_PER_GROUP * +- KBASE_HWCNT_V4_VALUES_PER_BLOCK * +- KBASE_HWCNT_VALUE_BYTES; ++ switch (counter_set) { ++ case KBASE_HWCNT_SET_PRIMARY: ++ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS; ++ break; ++ case KBASE_HWCNT_SET_SECONDARY: ++ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2; ++ break; ++ case KBASE_HWCNT_SET_TERTIARY: ++ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED; ++ break; ++ default: ++ WARN_ON(true); ++ } + } + + /** +- * kbasep_hwcnt_backend_gpu_metadata_v5_create() - Create hardware counter +- * metadata for a v5 GPU. +- * @v5_info: Non-NULL pointer to hwcnt info for a v5 GPU. +- * @use_secondary: True if secondary performance counters should be used, else +- * false. Ignored if secondary counters are not supported. ++ * kbasep_hwcnt_backend_gpu_metadata_create() - Create hardware counter metadata ++ * for the GPU. ++ * @gpu_info: Non-NULL pointer to hwcnt info for current GPU. ++ * @is_csf: true for CSF GPU, otherwise false. ++ * @counter_set: The performance counter set to use. + * @metadata: Non-NULL pointer to where created metadata is stored + * on success. + * + * Return: 0 on success, else error code. 
+ */ +-static int kbasep_hwcnt_backend_gpu_metadata_v5_create( +- const struct kbase_hwcnt_gpu_v5_info *v5_info, +- bool use_secondary, ++static int kbasep_hwcnt_backend_gpu_metadata_create( ++ const struct kbase_hwcnt_gpu_info *gpu_info, const bool is_csf, ++ enum kbase_hwcnt_set counter_set, + const struct kbase_hwcnt_metadata **metadata) + { + struct kbase_hwcnt_description desc; +@@ -189,13 +131,13 @@ static int kbasep_hwcnt_backend_gpu_metadata_v5_create( + size_t non_sc_block_count; + size_t sc_block_count; + +- WARN_ON(!v5_info); ++ WARN_ON(!gpu_info); + WARN_ON(!metadata); + + /* Calculate number of block instances that aren't shader cores */ +- non_sc_block_count = 2 + v5_info->l2_count; ++ non_sc_block_count = 2 + gpu_info->l2_count; + /* Calculate number of block instances that are shader cores */ +- sc_block_count = fls64(v5_info->core_mask); ++ sc_block_count = fls64(gpu_info->core_mask); + + /* + * A system can have up to 64 shader cores, but the 64-bit +@@ -207,25 +149,26 @@ static int kbasep_hwcnt_backend_gpu_metadata_v5_create( + if ((sc_block_count + non_sc_block_count) > KBASE_HWCNT_AVAIL_MASK_BITS) + return -EINVAL; + +- /* One Job Manager block */ +- blks[0].type = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM; ++ /* One Front End block */ ++ kbasep_get_fe_block_type(&blks[0].type, counter_set, is_csf); + blks[0].inst_cnt = 1; + blks[0].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; +- blks[0].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; ++ blks[0].ctr_cnt = gpu_info->prfcnt_values_per_block - ++ KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + + /* One Tiler block */ +- blks[1].type = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER; ++ kbasep_get_tiler_block_type(&blks[1].type, counter_set); + blks[1].inst_cnt = 1; + blks[1].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; +- blks[1].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; ++ blks[1].ctr_cnt = gpu_info->prfcnt_values_per_block - ++ KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + + /* l2_count memsys blks */ +- blks[2].type = 
use_secondary ? +- KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2 : +- KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS; +- blks[2].inst_cnt = v5_info->l2_count; ++ kbasep_get_memsys_block_type(&blks[2].type, counter_set); ++ blks[2].inst_cnt = gpu_info->l2_count; + blks[2].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; +- blks[2].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; ++ blks[2].ctr_cnt = gpu_info->prfcnt_values_per_block - ++ KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + + /* + * There are as many shader cores in the system as there are bits set in +@@ -243,12 +186,11 @@ static int kbasep_hwcnt_backend_gpu_metadata_v5_create( + * requirements, and embed the core mask into the availability mask so + * we can determine later which shader cores physically exist. + */ +- blks[3].type = use_secondary ? +- KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 : +- KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC; ++ kbasep_get_sc_block_type(&blks[3].type, counter_set, is_csf); + blks[3].inst_cnt = sc_block_count; + blks[3].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; +- blks[3].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; ++ blks[3].ctr_cnt = gpu_info->prfcnt_values_per_block - ++ KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + + WARN_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 4); + +@@ -258,61 +200,35 @@ static int kbasep_hwcnt_backend_gpu_metadata_v5_create( + + desc.grp_cnt = 1; + desc.grps = &group; ++ desc.clk_cnt = gpu_info->clk_cnt; + + /* The JM, Tiler, and L2s are always available, and are before cores */ + desc.avail_mask = (1ull << non_sc_block_count) - 1; + /* Embed the core mask directly in the availability mask */ +- desc.avail_mask |= (v5_info->core_mask << non_sc_block_count); ++ desc.avail_mask |= (gpu_info->core_mask << non_sc_block_count); + + return kbase_hwcnt_metadata_create(&desc, metadata); + } + + /** +- * kbasep_hwcnt_backend_gpu_v5_dump_bytes() - Get the raw dump buffer size for a +- * V5 GPU. +- * @v5_info: Non-NULL pointer to hwcnt info for a v5 GPU. 
++ * kbasep_hwcnt_backend_jm_dump_bytes() - Get the raw dump buffer size for the ++ * GPU. ++ * @gpu_info: Non-NULL pointer to hwcnt info for the GPU. + * +- * Return: Size of buffer the V5 GPU needs to perform a counter dump. ++ * Return: Size of buffer the GPU needs to perform a counter dump. + */ +-static size_t kbasep_hwcnt_backend_gpu_v5_dump_bytes( +- const struct kbase_hwcnt_gpu_v5_info *v5_info) ++static size_t ++kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_info *gpu_info) + { +- WARN_ON(!v5_info); +- return (2 + v5_info->l2_count + fls64(v5_info->core_mask)) * +- KBASE_HWCNT_V5_VALUES_PER_BLOCK * +- KBASE_HWCNT_VALUE_BYTES; +-} ++ WARN_ON(!gpu_info); + +-int kbase_hwcnt_gpu_info_init( +- struct kbase_device *kbdev, +- struct kbase_hwcnt_gpu_info *info) +-{ +- if (!kbdev || !info) +- return -EINVAL; +- +-#ifdef CONFIG_MALI_NO_MALI +- /* NO_MALI uses V5 layout, regardless of the underlying platform. */ +- info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V5; +- info->v5.l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS; +- info->v5.core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1; +-#else +- { +- const struct base_gpu_props *props = &kbdev->gpu_props.props; +- const size_t l2_count = props->l2_props.num_l2_slices; +- const size_t core_mask = +- props->coherency_info.group[0].core_mask; +- +- info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V5; +- info->v5.l2_count = l2_count; +- info->v5.core_mask = core_mask; +- } +-#endif +- return 0; ++ return (2 + gpu_info->l2_count + fls64(gpu_info->core_mask)) * ++ gpu_info->prfcnt_values_per_block * KBASE_HWCNT_VALUE_BYTES; + } + +-int kbase_hwcnt_gpu_metadata_create( +- const struct kbase_hwcnt_gpu_info *info, +- bool use_secondary, ++int kbase_hwcnt_jm_metadata_create( ++ const struct kbase_hwcnt_gpu_info *gpu_info, ++ enum kbase_hwcnt_set counter_set, + const struct kbase_hwcnt_metadata **out_metadata, + size_t *out_dump_bytes) + { +@@ -320,23 +236,19 @@ int kbase_hwcnt_gpu_metadata_create( + const struct 
kbase_hwcnt_metadata *metadata; + size_t dump_bytes; + +- if (!info || !out_metadata || !out_dump_bytes) ++ if (!gpu_info || !out_metadata || !out_dump_bytes) + return -EINVAL; + +- switch (info->type) { +- case KBASE_HWCNT_GPU_GROUP_TYPE_V4: +- dump_bytes = kbasep_hwcnt_backend_gpu_v4_dump_bytes(&info->v4); +- errcode = kbasep_hwcnt_backend_gpu_metadata_v4_create( +- &info->v4, &metadata); +- break; +- case KBASE_HWCNT_GPU_GROUP_TYPE_V5: +- dump_bytes = kbasep_hwcnt_backend_gpu_v5_dump_bytes(&info->v5); +- errcode = kbasep_hwcnt_backend_gpu_metadata_v5_create( +- &info->v5, use_secondary, &metadata); +- break; +- default: +- return -EINVAL; +- } ++ /* ++ * For architectures where a max_config interface is available ++ * from the arbiter, the v5 dump bytes and the metadata v5 are ++ * based on the maximum possible allocation of the HW in the ++ * GPU cause it needs to be prepared for the worst case where ++ * all the available L2 cache and Shader cores are allocated. ++ */ ++ dump_bytes = kbasep_hwcnt_backend_jm_dump_bytes(gpu_info); ++ errcode = kbasep_hwcnt_backend_gpu_metadata_create( ++ gpu_info, false, counter_set, &metadata); + if (errcode) + return errcode; + +@@ -351,9 +263,37 @@ int kbase_hwcnt_gpu_metadata_create( + + return 0; + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_metadata_create); + +-void kbase_hwcnt_gpu_metadata_destroy( ++void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata) ++{ ++ if (!metadata) ++ return; ++ ++ kbase_hwcnt_metadata_destroy(metadata); ++} ++ ++int kbase_hwcnt_csf_metadata_create( ++ const struct kbase_hwcnt_gpu_info *gpu_info, ++ enum kbase_hwcnt_set counter_set, ++ const struct kbase_hwcnt_metadata **out_metadata) ++{ ++ int errcode; ++ const struct kbase_hwcnt_metadata *metadata; ++ ++ if (!gpu_info || !out_metadata) ++ return -EINVAL; ++ ++ errcode = kbasep_hwcnt_backend_gpu_metadata_create( ++ gpu_info, true, counter_set, &metadata); ++ if (errcode) ++ return errcode; ++ ++ *out_metadata = 
metadata; ++ ++ return 0; ++} ++ ++void kbase_hwcnt_csf_metadata_destroy( + const struct kbase_hwcnt_metadata *metadata) + { + if (!metadata) +@@ -361,7 +301,127 @@ void kbase_hwcnt_gpu_metadata_destroy( + + kbase_hwcnt_metadata_destroy(metadata); + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_metadata_destroy); ++ ++int kbase_hwcnt_gpu_metadata_create_truncate_64( ++ const struct kbase_hwcnt_metadata **dst_md, ++ const struct kbase_hwcnt_metadata *src_md) ++{ ++ struct kbase_hwcnt_description desc; ++ struct kbase_hwcnt_group_description group; ++ struct kbase_hwcnt_block_description ++ blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT]; ++ size_t prfcnt_values_per_block; ++ size_t blk; ++ ++ if (!dst_md || !src_md || !src_md->grp_metadata || ++ !src_md->grp_metadata[0].blk_metadata) ++ return -EINVAL; ++ ++ /* Only support 1 group count and KBASE_HWCNT_V5_BLOCK_TYPE_COUNT block ++ * count in the metadata. ++ */ ++ if ((kbase_hwcnt_metadata_group_count(src_md) != 1) || ++ (kbase_hwcnt_metadata_block_count(src_md, 0) != ++ KBASE_HWCNT_V5_BLOCK_TYPE_COUNT)) ++ return -EINVAL; ++ ++ /* Get the values count in the first block. */ ++ prfcnt_values_per_block = ++ kbase_hwcnt_metadata_block_values_count(src_md, 0, 0); ++ ++ /* check all blocks should have same values count. */ ++ for (blk = 0; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) { ++ size_t val_cnt = ++ kbase_hwcnt_metadata_block_values_count(src_md, 0, blk); ++ if (val_cnt != prfcnt_values_per_block) ++ return -EINVAL; ++ } ++ ++ /* Only support 64 and 128 entries per block. */ ++ if ((prfcnt_values_per_block != 64) && (prfcnt_values_per_block != 128)) ++ return -EINVAL; ++ ++ if (prfcnt_values_per_block == 64) { ++ /* If the values per block is 64, no need to truncate. */ ++ *dst_md = NULL; ++ return 0; ++ } ++ ++ /* Truncate from 128 to 64 entries per block to keep API backward ++ * compatibility. 
++ */ ++ prfcnt_values_per_block = 64; ++ ++ for (blk = 0; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) { ++ blks[blk].type = ++ kbase_hwcnt_metadata_block_type(src_md, 0, blk); ++ blks[blk].inst_cnt = kbase_hwcnt_metadata_block_instance_count( ++ src_md, 0, blk); ++ blks[blk].hdr_cnt = kbase_hwcnt_metadata_block_headers_count( ++ src_md, 0, blk); ++ blks[blk].ctr_cnt = prfcnt_values_per_block - blks[blk].hdr_cnt; ++ } ++ ++ group.type = kbase_hwcnt_metadata_group_type(src_md, 0); ++ group.blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; ++ group.blks = blks; ++ ++ desc.grp_cnt = kbase_hwcnt_metadata_group_count(src_md); ++ desc.avail_mask = src_md->avail_mask; ++ desc.clk_cnt = src_md->clk_cnt; ++ desc.grps = &group; ++ ++ return kbase_hwcnt_metadata_create(&desc, dst_md); ++} ++ ++void kbase_hwcnt_dump_buffer_copy_strict_narrow( ++ struct kbase_hwcnt_dump_buffer *dst, ++ const struct kbase_hwcnt_dump_buffer *src, ++ const struct kbase_hwcnt_enable_map *dst_enable_map) ++{ ++ const struct kbase_hwcnt_metadata *metadata; ++ size_t grp, blk, blk_inst; ++ size_t clk; ++ ++ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || ++ WARN_ON(dst == src) || WARN_ON(dst->metadata == src->metadata) || ++ WARN_ON(dst->metadata->grp_cnt != src->metadata->grp_cnt) || ++ WARN_ON(src->metadata->grp_cnt != 1) || ++ WARN_ON(dst->metadata->grp_metadata[0].blk_cnt != ++ src->metadata->grp_metadata[0].blk_cnt) || ++ WARN_ON(dst->metadata->grp_metadata[0].blk_cnt != 4) || ++ WARN_ON(dst->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt > ++ src->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt)) ++ return; ++ ++ /* Don't use src metadata since src buffer is bigger than dst buffer. 
*/ ++ metadata = dst->metadata; ++ ++ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { ++ u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( ++ dst, grp, blk, blk_inst); ++ const u32 *src_blk = kbase_hwcnt_dump_buffer_block_instance( ++ src, grp, blk, blk_inst); ++ const u64 *blk_em = kbase_hwcnt_enable_map_block_instance( ++ dst_enable_map, grp, blk, blk_inst); ++ size_t val_cnt = kbase_hwcnt_metadata_block_values_count( ++ metadata, grp, blk); ++ /* Align upwards to include padding bytes */ ++ val_cnt = KBASE_HWCNT_ALIGN_UPWARDS( ++ val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / ++ KBASE_HWCNT_VALUE_BYTES)); ++ ++ kbase_hwcnt_dump_buffer_block_copy_strict(dst_blk, src_blk, ++ blk_em, val_cnt); ++ } ++ ++ kbase_hwcnt_metadata_for_each_clock(metadata, clk) { ++ bool clk_enabled = kbase_hwcnt_clk_enable_map_enabled( ++ dst_enable_map->clk_enable_map, clk); ++ ++ dst->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0; ++ } ++} + + static bool is_block_type_shader( + const u64 grp_type, +@@ -370,44 +430,53 @@ static bool is_block_type_shader( + { + bool is_shader = false; + ++ /* Warn on unknown group type */ ++ if (WARN_ON(grp_type != KBASE_HWCNT_GPU_GROUP_TYPE_V5)) ++ return false; ++ ++ if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC || ++ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 || ++ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3) ++ is_shader = true; ++ ++ return is_shader; ++} ++ ++static bool is_block_type_l2_cache( ++ const u64 grp_type, ++ const u64 blk_type) ++{ ++ bool is_l2_cache = false; ++ + switch (grp_type) { +- case KBASE_HWCNT_GPU_GROUP_TYPE_V4: +- /* blk-value in [0, KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP-1] +- * corresponds to a shader, or its implementation +- * reserved. As such, here we use the blk index value to +- * tell the reserved case. 
+- */ +- if (blk_type == KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER || +- (blk < KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP && +- blk_type == KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED)) +- is_shader = true; +- break; + case KBASE_HWCNT_GPU_GROUP_TYPE_V5: +- if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC || +- blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2) +- is_shader = true; ++ if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS || ++ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2) ++ is_l2_cache = true; + break; + default: + /* Warn on unknown group type */ + WARN_ON(true); + } + +- return is_shader; ++ return is_l2_cache; + } + +-int kbase_hwcnt_gpu_dump_get( +- struct kbase_hwcnt_dump_buffer *dst, +- void *src, +- const struct kbase_hwcnt_enable_map *dst_enable_map, +- u64 pm_core_mask, +- bool accumulate) ++int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src, ++ const struct kbase_hwcnt_enable_map *dst_enable_map, ++ u64 pm_core_mask, ++ const struct kbase_hwcnt_curr_config *curr_config, ++ bool accumulate) + { + const struct kbase_hwcnt_metadata *metadata; + const u32 *dump_src; + size_t src_offset, grp, blk, blk_inst; +- size_t grp_prev = 0; + u64 core_mask = pm_core_mask; + ++ /* Variables to deal with the current configuration */ ++ int l2_count = 0; ++ bool hw_res_available = true; ++ + if (!dst || !src || !dst_enable_map || + (dst_enable_map->metadata != dst->metadata)) + return -EINVAL; +@@ -429,27 +498,43 @@ int kbase_hwcnt_gpu_dump_get( + const bool is_shader_core = is_block_type_shader( + kbase_hwcnt_metadata_group_type(metadata, grp), + blk_type, blk); ++ const bool is_l2_cache = is_block_type_l2_cache( ++ kbase_hwcnt_metadata_group_type(metadata, grp), ++ blk_type); + +- if (grp != grp_prev) { +- /* grp change would only happen with V4. V5 and +- * further are envisaged to be single group +- * scenario only. 
Here needs to drop the lower +- * group core-mask by shifting right with +- * KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP. +- */ +- core_mask = pm_core_mask >> +- KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP; +- grp_prev = grp; ++ /* ++ * If l2 blocks is greater than the current allocated number of ++ * L2 slices, there is no hw allocated to that block. ++ */ ++ if (is_l2_cache) { ++ l2_count++; ++ if (l2_count > curr_config->num_l2_slices) ++ hw_res_available = false; ++ else ++ hw_res_available = true; ++ } ++ /* ++ * For the shader cores, the current shader_mask allocated is ++ * always a subgroup of the maximum shader_mask, so after ++ * jumping any L2 cache not available the available shader cores ++ * will always have a matching set of blk instances available to ++ * accumulate them. ++ */ ++ else { ++ hw_res_available = true; + } + +- /* Early out if no values in the dest block are enabled */ ++ /* ++ * Early out if no values in the dest block are enabled or if ++ * the resource target of the block is not available in the HW. 
++ */ + if (kbase_hwcnt_enable_map_block_enabled( + dst_enable_map, grp, blk, blk_inst)) { + u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + const u32 *src_blk = dump_src + src_offset; + +- if (!is_shader_core || (core_mask & 1)) { ++ if ((!is_shader_core || (core_mask & 1)) && hw_res_available) { + if (accumulate) { + kbase_hwcnt_dump_buffer_block_accumulate( + dst_blk, src_blk, hdr_cnt, +@@ -465,14 +550,60 @@ int kbase_hwcnt_gpu_dump_get( + } + } + +- src_offset += (hdr_cnt + ctr_cnt); ++ /* Just increase the src_offset if the HW is available */ ++ if (hw_res_available) ++ src_offset += (hdr_cnt + ctr_cnt); + if (is_shader_core) + core_mask = core_mask >> 1; + } + + return 0; + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_dump_get); ++ ++int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src, ++ const struct kbase_hwcnt_enable_map *dst_enable_map, ++ bool accumulate) ++{ ++ const struct kbase_hwcnt_metadata *metadata; ++ const u32 *dump_src; ++ size_t src_offset, grp, blk, blk_inst; ++ ++ if (!dst || !src || !dst_enable_map || ++ (dst_enable_map->metadata != dst->metadata)) ++ return -EINVAL; ++ ++ metadata = dst->metadata; ++ dump_src = (const u32 *)src; ++ src_offset = 0; ++ ++ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { ++ const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count( ++ metadata, grp, blk); ++ const size_t ctr_cnt = ++ kbase_hwcnt_metadata_block_counters_count(metadata, grp, ++ blk); ++ ++ /* Early out if no values in the dest block are enabled */ ++ if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, ++ blk, blk_inst)) { ++ u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( ++ dst, grp, blk, blk_inst); ++ const u32 *src_blk = dump_src + src_offset; ++ ++ if (accumulate) { ++ kbase_hwcnt_dump_buffer_block_accumulate( ++ dst_blk, src_blk, hdr_cnt, ctr_cnt); ++ } else { ++ kbase_hwcnt_dump_buffer_block_copy( ++ dst_blk, src_blk, (hdr_cnt + ctr_cnt)); 
++ } ++ } ++ ++ src_offset += (hdr_cnt + ctr_cnt); ++ } ++ ++ return 0; ++} + + /** + * kbasep_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block +@@ -563,7 +694,7 @@ void kbase_hwcnt_gpu_enable_map_to_physical( + { + const struct kbase_hwcnt_metadata *metadata; + +- u64 jm_bm = 0; ++ u64 fe_bm = 0; + u64 shader_bm = 0; + u64 tiler_bm = 0; + u64 mmu_l2_bm = 0; +@@ -581,45 +712,26 @@ void kbase_hwcnt_gpu_enable_map_to_physical( + metadata, grp); + const u64 blk_type = kbase_hwcnt_metadata_block_type( + metadata, grp, blk); +- const size_t blk_val_cnt = +- kbase_hwcnt_metadata_block_values_count( +- metadata, grp, blk); + const u64 *blk_map = kbase_hwcnt_enable_map_block_instance( + src, grp, blk, blk_inst); + +- switch ((enum kbase_hwcnt_gpu_group_type)grp_type) { +- case KBASE_HWCNT_GPU_GROUP_TYPE_V4: +- WARN_ON(blk_val_cnt != KBASE_HWCNT_V4_VALUES_PER_BLOCK); +- switch ((enum kbase_hwcnt_gpu_v4_block_type)blk_type) { +- case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER: +- shader_bm |= *blk_map; +- break; +- case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER: +- tiler_bm |= *blk_map; +- break; +- case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2: +- mmu_l2_bm |= *blk_map; +- break; +- case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM: +- jm_bm |= *blk_map; +- break; +- case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: +- break; +- default: +- WARN_ON(true); +- } +- break; +- case KBASE_HWCNT_GPU_GROUP_TYPE_V5: +- WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK); ++ if ((enum kbase_hwcnt_gpu_group_type)grp_type == ++ KBASE_HWCNT_GPU_GROUP_TYPE_V5) { + switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { +- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM: +- jm_bm |= *blk_map; ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED: ++ /* Nothing to do in this case. 
*/ ++ break; ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: ++ fe_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: + tiler_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: + shader_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: +@@ -629,14 +741,13 @@ void kbase_hwcnt_gpu_enable_map_to_physical( + default: + WARN_ON(true); + } +- break; +- default: ++ } else { + WARN_ON(true); + } + } + +- dst->jm_bm = +- kbasep_hwcnt_backend_gpu_block_map_to_physical(jm_bm, 0); ++ dst->fe_bm = ++ kbasep_hwcnt_backend_gpu_block_map_to_physical(fe_bm, 0); + dst->shader_bm = + kbasep_hwcnt_backend_gpu_block_map_to_physical(shader_bm, 0); + dst->tiler_bm = +@@ -644,7 +755,24 @@ void kbase_hwcnt_gpu_enable_map_to_physical( + dst->mmu_l2_bm = + kbasep_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm, 0); + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_enable_map_to_physical); ++ ++void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, ++ enum kbase_hwcnt_set src) ++{ ++ switch (src) { ++ case KBASE_HWCNT_SET_PRIMARY: ++ *dst = KBASE_HWCNT_PHYSICAL_SET_PRIMARY; ++ break; ++ case KBASE_HWCNT_SET_SECONDARY: ++ *dst = KBASE_HWCNT_PHYSICAL_SET_SECONDARY; ++ break; ++ case KBASE_HWCNT_SET_TERTIARY: ++ *dst = KBASE_HWCNT_PHYSICAL_SET_TERTIARY; ++ break; ++ default: ++ WARN_ON(true); ++ } ++} + + void kbase_hwcnt_gpu_enable_map_from_physical( + struct kbase_hwcnt_enable_map *dst, +@@ -653,7 +781,7 @@ void kbase_hwcnt_gpu_enable_map_from_physical( + const struct kbase_hwcnt_metadata *metadata; + + u64 ignored_hi; +- u64 jm_bm; ++ u64 fe_bm; + u64 shader_bm; + u64 tiler_bm; + u64 mmu_l2_bm; +@@ -665,7 +793,7 @@ void kbase_hwcnt_gpu_enable_map_from_physical( + metadata = dst->metadata; + + 
kbasep_hwcnt_backend_gpu_block_map_from_physical( +- src->jm_bm, &jm_bm, &ignored_hi); ++ src->fe_bm, &fe_bm, &ignored_hi); + kbasep_hwcnt_backend_gpu_block_map_from_physical( + src->shader_bm, &shader_bm, &ignored_hi); + kbasep_hwcnt_backend_gpu_block_map_from_physical( +@@ -678,45 +806,26 @@ void kbase_hwcnt_gpu_enable_map_from_physical( + metadata, grp); + const u64 blk_type = kbase_hwcnt_metadata_block_type( + metadata, grp, blk); +- const size_t blk_val_cnt = +- kbase_hwcnt_metadata_block_values_count( +- metadata, grp, blk); + u64 *blk_map = kbase_hwcnt_enable_map_block_instance( + dst, grp, blk, blk_inst); + +- switch ((enum kbase_hwcnt_gpu_group_type)grp_type) { +- case KBASE_HWCNT_GPU_GROUP_TYPE_V4: +- WARN_ON(blk_val_cnt != KBASE_HWCNT_V4_VALUES_PER_BLOCK); +- switch ((enum kbase_hwcnt_gpu_v4_block_type)blk_type) { +- case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER: +- *blk_map = shader_bm; +- break; +- case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER: +- *blk_map = tiler_bm; +- break; +- case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2: +- *blk_map = mmu_l2_bm; +- break; +- case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM: +- *blk_map = jm_bm; +- break; +- case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: +- break; +- default: +- WARN_ON(true); +- } +- break; +- case KBASE_HWCNT_GPU_GROUP_TYPE_V5: +- WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK); ++ if ((enum kbase_hwcnt_gpu_group_type)grp_type == ++ KBASE_HWCNT_GPU_GROUP_TYPE_V5) { + switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { +- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM: +- *blk_map = jm_bm; ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED: ++ /* Nothing to do in this case. 
*/ ++ break; ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: ++ *blk_map = fe_bm; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: + *blk_map = tiler_bm; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: ++ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: + *blk_map = shader_bm; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: +@@ -726,13 +835,11 @@ void kbase_hwcnt_gpu_enable_map_from_physical( + default: + WARN_ON(true); + } +- break; +- default: ++ } else { + WARN_ON(true); + } + } + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_enable_map_from_physical); + + void kbase_hwcnt_gpu_patch_dump_headers( + struct kbase_hwcnt_dump_buffer *buf, +@@ -758,16 +865,11 @@ void kbase_hwcnt_gpu_patch_dump_headers( + kbasep_hwcnt_backend_gpu_block_map_to_physical( + blk_map[0], 0); + +- switch ((enum kbase_hwcnt_gpu_group_type)grp_type) { +- case KBASE_HWCNT_GPU_GROUP_TYPE_V4: +- buf_blk[KBASE_HWCNT_V4_PRFCNT_EN_HEADER] = prfcnt_en; +- break; +- case KBASE_HWCNT_GPU_GROUP_TYPE_V5: ++ if ((enum kbase_hwcnt_gpu_group_type)grp_type == ++ KBASE_HWCNT_GPU_GROUP_TYPE_V5) { + buf_blk[KBASE_HWCNT_V5_PRFCNT_EN_HEADER] = prfcnt_en; +- break; +- default: ++ } else { + WARN_ON(true); + } + } + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_patch_dump_headers); +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.h +index 12891e0..50ae80d 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KBASE_HWCNT_GPU_H_ +@@ -30,125 +29,157 @@ struct kbase_hwcnt_metadata; + struct kbase_hwcnt_enable_map; + struct kbase_hwcnt_dump_buffer; + ++#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4 ++#define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4 ++#define KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK 60 ++#define KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK \ ++ (KBASE_HWCNT_V5_HEADERS_PER_BLOCK + \ ++ KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK) ++/** Index of the PRFCNT_EN header into a V5 counter block */ ++#define KBASE_HWCNT_V5_PRFCNT_EN_HEADER 2 ++ + /** + * enum kbase_hwcnt_gpu_group_type - GPU hardware counter group types, used to + * identify metadata groups. +- * @KBASE_HWCNT_GPU_GROUP_TYPE_V4: GPU V4 group type. + * @KBASE_HWCNT_GPU_GROUP_TYPE_V5: GPU V5 group type. + */ + enum kbase_hwcnt_gpu_group_type { +- KBASE_HWCNT_GPU_GROUP_TYPE_V4 = 0x10, + KBASE_HWCNT_GPU_GROUP_TYPE_V5, + }; + +-/** +- * enum kbase_hwcnt_gpu_v4_block_type - GPU V4 hardware counter block types, +- * used to identify metadata blocks. +- * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER: Shader block. +- * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER: Tiler block. +- * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2: MMU/L2 block. +- * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM: Job Manager block. +- * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: Reserved block. 
+- */ +-enum kbase_hwcnt_gpu_v4_block_type { +- KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER = 0x20, +- KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER, +- KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2, +- KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM, +- KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED, +-}; +- + /** + * enum kbase_hwcnt_gpu_v5_block_type - GPU V5 hardware counter block types, + * used to identify metadata blocks. +- * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM: Job Manager block. +- * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: Tiler block. +- * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: Shader Core block. +- * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: Secondary Shader Core block. +- * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: Memsys block. +- * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: Secondary Memsys block. ++ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED: Undefined block (e.g. if a ++ * counter set that a block ++ * doesn't support is used). ++ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: Front End block (Job manager ++ * or CSF HW). ++ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: Secondary Front End block (Job ++ * manager or CSF HW). ++ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: Tertiary Front End block (Job ++ * manager or CSF HW). ++ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: Tiler block. ++ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: Shader Core block. ++ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: Secondary Shader Core block. ++ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: Tertiary Shader Core block. ++ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: Memsys block. ++ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: Secondary Memsys block. 
+ */ + enum kbase_hwcnt_gpu_v5_block_type { +- KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM = 0x40, ++ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED, ++ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE, ++ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2, ++ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2, ++ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2, + }; + ++/** ++ * enum kbase_hwcnt_set - GPU hardware counter sets ++ * @KBASE_HWCNT_SET_PRIMARY: The Primary set of counters ++ * @KBASE_HWCNT_SET_SECONDARY: The Secondary set of counters ++ * @KBASE_HWCNT_SET_TERTIARY: The Tertiary set of counters ++ */ ++enum kbase_hwcnt_set { ++ KBASE_HWCNT_SET_PRIMARY, ++ KBASE_HWCNT_SET_SECONDARY, ++ KBASE_HWCNT_SET_TERTIARY, ++}; ++ + /** + * struct kbase_hwcnt_physical_enable_map - Representation of enable map + * directly used by GPU. +- * @jm_bm: Job Manager counters selection bitmask. ++ * @fe_bm: Front end (JM/CSHW) counters selection bitmask. + * @shader_bm: Shader counters selection bitmask. + * @tiler_bm: Tiler counters selection bitmask. + * @mmu_l2_bm: MMU_L2 counters selection bitmask. + */ + struct kbase_hwcnt_physical_enable_map { +- u32 jm_bm; ++ u32 fe_bm; + u32 shader_bm; + u32 tiler_bm; + u32 mmu_l2_bm; + }; + +-/** +- * struct kbase_hwcnt_gpu_v4_info - Information about hwcnt blocks on v4 GPUs. +- * @cg_count: Core group count. +- * @cgs: Non-NULL pointer to array of cg_count coherent group structures. +- * +- * V4 devices are Mali-T6xx or Mali-T72x, and have one or more core groups, +- * where each core group may have a physically different layout. ++/* ++ * Values for Hardware Counter SET_SELECT value. ++ * Directly passed to HW. 
+ */ +-struct kbase_hwcnt_gpu_v4_info { +- size_t cg_count; +- const struct mali_base_gpu_coherent_group *cgs; ++enum kbase_hwcnt_physical_set { ++ KBASE_HWCNT_PHYSICAL_SET_PRIMARY = 0, ++ KBASE_HWCNT_PHYSICAL_SET_SECONDARY = 1, ++ KBASE_HWCNT_PHYSICAL_SET_TERTIARY = 2, + }; + + /** +- * struct kbase_hwcnt_gpu_v5_info - Information about hwcnt blocks on v5 GPUs. +- * @l2_count: L2 cache count. +- * @core_mask: Shader core mask. May be sparse. ++ * struct kbase_hwcnt_gpu_info - Information about hwcnt blocks on the GPUs. ++ * @l2_count: L2 cache count. ++ * @core_mask: Shader core mask. May be sparse. ++ * @clk_cnt: Number of clock domains available. ++ * @prfcnt_values_per_block: Total entries (header + counters) of performance ++ * counter per block. + */ +-struct kbase_hwcnt_gpu_v5_info { ++struct kbase_hwcnt_gpu_info { + size_t l2_count; + u64 core_mask; ++ u8 clk_cnt; ++ size_t prfcnt_values_per_block; + }; + + /** +- * struct kbase_hwcnt_gpu_info - Tagged union with information about the current +- * GPU's hwcnt blocks. +- * @type: GPU type. +- * @v4: Info filled in if a v4 GPU. +- * @v5: Info filled in if a v5 GPU. +- */ +-struct kbase_hwcnt_gpu_info { +- enum kbase_hwcnt_gpu_group_type type; +- union { +- struct kbase_hwcnt_gpu_v4_info v4; +- struct kbase_hwcnt_gpu_v5_info v5; +- }; +-}; +- +-/** +- * kbase_hwcnt_gpu_info_init() - Initialise an info structure used to create the +- * hwcnt metadata. +- * @kbdev: Non-NULL pointer to kbase device. +- * @info: Non-NULL pointer to data structure to be filled in. ++ * struct kbase_hwcnt_curr_config - Current Configuration of HW allocated to the ++ * GPU. ++ * @num_l2_slices: Current number of L2 slices allocated to the GPU. ++ * @shader_present: Current shader present bitmap that is allocated to the GPU. + * +- * The initialised info struct will only be valid for use while kbdev is valid. 
++ * For architectures with the max_config interface available from the Arbiter, ++ * the current resources allocated may change during runtime due to a ++ * re-partitioning (possible with partition manager). Thus, the HWC needs to be ++ * prepared to report any possible set of counters. For this reason the memory ++ * layout in the userspace is based on the maximum possible allocation. On the ++ * other hand, each partition has just the view of its currently allocated ++ * resources. Therefore, it is necessary to correctly map the dumped HWC values ++ * from the registers into this maximum memory layout so that it can be exposed ++ * to the userspace side correctly. ++ * ++ * For L2 cache just the number is enough once the allocated ones will be ++ * accumulated on the first L2 slots available in the destination buffer. ++ * ++ * For the correct mapping of the shader cores it is necessary to jump all the ++ * L2 cache slots in the destination buffer that are not allocated. But, it is ++ * not necessary to add any logic to map the shader cores bitmap into the memory ++ * layout because the shader_present allocated will always be a subset of the ++ * maximum shader_present. It is possible because: ++ * 1 - Partitions are made of slices and they are always ordered from the ones ++ * with more shader cores to the ones with less. ++ * 2 - The shader cores in a slice are always contiguous. ++ * 3 - A partition can only have a contiguous set of slices allocated to it. ++ * So, for example, if 4 slices are available in total, 1 with 4 cores, 2 with ++ * 3 cores and 1 with 2 cores. The maximum possible shader_present would be: ++ * 0x0011|0111|0111|1111 -> note the order and that the shader cores are ++ * contiguous in any slice. ++ * Supposing that a partition takes the two slices in the middle, the current ++ * config shader_present for this partition would be: ++ * 0x0111|0111 -> note that this is a subset of the maximum above and the slices ++ * are contiguous. 
++ * Therefore, by directly copying any subset of the maximum possible ++ * shader_present the mapping is already achieved. + */ +-int kbase_hwcnt_gpu_info_init( +- struct kbase_device *kbdev, +- struct kbase_hwcnt_gpu_info *info); ++struct kbase_hwcnt_curr_config { ++ size_t num_l2_slices; ++ u64 shader_present; ++}; + + /** +- * kbase_hwcnt_gpu_metadata_create() - Create hardware counter metadata for the +- * current GPU. +- * @info: Non-NULL pointer to info struct initialised by +- * kbase_hwcnt_gpu_info_init. +- * @use_secondary: True if secondary performance counters should be used, else +- * false. Ignored if secondary counters are not supported. ++ * kbase_hwcnt_jm_metadata_create() - Create hardware counter metadata for the ++ * JM GPUs. ++ * @info: Non-NULL pointer to info struct. ++ * @counter_set: The performance counter set used. + * @out_metadata: Non-NULL pointer to where created metadata is stored on + * success. + * @out_dump_bytes: Non-NULL pointer to where the size of the GPU counter dump +@@ -156,44 +187,133 @@ int kbase_hwcnt_gpu_info_init( + * + * Return: 0 on success, else error code. + */ +-int kbase_hwcnt_gpu_metadata_create( ++int kbase_hwcnt_jm_metadata_create( + const struct kbase_hwcnt_gpu_info *info, +- bool use_secondary, ++ enum kbase_hwcnt_set counter_set, + const struct kbase_hwcnt_metadata **out_metadata, + size_t *out_dump_bytes); + + /** +- * kbase_hwcnt_gpu_metadata_destroy() - Destroy GPU hardware counter metadata. ++ * kbase_hwcnt_jm_metadata_destroy() - Destroy JM GPU hardware counter metadata. ++ * ++ * @metadata: Pointer to metadata to destroy. ++ */ ++void kbase_hwcnt_jm_metadata_destroy( ++ const struct kbase_hwcnt_metadata *metadata); ++ ++/** ++ * kbase_hwcnt_csf_metadata_create() - Create hardware counter metadata for the ++ * CSF GPUs. ++ * @info: Non-NULL pointer to info struct. ++ * @counter_set: The performance counter set used. 
++ * @out_metadata: Non-NULL pointer to where created metadata is stored on ++ * success. ++ * ++ * Return: 0 on success, else error code. ++ */ ++int kbase_hwcnt_csf_metadata_create( ++ const struct kbase_hwcnt_gpu_info *info, ++ enum kbase_hwcnt_set counter_set, ++ const struct kbase_hwcnt_metadata **out_metadata); ++ ++/** ++ * kbase_hwcnt_csf_metadata_destroy() - Destroy CSF GPU hardware counter ++ * metadata. + * @metadata: Pointer to metadata to destroy. + */ +-void kbase_hwcnt_gpu_metadata_destroy( ++void kbase_hwcnt_csf_metadata_destroy( + const struct kbase_hwcnt_metadata *metadata); + + /** +- * kbase_hwcnt_gpu_dump_get() - Copy or accumulate enabled counters from the raw ++ * kbase_hwcnt_gpu_metadata_create_truncate_64() - Create HWC metadata with HWC ++ * block entries truncated ++ * to 64. ++ * ++ * @dst_md: Non-NULL pointer to where created metadata is stored on success. ++ * @src_md: Non-NULL pointer to the HWC metadata used as the source to create ++ * dst_md. ++ * ++ * If the total block entries in src_md is 64, metadata dst_md returns NULL ++ * since no need to truncate. ++ * if the total block entries in src_md is 128, then a new metadata with block ++ * entries truncated to 64 will be created for dst_md, which keeps the interface ++ * to user clients backward compatible. ++ * If the total block entries in src_md is other values, function returns error ++ * since it's not supported. ++ * ++ * Return: 0 on success, else error code. ++ */ ++int kbase_hwcnt_gpu_metadata_create_truncate_64( ++ const struct kbase_hwcnt_metadata **dst_md, ++ const struct kbase_hwcnt_metadata *src_md); ++ ++/** ++ * kbase_hwcnt_dump_buffer_copy_strict_narrow() - Copy all enabled values from ++ * src to dst. ++ * ++ * @dst: Non-NULL pointer to dst dump buffer. ++ * @src: Non-NULL pointer to src dump buffer. ++ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. 
++ * ++ * After the operation, all non-enabled values (including padding bytes) will be ++ * zero. ++ * ++ * The dst and src have different metadata, and the dst metadata is narrower ++ * than src metadata. ++ */ ++void kbase_hwcnt_dump_buffer_copy_strict_narrow( ++ struct kbase_hwcnt_dump_buffer *dst, ++ const struct kbase_hwcnt_dump_buffer *src, ++ const struct kbase_hwcnt_enable_map *dst_enable_map); ++ ++/** ++ * kbase_hwcnt_jm_dump_get() - Copy or accumulate enabled counters from the raw ++ * dump buffer in src into the dump buffer ++ * abstraction in dst. ++ * @dst: Non-NULL pointer to dst dump buffer. ++ * @src: Non-NULL pointer to src raw dump buffer, of same length ++ * as returned in out_dump_bytes parameter of ++ * kbase_hwcnt_jm_metadata_create. ++ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. ++ * @pm_core_mask: PM state synchronized shaders core mask with the dump. ++ * @curr_config: Current allocated hardware resources to correctly map the ++ * src raw dump buffer to the dst dump buffer. ++ * @accumulate: True if counters in src should be accumulated into dst, ++ * rather than copied. ++ * ++ * The dst and dst_enable_map MUST have been created from the same metadata as ++ * returned from the call to kbase_hwcnt_jm_metadata_create as was used to get ++ * the length of src. ++ * ++ * Return: 0 on success, else error code. ++ */ ++int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src, ++ const struct kbase_hwcnt_enable_map *dst_enable_map, ++ const u64 pm_core_mask, ++ const struct kbase_hwcnt_curr_config *curr_config, ++ bool accumulate); ++ ++/** ++ * kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw + * dump buffer in src into the dump buffer + * abstraction in dst. + * @dst: Non-NULL pointer to dst dump buffer. + * @src: Non-NULL pointer to src raw dump buffer, of same length + * as returned in out_dump_bytes parameter of +- * kbase_hwcnt_gpu_metadata_create. 
++ * kbase_hwcnt_csf_metadata_create. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. +- * @pm_core_mask: PM state synchronized shaders core mask with the dump. + * @accumulate: True if counters in src should be accumulated into dst, + * rather than copied. + * + * The dst and dst_enable_map MUST have been created from the same metadata as +- * returned from the call to kbase_hwcnt_gpu_metadata_create as was used to get ++ * returned from the call to kbase_hwcnt_csf_metadata_create as was used to get + * the length of src. + * + * Return: 0 on success, else error code. + */ +-int kbase_hwcnt_gpu_dump_get( +- struct kbase_hwcnt_dump_buffer *dst, +- void *src, +- const struct kbase_hwcnt_enable_map *dst_enable_map, +- const u64 pm_core_mask, +- bool accumulate); ++int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src, ++ const struct kbase_hwcnt_enable_map *dst_enable_map, ++ bool accumulate); + + /** + * kbase_hwcnt_gpu_enable_map_to_physical() - Convert an enable map abstraction +@@ -202,7 +322,7 @@ int kbase_hwcnt_gpu_dump_get( + * @src: Non-NULL pointer to src enable map abstraction. + * + * The src must have been created from a metadata returned from a call to +- * kbase_hwcnt_gpu_metadata_create. ++ * kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create. + * + * This is a lossy conversion, as the enable map abstraction has one bit per + * individual counter block value, but the physical enable map uses 1 bit for +@@ -212,6 +332,16 @@ void kbase_hwcnt_gpu_enable_map_to_physical( + struct kbase_hwcnt_physical_enable_map *dst, + const struct kbase_hwcnt_enable_map *src); + ++/** ++ * kbase_hwcnt_gpu_set_to_physical() - Map counter set selection to physical ++ * SET_SELECT value. ++ * ++ * @dst: Non-NULL pointer to dst physical SET_SELECT value. ++ * @src: Non-NULL pointer to src counter set selection. 
++ */ ++void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, ++ enum kbase_hwcnt_set src); ++ + /** + * kbase_hwcnt_gpu_enable_map_from_physical() - Convert a physical enable map to + * an enable map abstraction. +@@ -219,7 +349,7 @@ void kbase_hwcnt_gpu_enable_map_to_physical( + * @src: Non-NULL pointer to src physical enable map. + * + * The dst must have been created from a metadata returned from a call to +- * kbase_hwcnt_gpu_metadata_create. ++ * kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create. + * + * This is a lossy conversion, as the physical enable map can technically + * support counter blocks with 128 counters each, but no hardware actually uses +@@ -238,7 +368,7 @@ void kbase_hwcnt_gpu_enable_map_from_physical( + * @enable_map: Non-NULL pointer to enable map. + * + * The buf and enable_map must have been created from a metadata returned from +- * a call to kbase_hwcnt_gpu_metadata_create. ++ * a call to kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create. + * + * This function should be used before handing off a dump buffer over the + * kernel-user boundary, to ensure the header is accurate for the enable map +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.c +index b0e6aee..0687253 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. 
++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,15 +17,13 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include "mali_kbase_hwcnt_legacy.h" + #include "mali_kbase_hwcnt_virtualizer.h" + #include "mali_kbase_hwcnt_types.h" + #include "mali_kbase_hwcnt_gpu.h" +-#include "mali_kbase_ioctl.h" ++#include + + #include + #include +@@ -69,7 +68,7 @@ int kbase_hwcnt_legacy_client_create( + goto error; + + /* Translate from the ioctl enable map to the internal one */ +- phys_em.jm_bm = enable->jm_bm; ++ phys_em.fe_bm = enable->fe_bm; + phys_em.shader_bm = enable->shader_bm; + phys_em.tiler_bm = enable->tiler_bm; + phys_em.mmu_l2_bm = enable->mmu_l2_bm; +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.h +index 7a610ae..36ff44d 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /** +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.c +index 1e9efde..492f572 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,26 +17,11 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include "mali_kbase_hwcnt_types.h" +-#include "mali_kbase.h" +- +-/* Minimum alignment of each block of hardware counters */ +-#define KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT \ +- (KBASE_HWCNT_BITFIELD_BITS * KBASE_HWCNT_VALUE_BYTES) + +-/** +- * KBASE_HWCNT_ALIGN_UPWARDS() - Align a value to an alignment. +- * @value: The value to align upwards. +- * @alignment: The alignment. +- * +- * Return: A number greater than or equal to value that is aligned to alignment. 
+- */ +-#define KBASE_HWCNT_ALIGN_UPWARDS(value, alignment) \ +- (value + ((alignment - (value % alignment)) % alignment)) ++#include + + int kbase_hwcnt_metadata_create( + const struct kbase_hwcnt_description *desc, +@@ -55,6 +41,10 @@ int kbase_hwcnt_metadata_create( + if (!desc || !out_metadata) + return -EINVAL; + ++ /* The maximum number of clock domains is 64. */ ++ if (desc->clk_cnt > (sizeof(u64) * BITS_PER_BYTE)) ++ return -EINVAL; ++ + /* Calculate the bytes needed to tightly pack the metadata */ + + /* Top level metadata */ +@@ -158,6 +148,7 @@ int kbase_hwcnt_metadata_create( + enable_map_count * KBASE_HWCNT_BITFIELD_BYTES; + metadata->dump_buf_bytes = dump_buf_count * KBASE_HWCNT_VALUE_BYTES; + metadata->avail_mask = desc->avail_mask; ++ metadata->clk_cnt = desc->clk_cnt; + + WARN_ON(size != offset); + /* Due to the block alignment, there should be exactly one enable map +@@ -170,13 +161,11 @@ int kbase_hwcnt_metadata_create( + *out_metadata = metadata; + return 0; + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_metadata_create); + + void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata) + { + kfree(metadata); + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_metadata_destroy); + + int kbase_hwcnt_enable_map_alloc( + const struct kbase_hwcnt_metadata *metadata, +@@ -187,45 +176,55 @@ int kbase_hwcnt_enable_map_alloc( + if (!metadata || !enable_map) + return -EINVAL; + +- enable_map_buf = kzalloc(metadata->enable_map_bytes, GFP_KERNEL); +- if (!enable_map_buf) +- return -ENOMEM; ++ if (metadata->enable_map_bytes > 0) { ++ enable_map_buf = ++ kzalloc(metadata->enable_map_bytes, GFP_KERNEL); ++ if (!enable_map_buf) ++ return -ENOMEM; ++ } else { ++ enable_map_buf = NULL; ++ } + + enable_map->metadata = metadata; +- enable_map->enable_map = enable_map_buf; ++ enable_map->hwcnt_enable_map = enable_map_buf; + return 0; + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_alloc); + + void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map 
*enable_map) + { + if (!enable_map) + return; + +- kfree(enable_map->enable_map); +- enable_map->enable_map = NULL; ++ kfree(enable_map->hwcnt_enable_map); ++ enable_map->hwcnt_enable_map = NULL; + enable_map->metadata = NULL; + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_free); + + int kbase_hwcnt_dump_buffer_alloc( + const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_dump_buffer *dump_buf) + { +- u32 *buf; ++ size_t dump_buf_bytes; ++ size_t clk_cnt_buf_bytes; ++ u8 *buf; + + if (!metadata || !dump_buf) + return -EINVAL; + +- buf = kmalloc(metadata->dump_buf_bytes, GFP_KERNEL); ++ dump_buf_bytes = metadata->dump_buf_bytes; ++ clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * metadata->clk_cnt; ++ ++ /* Make a single allocation for both dump_buf and clk_cnt_buf. */ ++ buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + dump_buf->metadata = metadata; +- dump_buf->dump_buf = buf; ++ dump_buf->dump_buf = (u32 *)buf; ++ dump_buf->clk_cnt_buf = (u64 *)(buf + dump_buf_bytes); ++ + return 0; + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_alloc); + + void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf) + { +@@ -235,7 +234,6 @@ void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf) + kfree(dump_buf->dump_buf); + memset(dump_buf, 0, sizeof(*dump_buf)); + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_free); + + int kbase_hwcnt_dump_buffer_array_alloc( + const struct kbase_hwcnt_metadata *metadata, +@@ -246,10 +244,16 @@ int kbase_hwcnt_dump_buffer_array_alloc( + size_t buf_idx; + unsigned int order; + unsigned long addr; ++ size_t dump_buf_bytes; ++ size_t clk_cnt_buf_bytes; + + if (!metadata || !dump_bufs) + return -EINVAL; + ++ dump_buf_bytes = metadata->dump_buf_bytes; ++ clk_cnt_buf_bytes = ++ sizeof(*dump_bufs->bufs->clk_cnt_buf) * metadata->clk_cnt; ++ + /* Allocate memory for the dump buffer struct array */ + buffers = kmalloc_array(n, 
sizeof(*buffers), GFP_KERNEL); + if (!buffers) +@@ -258,8 +262,8 @@ int kbase_hwcnt_dump_buffer_array_alloc( + /* Allocate pages for the actual dump buffers, as they tend to be fairly + * large. + */ +- order = get_order(metadata->dump_buf_bytes * n); +- addr = __get_free_pages(GFP_KERNEL, order); ++ order = get_order((dump_buf_bytes + clk_cnt_buf_bytes) * n); ++ addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); + + if (!addr) { + kfree(buffers); +@@ -273,15 +277,18 @@ int kbase_hwcnt_dump_buffer_array_alloc( + + /* Set the buffer of each dump buf */ + for (buf_idx = 0; buf_idx < n; buf_idx++) { +- const size_t offset = metadata->dump_buf_bytes * buf_idx; ++ const size_t dump_buf_offset = dump_buf_bytes * buf_idx; ++ const size_t clk_cnt_buf_offset = ++ (dump_buf_bytes * n) + (clk_cnt_buf_bytes * buf_idx); + + buffers[buf_idx].metadata = metadata; +- buffers[buf_idx].dump_buf = (u32 *)(addr + offset); ++ buffers[buf_idx].dump_buf = (u32 *)(addr + dump_buf_offset); ++ buffers[buf_idx].clk_cnt_buf = ++ (u64 *)(addr + clk_cnt_buf_offset); + } + + return 0; + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_array_alloc); + + void kbase_hwcnt_dump_buffer_array_free( + struct kbase_hwcnt_dump_buffer_array *dump_bufs) +@@ -293,7 +300,6 @@ void kbase_hwcnt_dump_buffer_array_free( + free_pages(dump_bufs->page_addr, dump_bufs->page_order); + memset(dump_bufs, 0, sizeof(*dump_bufs)); + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_array_free); + + void kbase_hwcnt_dump_buffer_zero( + struct kbase_hwcnt_dump_buffer *dst, +@@ -324,8 +330,10 @@ void kbase_hwcnt_dump_buffer_zero( + + kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt); + } ++ ++ memset(dst->clk_cnt_buf, 0, ++ sizeof(*dst->clk_cnt_buf) * metadata->clk_cnt); + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero); + + void kbase_hwcnt_dump_buffer_zero_strict( + struct kbase_hwcnt_dump_buffer *dst) +@@ -334,8 +342,10 @@ void kbase_hwcnt_dump_buffer_zero_strict( + return; + + memset(dst->dump_buf, 0, 
dst->metadata->dump_buf_bytes); ++ ++ memset(dst->clk_cnt_buf, 0, ++ sizeof(*dst->clk_cnt_buf) * dst->metadata->clk_cnt); + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero_strict); + + void kbase_hwcnt_dump_buffer_zero_non_enabled( + struct kbase_hwcnt_dump_buffer *dst, +@@ -375,7 +385,6 @@ void kbase_hwcnt_dump_buffer_zero_non_enabled( + } + } + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero_non_enabled); + + void kbase_hwcnt_dump_buffer_copy( + struct kbase_hwcnt_dump_buffer *dst, +@@ -384,6 +393,7 @@ void kbase_hwcnt_dump_buffer_copy( + { + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; ++ size_t clk; + + if (WARN_ON(!dst) || + WARN_ON(!src) || +@@ -413,8 +423,13 @@ void kbase_hwcnt_dump_buffer_copy( + + kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, val_cnt); + } ++ ++ kbase_hwcnt_metadata_for_each_clock(metadata, clk) { ++ if (kbase_hwcnt_clk_enable_map_enabled( ++ dst_enable_map->clk_enable_map, clk)) ++ dst->clk_cnt_buf[clk] = src->clk_cnt_buf[clk]; ++ } + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy); + + void kbase_hwcnt_dump_buffer_copy_strict( + struct kbase_hwcnt_dump_buffer *dst, +@@ -423,6 +438,7 @@ void kbase_hwcnt_dump_buffer_copy_strict( + { + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; ++ size_t clk; + + if (WARN_ON(!dst) || + WARN_ON(!src) || +@@ -451,8 +467,15 @@ void kbase_hwcnt_dump_buffer_copy_strict( + kbase_hwcnt_dump_buffer_block_copy_strict( + dst_blk, src_blk, blk_em, val_cnt); + } ++ ++ kbase_hwcnt_metadata_for_each_clock(metadata, clk) { ++ bool clk_enabled = ++ kbase_hwcnt_clk_enable_map_enabled( ++ dst_enable_map->clk_enable_map, clk); ++ ++ dst->clk_cnt_buf[clk] = clk_enabled ? 
src->clk_cnt_buf[clk] : 0; ++ } + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy_strict); + + void kbase_hwcnt_dump_buffer_accumulate( + struct kbase_hwcnt_dump_buffer *dst, +@@ -461,6 +484,7 @@ void kbase_hwcnt_dump_buffer_accumulate( + { + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; ++ size_t clk; + + if (WARN_ON(!dst) || + WARN_ON(!src) || +@@ -494,8 +518,13 @@ void kbase_hwcnt_dump_buffer_accumulate( + kbase_hwcnt_dump_buffer_block_accumulate( + dst_blk, src_blk, hdr_cnt, ctr_cnt); + } ++ ++ kbase_hwcnt_metadata_for_each_clock(metadata, clk) { ++ if (kbase_hwcnt_clk_enable_map_enabled( ++ dst_enable_map->clk_enable_map, clk)) ++ dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk]; ++ } + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate); + + void kbase_hwcnt_dump_buffer_accumulate_strict( + struct kbase_hwcnt_dump_buffer *dst, +@@ -504,6 +533,7 @@ void kbase_hwcnt_dump_buffer_accumulate_strict( + { + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; ++ size_t clk; + + if (WARN_ON(!dst) || + WARN_ON(!src) || +@@ -534,5 +564,12 @@ void kbase_hwcnt_dump_buffer_accumulate_strict( + kbase_hwcnt_dump_buffer_block_accumulate_strict( + dst_blk, src_blk, blk_em, hdr_cnt, ctr_cnt); + } ++ ++ kbase_hwcnt_metadata_for_each_clock(metadata, clk) { ++ if (kbase_hwcnt_clk_enable_map_enabled( ++ dst_enable_map->clk_enable_map, clk)) ++ dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk]; ++ else ++ dst->clk_cnt_buf[clk] = 0; ++ } + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate_strict); +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.h +index 4d78c84..da12952 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) 
COPYRIGHT 2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /** +@@ -85,7 +84,6 @@ + #include + #include + #include +-#include "mali_malisw.h" + + /* Number of bytes in each bitfield */ + #define KBASE_HWCNT_BITFIELD_BYTES (sizeof(u64)) +@@ -101,6 +99,21 @@ + */ + #define KBASE_HWCNT_AVAIL_MASK_BITS (sizeof(u64) * BITS_PER_BYTE) + ++/* Minimum alignment of each block of hardware counters */ ++#define KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT \ ++ (KBASE_HWCNT_BITFIELD_BITS * KBASE_HWCNT_VALUE_BYTES) ++ ++/** ++ * KBASE_HWCNT_ALIGN_UPWARDS() - Calculate next aligned value. ++ * @value: The value to align upwards. ++ * @alignment: The alignment boundary. ++ * ++ * Return: Input value if already aligned to the specified boundary, or next ++ * (incrementing upwards) aligned value. ++ */ ++#define KBASE_HWCNT_ALIGN_UPWARDS(value, alignment) \ ++ (value + ((alignment - (value % alignment)) % alignment)) ++ + /** + * struct kbase_hwcnt_block_description - Description of one or more identical, + * contiguous, Hardware Counter Blocks. +@@ -136,11 +149,13 @@ struct kbase_hwcnt_group_description { + * @grps: Non-NULL pointer to an array of grp_cnt group descriptions, + * describing each Hardware Counter Group in the system. + * @avail_mask: Flat Availability Mask for all block instances in the system. 
++ * @clk_cnt: The number of clock domains in the system. The maximum is 64. + */ + struct kbase_hwcnt_description { + size_t grp_cnt; + const struct kbase_hwcnt_group_description *grps; + u64 avail_mask; ++ u8 clk_cnt; + }; + + /** +@@ -220,6 +235,7 @@ struct kbase_hwcnt_group_metadata { + * @enable_map_bytes: The size in bytes of an Enable Map needed for the system. + * @dump_buf_bytes: The size in bytes of a Dump Buffer needed for the system. + * @avail_mask: The Availability Mask for the system. ++ * @clk_cnt: The number of clock domains in the system. + */ + struct kbase_hwcnt_metadata { + size_t grp_cnt; +@@ -227,6 +243,7 @@ struct kbase_hwcnt_metadata { + size_t enable_map_bytes; + size_t dump_buf_bytes; + u64 avail_mask; ++ u8 clk_cnt; + }; + + /** +@@ -234,13 +251,16 @@ struct kbase_hwcnt_metadata { + * bitfields. + * @metadata: Non-NULL pointer to metadata used to identify, and to describe + * the layout of the enable map. +- * @enable_map: Non-NULL pointer of size metadata->enable_map_bytes to an array +- * of u64 bitfields, each bit of which enables one hardware ++ * @hwcnt_enable_map: Non-NULL pointer of size metadata->enable_map_bytes to an ++ * array of u64 bitfields, each bit of which enables one hardware + * counter. ++ * @clk_enable_map: An array of u64 bitfields, each bit of which enables cycle ++ * counter for a given clock domain. + */ + struct kbase_hwcnt_enable_map { + const struct kbase_hwcnt_metadata *metadata; +- u64 *enable_map; ++ u64 *hwcnt_enable_map; ++ u64 clk_enable_map; + }; + + /** +@@ -250,10 +270,13 @@ struct kbase_hwcnt_enable_map { + * the layout of the Dump Buffer. + * @dump_buf: Non-NULL pointer of size metadata->dump_buf_bytes to an array + * of u32 values. ++ * @clk_cnt_buf: A pointer to an array of u64 values for cycle count elapsed ++ * for each clock domain. 
+ */ + struct kbase_hwcnt_dump_buffer { + const struct kbase_hwcnt_metadata *metadata; + u32 *dump_buf; ++ u64 *clk_cnt_buf; + }; + + /** +@@ -349,8 +372,7 @@ void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata); + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * +- * Return: Number of u32 counter headers in each instance of block blk in +- * group grp. ++ * Return: Number of counter headers in each instance of block blk in group grp. + */ + #define kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk) \ + ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].hdr_cnt) +@@ -361,19 +383,29 @@ void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata); + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * +- * Return: Number of u32 counters in each instance of block blk in group +- * grp. ++ * Return: Number of counters in each instance of block blk in group grp. + */ + #define kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk) \ + ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].ctr_cnt) + ++/** ++ * kbase_hwcnt_metadata_block_enable_map_stride() - Get the enable map stride. ++ * @metadata: Non-NULL pointer to metadata. ++ * @grp: Index of the group in the metadata. ++ * @blk: Index of the block in the group. ++ * ++ * Return: enable map stride in each instance of block blk in group grp. ++ */ ++#define kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk) \ ++ ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_stride) ++ + /** + * kbase_hwcnt_metadata_block_values_count() - Get the number of values. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. 
+ * +- * Return: Number of u32 headers plus counters in each instance of block blk ++ * Return: Number of headers plus counters in each instance of block blk + * in group grp. + */ + #define kbase_hwcnt_metadata_block_values_count(metadata, grp, blk) \ +@@ -473,7 +505,7 @@ void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map); + * block instance. + */ + #define kbase_hwcnt_enable_map_block_instance(map, grp, blk, blk_inst) \ +- ((map)->enable_map + \ ++ ((map)->hwcnt_enable_map + \ + (map)->metadata->grp_metadata[(grp)].enable_map_index + \ + (map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_index + \ + (map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_stride * (blk_inst)) +@@ -520,7 +552,11 @@ static inline void kbase_hwcnt_enable_map_block_disable_all( + static inline void kbase_hwcnt_enable_map_disable_all( + struct kbase_hwcnt_enable_map *dst) + { +- memset(dst->enable_map, 0, dst->metadata->enable_map_bytes); ++ if (dst->hwcnt_enable_map != NULL) ++ memset(dst->hwcnt_enable_map, 0, ++ dst->metadata->enable_map_bytes); ++ ++ dst->clk_enable_map = 0; + } + + /** +@@ -569,6 +605,8 @@ static inline void kbase_hwcnt_enable_map_enable_all( + kbase_hwcnt_metadata_for_each_block(dst->metadata, grp, blk, blk_inst) + kbase_hwcnt_enable_map_block_enable_all( + dst, grp, blk, blk_inst); ++ ++ dst->clk_enable_map = (1ull << dst->metadata->clk_cnt) - 1; + } + + /** +@@ -582,9 +620,13 @@ static inline void kbase_hwcnt_enable_map_copy( + struct kbase_hwcnt_enable_map *dst, + const struct kbase_hwcnt_enable_map *src) + { +- memcpy(dst->enable_map, +- src->enable_map, +- dst->metadata->enable_map_bytes); ++ if (dst->hwcnt_enable_map != NULL) { ++ memcpy(dst->hwcnt_enable_map, ++ src->hwcnt_enable_map, ++ dst->metadata->enable_map_bytes); ++ } ++ ++ dst->clk_enable_map = src->clk_enable_map; + } + + /** +@@ -602,8 +644,12 @@ static inline void kbase_hwcnt_enable_map_union( + dst->metadata->enable_map_bytes / 
KBASE_HWCNT_BITFIELD_BYTES; + size_t i; + +- for (i = 0; i < bitfld_count; i++) +- dst->enable_map[i] |= src->enable_map[i]; ++ if (dst->hwcnt_enable_map != NULL) { ++ for (i = 0; i < bitfld_count; i++) ++ dst->hwcnt_enable_map[i] |= src->hwcnt_enable_map[i]; ++ } ++ ++ dst->clk_enable_map |= src->clk_enable_map; + } + + /** +@@ -656,6 +702,12 @@ static inline bool kbase_hwcnt_enable_map_any_enabled( + const struct kbase_hwcnt_enable_map *enable_map) + { + size_t grp, blk, blk_inst; ++ const u64 clk_enable_map_mask = ++ (1ull << enable_map->metadata->clk_cnt) - 1; ++ ++ if (enable_map->metadata->clk_cnt > 0 && ++ (enable_map->clk_enable_map & clk_enable_map_mask)) ++ return true; + + kbase_hwcnt_metadata_for_each_block( + enable_map->metadata, grp, blk, blk_inst) { +@@ -749,12 +801,10 @@ void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf); + * kbase_hwcnt_dump_buffer_array_alloc() - Allocate an array of dump buffers. + * @metadata: Non-NULL pointer to metadata describing the system. + * @n: Number of dump buffers to allocate +- * @dump_bufs: Non-NULL pointer to dump buffer array to be initialised. Each +- * dump buffer in the array will be initialised to undefined values, +- * so must be used as a copy dest, or cleared before use. ++ * @dump_bufs: Non-NULL pointer to dump buffer array to be initialised. + * +- * A single contiguous page allocation will be used for all of the buffers +- * inside the array, where: ++ * A single zeroed contiguous page allocation will be used for all of the ++ * buffers inside the array, where: + * dump_bufs[n].dump_buf == page_addr + n * metadata.dump_buf_bytes + * + * Return: 0 on success, else error code. +@@ -1084,4 +1134,29 @@ static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict( + } + } + ++/** ++ * Iterate over each clock domain in the metadata. ++ * ++ * @md: Non-NULL pointer to metadata. ++ * @clk: size_t variable used as clock iterator. 
++ */ ++#define kbase_hwcnt_metadata_for_each_clock(md, clk) \ ++ for ((clk) = 0; (clk) < (md)->clk_cnt; (clk)++) ++ ++/** ++ * kbase_hwcnt_clk_enable_map_enabled() - Check if the given index is enabled ++ * in clk_enable_map. ++ * @clk_enable_map: An enable map for clock domains. ++ * @index: Index of the enable map for clock domain. ++ * ++ * Return: true if the index of the clock domain is enabled, else false. ++ */ ++static inline bool kbase_hwcnt_clk_enable_map_enabled( ++ const u64 clk_enable_map, const size_t index) ++{ ++ if (clk_enable_map & (1ull << index)) ++ return true; ++ return false; ++} ++ + #endif /* _KBASE_HWCNT_TYPES_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.c +index 917e47c..52ecb7b 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,17 +17,12 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include "mali_kbase_hwcnt_virtualizer.h" + #include "mali_kbase_hwcnt_accumulator.h" + #include "mali_kbase_hwcnt_context.h" + #include "mali_kbase_hwcnt_types.h" +-#include "mali_malisw.h" +-#include "mali_kbase_debug.h" +-#include "mali_kbase_linux.h" + + #include + #include +@@ -87,7 +83,6 @@ const struct kbase_hwcnt_metadata *kbase_hwcnt_virtualizer_metadata( + + return hvirt->metadata; + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_metadata); + + /** + * kbasep_hwcnt_virtualizer_client_free - Free a virtualizer client's memory. +@@ -496,7 +491,6 @@ int kbase_hwcnt_virtualizer_client_set_counters( + + return errcode; + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_set_counters); + + /** + * kbasep_hwcnt_virtualizer_client_dump - Perform a dump of the client's +@@ -686,7 +680,6 @@ int kbase_hwcnt_virtualizer_client_dump( + + return errcode; + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_dump); + + int kbase_hwcnt_virtualizer_client_create( + struct kbase_hwcnt_virtualizer *hvirt, +@@ -719,7 +712,6 @@ int kbase_hwcnt_virtualizer_client_create( + *out_hvcli = hvcli; + return 0; + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_create); + + void kbase_hwcnt_virtualizer_client_destroy( + struct kbase_hwcnt_virtualizer_client *hvcli) +@@ -735,7 +727,6 @@ void kbase_hwcnt_virtualizer_client_destroy( + + kbasep_hwcnt_virtualizer_client_free(hvcli); + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_destroy); + + int kbase_hwcnt_virtualizer_init( + struct kbase_hwcnt_context *hctx, +@@ -766,7 +757,6 @@ int kbase_hwcnt_virtualizer_init( + *out_hvirt = virt; + return 0; + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_init); + + void kbase_hwcnt_virtualizer_term( + struct kbase_hwcnt_virtualizer *hvirt) +@@ -787,4 +777,12 @@ void kbase_hwcnt_virtualizer_term( + + kfree(hvirt); + } +-KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_term); ++ ++bool 
kbase_hwcnt_virtualizer_queue_work(struct kbase_hwcnt_virtualizer *hvirt, ++ struct work_struct *work) ++{ ++ if (WARN_ON(!hvirt) || WARN_ON(!work)) ++ return false; ++ ++ return kbase_hwcnt_context_queue_work(hvirt->hctx, work); ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.h +index 8f628c3..e3a8137 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /** +@@ -31,6 +30,7 @@ + #define _KBASE_HWCNT_VIRTUALIZER_H_ + + #include ++#include + + struct kbase_hwcnt_context; + struct kbase_hwcnt_virtualizer; +@@ -142,4 +142,19 @@ int kbase_hwcnt_virtualizer_client_dump( + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf); + ++/** ++ * kbase_hwcnt_virtualizer_queue_work() - Queue hardware counter related async ++ * work on a workqueue specialized for ++ * hardware counters. ++ * @hvirt: Non-NULL pointer to the hardware counter virtualizer. ++ * @work: Non-NULL pointer to work to queue. 
++ * ++ * Return: false if work was already on a queue, true otherwise. ++ * ++ * This is a convenience function that directly calls the underlying ++ * kbase_hwcnt_context's kbase_hwcnt_context_queue_work. ++ */ ++bool kbase_hwcnt_virtualizer_queue_work(struct kbase_hwcnt_virtualizer *hvirt, ++ struct work_struct *work); ++ + #endif /* _KBASE_HWCNT_VIRTUALIZER_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c +index 43f4b4d..83977f8 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,22 +17,20 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + #include +-#ifdef CONFIG_COMPAT ++#if IS_ENABLED(CONFIG_COMPAT) + #include + #endif + #include + #include + #include + #include ++#include + + #include ++#include + #include + #include + #include +@@ -39,24 +38,17 @@ + #include "mali_kbase_dma_fence.h" + #include + +-#define beenthere(kctx, f, a...) 
dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) ++#include + +-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0) +-/* random32 was renamed to prandom_u32 in 3.8 */ +-#define prandom_u32 random32 +-#endif ++#define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) + + /* Return whether katom will run on the GPU or not. Currently only soft jobs and +- * dependency-only atoms do not run on the GPU */ ++ * dependency-only atoms do not run on the GPU ++ */ + #define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) || \ + ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == \ + BASE_JD_REQ_DEP))) + +-/* Minimum API version that supports the just-in-time memory allocation pressure +- * limit feature. +- */ +-#define MIN_API_VERSION_WITH_JPL KBASE_API_VERSION(11, 20) +- + /* + * This is the kernel side of the API. Only entry points are: + * - kbase_jd_submit(): Called from userspace to submit a single bag +@@ -70,13 +62,22 @@ + static void __user * + get_compat_pointer(struct kbase_context *kctx, const u64 p) + { +-#ifdef CONFIG_COMPAT ++#if IS_ENABLED(CONFIG_COMPAT) + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) + return compat_ptr(p); + #endif + return u64_to_user_ptr(p); + } + ++/* Mark an atom as complete, and trace it in kinstr_jm */ ++static void jd_mark_atom_complete(struct kbase_jd_atom *katom) ++{ ++ katom->status = KBASE_JD_ATOM_STATE_COMPLETED; ++ kbase_kinstr_jm_atom_complete(katom); ++ dev_dbg(katom->kctx->kbdev->dev, "Atom %pK status to completed\n", ++ (void *)katom); ++} ++ + /* Runs an atom, either by handing to the JS or by immediately running it in the case of soft-jobs + * + * Returns whether the JS needs a reschedule. 
+@@ -88,7 +89,7 @@ static bool jd_run_atom(struct kbase_jd_atom *katom) + { + struct kbase_context *kctx = katom->kctx; + +- dev_dbg(kctx->kbdev->dev, "JD run atom %p in kctx %p\n", ++ dev_dbg(kctx->kbdev->dev, "JD run atom %pK in kctx %pK\n", + (void *)katom, (void *)kctx); + + KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); +@@ -97,30 +98,24 @@ static bool jd_run_atom(struct kbase_jd_atom *katom) + /* Dependency only atom */ + trace_sysgraph(SGR_SUBMIT, kctx->id, + kbase_jd_atom_id(katom->kctx, katom)); +- katom->status = KBASE_JD_ATOM_STATE_COMPLETED; +- dev_dbg(kctx->kbdev->dev, "Atom %p status to completed\n", +- (void *)katom); +- return 0; ++ jd_mark_atom_complete(katom); ++ return false; + } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { + /* Soft-job */ + if (katom->will_fail_event_code) { + kbase_finish_soft_job(katom); +- katom->status = KBASE_JD_ATOM_STATE_COMPLETED; +- dev_dbg(kctx->kbdev->dev, +- "Atom %p status to completed\n", (void *)katom); +- return 0; ++ jd_mark_atom_complete(katom); ++ return false; + } + if (kbase_process_soft_job(katom) == 0) { + kbase_finish_soft_job(katom); +- katom->status = KBASE_JD_ATOM_STATE_COMPLETED; +- dev_dbg(kctx->kbdev->dev, +- "Atom %p status to completed\n", (void *)katom); ++ jd_mark_atom_complete(katom); + } +- return 0; ++ return false; + } + + katom->status = KBASE_JD_ATOM_STATE_IN_JS; +- dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n", (void *)katom); ++ dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n", (void *)katom); + /* Queue an action about whether we should try scheduling a context */ + return kbasep_js_add_job(kctx, katom); + } +@@ -205,7 +200,7 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) + * jctx.lock must be held when this is called. 
+ */ + -static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom_v2 *user_atom) ++static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom *user_atom) + { + int err_ret_val = -EINVAL; + u32 res_no; +@@ -242,7 +237,8 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st + + /* copy user buffer to the end of our real buffer. + * Make sure the struct sizes haven't changed in a way +- * we don't support */ ++ * we don't support ++ */ + BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres)); + input_extres = (struct base_external_resource *) + (((unsigned char *)katom->extres) + +@@ -258,9 +254,14 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st + + #ifdef CONFIG_MALI_DMA_FENCE + if (implicit_sync) { +- info.resv_objs = kmalloc_array(katom->nr_extres, +- sizeof(struct reservation_object *), +- GFP_KERNEL); ++ info.resv_objs = ++ kmalloc_array(katom->nr_extres, ++#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) ++ sizeof(struct reservation_object *), ++#else ++ sizeof(struct dma_resv *), ++#endif ++ GFP_KERNEL); + if (!info.resv_objs) { + err_ret_val = -ENOMEM; + goto early_err_out; +@@ -277,7 +278,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st + #endif /* CONFIG_MALI_DMA_FENCE */ + + /* Take the processes mmap lock */ +- down_read(&current->mm->mmap_sem); ++ down_read(kbase_mem_get_process_mmap_lock()); + + /* need to keep the GPU VM locked while we set up UMM buffers */ + kbase_gpu_vm_lock(katom->kctx); +@@ -314,8 +315,11 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st + #ifdef CONFIG_MALI_DMA_FENCE + if (implicit_sync && + reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { ++#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + struct reservation_object *resv; +- ++#else ++ struct dma_resv *resv; ++#endif + resv = 
reg->gpu_alloc->imported.umm.dma_buf->resv; + if (resv) + kbase_dma_fence_add_reservation(resv, &info, +@@ -328,7 +332,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st + * at least not before the first write) as we overwrite elements + * as we loop and could be overwriting ourself, so no writes + * until the last read for an element. +- * */ ++ */ + katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */ + katom->extres[res_no].alloc = alloc; + } +@@ -337,7 +341,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st + kbase_gpu_vm_unlock(katom->kctx); + + /* Release the processes mmap lock */ +- up_read(&current->mm->mmap_sem); ++ up_read(kbase_mem_get_process_mmap_lock()); + + #ifdef CONFIG_MALI_DMA_FENCE + if (implicit_sync) { +@@ -362,7 +366,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st + #ifdef CONFIG_MALI_DMA_FENCE + failed_dma_fence_setup: + /* Lock the processes mmap lock */ +- down_read(&current->mm->mmap_sem); ++ down_read(kbase_mem_get_process_mmap_lock()); + + /* lock before we unmap */ + kbase_gpu_vm_lock(katom->kctx); +@@ -378,7 +382,7 @@ failed_dma_fence_setup: + kbase_gpu_vm_unlock(katom->kctx); + + /* Release the processes mmap lock */ +- up_read(&current->mm->mmap_sem); ++ up_read(kbase_mem_get_process_mmap_lock()); + + early_err_out: + kfree(katom->extres); +@@ -455,9 +459,6 @@ static inline void jd_resolve_dep(struct list_head *out_list, + #endif /* CONFIG_MALI_DMA_FENCE */ + + if (dep_satisfied) { +- trace_sysgraph(SGR_DEP_RES, +- dep_atom->kctx->id, +- kbase_jd_atom_id(katom->kctx, dep_atom)); + dep_atom->in_jd_list = true; + list_add_tail(&dep_atom->jd_item, out_list); + } +@@ -481,7 +482,8 @@ static inline void jd_resolve_dep(struct list_head *out_list, + static bool is_dep_valid(struct kbase_jd_atom *katom) + { + /* If there's no dependency then this is 'valid' from the 
perspective of +- * early dependency submission */ ++ * early dependency submission ++ */ + if (!katom) + return true; + +@@ -490,7 +492,8 @@ static bool is_dep_valid(struct kbase_jd_atom *katom) + return false; + + /* If dependency has completed and has failed or will fail then it is +- * not valid */ ++ * not valid ++ */ + if (katom->status >= KBASE_JD_ATOM_STATE_HW_COMPLETED && + (katom->event_code != BASE_JD_EVENT_DONE || + katom->will_fail_event_code)) +@@ -544,10 +547,6 @@ static void jd_try_submitting_deps(struct list_head *out_list, + #endif /* CONFIG_MALI_DMA_FENCE */ + + if (dep0_valid && dep1_valid && dep_satisfied) { +- trace_sysgraph(SGR_DEP_RES, +- dep_atom->kctx->id, +- kbase_jd_atom_id(dep_atom->kctx, +- dep_atom)); + dep_atom->in_jd_list = true; + list_add(&dep_atom->jd_item, out_list); + } +@@ -556,7 +555,7 @@ static void jd_try_submitting_deps(struct list_head *out_list, + } + } + +-#if MALI_JIT_PRESSURE_LIMIT ++#if MALI_JIT_PRESSURE_LIMIT_BASE + /** + * jd_update_jit_usage - Update just-in-time physical memory usage for an atom. 
+ * +@@ -632,8 +631,8 @@ static void jd_update_jit_usage(struct kbase_jd_atom *katom) + u64 addr_end; + + if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { +- const unsigned long extent_bytes = reg->extent +- << PAGE_SHIFT; ++ const unsigned long extension_bytes = ++ reg->extension << PAGE_SHIFT; + const u64 low_ptr = ptr[LOW]; + const u64 high_ptr = ptr[HIGH]; + +@@ -654,8 +653,8 @@ static void jd_update_jit_usage(struct kbase_jd_atom *katom) + * this, but here to avoid future maintenance + * hazards + */ +- WARN_ON(!is_power_of_2(extent_bytes)); +- addr_end = ALIGN(read_val, extent_bytes); ++ WARN_ON(!is_power_of_2(extension_bytes)); ++ addr_end = ALIGN(read_val, extension_bytes); + } else { + addr_end = read_val = READ_ONCE(*ptr); + } +@@ -696,7 +695,7 @@ static void jd_update_jit_usage(struct kbase_jd_atom *katom) + + kbase_jit_retry_pending_alloc(kctx); + } +-#endif /* MALI_JIT_PRESSURE_LIMIT */ ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + /* + * Perform the necessary handling of an atom that has finished running +@@ -721,12 +720,14 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, + + KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); + +-#if MALI_JIT_PRESSURE_LIMIT +- jd_update_jit_usage(katom); +-#endif /* MALI_JIT_PRESSURE_LIMIT */ ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ if (kbase_ctx_flag(kctx, KCTX_JPL_ENABLED)) ++ jd_update_jit_usage(katom); ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + /* This is needed in case an atom is failed due to being invalid, this +- * can happen *before* the jobs that the atom depends on have completed */ ++ * can happen *before* the jobs that the atom depends on have completed ++ */ + for (i = 0; i < 2; i++) { + if (kbase_jd_katom_dep_atom(&katom->dep[i])) { + list_del(&katom->dep_item[i]); +@@ -734,9 +735,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, + } + } + +- katom->status = KBASE_JD_ATOM_STATE_COMPLETED; +- dev_dbg(kctx->kbdev->dev, "Atom %p status to completed\n", +- (void *)katom); ++ 
jd_mark_atom_complete(katom); + list_add_tail(&katom->jd_item, &completed_jobs); + + while (!list_empty(&completed_jobs)) { +@@ -759,7 +758,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, + list_del(runnable_jobs.next); + node->in_jd_list = false; + +- dev_dbg(kctx->kbdev->dev, "List node %p has status %d\n", ++ dev_dbg(kctx->kbdev->dev, "List node %pK has status %d\n", + node, node->status); + + KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED); +@@ -786,7 +785,8 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, + !node->will_fail_event_code) { + /* Node successfully submitted, try submitting + * dependencies as they may now be representable +- * in JS */ ++ * in JS ++ */ + jd_try_submitting_deps(&runnable_jobs, node); + } + } +@@ -802,10 +802,14 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, + + /* Decrement and check the TOTAL number of jobs. This includes + * those not tracked by the scheduler: 'not ready to run' and +- * 'dependency-only' jobs. */ ++ * 'dependency-only' jobs. ++ */ + if (--kctx->jctx.job_nr == 0) +- wake_up(&kctx->jctx.zero_jobs_wait); /* All events are safely queued now, and we can signal any waiter +- * that we've got no more jobs (so we can be safely terminated) */ ++ /* All events are safely queued now, and we can signal ++ * any waiter that we've got no more jobs (so we can be ++ * safely terminated) ++ */ ++ wake_up(&kctx->jctx.zero_jobs_wait); + } + + return need_to_try_schedule_context; +@@ -813,7 +817,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, + + KBASE_EXPORT_TEST_API(jd_done_nolock); + +-#ifdef CONFIG_GPU_TRACEPOINTS ++#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) + enum { + CORE_REQ_DEP_ONLY, + CORE_REQ_SOFT, +@@ -868,8 +872,23 @@ static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req) + } + #endif + ++/* Trace an atom submission. 
*/ ++static void jd_trace_atom_submit(struct kbase_context *const kctx, ++ struct kbase_jd_atom *const katom, ++ int *priority) ++{ ++ struct kbase_device *const kbdev = kctx->kbdev; ++ ++ KBASE_TLSTREAM_TL_NEW_ATOM(kbdev, katom, kbase_jd_atom_id(kctx, katom)); ++ KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx); ++ if (priority) ++ KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(kbdev, katom, *priority); ++ KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_IDLE); ++ kbase_kinstr_jm_atom_queue(katom); ++} ++ + static bool jd_submit_atom(struct kbase_context *const kctx, +- const struct base_jd_atom_v2 *const user_atom, ++ const struct base_jd_atom *const user_atom, + const struct base_jd_fragment *const user_jc_incr, + struct kbase_jd_atom *const katom) + { +@@ -879,14 +898,17 @@ static bool jd_submit_atom(struct kbase_context *const kctx, + int i; + int sched_prio; + bool will_fail = false; ++ unsigned long flags; ++ enum kbase_jd_atom_state status; + +- dev_dbg(kbdev->dev, "User did JD submit atom %p\n", (void *)katom); ++ dev_dbg(kbdev->dev, "User did JD submit atom %pK\n", (void *)katom); + + /* Update the TOTAL number of jobs. This includes those not tracked by +- * the scheduler: 'not ready to run' and 'dependency-only' jobs. */ ++ * the scheduler: 'not ready to run' and 'dependency-only' jobs. 
++ */ + jctx->job_nr++; + +-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) ++#if KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE + katom->start_timestamp.tv64 = 0; + #else + katom->start_timestamp = 0; +@@ -899,6 +921,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, + katom->jc = user_atom->jc; + katom->core_req = user_atom->core_req; + katom->jobslot = user_atom->jobslot; ++ katom->seq_nr = user_atom->seq_nr; + katom->atom_flags = 0; + katom->retry_count = 0; + katom->need_cache_flush_cores_retained = 0; +@@ -911,19 +934,19 @@ static bool jd_submit_atom(struct kbase_context *const kctx, + + trace_sysgraph(SGR_ARRIVE, kctx->id, user_atom->atom_number); + +-#if MALI_JIT_PRESSURE_LIMIT ++#if MALI_JIT_PRESSURE_LIMIT_BASE + /* Older API version atoms might have random values where jit_id now + * lives, but we must maintain backwards compatibility - handle the + * issue. + */ +- if (kctx->api_version < MIN_API_VERSION_WITH_JPL) { ++ if (!mali_kbase_supports_jit_pressure_limit(kctx->api_version)) { + katom->jit_ids[0] = 0; + katom->jit_ids[1] = 0; + } else { + katom->jit_ids[0] = user_atom->jit_id[0]; + katom->jit_ids[1] = user_atom->jit_id[1]; + } +-#endif /* MALI_JIT_PRESSURE_LIMIT */ ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + katom->renderpass_id = user_atom->renderpass_id; + +@@ -953,23 +976,14 @@ static bool jd_submit_atom(struct kbase_context *const kctx, + katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT; + katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + dev_dbg(kbdev->dev, +- "Atom %p status to completed\n", ++ "Atom %pK status to completed\n", + (void *)katom); + + /* Wrong dependency setup. Atom will be sent + * back to user space. Do not record any +- * dependencies. */ +- KBASE_TLSTREAM_TL_NEW_ATOM( +- kbdev, +- katom, +- kbase_jd_atom_id(kctx, katom)); +- KBASE_TLSTREAM_TL_RET_ATOM_CTX( +- kbdev, +- katom, kctx); +- KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE( +- kbdev, +- katom, +- TL_ATOM_STATE_IDLE); ++ * dependencies. 
++ */ ++ jd_trace_atom_submit(kctx, katom, NULL); + + return jd_done_nolock(katom, NULL); + } +@@ -1005,19 +1019,13 @@ static bool jd_submit_atom(struct kbase_context *const kctx, + /* Atom has completed, propagate the error code if any */ + katom->event_code = dep_atom->event_code; + katom->status = KBASE_JD_ATOM_STATE_QUEUED; +- dev_dbg(kbdev->dev, "Atom %p status to queued\n", ++ dev_dbg(kbdev->dev, "Atom %pK status to queued\n", + (void *)katom); + + /* This atom will be sent back to user space. + * Do not record any dependencies. + */ +- KBASE_TLSTREAM_TL_NEW_ATOM( +- kbdev, +- katom, +- kbase_jd_atom_id(kctx, katom)); +- KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx); +- KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, +- TL_ATOM_STATE_IDLE); ++ jd_trace_atom_submit(kctx, katom, NULL); + + will_fail = true; + +@@ -1045,20 +1053,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, + return jd_done_nolock(katom, NULL); + } + +- if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { +- /* This softjob has failed due to a previous +- * dependency, however we should still run the +- * prepare & finish functions +- */ +- if (kbase_prepare_soft_job(katom) != 0) { +- katom->event_code = +- BASE_JD_EVENT_JOB_INVALID; +- return jd_done_nolock(katom, NULL); +- } +- } +- + katom->will_fail_event_code = katom->event_code; +- return false; + } + + /* These must occur after the above loop to ensure that an atom +@@ -1067,22 +1062,19 @@ static bool jd_submit_atom(struct kbase_context *const kctx, + */ + katom->event_code = BASE_JD_EVENT_DONE; + katom->status = KBASE_JD_ATOM_STATE_QUEUED; +- dev_dbg(kbdev->dev, "Atom %p status to queued\n", (void *)katom); ++ dev_dbg(kbdev->dev, "Atom %pK status to queued\n", (void *)katom); + + /* For invalid priority, be most lenient and choose the default */ + sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio); + if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID) + sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT; +- 
katom->sched_priority = sched_prio; ++ ++ /* Cap the priority to jctx.max_priority */ ++ katom->sched_priority = (sched_prio < kctx->jctx.max_priority) ? ++ kctx->jctx.max_priority : sched_prio; + + /* Create a new atom. */ +- KBASE_TLSTREAM_TL_NEW_ATOM( +- kbdev, +- katom, +- kbase_jd_atom_id(kctx, katom)); +- KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_IDLE); +- KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(kbdev, katom, katom->sched_priority); +- KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx); ++ jd_trace_atom_submit(kctx, katom, &katom->sched_priority); + + #if !MALI_INCREMENTAL_RENDERING + /* Reject atoms for incremental rendering if not supported */ +@@ -1149,8 +1141,8 @@ static bool jd_submit_atom(struct kbase_context *const kctx, + } + } + +-#if !MALI_JIT_PRESSURE_LIMIT +- if ((kctx->api_version >= MIN_API_VERSION_WITH_JPL) && ++#if !MALI_JIT_PRESSURE_LIMIT_BASE ++ if (mali_kbase_supports_jit_pressure_limit(kctx->api_version) && + (user_atom->jit_id[0] || user_atom->jit_id[1])) { + /* JIT pressure limit is disabled, but we are receiving non-0 + * JIT IDs - atom is invalid. +@@ -1158,7 +1150,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return jd_done_nolock(katom, NULL); + } +-#endif /* MALI_JIT_PRESSURE_LIMIT */ ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + /* Validate the atom. Function will return error if the atom is + * malformed. 
+@@ -1180,7 +1172,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, + } + } + +-#ifdef CONFIG_GPU_TRACEPOINTS ++#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) + katom->work_id = atomic_inc_return(&jctx->work_id); + trace_gpu_job_enqueue(kctx->id, katom->work_id, + kbasep_map_core_reqs_to_string(katom->core_req)); +@@ -1207,13 +1199,24 @@ static bool jd_submit_atom(struct kbase_context *const kctx, + bool need_to_try_schedule_context; + + katom->status = KBASE_JD_ATOM_STATE_IN_JS; +- dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n", ++ dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n", + (void *)katom); + + need_to_try_schedule_context = kbasep_js_add_job(kctx, katom); + /* If job was cancelled then resolve immediately */ + if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED) + return need_to_try_schedule_context; ++ ++ /* Synchronize with backend reset */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ status = katom->status; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ if (status == KBASE_JD_ATOM_STATE_HW_COMPLETED) { ++ dev_dbg(kctx->kbdev->dev, ++ "Atom %d cancelled on HW\n", ++ kbase_jd_atom_id(katom->kctx, katom)); ++ return need_to_try_schedule_context; ++ } + } + + /* This is a pure dependency. 
Resolve it immediately */ +@@ -1231,6 +1234,9 @@ int kbase_jd_submit(struct kbase_context *kctx, + struct kbase_device *kbdev; + u32 latest_flush; + ++ bool jd_atom_is_v2 = (stride == sizeof(struct base_jd_atom_v2) || ++ stride == offsetof(struct base_jd_atom_v2, renderpass_id)); ++ + /* + * kbase_jd_submit isn't expected to fail and so all errors with the + * jobs are reported by immediately failing them (through event system) +@@ -1245,7 +1251,9 @@ int kbase_jd_submit(struct kbase_context *kctx, + } + + if (stride != offsetof(struct base_jd_atom_v2, renderpass_id) && +- stride != sizeof(struct base_jd_atom_v2)) { ++ stride != sizeof(struct base_jd_atom_v2) && ++ stride != offsetof(struct base_jd_atom, renderpass_id) && ++ stride != sizeof(struct base_jd_atom)) { + dev_err(kbdev->dev, + "Stride %u passed to job_submit isn't supported by the kernel\n", + stride); +@@ -1256,16 +1264,29 @@ int kbase_jd_submit(struct kbase_context *kctx, + latest_flush = kbase_backend_get_current_flush_id(kbdev); + + for (i = 0; i < nr_atoms; i++) { +- struct base_jd_atom_v2 user_atom; ++ struct base_jd_atom user_atom; + struct base_jd_fragment user_jc_incr; + struct kbase_jd_atom *katom; + +- if (copy_from_user(&user_atom, user_addr, stride) != 0) { +- dev_err(kbdev->dev, +- "Invalid atom address %p passed to job_submit\n", +- user_addr); +- err = -EFAULT; +- break; ++ if (unlikely(jd_atom_is_v2)) { ++ if (copy_from_user(&user_atom.jc, user_addr, sizeof(struct base_jd_atom_v2)) != 0) { ++ dev_dbg(kbdev->dev, ++ "Invalid atom address %p passed to job_submit\n", ++ user_addr); ++ err = -EFAULT; ++ break; ++ } ++ ++ /* no seq_nr in v2 */ ++ user_atom.seq_nr = 0; ++ } else { ++ if (copy_from_user(&user_atom, user_addr, stride) != 0) { ++ dev_dbg(kbdev->dev, ++ "Invalid atom address %p passed to job_submit\n", ++ user_addr); ++ err = -EFAULT; ++ break; ++ } + } + + if (stride == offsetof(struct base_jd_atom_v2, renderpass_id)) { +@@ -1399,7 +1420,7 @@ void kbase_jd_done_worker(struct 
work_struct *data) + js_kctx_info = &kctx->jctx.sched_info; + js_devdata = &kbdev->js_data; + +- dev_dbg(kbdev->dev, "Enter atom %p done worker for kctx %p\n", ++ dev_dbg(kbdev->dev, "Enter atom %pK done worker for kctx %pK\n", + (void *)katom, (void *)kctx); + + KBASE_KTRACE_ADD_JM(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0); +@@ -1423,7 +1444,7 @@ void kbase_jd_done_worker(struct work_struct *data) + if (katom->event_code == BASE_JD_EVENT_STOPPED) { + unsigned long flags; + +- dev_dbg(kbdev->dev, "Atom %p has been promoted to stopped\n", ++ dev_dbg(kbdev->dev, "Atom %pK has been promoted to stopped\n", + (void *)katom); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); +@@ -1431,7 +1452,7 @@ void kbase_jd_done_worker(struct work_struct *data) + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + katom->status = KBASE_JD_ATOM_STATE_IN_JS; +- dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n", ++ dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n", + (void *)katom); + kbase_js_unpull(kctx, katom); + +@@ -1518,7 +1539,9 @@ void kbase_jd_done_worker(struct work_struct *data) + mutex_unlock(&jctx->lock); + + /* Job is now no longer running, so can now safely release the context +- * reference, and handle any actions that were logged against the atom's retained state */ ++ * reference, and handle any actions that were logged against the ++ * atom's retained state ++ */ + + kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state); + +@@ -1526,7 +1549,8 @@ void kbase_jd_done_worker(struct work_struct *data) + + if (!atomic_dec_return(&kctx->work_count)) { + /* If worker now idle then post all events that jd_done_nolock() +- * has queued */ ++ * has queued ++ */ + mutex_lock(&jctx->lock); + while (!list_empty(&kctx->completed_jobs)) { + struct kbase_jd_atom *atom = list_entry( +@@ -1546,7 +1570,7 @@ void kbase_jd_done_worker(struct work_struct *data) + + KBASE_KTRACE_ADD_JM(kbdev, 
JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0); + +- dev_dbg(kbdev->dev, "Leave atom %p done worker for kctx %p\n", ++ dev_dbg(kbdev->dev, "Leave atom %pK done worker for kctx %pK\n", + (void *)katom, (void *)kctx); + } + +@@ -1601,7 +1625,8 @@ static void jd_cancel_worker(struct work_struct *data) + need_to_try_schedule_context = jd_done_nolock(katom, NULL); + /* Because we're zapping, we're not adding any more jobs to this ctx, so no need to + * schedule the context. There's also no need for the jsctx_mutex to have been taken +- * around this too. */ ++ * around this too. ++ */ + KBASE_DEBUG_ASSERT(!need_to_try_schedule_context); + + /* katom may have been freed now, do not use! */ +@@ -1652,7 +1677,7 @@ void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, + + atomic_inc(&kctx->work_count); + +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + /* a failed job happened and is waiting for dumping*/ + if (!katom->will_fail_event_code && + kbase_debug_job_fault_process(katom, katom->event_code)) +@@ -1670,12 +1695,12 @@ void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom) + { + struct kbase_context *kctx; + +- KBASE_DEBUG_ASSERT(NULL != kbdev); +- KBASE_DEBUG_ASSERT(NULL != katom); ++ KBASE_DEBUG_ASSERT(kbdev != NULL); ++ KBASE_DEBUG_ASSERT(katom != NULL); + kctx = katom->kctx; +- KBASE_DEBUG_ASSERT(NULL != kctx); ++ KBASE_DEBUG_ASSERT(kctx != NULL); + +- dev_dbg(kbdev->dev, "JD: cancelling atom %p\n", (void *)katom); ++ dev_dbg(kbdev->dev, "JD: cancelling atom %pK\n", (void *)katom); + KBASE_KTRACE_ADD_JM(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0); + + /* This should only be done from a context that is not scheduled */ +@@ -1731,7 +1756,7 @@ void kbase_jd_zap_context(struct kbase_context *kctx) + flush_workqueue(kctx->dma_fence.wq); + #endif + +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + kbase_debug_job_fault_kctx_unblock(kctx); + #endif + +@@ -1744,12 +1769,15 @@ int kbase_jd_init(struct kbase_context *kctx) 
+ { + int i; + int mali_err = 0; ++ struct priority_control_manager_device *pcm_device = NULL; + + KBASE_DEBUG_ASSERT(kctx); ++ pcm_device = kctx->kbdev->pcm_dev; ++ kctx->jctx.max_priority = KBASE_JS_ATOM_SCHED_PRIO_REALTIME; + + kctx->jctx.job_done_wq = alloc_workqueue("mali_jd", + WQ_HIGHPRI | WQ_UNBOUND, 1); +- if (NULL == kctx->jctx.job_done_wq) { ++ if (kctx->jctx.job_done_wq == NULL) { + mali_err = -ENOMEM; + goto out1; + } +@@ -1785,6 +1813,11 @@ int kbase_jd_init(struct kbase_context *kctx) + INIT_LIST_HEAD(&kctx->completed_jobs); + atomic_set(&kctx->work_count, 0); + ++ /* Check if there are platform rules for maximum priority */ ++ if (pcm_device) ++ kctx->jctx.max_priority = pcm_device->ops.pcm_scheduler_priority_check( ++ pcm_device, current, KBASE_JS_ATOM_SCHED_PRIO_REALTIME); ++ + return 0; + + out1: +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c +index e9a161f..67d1bd9 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,11 +17,9 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + + #include + #include +@@ -29,7 +28,7 @@ + #if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) + #include + #endif +-#include ++#include + + struct kbase_jd_debugfs_depinfo { + u8 id; +@@ -47,13 +46,13 @@ static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom, + case BASE_JD_REQ_SOFT_FENCE_TRIGGER: + res = kbase_sync_fence_out_info_get(atom, &info); + if (res == 0) +- seq_printf(sfile, "Sa([%p]%d) ", ++ seq_printf(sfile, "Sa([%pK]%d) ", + info.fence, info.status); + break; + case BASE_JD_REQ_SOFT_FENCE_WAIT: + res = kbase_sync_fence_in_info_get(atom, &info); + if (res == 0) +- seq_printf(sfile, "Wa([%p]%d) ", ++ seq_printf(sfile, "Wa([%pK]%d) ", + info.fence, info.status); + break; + default: +@@ -66,42 +65,40 @@ static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom, + struct kbase_fence_cb *cb; + + if (atom->dma_fence.fence) { +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence = atom->dma_fence.fence; + #else + struct dma_fence *fence = atom->dma_fence.fence; + #endif + + seq_printf(sfile, +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) +- "Sd(%u#%u: %s) ", ++#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE) ++ "Sd(%u#%u: %s) ", + #else +- "Sd(%llu#%u: %s) ", ++ "Sd(%llu#%u: %s) ", + #endif +- fence->context, +- fence->seqno, +- dma_fence_is_signaled(fence) ? +- "signaled" : "active"); ++ fence->context, fence->seqno, ++ dma_fence_is_signaled(fence) ? 
"signaled" : ++ "active"); + } + + list_for_each_entry(cb, &atom->dma_fence.callbacks, + node) { +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence = cb->fence; + #else + struct dma_fence *fence = cb->fence; + #endif + + seq_printf(sfile, +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) +- "Wd(%u#%u: %s) ", ++#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE) ++ "Wd(%u#%u: %s) ", + #else +- "Wd(%llu#%u: %s) ", ++ "Wd(%llu#%u: %s) ", + #endif +- fence->context, +- fence->seqno, +- dma_fence_is_signaled(fence) ? +- "signaled" : "active"); ++ fence->context, fence->seqno, ++ dma_fence_is_signaled(fence) ? "signaled" : ++ "active"); + } + } + #endif /* CONFIG_MALI_DMA_FENCE */ +@@ -180,7 +177,8 @@ static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data) + + /* start_timestamp is cleared as soon as the atom leaves UNUSED state + * and set before a job is submitted to the h/w, a non-zero value means +- * it is valid */ ++ * it is valid ++ */ + if (ktime_to_ns(atom->start_timestamp)) + start_timestamp = ktime_to_ns( + ktime_sub(ktime_get(), atom->start_timestamp)); +@@ -228,6 +226,12 @@ static const struct file_operations kbasep_jd_debugfs_atoms_fops = { + + void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx) + { ++#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) ++ const mode_t mode = S_IRUGO; ++#else ++ const mode_t mode = S_IRUSR; ++#endif ++ + /* Caller already ensures this, but we keep the pattern for + * maintenance safety. 
+ */ +@@ -236,7 +240,7 @@ void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx) + return; + + /* Expose all atoms */ +- debugfs_create_file("atoms", S_IRUGO, kctx->kctx_dentry, kctx, ++ debugfs_create_file("atoms", mode, kctx->kctx_dentry, kctx, + &kbasep_jd_debugfs_atoms_fops); + + } +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h +index 697bdef..8e6140c 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,13 +17,10 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /** +- * @file mali_kbase_jd_debugfs.h +- * Header file for job dispatcher-related entries in debugfs ++ * DOC: Header file for job dispatcher-related entries in debugfs + */ + + #ifndef _KBASE_JD_DEBUGFS_H +@@ -38,7 +36,7 @@ struct kbase_context; + /** + * kbasep_jd_debugfs_ctx_init() - Add debugfs entries for JD system + * +- * @kctx Pointer to kbase_context ++ * @kctx: Pointer to kbase_context + */ + void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx); + +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c +index 3f17dd7..6995050 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,11 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- + /* + * HW access job manager common APIs + */ +@@ -29,6 +27,7 @@ + #include "mali_kbase_hwaccess_jm.h" + #include "mali_kbase_jm.h" + ++#if !MALI_USE_CSF + /** + * kbase_jm_next_job() - Attempt to run the next @nr_jobs_to_submit jobs on slot + * @js on the active context. 
+@@ -46,7 +45,7 @@ static bool kbase_jm_next_job(struct kbase_device *kbdev, int js, + + kctx = kbdev->hwaccess.active_kctx[js]; + dev_dbg(kbdev->dev, +- "Trying to run the next %d jobs in kctx %p (s:%d)\n", ++ "Trying to run the next %d jobs in kctx %pK (s:%d)\n", + nr_jobs_to_submit, (void *)kctx, js); + + if (!kctx) +@@ -118,7 +117,7 @@ void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) + + for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { + if (kbdev->hwaccess.active_kctx[js] == kctx) { +- dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n", ++ dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n", + (void *)kctx, js); + kbdev->hwaccess.active_kctx[js] = NULL; + } +@@ -130,7 +129,7 @@ struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, + { + lockdep_assert_held(&kbdev->hwaccess_lock); + +- dev_dbg(kbdev->dev, "Atom %p is returning with event code 0x%x\n", ++ dev_dbg(kbdev->dev, "Atom %pK is returning with event code 0x%x\n", + (void *)katom, katom->event_code); + + if (katom->event_code != BASE_JD_EVENT_STOPPED && +@@ -149,3 +148,4 @@ struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev, + + return kbase_js_complete_atom(katom, end_timestamp); + } ++#endif /* !MALI_USE_CSF */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h +index a3c7744..c6b28f3 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014, 2016, 2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2013-2014, 2016, 2019-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,11 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- + /* + * Job manager common APIs + */ +@@ -28,6 +26,7 @@ + #ifndef _KBASE_JM_H_ + #define _KBASE_JM_H_ + ++#if !MALI_USE_CSF + /** + * kbase_jm_kick() - Indicate that there are jobs ready to run. + * @kbdev: Device pointer +@@ -73,7 +72,9 @@ void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask); + * kbase_jm_kick_all() otherwise it will do nothing. + */ + void kbase_jm_try_kick_all(struct kbase_device *kbdev); ++#endif /* !MALI_USE_CSF */ + ++#if !MALI_USE_CSF + /** + * kbase_jm_idle_ctx() - Mark a context as idle. + * @kbdev: Device pointer +@@ -111,5 +112,6 @@ struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, + */ + struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, ktime_t *end_timestamp); ++#endif /* !MALI_USE_CSF */ + + #endif /* _KBASE_JM_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c +index 0b0c5bf..3682486 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /* + * Job Scheduler Implementation + */ +@@ -37,6 +34,7 @@ + + #include "mali_kbase_jm.h" + #include "mali_kbase_hwaccess_jm.h" ++#include + + /* + * Private types +@@ -45,26 +43,30 @@ + /* Bitpattern indicating the result of releasing a context */ + enum { + /* The context was descheduled - caller should try scheduling in a new +- * one to keep the runpool full */ ++ * one to keep the runpool full ++ */ + KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED = (1u << 0), + /* Ctx attributes were changed - caller should try scheduling all +- * contexts */ ++ * contexts ++ */ + KBASEP_JS_RELEASE_RESULT_SCHED_ALL = (1u << 1) + }; + + typedef u32 kbasep_js_release_result; + + const int kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS] = { +- KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */ +- KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */ +- KBASE_JS_ATOM_SCHED_PRIO_LOW /* BASE_JD_PRIO_LOW */ ++ KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */ ++ KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */ ++ KBASE_JS_ATOM_SCHED_PRIO_LOW, /* BASE_JD_PRIO_LOW */ ++ KBASE_JS_ATOM_SCHED_PRIO_REALTIME /* BASE_JD_PRIO_REALTIME */ + }; + + const base_jd_prio + kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT] = { +- BASE_JD_PRIO_HIGH, /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */ +- BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */ +- 
BASE_JD_PRIO_LOW /* KBASE_JS_ATOM_SCHED_PRIO_LOW */ ++ BASE_JD_PRIO_REALTIME, /* KBASE_JS_ATOM_SCHED_PRIO_REALTIME */ ++ BASE_JD_PRIO_HIGH, /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */ ++ BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */ ++ BASE_JD_PRIO_LOW /* KBASE_JS_ATOM_SCHED_PRIO_LOW */ + }; + + +@@ -79,7 +81,7 @@ static int kbase_js_get_slot(struct kbase_device *kbdev, + struct kbase_jd_atom *katom); + + static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, +- kbasep_js_ctx_job_cb callback); ++ kbasep_js_ctx_job_cb *callback); + + /* Helper for ktrace */ + #if KBASE_KTRACE_ENABLE +@@ -160,7 +162,7 @@ jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) + none_to_pull = RB_EMPTY_ROOT(&rb->runnable_tree); + + dev_dbg(kctx->kbdev->dev, +- "Slot %d (prio %d) is %spullable in kctx %p\n", ++ "Slot %d (prio %d) is %spullable in kctx %pK\n", + js, prio, none_to_pull ? "not " : "", kctx); + + return none_to_pull; +@@ -184,7 +186,7 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + +- for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH; ++ for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; + prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { + if (!jsctx_rb_none_to_pull_prio(kctx, js, prio)) + return false; +@@ -210,9 +212,8 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) + * + * The HW access lock must always be held when calling this function. 
+ */ +-static void +-jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, +- kbasep_js_ctx_job_cb callback) ++static void jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, ++ int prio, kbasep_js_ctx_job_cb *callback) + { + struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; + +@@ -234,7 +235,7 @@ jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, + WARN_ON(!(entry->core_req & + BASE_JD_REQ_END_RENDERPASS)); + dev_dbg(kctx->kbdev->dev, +- "Del runnable atom %p from X_DEP list\n", ++ "Del runnable atom %pK from X_DEP list\n", + (void *)entry); + + list_del(&entry->queue); +@@ -250,7 +251,7 @@ jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, + WARN_ON(!(entry->atom_flags & + KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)); + dev_dbg(kctx->kbdev->dev, +- "Del blocked atom %p from X_DEP list\n", ++ "Del blocked atom %pK from X_DEP list\n", + (void *)entry); + + list_del(queue->x_dep_head.next); +@@ -271,13 +272,12 @@ jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, + * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback + * for each entry, and remove the entry from the queue. 
+ */ +-static inline void +-jsctx_queue_foreach(struct kbase_context *kctx, int js, +- kbasep_js_ctx_job_cb callback) ++static inline void jsctx_queue_foreach(struct kbase_context *kctx, int js, ++ kbasep_js_ctx_job_cb *callback) + { + int prio; + +- for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH; ++ for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; + prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) + jsctx_queue_foreach_prio(kctx, js, prio, callback); + } +@@ -301,7 +301,7 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + dev_dbg(kctx->kbdev->dev, +- "Peeking runnable tree of kctx %p for prio %d (s:%d)\n", ++ "Peeking runnable tree of kctx %pK for prio %d (s:%d)\n", + (void *)kctx, prio, js); + + node = rb_first(&rb->runnable_tree); +@@ -319,7 +319,7 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) + * @js: Job slot id to check. + * + * Check the ring buffers for all priorities, starting from +- * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a ++ * KBASE_JS_ATOM_SCHED_PRIO_REALTIME, for the specified @js and @prio and return a + * pointer to the next atom, unless all the priority's ring buffers are empty. + * + * Caller must hold the hwaccess_lock. +@@ -333,7 +333,7 @@ jsctx_rb_peek(struct kbase_context *kctx, int js) + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + +- for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH; ++ for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; + prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { + struct kbase_jd_atom *katom; + +@@ -363,7 +363,7 @@ jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + +- dev_dbg(kctx->kbdev->dev, "Erasing atom %p from runnable tree of kctx %p\n", ++ dev_dbg(kctx->kbdev->dev, "Erasing atom %pK from runnable tree of kctx %pK\n", + (void *)katom, (void *)kctx); + + /* Atoms must be pulled in the correct order. 
*/ +@@ -385,7 +385,7 @@ jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + +- dev_dbg(kbdev->dev, "Adding atom %p to runnable tree of kctx %p (s:%d)\n", ++ dev_dbg(kbdev->dev, "Adding atom %pK to runnable tree of kctx %pK (s:%d)\n", + (void *)katom, (void *)kctx, js); + + while (*new) { +@@ -448,7 +448,8 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) + + #ifdef CONFIG_MALI_DEBUG + /* Soft-stop will be disabled on a single context by default unless +- * softstop_always is set */ ++ * softstop_always is set ++ */ + jsdd->softstop_always = false; + #endif /* CONFIG_MALI_DEBUG */ + jsdd->nr_all_contexts_running = 0; +@@ -531,14 +532,15 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) + kbdev->gpu_props.props.raw_props.js_features[i]); + + /* On error, we could continue on: providing none of the below resources +- * rely on the ones above */ ++ * rely on the ones above ++ */ + + mutex_init(&jsdd->runpool_mutex); + mutex_init(&jsdd->queue_mutex); + sema_init(&jsdd->schedule_sem, 1); + + for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) { +- for (j = 0; j < KBASE_JS_ATOM_SCHED_PRIO_COUNT; ++j) { ++ for (j = KBASE_JS_ATOM_SCHED_PRIO_FIRST; j < KBASE_JS_ATOM_SCHED_PRIO_COUNT; ++j) { + INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i][j]); + INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i][j]); + } +@@ -595,16 +597,18 @@ int kbasep_js_kctx_init(struct kbase_context *const kctx) + sizeof(js_kctx_info->ctx.ctx_attr_ref_count)); + + /* Initially, the context is disabled from submission until the create +- * flags are set */ ++ * flags are set ++ */ + kbase_ctx_flag_set(kctx, KCTX_SUBMIT_DISABLED); + + /* On error, we could continue on: providing none of the below resources +- * rely on the ones above */ ++ * rely on the ones above ++ */ + mutex_init(&js_kctx_info->ctx.jsctx_mutex); + + init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait); + +- for (i = 0; i < 
KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { ++ for (i = KBASE_JS_ATOM_SCHED_PRIO_FIRST; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { + for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) { + INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].x_dep_head); + kctx->jsctx_queue[i][j].runnable_tree = RB_ROOT; +@@ -678,7 +682,7 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, + bool ret = false; + + lockdep_assert_held(&kbdev->hwaccess_lock); +- dev_dbg(kbdev->dev, "Add pullable tail kctx %p (s:%d)\n", ++ dev_dbg(kbdev->dev, "Add pullable tail kctx %pK (s:%d)\n", + (void *)kctx, js); + + if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) +@@ -720,7 +724,7 @@ static bool kbase_js_ctx_list_add_pullable_head_nolock( + bool ret = false; + + lockdep_assert_held(&kbdev->hwaccess_lock); +- dev_dbg(kbdev->dev, "Add pullable head kctx %p (s:%d)\n", ++ dev_dbg(kbdev->dev, "Add pullable head kctx %pK (s:%d)\n", + (void *)kctx, js); + + if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) +@@ -796,7 +800,7 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, + bool ret = false; + + lockdep_assert_held(&kbdev->hwaccess_lock); +- dev_dbg(kbdev->dev, "Add unpullable tail kctx %p (s:%d)\n", ++ dev_dbg(kbdev->dev, "Add unpullable tail kctx %pK (s:%d)\n", + (void *)kctx, js); + + list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], +@@ -879,7 +883,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( + + lockdep_assert_held(&kbdev->hwaccess_lock); + +- for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { ++ for (i = KBASE_JS_ATOM_SCHED_PRIO_FIRST; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { + if (list_empty(&kbdev->js_data.ctx_list_pullable[js][i])) + continue; + +@@ -889,7 +893,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( + + list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + dev_dbg(kbdev->dev, +- "Popped %p from the pullable queue (s:%d)\n", ++ "Popped %pK from the 
pullable queue (s:%d)\n", + (void *)kctx, js); + return kctx; + } +@@ -943,25 +947,25 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, + + if (is_scheduled) { + if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) { +- dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %p\n", ++ dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %pK\n", + (void *)kctx); + return false; + } + } + katom = jsctx_rb_peek(kctx, js); + if (!katom) { +- dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %p (s:%d)\n", ++ dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%d)\n", + (void *)kctx, js); + return false; /* No pullable atoms */ + } + if (kctx->blocked_js[js][katom->sched_priority]) { + dev_dbg(kbdev->dev, +- "JS: kctx %p is blocked from submitting atoms at priority %d (s:%d)\n", ++ "JS: kctx %pK is blocked from submitting atoms at priority %d (s:%d)\n", + (void *)kctx, katom->sched_priority, js); + return false; + } + if (atomic_read(&katom->blocked)) { +- dev_dbg(kbdev->dev, "JS: Atom %p is blocked in js_ctx_pullable\n", ++ dev_dbg(kbdev->dev, "JS: Atom %pK is blocked in js_ctx_pullable\n", + (void *)katom); + return false; /* next atom blocked */ + } +@@ -970,20 +974,20 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || + katom->x_pre_dep->will_fail_event_code) { + dev_dbg(kbdev->dev, +- "JS: X pre-dep %p is not present in slot FIFO or will fail\n", ++ "JS: X pre-dep %pK is not present in slot FIFO or will fail\n", + (void *)katom->x_pre_dep); + return false; + } + if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && + kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) { + dev_dbg(kbdev->dev, +- "JS: Atom %p has cross-slot fail dependency and atoms on slot (s:%d)\n", ++ "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%d)\n", + (void *)katom, js); + return false; + } + } + +- dev_dbg(kbdev->dev, "JS: Atom %p is pullable in kctx %p (s:%d)\n", ++ dev_dbg(kbdev->dev, 
"JS: Atom %pK is pullable in kctx %pK (s:%d)\n", + (void *)katom, (void *)kctx, js); + + return true; +@@ -1007,7 +1011,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, + int dep_prio = dep_atom->sched_priority; + + dev_dbg(kbdev->dev, +- "Checking dep %d of atom %p (s:%d) on %p (s:%d)\n", ++ "Checking dep %d of atom %pK (s:%d) on %pK (s:%d)\n", + i, (void *)katom, js, (void *)dep_atom, dep_js); + + /* Dependent atom must already have been submitted */ +@@ -1020,7 +1024,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, + } + + /* Dependencies with different priorities can't +- be represented in the ringbuffer */ ++ * be represented in the ringbuffer ++ */ + if (prio != dep_prio) { + dev_dbg(kbdev->dev, + "Different atom priorities\n"); +@@ -1030,7 +1035,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, + + if (js == dep_js) { + /* Only one same-slot dependency can be +- * represented in the ringbuffer */ ++ * represented in the ringbuffer ++ */ + if (has_dep) { + dev_dbg(kbdev->dev, + "Too many same-slot deps\n"); +@@ -1038,7 +1044,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, + break; + } + /* Each dependee atom can only have one +- * same-slot dependency */ ++ * same-slot dependency ++ */ + if (dep_atom->post_dep) { + dev_dbg(kbdev->dev, + "Too many same-slot successors\n"); +@@ -1048,7 +1055,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, + has_dep = true; + } else { + /* Only one cross-slot dependency can be +- * represented in the ringbuffer */ ++ * represented in the ringbuffer ++ */ + if (has_x_dep) { + dev_dbg(kbdev->dev, + "Too many cross-slot deps\n"); +@@ -1056,7 +1064,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, + break; + } + /* Each dependee atom can only have one +- * cross-slot dependency */ ++ * cross-slot dependency ++ */ + if (dep_atom->x_post_dep) { + dev_dbg(kbdev->dev, + "Too many cross-slot successors\n"); +@@ -1064,7 +1073,8 @@ 
static bool kbase_js_dep_validate(struct kbase_context *kctx, + break; + } + /* The dependee atom can not already be in the +- * HW access ringbuffer */ ++ * HW access ringbuffer ++ */ + if (dep_atom->gpu_rb_state != + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + dev_dbg(kbdev->dev, +@@ -1074,7 +1084,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, + break; + } + /* The dependee atom can not already have +- * completed */ ++ * completed ++ */ + if (dep_atom->status != + KBASE_JD_ATOM_STATE_IN_JS) { + dev_dbg(kbdev->dev, +@@ -1092,7 +1103,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, + } + + /* If dependencies can be represented by ringbuffer then clear them from +- * atom structure */ ++ * atom structure ++ */ + if (ret) { + for (i = 0; i < 2; i++) { + struct kbase_jd_atom *dep_atom = katom->dep[i].atom; +@@ -1101,7 +1113,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, + int dep_js = kbase_js_get_slot(kbdev, dep_atom); + + dev_dbg(kbdev->dev, +- "Clearing dep %d of atom %p (s:%d) on %p (s:%d)\n", ++ "Clearing dep %d of atom %pK (s:%d) on %pK (s:%d)\n", + i, (void *)katom, js, (void *)dep_atom, + dep_js); + +@@ -1116,7 +1128,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, + katom->atom_flags |= + KBASE_KATOM_FLAG_X_DEP_BLOCKED; + +- dev_dbg(kbdev->dev, "Set X_DEP flag on atom %p\n", ++ dev_dbg(kbdev->dev, "Set X_DEP flag on atom %pK\n", + (void *)katom); + + katom->x_pre_dep = dep_atom; +@@ -1140,7 +1152,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, + } + } else { + dev_dbg(kbdev->dev, +- "Deps of atom %p (s:%d) could not be represented\n", ++ "Deps of atom %pK (s:%d) could not be represented\n", + (void *)katom, js); + } + +@@ -1181,7 +1193,7 @@ void kbase_js_update_ctx_priority(struct kbase_context *kctx) + /* Determine the new priority for context, as per the priority + * of currently in-use atoms. 
+ */ +- for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH; ++ for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; + prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { + if (kctx->atoms_count[prio]) { + new_priority = prio; +@@ -1192,6 +1204,7 @@ void kbase_js_update_ctx_priority(struct kbase_context *kctx) + + kbase_js_set_ctx_priority(kctx, new_priority); + } ++KBASE_EXPORT_TEST_API(kbase_js_update_ctx_priority); + + /** + * js_add_start_rp() - Add an atom that starts a renderpass to the job scheduler +@@ -1222,7 +1235,7 @@ static int js_add_start_rp(struct kbase_jd_atom *const start_katom) + if (rp->state != KBASE_JD_RP_COMPLETE) + return -EINVAL; + +- dev_dbg(kctx->kbdev->dev, "JS add start atom %p of RP %d\n", ++ dev_dbg(kctx->kbdev->dev, "JS add start atom %pK of RP %d\n", + (void *)start_katom, start_katom->renderpass_id); + + /* The following members are read when updating the job slot +@@ -1265,7 +1278,7 @@ static int js_add_end_rp(struct kbase_jd_atom *const end_katom) + + rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; + +- dev_dbg(kbdev->dev, "JS add end atom %p in state %d of RP %d\n", ++ dev_dbg(kbdev->dev, "JS add end atom %pK in state %d of RP %d\n", + (void *)end_katom, (int)rp->state, end_katom->renderpass_id); + + if (rp->state == KBASE_JD_RP_COMPLETE) +@@ -1332,7 +1345,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx, + /* Refcount ctx.nr_jobs */ + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX); + ++(js_kctx_info->ctx.nr_jobs); +- dev_dbg(kbdev->dev, "Add atom %p to kctx %p; now %d in ctx\n", ++ dev_dbg(kbdev->dev, "Add atom %pK to kctx %pK; now %d in ctx\n", + (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); + + /* Lock for state available during IRQ */ +@@ -1345,13 +1358,14 @@ bool kbasep_js_add_job(struct kbase_context *kctx, + /* Dependencies could not be represented */ + --(js_kctx_info->ctx.nr_jobs); + dev_dbg(kbdev->dev, +- "Remove atom %p from kctx %p; now %d in ctx\n", ++ "Remove atom %pK from kctx %pK; now %d in ctx\n", + 
(void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); + + /* Setting atom status back to queued as it still has unresolved +- * dependencies */ ++ * dependencies ++ */ + atom->status = KBASE_JD_ATOM_STATE_QUEUED; +- dev_dbg(kbdev->dev, "Atom %p status to queued\n", (void *)atom); ++ dev_dbg(kbdev->dev, "Atom %pK status to queued\n", (void *)atom); + + /* Undo the count, as the atom will get added again later but + * leave the context priority adjusted or boosted, in case if +@@ -1389,7 +1403,8 @@ bool kbasep_js_add_job(struct kbase_context *kctx, + kbdev, kctx, atom->slot_nr); + } + /* If this context is active and the atom is the first on its slot, +- * kick the job manager to attempt to fast-start the atom */ ++ * kick the job manager to attempt to fast-start the atom ++ */ + if (enqueue_required && kctx == + kbdev->hwaccess.active_kctx[atom->slot_nr]) + kbase_jm_try_kick(kbdev, 1 << atom->slot_nr); +@@ -1404,22 +1419,25 @@ bool kbasep_js_add_job(struct kbase_context *kctx, + if (kbase_ctx_flag(kctx, KCTX_DYING)) { + /* A job got added while/after kbase_job_zap_context() + * was called on a non-scheduled context. Kill that job +- * by killing the context. */ ++ * by killing the context. ++ */ + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, + false); + } else if (js_kctx_info->ctx.nr_jobs == 1) { + /* Handle Refcount going from 0 to 1: schedule the +- * context on the Queue */ ++ * context on the Queue ++ */ + KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); +- dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx); ++ dev_dbg(kbdev->dev, "JS: Enqueue Context %pK", kctx); + +- /* Queue was updated - caller must try to +- * schedule the head context */ ++ /* Queue was updated - caller must try to schedule the ++ * head context ++ */ + WARN_ON(!enqueue_required); + } + } + out_unlock: +- dev_dbg(kbdev->dev, "Enqueue of kctx %p is %srequired\n", ++ dev_dbg(kbdev->dev, "Enqueue of kctx %pK is %srequired\n", + kctx, enqueue_required ? 
"" : "not "); + + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); +@@ -1448,7 +1466,7 @@ void kbasep_js_remove_job(struct kbase_device *kbdev, + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0); + --(js_kctx_info->ctx.nr_jobs); + dev_dbg(kbdev->dev, +- "Remove atom %p from kctx %p; now %d in ctx\n", ++ "Remove atom %pK from kctx %pK; now %d in ctx\n", + (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); +@@ -1478,7 +1496,8 @@ bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, + * + * This is because it returns false for soft-stopped atoms, but we + * want to override that, because we're cancelling an atom regardless of +- * whether it was soft-stopped or not */ ++ * whether it was soft-stopped or not ++ */ + attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx, + &katom_retained_state); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +@@ -1525,7 +1544,8 @@ static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release( + + if (js_devdata->nr_user_contexts_running != 0 && runpool_ctx_attr_change) { + /* A change in runpool ctx attributes might mean we can +- * run more jobs than before */ ++ * run more jobs than before ++ */ + result = KBASEP_JS_RELEASE_RESULT_SCHED_ALL; + + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB, +@@ -1624,7 +1644,8 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( + + /* Make a set of checks to see if the context should be scheduled out. + * Note that there'll always be at least 1 reference to the context +- * which was previously acquired by kbasep_js_schedule_ctx(). */ ++ * which was previously acquired by kbasep_js_schedule_ctx(). 
++ */ + if (new_ref_count == 1 && + (!kbasep_js_is_submit_allowed(js_devdata, kctx) || + #ifdef CONFIG_MALI_ARBITER_SUPPORT +@@ -1635,8 +1656,9 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( + int slot; + + /* Last reference, and we've been told to remove this context +- * from the Run Pool */ +- dev_dbg(kbdev->dev, "JS: RunPool Remove Context %p because refcount=%d, jobs=%d, allowed=%d", ++ * from the Run Pool ++ */ ++ dev_dbg(kbdev->dev, "JS: RunPool Remove Context %pK because refcount=%d, jobs=%d, allowed=%d", + kctx, new_ref_count, js_kctx_info->ctx.nr_jobs, + kbasep_js_is_submit_allowed(js_devdata, kctx)); + +@@ -1646,7 +1668,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( + + for (slot = 0; slot < num_slots; slot++) { + if (kbdev->hwaccess.active_kctx[slot] == kctx) { +- dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n", ++ dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n", + (void *)kctx, slot); + kbdev->hwaccess.active_kctx[slot] = NULL; + } +@@ -1662,7 +1684,8 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( + kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx); + + /* Releasing the context and katom retained state can allow +- * more jobs to run */ ++ * more jobs to run ++ */ + release_result |= + kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, + kctx, katom_retained_state, +@@ -1702,7 +1725,8 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( + kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); + /* Signal any waiter that the context is not scheduled, so is + * safe for termination - once the jsctx_mutex is also dropped, +- * and jobs have finished. */ ++ * and jobs have finished. 
++ */ + wake_up(&js_kctx_info->ctx.is_scheduled_wait); + + /* Queue an action to occur after we've dropped the lock */ +@@ -1744,9 +1768,10 @@ void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, + + if (kbase_ctx_flag(kctx, KCTX_DYING)) { + /* Dying: don't requeue, but kill all jobs on the context. This +- * happens asynchronously */ ++ * happens asynchronously ++ */ + dev_dbg(kbdev->dev, +- "JS: ** Killing Context %p on RunPool Remove **", kctx); ++ "JS: ** Killing Context %pK on RunPool Remove **", kctx); + kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel); + } + } +@@ -1798,7 +1823,8 @@ void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, + } + + /* Variant of kbasep_js_runpool_release_ctx() that doesn't call into +- * kbase_js_sched_all() */ ++ * kbase_js_sched_all() ++ */ + static void kbasep_js_runpool_release_ctx_no_schedule( + struct kbase_device *kbdev, struct kbase_context *kctx) + { +@@ -1851,7 +1877,7 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, + bool kctx_suspended = false; + int as_nr; + +- dev_dbg(kbdev->dev, "Scheduling kctx %p (s:%d)\n", kctx, js); ++ dev_dbg(kbdev->dev, "Scheduling kctx %pK (s:%d)\n", kctx, js); + + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; +@@ -1867,7 +1893,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, + kbdev, kctx); + if (as_nr != KBASEP_AS_NR_INVALID) { + /* Attempt to retain the context again, this should +- * succeed */ ++ * succeed ++ */ + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + as_nr = kbase_ctx_sched_retain_ctx(kctx); +@@ -1926,7 +1953,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, + KBASE_TLSTREAM_TL_RET_AS_CTX(kbdev, &kbdev->as[kctx->as_nr], kctx); + + /* Cause any future waiter-on-termination to wait until the context is +- * descheduled */ ++ * descheduled ++ */ + wake_up(&js_kctx_info->ctx.is_scheduled_wait); + + /* Re-check for suspending: a suspend 
could've occurred, and all the +@@ -1939,7 +1967,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, + * was taken (i.e. this condition doesn't execute), then the + * kbasep_js_suspend() code will cleanup this context instead (by virtue + * of it being called strictly after the suspend flag is set, and will +- * wait for this lock to drop) */ ++ * wait for this lock to drop) ++ */ + #ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbase_pm_is_suspending(kbdev) || kbase_pm_is_gpu_lost(kbdev)) { + #else +@@ -1967,13 +1996,15 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + /* Note: after this point, the context could potentially get scheduled +- * out immediately */ ++ * out immediately ++ */ + + if (kctx_suspended) { + /* Finishing forcing out the context due to a suspend. Use a + * variant of kbasep_js_runpool_release_ctx() that doesn't + * schedule a new context, to prevent a risk of recursion back +- * into this function */ ++ * into this function ++ */ + kbasep_js_runpool_release_ctx_no_schedule(kbdev, kctx); + return false; + } +@@ -1992,7 +2023,7 @@ static bool kbase_js_use_ctx(struct kbase_device *kbdev, + kbase_backend_use_ctx_sched(kbdev, kctx, js)) { + + dev_dbg(kbdev->dev, +- "kctx %p already has ASID - mark as active (s:%d)\n", ++ "kctx %pK already has ASID - mark as active (s:%d)\n", + (void *)kctx, js); + + if (kbdev->hwaccess.active_kctx[js] != kctx) { +@@ -2059,7 +2090,8 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, + kbase_js_sync_timers(kbdev); + + /* Fast-starting requires the jsctx_mutex to be dropped, +- * because it works on multiple ctxs */ ++ * because it works on multiple ctxs ++ */ + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + +@@ -2071,7 +2103,8 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, + kbase_ctx_flag(kctx, KCTX_SCHEDULED)); 
+ } else { + /* Already scheduled in - We need to retain it to keep the +- * corresponding address space */ ++ * corresponding address space ++ */ + WARN_ON(!kbase_ctx_sched_inc_refcount(kctx)); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); +@@ -2116,7 +2149,8 @@ void kbasep_js_suspend(struct kbase_device *kbdev) + js_devdata->runpool_irq.submit_allowed = 0; + + /* Retain each of the contexts, so we can cause it to leave even if it +- * had no refcount to begin with */ ++ * had no refcount to begin with ++ */ + for (i = BASE_MAX_NR_AS - 1; i >= 0; --i) { + struct kbase_context *kctx = kbdev->as_to_kctx[i]; + +@@ -2137,7 +2171,8 @@ void kbasep_js_suspend(struct kbase_device *kbdev) + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* De-ref the previous retain to ensure each context gets pulled out +- * sometime later. */ ++ * sometime later. ++ */ + for (i = 0; + i < BASE_MAX_NR_AS; + ++i, retained = retained >> 1) { +@@ -2148,7 +2183,8 @@ void kbasep_js_suspend(struct kbase_device *kbdev) + } + + /* Caller must wait for all Power Manager active references to be +- * dropped */ ++ * dropped ++ */ + } + + void kbasep_js_resume(struct kbase_device *kbdev) +@@ -2162,7 +2198,7 @@ void kbasep_js_resume(struct kbase_device *kbdev) + + mutex_lock(&js_devdata->queue_mutex); + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { +- for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH; ++ for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; + prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { + struct kbase_context *kctx, *n; + unsigned long flags; +@@ -2283,7 +2319,8 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, + lockdep_assert_held(&kctx->jctx.lock); + + /* If slot will transition from unpullable to pullable then add to +- * pullable list */ ++ * pullable list ++ */ + if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) { + enqueue_required = true; + } else { +@@ -2297,7 +2334,7 @@ bool 
kbase_js_dep_resolved_submit(struct kbase_context *kctx, + int js = katom->slot_nr; + struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; + +- dev_dbg(kctx->kbdev->dev, "Add atom %p to X_DEP list (s:%d)\n", ++ dev_dbg(kctx->kbdev->dev, "Add atom %pK to X_DEP list (s:%d)\n", + (void *)katom, js); + + list_add_tail(&katom->queue, &queue->x_dep_head); +@@ -2307,7 +2344,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, + add_required = false; + } + } else { +- dev_dbg(kctx->kbdev->dev, "Atom %p not added to X_DEP list\n", ++ dev_dbg(kctx->kbdev->dev, "Atom %pK not added to X_DEP list\n", + (void *)katom); + } + +@@ -2321,7 +2358,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, + } + + dev_dbg(kctx->kbdev->dev, +- "Enqueue of kctx %p is %srequired to submit atom %p\n", ++ "Enqueue of kctx %pK is %srequired to submit atom %pK\n", + kctx, enqueue_required ? "" : "not ", katom); + + return enqueue_required; +@@ -2348,7 +2385,7 @@ static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) + + if (!kbase_js_atom_blocked_on_x_dep(katom)) { + dev_dbg(kctx->kbdev->dev, +- "Del atom %p from X_DEP list in js_move_to_tree\n", ++ "Del atom %pK from X_DEP list in js_move_to_tree\n", + (void *)katom); + + list_del(&katom->queue); +@@ -2366,7 +2403,7 @@ static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) + } + } else { + dev_dbg(kctx->kbdev->dev, +- "Atom %p blocked on x-dep in js_move_to_tree\n", ++ "Atom %pK blocked on x-dep in js_move_to_tree\n", + (void *)katom); + break; + } +@@ -2409,10 +2446,8 @@ static void kbase_js_evict_deps(struct kbase_context *kctx, + KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { + /* Remove dependency.*/ + x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; +- trace_sysgraph(SGR_DEP_RES, kctx->id, +- kbase_jd_atom_id(kctx, x_dep)); + +- dev_dbg(kctx->kbdev->dev, "Cleared X_DEP flag on atom %p\n", ++ dev_dbg(kctx->kbdev->dev, "Cleared X_DEP flag on atom %pK\n", + (void *)x_dep); + + /* Fail if it had a 
data dependency. */ +@@ -2434,14 +2469,14 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) + KBASE_DEBUG_ASSERT(kctx); + + kbdev = kctx->kbdev; +- dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %p (s:%d)\n", ++ dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %pK (s:%d)\n", + (void *)kctx, js); + + js_devdata = &kbdev->js_data; + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) { +- dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %p\n", ++ dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %pK\n", + (void *)kctx); + return NULL; + } +@@ -2454,25 +2489,26 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) + + katom = jsctx_rb_peek(kctx, js); + if (!katom) { +- dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %p (s:%d)\n", ++ dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%d)\n", + (void *)kctx, js); + return NULL; + } + if (kctx->blocked_js[js][katom->sched_priority]) { + dev_dbg(kbdev->dev, +- "JS: kctx %p is blocked from submitting atoms at priority %d (s:%d)\n", ++ "JS: kctx %pK is blocked from submitting atoms at priority %d (s:%d)\n", + (void *)kctx, katom->sched_priority, js); + return NULL; + } + if (atomic_read(&katom->blocked)) { +- dev_dbg(kbdev->dev, "JS: Atom %p is blocked in js_pull\n", ++ dev_dbg(kbdev->dev, "JS: Atom %pK is blocked in js_pull\n", + (void *)katom); + return NULL; + } + + /* Due to ordering restrictions when unpulling atoms on failure, we do + * not allow multiple runs of fail-dep atoms from the same context to be +- * present on the same slot */ ++ * present on the same slot ++ */ + if (katom->pre_dep && atomic_read(&kctx->atoms_pulled_slot[js])) { + struct kbase_jd_atom *prev_atom = + kbase_backend_inspect_tail(kbdev, js); +@@ -2486,14 +2522,14 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || + katom->x_pre_dep->will_fail_event_code) { + 
dev_dbg(kbdev->dev, +- "JS: X pre-dep %p is not present in slot FIFO or will fail\n", ++ "JS: X pre-dep %pK is not present in slot FIFO or will fail\n", + (void *)katom->x_pre_dep); + return NULL; + } + if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && + kbase_backend_nr_atoms_on_slot(kbdev, js)) { + dev_dbg(kbdev->dev, +- "JS: Atom %p has cross-slot fail dependency and atoms on slot (s:%d)\n", ++ "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%d)\n", + (void *)katom, js); + return NULL; + } +@@ -2518,7 +2554,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) + + katom->ticks = 0; + +- dev_dbg(kbdev->dev, "JS: successfully pulled atom %p from kctx %p (s:%d)\n", ++ dev_dbg(kbdev->dev, "JS: successfully pulled atom %pK from kctx %pK (s:%d)\n", + (void *)katom, (void *)kctx, js); + + return katom; +@@ -2561,7 +2597,7 @@ static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom) + return; + + dev_dbg(kctx->kbdev->dev, +- "JS return start atom %p in state %d of RP %d\n", ++ "JS return start atom %pK in state %d of RP %d\n", + (void *)start_katom, (int)rp->state, + start_katom->renderpass_id); + +@@ -2589,7 +2625,7 @@ static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom) + /* Prevent the tiler job being pulled for execution in the + * job scheduler again. + */ +- dev_dbg(kbdev->dev, "Blocking start atom %p\n", ++ dev_dbg(kbdev->dev, "Blocking start atom %pK\n", + (void *)start_katom); + atomic_inc(&start_katom->blocked); + +@@ -2601,14 +2637,14 @@ static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom) + /* Was the fragment job chain submitted to kbase yet? 
*/ + end_katom = rp->end_katom; + if (end_katom) { +- dev_dbg(kctx->kbdev->dev, "JS return add end atom %p\n", ++ dev_dbg(kctx->kbdev->dev, "JS return add end atom %pK\n", + (void *)end_katom); + + if (rp->state == KBASE_JD_RP_RETRY_OOM) { + /* Allow the end of the renderpass to be pulled for + * execution again to continue incremental rendering. + */ +- dev_dbg(kbdev->dev, "Unblocking end atom %p\n", ++ dev_dbg(kbdev->dev, "Unblocking end atom %pK\n", + (void *)end_katom); + atomic_dec(&end_katom->blocked); + WARN_ON(!(end_katom->atom_flags & +@@ -2670,7 +2706,7 @@ static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom) + return; + + dev_dbg(kctx->kbdev->dev, +- "JS return end atom %p in state %d of RP %d\n", ++ "JS return end atom %pK in state %d of RP %d\n", + (void *)end_katom, (int)rp->state, end_katom->renderpass_id); + + if (WARN_ON(rp->state != KBASE_JD_RP_OOM && +@@ -2692,14 +2728,14 @@ static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom) + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + dev_dbg(kbdev->dev, +- "Reset backing to %zu pages for region %p\n", ++ "Reset backing to %zu pages for region %pK\n", + reg->threshold_pages, (void *)reg); + + if (!WARN_ON(reg->flags & KBASE_REG_VA_FREED)) + kbase_mem_shrink(kctx, reg, reg->threshold_pages); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); +- dev_dbg(kbdev->dev, "Deleting region %p from list\n", ++ dev_dbg(kbdev->dev, "Deleting region %pK from list\n", + (void *)reg); + list_del_init(®->link); + kbase_va_region_alloc_put(kctx, reg); +@@ -2717,7 +2753,7 @@ static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom) + */ + start_katom = rp->start_katom; + if (!WARN_ON(!start_katom)) { +- dev_dbg(kbdev->dev, "Unblocking start atom %p\n", ++ dev_dbg(kbdev->dev, "Unblocking start atom %pK\n", + (void *)start_katom); + atomic_dec(&start_katom->blocked); + (void)kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, +@@ -2743,7 +2779,7 @@ static void 
js_return_worker(struct work_struct *data) + unsigned long flags; + base_jd_core_req core_req = katom->core_req; + +- dev_dbg(kbdev->dev, "%s for atom %p with event code 0x%x\n", ++ dev_dbg(kbdev->dev, "%s for atom %pK with event code 0x%x\n", + __func__, (void *)katom, katom->event_code); + + if (katom->event_code != BASE_JD_EVENT_END_RP_DONE) +@@ -2771,13 +2807,15 @@ static void js_return_worker(struct work_struct *data) + timer_sync |= kbase_js_ctx_list_remove_nolock(kbdev, kctx, js); + + /* If this slot has been blocked due to soft-stopped atoms, and all +- * atoms have now been processed, then unblock the slot */ ++ * atoms have now been processed, then unblock the slot ++ */ + if (!kctx->atoms_pulled_slot_pri[js][prio] && + kctx->blocked_js[js][prio]) { + kctx->blocked_js[js][prio] = false; + + /* Only mark the slot as pullable if the context is not idle - +- * that case is handled below */ ++ * that case is handled below ++ */ + if (atomic_read(&kctx->atoms_pulled) && + kbase_js_ctx_pullable(kctx, js, true)) + timer_sync |= kbase_js_ctx_list_add_pullable_nolock( +@@ -2786,12 +2824,12 @@ static void js_return_worker(struct work_struct *data) + + if (!atomic_read(&kctx->atoms_pulled)) { + dev_dbg(kbdev->dev, +- "No atoms currently pulled from context %p\n", ++ "No atoms currently pulled from context %pK\n", + (void *)kctx); + + if (!kctx->slots_pullable) { + dev_dbg(kbdev->dev, +- "Context %p %s counted as runnable\n", ++ "Context %pK %s counted as runnable\n", + (void *)kctx, + kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF) ? + "is" : "isn't"); +@@ -2827,7 +2865,7 @@ static void js_return_worker(struct work_struct *data) + + if (context_idle) { + dev_dbg(kbdev->dev, +- "Context %p %s counted as active\n", ++ "Context %pK %s counted as active\n", + (void *)kctx, + kbase_ctx_flag(kctx, KCTX_ACTIVE) ? 
+ "is" : "isn't"); +@@ -2866,13 +2904,13 @@ static void js_return_worker(struct work_struct *data) + + kbase_backend_complete_wq_post_sched(kbdev, core_req); + +- dev_dbg(kbdev->dev, "Leaving %s for atom %p\n", ++ dev_dbg(kbdev->dev, "Leaving %s for atom %pK\n", + __func__, (void *)katom); + } + + void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) + { +- dev_dbg(kctx->kbdev->dev, "Unpulling atom %p in kctx %p\n", ++ dev_dbg(kctx->kbdev->dev, "Unpulling atom %pK in kctx %pK\n", + (void *)katom, (void *)kctx); + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); +@@ -2927,7 +2965,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx, + return false; + + dev_dbg(kctx->kbdev->dev, +- "Start atom %p is done in state %d of RP %d\n", ++ "Start atom %pK is done in state %d of RP %d\n", + (void *)start_katom, (int)rp->state, + start_katom->renderpass_id); + +@@ -2939,7 +2977,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx, + unsigned long flags; + + dev_dbg(kctx->kbdev->dev, +- "Start atom %p completed before soft-stop\n", ++ "Start atom %pK completed before soft-stop\n", + (void *)start_katom); + + kbase_gpu_vm_lock(kctx); +@@ -2951,7 +2989,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx, + struct kbase_va_region, link); + + WARN_ON(reg->flags & KBASE_REG_VA_FREED); +- dev_dbg(kctx->kbdev->dev, "Deleting region %p from list\n", ++ dev_dbg(kctx->kbdev->dev, "Deleting region %pK from list\n", + (void *)reg); + list_del_init(®->link); + kbase_va_region_alloc_put(kctx, reg); +@@ -2961,7 +2999,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx, + kbase_gpu_vm_unlock(kctx); + } else { + dev_dbg(kctx->kbdev->dev, +- "Start atom %p did not exceed memory threshold\n", ++ "Start atom %pK did not exceed memory threshold\n", + (void *)start_katom); + + WARN_ON(rp->state != KBASE_JD_RP_START && +@@ -2978,7 +3016,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx, + /* Allow the end of 
the renderpass to be pulled for + * execution again to continue incremental rendering. + */ +- dev_dbg(kbdev->dev, "Unblocking end atom %p!\n", ++ dev_dbg(kbdev->dev, "Unblocking end atom %pK!\n", + (void *)end_katom); + atomic_dec(&end_katom->blocked); + +@@ -3022,7 +3060,7 @@ static void js_complete_end_rp(struct kbase_context *kctx, + if (WARN_ON(rp->end_katom != end_katom)) + return; + +- dev_dbg(kbdev->dev, "End atom %p is done in state %d of RP %d\n", ++ dev_dbg(kbdev->dev, "End atom %pK is done in state %d of RP %d\n", + (void *)end_katom, (int)rp->state, end_katom->renderpass_id); + + if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE) || +@@ -3056,7 +3094,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, + kbdev = kctx->kbdev; + atom_slot = katom->slot_nr; + +- dev_dbg(kbdev->dev, "%s for atom %p (s:%d)\n", ++ dev_dbg(kbdev->dev, "%s for atom %pK (s:%d)\n", + __func__, (void *)katom, atom_slot); + + /* Update the incremental rendering state machine. +@@ -3075,7 +3113,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { +- dev_dbg(kbdev->dev, "Atom %p is in runnable_tree\n", ++ dev_dbg(kbdev->dev, "Atom %pK is in runnable_tree\n", + (void *)katom); + + context_idle = !atomic_dec_return(&kctx->atoms_pulled); +@@ -3091,11 +3129,12 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, + } + + /* If this slot has been blocked due to soft-stopped atoms, and +- * all atoms have now been processed, then unblock the slot */ ++ * all atoms have now been processed, then unblock the slot ++ */ + if (!kctx->atoms_pulled_slot_pri[atom_slot][prio] + && kctx->blocked_js[atom_slot][prio]) { + dev_dbg(kbdev->dev, +- "kctx %p is no longer blocked from submitting on slot %d at priority %d\n", ++ "kctx %pK is no longer blocked from submitting on slot %d at priority %d\n", + (void *)kctx, atom_slot, prio); + + 
kctx->blocked_js[atom_slot][prio] = false; +@@ -3149,7 +3188,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, + * jd_done_worker(). + */ + if (context_idle) { +- dev_dbg(kbdev->dev, "kctx %p is no longer active\n", ++ dev_dbg(kbdev->dev, "kctx %pK is no longer active\n", + (void *)kctx); + kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); + } +@@ -3200,7 +3239,7 @@ static bool js_end_rp_is_complete(struct kbase_jd_atom *const end_katom) + return true; + + dev_dbg(kbdev->dev, +- "JS complete end atom %p in state %d of RP %d\n", ++ "JS complete end atom %pK in state %d of RP %d\n", + (void *)end_katom, (int)rp->state, + end_katom->renderpass_id); + +@@ -3229,7 +3268,7 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, + struct kbase_jd_atom *x_dep = katom->x_post_dep; + + kbdev = kctx->kbdev; +- dev_dbg(kbdev->dev, "Atom %p complete in kctx %p (post-dep %p)\n", ++ dev_dbg(kbdev->dev, "Atom %pK complete in kctx %pK (post-dep %pK)\n", + (void *)katom, (void *)kctx, (void *)x_dep); + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); +@@ -3245,7 +3284,7 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, + katom->event_code = katom->will_fail_event_code; + + katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED; +- dev_dbg(kbdev->dev, "Atom %p status to HW completed\n", (void *)katom); ++ dev_dbg(kbdev->dev, "Atom %pK status to HW completed\n", (void *)katom); + + if (katom->event_code != BASE_JD_EVENT_DONE) { + kbase_js_evict_deps(kctx, katom, katom->slot_nr, +@@ -3267,9 +3306,7 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, + bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr, + false); + x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; +- trace_sysgraph(SGR_DEP_RES, kctx->id, +- kbase_jd_atom_id(katom->kctx, x_dep)); +- dev_dbg(kbdev->dev, "Cleared X_DEP flag on atom %p\n", ++ dev_dbg(kbdev->dev, "Cleared X_DEP flag on atom %pK\n", + (void *)x_dep); + + 
kbase_js_move_to_tree(x_dep); +@@ -3280,13 +3317,13 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, + x_dep->slot_nr); + + if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { +- dev_dbg(kbdev->dev, "Atom %p is in runnable tree\n", ++ dev_dbg(kbdev->dev, "Atom %pK is in runnable tree\n", + (void *)x_dep); + return x_dep; + } + } else { + dev_dbg(kbdev->dev, +- "No cross-slot dep to unblock for atom %p\n", ++ "No cross-slot dep to unblock for atom %pK\n", + (void *)katom); + } + +@@ -3317,13 +3354,13 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom) + + if (!(katom->atom_flags & + KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { +- dev_dbg(kbdev->dev, "Atom %p is not blocked on a cross-slot dependency", ++ dev_dbg(kbdev->dev, "Atom %pK is not blocked on a cross-slot dependency", + (void *)katom); + return false; + } + + if (!(katom->core_req & BASE_JD_REQ_END_RENDERPASS)) { +- dev_dbg(kbdev->dev, "Atom %p is blocked on a cross-slot dependency", ++ dev_dbg(kbdev->dev, "Atom %pK is blocked on a cross-slot dependency", + (void *)katom); + return true; + } +@@ -3349,12 +3386,12 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom) + * if it only depends on the tiler job chain. 
+ */ + if (katom->x_pre_dep != rp->start_katom) { +- dev_dbg(kbdev->dev, "Dependency is on %p not start atom %p\n", ++ dev_dbg(kbdev->dev, "Dependency is on %pK not start atom %pK\n", + (void *)katom->x_pre_dep, (void *)rp->start_katom); + return true; + } + +- dev_dbg(kbdev->dev, "Ignoring cross-slot dep on atom %p\n", ++ dev_dbg(kbdev->dev, "Ignoring cross-slot dep on atom %pK\n", + (void *)katom->x_pre_dep); + + return false; +@@ -3368,7 +3405,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) + bool ctx_waiting[BASE_JM_MAX_NR_SLOTS]; + int js; + +- dev_dbg(kbdev->dev, "%s kbdev %p mask 0x%x\n", ++ dev_dbg(kbdev->dev, "%s kbdev %pK mask 0x%x\n", + __func__, (void *)kbdev, (unsigned int)js_mask); + + js_devdata = &kbdev->js_data; +@@ -3403,7 +3440,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) + context_idle = true; + + dev_dbg(kbdev->dev, +- "kctx %p is not active (s:%d)\n", ++ "kctx %pK is not active (s:%d)\n", + (void *)kctx, js); + + if (kbase_pm_context_active_handle_suspend( +@@ -3412,7 +3449,8 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) + dev_dbg(kbdev->dev, + "Suspend pending (s:%d)\n", js); + /* Suspend pending - return context to +- * queue and stop scheduling */ ++ * queue and stop scheduling ++ */ + mutex_lock( + &kctx->jctx.sched_info.ctx.jsctx_mutex); + if (kbase_js_ctx_list_add_pullable_head( +@@ -3432,7 +3470,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) + &kctx->jctx.sched_info.ctx.jsctx_mutex); + + dev_dbg(kbdev->dev, +- "kctx %p cannot be used at this time\n", ++ "kctx %pK cannot be used at this time\n", + kctx); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); +@@ -3474,7 +3512,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) + bool pullable; + + dev_dbg(kbdev->dev, +- "No atoms pulled from kctx %p (s:%d)\n", ++ "No atoms pulled from kctx %pK (s:%d)\n", + (void *)kctx, js); + + pullable = kbase_js_ctx_pullable(kctx, js, +@@ -3483,7 +3521,8 @@ void 
kbase_js_sched(struct kbase_device *kbdev, int js_mask) + /* Failed to pull jobs - push to head of list. + * Unless this context is already 'active', in + * which case it's effectively already scheduled +- * so push it to the back of the list. */ ++ * so push it to the back of the list. ++ */ + if (pullable && kctx == last_active[js] && + kbase_ctx_flag(kctx, + (KCTX_PULLED_SINCE_ACTIVE_JS0 << +@@ -3508,7 +3547,8 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) + * slot, then we need to remove the active + * marker to prevent it from submitting atoms in + * the IRQ handler, which would prevent this +- * context from making progress. */ ++ * context from making progress. ++ */ + if (last_active[js] && kctx != last_active[js] + && kbase_js_ctx_pullable( + last_active[js], js, true)) +@@ -3534,7 +3574,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) + break; /* Could not run atoms on this slot */ + } + +- dev_dbg(kbdev->dev, "Push kctx %p to back of list\n", ++ dev_dbg(kbdev->dev, "Push kctx %pK to back of list\n", + (void *)kctx); + if (kbase_js_ctx_pullable(kctx, js, true)) + timer_sync |= +@@ -3556,7 +3596,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) + for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { + if (kbdev->hwaccess.active_kctx[js] == last_active[js] && + ctx_waiting[js]) { +- dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n", ++ dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n", + (void *)last_active[js], js); + kbdev->hwaccess.active_kctx[js] = NULL; + } +@@ -3580,13 +3620,14 @@ void kbase_js_zap_context(struct kbase_context *kctx) + + /* First, atomically do the following: + * - mark the context as dying +- * - try to evict it from the queue */ ++ * - try to evict it from the queue ++ */ + mutex_lock(&kctx->jctx.lock); + mutex_lock(&js_devdata->queue_mutex); + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + kbase_ctx_flag_set(kctx, KCTX_DYING); + +- dev_dbg(kbdev->dev, "Zap: Try Evict Ctx 
%p", kctx); ++ dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %pK", kctx); + + /* + * At this point we know: +@@ -3650,13 +3691,14 @@ void kbase_js_zap_context(struct kbase_context *kctx) + + KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u, kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + +- dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx); ++ dev_dbg(kbdev->dev, "Zap: Ctx %pK scheduled=0", kctx); + + /* Only cancel jobs when we evicted from the + * queue. No Power Manager active reference was held. + * +- * Having is_dying set ensures that this kills, and +- * doesn't requeue */ ++ * Having is_dying set ensures that this kills, and doesn't ++ * requeue ++ */ + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, false); + + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); +@@ -3667,9 +3709,10 @@ void kbase_js_zap_context(struct kbase_context *kctx) + bool was_retained; + + /* Case c: didn't evict, but it is scheduled - it's in the Run +- * Pool */ ++ * Pool ++ */ + KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u, kbase_ctx_flag(kctx, KCTX_SCHEDULED)); +- dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx); ++ dev_dbg(kbdev->dev, "Zap: Ctx %pK is in RunPool", kctx); + + /* Disable the ctx from submitting any more jobs */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); +@@ -3678,18 +3721,21 @@ void kbase_js_zap_context(struct kbase_context *kctx) + + /* Retain and (later) release the context whilst it is is now + * disallowed from submitting jobs - ensures that someone +- * somewhere will be removing the context later on */ ++ * somewhere will be removing the context later on ++ */ + was_retained = kbase_ctx_sched_inc_refcount_nolock(kctx); + + /* Since it's scheduled and we have the jsctx_mutex, it must be +- * retained successfully */ ++ * retained successfully ++ */ + KBASE_DEBUG_ASSERT(was_retained); + +- dev_dbg(kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx); ++ dev_dbg(kbdev->dev, "Zap: Ctx %pK Kill Any Running jobs", kctx); + + /* 
Cancel any remaining running jobs for this kctx - if any. + * Submit is disallowed which takes effect immediately, so no +- * more new jobs will appear after we do this. */ ++ * more new jobs will appear after we do this. ++ */ + kbase_backend_jm_kill_running_jobs_from_kctx(kctx); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +@@ -3697,7 +3743,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) + mutex_unlock(&js_devdata->queue_mutex); + mutex_unlock(&kctx->jctx.lock); + +- dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)", ++ dev_dbg(kbdev->dev, "Zap: Ctx %pK Release (may or may not schedule out immediately)", + kctx); + + kbasep_js_runpool_release_ctx(kbdev, kctx); +@@ -3711,7 +3757,8 @@ void kbase_js_zap_context(struct kbase_context *kctx) + * to be destroyed, and the context to be de-scheduled (if it was on the + * runpool). + * +- * kbase_jd_zap_context() will do this. */ ++ * kbase_jd_zap_context() will do this. ++ */ + } + + static inline int trace_get_refcnt(struct kbase_device *kbdev, +@@ -3739,7 +3786,7 @@ static inline int trace_get_refcnt(struct kbase_device *kbdev, + * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex. 
+ */ + static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, +- kbasep_js_ctx_job_cb callback) ++ kbasep_js_ctx_job_cb *callback) + { + struct kbase_device *kbdev; + unsigned long flags; +@@ -3758,3 +3805,18 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } ++ ++base_jd_prio kbase_js_priority_check(struct kbase_device *kbdev, base_jd_prio priority) ++{ ++ struct priority_control_manager_device *pcm_device = kbdev->pcm_dev; ++ int req_priority, out_priority; ++ base_jd_prio out_jd_priority = priority; ++ ++ if (pcm_device) { ++ req_priority = kbasep_js_atom_prio_to_sched_prio(priority); ++ out_priority = pcm_device->ops.pcm_scheduler_priority_check(pcm_device, current, req_priority); ++ out_jd_priority = kbasep_js_sched_prio_to_atom_prio(out_priority); ++ } ++ return out_jd_priority; ++} ++ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h +index 541acd4..96974c8 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,15 +17,10 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /** +- * @file mali_kbase_js.h +- * Job Scheduler APIs. ++ * DOC: Job Scheduler APIs. + */ + + #ifndef _KBASE_JS_H_ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c +index 141d04a..7775648 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2012-2016, 2018, 2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2016, 2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,11 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- + #include + #include + +@@ -29,8 +27,11 @@ + */ + + /** +- * @brief Check whether a ctx has a certain attribute, and if so, retain that ++ * Check whether a ctx has a certain attribute, and if so, retain that + * attribute on the runpool. 
++ * @kbdev: Device pointer ++ * @kctx: KBase context ++ * @attribute: Attribute to check/retain + * + * Requires: + * - jsctx mutex +@@ -75,8 +76,11 @@ static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, s + } + + /** +- * @brief Check whether a ctx has a certain attribute, and if so, release that ++ * Check whether a ctx has a certain attribute, and if so, release that + * attribute on the runpool. ++ * @kbdev: Device pointer ++ * @kctx: KBase context ++ * @attribute: Attribute to release + * + * Requires: + * - jsctx mutex +@@ -120,8 +124,11 @@ static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, + } + + /** +- * @brief Retain a certain attribute on a ctx, also retaining it on the runpool ++ * Retain a certain attribute on a ctx, also retaining it on the runpool + * if the context is scheduled. ++ * @kbdev: Device pointer ++ * @kctx: KBase context ++ * @attribute: Attribute to retain + * + * Requires: + * - jsctx mutex +@@ -156,9 +163,12 @@ static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struc + return runpool_state_changed; + } + +-/* +- * @brief Release a certain attribute on a ctx, also releasing it from the runpool ++/** ++ * Release a certain attribute on a ctx, also releasing it from the runpool + * if the context is scheduled. 
++ * @kbdev: Device pointer ++ * @kctx: KBase context ++ * @attribute: Attribute to release + * + * Requires: + * - jsctx mutex +@@ -211,7 +221,8 @@ void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kb + + /* We don't need to know about state changed, because retaining a + * context occurs on scheduling it, and that itself will also try +- * to run new atoms */ ++ * to run new atoms ++ */ + CSTD_UNUSED(runpool_state_changed); + } + } +@@ -251,9 +262,9 @@ void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase + runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES); + } + +- /* We don't need to know about state changed, because retaining an +- * atom occurs on adding it, and that itself will also try to run +- * new atoms */ ++ /* We don't need to know about state changed, because retaining an atom ++ * occurs on adding it, and that itself will also try to run new atoms ++ */ + CSTD_UNUSED(runpool_state_changed); + } + +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h +index 25fd397..6f29241 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2012-2015, 2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2015, 2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,37 +17,19 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /** +- * @file mali_kbase_js_ctx_attr.h +- * Job Scheduler Context Attribute APIs ++ * DOC: Job Scheduler Context Attribute APIs + */ + + #ifndef _KBASE_JS_CTX_ATTR_H_ + #define _KBASE_JS_CTX_ATTR_H_ + +-/** +- * @addtogroup base_api +- * @{ +- */ +- +-/** +- * @addtogroup base_kbase_api +- * @{ +- */ +- +-/** +- * @addtogroup kbase_js +- * @{ +- */ +- + /** + * Retain all attributes of a context ++ * @kbdev: KBase device ++ * @kctx: KBase context + * + * This occurs on scheduling in the context on the runpool (but after + * is_scheduled is set) +@@ -60,6 +43,8 @@ void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kb + + /** + * Release all attributes of a context ++ * @kbdev: KBase device ++ * @kctx: KBase context + * + * This occurs on scheduling out the context from the runpool (but before + * is_scheduled is cleared) +@@ -79,6 +64,9 @@ bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct k + + /** + * Retain all attributes of an atom ++ * @kbdev: KBase device ++ * @kctx: KBase context ++ * @katom: Atom + * + * This occurs on adding an atom to a context + * +@@ -90,6 +78,9 @@ void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase + + /** + * Release all attributes of an atom, given its retained state. 
++ * @kbdev: KBase device ++ * @kctx: KBase context ++ * @katom_retained_state: Retained state + * + * This occurs after (permanently) removing an atom from a context + * +@@ -107,7 +98,7 @@ void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase + */ + bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state); + +-/** ++/* + * Requires: + * - runpool_irq spinlock + */ +@@ -122,7 +113,7 @@ static inline s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev, + return js_devdata->runpool_irq.ctx_attr_ref_count[attribute]; + } + +-/** ++/* + * Requires: + * - runpool_irq spinlock + */ +@@ -132,7 +123,7 @@ static inline bool kbasep_js_ctx_attr_is_attr_on_runpool(struct kbase_device *kb + return (bool) kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute); + } + +-/** ++/* + * Requires: + * - jsctx mutex + */ +@@ -148,8 +139,4 @@ static inline bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx, + return (bool) (js_kctx_info->ctx.ctx_attr_ref_count[attribute]); + } + +- /** @} *//* end group kbase_js */ +- /** @} *//* end group base_kbase_api */ +- /** @} *//* end group base_api */ +- + #endif /* _KBASE_JS_DEFS_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_kinstr_jm.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_kinstr_jm.c +new file mode 100644 +index 0000000..1b23b41 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_kinstr_jm.c +@@ -0,0 +1,894 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++/* ++ * mali_kbase_kinstr_jm.c ++ * Kernel driver public interface to job manager atom tracing ++ */ ++ ++#include "mali_kbase_kinstr_jm.h" ++#include ++ ++#include "mali_kbase.h" ++#include "mali_kbase_linux.h" ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#if KERNEL_VERSION(5, 1, 0) <= LINUX_VERSION_CODE ++#include ++#else ++// Stringify the expression if no message is given. ++#define static_assert(e, ...) __static_assert(e, #__VA_ARGS__, #e) ++#define __static_assert(e, msg, ...) _Static_assert(e, msg) ++#endif ++ ++#if KERNEL_VERSION(4, 16, 0) >= LINUX_VERSION_CODE ++typedef unsigned int __poll_t; ++#endif ++ ++#ifndef ENOTSUP ++#define ENOTSUP EOPNOTSUPP ++#endif ++ ++/* The module printing prefix */ ++#define PR_ "mali_kbase_kinstr_jm: " ++ ++/* Allows us to perform ASM goto for the tracing ++ * https://www.kernel.org/doc/Documentation/static-keys.txt ++ */ ++DEFINE_STATIC_KEY_FALSE(basep_kinstr_jm_reader_static_key); ++ ++#define KBASE_KINSTR_JM_VERSION 2 ++ ++/** ++ * struct kbase_kinstr_jm - The context for the kernel job manager atom tracing ++ * @readers: a bitlocked list of opened readers. Readers are attached to the ++ * private data of a file descriptor that the user opens with the ++ * KBASE_IOCTL_KINSTR_JM_FD IO control call. ++ * @refcount: reference count for the context. 
Any reader will have a link ++ * back to the context so that they can remove themselves from the ++ * list. ++ * ++ * This is opaque outside this compilation unit ++ */ ++struct kbase_kinstr_jm { ++ struct hlist_bl_head readers; ++ struct kref refcount; ++}; ++ ++/** ++ * struct kbase_kinstr_jm_atom_state_change - Represents an atom changing to a ++ * new state ++ * @timestamp: Raw monotonic nanoseconds of the state change ++ * @state: The state that the atom has moved to ++ * @atom: The atom number that has changed state ++ * @flags: Flags associated with the state change. See ++ * KBASE_KINSTR_JM_ATOM_STATE_FLAG_* defines. ++ * @reserved: Reserved for future use. ++ * @data: Extra data for the state change. Active member depends on state. ++ * @data.start: Extra data for the state change. Active member depends on ++ * state. ++ * @data.start.slot: Extra data for the state change. Active member depends on ++ * state. ++ * @data.padding: Padding ++ * ++ * We can add new fields to the structure and old user code will gracefully ++ * ignore the new fields. ++ * ++ * We can change the size of the structure and old user code will gracefully ++ * skip over the new size via `struct kbase_kinstr_jm_fd_out->size`. ++ * ++ * If we remove fields, the version field in `struct ++ * kbase_kinstr_jm_fd_out->version` will be incremented and old user code will ++ * gracefully fail and tell the user that the kernel API is too new and has ++ * backwards-incompatible changes. Note that one userspace can opt to handle ++ * multiple kernel major versions of the structure. ++ * ++ * If we need to change the _meaning_ of one of the fields, i.e. the state ++ * machine has had an incompatible change, we can keep the same members in the ++ * structure and update the version as above. User code will no longer ++ * recognise that it has the supported field and can gracefully explain to the ++ * user that the kernel API is no longer supported. 
++ * ++ * When making changes to this structure, make sure they are either: ++ * - additions to the end (for minor version bumps (i.e. only a size increase)) ++ * such that the layout of existing fields doesn't change, or; ++ * - update the version reported to userspace so that it can fail explicitly. ++ */ ++struct kbase_kinstr_jm_atom_state_change { ++ u64 timestamp; ++ s8 state; /* enum kbase_kinstr_jm_reader_atom_state */ ++ u8 atom; ++ u8 flags; ++ u8 reserved[1]; ++ /* Tagged union based on state. Ensure members are aligned correctly! */ ++ union { ++ struct { ++ u8 slot; ++ } start; ++ u8 padding[4]; ++ } data; ++}; ++static_assert( ++ ((1 << 8 * sizeof(((struct kbase_kinstr_jm_atom_state_change *)0)->state)) - 1) >= ++ KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT); ++ ++#define KBASE_KINSTR_JM_ATOM_STATE_FLAG_OVERFLOW BIT(0) ++ ++/** ++ * struct reader_changes - The circular buffer of kernel atom state changes ++ * @data: The allocated buffer. This is allocated when the user requests ++ * the reader file descriptor. It is released when the user calls ++ * close() on the fd. When accessing this, lock the producer spin ++ * lock to prevent races on the allocated memory. The consumer lock ++ * does not need to be held because newly-inserted data will always ++ * be outside the currently-read range. ++ * @producer: The producing spinlock which allows us to push changes into the ++ * buffer at the same time as a user read occurring. This needs to ++ * be locked when saving/restoring the IRQ because we can receive an ++ * interrupt from the GPU when an atom completes. The CPU could have ++ * a task preempted that is holding this lock. ++ * @consumer: The consuming mutex which locks around the user read(). ++ * Must be held when updating the tail of the circular buffer. ++ * @head: The head of the circular buffer. Can be used with Linux @c CIRC_ ++ * helpers. The producer should lock and update this with an SMP ++ * store when a new change lands. 
The consumer can read with an ++ * SMP load. This allows the producer to safely insert new changes ++ * into the circular buffer. ++ * @tail: The tail of the circular buffer. Can be used with Linux @c CIRC_ ++ * helpers. The producer should do a READ_ONCE load and the consumer ++ * should SMP store. ++ * @size: The number of changes that are allowed in @c data. Can be used ++ * with Linux @c CIRC_ helpers. Will always be a power of two. The ++ * producer lock should be held when updating this and stored with ++ * an SMP release memory barrier. This means that the consumer can ++ * do an SMP load. ++ * @threshold: The number of changes above which threads polling on the reader ++ * file descriptor will be woken up. ++ */ ++struct reader_changes { ++ struct kbase_kinstr_jm_atom_state_change *data; ++ spinlock_t producer; ++ struct mutex consumer; ++ u32 head; ++ u32 tail; ++ u32 size; ++ u32 threshold; ++}; ++ ++/** ++ * reader_changes_is_valid_size() - Determines if requested changes buffer size ++ * is valid. ++ * @size: The requested memory size ++ * ++ * We have a constraint that the underlying physical buffer must be a ++ * power of two so that we can use the efficient circular buffer helpers that ++ * the kernel provides. It also needs to be representable within a u32. 
++ * ++ * Return: ++ * * true - the size is valid ++ * * false - the size is invalid ++ */ ++static inline bool reader_changes_is_valid_size(const size_t size) ++{ ++ typedef struct reader_changes changes_t; ++ const size_t elem_size = sizeof(*((changes_t *)0)->data); ++ const size_t size_size = sizeof(((changes_t *)0)->size); ++ const size_t size_max = (1ull << (size_size * 8)) - 1; ++ ++ return is_power_of_2(size) && /* Is a power of two */ ++ ((size / elem_size) <= size_max); /* Small enough */ ++} ++ ++/** ++ * reader_changes_init() - Initializes the reader changes and allocates the ++ * changes buffer ++ * @changes: The context pointer, must point to a zero-inited allocated reader ++ * changes structure. We may support allocating the structure in the ++ * future. ++ * @size: The requested changes buffer size ++ * ++ * Return: ++ * (0, U16_MAX] - the number of data elements allocated ++ * -EINVAL - a pointer was invalid ++ * -ENOTSUP - we do not support allocation of the context ++ * -ERANGE - the requested memory size was invalid ++ * -ENOMEM - could not allocate the memory ++ * -EADDRINUSE - the buffer memory was already allocated ++ */ ++static int reader_changes_init(struct reader_changes *const changes, ++ const size_t size) ++{ ++ BUILD_BUG_ON((PAGE_SIZE % sizeof(*changes->data)) != 0); ++ ++ if (!reader_changes_is_valid_size(size)) { ++ pr_warn(PR_ "invalid size %zu\n", size); ++ return -ERANGE; ++ } ++ ++ changes->data = vmalloc(size); ++ if (!changes->data) ++ return -ENOMEM; ++ ++ spin_lock_init(&changes->producer); ++ mutex_init(&changes->consumer); ++ ++ changes->size = size / sizeof(*changes->data); ++ changes->threshold = min(((size_t)(changes->size)) / 4, ++ ((size_t)(PAGE_SIZE)) / sizeof(*changes->data)); ++ ++ return changes->size; ++} ++ ++/** ++ * reader_changes_term() - Cleans up a reader changes structure ++ * @changes: The context to clean up ++ * ++ * Releases the allocated state changes memory ++ */ ++static void 
reader_changes_term(struct reader_changes *const changes) ++{ ++ struct kbase_kinstr_jm_atom_state_change *data = NULL; ++ unsigned long irq; ++ ++ /* ++ * Although changes->data is used on the consumer side, too, no active ++ * consumer is possible by the time we clean up the reader changes, so ++ * no need to take the consumer lock. However, we do need the producer ++ * lock because the list removal can race with list traversal. ++ */ ++ spin_lock_irqsave(&changes->producer, irq); ++ swap(changes->data, data); ++ spin_unlock_irqrestore(&changes->producer, irq); ++ ++ mutex_destroy(&changes->consumer); ++ vfree(data); ++} ++ ++/** ++ * reader_changes_count_locked() - Retrieves the count of state changes from the ++ * tail to the physical end of the buffer ++ * @changes: The state changes context ++ * ++ * The consumer mutex must be held. Uses the CIRC_CNT_TO_END macro to ++ * determine the count, so there may be more items. However, that's the maximum ++ * number that can be read in one contiguous read. ++ * ++ * Return: the number of changes in the circular buffer until the end of the ++ * allocation ++ */ ++static u32 reader_changes_count_locked(struct reader_changes *const changes) ++{ ++ u32 head; ++ ++ lockdep_assert_held_once(&changes->consumer); ++ ++ head = smp_load_acquire(&changes->head); ++ ++ return CIRC_CNT_TO_END(head, changes->tail, changes->size); ++} ++ ++/** ++ * reader_changes_count() - Retrieves the count of state changes from the ++ * tail to the physical end of the buffer ++ * @changes: The state changes context ++ * ++ * Return: the number of changes in the circular buffer until the end of the ++ * allocation ++ */ ++static u32 reader_changes_count(struct reader_changes *const changes) ++{ ++ u32 ret; ++ ++ mutex_lock(&changes->consumer); ++ ret = reader_changes_count_locked(changes); ++ mutex_unlock(&changes->consumer); ++ return ret; ++} ++ ++/** ++ * reader_changes_push() - Pushes a change into the reader circular buffer. 
++ * @changes: The buffer to insert the change into ++ * @change: Kernel atom change to insert ++ * @wait_queue: The queue to be kicked when changes should be read from ++ * userspace. Kicked when a threshold is reached or there is ++ * overflow. ++ */ ++static void reader_changes_push( ++ struct reader_changes *const changes, ++ const struct kbase_kinstr_jm_atom_state_change *const change, ++ wait_queue_head_t *const wait_queue) ++{ ++ u32 head, tail, size, space; ++ unsigned long irq; ++ struct kbase_kinstr_jm_atom_state_change *data; ++ ++ spin_lock_irqsave(&changes->producer, irq); ++ ++ /* We may be called for a reader_changes that's awaiting cleanup. */ ++ data = changes->data; ++ if (!data) ++ goto unlock; ++ ++ size = changes->size; ++ head = changes->head; ++ tail = smp_load_acquire(&changes->tail); ++ ++ space = CIRC_SPACE(head, tail, size); ++ if (space >= 1) { ++ data[head] = *change; ++ if (space == 1) { ++ data[head].flags |= ++ KBASE_KINSTR_JM_ATOM_STATE_FLAG_OVERFLOW; ++ pr_warn(PR_ "overflow of circular buffer\n"); ++ } ++ smp_store_release(&changes->head, (head + 1) & (size - 1)); ++ } ++ ++ /* Wake for either overflow or over-threshold cases. */ ++ if (CIRC_CNT(head + 1, tail, size) >= changes->threshold) ++ wake_up_interruptible(wait_queue); ++ ++unlock: ++ spin_unlock_irqrestore(&changes->producer, irq); ++} ++ ++/** ++ * struct reader - Allows the kernel state changes to be read by user space. ++ * @node: The node in the @c readers locked list ++ * @rcu_head: storage for the RCU callback to free this reader (see kfree_rcu) ++ * @changes: The circular buffer of user changes ++ * @wait_queue: A wait queue for poll ++ * @context: a pointer to the parent context that created this reader. Can be ++ * used to remove the reader from the list of readers. Reference ++ * counted. ++ * ++ * The reader is a circular buffer in kernel space. State changes are pushed ++ * into the buffer. 
The flow from user space is: ++ * ++ * * Request file descriptor with KBASE_IOCTL_KINSTR_JM_FD. This will ++ * allocate the kernel side circular buffer with a size specified in the ++ * ioctl argument. ++ * * The user will then poll the file descriptor for data ++ * * Upon receiving POLLIN, perform a read() on the file descriptor to get ++ * the data out. ++ * * The buffer memory will be freed when the file descriptor is closed ++ */ ++struct reader { ++ struct hlist_bl_node node; ++ struct rcu_head rcu_head; ++ struct reader_changes changes; ++ wait_queue_head_t wait_queue; ++ struct kbase_kinstr_jm *context; ++}; ++ ++static struct kbase_kinstr_jm * ++kbase_kinstr_jm_ref_get(struct kbase_kinstr_jm *const ctx); ++static void kbase_kinstr_jm_ref_put(struct kbase_kinstr_jm *const ctx); ++static int kbase_kinstr_jm_readers_add(struct kbase_kinstr_jm *const ctx, ++ struct reader *const reader); ++static void kbase_kinstr_jm_readers_del(struct kbase_kinstr_jm *const ctx, ++ struct reader *const reader); ++ ++/** ++ * reader_term() - Terminate an instrumentation job manager reader context. ++ * @reader: Pointer to context to be terminated. ++ */ ++static void reader_term(struct reader *const reader) ++{ ++ if (!reader) ++ return; ++ ++ kbase_kinstr_jm_readers_del(reader->context, reader); ++ reader_changes_term(&reader->changes); ++ kbase_kinstr_jm_ref_put(reader->context); ++ ++ kfree_rcu(reader, rcu_head); ++} ++ ++/** ++ * reader_init() - Initialise an instrumentation job manager reader context. ++ * @out_reader: Non-NULL pointer to where the pointer to the created context ++ * will be stored on success. ++ * @ctx: the pointer to the parent context. Reference count will be ++ * increased if initialization is successful ++ * @num_changes: The number of changes to allocate a buffer for ++ * ++ * Return: 0 on success, else error code. 
++ */ ++static int reader_init(struct reader **const out_reader, ++ struct kbase_kinstr_jm *const ctx, ++ size_t const num_changes) ++{ ++ struct reader *reader = NULL; ++ const size_t change_size = sizeof(struct kbase_kinstr_jm_atom_state_change); ++ int status; ++ ++ if (!out_reader || !ctx || !num_changes) ++ return -EINVAL; ++ ++ reader = kzalloc(sizeof(*reader), GFP_KERNEL); ++ if (!reader) ++ return -ENOMEM; ++ ++ INIT_HLIST_BL_NODE(&reader->node); ++ init_waitqueue_head(&reader->wait_queue); ++ ++ reader->context = kbase_kinstr_jm_ref_get(ctx); ++ ++ status = reader_changes_init(&reader->changes, num_changes * change_size); ++ if (status < 0) ++ goto fail; ++ ++ status = kbase_kinstr_jm_readers_add(ctx, reader); ++ if (status < 0) ++ goto fail; ++ ++ *out_reader = reader; ++ ++ return 0; ++ ++fail: ++ kbase_kinstr_jm_ref_put(reader->context); ++ kfree(reader); ++ return status; ++} ++ ++/** ++ * reader_release() - Invoked when the reader file descriptor is released ++ * @node: The inode that the file descriptor that the file corresponds to. In ++ * our case our reader file descriptor is backed by an anonymous node so ++ * not much is in this. ++ * @file: the file data. Our reader context is held in the private data ++ * Return: zero on success ++ */ ++static int reader_release(struct inode *const node, struct file *const file) ++{ ++ struct reader *const reader = file->private_data; ++ ++ reader_term(reader); ++ file->private_data = NULL; ++ ++ return 0; ++} ++ ++/** ++ * reader_changes_copy_to_user() - Copy any changes from a changes structure to ++ * the user-provided buffer. ++ * @changes: The changes structure from which to copy. ++ * @buffer: The user buffer to copy the data to. ++ * @buffer_size: The number of bytes in the buffer. ++ * Return: The number of bytes copied or negative errno on failure. 
++ */ ++static ssize_t reader_changes_copy_to_user(struct reader_changes *const changes, ++ char __user *buffer, ++ size_t buffer_size) ++{ ++ ssize_t ret = 0; ++ struct kbase_kinstr_jm_atom_state_change const *src_buf = READ_ONCE( ++ changes->data); ++ size_t const entry_size = sizeof(*src_buf); ++ size_t changes_tail, changes_count, read_size; ++ ++ /* Needed for the quick buffer capacity calculation below. ++ * Note that we can't use is_power_of_2() since old compilers don't ++ * understand it's a constant expression. ++ */ ++#define is_power_of_two(x) ((x) && !((x) & ((x) - 1))) ++ static_assert(is_power_of_two( ++ sizeof(struct kbase_kinstr_jm_atom_state_change))); ++#undef is_power_of_two ++ ++ lockdep_assert_held_once(&changes->consumer); ++ ++ /* Read continuously until either: ++ * - we've filled the output buffer, or ++ * - there are no changes when we check. ++ * ++ * If more changes arrive while we're copying to the user, we can copy ++ * those as well, space permitting. ++ */ ++ do { ++ changes_tail = changes->tail; ++ changes_count = reader_changes_count_locked(changes); ++ /* The user buffer is rounded down to a whole number of entries; ++ * the mask relies on entry_size being a power of two. ++ */ ++ read_size = min(changes_count * entry_size, ++ buffer_size & ~(entry_size - 1)); ++ ++ if (!read_size) ++ break; ++ ++ /* NOTE(review): this is one contiguous copy starting at the ++ * tail, so it presumably relies on ++ * reader_changes_count_locked() never counting entries past ++ * the end of the buffer - confirm against its definition. ++ * ++ * NOTE(review): on a fault the bytes copied by earlier ++ * iterations are not reported to the caller, although the tail ++ * has already been advanced past them. ++ */ ++ if (copy_to_user(buffer, &(src_buf[changes_tail]), read_size)) ++ return -EFAULT; ++ ++ buffer += read_size; ++ buffer_size -= read_size; ++ ret += read_size; ++ changes_tail = (changes_tail + read_size / entry_size) & ++ (changes->size - 1); ++ smp_store_release(&changes->tail, changes_tail); ++ } while (read_size); ++ ++ return ret; ++} ++ ++/** ++ * reader_read() - Handles a read call on the reader file descriptor ++ * ++ * @filp: The file that the read was performed on ++ * @buffer: The destination buffer ++ * @buffer_size: The maximum number of bytes to read ++ * @offset: The offset into the 'file' to read from. ++ * ++ * Note the destination buffer needs to be fully mapped in userspace or the read ++ * will fault.
++ * ++ * Return: ++ * * The number of bytes read or: ++ * * -EBADF - the file descriptor did not have an attached reader ++ * * -ENOBUFS - @buffer_size is smaller than the size of a single entry ++ * * -EIO - the destination buffer failed the access_ok() check ++ * * -EFAULT - memory access fault while copying to the destination buffer ++ * * -EAGAIN - if the file is set to nonblocking reads with O_NONBLOCK and there ++ * is no data available ++ * * -EINTR - the wait for data was interrupted (wait_event_interruptible() ++ * returned non-zero) ++ * ++ * Note: The number of bytes read will always be a multiple of the size of an ++ * entry. ++ */ ++static ssize_t reader_read(struct file *const filp, ++ char __user *const buffer, ++ size_t const buffer_size, ++ loff_t *const offset) ++{ ++ struct reader *const reader = filp->private_data; ++ struct reader_changes *changes; ++ ssize_t ret; ++ ++ if (!reader) ++ return -EBADF; ++ ++ if (buffer_size < sizeof(struct kbase_kinstr_jm_atom_state_change)) ++ return -ENOBUFS; ++ ++#if KERNEL_VERSION(5, 0, 0) <= LINUX_VERSION_CODE ++ if (!access_ok(buffer, buffer_size)) ++ return -EIO; ++#else ++ if (!access_ok(VERIFY_WRITE, buffer, buffer_size)) ++ return -EIO; ++#endif ++ ++ changes = &reader->changes; ++ ++ mutex_lock(&changes->consumer); ++ if (!reader_changes_count_locked(changes)) { ++ if (filp->f_flags & O_NONBLOCK) { ++ ret = -EAGAIN; ++ goto exit; ++ } ++ ++ /* Blocking read: wait for data. The consumer mutex stays held ++ * for the duration of the wait. ++ */ ++ if (wait_event_interruptible( ++ reader->wait_queue, ++ !!reader_changes_count_locked(changes))) { ++ ret = -EINTR; ++ goto exit; ++ } ++ } ++ ++ ret = reader_changes_copy_to_user(changes, buffer, buffer_size); ++ ++exit: ++ mutex_unlock(&changes->consumer); ++ return ret; ++} ++ ++/** ++ * reader_poll() - Handles a poll call on the reader file descriptor ++ * @file: The file that the poll was performed on ++ * @wait: The poll table ++ * ++ * The results of the poll will be unreliable if there is no mapped memory as ++ * there is no circular buffer to push atom state changes into.
++ *
++ * Return: a poll event mask. The return type is a bit mask, so failures are
++ * reported with POLLERR rather than with a negative errno value:
++ * * 0 - no data ready
++ * * POLLIN - state changes have been buffered
++ * * POLLERR - the arguments were invalid or the file descriptor did not have
++ *             an attached reader
++ */
++static __poll_t reader_poll(struct file *const file,
++			    struct poll_table_struct *const wait)
++{
++	struct reader *reader;
++	struct reader_changes *changes;
++
++	if (unlikely(!file || !wait))
++		return POLLERR;
++
++	reader = file->private_data;
++	if (unlikely(!reader))
++		return POLLERR;
++
++	changes = &reader->changes;
++
++	/* Enough changes are already buffered: report readiness right away. */
++	if (reader_changes_count(changes) >= changes->threshold)
++		return POLLIN;
++
++	poll_wait(file, &reader->wait_queue, wait);
++
++	/* Check again after poll_wait(): any buffered change is reported. */
++	return (reader_changes_count(changes) > 0) ? POLLIN : 0;
++}
++
++/* The file operations virtual function table */
++static const struct file_operations file_operations = {
++	.owner = THIS_MODULE,
++	.llseek = no_llseek,
++	.read = reader_read,
++	.poll = reader_poll,
++	.release = reader_release
++};
++
++/* The maximum amount of readers that can be created on a context.
*/ ++static const size_t kbase_kinstr_jm_readers_max = 16; ++ ++/** ++ * kbase_kinstr_jm_release() - Invoked when the reference count is dropped ++ * @ref: the context reference count ++ * ++ * Frees the context that embeds @ref. ++ */ ++static void kbase_kinstr_jm_release(struct kref *const ref) ++{ ++ struct kbase_kinstr_jm *const ctx = ++ container_of(ref, struct kbase_kinstr_jm, refcount); ++ ++ kfree(ctx); ++} ++ ++/** ++ * kbase_kinstr_jm_ref_get() - Reference counts the instrumentation context ++ * @ctx: the context to reference count. May be NULL, in which case no ++ * reference is taken. ++ * Return: the reference counted context ++ */ ++static struct kbase_kinstr_jm * ++kbase_kinstr_jm_ref_get(struct kbase_kinstr_jm *const ctx) ++{ ++ if (likely(ctx)) ++ kref_get(&ctx->refcount); ++ return ctx; ++} ++ ++/** ++ * kbase_kinstr_jm_ref_put() - Dereferences the instrumentation context ++ * @ctx: the context to lower the reference count on. May be NULL, in which ++ * case nothing is done. ++ */ ++static void kbase_kinstr_jm_ref_put(struct kbase_kinstr_jm *const ctx) ++{ ++ if (likely(ctx)) ++ kref_put(&ctx->refcount, kbase_kinstr_jm_release); ++} ++ ++/** ++ * kbase_kinstr_jm_readers_add() - Adds a reader to the list of readers ++ * @ctx: the instrumentation context ++ * @reader: the reader to add ++ * ++ * Return: ++ * 0 - success ++ * -ENOMEM - too many readers already added.
++ */
++static int kbase_kinstr_jm_readers_add(struct kbase_kinstr_jm *const ctx,
++				       struct reader *const reader)
++{
++	struct hlist_bl_head *const readers = &ctx->readers;
++	struct hlist_bl_node *node;
++	struct reader *temp;
++	size_t count = 0;
++
++	hlist_bl_lock(readers);
++
++	hlist_bl_for_each_entry_rcu(temp, node, readers, node)
++		++count;
++
++	/* count is the number of readers already on the list. Refuse the new
++	 * reader once the maximum has been reached, so that the list never
++	 * holds more than kbase_kinstr_jm_readers_max entries.
++	 */
++	if (count >= kbase_kinstr_jm_readers_max) {
++		hlist_bl_unlock(readers);
++		return -ENOMEM;
++	}
++
++	hlist_bl_add_head_rcu(&reader->node, readers);
++
++	hlist_bl_unlock(readers);
++
++	static_branch_inc(&basep_kinstr_jm_reader_static_key);
++
++	return 0;
++}
++
++/**
++ * kbase_kinstr_jm_readers_del() - Deletes a reader from the list of readers
++ * @ctx: the instrumentation context
++ * @reader: the reader to delete
++ */
++static void kbase_kinstr_jm_readers_del(struct kbase_kinstr_jm *const ctx,
++					struct reader *const reader)
++{
++	struct hlist_bl_head *const readers = &ctx->readers;
++
++	hlist_bl_lock(readers);
++	hlist_bl_del_rcu(&reader->node);
++	hlist_bl_unlock(readers);
++
++	static_branch_dec(&basep_kinstr_jm_reader_static_key);
++}
++
++int kbase_kinstr_jm_get_fd(struct kbase_kinstr_jm *const ctx,
++			   union kbase_kinstr_jm_fd *jm_fd_arg)
++{
++	struct kbase_kinstr_jm_fd_in const *in;
++	struct reader *reader;
++	size_t const change_size = sizeof(struct
++					  kbase_kinstr_jm_atom_state_change);
++	int status;
++	int fd;
++	int i;
++
++	if (!ctx || !jm_fd_arg)
++		return -EINVAL;
++
++	in = &jm_fd_arg->in;
++
++	if (!is_power_of_2(in->count))
++		return -EINVAL;
++
++	for (i = 0; i < sizeof(in->padding); ++i)
++		if (in->padding[i])
++			return -EINVAL;
++
++	status = reader_init(&reader, ctx, in->count);
++	if (status < 0)
++		return status;
++
++	jm_fd_arg->out.version = KBASE_KINSTR_JM_VERSION;
++	jm_fd_arg->out.size = change_size;
++	memset(&jm_fd_arg->out.padding, 0, sizeof(jm_fd_arg->out.padding));
++
++	fd = anon_inode_getfd("[mali_kinstr_jm]", &file_operations, reader,
++			      O_CLOEXEC);
++	if (fd < 0)
++
reader_term(reader); ++ ++ return fd; ++} ++ ++int kbase_kinstr_jm_init(struct kbase_kinstr_jm **const out_ctx) ++{ ++ struct kbase_kinstr_jm *ctx = NULL; ++ ++ if (!out_ctx) ++ return -EINVAL; ++ ++ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); ++ if (!ctx) ++ return -ENOMEM; ++ ++ INIT_HLIST_BL_HEAD(&ctx->readers); ++ kref_init(&ctx->refcount); ++ ++ *out_ctx = ctx; ++ ++ return 0; ++} ++ ++void kbase_kinstr_jm_term(struct kbase_kinstr_jm *const ctx) ++{ ++ kbase_kinstr_jm_ref_put(ctx); ++} ++ ++void kbasep_kinstr_jm_atom_state( ++ struct kbase_jd_atom *const katom, ++ const enum kbase_kinstr_jm_reader_atom_state state) ++{ ++ struct kbase_context *const kctx = katom->kctx; ++ struct kbase_kinstr_jm *const ctx = kctx->kinstr_jm; ++ const u8 id = kbase_jd_atom_id(kctx, katom); ++ struct kbase_kinstr_jm_atom_state_change change = { ++ .timestamp = ktime_get_raw_ns(), .atom = id, .state = state ++ }; ++ struct reader *reader; ++ struct hlist_bl_node *node; ++ ++ WARN(KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT < state || 0 > state, ++ PR_ "unsupported katom (%u) state (%i)", id, state); ++ ++ switch (state) { ++ case KBASE_KINSTR_JM_READER_ATOM_STATE_START: ++ change.data.start.slot = katom->slot_nr; ++ break; ++ default: ++ break; ++ } ++ ++ rcu_read_lock(); ++ hlist_bl_for_each_entry_rcu(reader, node, &ctx->readers, node) ++ reader_changes_push( ++ &reader->changes, &change, &reader->wait_queue); ++ rcu_read_unlock(); ++} ++ ++KBASE_EXPORT_TEST_API(kbasep_kinstr_jm_atom_state); ++ ++void kbasep_kinstr_jm_atom_hw_submit(struct kbase_jd_atom *const katom) ++{ ++ struct kbase_context *const kctx = katom->kctx; ++ struct kbase_device *const kbdev = kctx->kbdev; ++ const int slot = katom->slot_nr; ++ struct kbase_jd_atom *const submitted = kbase_gpu_inspect(kbdev, slot, 0); ++ ++ BUILD_BUG_ON(SLOT_RB_SIZE != 2); ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ if (WARN_ON(slot < 0 || slot >= GPU_MAX_JOB_SLOTS)) ++ return; ++ if (WARN_ON(!submitted)) ++ return; ++ ++ if 
(submitted == katom) ++ kbase_kinstr_jm_atom_state_start(katom); ++} ++ ++void kbasep_kinstr_jm_atom_hw_release(struct kbase_jd_atom *const katom) ++{ ++ struct kbase_context *const kctx = katom->kctx; ++ struct kbase_device *const kbdev = kctx->kbdev; ++ const int slot = katom->slot_nr; ++ struct kbase_jd_atom *const submitted = kbase_gpu_inspect(kbdev, slot, 0); ++ struct kbase_jd_atom *const queued = kbase_gpu_inspect(kbdev, slot, 1); ++ ++ BUILD_BUG_ON(SLOT_RB_SIZE != 2); ++ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ if (WARN_ON(slot < 0 || slot >= GPU_MAX_JOB_SLOTS)) ++ return; ++ if (WARN_ON(!submitted)) ++ return; ++ if (WARN_ON((submitted != katom) && (queued != katom))) ++ return; ++ ++ if (queued == katom) ++ return; ++ ++ if (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) ++ kbase_kinstr_jm_atom_state_stop(katom); ++ if (queued && queued->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) ++ kbase_kinstr_jm_atom_state_start(queued); ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_kinstr_jm.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_kinstr_jm.h +new file mode 100644 +index 0000000..2c904e5 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_kinstr_jm.h +@@ -0,0 +1,275 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++/* ++ * mali_kbase_kinstr_jm.h ++ * Kernel driver public interface to job manager atom tracing. This API provides ++ * a method to get the atom state changes into user space. ++ * ++ * The flow of operation is: ++ * ++ * | kernel | user | ++ * | ----------------------------------- | ----------------------------------- | ++ * | Initialize API with | | ++ * | kbase_kinstr_jm_init() | | ++ * | | | ++ * | Kernel code injects states with | | ++ * | kbase_kinstr_jm_atom_state_*() APIs | | ++ * | | Call ioctl() to get file descriptor | ++ * | | via KBASE_IOCTL_KINSTR_JM_FD | ++ * | Allocates a reader attached to FD | | ++ * | Allocates circular buffer and | | ++ * | patches, via ASM goto, the | | ++ * | kbase_kinstr_jm_atom_state_*() | | ++ * | | loop: | ++ * | | Call poll() on FD for POLLIN | ++ * | When threshold of changes is hit, | | ++ * | the poll is interrupted with | | ++ * | POLLIN. If circular buffer is | | ++ * | full then store the missed count | | ++ * | and interrupt poll | Call read() to get data from | ++ * | | circular buffer via the fd | ++ * | Kernel advances tail of circular | | ++ * | buffer | | ++ * | | Close file descriptor | ++ * | Deallocates circular buffer | | ++ * | | | ++ * | Terminate API with | | ++ * | kbase_kinstr_jm_term() | | ++ * ++ * All tracepoints are guarded on a static key. The static key is activated when ++ * a user space reader gets created. This means that there is negligible cost ++ * inserting the tracepoints into code when there are no readers. 
++ */ ++ ++#ifndef _KBASE_KINSTR_JM_H_ ++#define _KBASE_KINSTR_JM_H_ ++ ++#include ++ ++#ifdef __KERNEL__ ++#include ++#include ++#else ++/* empty wrapper macros for userspace */ ++#define static_branch_unlikely(key) (1) ++#define KERNEL_VERSION(a, b, c) (0) ++#define LINUX_VERSION_CODE (1) ++#endif /* __KERNEL__ */ ++ ++/* Forward declarations */ ++struct kbase_context; ++struct kbase_kinstr_jm; ++struct kbase_jd_atom; ++union kbase_kinstr_jm_fd; ++ ++/** ++ * kbase_kinstr_jm_init() - Initialise an instrumentation job manager context. ++ * @ctx: Non-NULL pointer to where the pointer to the created context will ++ * be stored on success. ++ * ++ * Return: 0 on success, else error code. ++ */ ++int kbase_kinstr_jm_init(struct kbase_kinstr_jm **ctx); ++ ++/** ++ * kbase_kinstr_jm_term() - Terminate an instrumentation job manager context. ++ * @ctx: Pointer to context to be terminated. ++ */ ++void kbase_kinstr_jm_term(struct kbase_kinstr_jm *ctx); ++ ++/** ++ * kbase_kinstr_jm_get_fd() - Retrieves a file descriptor that can be used to ++ * read the atom state changes from userspace ++ * ++ * @ctx: Pointer to the initialized context ++ * @jm_fd_arg: Pointer to the union containing the in/out params ++ * Return: negative error code on failure, valid file descriptor on success ++ */ ++int kbase_kinstr_jm_get_fd(struct kbase_kinstr_jm *const ctx, ++ union kbase_kinstr_jm_fd *jm_fd_arg); ++ ++/** ++ * kbasep_kinstr_jm_atom_state() - Signifies that an atom has changed state ++ * @atom: The atom that has changed state ++ * @state: The new state of the atom ++ * ++ * This performs the actual storage of the state ready for user space to ++ * read the data. It is only called when the static key is enabled from ++ * kbase_kinstr_jm_atom_state(). There is almost never a need to invoke this ++ * function directly.
++ */ ++void kbasep_kinstr_jm_atom_state( ++ struct kbase_jd_atom *const atom, ++ const enum kbase_kinstr_jm_reader_atom_state state); ++ ++/* Allows ASM goto patching to reduce tracing overhead. This is ++ * incremented/decremented when readers are created and terminated. This really ++ * shouldn't be changed externally, but if you do, make sure you use ++ * a static_key_inc()/static_key_dec() pair. ++ */ ++extern struct static_key_false basep_kinstr_jm_reader_static_key; ++ ++/** ++ * kbase_kinstr_jm_atom_state() - Signifies that an atom has changed state ++ * @atom: The atom that has changed state ++ * @state: The new state of the atom ++ * ++ * This uses a static key to reduce overhead when tracing is disabled ++ */ ++static inline void kbase_kinstr_jm_atom_state( ++ struct kbase_jd_atom *const atom, ++ const enum kbase_kinstr_jm_reader_atom_state state) ++{ ++ if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key)) ++ kbasep_kinstr_jm_atom_state(atom, state); ++} ++ ++/** ++ * kbase_kinstr_jm_atom_state_queue() - Signifies that an atom has entered a ++ * hardware or software queue. 
++ * @atom: The atom that has changed state ++ */ ++static inline void kbase_kinstr_jm_atom_state_queue( ++ struct kbase_jd_atom *const atom) ++{ ++ kbase_kinstr_jm_atom_state( ++ atom, KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE); ++} ++ ++/** ++ * kbase_kinstr_jm_atom_state_start() - Signifies that work has started on an ++ * atom ++ * @atom: The atom that has changed state ++ */ ++static inline void kbase_kinstr_jm_atom_state_start( ++ struct kbase_jd_atom *const atom) ++{ ++ kbase_kinstr_jm_atom_state( ++ atom, KBASE_KINSTR_JM_READER_ATOM_STATE_START); ++} ++ ++/** ++ * kbase_kinstr_jm_atom_state_stop() - Signifies that work has stopped on an ++ * atom ++ * @atom: The atom that has changed state ++ */ ++static inline void kbase_kinstr_jm_atom_state_stop( ++ struct kbase_jd_atom *const atom) ++{ ++ kbase_kinstr_jm_atom_state( ++ atom, KBASE_KINSTR_JM_READER_ATOM_STATE_STOP); ++} ++ ++/** ++ * kbase_kinstr_jm_atom_state_complete() - Signifies that all work has completed ++ * on an atom ++ * @atom: The atom that has changed state ++ */ ++static inline void kbase_kinstr_jm_atom_state_complete( ++ struct kbase_jd_atom *const atom) ++{ ++ kbase_kinstr_jm_atom_state( ++ atom, KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE); ++} ++ ++/** ++ * kbase_kinstr_jm_atom_queue() - A software *or* hardware atom is queued for ++ * execution ++ * @atom: The atom that has changed state ++ */ ++static inline void kbase_kinstr_jm_atom_queue(struct kbase_jd_atom *const atom) ++{ ++ kbase_kinstr_jm_atom_state_queue(atom); ++} ++ ++/** ++ * kbase_kinstr_jm_atom_complete() - A software *or* hardware atom is fully ++ * completed ++ * @atom: The atom that has changed state ++ */ ++static inline void kbase_kinstr_jm_atom_complete( ++ struct kbase_jd_atom *const atom) ++{ ++ kbase_kinstr_jm_atom_state_complete(atom); ++} ++ ++/** ++ * kbase_kinstr_jm_atom_sw_start() - A software atom has started work ++ * @atom: The atom that has changed state ++ */ ++static inline void 
kbase_kinstr_jm_atom_sw_start( ++ struct kbase_jd_atom *const atom) ++{ ++ kbase_kinstr_jm_atom_state_start(atom); ++} ++ ++/** ++ * kbase_kinstr_jm_atom_sw_stop() - A software atom has stopped work ++ * @atom: The atom that has changed state ++ */ ++static inline void kbase_kinstr_jm_atom_sw_stop( ++ struct kbase_jd_atom *const atom) ++{ ++ kbase_kinstr_jm_atom_state_stop(atom); ++} ++ ++/** ++ * kbasep_kinstr_jm_atom_hw_submit() - A hardware atom has been submitted ++ * @atom: The atom that has been submitted ++ * ++ * This private implementation should not be called directly, it is protected ++ * by a static key in kbase_kinstr_jm_atom_hw_submit(). Use that instead. ++ */ ++void kbasep_kinstr_jm_atom_hw_submit(struct kbase_jd_atom *const atom); ++ ++/** ++ * kbase_kinstr_jm_atom_hw_submit() - A hardware atom has been submitted ++ * @atom: The atom that has been submitted ++ */ ++static inline void kbase_kinstr_jm_atom_hw_submit( ++ struct kbase_jd_atom *const atom) ++{ ++ if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key)) ++ kbasep_kinstr_jm_atom_hw_submit(atom); ++} ++ ++/** ++ * kbasep_kinstr_jm_atom_hw_release() - A hardware atom has been released ++ * @atom: The atom that has been released ++ * ++ * This private implementation should not be called directly, it is protected ++ * by a static key in kbase_kinstr_jm_atom_hw_release(). Use that instead. 
++ */ ++void kbasep_kinstr_jm_atom_hw_release(struct kbase_jd_atom *const atom); ++ ++/** ++ * kbase_kinstr_jm_atom_hw_release() - A hardware atom has been released ++ * @atom: The atom that has been released ++ */ ++static inline void kbase_kinstr_jm_atom_hw_release( ++ struct kbase_jd_atom *const atom) ++{ ++ if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key)) ++ kbasep_kinstr_jm_atom_hw_release(atom); ++} ++ ++#endif /* _KBASE_KINSTR_JM_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h +index 003ac9e..1d8d196 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2014, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,15 +17,10 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /** +- * @file mali_kbase_linux.h +- * Base kernel APIs, Linux implementation. ++ * DOC: Base kernel APIs, Linux implementation. 
+ */ + + #ifndef _KBASE_LINUX_H_ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c +index 4a1004b..a68e4ea 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /** +@@ -29,7 +28,7 @@ + #include + #include + #include +-#ifdef CONFIG_OF ++#if IS_ENABLED(CONFIG_OF) + #include + #endif + +@@ -43,6 +42,7 @@ + #include + #include + #include ++#include + + /* + * Alignment of objects allocated by the GPU inside a just-in-time memory +@@ -89,7 +89,7 @@ static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx) + #error "Unknown CPU VA width for this architecture" + #endif + +-#ifdef CONFIG_64BIT ++#if IS_ENABLED(CONFIG_64BIT) + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) + cpu_va_bits = 32; + #endif +@@ -98,27 +98,34 @@ static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx) + } + + /* This function finds out which RB tree the given pfn from the GPU VA belongs +- * to based on the memory zone the pfn refers to */ ++ * to based on the memory zone the pfn refers to ++ */ + static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx, + u64 gpu_pfn) + { + struct rb_root *rbtree = NULL; ++ struct kbase_reg_zone *exec_va_zone = ++ kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); + + /* The gpu_pfn can only be greater than the starting pfn of the EXEC_VA + * zone if this has been initialized. 
+ */ +- if (gpu_pfn >= kctx->exec_va_start) ++ if (gpu_pfn >= exec_va_zone->base_pfn) + rbtree = &kctx->reg_rbtree_exec; + else { + u64 same_va_end; + +-#ifdef CONFIG_64BIT +- if (kbase_ctx_flag(kctx, KCTX_COMPAT)) ++#if IS_ENABLED(CONFIG_64BIT) ++ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { + #endif /* CONFIG_64BIT */ + same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE; +-#ifdef CONFIG_64BIT +- else +- same_va_end = kctx->same_va_end; ++#if IS_ENABLED(CONFIG_64BIT) ++ } else { ++ struct kbase_reg_zone *same_va_zone = ++ kbase_ctx_reg_zone_get(kctx, ++ KBASE_REG_ZONE_SAME_VA); ++ same_va_end = kbase_reg_zone_end_pfn(same_va_zone); ++ } + #endif /* CONFIG_64BIT */ + + if (gpu_pfn >= same_va_end) +@@ -228,7 +235,7 @@ struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address( + u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; + struct rb_root *rbtree = NULL; + +- KBASE_DEBUG_ASSERT(NULL != kctx); ++ KBASE_DEBUG_ASSERT(kctx != NULL); + + lockdep_assert_held(&kctx->reg_lock); + +@@ -288,7 +295,8 @@ static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs( + struct rb_root *rbtree = NULL; + + /* Note that this search is a linear search, as we do not have a target +- address in mind, so does not benefit from the rbtree search */ ++ * address in mind, so does not benefit from the rbtree search ++ */ + rbtree = reg_reqs->rbtree; + + for (rbnode = rb_first(rbtree); rbnode; rbnode = rb_next(rbnode)) { +@@ -303,7 +311,8 @@ static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs( + * (start_pfn + align_mask) & ~(align_mask) + * + * Otherwise, it aligns to n*align + offset, for the +- * lowest value n that makes this still >start_pfn */ ++ * lowest value n that makes this still >start_pfn ++ */ + start_pfn += align_mask; + start_pfn -= (start_pfn - align_offset) & (align_mask); + +@@ -341,7 +350,8 @@ static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs( + } + + /** +- * @brief Remove a region object from the global 
list. ++ * Remove a region object from the global list. ++ * @reg: Region object to remove + * + * The region reg is removed, possibly by merging with other free and + * compatible adjacent regions. It must be called with the context +@@ -367,8 +377,9 @@ int kbase_remove_va_region(struct kbase_va_region *reg) + if (rbprev) { + prev = rb_entry(rbprev, struct kbase_va_region, rblink); + if (prev->flags & KBASE_REG_FREE) { +- /* We're compatible with the previous VMA, +- * merge with it */ ++ /* We're compatible with the previous VMA, merge with ++ * it ++ */ + WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) != + (reg->flags & KBASE_REG_ZONE_MASK)); + prev->nr_pages += reg->nr_pages; +@@ -511,8 +522,8 @@ int kbase_add_va_region(struct kbase_context *kctx, + int gpu_pc_bits = + kbdev->gpu_props.props.core_props.log2_program_counter_size; + +- KBASE_DEBUG_ASSERT(NULL != kctx); +- KBASE_DEBUG_ASSERT(NULL != reg); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(reg != NULL); + + lockdep_assert_held(&kctx->reg_lock); + +@@ -614,13 +625,15 @@ int kbase_add_va_region_rbtree(struct kbase_device *kbdev, + size_t align_offset = align; + size_t align_mask = align - 1; + ++#if !MALI_USE_CSF + if ((reg->flags & KBASE_REG_TILER_ALIGN_TOP)) { + WARN(align > 1, "%s with align %lx might not be honored for KBASE_REG_TILER_ALIGN_TOP memory", + __func__, + (unsigned long)align); +- align_mask = reg->extent - 1; +- align_offset = reg->extent - reg->initial_commit; ++ align_mask = reg->extension - 1; ++ align_offset = reg->extension - reg->initial_commit; + } ++#endif /* !MALI_USE_CSF */ + + tmp = kbase_region_tracker_find_region_meeting_reqs(reg, + nr_pages, align_offset, align_mask, +@@ -643,7 +656,7 @@ exit: + return err; + } + +-/** ++/* + * @brief Initialize the internal region tracker data structure. 
+ */ + static void kbase_region_tracker_ds_init(struct kbase_context *kctx, +@@ -698,6 +711,9 @@ void kbase_region_tracker_term(struct kbase_context *kctx) + kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); + kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); + kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec); ++#if MALI_USE_CSF ++ WARN_ON(!list_empty(&kctx->csf.event_pages_head)); ++#endif + kbase_gpu_vm_unlock(kctx); + } + +@@ -720,23 +736,26 @@ int kbase_region_tracker_init(struct kbase_context *kctx) + u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE; + u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT; + u64 same_va_pages; ++ u64 same_va_base = 1u; + int err; + + /* Take the lock as kbase_free_alloced_region requires it */ + kbase_gpu_vm_lock(kctx); + +- same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1; ++ same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - same_va_base; + /* all have SAME_VA */ +- same_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 1, +- same_va_pages, +- KBASE_REG_ZONE_SAME_VA); ++ same_va_reg = ++ kbase_alloc_free_region(&kctx->reg_rbtree_same, same_va_base, ++ same_va_pages, KBASE_REG_ZONE_SAME_VA); + + if (!same_va_reg) { + err = -ENOMEM; + goto fail_unlock; + } ++ kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_SAME_VA, same_va_base, ++ same_va_pages); + +-#ifdef CONFIG_64BIT ++#if IS_ENABLED(CONFIG_64BIT) + /* 32-bit clients have custom VA zones */ + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { + #endif +@@ -760,19 +779,28 @@ int kbase_region_tracker_init(struct kbase_context *kctx) + err = -ENOMEM; + goto fail_free_same_va; + } +-#ifdef CONFIG_64BIT ++ kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA, ++ KBASE_REG_ZONE_CUSTOM_VA_BASE, ++ custom_va_size); ++#if IS_ENABLED(CONFIG_64BIT) + } else { + custom_va_size = 0; + } + #endif ++ /* EXEC_VA zone's codepaths are slightly easier when its base_pfn is ++ * initially U64_MAX ++ */ ++ 
kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, U64_MAX, 0u); ++ /* Other zones are 0: kbase_create_context() uses vzalloc */ + + kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg); + +- kctx->same_va_end = same_va_pages + 1; +- kctx->gpu_va_end = kctx->same_va_end + custom_va_size; +- kctx->exec_va_start = U64_MAX; ++ kctx->gpu_va_end = same_va_base + same_va_pages + custom_va_size; + kctx->jit_va = false; + ++#if MALI_USE_CSF ++ INIT_LIST_HEAD(&kctx->csf.event_pages_head); ++#endif + + kbase_gpu_vm_unlock(kctx); + return 0; +@@ -784,44 +812,147 @@ fail_unlock: + return err; + } + +-#ifdef CONFIG_64BIT ++static bool kbase_has_exec_va_zone_locked(struct kbase_context *kctx) ++{ ++ struct kbase_reg_zone *exec_va_zone; ++ ++ lockdep_assert_held(&kctx->reg_lock); ++ exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); ++ ++ return (exec_va_zone->base_pfn != U64_MAX); ++} ++ ++bool kbase_has_exec_va_zone(struct kbase_context *kctx) ++{ ++ bool has_exec_va_zone; ++ ++ kbase_gpu_vm_lock(kctx); ++ has_exec_va_zone = kbase_has_exec_va_zone_locked(kctx); ++ kbase_gpu_vm_unlock(kctx); ++ ++ return has_exec_va_zone; ++} ++ ++/** ++ * Determine if any allocations have been made on a context's region tracker ++ * @kctx: KBase context ++ * ++ * Check the context to determine if any allocations have been made yet from ++ * any of its zones. This check should be done before resizing a zone, e.g. to ++ * make space to add a second zone. ++ * ++ * Whilst a zone without allocations can be resized whilst other zones have ++ * allocations, we still check all of @kctx 's zones anyway: this is a stronger ++ * guarantee and should be adhered to when creating new zones anyway. ++ * ++ * Allocations from kbdev zones are not counted. 
++ * ++ * Return: true if any allocs exist on any zone, false otherwise ++ */ ++static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx) ++{ ++ unsigned int zone_idx; ++ ++ lockdep_assert_held(&kctx->reg_lock); ++ ++ for (zone_idx = 0; zone_idx < KBASE_REG_ZONE_MAX; ++zone_idx) { ++ struct kbase_reg_zone *zone; ++ struct kbase_va_region *reg; ++ u64 zone_base_addr; ++ unsigned long zone_bits = KBASE_REG_ZONE(zone_idx); ++ unsigned long reg_zone; ++ ++ zone = kbase_ctx_reg_zone_get(kctx, zone_bits); ++ zone_base_addr = zone->base_pfn << PAGE_SHIFT; ++ ++ reg = kbase_region_tracker_find_region_base_address( ++ kctx, zone_base_addr); ++ ++ if (!zone->va_size_pages) { ++ WARN(reg, ++ "Should not have found a region that starts at 0x%.16llx for zone 0x%lx", ++ (unsigned long long)zone_base_addr, zone_bits); ++ continue; ++ } ++ ++ if (WARN(!reg, ++ "There should always be a region that starts at 0x%.16llx for zone 0x%lx, couldn't find it", ++ (unsigned long long)zone_base_addr, zone_bits)) ++ return true; /* Safest return value */ ++ ++ reg_zone = reg->flags & KBASE_REG_ZONE_MASK; ++ if (WARN(reg_zone != zone_bits, ++ "The region that starts at 0x%.16llx should be in zone 0x%lx but was found in the wrong zone 0x%lx", ++ (unsigned long long)zone_base_addr, zone_bits, ++ reg_zone)) ++ return true; /* Safest return value */ ++ ++ /* Unless the region is completely free, of the same size as ++ * the original zone, then it has allocs ++ */ ++ if ((!(reg->flags & KBASE_REG_FREE)) || ++ (reg->nr_pages != zone->va_size_pages)) ++ return true; ++ } ++ ++ /* All zones are the same size as originally made, so there are no ++ * allocs ++ */ ++ return false; ++} ++ ++#if IS_ENABLED(CONFIG_64BIT) + static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, + u64 jit_va_pages) + { +- struct kbase_va_region *same_va; ++ struct kbase_va_region *same_va_reg; ++ struct kbase_reg_zone *same_va_zone; ++ u64 same_va_zone_base_addr; ++ const unsigned long 
same_va_zone_bits = KBASE_REG_ZONE_SAME_VA; + struct kbase_va_region *custom_va_reg; ++ u64 jit_va_start; + + lockdep_assert_held(&kctx->reg_lock); + +- /* First verify that a JIT_VA zone has not been created already. */ +- if (kctx->jit_va) +- return -EINVAL; +- + /* +- * Modify the same VA free region after creation. Be careful to ensure +- * that allocations haven't been made as they could cause an overlap +- * to happen with existing same VA allocations and the custom VA zone. ++ * Modify the same VA free region after creation. The caller has ++ * ensured that allocations haven't been made, as any allocations could ++ * cause an overlap to happen with existing same VA allocations and the ++ * custom VA zone. + */ +- same_va = kbase_region_tracker_find_region_base_address(kctx, +- PAGE_SIZE); +- if (!same_va) ++ same_va_zone = kbase_ctx_reg_zone_get(kctx, same_va_zone_bits); ++ same_va_zone_base_addr = same_va_zone->base_pfn << PAGE_SHIFT; ++ ++ same_va_reg = kbase_region_tracker_find_region_base_address( ++ kctx, same_va_zone_base_addr); ++ if (WARN(!same_va_reg, ++ "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone 0x%lx", ++ (unsigned long long)same_va_zone_base_addr, same_va_zone_bits)) + return -ENOMEM; + +- if (same_va->nr_pages < jit_va_pages || kctx->same_va_end < jit_va_pages) ++ /* kbase_region_tracker_has_allocs() in the caller has already ensured ++ * that all of the zones have no allocs, so no need to check that again ++ * on same_va_reg ++ */ ++ WARN_ON((!(same_va_reg->flags & KBASE_REG_FREE)) || ++ same_va_reg->nr_pages != same_va_zone->va_size_pages); ++ ++ if (same_va_reg->nr_pages < jit_va_pages || ++ same_va_zone->va_size_pages < jit_va_pages) + return -ENOMEM; + + /* It's safe to adjust the same VA zone now */ +- same_va->nr_pages -= jit_va_pages; +- kctx->same_va_end -= jit_va_pages; ++ same_va_reg->nr_pages -= jit_va_pages; ++ same_va_zone->va_size_pages -= jit_va_pages; ++ 
jit_va_start = kbase_reg_zone_end_pfn(same_va_zone); + + /* + * Create a custom VA zone at the end of the VA for allocations which + * JIT can use so it doesn't have to allocate VA from the kernel. + */ +- custom_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom, +- kctx->same_va_end, +- jit_va_pages, +- KBASE_REG_ZONE_CUSTOM_VA); ++ custom_va_reg = ++ kbase_alloc_free_region(&kctx->reg_rbtree_custom, jit_va_start, ++ jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA); + + /* + * The context will be destroyed if we fail here so no point +@@ -829,6 +960,11 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, + */ + if (!custom_va_reg) + return -ENOMEM; ++ /* Since this is 64-bit, the custom zone will not have been ++ * initialized, so initialize it now ++ */ ++ kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA, jit_va_start, ++ jit_va_pages); + + kbase_region_tracker_insert(custom_va_reg); + return 0; +@@ -847,16 +983,34 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, + if (group_id < 0 || group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) + return -EINVAL; + +-#if MALI_JIT_PRESSURE_LIMIT + if (phys_pages_limit > jit_va_pages) +-#else +- if (phys_pages_limit != jit_va_pages) +-#endif /* MALI_JIT_PRESSURE_LIMIT */ + return -EINVAL; + ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ if (phys_pages_limit != jit_va_pages) ++ kbase_ctx_flag_set(kctx, KCTX_JPL_ENABLED); ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ + kbase_gpu_vm_lock(kctx); + +-#ifdef CONFIG_64BIT ++ /* Verify that a JIT_VA zone has not been created already. */ ++ if (kctx->jit_va) { ++ err = -EINVAL; ++ goto exit_unlock; ++ } ++ ++ /* If in 64-bit, we always lookup the SAME_VA zone. To ensure it has no ++ * allocs, we can ensure there are no allocs anywhere. ++ * ++ * This check is also useful in 32-bit, just to make sure init of the ++ * zone is always done before any allocs. 
++ */ ++ if (kbase_region_tracker_has_allocs(kctx)) { ++ err = -ENOMEM; ++ goto exit_unlock; ++ } ++ ++#if IS_ENABLED(CONFIG_64BIT) + if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) + err = kbase_region_tracker_init_jit_64(kctx, jit_va_pages); + #endif +@@ -870,13 +1024,14 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, + kctx->trim_level = trim_level; + kctx->jit_va = true; + kctx->jit_group_id = group_id; +-#if MALI_JIT_PRESSURE_LIMIT ++#if MALI_JIT_PRESSURE_LIMIT_BASE + kctx->jit_phys_pages_limit = phys_pages_limit; + dev_dbg(kctx->kbdev->dev, "phys_pages_limit set to %llu\n", + phys_pages_limit); +-#endif /* MALI_JIT_PRESSURE_LIMIT */ ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + } + ++exit_unlock: + kbase_gpu_vm_unlock(kctx); + + return err; +@@ -884,24 +1039,33 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, + + int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages) + { +- struct kbase_va_region *shrinking_va_reg; + struct kbase_va_region *exec_va_reg; +- u64 exec_va_start, exec_va_base_addr; ++ struct kbase_reg_zone *exec_va_zone; ++ struct kbase_reg_zone *target_zone; ++ struct kbase_va_region *target_reg; ++ u64 target_zone_base_addr; ++ unsigned long target_zone_bits; ++ u64 exec_va_start; + int err; + +- /* The EXEC_VA zone shall be created by making space at the end of the +- * address space. Firstly, verify that the number of EXEC_VA pages +- * requested by the client is reasonable and then make sure that it is +- * not greater than the address space itself before calculating the base +- * address of the new zone. 
++ /* The EXEC_VA zone shall be created by making space either: ++ * - for 64-bit clients, at the end of the process's address space ++ * - for 32-bit clients, in the CUSTOM zone ++ * ++ * Firstly, verify that the number of EXEC_VA pages requested by the ++ * client is reasonable and then make sure that it is not greater than ++ * the address space itself before calculating the base address of the ++ * new zone. + */ + if (exec_va_pages == 0 || exec_va_pages > KBASE_REG_ZONE_EXEC_VA_MAX_PAGES) + return -EINVAL; + + kbase_gpu_vm_lock(kctx); + +- /* First verify that a JIT_VA zone has not been created already. */ +- if (kctx->jit_va) { ++ /* Verify that we've not already created a EXEC_VA zone, and that the ++ * EXEC_VA zone must come before JIT's CUSTOM_VA. ++ */ ++ if (kbase_has_exec_va_zone_locked(kctx) || kctx->jit_va) { + err = -EPERM; + goto exit_unlock; + } +@@ -911,28 +1075,50 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages + goto exit_unlock; + } + +- exec_va_start = kctx->gpu_va_end - exec_va_pages; +- exec_va_base_addr = exec_va_start << PAGE_SHIFT; +- +- shrinking_va_reg = kbase_region_tracker_find_region_enclosing_address(kctx, +- exec_va_base_addr); +- if (!shrinking_va_reg) { ++ /* Verify no allocations have already been made */ ++ if (kbase_region_tracker_has_allocs(kctx)) { + err = -ENOMEM; + goto exit_unlock; + } + +- /* Make sure that the EXEC_VA region is still uninitialized */ +- if ((shrinking_va_reg->flags & KBASE_REG_ZONE_MASK) == +- KBASE_REG_ZONE_EXEC_VA) { +- err = -EPERM; ++#if IS_ENABLED(CONFIG_64BIT) ++ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { ++#endif ++ /* 32-bit client: take from CUSTOM_VA zone */ ++ target_zone_bits = KBASE_REG_ZONE_CUSTOM_VA; ++#if IS_ENABLED(CONFIG_64BIT) ++ } else { ++ /* 64-bit client: take from SAME_VA zone */ ++ target_zone_bits = KBASE_REG_ZONE_SAME_VA; ++ } ++#endif ++ target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_bits); ++ target_zone_base_addr = 
target_zone->base_pfn << PAGE_SHIFT; ++ ++ target_reg = kbase_region_tracker_find_region_base_address( ++ kctx, target_zone_base_addr); ++ if (WARN(!target_reg, ++ "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone 0x%lx", ++ (unsigned long long)target_zone_base_addr, target_zone_bits)) { ++ err = -ENOMEM; + goto exit_unlock; + } ++ /* kbase_region_tracker_has_allocs() above has already ensured that all ++ * of the zones have no allocs, so no need to check that again on ++ * target_reg ++ */ ++ WARN_ON((!(target_reg->flags & KBASE_REG_FREE)) || ++ target_reg->nr_pages != target_zone->va_size_pages); + +- if (shrinking_va_reg->nr_pages <= exec_va_pages) { ++ if (target_reg->nr_pages <= exec_va_pages || ++ target_zone->va_size_pages <= exec_va_pages) { + err = -ENOMEM; + goto exit_unlock; + } + ++ /* Taken from the end of the target zone */ ++ exec_va_start = kbase_reg_zone_end_pfn(target_zone) - exec_va_pages; ++ + exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec, + exec_va_start, + exec_va_pages, +@@ -941,13 +1127,17 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages + err = -ENOMEM; + goto exit_unlock; + } ++ /* Update EXEC_VA zone ++ * ++ * not using kbase_ctx_reg_zone_init() - it was already initialized ++ */ ++ exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); ++ exec_va_zone->base_pfn = exec_va_start; ++ exec_va_zone->va_size_pages = exec_va_pages; + +- shrinking_va_reg->nr_pages -= exec_va_pages; +-#ifdef CONFIG_64BIT +- if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) +- kctx->same_va_end -= exec_va_pages; +-#endif +- kctx->exec_va_start = exec_va_start; ++ /* Update target zone and corresponding region */ ++ target_reg->nr_pages -= exec_va_pages; ++ target_zone->va_size_pages -= exec_va_pages; + + kbase_region_tracker_insert(exec_va_reg); + err = 0; +@@ -957,12 +1147,40 @@ exit_unlock: + return err; + } + ++#if MALI_USE_CSF ++void 
kbase_mcu_shared_interface_region_tracker_term(struct kbase_device *kbdev) ++{ ++ kbase_region_tracker_term_rbtree(&kbdev->csf.shared_reg_rbtree); ++} ++ ++int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev) ++{ ++ struct kbase_va_region *shared_reg; ++ u64 shared_reg_start_pfn; ++ u64 shared_reg_size; ++ ++ shared_reg_start_pfn = KBASE_REG_ZONE_MCU_SHARED_BASE; ++ shared_reg_size = KBASE_REG_ZONE_MCU_SHARED_SIZE; ++ ++ kbdev->csf.shared_reg_rbtree = RB_ROOT; ++ ++ shared_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, ++ shared_reg_start_pfn, ++ shared_reg_size, ++ KBASE_REG_ZONE_MCU_SHARED); ++ if (!shared_reg) ++ return -ENOMEM; ++ ++ kbase_region_tracker_insert(shared_reg); ++ return 0; ++} ++#endif + + int kbase_mem_init(struct kbase_device *kbdev) + { + int err = 0; + struct kbasep_mem_device *memdev; +-#ifdef CONFIG_OF ++#if IS_ENABLED(CONFIG_OF) + struct device_node *mgm_node = NULL; + #endif + +@@ -976,6 +1194,12 @@ int kbase_mem_init(struct kbase_device *kbdev) + /* Initialize memory usage */ + atomic_set(&memdev->used_pages, 0); + ++ spin_lock_init(&kbdev->gpu_mem_usage_lock); ++ kbdev->total_gpu_pages = 0; ++ kbdev->process_root = RB_ROOT; ++ kbdev->dma_buf_root = RB_ROOT; ++ mutex_init(&kbdev->dma_buf_lock); ++ + #ifdef IR_THRESHOLD + atomic_set(&memdev->ir_threshold, IR_THRESHOLD); + #else +@@ -984,7 +1208,7 @@ int kbase_mem_init(struct kbase_device *kbdev) + + kbdev->mgm_dev = &kbase_native_mgm_dev; + +-#ifdef CONFIG_OF ++#if IS_ENABLED(CONFIG_OF) + /* Check to see whether or not a platform-specific memory group manager + * is configured and available. 
+ */ +@@ -1053,13 +1277,22 @@ void kbase_mem_term(struct kbase_device *kbdev) + + kbase_mem_pool_group_term(&kbdev->mem_pools); + ++ WARN_ON(kbdev->total_gpu_pages); ++ WARN_ON(!RB_EMPTY_ROOT(&kbdev->process_root)); ++ WARN_ON(!RB_EMPTY_ROOT(&kbdev->dma_buf_root)); ++ mutex_destroy(&kbdev->dma_buf_lock); ++ + if (kbdev->mgm_dev) + module_put(kbdev->mgm_dev->owner); + } + KBASE_EXPORT_TEST_API(kbase_mem_term); + + /** +- * @brief Allocate a free region object. ++ * Allocate a free region object. ++ * @rbtree: Backlink to the red-black tree of memory regions. ++ * @start_pfn: The Page Frame Number in GPU virtual address space. ++ * @nr_pages: The size of the region in pages. ++ * @zone: KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA + * + * The allocated object is not part of any list yet, and is flagged as + * KBASE_REG_FREE. No mapping is allocated yet. +@@ -1132,7 +1365,8 @@ static struct kbase_context *kbase_reg_flags_to_kctx( + } + + /** +- * @brief Free a region object. ++ * Free a region object. ++ * @reg: Region + * + * The described region must be freed of any mapping. 
+ * +@@ -1143,6 +1377,13 @@ static struct kbase_context *kbase_reg_flags_to_kctx( + */ + void kbase_free_alloced_region(struct kbase_va_region *reg) + { ++#if MALI_USE_CSF ++ if ((reg->flags & KBASE_REG_ZONE_MASK) == ++ KBASE_REG_ZONE_MCU_SHARED) { ++ kfree(reg); ++ return; ++ } ++#endif + if (!(reg->flags & KBASE_REG_FREE)) { + struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg); + +@@ -1152,8 +1393,12 @@ void kbase_free_alloced_region(struct kbase_va_region *reg) + if (WARN_ON(kbase_is_region_invalid(reg))) + return; + +- dev_dbg(kctx->kbdev->dev, "Freeing memory region %p\n", ++ dev_dbg(kctx->kbdev->dev, "Freeing memory region %pK\n", + (void *)reg); ++#if MALI_USE_CSF ++ if (reg->flags & KBASE_REG_CSF_EVENT) ++ kbase_unlink_event_mem_page(kctx, reg); ++#endif + + mutex_lock(&kctx->jit_evict_lock); + +@@ -1233,8 +1478,8 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 + else + attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC); + +- KBASE_DEBUG_ASSERT(NULL != kctx); +- KBASE_DEBUG_ASSERT(NULL != reg); ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(reg != NULL); + + err = kbase_add_va_region(kctx, reg, addr, nr_pages, align); + if (err) +@@ -1260,7 +1505,9 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 + if (err) + goto bad_insert; + +- kbase_mem_phy_alloc_gpu_mapped(alloc->imported.alias.aliased[i].alloc); ++ /* Note: mapping count is tracked at alias ++ * creation time ++ */ + } else { + err = kbase_mmu_insert_single_page(kctx, + reg->start_pfn + i * stride, +@@ -1319,13 +1566,6 @@ bad_insert: + reg->start_pfn, reg->nr_pages, + kctx->as_nr); + +- if (alloc->type == KBASE_MEM_TYPE_ALIAS) { +- KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased); +- while (i--) +- if (alloc->imported.alias.aliased[i].alloc) +- kbase_mem_phy_alloc_gpu_unmapped(alloc->imported.alias.aliased[i].alloc); +- } +- + kbase_remove_va_region(reg); + + return err; +@@ -1339,7 +1579,6 @@ static 
void kbase_jd_user_buf_unmap(struct kbase_context *kctx, + int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) + { + int err = 0; +- size_t i; + + if (reg->start_pfn == 0) + return 0; +@@ -1364,10 +1603,9 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) + /* Update tracking, and other cleanup, depending on memory type. */ + switch (reg->gpu_alloc->type) { + case KBASE_MEM_TYPE_ALIAS: +- KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased); +- for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++) +- if (reg->gpu_alloc->imported.alias.aliased[i].alloc) +- kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc); ++ /* We mark the source allocs as unmapped from the GPU when ++ * putting reg's allocs ++ */ + break; + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { + struct kbase_alloc_import_user_buf *user_buf = +@@ -1404,7 +1642,7 @@ static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping( + unsigned long map_start; + size_t map_size; + +- lockdep_assert_held(¤t->mm->mmap_sem); ++ lockdep_assert_held(kbase_mem_get_process_mmap_lock()); + + if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */ + return NULL; +@@ -1676,9 +1914,9 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re + { + int err; + +- KBASE_DEBUG_ASSERT(NULL != kctx); +- KBASE_DEBUG_ASSERT(NULL != reg); +- dev_dbg(kctx->kbdev->dev, "%s %p in kctx %p\n", ++ KBASE_DEBUG_ASSERT(kctx != NULL); ++ KBASE_DEBUG_ASSERT(reg != NULL); ++ dev_dbg(kctx->kbdev->dev, "%s %pK in kctx %pK\n", + __func__, (void *)reg, (void *)kctx); + lockdep_assert_held(&kctx->reg_lock); + +@@ -1724,7 +1962,9 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re + KBASE_EXPORT_TEST_API(kbase_mem_free_region); + + /** +- * @brief Free the region from the GPU and unregister it. ++ * Free the region from the GPU and unregister it. 
++ * @kctx: KBase context ++ * @gpu_addr: GPU address to free + * + * This function implements the free operation on a memory segment. + * It will loudly fail if called with outstanding mappings. +@@ -1735,7 +1975,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) + struct kbase_va_region *reg; + + KBASE_DEBUG_ASSERT(kctx != NULL); +- dev_dbg(kctx->kbdev->dev, "%s 0x%llx in kctx %p\n", ++ dev_dbg(kctx->kbdev->dev, "%s 0x%llx in kctx %pK\n", + __func__, gpu_addr, (void *)kctx); + + if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) { +@@ -1743,7 +1983,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) + return -EINVAL; + } + +- if (0 == gpu_addr) { ++ if (gpu_addr == 0) { + dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n"); + return -EINVAL; + } +@@ -1796,7 +2036,7 @@ KBASE_EXPORT_TEST_API(kbase_mem_free); + int kbase_update_region_flags(struct kbase_context *kctx, + struct kbase_va_region *reg, unsigned long flags) + { +- KBASE_DEBUG_ASSERT(NULL != reg); ++ KBASE_DEBUG_ASSERT(reg != NULL); + KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0); + + reg->flags |= kbase_cache_enabled(flags, reg->nr_pages); +@@ -1835,9 +2075,25 @@ int kbase_update_region_flags(struct kbase_context *kctx, + reg->flags |= KBASE_REG_SHARE_IN; + } + ++#if !MALI_USE_CSF + if (flags & BASE_MEM_TILER_ALIGN_TOP) + reg->flags |= KBASE_REG_TILER_ALIGN_TOP; ++#endif /* !MALI_USE_CSF */ ++ ++#if MALI_USE_CSF ++ if (flags & BASE_MEM_CSF_EVENT) { ++ reg->flags |= KBASE_REG_CSF_EVENT; ++ reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING; + ++ if (!(reg->flags & KBASE_REG_SHARE_BOTH)) { ++ /* On non coherent platforms need to map as uncached on ++ * both sides. 
++ */ ++ reg->flags &= ~KBASE_REG_CPU_CACHED; ++ reg->flags &= ~KBASE_REG_GPU_CACHED; ++ } ++ } ++#endif + + /* Set up default MEMATTR usage */ + if (!(reg->flags & KBASE_REG_GPU_CACHED)) { +@@ -1851,6 +2107,13 @@ int kbase_update_region_flags(struct kbase_context *kctx, + "Can't allocate GPU uncached memory due to MMU in Legacy Mode\n"); + return -EINVAL; + } ++#if MALI_USE_CSF ++ } else if (reg->flags & KBASE_REG_CSF_EVENT) { ++ WARN_ON(!(reg->flags & KBASE_REG_SHARE_BOTH)); ++ ++ reg->flags |= ++ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED); ++#endif + } else if (kctx->kbdev->system_coherency == COHERENCY_ACE && + (reg->flags & KBASE_REG_SHARE_BOTH)) { + reg->flags |= +@@ -1905,7 +2168,8 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, + &kctx->kbdev->memdev.used_pages); + + /* Increase mm counters before we allocate pages so that this +- * allocation is visible to the OOM killer */ ++ * allocation is visible to the OOM killer ++ */ + kbase_process_page_usage_inc(kctx, nr_pages_requested); + + tp = alloc->pages + alloc->nents; +@@ -2033,6 +2297,9 @@ no_new_partial: + (u64)new_page_count); + + alloc->nents += nr_pages_requested; ++ ++ kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); ++ + done: + return 0; + +@@ -2209,6 +2476,9 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( + (u64)new_page_count); + + alloc->nents += nr_pages_requested; ++ ++ kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); ++ + done: + return new_pages; + +@@ -2303,7 +2573,7 @@ int kbase_free_phy_pages_helper( + } + + /* early out if nothing to do */ +- if (0 == nr_pages_to_free) ++ if (nr_pages_to_free == 0) + return 0; + + start_free = alloc->pages + alloc->nents - nr_pages_to_free; +@@ -2374,6 +2644,8 @@ int kbase_free_phy_pages_helper( + kbdev, + kctx->id, + (u64)new_page_count); ++ ++ kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, freed); + } + + return 0; +@@ -2496,9 +2768,19 @@ void 
kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, + kbdev, + kctx->id, + (u64)new_page_count); ++ ++ kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, freed); + } + } ++KBASE_EXPORT_TEST_API(kbase_free_phy_pages_helper_locked); + ++#if MALI_USE_CSF ++/** ++ * kbase_jd_user_buf_unpin_pages - Release the pinned pages of a user buffer. ++ * @alloc: The allocation for the imported user buffer. ++ */ ++static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc); ++#endif + + void kbase_mem_kref_free(struct kref *kref) + { +@@ -2540,8 +2822,10 @@ void kbase_mem_kref_free(struct kref *kref) + aliased = alloc->imported.alias.aliased; + if (aliased) { + for (i = 0; i < alloc->imported.alias.nents; i++) +- if (aliased[i].alloc) ++ if (aliased[i].alloc) { ++ kbase_mem_phy_alloc_gpu_unmapped(aliased[i].alloc); + kbase_mem_phy_alloc_put(aliased[i].alloc); ++ } + vfree(aliased); + } + break; +@@ -2558,12 +2842,17 @@ void kbase_mem_kref_free(struct kref *kref) + alloc->imported.umm.dma_attachment, + alloc->imported.umm.sgt, + DMA_BIDIRECTIONAL); ++ kbase_remove_dma_buf_usage(alloc->imported.umm.kctx, ++ alloc); + } + dma_buf_detach(alloc->imported.umm.dma_buf, + alloc->imported.umm.dma_attachment); + dma_buf_put(alloc->imported.umm.dma_buf); + break; + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: ++#if MALI_USE_CSF ++ kbase_jd_user_buf_unpin_pages(alloc); ++#endif + if (alloc->imported.user_buf.mm) + mmdrop(alloc->imported.user_buf.mm); + if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE) +@@ -2587,7 +2876,7 @@ KBASE_EXPORT_TEST_API(kbase_mem_kref_free); + + int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size) + { +- KBASE_DEBUG_ASSERT(NULL != reg); ++ KBASE_DEBUG_ASSERT(reg != NULL); + KBASE_DEBUG_ASSERT(vsize > 0); + + /* validate user provided arguments */ +@@ -2600,7 +2889,7 @@ int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size + if ((size_t) vsize > ((size_t) -1 / 
sizeof(*reg->cpu_alloc->pages))) + goto out_term; + +- KBASE_DEBUG_ASSERT(0 != vsize); ++ KBASE_DEBUG_ASSERT(vsize != 0); + + if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0) + goto out_term; +@@ -2643,22 +2932,37 @@ bool kbase_check_alloc_flags(unsigned long flags) + /* GPU executable memory cannot: + * - Be written by the GPU + * - Be grown on GPU page fault +- * - Have the top of its initial commit aligned to 'extent' */ ++ */ ++ if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & ++ (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF))) ++ return false; ++ ++#if !MALI_USE_CSF ++ /* GPU executable memory also cannot have the top of its initial ++ * commit aligned to 'extension' ++ */ + if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & +- (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF | +- BASE_MEM_TILER_ALIGN_TOP))) ++ BASE_MEM_TILER_ALIGN_TOP)) + return false; ++#endif /* !MALI_USE_CSF */ + + /* To have an allocation lie within a 4GB chunk is required only for +- * TLS memory, which will never be used to contain executable code +- * and also used for Tiler heap. ++ * TLS memory, which will never be used to contain executable code. + */ + if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags & +- (BASE_MEM_PROT_GPU_EX | BASE_MEM_TILER_ALIGN_TOP))) ++ BASE_MEM_PROT_GPU_EX)) + return false; + ++#if !MALI_USE_CSF ++ /* TLS memory should also not be used for tiler heap */ ++ if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags & ++ BASE_MEM_TILER_ALIGN_TOP)) ++ return false; ++#endif /* !MALI_USE_CSF */ ++ + /* GPU should have at least read or write access otherwise there is no +- reason for allocating. */ ++ * reason for allocating. 
++ */ + if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0) + return false; + +@@ -2666,14 +2970,15 @@ bool kbase_check_alloc_flags(unsigned long flags) + if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED) + return false; + +- /* BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP is only valid for imported +- * memory */ ++ /* BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP is only valid for imported memory ++ */ + if ((flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) == + BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) + return false; + + /* Should not combine BASE_MEM_COHERENT_LOCAL with +- * BASE_MEM_COHERENT_SYSTEM */ ++ * BASE_MEM_COHERENT_SYSTEM ++ */ + if ((flags & (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) == + (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) + return false; +@@ -2699,12 +3004,15 @@ bool kbase_check_import_flags(unsigned long flags) + if (flags & BASE_MEM_GROW_ON_GPF) + return false; + ++#if !MALI_USE_CSF + /* Imported memory cannot be aligned to the end of its initial commit */ + if (flags & BASE_MEM_TILER_ALIGN_TOP) + return false; ++#endif /* !MALI_USE_CSF */ + + /* GPU should have at least read or write access otherwise there is no +- reason for importing. */ ++ * reason for importing. 
++ */ + if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0) + return false; + +@@ -2716,19 +3024,19 @@ bool kbase_check_import_flags(unsigned long flags) + } + + int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, +- u64 va_pages, u64 commit_pages, u64 large_extent) ++ u64 va_pages, u64 commit_pages, u64 large_extension) + { + struct device *dev = kctx->kbdev->dev; + int gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size; + u64 gpu_pc_pages_max = 1ULL << gpu_pc_bits >> PAGE_SHIFT; + struct kbase_va_region test_reg; + +- /* kbase_va_region's extent member can be of variable size, so check against that type */ +- test_reg.extent = large_extent; ++ /* kbase_va_region's extension member can be of variable size, so check against that type */ ++ test_reg.extension = large_extension; + + #define KBASE_MSG_PRE "GPU allocation attempted with " + +- if (0 == va_pages) { ++ if (va_pages == 0) { + dev_warn(dev, KBASE_MSG_PRE "0 va_pages!"); + return -EINVAL; + } +@@ -2740,7 +3048,8 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, + } + + /* Note: commit_pages is checked against va_pages during +- * kbase_alloc_phy_pages() */ ++ * kbase_alloc_phy_pages() ++ */ + + /* Limit GPU executable allocs to GPU PC size */ + if ((flags & BASE_MEM_PROT_GPU_EX) && (va_pages > gpu_pc_pages_max)) { +@@ -2751,47 +3060,73 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, + return -EINVAL; + } + +- if ((flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) && +- test_reg.extent == 0) { +- dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GROW_ON_GPF or BASE_MEM_TILER_ALIGN_TOP but extent == 0\n"); ++ if ((flags & BASE_MEM_GROW_ON_GPF) && (test_reg.extension == 0)) { ++ dev_warn(dev, KBASE_MSG_PRE ++ "BASE_MEM_GROW_ON_GPF but extension == 0\n"); ++ return -EINVAL; ++ } ++ ++#if !MALI_USE_CSF ++ if ((flags & BASE_MEM_TILER_ALIGN_TOP) && (test_reg.extension == 0)) { ++ 
dev_warn(dev, KBASE_MSG_PRE ++ "BASE_MEM_TILER_ALIGN_TOP but extension == 0\n"); + return -EINVAL; + } + + if (!(flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) && +- test_reg.extent != 0) { +- dev_warn(dev, KBASE_MSG_PRE "neither BASE_MEM_GROW_ON_GPF nor BASE_MEM_TILER_ALIGN_TOP set but extent != 0\n"); ++ test_reg.extension != 0) { ++ dev_warn( ++ dev, KBASE_MSG_PRE ++ "neither BASE_MEM_GROW_ON_GPF nor BASE_MEM_TILER_ALIGN_TOP set but extension != 0\n"); ++ return -EINVAL; ++ } ++#else ++ if (!(flags & BASE_MEM_GROW_ON_GPF) && test_reg.extension != 0) { ++ dev_warn(dev, KBASE_MSG_PRE ++ "BASE_MEM_GROW_ON_GPF not set but extension != 0\n"); + return -EINVAL; + } ++#endif /* !MALI_USE_CSF */ + ++#if !MALI_USE_CSF + /* BASE_MEM_TILER_ALIGN_TOP memory has a number of restrictions */ + if (flags & BASE_MEM_TILER_ALIGN_TOP) { + #define KBASE_MSG_PRE_FLAG KBASE_MSG_PRE "BASE_MEM_TILER_ALIGN_TOP and " +- unsigned long small_extent; +- +- if (large_extent > BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES) { +- dev_warn(dev, KBASE_MSG_PRE_FLAG "extent==%lld pages exceeds limit %lld", +- (unsigned long long)large_extent, +- BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES); ++ unsigned long small_extension; ++ ++ if (large_extension > ++ BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES) { ++ dev_warn(dev, ++ KBASE_MSG_PRE_FLAG ++ "extension==%lld pages exceeds limit %lld", ++ (unsigned long long)large_extension, ++ BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES); + return -EINVAL; + } + /* For use with is_power_of_2, which takes unsigned long, so +- * must ensure e.g. on 32-bit kernel it'll fit in that type */ +- small_extent = (unsigned long)large_extent; ++ * must ensure e.g. 
on 32-bit kernel it'll fit in that type ++ */ ++ small_extension = (unsigned long)large_extension; + +- if (!is_power_of_2(small_extent)) { +- dev_warn(dev, KBASE_MSG_PRE_FLAG "extent==%ld not a non-zero power of 2", +- small_extent); ++ if (!is_power_of_2(small_extension)) { ++ dev_warn(dev, ++ KBASE_MSG_PRE_FLAG ++ "extension==%ld not a non-zero power of 2", ++ small_extension); + return -EINVAL; + } + +- if (commit_pages > large_extent) { +- dev_warn(dev, KBASE_MSG_PRE_FLAG "commit_pages==%ld exceeds extent==%ld", +- (unsigned long)commit_pages, +- (unsigned long)large_extent); ++ if (commit_pages > large_extension) { ++ dev_warn(dev, ++ KBASE_MSG_PRE_FLAG ++ "commit_pages==%ld exceeds extension==%ld", ++ (unsigned long)commit_pages, ++ (unsigned long)large_extension); + return -EINVAL; + } + #undef KBASE_MSG_PRE_FLAG + } ++#endif /* !MALI_USE_CSF */ + + if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && + (va_pages > (BASE_MEM_PFN_MASK_4GB + 1))) { +@@ -2805,7 +3140,8 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, + } + + /** +- * @brief Acquire the per-context region list lock ++ * Acquire the per-context region list lock ++ * @kctx: KBase context + */ + void kbase_gpu_vm_lock(struct kbase_context *kctx) + { +@@ -2816,7 +3152,8 @@ void kbase_gpu_vm_lock(struct kbase_context *kctx) + KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock); + + /** +- * @brief Release the per-context region list lock ++ * Release the per-context region list lock ++ * @kctx: KBase context + */ + void kbase_gpu_vm_unlock(struct kbase_context *kctx) + { +@@ -2826,7 +3163,7 @@ void kbase_gpu_vm_unlock(struct kbase_context *kctx) + + KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock); + +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + struct kbase_jit_debugfs_data { + int (*func)(struct kbase_jit_debugfs_data *); + struct mutex lock; +@@ -2879,7 +3216,7 @@ static ssize_t kbase_jit_debugfs_common_read(struct file *file, + } + + size = scnprintf(data->buffer, 
sizeof(data->buffer), +- "%llu,%llu,%llu", data->active_value, ++ "%llu,%llu,%llu\n", data->active_value, + data->pool_value, data->destroy_value); + } + +@@ -2983,19 +3320,23 @@ static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data) + KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops, + kbase_jit_debugfs_phys_get); + +-#if MALI_JIT_PRESSURE_LIMIT ++#if MALI_JIT_PRESSURE_LIMIT_BASE + static int kbase_jit_debugfs_used_get(struct kbase_jit_debugfs_data *data) + { + struct kbase_context *kctx = data->kctx; + struct kbase_va_region *reg; + ++#if !MALI_USE_CSF + mutex_lock(&kctx->jctx.lock); ++#endif /* !MALI_USE_CSF */ + mutex_lock(&kctx->jit_evict_lock); + list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { + data->active_value += reg->used_pages; + } + mutex_unlock(&kctx->jit_evict_lock); ++#if !MALI_USE_CSF + mutex_unlock(&kctx->jctx.lock); ++#endif /* !MALI_USE_CSF */ + + return 0; + } +@@ -3012,7 +3353,9 @@ static int kbase_jit_debugfs_trim_get(struct kbase_jit_debugfs_data *data) + struct kbase_context *kctx = data->kctx; + struct kbase_va_region *reg; + ++#if !MALI_USE_CSF + mutex_lock(&kctx->jctx.lock); ++#endif /* !MALI_USE_CSF */ + kbase_gpu_vm_lock(kctx); + mutex_lock(&kctx->jit_evict_lock); + list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { +@@ -3031,14 +3374,16 @@ static int kbase_jit_debugfs_trim_get(struct kbase_jit_debugfs_data *data) + } + mutex_unlock(&kctx->jit_evict_lock); + kbase_gpu_vm_unlock(kctx); ++#if !MALI_USE_CSF + mutex_unlock(&kctx->jctx.lock); ++#endif /* !MALI_USE_CSF */ + + return 0; + } + + KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_trim_fops, + kbase_jit_debugfs_trim_get); +-#endif /* MALI_JIT_PRESSURE_LIMIT */ ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + void kbase_jit_debugfs_init(struct kbase_context *kctx) + { +@@ -3078,7 +3423,7 @@ void kbase_jit_debugfs_init(struct kbase_context *kctx) + */ + debugfs_create_file("mem_jit_phys", mode, kctx->kctx_dentry, + kctx, 
&kbase_jit_debugfs_phys_fops); +-#if MALI_JIT_PRESSURE_LIMIT ++#if MALI_JIT_PRESSURE_LIMIT_BASE + /* + * Debugfs entry for getting the number of pages used + * by JIT allocations for estimating the physical pressure +@@ -3093,7 +3438,7 @@ void kbase_jit_debugfs_init(struct kbase_context *kctx) + */ + debugfs_create_file("mem_jit_trim", mode, kctx->kctx_dentry, + kctx, &kbase_jit_debugfs_trim_fops); +-#endif /* MALI_JIT_PRESSURE_LIMIT */ ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + } + #endif /* CONFIG_DEBUG_FS */ + +@@ -3138,8 +3483,13 @@ int kbase_jit_init(struct kbase_context *kctx) + INIT_LIST_HEAD(&kctx->jit_destroy_head); + INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker); + ++#if MALI_USE_CSF ++ INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_cmds_head); ++ INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_blocked_queues); ++#else /* !MALI_USE_CSF */ + INIT_LIST_HEAD(&kctx->jctx.jit_atoms_head); + INIT_LIST_HEAD(&kctx->jctx.jit_pending_alloc); ++#endif /* MALI_USE_CSF */ + mutex_unlock(&kctx->jit_evict_lock); + + kctx->jit_max_allocations = 0; +@@ -3153,25 +3503,29 @@ int kbase_jit_init(struct kbase_context *kctx) + * allocation and also, if BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP is set, meets + * the alignment requirements. 
+ */ +-static bool meet_size_and_tiler_align_top_requirements(struct kbase_context *kctx, +- struct kbase_va_region *walker, const struct base_jit_alloc_info *info) ++static bool meet_size_and_tiler_align_top_requirements( ++ const struct kbase_va_region *walker, ++ const struct base_jit_alloc_info *info) + { + bool meet_reqs = true; + + if (walker->nr_pages != info->va_pages) + meet_reqs = false; +- else if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) { +- size_t align = info->extent; ++ ++#if !MALI_USE_CSF ++ if (meet_reqs && (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP)) { ++ size_t align = info->extension; + size_t align_mask = align - 1; + + if ((walker->start_pfn + info->commit_pages) & align_mask) + meet_reqs = false; + } ++#endif /* !MALI_USE_CSF */ + + return meet_reqs; + } + +-#if MALI_JIT_PRESSURE_LIMIT ++#if MALI_JIT_PRESSURE_LIMIT_BASE + /* Function will guarantee *@freed will not exceed @pages_needed + */ + static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, +@@ -3185,7 +3539,9 @@ static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, + size_t to_free = 0u; + size_t max_allowed_pages = old_pages; + ++#if !MALI_USE_CSF + lockdep_assert_held(&kctx->jctx.lock); ++#endif /* !MALI_USE_CSF */ + lockdep_assert_held(&kctx->reg_lock); + + /* Is this a JIT allocation that has been reported on? */ +@@ -3213,20 +3569,20 @@ static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, + KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES); + } else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { + /* The GPU could report being ready to write to the next +- * 'extent' sized chunk, but didn't actually write to it, so we +- * can report up to 'extent' size pages more than the backed ++ * 'extension' sized chunk, but didn't actually write to it, so we ++ * can report up to 'extension' size pages more than the backed + * size. + * + * Note, this is allowed to exceed reg->nr_pages. 
+ */ +- max_allowed_pages += reg->extent; ++ max_allowed_pages += reg->extension; + + /* Also note that in these GPUs, the GPU may make a large (>1 + * page) initial allocation but not actually write out to all + * of it. Hence it might report that a much higher amount of + * memory was used than actually was written to. This does not + * result in a real warning because on growing this memory we +- * round up the size of the allocation up to an 'extent' sized ++ * round up the size of the allocation up to an 'extension' sized + * chunk, hence automatically bringing the backed size up to + * the reported size. + */ +@@ -3308,8 +3664,12 @@ static size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx, + struct kbase_va_region *reg, *tmp; + size_t total_freed = 0; + +- kbase_gpu_vm_lock(kctx); +- mutex_lock(&kctx->jit_evict_lock); ++#if !MALI_USE_CSF ++ lockdep_assert_held(&kctx->jctx.lock); ++#endif /* !MALI_USE_CSF */ ++ lockdep_assert_held(&kctx->reg_lock); ++ lockdep_assert_held(&kctx->jit_evict_lock); ++ + list_for_each_entry_safe(reg, tmp, &kctx->jit_active_head, jit_node) { + int err; + size_t freed = 0u; +@@ -3328,18 +3688,17 @@ static size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx, + if (!pages_needed) + break; + } +- mutex_unlock(&kctx->jit_evict_lock); +- kbase_gpu_vm_unlock(kctx); + + trace_mali_jit_trim(total_freed); + + return total_freed; + } +-#endif /* MALI_JIT_PRESSURE_LIMIT */ ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + static int kbase_jit_grow(struct kbase_context *kctx, +- const struct base_jit_alloc_info *info, +- struct kbase_va_region *reg) ++ const struct base_jit_alloc_info *info, ++ struct kbase_va_region *reg, ++ struct kbase_sub_alloc **prealloc_sas) + { + size_t delta; + size_t pages_required; +@@ -3347,15 +3706,13 @@ static int kbase_jit_grow(struct kbase_context *kctx, + struct kbase_mem_pool *pool; + int ret = -ENOMEM; + struct tagged_addr *gpu_pages; +- struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; +- 
int i; + + if (info->commit_pages > reg->nr_pages) { + /* Attempted to grow larger than maximum size */ + return -EINVAL; + } + +- kbase_gpu_vm_lock(kctx); ++ lockdep_assert_held(&kctx->reg_lock); + + /* Make the physical backing no longer reclaimable */ + if (!kbase_mem_evictable_unmake(reg->gpu_alloc)) +@@ -3372,14 +3729,6 @@ static int kbase_jit_grow(struct kbase_context *kctx, + pages_required = delta; + + #ifdef CONFIG_MALI_2MB_ALLOC +- /* Preallocate memory for the sub-allocation structs */ +- for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { +- prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), +- GFP_KERNEL); +- if (!prealloc_sas[i]) +- goto update_failed; +- } +- + if (pages_required >= (SZ_2M / SZ_4K)) { + pool = &kctx->mem_pools.large[kctx->jit_group_id]; + /* Round up to number of 2 MB pages required */ +@@ -3405,15 +3754,18 @@ static int kbase_jit_grow(struct kbase_context *kctx, + */ + while (kbase_mem_pool_size(pool) < pages_required) { + int pool_delta = pages_required - kbase_mem_pool_size(pool); ++ int ret; + + kbase_mem_pool_unlock(pool); + spin_unlock(&kctx->mem_partials_lock); ++ + kbase_gpu_vm_unlock(kctx); ++ ret = kbase_mem_pool_grow(pool, pool_delta); ++ kbase_gpu_vm_lock(kctx); + +- if (kbase_mem_pool_grow(pool, pool_delta)) +- goto update_failed_unlocked; ++ if (ret) ++ goto update_failed; + +- kbase_gpu_vm_lock(kctx); + spin_lock(&kctx->mem_partials_lock); + kbase_mem_pool_lock(pool); + } +@@ -3456,14 +3808,9 @@ done: + + /* Update attributes of JIT allocation taken from the pool */ + reg->initial_commit = info->commit_pages; +- reg->extent = info->extent; ++ reg->extension = info->extension; + + update_failed: +- kbase_gpu_vm_unlock(kctx); +-update_failed_unlocked: +- for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) +- kfree(prealloc_sas[i]); +- + return ret; + } + +@@ -3492,9 +3839,9 @@ static void trace_jit_stats(struct kbase_context *kctx, + max_allocations, alloc_count, va_pages, ph_pages); + } + +-#if MALI_JIT_PRESSURE_LIMIT ++#if 
MALI_JIT_PRESSURE_LIMIT_BASE + /** +- * get_jit_backed_pressure() - calculate the physical backing of all JIT ++ * get_jit_phys_backing() - calculate the physical backing of all JIT + * allocations + * + * @kctx: Pointer to the kbase context whose active JIT allocations will be +@@ -3502,83 +3849,50 @@ static void trace_jit_stats(struct kbase_context *kctx, + * + * Return: number of pages that are committed by JIT allocations + */ +-static size_t get_jit_backed_pressure(struct kbase_context *kctx) ++static size_t get_jit_phys_backing(struct kbase_context *kctx) + { +- size_t backed_pressure = 0; +- int jit_id; +- +- lockdep_assert_held(&kctx->jctx.lock); ++ struct kbase_va_region *walker; ++ size_t backing = 0; + +- kbase_gpu_vm_lock(kctx); +- for (jit_id = 0; jit_id <= BASE_JIT_ALLOC_COUNT; jit_id++) { +- struct kbase_va_region *reg = kctx->jit_alloc[jit_id]; ++ lockdep_assert_held(&kctx->jit_evict_lock); + +- if (reg && (reg != KBASE_RESERVED_REG_JIT_ALLOC)) { +- /* If region has no report, be pessimistic */ +- if (reg->used_pages == reg->nr_pages) { +- backed_pressure += reg->nr_pages; +- } else { +- backed_pressure += +- kbase_reg_current_backed_size(reg); +- } +- } ++ list_for_each_entry(walker, &kctx->jit_active_head, jit_node) { ++ backing += kbase_reg_current_backed_size(walker); + } +- kbase_gpu_vm_unlock(kctx); + +- return backed_pressure; ++ return backing; + } + +-/** +- * jit_trim_necessary_pages() - calculate and trim the least pages possible to +- * satisfy a new JIT allocation +- * +- * @kctx: Pointer to the kbase context +- * @info: Pointer to JIT allocation information for the new allocation +- * +- * Before allocating a new just-in-time memory region or reusing a previous +- * one, ensure that the total JIT physical page usage also will not exceed the +- * pressure limit. 
+- * +- * If there are no reported-on allocations, then we already guarantee this will +- * be the case - because our current pressure then only comes from the va_pages +- * of each JIT region, hence JIT physical page usage is guaranteed to be +- * bounded by this. +- * +- * However as soon as JIT allocations become "reported on", the pressure is +- * lowered to allow new JIT regions to be allocated. It is after such a point +- * that the total JIT physical page usage could (either now or in the future on +- * a grow-on-GPU-page-fault) exceed the pressure limit, but only on newly +- * allocated JIT regions. Hence, trim any "reported on" regions. +- * +- * Any pages freed will go into the pool and be allocated from there in +- * kbase_mem_alloc(). +- */ +-static void jit_trim_necessary_pages(struct kbase_context *kctx, +- const struct base_jit_alloc_info *info) ++void kbase_jit_trim_necessary_pages(struct kbase_context *kctx, ++ size_t needed_pages) + { +- size_t backed_pressure = 0; +- size_t needed_pages = 0; ++ size_t jit_backing = 0; ++ size_t pages_to_trim = 0; + +- backed_pressure = get_jit_backed_pressure(kctx); ++#if !MALI_USE_CSF ++ lockdep_assert_held(&kctx->jctx.lock); ++#endif /* !MALI_USE_CSF */ ++ lockdep_assert_held(&kctx->reg_lock); ++ lockdep_assert_held(&kctx->jit_evict_lock); ++ ++ jit_backing = get_jit_phys_backing(kctx); + + /* It is possible that this is the case - if this is the first + * allocation after "ignore_pressure_limit" allocation. 
+ */ +- if (backed_pressure > kctx->jit_phys_pages_limit) { +- needed_pages += +- (backed_pressure - kctx->jit_phys_pages_limit) +- + info->va_pages; ++ if (jit_backing > kctx->jit_phys_pages_limit) { ++ pages_to_trim += (jit_backing - kctx->jit_phys_pages_limit) + ++ needed_pages; + } else { +- size_t backed_diff = +- kctx->jit_phys_pages_limit - backed_pressure; ++ size_t backed_diff = kctx->jit_phys_pages_limit - jit_backing; + +- if (info->va_pages > backed_diff) +- needed_pages += info->va_pages - backed_diff; ++ if (needed_pages > backed_diff) ++ pages_to_trim += needed_pages - backed_diff; + } + +- if (needed_pages) { +- size_t trimmed_pages = kbase_mem_jit_trim_pages(kctx, +- needed_pages); ++ if (pages_to_trim) { ++ size_t trimmed_pages = ++ kbase_mem_jit_trim_pages(kctx, pages_to_trim); + + /* This should never happen - we already asserted that + * we are not violating JIT pressure limit in earlier +@@ -3586,10 +3900,10 @@ static void jit_trim_necessary_pages(struct kbase_context *kctx, + * must have enough unused pages to satisfy the new + * allocation + */ +- WARN_ON(trimmed_pages < needed_pages); ++ WARN_ON(trimmed_pages < pages_to_trim); + } + } +-#endif /* MALI_JIT_PRESSURE_LIMIT */ ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + /** + * jit_allow_allocate() - check whether basic conditions are satisfied to allow +@@ -3606,10 +3920,14 @@ static bool jit_allow_allocate(struct kbase_context *kctx, + const struct base_jit_alloc_info *info, + bool ignore_pressure_limit) + { ++#if MALI_USE_CSF ++ lockdep_assert_held(&kctx->csf.kcpu_queues.lock); ++#else + lockdep_assert_held(&kctx->jctx.lock); ++#endif + +-#if MALI_JIT_PRESSURE_LIMIT +- if (likely(!ignore_pressure_limit) && ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ if (!ignore_pressure_limit && + ((kctx->jit_phys_pages_limit <= kctx->jit_current_phys_pressure) || + (info->va_pages > (kctx->jit_phys_pages_limit - kctx->jit_current_phys_pressure)))) { + dev_dbg(kctx->kbdev->dev, +@@ -3618,7 +3936,7 @@ static 
bool jit_allow_allocate(struct kbase_context *kctx, + kctx->jit_phys_pages_limit); + return false; + } +-#endif /* MALI_JIT_PRESSURE_LIMIT */ ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + if (kctx->jit_current_allocations >= kctx->jit_max_allocations) { + /* Too many current allocations */ +@@ -3644,123 +3962,156 @@ static bool jit_allow_allocate(struct kbase_context *kctx, + return true; + } + ++static struct kbase_va_region * ++find_reasonable_region(const struct base_jit_alloc_info *info, ++ struct list_head *pool_head, bool ignore_usage_id) ++{ ++ struct kbase_va_region *closest_reg = NULL; ++ struct kbase_va_region *walker; ++ size_t current_diff = SIZE_MAX; ++ ++ list_for_each_entry(walker, pool_head, jit_node) { ++ if ((ignore_usage_id || ++ walker->jit_usage_id == info->usage_id) && ++ walker->jit_bin_id == info->bin_id && ++ meet_size_and_tiler_align_top_requirements(walker, info)) { ++ size_t min_size, max_size, diff; ++ ++ /* ++ * The JIT allocations VA requirements have been met, ++ * it's suitable but other allocations might be a ++ * better fit. 
++ */ ++ min_size = min_t(size_t, walker->gpu_alloc->nents, ++ info->commit_pages); ++ max_size = max_t(size_t, walker->gpu_alloc->nents, ++ info->commit_pages); ++ diff = max_size - min_size; ++ ++ if (current_diff > diff) { ++ current_diff = diff; ++ closest_reg = walker; ++ } ++ ++ /* The allocation is an exact match */ ++ if (current_diff == 0) ++ break; ++ } ++ } ++ ++ return closest_reg; ++} ++ + struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, + const struct base_jit_alloc_info *info, + bool ignore_pressure_limit) + { + struct kbase_va_region *reg = NULL; ++ struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; ++ int i; + ++#if MALI_USE_CSF ++ lockdep_assert_held(&kctx->csf.kcpu_queues.lock); ++#else + lockdep_assert_held(&kctx->jctx.lock); ++#endif + + if (!jit_allow_allocate(kctx, info, ignore_pressure_limit)) + return NULL; + +-#if MALI_JIT_PRESSURE_LIMIT +- if (!ignore_pressure_limit) +- jit_trim_necessary_pages(kctx, info); +-#endif /* MALI_JIT_PRESSURE_LIMIT */ ++#ifdef CONFIG_MALI_2MB_ALLOC ++ /* Preallocate memory for the sub-allocation structs */ ++ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { ++ prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); ++ if (!prealloc_sas[i]) ++ goto end; ++ } ++#endif + ++ kbase_gpu_vm_lock(kctx); + mutex_lock(&kctx->jit_evict_lock); + + /* + * Scan the pool for an existing allocation which meets our + * requirements and remove it. 
+ */ +- if (info->usage_id != 0) { ++ if (info->usage_id != 0) + /* First scan for an allocation with the same usage ID */ +- struct kbase_va_region *walker; +- size_t current_diff = SIZE_MAX; +- +- list_for_each_entry(walker, &kctx->jit_pool_head, jit_node) { +- +- if (walker->jit_usage_id == info->usage_id && +- walker->jit_bin_id == info->bin_id && +- meet_size_and_tiler_align_top_requirements( +- kctx, walker, info)) { +- size_t min_size, max_size, diff; +- +- /* +- * The JIT allocations VA requirements have been +- * met, it's suitable but other allocations +- * might be a better fit. +- */ +- min_size = min_t(size_t, +- walker->gpu_alloc->nents, +- info->commit_pages); +- max_size = max_t(size_t, +- walker->gpu_alloc->nents, +- info->commit_pages); +- diff = max_size - min_size; +- +- if (current_diff > diff) { +- current_diff = diff; +- reg = walker; +- } +- +- /* The allocation is an exact match */ +- if (current_diff == 0) +- break; +- } +- } +- } ++ reg = find_reasonable_region(info, &kctx->jit_pool_head, false); + +- if (!reg) { ++ if (!reg) + /* No allocation with the same usage ID, or usage IDs not in + * use. Search for an allocation we can reuse. + */ +- struct kbase_va_region *walker; +- size_t current_diff = SIZE_MAX; +- +- list_for_each_entry(walker, &kctx->jit_pool_head, jit_node) { +- +- if (walker->jit_bin_id == info->bin_id && +- meet_size_and_tiler_align_top_requirements( +- kctx, walker, info)) { +- size_t min_size, max_size, diff; +- +- /* +- * The JIT allocations VA requirements have been +- * met, it's suitable but other allocations +- * might be a better fit. +- */ +- min_size = min_t(size_t, +- walker->gpu_alloc->nents, +- info->commit_pages); +- max_size = max_t(size_t, +- walker->gpu_alloc->nents, +- info->commit_pages); +- diff = max_size - min_size; +- +- if (current_diff > diff) { +- current_diff = diff; +- reg = walker; +- } +- +- /* The allocation is an exact match, so stop +- * looking. 
+- */ +- if (current_diff == 0) +- break; +- } +- } +- } ++ reg = find_reasonable_region(info, &kctx->jit_pool_head, true); + + if (reg) { ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ size_t needed_pages = 0; ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ int ret; ++ + /* + * Remove the found region from the pool and add it to the + * active list. + */ + list_move(&reg->jit_node, &kctx->jit_active_head); + ++ WARN_ON(reg->gpu_alloc->evicted); ++ + /* + * Remove the allocation from the eviction list as it's no + * longer eligible for eviction. This must be done before + * dropping the jit_evict_lock + */ + list_del_init(&reg->gpu_alloc->evict_node); ++ ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ if (!ignore_pressure_limit) { ++ if (info->commit_pages > reg->gpu_alloc->nents) ++ needed_pages = info->commit_pages - ++ reg->gpu_alloc->nents; ++ ++ /* Update early the recycled JIT region's estimate of ++ * used_pages to ensure it doesn't get trimmed ++ * undesirably. This is needed as the recycled JIT ++ * region has been added to the active list but the ++ * number of used pages for it would be zero, so it ++ * could get trimmed instead of other allocations only ++ * to be regrown later resulting in a breach of the JIT ++ * physical pressure limit. ++ * Also that trimming would disturb the accounting of ++ * physical pages, i.e. the VM stats, as the number of ++ * backing pages would have changed when the call to ++ * kbase_mem_evictable_unmark_reclaim is made. ++ * ++ * The second call to update pressure at the end of ++ * this function would effectively be a nop.
++ */ ++ kbase_jit_report_update_pressure( ++ kctx, reg, info->va_pages, ++ KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); ++ ++ kbase_jit_request_phys_increase_locked(kctx, ++ needed_pages); ++ } ++#endif + mutex_unlock(&kctx->jit_evict_lock); + +- if (kbase_jit_grow(kctx, info, reg) < 0) { ++ /* kbase_jit_grow() can release & reacquire 'kctx->reg_lock', ++ * so any state protected by that lock might need to be ++ * re-evaluated if more code is added here in future. ++ */ ++ ret = kbase_jit_grow(kctx, info, reg, prealloc_sas); ++ ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ if (!ignore_pressure_limit) ++ kbase_jit_done_phys_increase(kctx, needed_pages); ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ ++ kbase_gpu_vm_unlock(kctx); ++ ++ if (ret < 0) { + /* + * An update to an allocation from the pool failed, + * chances are slim a new allocation would fair any +@@ -3770,10 +4121,21 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, + dev_dbg(kctx->kbdev->dev, + "JIT allocation resize failed: va_pages 0x%llx, commit_pages 0x%llx\n", + info->va_pages, info->commit_pages); ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ /* Undo the early change made to the recycled JIT ++ * region's estimate of used_pages. 
++ */ ++ if (!ignore_pressure_limit) { ++ kbase_jit_report_update_pressure( ++ kctx, reg, 0, ++ KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); ++ } ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + mutex_lock(&kctx->jit_evict_lock); + list_move(&reg->jit_node, &kctx->jit_pool_head); + mutex_unlock(&kctx->jit_evict_lock); +- return NULL; ++ reg = NULL; ++ goto end; + } + } else { + /* No suitable JIT allocation was found so create a new one */ +@@ -3783,15 +4145,28 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, + BASEP_MEM_NO_USER_FREE; + u64 gpu_addr; + +- mutex_unlock(&kctx->jit_evict_lock); +- ++#if !MALI_USE_CSF + if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) + flags |= BASE_MEM_TILER_ALIGN_TOP; ++#endif /* !MALI_USE_CSF */ + + flags |= base_mem_group_id_set(kctx->jit_group_id); ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ if (!ignore_pressure_limit) { ++ flags |= BASEP_MEM_PERFORM_JIT_TRIM; ++ /* The corresponding call to 'done_phys_increase' would ++ * be made inside the kbase_mem_alloc(). ++ */ ++ kbase_jit_request_phys_increase_locked( ++ kctx, info->commit_pages); ++ } ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ ++ mutex_unlock(&kctx->jit_evict_lock); ++ kbase_gpu_vm_unlock(kctx); + + reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, +- info->extent, &flags, &gpu_addr); ++ info->extension, &flags, &gpu_addr); + if (!reg) { + /* Most likely not enough GPU virtual space left for + * the new JIT allocation.
+@@ -3799,12 +4174,22 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, + dev_dbg(kctx->kbdev->dev, + "Failed to allocate JIT memory: va_pages 0x%llx, commit_pages 0x%llx\n", + info->va_pages, info->commit_pages); +- return NULL; ++ goto end; + } + +- mutex_lock(&kctx->jit_evict_lock); +- list_add(&reg->jit_node, &kctx->jit_active_head); +- mutex_unlock(&kctx->jit_evict_lock); ++ if (!ignore_pressure_limit) { ++ /* Due to enforcing of pressure limit, kbase_mem_alloc ++ * was instructed to perform the trimming which in turn ++ * would have ensured that the new JIT allocation is ++ * already in the jit_active_head list, so nothing to ++ * do here. ++ */ ++ WARN_ON(list_empty(&reg->jit_node)); ++ } else { ++ mutex_lock(&kctx->jit_evict_lock); ++ list_add(&reg->jit_node, &kctx->jit_active_head); ++ mutex_unlock(&kctx->jit_evict_lock); ++ } + } + + trace_mali_jit_alloc(reg, info->id); +@@ -3816,13 +4201,18 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, + + reg->jit_usage_id = info->usage_id; + reg->jit_bin_id = info->bin_id; +-#if MALI_JIT_PRESSURE_LIMIT ++ reg->flags |= KBASE_REG_ACTIVE_JIT_ALLOC; ++#if MALI_JIT_PRESSURE_LIMIT_BASE + if (info->flags & BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) + reg->flags = reg->flags | KBASE_REG_HEAP_INFO_IS_SIZE; + reg->heap_info_gpu_addr = info->heap_info_gpu_addr; + kbase_jit_report_update_pressure(kctx, reg, info->va_pages, + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); +-#endif /* MALI_JIT_PRESSURE_LIMIT */ ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ ++end: ++ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) ++ kfree(prealloc_sas[i]); + + return reg; + } +@@ -3844,15 +4234,18 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) + div_u64(old_pages * (100 - kctx->trim_level), 100)); + u64 delta = old_pages - new_size; + +- if (delta) ++ if (delta) { ++ mutex_lock(&kctx->reg_lock); + kbase_mem_shrink(kctx, reg, old_pages - delta); ++ mutex_unlock(&kctx->reg_lock); ++ } + } +
+-#if MALI_JIT_PRESSURE_LIMIT ++#if MALI_JIT_PRESSURE_LIMIT_BASE + reg->heap_info_gpu_addr = 0; + kbase_jit_report_update_pressure(kctx, reg, 0, + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); +-#endif /* MALI_JIT_PRESSURE_LIMIT */ ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + kctx->jit_current_allocations--; + kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--; +@@ -3863,6 +4256,7 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) + + kbase_gpu_vm_lock(kctx); + reg->flags |= KBASE_REG_DONT_NEED; ++ reg->flags &= ~KBASE_REG_ACTIVE_JIT_ALLOC; + kbase_mem_shrink_cpu_mapping(kctx, reg, 0, reg->gpu_alloc->nents); + kbase_gpu_vm_unlock(kctx); + +@@ -3875,6 +4269,7 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) + /* This allocation can't already be on a list. */ + WARN_ON(!list_empty(&reg->gpu_alloc->evict_node)); + list_add(&reg->gpu_alloc->evict_node, &kctx->evict_list); ++ atomic_add(reg->gpu_alloc->nents, &kctx->evict_nents); + + list_move(&reg->jit_node, &kctx->jit_pool_head); + +@@ -3962,6 +4357,9 @@ void kbase_jit_term(struct kbase_context *kctx) + kbase_mem_free_region(kctx, walker); + mutex_lock(&kctx->jit_evict_lock); + } ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ WARN_ON(kctx->jit_phys_pages_to_be_allocated); ++#endif + mutex_unlock(&kctx->jit_evict_lock); + kbase_gpu_vm_unlock(kctx); + +@@ -3972,7 +4370,7 @@ void kbase_jit_term(struct kbase_context *kctx) + cancel_work_sync(&kctx->jit_work); + } + +-#if MALI_JIT_PRESSURE_LIMIT ++#if MALI_JIT_PRESSURE_LIMIT_BASE + void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, + struct kbase_va_region *reg, unsigned int flags) + { +@@ -4015,16 +4413,18 @@ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, + out: + return; + } +-#endif /* MALI_JIT_PRESSURE_LIMIT */ ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + +-#if MALI_JIT_PRESSURE_LIMIT ++#if MALI_JIT_PRESSURE_LIMIT_BASE + void kbase_jit_report_update_pressure(struct
kbase_context *kctx, + struct kbase_va_region *reg, u64 new_used_pages, + unsigned int flags) + { + u64 diff; + ++#if !MALI_USE_CSF + lockdep_assert_held(&kctx->jctx.lock); ++#endif /* !MALI_USE_CSF */ + + trace_mali_jit_report_pressure(reg, new_used_pages, + kctx->jit_current_phys_pressure + new_used_pages - +@@ -4053,19 +4453,22 @@ void kbase_jit_report_update_pressure(struct kbase_context *kctx, + } + + } +-#endif /* MALI_JIT_PRESSURE_LIMIT */ ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + +-bool kbase_has_exec_va_zone(struct kbase_context *kctx) ++#if MALI_USE_CSF ++static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc) + { +- bool has_exec_va_zone; ++ if (alloc->nents) { ++ struct page **pages = alloc->imported.user_buf.pages; ++ long i; + +- kbase_gpu_vm_lock(kctx); +- has_exec_va_zone = (kctx->exec_va_start != U64_MAX); +- kbase_gpu_vm_unlock(kctx); ++ WARN_ON(alloc->nents != alloc->imported.user_buf.nr_pages); + +- return has_exec_va_zone; ++ for (i = 0; i < alloc->nents; i++) ++ put_page(pages[i]); ++ } + } +- ++#endif + + int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx, + struct kbase_va_region *reg) +@@ -4090,7 +4493,7 @@ int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx, + if (WARN_ON(reg->gpu_alloc->imported.user_buf.mm != current->mm)) + return -EINVAL; + +-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE + pinned_pages = get_user_pages(NULL, mm, + address, + alloc->imported.user_buf.nr_pages, +@@ -4102,24 +4505,30 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE + reg->flags & KBASE_REG_GPU_WR, + 0, pages, NULL); + #endif +-#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) ++#elif KERNEL_VERSION(4, 9, 0) > LINUX_VERSION_CODE + pinned_pages = get_user_pages_remote(NULL, mm, + address, + alloc->imported.user_buf.nr_pages, + reg->flags & KBASE_REG_GPU_WR, + 0, pages, NULL); +-#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) ++#elif KERNEL_VERSION(4, 10, 0) > 
LINUX_VERSION_CODE + pinned_pages = get_user_pages_remote(NULL, mm, + address, + alloc->imported.user_buf.nr_pages, + reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, + pages, NULL); +-#else ++#elif KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE + pinned_pages = get_user_pages_remote(NULL, mm, + address, + alloc->imported.user_buf.nr_pages, + reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, + pages, NULL, NULL); ++#else ++ pinned_pages = get_user_pages_remote(mm, ++ address, ++ alloc->imported.user_buf.nr_pages, ++ reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, ++ pages, NULL, NULL); + #endif + + if (pinned_pages <= 0) +@@ -4232,12 +4641,16 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, + DMA_BIDIRECTIONAL); + if (writeable) + set_page_dirty_lock(pages[i]); ++#if !MALI_USE_CSF + put_page(pages[i]); + pages[i] = NULL; ++#endif + + size -= local_size; + } ++#if !MALI_USE_CSF + alloc->nents = 0; ++#endif + } + + int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, +@@ -4296,7 +4709,8 @@ struct kbase_mem_phy_alloc *kbase_map_external_resource( + goto exit; + + reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++; +- if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) { ++ if (reg->gpu_alloc->imported.user_buf ++ .current_mapping_usage_count == 1) { + err = kbase_jd_user_buf_map(kctx, reg); + if (err) { + reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--; +@@ -4331,7 +4745,7 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { + alloc->imported.user_buf.current_mapping_usage_count--; + +- if (0 == alloc->imported.user_buf.current_mapping_usage_count) { ++ if (alloc->imported.user_buf.current_mapping_usage_count == 0) { + bool writeable = true; + + if (!kbase_is_region_invalid_or_free(reg) && +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h +index 6e921ec..e9ac809 
100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,15 +17,10 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /** +- * @file mali_kbase_mem.h +- * Base kernel memory APIs ++ * DOC: Base kernel memory APIs + */ + + #ifndef _KBASE_MEM_H_ +@@ -35,7 +31,7 @@ + #endif + + #include +-#include "mali_base_kernel.h" ++#include + #include + #include "mali_kbase_pm.h" + #include "mali_kbase_defs.h" +@@ -48,10 +44,13 @@ static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, + /* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */ + #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2) /* round to 4 pages */ + +-/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by 8 pages. 
+-The MMU reads in 8 page table entries from memory at a time, if we have more than one page fault within the same 8 pages and +-page tables are updated accordingly, the MMU does not re-read the page table entries from memory for the subsequent page table +-updates and generates duplicate page faults as the page table information used by the MMU is not valid. */ ++/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by ++ * 8 pages. The MMU reads in 8 page table entries from memory at a time, if we ++ * have more than one page fault within the same 8 pages and page tables are ++ * updated accordingly, the MMU does not re-read the page table entries from ++ * memory for the subsequent page table updates and generates duplicate page ++ * faults as the page table information used by the MMU is not valid. ++ */ + #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3) /* round to 8 pages */ + + #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0) /* round to 1 page */ +@@ -60,7 +59,8 @@ updates and generates duplicate page faults as the page table information used b + #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2) + #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316) + #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630) +-/** ++ ++/* + * A CPU mapping + */ + struct kbase_cpu_mapping { +@@ -81,16 +81,15 @@ enum kbase_memory_type { + }; + + /* internal structure, mirroring base_mem_aliasing_info, +- * but with alloc instead of a gpu va (handle) */ ++ * but with alloc instead of a gpu va (handle) ++ */ + struct kbase_aliased { + struct kbase_mem_phy_alloc *alloc; /* NULL for special, non-NULL for native */ + u64 offset; /* in pages */ + u64 length; /* in pages */ + }; + +-/** +- * @brief Physical pages tracking object properties +- */ ++/* Physical 
pages tracking object properties */ + #define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED (1u << 0) + #define KBASE_MEM_PHY_ALLOC_LARGE (1u << 1) + +@@ -105,7 +104,13 @@ struct kbase_aliased { + * updated as part of the change. + * + * @kref: number of users of this alloc +- * @gpu_mappings: count number of times mapped on the GPU ++ * @gpu_mappings: count number of times mapped on the GPU. Indicates the number ++ * of references there are to the physical pages from different ++ * GPU VA regions. ++ * @kernel_mappings: count number of times mapped on the CPU, specifically in ++ * the kernel. Indicates the number of references there are ++ * to the physical pages to prevent flag changes or shrink ++ * while maps are still held. + * @nents: 0..N + * @pages: N elements, only 0..nents are valid + * @mappings: List of CPU mappings of this physical memory allocation. +@@ -128,6 +133,7 @@ struct kbase_aliased { + struct kbase_mem_phy_alloc { + struct kref kref; + atomic_t gpu_mappings; ++ atomic_t kernel_mappings; + size_t nents; + struct tagged_addr *pages; + struct list_head mappings; +@@ -141,6 +147,7 @@ struct kbase_mem_phy_alloc { + + union { + struct { ++ struct kbase_context *kctx; + struct dma_buf *dma_buf; + struct dma_buf_attachment *dma_attachment; + unsigned int current_mapping_usage_count; +@@ -210,12 +217,36 @@ static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc * + KBASE_DEBUG_ASSERT(alloc); + /* we only track mappings of NATIVE buffers */ + if (alloc->type == KBASE_MEM_TYPE_NATIVE) +- if (0 > atomic_dec_return(&alloc->gpu_mappings)) { ++ if (atomic_dec_return(&alloc->gpu_mappings) < 0) { + pr_err("Mismatched %s:\n", __func__); + dump_stack(); + } + } + ++/** ++ * kbase_mem_phy_alloc_kernel_mapped - Increment kernel_mappings ++ * counter for a memory region to prevent commit and flag changes ++ * ++ * @alloc: Pointer to physical pages tracking object ++ */ ++static inline void ++kbase_mem_phy_alloc_kernel_mapped(struct kbase_mem_phy_alloc 
*alloc) ++{ ++ atomic_inc(&alloc->kernel_mappings); ++} ++ ++/** ++ * kbase_mem_phy_alloc_kernel_unmapped - Decrement kernel_mappings ++ * counter for a memory region to allow commit and flag changes ++ * ++ * @alloc: Pointer to physical pages tracking object ++ */ ++static inline void ++kbase_mem_phy_alloc_kernel_unmapped(struct kbase_mem_phy_alloc *alloc) ++{ ++ WARN_ON(atomic_dec_return(&alloc->kernel_mappings) < 0); ++} ++ + /** + * kbase_mem_is_imported - Indicate whether a memory type is imported + * +@@ -248,7 +279,7 @@ static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_m + } + + /** +- * A GPU memory region, and attributes for CPU mappings. ++ * struct kbase_va_region - A GPU memory region, and attributes for CPU mappings + * + * @rblink: Node in a red-black tree of memory regions within the same zone of + * the GPU's virtual address space. +@@ -262,13 +293,31 @@ static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_m + * @threshold_pages: If non-zero and the amount of memory committed to a region + * that can grow on page fault exceeds this number of pages + * then the driver switches to incremental rendering. +- * @extent: Number of pages allocated on page fault. ++ * @flags: Flags ++ * @extension: Number of pages allocated on page fault. + * @cpu_alloc: The physical memory we mmap to the CPU when mapping this region. + * @gpu_alloc: The physical memory we mmap to the GPU when mapping this region. + * @jit_node: Links to neighboring regions in the just-in-time memory pool. + * @jit_usage_id: The last just-in-time memory usage ID for this region. + * @jit_bin_id: The just-in-time memory bin this region came from. + * @va_refcnt: Number of users of this region. Protected by reg_lock. 
++ * @heap_info_gpu_addr: Pointer to an object in GPU memory defining an end of ++ * an allocated region ++ * The object can be one of: ++ * - u32 value defining the size of the region ++ * - u64 pointer first unused byte in the region ++ * The interpretation of the object depends on ++ * BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE flag in ++ * jit_info_flags - if it is set, the heap info object ++ * should be interpreted as size. ++ * @used_pages: The current estimate of the number of pages used, which in ++ * normal use is either: ++ * - the initial estimate == va_pages ++ * - the actual pages used, as found by a JIT usage report ++ * Note that since the value is calculated from GPU memory after a ++ * JIT usage report, at any point in time it is allowed to take a ++ * random value that is no greater than va_pages (e.g. it may be ++ * greater than gpu_alloc->nents) + */ + struct kbase_va_region { + struct rb_node rblink; +@@ -308,8 +357,13 @@ struct kbase_va_region { + #define KBASE_REG_SHARE_BOTH (1ul << 10) + + /* Space for 4 different zones */ +-#define KBASE_REG_ZONE_MASK (3ul << 11) +-#define KBASE_REG_ZONE(x) (((x) & 3) << 11) ++#define KBASE_REG_ZONE_MASK ((KBASE_REG_ZONE_MAX - 1ul) << 11) ++#define KBASE_REG_ZONE(x) (((x) & (KBASE_REG_ZONE_MAX - 1ul)) << 11) ++#define KBASE_REG_ZONE_IDX(x) (((x) & KBASE_REG_ZONE_MASK) >> 11) ++ ++#if ((KBASE_REG_ZONE_MAX - 1) & 0x3) != (KBASE_REG_ZONE_MAX - 1) ++#error KBASE_REG_ZONE_MAX too large for allocation of KBASE_REG_<...> bits ++#endif + + /* GPU read access */ + #define KBASE_REG_GPU_RD (1ul<<13) +@@ -328,14 +382,29 @@ struct kbase_va_region { + /* Imported buffer is padded? */ + #define KBASE_REG_IMPORT_PAD (1ul << 21) + ++#if MALI_USE_CSF ++/* CSF event memory */ ++#define KBASE_REG_CSF_EVENT (1ul << 22) ++#else + /* Bit 22 is reserved. 
+ * +- * Do not remove, use the next unreserved bit for new flags */ ++ * Do not remove, use the next unreserved bit for new flags ++ */ + #define KBASE_REG_RESERVED_BIT_22 (1ul << 22) ++#endif + +-/* The top of the initial commit is aligned to extent pages. +- * Extent must be a power of 2 */ ++#if !MALI_USE_CSF ++/* The top of the initial commit is aligned to extension pages. ++ * Extension must be a power of 2 ++ */ + #define KBASE_REG_TILER_ALIGN_TOP (1ul << 23) ++#else ++/* Bit 23 is reserved. ++ * ++ * Do not remove, use the next unreserved bit for new flags ++ */ ++#define KBASE_REG_RESERVED_BIT_23 (1ul << 23) ++#endif /* !MALI_USE_CSF */ + + /* Whilst this flag is set the GPU allocation is not supposed to be freed by + * user space. The flag will remain set for the lifetime of JIT allocations. +@@ -367,6 +436,9 @@ struct kbase_va_region { + */ + #define KBASE_REG_HEAP_INFO_IS_SIZE (1ul << 27) + ++/* Allocation is actively used for JIT memory */ ++#define KBASE_REG_ACTIVE_JIT_ALLOC (1ul << 28) ++ + #define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0) + + /* only used with 32-bit clients */ +@@ -390,15 +462,21 @@ struct kbase_va_region { + #define KBASE_REG_ZONE_EXEC_VA KBASE_REG_ZONE(2) + #define KBASE_REG_ZONE_EXEC_VA_MAX_PAGES ((1ULL << 32) >> PAGE_SHIFT) /* 4 GB */ + ++#if MALI_USE_CSF ++#define KBASE_REG_ZONE_MCU_SHARED KBASE_REG_ZONE(3) ++#define KBASE_REG_ZONE_MCU_SHARED_BASE (0x04000000ULL >> PAGE_SHIFT) ++#define KBASE_REG_ZONE_MCU_SHARED_SIZE (((0x08000000ULL) >> PAGE_SHIFT) - \ ++ KBASE_REG_ZONE_MCU_SHARED_BASE) ++#endif + + unsigned long flags; +- size_t extent; ++ size_t extension; + struct kbase_mem_phy_alloc *cpu_alloc; + struct kbase_mem_phy_alloc *gpu_alloc; + struct list_head jit_node; + u16 jit_usage_id; + u8 jit_bin_id; +-#if MALI_JIT_PRESSURE_LIMIT ++#if MALI_JIT_PRESSURE_LIMIT_BASE + /* Pointer to an object in GPU memory defining an end of an allocated + * region + * +@@ -423,7 +501,7 @@ struct kbase_va_region { + * gpu_alloc->nents) + */ + 
size_t used_pages; +-#endif /* MALI_JIT_PRESSURE_LIMIT */ ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + int va_refcnt; + }; +@@ -471,7 +549,7 @@ static inline struct kbase_va_region *kbase_va_region_alloc_get( + WARN_ON(!region->va_refcnt); + + /* non-atomic as kctx->reg_lock is held */ +- dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %p\n", ++ dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %pK\n", + region->va_refcnt, (void *)region); + region->va_refcnt++; + +@@ -488,7 +566,7 @@ static inline struct kbase_va_region *kbase_va_region_alloc_put( + + /* non-atomic as kctx->reg_lock is held */ + region->va_refcnt--; +- dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %p\n", ++ dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %pK\n", + region->va_refcnt, (void *)region); + if (!region->va_refcnt) + kbase_region_refcnt_free(region); +@@ -580,6 +658,7 @@ static inline struct kbase_mem_phy_alloc *kbase_alloc_create( + + kref_init(&alloc->kref); + atomic_set(&alloc->gpu_mappings, 0); ++ atomic_set(&alloc->kernel_mappings, 0); + alloc->nents = 0; + alloc->pages = (void *)(alloc + 1); + INIT_LIST_HEAD(&alloc->mappings); +@@ -1019,7 +1098,9 @@ struct kbase_va_region *kbase_find_region_enclosing_address( + struct rb_root *rbtree, u64 gpu_addr); + + /** +- * @brief Check that a pointer is actually a valid region. ++ * Check that a pointer is actually a valid region. ++ * @kctx: kbase context containing the region ++ * @gpu_addr: pointer to check + * + * Must be called with context lock held. + */ +@@ -1048,7 +1129,7 @@ bool kbase_check_import_flags(unsigned long flags); + * @flags: The flags passed from user space + * @va_pages: The size of the requested region, in pages. + * @commit_pages: Number of pages to commit initially. 
+- * @extent: Number of pages to grow by on GPU page fault and/or alignment ++ * @extension: Number of pages to grow by on GPU page fault and/or alignment + * (depending on flags) + * + * Makes checks on the size parameters passed in from user space for a memory +@@ -1057,7 +1138,7 @@ bool kbase_check_import_flags(unsigned long flags); + * Return: 0 if sizes are valid for these flags, negative error code otherwise + */ + int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, +- u64 va_pages, u64 commit_pages, u64 extent); ++ u64 va_pages, u64 commit_pages, u64 extension); + + /** + * kbase_update_region_flags - Convert user space flags to kernel region flags +@@ -1080,14 +1161,21 @@ void kbase_gpu_vm_unlock(struct kbase_context *kctx); + int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size); + + /** +- * @brief Register region and map it on the GPU. ++ * Register region and map it on the GPU. ++ * @kctx: kbase context containing the region ++ * @reg: the region to add ++ * @addr: the address to insert the region at ++ * @nr_pages: the number of pages in the region ++ * @align: the minimum alignment in pages + * + * Call kbase_add_va_region() and map the region on the GPU. + */ + int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align); + + /** +- * @brief Remove the region from the GPU and unregister it. ++ * Remove the region from the GPU and unregister it. ++ * @kctx: KBase context ++ * @reg: The region to remove + * + * Must be called with context lock held. + */ +@@ -1136,20 +1224,23 @@ void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr); + + void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); + +-/** Dump the MMU tables to a buffer ++/** ++ * kbase_mmu_dump() - Dump the MMU tables to a buffer. + * +- * This function allocates a buffer (of @c nr_pages pages) to hold a dump of the MMU tables and fills it. 
If the +- * buffer is too small then the return value will be NULL. ++ * This function allocates a buffer (of @c nr_pages pages) to hold a dump ++ * of the MMU tables and fills it. If the buffer is too small ++ * then the return value will be NULL. + * + * The GPU vm lock must be held when calling this function. + * +- * The buffer returned should be freed with @ref vfree when it is no longer required. ++ * The buffer returned should be freed with @ref vfree when it is no longer ++ * required. + * +- * @param[in] kctx The kbase context to dump +- * @param[in] nr_pages The number of pages to allocate for the buffer. ++ * @kctx: The kbase context to dump ++ * @nr_pages: The number of pages to allocate for the buffer. + * +- * @return The address of the buffer containing the MMU dump or NULL on error (including if the @c nr_pages is too +- * small) ++ * Return: The address of the buffer containing the MMU dump or NULL on error ++ * (including if the @c nr_pages is too small) + */ + void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages); + +@@ -1174,25 +1265,27 @@ void kbase_os_mem_map_lock(struct kbase_context *kctx); + void kbase_os_mem_map_unlock(struct kbase_context *kctx); + + /** +- * @brief Update the memory allocation counters for the current process ++ * kbasep_os_process_page_usage_update() - Update the memory allocation ++ * counters for the current process. + * +- * OS specific call to updates the current memory allocation counters for the current process with +- * the supplied delta. ++ * OS specific call to updates the current memory allocation counters ++ * for the current process with the supplied delta. + * +- * @param[in] kctx The kbase context +- * @param[in] pages The desired delta to apply to the memory usage counters. ++ * @kctx: The kbase context ++ * @pages: The desired delta to apply to the memory usage counters. 
+ */ + + void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages); + + /** +- * @brief Add to the memory allocation counters for the current process ++ * kbase_process_page_usage_inc() - Add to the memory allocation counters for ++ * the current process + * +- * OS specific call to add to the current memory allocation counters for the current process by +- * the supplied amount. ++ * OS specific call to add to the current memory allocation counters for ++ * the current process by the supplied amount. + * +- * @param[in] kctx The kernel base context used for the allocation. +- * @param[in] pages The desired delta to apply to the memory usage counters. ++ * @kctx: The kernel base context used for the allocation. ++ * @pages: The desired delta to apply to the memory usage counters. + */ + + static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages) +@@ -1201,13 +1294,14 @@ static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int + } + + /** +- * @brief Subtract from the memory allocation counters for the current process ++ * kbase_process_page_usage_dec() - Subtract from the memory allocation ++ * counters for the current process. + * +- * OS specific call to subtract from the current memory allocation counters for the current process by +- * the supplied amount. ++ * OS specific call to subtract from the current memory allocation counters ++ * for the current process by the supplied amount. + * +- * @param[in] kctx The kernel base context used for the allocation. +- * @param[in] pages The desired delta to apply to the memory usage counters. ++ * @kctx: The kernel base context used for the allocation. ++ * @pages: The desired delta to apply to the memory usage counters. 
+ */ + + static inline void kbase_process_page_usage_dec(struct kbase_context *kctx, int pages) +@@ -1332,15 +1426,15 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( + struct kbase_sub_alloc **prealloc_sa); + + /** +-* @brief Free physical pages. +-* +-* Frees \a nr_pages and updates the alloc object. +-* +-* @param[in] alloc allocation object to free pages from +-* @param[in] nr_pages_to_free number of physical pages to free +-* +-* Return: 0 on success, otherwise a negative error code +-*/ ++ * kbase_free_phy_pages_helper() - Free physical pages. ++ * ++ * Frees \a nr_pages and updates the alloc object. ++ * ++ * @alloc: allocation object to free pages from ++ * @nr_pages_to_free: number of physical pages to free ++ * ++ * Return: 0 on success, otherwise a negative error code ++ */ + int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free); + + /** +@@ -1370,7 +1464,8 @@ static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr) + /* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the + * private field stays the same. So we have to be clever and + * use the fact that we only store DMA addresses of whole pages, +- * so the low bits should be zero */ ++ * so the low bits should be zero ++ */ + KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1))); + set_page_private(p, dma_addr >> PAGE_SHIFT); + } else { +@@ -1392,26 +1487,11 @@ static inline void kbase_clear_dma_addr(struct page *p) + } + + /** +- * @brief Process a page fault. +- * +- * @param[in] data work_struct passed by queue_work() +- */ +-void page_fault_worker(struct work_struct *data); +- +-/** +- * @brief Process a bus fault. +- * +- * @param[in] data work_struct passed by queue_work() +- */ +-void bus_fault_worker(struct work_struct *data); +- +-/** +- * @brief Flush MMU workqueues. ++ * kbase_flush_mmu_wqs() - Flush MMU workqueues. ++ * @kbdev: Device pointer. 
+ * + * This function will cause any outstanding page or bus faults to be processed. + * It should be called prior to powering off the GPU. +- * +- * @param[in] kbdev Device pointer + */ + void kbase_flush_mmu_wqs(struct kbase_device *kbdev); + +@@ -1437,7 +1517,7 @@ void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, + void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, + size_t size, enum dma_data_direction dir); + +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + /** + * kbase_jit_debugfs_init - Add per context debugfs entry for JIT. + * @kctx: kbase context +@@ -1497,7 +1577,7 @@ bool kbase_jit_evict(struct kbase_context *kctx); + */ + void kbase_jit_term(struct kbase_context *kctx); + +-#if MALI_JIT_PRESSURE_LIMIT ++#if MALI_JIT_PRESSURE_LIMIT_BASE + /** + * kbase_trace_jit_report_gpu_mem_trace_enabled - variant of + * kbase_trace_jit_report_gpu_mem() that should only be called once the +@@ -1508,7 +1588,7 @@ void kbase_jit_term(struct kbase_context *kctx); + */ + void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, + struct kbase_va_region *reg, unsigned int flags); +-#endif /* MALI_JIT_PRESSURE_LIMIT */ ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + /** + * kbase_trace_jit_report_gpu_mem - Trace information about the GPU memory used +@@ -1530,7 +1610,7 @@ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, + * been included. Also gives no opportunity for the compiler to mess up + * inlining it. 
+ */ +-#if MALI_JIT_PRESSURE_LIMIT ++#if MALI_JIT_PRESSURE_LIMIT_BASE + #define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) \ + do { \ + if (trace_mali_jit_report_gpu_mem_enabled()) \ +@@ -1540,9 +1620,9 @@ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, + #else + #define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) \ + CSTD_NOP(kctx, reg, flags) +-#endif /* MALI_JIT_PRESSURE_LIMIT */ ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + +-#if MALI_JIT_PRESSURE_LIMIT ++#if MALI_JIT_PRESSURE_LIMIT_BASE + /** + * kbase_jit_report_update_pressure - safely update the JIT physical page + * pressure and JIT region's estimate of used_pages +@@ -1562,7 +1642,127 @@ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, + void kbase_jit_report_update_pressure(struct kbase_context *kctx, + struct kbase_va_region *reg, u64 new_used_pages, + unsigned int flags); +-#endif /* MALI_JIT_PRESSURE_LIMIT */ ++ ++/** ++ * kbase_jit_trim_necessary_pages() - calculate and trim the least pages possible to ++ * satisfy a new JIT allocation ++ * ++ * @kctx: Pointer to the kbase context ++ * @needed_pages: Number of JIT physical pages by which trimming is requested. ++ * The actual number of pages trimmed could differ. ++ * ++ * Before allocating a new just-in-time memory region or reusing a previous ++ * one, ensure that the total JIT physical page usage also will not exceed the ++ * pressure limit. ++ * ++ * If there are no reported-on allocations, then we already guarantee this will ++ * be the case - because our current pressure then only comes from the va_pages ++ * of each JIT region, hence JIT physical page usage is guaranteed to be ++ * bounded by this. ++ * ++ * However as soon as JIT allocations become "reported on", the pressure is ++ * lowered to allow new JIT regions to be allocated. 
It is after such a point ++ * that the total JIT physical page usage could (either now or in the future on ++ * a grow-on-GPU-page-fault) exceed the pressure limit, but only on newly ++ * allocated JIT regions. Hence, trim any "reported on" regions. ++ * ++ * Any pages freed will go into the pool and be allocated from there in ++ * kbase_mem_alloc(). ++ */ ++void kbase_jit_trim_necessary_pages(struct kbase_context *kctx, ++ size_t needed_pages); ++ ++/* ++ * Same as kbase_jit_request_phys_increase(), except that Caller is supposed ++ * to take jit_evict_lock also on @kctx before calling this function. ++ */ ++static inline void ++kbase_jit_request_phys_increase_locked(struct kbase_context *kctx, ++ size_t needed_pages) ++{ ++#if !MALI_USE_CSF ++ lockdep_assert_held(&kctx->jctx.lock); ++#endif /* !MALI_USE_CSF */ ++ lockdep_assert_held(&kctx->reg_lock); ++ lockdep_assert_held(&kctx->jit_evict_lock); ++ ++ kctx->jit_phys_pages_to_be_allocated += needed_pages; ++ ++ kbase_jit_trim_necessary_pages(kctx, ++ kctx->jit_phys_pages_to_be_allocated); ++} ++ ++/** ++ * kbase_jit_request_phys_increase() - Increment the backing pages count and do ++ * the required trimming before allocating pages for a JIT allocation. ++ * ++ * @kctx: Pointer to the kbase context ++ * @needed_pages: Number of pages to be allocated for the JIT allocation. ++ * ++ * This function needs to be called before allocating backing pages for a ++ * just-in-time memory region. The backing pages are currently allocated when, ++ * ++ * - A new JIT region is created. ++ * - An old JIT region is reused from the cached pool. ++ * - GPU page fault occurs for the active JIT region. ++ * - Backing is grown for the JIT region through the commit ioctl. ++ * ++ * This function would ensure that the total JIT physical page usage does not ++ * exceed the pressure limit even when the backing pages get allocated ++ * simultaneously for multiple JIT allocations from different threads. 
++ * ++ * There should be a matching call to kbase_jit_done_phys_increase(), after ++ * the pages have been allocated and accounted against the active JIT ++ * allocation. ++ * ++ * Caller is supposed to take reg_lock on @kctx before calling this function. ++ */ ++static inline void kbase_jit_request_phys_increase(struct kbase_context *kctx, ++ size_t needed_pages) ++{ ++#if !MALI_USE_CSF ++ lockdep_assert_held(&kctx->jctx.lock); ++#endif /* !MALI_USE_CSF */ ++ lockdep_assert_held(&kctx->reg_lock); ++ ++ mutex_lock(&kctx->jit_evict_lock); ++ kbase_jit_request_phys_increase_locked(kctx, needed_pages); ++ mutex_unlock(&kctx->jit_evict_lock); ++} ++ ++/** ++ * kbase_jit_done_phys_increase() - Decrement the backing pages count after the ++ * allocation of pages for a JIT allocation. ++ * ++ * @kctx: Pointer to the kbase context ++ * @needed_pages: Number of pages that were allocated for the JIT allocation. ++ * ++ * This function should be called after backing pages have been allocated and ++ * accounted against the active JIT allocation. ++ * The call should be made when the following have been satisfied: ++ * when the allocation is on the jit_active_head. ++ * when additional needed_pages have been allocated. ++ * kctx->reg_lock was held during the above and has not yet been unlocked. ++ * Failure to call this function before unlocking the kctx->reg_lock when ++ * either the above have changed may result in over-accounting the memory. ++ * This ensures kbase_jit_trim_necessary_pages() gets a consistent count of ++ * the memory. ++ * ++ * A matching call to kbase_jit_request_phys_increase() should have been made, ++ * before the allocation of backing pages. ++ * ++ * Caller is supposed to take reg_lock on @kctx before calling this function. 
++ */ ++static inline void kbase_jit_done_phys_increase(struct kbase_context *kctx, ++ size_t needed_pages) ++{ ++ lockdep_assert_held(&kctx->reg_lock); ++ ++ WARN_ON(kctx->jit_phys_pages_to_be_allocated < needed_pages); ++ ++ kctx->jit_phys_pages_to_be_allocated -= needed_pages; ++} ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + /** + * kbase_has_exec_va_zone - EXEC_VA zone predicate +@@ -1693,6 +1893,63 @@ static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool) + */ + void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc); + ++#if MALI_USE_CSF ++/** ++ * kbase_link_event_mem_page - Add the new event memory region to the per ++ * context list of event pages. ++ * @kctx: Pointer to kbase context ++ * @reg: Pointer to the region allocated for event memory. ++ * ++ * The region being linked shouldn't have been marked as free and should ++ * have KBASE_REG_CSF_EVENT flag set for it. ++ */ ++static inline void kbase_link_event_mem_page(struct kbase_context *kctx, ++ struct kbase_va_region *reg) ++{ ++ lockdep_assert_held(&kctx->reg_lock); ++ ++ WARN_ON(reg->flags & KBASE_REG_FREE); ++ WARN_ON(!(reg->flags & KBASE_REG_CSF_EVENT)); ++ ++ list_add(&reg->link, &kctx->csf.event_pages_head); ++} ++ ++/** ++ * kbase_unlink_event_mem_page - Remove the event memory region from the per ++ * context list of event pages. ++ * @kctx: Pointer to kbase context ++ * @reg: Pointer to the region allocated for event memory. ++ * ++ * The region being un-linked shouldn't have been marked as free and should ++ * have KBASE_REG_CSF_EVENT flag set for it. 
++ */ ++static inline void kbase_unlink_event_mem_page(struct kbase_context *kctx, ++ struct kbase_va_region *reg) ++{ ++ lockdep_assert_held(&kctx->reg_lock); ++ ++ WARN_ON(reg->flags & KBASE_REG_FREE); ++ WARN_ON(!(reg->flags & KBASE_REG_CSF_EVENT)); ++ ++ list_del(&reg->link); ++} ++ ++/** ++ * kbase_mcu_shared_interface_region_tracker_init - Initialize the rb tree to ++ * manage the shared interface segment of MCU firmware address space. ++ * @kbdev: Pointer to the kbase device ++ * ++ * Returns zero on success or negative error number on failure. ++ */ ++int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev); ++ ++/** ++ * kbase_mcu_shared_interface_region_tracker_term - Teardown the rb tree ++ * managing the shared interface segment of MCU firmware address space. ++ * @kbdev: Pointer to the kbase device ++ */ ++void kbase_mcu_shared_interface_region_tracker_term(struct kbase_device *kbdev); ++#endif + + /** + * kbase_mem_umm_map - Map dma-buf +@@ -1742,7 +1999,6 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx, + int kbase_mem_do_sync_imported(struct kbase_context *kctx, + struct kbase_va_region *reg, enum kbase_sync_type sync_fn); + +- + /** + * kbase_mem_copy_to_pinned_user_pages - Memcpy from source input page to + * an unaligned address at a given offset from the start of a target page. 
+@@ -1768,4 +2024,76 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, + void *src_page, size_t *to_copy, unsigned int nr_pages, + unsigned int *target_page_nr, size_t offset); + ++/** ++ * kbase_reg_zone_end_pfn - return the end Page Frame Number of @zone ++ * @zone: zone to query ++ * ++ * Return: The end of the zone corresponding to @zone ++ */ ++static inline u64 kbase_reg_zone_end_pfn(struct kbase_reg_zone *zone) ++{ ++ return zone->base_pfn + zone->va_size_pages; ++} ++ ++/** ++ * kbase_ctx_reg_zone_init - initialize a zone in @kctx ++ * @kctx: Pointer to kbase context ++ * @zone_bits: A KBASE_REG_ZONE_<...> to initialize ++ * @base_pfn: Page Frame Number in GPU virtual address space for the start of ++ * the Zone ++ * @va_size_pages: Size of the Zone in pages ++ */ ++static inline void kbase_ctx_reg_zone_init(struct kbase_context *kctx, ++ unsigned long zone_bits, ++ u64 base_pfn, u64 va_size_pages) ++{ ++ struct kbase_reg_zone *zone; ++ ++ lockdep_assert_held(&kctx->reg_lock); ++ WARN_ON((zone_bits & KBASE_REG_ZONE_MASK) != zone_bits); ++ ++ zone = &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)]; ++ *zone = (struct kbase_reg_zone){ ++ .base_pfn = base_pfn, .va_size_pages = va_size_pages, ++ }; ++} ++ ++/** ++ * kbase_ctx_reg_zone_get_nolock - get a zone from @kctx where the caller does ++ * not have @kctx 's region lock ++ * @kctx: Pointer to kbase context ++ * @zone_bits: A KBASE_REG_ZONE_<...> to retrieve ++ * ++ * This should only be used in performance-critical paths where the code is ++ * resilient to a race with the zone changing. 
++ * ++ * Return: The zone corresponding to @zone_bits ++ */ ++static inline struct kbase_reg_zone * ++kbase_ctx_reg_zone_get_nolock(struct kbase_context *kctx, ++ unsigned long zone_bits) ++{ ++ WARN_ON((zone_bits & KBASE_REG_ZONE_MASK) != zone_bits); ++ ++ return &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)]; ++} ++ ++/** ++ * kbase_ctx_reg_zone_get - get a zone from @kctx ++ * @kctx: Pointer to kbase context ++ * @zone_bits: A KBASE_REG_ZONE_<...> to retrieve ++ * ++ * The get is not refcounted - there is no corresponding 'put' operation ++ * ++ * Return: The zone corresponding to @zone_bits ++ */ ++static inline struct kbase_reg_zone * ++kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits) ++{ ++ lockdep_assert_held(&kctx->reg_lock); ++ WARN_ON((zone_bits & KBASE_REG_ZONE_MASK) != zone_bits); ++ ++ return &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)]; ++} ++ + #endif /* _KBASE_MEM_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c +index b669f2a..21302c1 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,15 +17,10 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /** +- * @file mali_kbase_mem_linux.c +- * Base kernel memory APIs, Linux implementation. ++ * DOC: Base kernel memory APIs, Linux implementation. + */ + + #include +@@ -35,10 +31,9 @@ + #include + #include + #include +-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \ +- (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) ++#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE) + #include +-#endif /* LINUX_VERSION_CODE >= 3.5.0 && < 4.8.0 */ ++#endif /* LINUX_VERSION_CODE < 4.8.0 */ + #include + #include + #include +@@ -47,8 +42,11 @@ + #include + #include + #include +-#include ++#include + #include ++#include ++#include ++#include + + #if ((KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) || \ + (KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE)) +@@ -87,6 +85,12 @@ + + #define IR_THRESHOLD_STEPS (256u) + ++#if MALI_USE_CSF ++static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, ++ struct vm_area_struct *vma); ++static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, ++ struct vm_area_struct *vma); ++#endif + + static int kbase_vmap_phy_pages(struct kbase_context *kctx, + struct kbase_va_region *reg, u64 offset_bytes, size_t size, +@@ -107,6 +111,25 @@ static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, + static struct kbase_va_region *kbase_find_event_mem_region( + struct kbase_context *kctx, u64 gpu_addr) + { ++#if MALI_USE_CSF ++ u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; ++ struct kbase_va_region *reg; ++ ++ lockdep_assert_held(&kctx->reg_lock); ++ ++ list_for_each_entry(reg, &kctx->csf.event_pages_head, link) { ++ if ((reg->start_pfn <= gpu_pfn) && ++ (gpu_pfn < (reg->start_pfn + 
reg->nr_pages))) { ++ if (WARN_ON(reg->flags & KBASE_REG_FREE)) ++ return NULL; ++ ++ if (WARN_ON(!(reg->flags & KBASE_REG_CSF_EVENT))) ++ return NULL; ++ ++ return reg; ++ } ++ } ++#endif + + return NULL; + } +@@ -269,8 +292,8 @@ void kbase_phy_alloc_mapping_put(struct kbase_context *kctx, + } + + struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, +- u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, +- u64 *gpu_va) ++ u64 va_pages, u64 commit_pages, ++ u64 extension, u64 *flags, u64 *gpu_va) + { + int zone; + struct kbase_va_region *reg; +@@ -282,15 +305,20 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, + KBASE_DEBUG_ASSERT(gpu_va); + + dev = kctx->kbdev->dev; +- dev_dbg(dev, "Allocating %lld va_pages, %lld commit_pages, %lld extent, 0x%llX flags\n", +- va_pages, commit_pages, extent, *flags); ++ dev_dbg(dev, ++ "Allocating %lld va_pages, %lld commit_pages, %lld extension, 0x%llX flags\n", ++ va_pages, commit_pages, extension, *flags); + ++#if MALI_USE_CSF ++ *gpu_va = 0; /* return 0 on failure */ ++#else + if (!(*flags & BASE_MEM_FLAG_MAP_FIXED)) + *gpu_va = 0; /* return 0 on failure */ + else + dev_err(dev, + "Keeping requested GPU VA of 0x%llx\n", + (unsigned long long)*gpu_va); ++#endif + + if (!kbase_check_alloc_flags(*flags)) { + dev_warn(dev, +@@ -299,7 +327,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, + goto bad_flags; + } + +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + if (unlikely(kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE))) { + /* Mask coherency flags if infinite cache is enabled to prevent + * the skipping of syncs from BASE side. 
+@@ -325,7 +353,8 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, + *flags &= ~BASE_MEM_COHERENT_SYSTEM; + } + +- if (kbase_check_alloc_sizes(kctx, *flags, va_pages, commit_pages, extent)) ++ if (kbase_check_alloc_sizes(kctx, *flags, va_pages, commit_pages, ++ extension)) + goto bad_sizes; + + #ifdef CONFIG_MALI_MEMORY_FULLY_BACKED +@@ -363,6 +392,15 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, + goto prepare_failed; + } + ++ if (unlikely(reg->cpu_alloc != reg->gpu_alloc)) ++ *flags |= BASE_MEM_KERNEL_SYNC; ++ ++ /* make sure base knows if the memory is actually cached or not */ ++ if (reg->flags & KBASE_REG_CPU_CACHED) ++ *flags |= BASE_MEM_CACHED_CPU; ++ else ++ *flags &= ~BASE_MEM_CACHED_CPU; ++ + if (*flags & BASE_MEM_GROW_ON_GPF) { + unsigned int const ir_threshold = atomic_read( + &kctx->kbdev->memdev.ir_threshold); +@@ -372,12 +410,17 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, + } else + reg->threshold_pages = 0; + +- if (*flags & (BASE_MEM_GROW_ON_GPF|BASE_MEM_TILER_ALIGN_TOP)) { +- /* kbase_check_alloc_sizes() already checks extent is valid for +- * assigning to reg->extent */ +- reg->extent = extent; ++ if (*flags & BASE_MEM_GROW_ON_GPF) { ++ /* kbase_check_alloc_sizes() already checks extension is valid for ++ * assigning to reg->extension ++ */ ++ reg->extension = extension; ++#if !MALI_USE_CSF ++ } else if (*flags & BASE_MEM_TILER_ALIGN_TOP) { ++ reg->extension = extension; ++#endif /* !MALI_USE_CSF */ + } else { +- reg->extent = 0; ++ reg->extension = 0; + } + + if (kbase_alloc_phy_pages(reg, va_pages, commit_pages) != 0) { +@@ -404,7 +447,6 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, + } + } + +- + /* mmap needed to setup VA? 
*/ + if (*flags & BASE_MEM_SAME_VA) { + unsigned long cookie, cookie_nr; +@@ -436,6 +478,17 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, + *gpu_va = reg->start_pfn << PAGE_SHIFT; + } + ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ if (*flags & BASEP_MEM_PERFORM_JIT_TRIM) { ++ kbase_jit_done_phys_increase(kctx, commit_pages); ++ ++ mutex_lock(&kctx->jit_evict_lock); ++ WARN_ON(!list_empty(®->jit_node)); ++ list_add(®->jit_node, &kctx->jit_active_head); ++ mutex_unlock(&kctx->jit_evict_lock); ++ } ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++ + kbase_gpu_vm_unlock(kctx); + return reg; + +@@ -443,6 +496,13 @@ no_mmap: + no_cookie: + no_kern_mapping: + no_mem: ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ if (*flags & BASEP_MEM_PERFORM_JIT_TRIM) { ++ kbase_gpu_vm_lock(kctx); ++ kbase_jit_done_phys_increase(kctx, commit_pages); ++ kbase_gpu_vm_unlock(kctx); ++ } ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + kbase_mem_phy_alloc_put(reg->cpu_alloc); + kbase_mem_phy_alloc_put(reg->gpu_alloc); + invalid_flags: +@@ -511,21 +571,36 @@ int kbase_mem_query(struct kbase_context *kctx, + *out |= BASE_MEM_COHERENT_SYSTEM; + if (KBASE_REG_SHARE_IN & reg->flags) + *out |= BASE_MEM_COHERENT_LOCAL; +- if (kctx->api_version >= KBASE_API_VERSION(11, 2)) { +- /* Prior to 11.2, these were known about by user-side +- * but we did not return them. Returning some of these +- * caused certain clients that were not expecting them +- * to fail, so we omit all of them as a special-case +- * for compatibility reasons */ ++ if (mali_kbase_supports_mem_grow_on_gpf(kctx->api_version)) { ++ /* Prior to this version, this was known about by ++ * user-side but we did not return them. 
Returning ++ * it caused certain clients that were not expecting ++ * it to fail, so we omit it as a special-case for ++ * compatibility reasons ++ */ + if (KBASE_REG_PF_GROW & reg->flags) + *out |= BASE_MEM_GROW_ON_GPF; ++ } ++ if (mali_kbase_supports_mem_protected(kctx->api_version)) { ++ /* Prior to this version, this was known about by ++ * user-side but we did not return them. Returning ++ * it caused certain clients that were not expecting ++ * it to fail, so we omit it as a special-case for ++ * compatibility reasons ++ */ + if (KBASE_REG_PROTECTED & reg->flags) + *out |= BASE_MEM_PROTECTED; + } ++#if !MALI_USE_CSF + if (KBASE_REG_TILER_ALIGN_TOP & reg->flags) + *out |= BASE_MEM_TILER_ALIGN_TOP; ++#endif /* !MALI_USE_CSF */ + if (!(KBASE_REG_GPU_CACHED & reg->flags)) + *out |= BASE_MEM_UNCACHED_GPU; ++#if MALI_USE_CSF ++ if (KBASE_REG_CSF_EVENT & reg->flags) ++ *out |= BASE_MEM_CSF_EVENT; ++#endif + if (KBASE_REG_GPU_VA_SAME_4GB_PAGE & reg->flags) + *out |= BASE_MEM_GPU_VA_SAME_4GB_PAGE; + +@@ -561,18 +636,17 @@ unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s, + struct shrink_control *sc) + { + struct kbase_context *kctx; +- struct kbase_mem_phy_alloc *alloc; +- unsigned long pages = 0; + + kctx = container_of(s, struct kbase_context, reclaim); + +- mutex_lock(&kctx->jit_evict_lock); +- +- list_for_each_entry(alloc, &kctx->evict_list, evict_node) +- pages += alloc->nents; ++ WARN((sc->gfp_mask & __GFP_ATOMIC), ++ "Shrinkers cannot be called for GFP_ATOMIC allocations. Check kernel mm for problems. gfp_mask==%x\n", ++ sc->gfp_mask); ++ WARN(in_atomic(), ++ "Shrinker called whilst in atomic context. The caller must switch to using GFP_ATOMIC or similar. 
gfp_mask==%x\n", ++ sc->gfp_mask); + +- mutex_unlock(&kctx->jit_evict_lock); +- return pages; ++ return atomic_read(&kctx->evict_nents); + } + + /** +@@ -604,6 +678,7 @@ unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s, + unsigned long freed = 0; + + kctx = container_of(s, struct kbase_context, reclaim); ++ + mutex_lock(&kctx->jit_evict_lock); + + list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) { +@@ -630,6 +705,7 @@ unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s, + + kbase_free_phy_pages_helper(alloc, alloc->evicted); + freed += alloc->evicted; ++ WARN_ON(atomic_sub_return(alloc->evicted, &kctx->evict_nents) < 0); + list_del_init(&alloc->evict_node); + + /* +@@ -648,35 +724,20 @@ out_unlock: + return freed; + } + +-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) +-static int kbase_mem_evictable_reclaim_shrink(struct shrinker *s, +- struct shrink_control *sc) +-{ +- if (sc->nr_to_scan == 0) +- return kbase_mem_evictable_reclaim_count_objects(s, sc); +- +- return kbase_mem_evictable_reclaim_scan_objects(s, sc); +-} +-#endif +- + int kbase_mem_evictable_init(struct kbase_context *kctx) + { + INIT_LIST_HEAD(&kctx->evict_list); + mutex_init(&kctx->jit_evict_lock); + +- /* Register shrinker */ +-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) +- kctx->reclaim.shrink = kbase_mem_evictable_reclaim_shrink; +-#else ++ atomic_set(&kctx->evict_nents, 0); ++ + kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects; + kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects; +-#endif + kctx->reclaim.seeks = DEFAULT_SEEKS; + /* Kernel versions prior to 3.1 : +- * struct shrinker does not define batch */ +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) ++ * struct shrinker does not define batch ++ */ + kctx->reclaim.batch = 0; +-#endif + register_shrinker(&kctx->reclaim); + return 0; + } +@@ -705,6 +766,7 @@ void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc 
*alloc) + kbdev, + kctx->id, + (u64)new_page_count); ++ kbase_trace_gpu_mem_usage_dec(kbdev, kctx, alloc->nents); + } + + /** +@@ -731,6 +793,7 @@ void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc) + kbdev, + kctx->id, + (u64)new_page_count); ++ kbase_trace_gpu_mem_usage_inc(kbdev, kctx, alloc->nents); + } + + int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) +@@ -751,6 +814,7 @@ int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) + * can reclaim it. + */ + list_add(&gpu_alloc->evict_node, &kctx->evict_list); ++ atomic_add(gpu_alloc->nents, &kctx->evict_nents); + mutex_unlock(&kctx->jit_evict_lock); + kbase_mem_evictable_mark_reclaim(gpu_alloc); + +@@ -770,6 +834,7 @@ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) + * First remove the allocation from the eviction list as it's no + * longer eligible for eviction. + */ ++ WARN_ON(atomic_sub_return(gpu_alloc->nents, &kctx->evict_nents) < 0); + list_del_init(&gpu_alloc->evict_node); + mutex_unlock(&kctx->jit_evict_lock); + +@@ -840,7 +905,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in + real_flags |= KBASE_REG_SHARE_IN; + + /* now we can lock down the context, and find the region */ +- down_write(¤t->mm->mmap_sem); ++ down_write(kbase_mem_get_process_mmap_lock()); + kbase_gpu_vm_lock(kctx); + + /* Validate the region */ +@@ -852,10 +917,18 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in + prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED; + new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED; + if (prev_needed != new_needed) { +- /* Aliased allocations can't be made ephemeral */ ++ /* Aliased allocations can't be shrunk as the code doesn't ++ * support looking up: ++ * - all physical pages assigned to different GPU VAs ++ * - CPU mappings for the physical pages at different vm_pgoff ++ * (==GPU VA) locations. 
++ */ + if (atomic_read(®->cpu_alloc->gpu_mappings) > 1) + goto out_unlock; + ++ if (atomic_read(®->cpu_alloc->kernel_mappings) > 0) ++ goto out_unlock; ++ + if (new_needed) { + /* Only native allocations can be marked not needed */ + if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) { +@@ -938,7 +1011,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in + + out_unlock: + kbase_gpu_vm_unlock(kctx); +- up_write(¤t->mm->mmap_sem); ++ up_write(kbase_mem_get_process_mmap_lock()); + out: + return ret; + } +@@ -1029,7 +1102,7 @@ int kbase_mem_do_sync_imported(struct kbase_context *kctx, + dir); + #endif /* KBASE_MEM_ION_SYNC_WORKAROUND */ + break; +- }; ++ } + + if (unlikely(ret)) + dev_warn(kctx->kbdev->dev, +@@ -1056,6 +1129,8 @@ static void kbase_mem_umm_unmap_attachment(struct kbase_context *kctx, + alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); + alloc->imported.umm.sgt = NULL; + ++ kbase_remove_dma_buf_usage(kctx, alloc); ++ + memset(pa, 0xff, sizeof(*pa) * alloc->nents); + alloc->nents = 0; + } +@@ -1123,6 +1198,7 @@ static int kbase_mem_umm_map_attachment(struct kbase_context *kctx, + + /* Update nents as we now have pages to map */ + alloc->nents = count; ++ kbase_add_dma_buf_usage(kctx, alloc); + + return 0; + +@@ -1327,7 +1403,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, + if (*flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) + need_sync = true; + +-#ifdef CONFIG_64BIT ++#if IS_ENABLED(CONFIG_64BIT) + if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + /* + * 64-bit tasks require us to reserve VA on the CPU that we use +@@ -1383,7 +1459,8 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, + reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment; + reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0; + reg->gpu_alloc->imported.umm.need_sync = need_sync; +- reg->extent = 0; ++ reg->gpu_alloc->imported.umm.kctx = kctx; ++ reg->extension = 0; + + if 
(!IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) { + int err; +@@ -1436,6 +1513,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( + u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev); + struct kbase_alloc_import_user_buf *user_buf; + struct page **pages = NULL; ++ int write; + + /* Flag supported only for dma-buf imported memory */ + if (*flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) +@@ -1478,7 +1556,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( + if (*flags & BASE_MEM_IMPORT_SHARED) + shared_zone = true; + +-#ifdef CONFIG_64BIT ++#if IS_ENABLED(CONFIG_64BIT) + if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + /* + * 64-bit tasks require us to reserve VA on the CPU that we use +@@ -1547,33 +1625,33 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( + *flags |= KBASE_MEM_IMPORT_HAVE_PAGES; + } + +- down_read(¤t->mm->mmap_sem); ++ down_read(kbase_mem_get_process_mmap_lock()); ++ ++ write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR); + +-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) ++#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE + faulted_pages = get_user_pages(current, current->mm, address, *va_pages, + #if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \ + KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE +- reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, +- pages, NULL); ++ write ? FOLL_WRITE : 0, pages, NULL); + #else +- reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL); ++ write, 0, pages, NULL); + #endif +-#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) ++#elif KERNEL_VERSION(4, 9, 0) > LINUX_VERSION_CODE + faulted_pages = get_user_pages(address, *va_pages, +- reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL); ++ write, 0, pages, NULL); + #else + faulted_pages = get_user_pages(address, *va_pages, +- reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, +- pages, NULL); ++ write ? 
FOLL_WRITE : 0, pages, NULL); + #endif + +- up_read(¤t->mm->mmap_sem); ++ up_read(kbase_mem_get_process_mmap_lock()); + + if (faulted_pages != *va_pages) + goto fault_mismatch; + + reg->gpu_alloc->nents = 0; +- reg->extent = 0; ++ reg->extension = 0; + + if (pages) { + struct device *dev = kctx->kbdev->dev; +@@ -1672,10 +1750,11 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, + /* calculate the number of pages this alias will cover */ + *num_pages = nents * stride; + +-#ifdef CONFIG_64BIT ++#if IS_ENABLED(CONFIG_64BIT) + if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + /* 64-bit tasks must MMAP anyway, but not expose this address to +- * clients */ ++ * clients ++ */ + *flags |= BASE_MEM_NEED_MMAP; + reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, + *num_pages, +@@ -1721,7 +1800,8 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, + goto bad_handle; /* must be > 0 */ + if (ai[i].length > stride) + goto bad_handle; /* can't be larger than the +- stride */ ++ * stride ++ */ + reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length; + } else { + struct kbase_va_region *aliasing_reg; +@@ -1736,6 +1816,15 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, + goto bad_handle; /* Not found/already free */ + if (aliasing_reg->flags & KBASE_REG_DONT_NEED) + goto bad_handle; /* Ephemeral region */ ++ if (aliasing_reg->flags & KBASE_REG_NO_USER_FREE) ++ goto bad_handle; /* JIT regions can't be ++ * aliased. NO_USER_FREE flag ++ * covers the entire lifetime ++ * of JIT regions. The other ++ * types of regions covered ++ * by this flag also shall ++ * not be aliased. 
++ */ + if (!(aliasing_reg->flags & KBASE_REG_GPU_CACHED)) + goto bad_handle; /* GPU uncached memory */ + if (!aliasing_reg->gpu_alloc) +@@ -1743,16 +1832,18 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, + if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) + goto bad_handle; /* Not a native alloc */ + if (coherent != ((aliasing_reg->flags & KBASE_REG_SHARE_BOTH) != 0)) +- goto bad_handle; +- /* Non-coherent memory cannot alias +- coherent memory, and vice versa.*/ ++ goto bad_handle; /* Non-coherent memory cannot ++ * alias coherent memory, and ++ * vice versa. ++ */ + + /* check size against stride */ + if (!ai[i].length) + goto bad_handle; /* must be > 0 */ + if (ai[i].length > stride) + goto bad_handle; /* can't be larger than the +- stride */ ++ * stride ++ */ + + alloc = aliasing_reg->gpu_alloc; + +@@ -1765,10 +1856,22 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, + reg->gpu_alloc->imported.alias.aliased[i].alloc = kbase_mem_phy_alloc_get(alloc); + reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length; + reg->gpu_alloc->imported.alias.aliased[i].offset = ai[i].offset; ++ ++ /* Ensure the underlying alloc is marked as being ++ * mapped at >1 different GPU VA immediately, even ++ * though mapping might not happen until later. ++ * ++ * Otherwise, we would (incorrectly) allow shrinking of ++ * the source region (aliasing_reg) and so freeing the ++ * physical pages (without freeing the entire alloc) ++ * whilst we still hold an implicit reference on those ++ * physical pages. 
++ */ ++ kbase_mem_phy_alloc_gpu_mapped(alloc); + } + } + +-#ifdef CONFIG_64BIT ++#if IS_ENABLED(CONFIG_64BIT) + if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + /* Bind to a cookie */ + if (bitmap_empty(kctx->cookies, BITS_PER_LONG)) { +@@ -1803,11 +1906,15 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, + + return gpu_va; + +-#ifdef CONFIG_64BIT ++#if IS_ENABLED(CONFIG_64BIT) + no_cookie: + #endif + no_mmap: + bad_handle: ++ /* Marking the source allocs as not being mapped on the GPU and putting ++ * them is handled by putting reg's allocs, so no rollback of those ++ * actions is done here. ++ */ + kbase_gpu_vm_unlock(kctx); + no_aliased_array: + invalid_flags: +@@ -1887,7 +1994,7 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, + sizeof(user_buffer))) { + reg = NULL; + } else { +-#ifdef CONFIG_COMPAT ++#if IS_ENABLED(CONFIG_COMPAT) + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) + uptr = compat_ptr(user_buffer.ptr); + else +@@ -2024,7 +2131,7 @@ static int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx, + int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) + { + u64 old_pages; +- u64 delta; ++ u64 delta = 0; + int res = -EINVAL; + struct kbase_va_region *reg; + bool read_locked = false; +@@ -2037,7 +2144,7 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) + return -EINVAL; + } + +- down_write(¤t->mm->mmap_sem); ++ down_write(kbase_mem_get_process_mmap_lock()); + kbase_gpu_vm_lock(kctx); + + /* Validate the region */ +@@ -2054,13 +2161,27 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) + if (0 == (reg->flags & KBASE_REG_GROWABLE)) + goto out_unlock; + ++ if (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC) ++ goto out_unlock; ++ + /* Would overflow the VA region */ + if (new_pages > reg->nr_pages) + goto out_unlock; + +- /* can't be mapped more than once on the GPU */ ++ /* Can't shrink when physical pages are mapped 
to different GPU ++ * VAs. The code doesn't support looking up: ++ * - all physical pages assigned to different GPU VAs ++ * - CPU mappings for the physical pages at different vm_pgoff ++ * (==GPU VA) locations. ++ * ++ * Note that for Native allocs mapped at multiple GPU VAs, growth of ++ * such allocs is not a supported use-case. ++ */ + if (atomic_read(®->gpu_alloc->gpu_mappings) > 1) + goto out_unlock; ++ ++ if (atomic_read(®->cpu_alloc->kernel_mappings) > 0) ++ goto out_unlock; + /* can't grow regions which are ephemeral */ + if (reg->flags & KBASE_REG_DONT_NEED) + goto out_unlock; +@@ -2085,7 +2206,7 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) + * No update to the mm so downgrade the writer lock to a read + * lock so other readers aren't blocked after this point. + */ +- downgrade_write(¤t->mm->mmap_sem); ++ downgrade_write(kbase_mem_get_process_mmap_lock()); + read_locked = true; + + /* Allocate some more pages */ +@@ -2127,9 +2248,9 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) + out_unlock: + kbase_gpu_vm_unlock(kctx); + if (read_locked) +- up_read(¤t->mm->mmap_sem); ++ up_read(kbase_mem_get_process_mmap_lock()); + else +- up_write(¤t->mm->mmap_sem); ++ up_write(kbase_mem_get_process_mmap_lock()); + + return res; + } +@@ -2360,11 +2481,7 @@ static int kbase_cpu_mmap(struct kbase_context *kctx, + * See MIDBASE-1057 + */ + +-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) + vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO; +-#else +- vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO; +-#endif + vma->vm_ops = &kbase_vm_ops; + vma->vm_private_data = map; + +@@ -2510,16 +2627,14 @@ out: + + void kbase_os_mem_map_lock(struct kbase_context *kctx) + { +- struct mm_struct *mm = current->mm; + (void)kctx; +- down_read(&mm->mmap_sem); ++ down_read(kbase_mem_get_process_mmap_lock()); + } + + void kbase_os_mem_map_unlock(struct kbase_context *kctx) + { +- 
struct mm_struct *mm = current->mm; + (void)kctx; +- up_read(&mm->mmap_sem); ++ up_read(kbase_mem_get_process_mmap_lock()); + } + + static int kbasep_reg_mmap(struct kbase_context *kctx, +@@ -2547,7 +2662,8 @@ static int kbasep_reg_mmap(struct kbase_context *kctx, + /* incorrect mmap size */ + /* leave the cookie for a potential later + * mapping, or to be reclaimed later when the +- * context is freed */ ++ * context is freed ++ */ + err = -ENOMEM; + goto out; + } +@@ -2576,6 +2692,11 @@ static int kbasep_reg_mmap(struct kbase_context *kctx, + kctx->pending_regions[cookie] = NULL; + bitmap_set(kctx->cookies, cookie, 1); + ++#if MALI_USE_CSF ++ if (reg->flags & KBASE_REG_CSF_EVENT) ++ kbase_link_event_mem_page(kctx, reg); ++#endif ++ + /* + * Overwrite the offset with the region start_pfn, so we effectively + * map from offset 0 in the region. However subtract the aligned +@@ -2595,7 +2716,7 @@ int kbase_context_mmap(struct kbase_context *const kctx, + { + struct kbase_va_region *reg = NULL; + void *kaddr = NULL; +- size_t nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; ++ size_t nr_pages = vma_pages(vma); + int err = 0; + int free_on_close = 0; + struct device *dev = kctx->kbdev->dev; +@@ -2608,7 +2729,7 @@ int kbase_context_mmap(struct kbase_context *const kctx, + if (!(vma->vm_flags & VM_WRITE)) + vma->vm_flags &= ~VM_MAYWRITE; + +- if (0 == nr_pages) { ++ if (nr_pages == 0) { + err = -EINVAL; + goto out; + } +@@ -2629,7 +2750,8 @@ int kbase_context_mmap(struct kbase_context *const kctx, + /* if not the MTP, verify that the MTP has been mapped */ + rcu_read_lock(); + /* catches both when the special page isn't present or +- * when we've forked */ ++ * when we've forked ++ */ + if (rcu_dereference(kctx->process_mm) != current->mm) { + err = -EINVAL; + rcu_read_unlock(); +@@ -2646,16 +2768,30 @@ int kbase_context_mmap(struct kbase_context *const kctx, + case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE): + /* MMU dump */ + err = kbase_mmu_dump_mmap(kctx, vma, ®, 
&kaddr); +- if (0 != err) ++ if (err != 0) + goto out_unlock; + /* free the region on munmap */ + free_on_close = 1; + break; ++#if MALI_USE_CSF ++ case PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE): ++ kbase_gpu_vm_unlock(kctx); ++ err = kbase_csf_cpu_mmap_user_reg_page(kctx, vma); ++ goto out; ++ case PFN_DOWN(BASEP_MEM_CSF_USER_IO_PAGES_HANDLE) ... ++ PFN_DOWN(BASE_MEM_COOKIE_BASE) - 1: { ++ kbase_gpu_vm_unlock(kctx); ++ mutex_lock(&kctx->csf.lock); ++ err = kbase_csf_cpu_mmap_user_io_pages(kctx, vma); ++ mutex_unlock(&kctx->csf.lock); ++ goto out; ++ } ++#endif + case PFN_DOWN(BASE_MEM_COOKIE_BASE) ... + PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: { + err = kbasep_reg_mmap(kctx, vma, ®, &nr_pages, + &aligned_offset); +- if (0 != err) ++ if (err != 0) + goto out_unlock; + /* free the region on munmap */ + free_on_close = 1; +@@ -2728,8 +2864,21 @@ int kbase_context_mmap(struct kbase_context *const kctx, + + if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) { + /* MMU dump - userspace should now have a reference on +- * the pages, so we can now free the kernel mapping */ ++ * the pages, so we can now free the kernel mapping ++ */ + vfree(kaddr); ++ /* CPU mapping of GPU allocations have GPU VA as the vm_pgoff ++ * and that is used to shrink the mapping when the commit size ++ * is reduced. So vm_pgoff for CPU mapping created to get the ++ * snapshot of GPU page tables shall not match with any GPU VA. ++ * That can be ensured by setting vm_pgoff as vma->vm_start ++ * because, ++ * - GPU VA of any SAME_VA allocation cannot match with ++ * vma->vm_start, as CPU VAs are unique. ++ * - GPU VA of CUSTOM_VA allocations are outside the CPU ++ * virtual address space. 
++ */ ++ vma->vm_pgoff = PFN_DOWN(vma->vm_start); + } + + out_unlock: +@@ -2824,8 +2973,8 @@ static int kbase_vmap_phy_pages(struct kbase_context *kctx, + + /* Note: enforcing a RO prot_request onto prot is not done, since: + * - CPU-arch-specific integration required +- * - kbase_vmap() requires no access checks to be made/enforced */ +- ++ * - kbase_vmap() requires no access checks to be made/enforced ++ */ + cpu_addr = vmap(pages, page_count, VM_MAP, prot); + + kfree(pages); +@@ -2846,6 +2995,7 @@ static int kbase_vmap_phy_pages(struct kbase_context *kctx, + if (map->sync_needed) + kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_CPU); + ++ kbase_mem_phy_alloc_kernel_mapped(reg->cpu_alloc); + return 0; + } + +@@ -2901,7 +3051,8 @@ void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, + * be made. + * + * As mentioned in kbase_vmap_prot() this means that a kernel-side +- * CPU-RO mapping is not enforced to allow this to work */ ++ * CPU-RO mapping is not enforced to allow this to work ++ */ + return kbase_vmap_prot(kctx, gpu_addr, size, 0u, map); + } + KBASE_EXPORT_TEST_API(kbase_vmap); +@@ -2915,6 +3066,7 @@ static void kbase_vunmap_phy_pages(struct kbase_context *kctx, + if (map->sync_needed) + kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE); + ++ kbase_mem_phy_alloc_kernel_unmapped(map->cpu_alloc); + map->offset_in_page = 0; + map->cpu_pages = NULL; + map->gpu_pages = NULL; +@@ -2933,7 +3085,7 @@ KBASE_EXPORT_TEST_API(kbase_vunmap); + + static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value) + { +-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0)) ++#if (KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE) + /* To avoid the build breakage due to an unexported kernel symbol + * 'mm_trace_rss_stat' from later kernels, i.e. 
from V4.19.0 onwards, + * we inline here the equivalent of 'add_mm_counter()' from linux +@@ -3017,14 +3169,319 @@ static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_ + + /* no real access */ + vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC); +-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO; ++ vma->vm_ops = &kbase_vm_special_ops; ++ vma->vm_private_data = kctx; ++ ++ return 0; ++} ++ ++#if MALI_USE_CSF ++static unsigned long get_queue_doorbell_pfn(struct kbase_device *kbdev, ++ struct kbase_queue *queue) ++{ ++ lockdep_assert_held(&kbdev->csf.reg_lock); ++ ++ /* Return the real Hw doorbell page if queue has been ++ * assigned one, otherwise a dummy page. Always return the ++ * dummy page in no mali builds. ++ */ ++ if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID) ++ return PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_db_page)); ++ return (PFN_DOWN(kbdev->reg_start + CSF_HW_DOORBELL_PAGE_OFFSET + ++ (u64)queue->doorbell_nr * CSF_HW_DOORBELL_PAGE_SIZE)); ++} ++ ++static void kbase_csf_user_io_pages_vm_open(struct vm_area_struct *vma) ++{ ++ WARN(1, "Unexpected attempt to clone private vma\n"); ++ vma->vm_private_data = NULL; ++} ++ ++static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma) ++{ ++ struct kbase_queue *queue = vma->vm_private_data; ++ struct kbase_context *kctx; ++ struct kbase_device *kbdev; ++ int err; ++ bool reset_prevented = false; ++ ++ if (WARN_ON(!queue)) ++ return; ++ ++ kctx = queue->kctx; ++ kbdev = kctx->kbdev; ++ ++ err = kbase_reset_gpu_prevent_and_wait(kbdev); ++ if (err) ++ dev_warn( ++ kbdev->dev, ++ "Unsuccessful GPU reset detected when unbinding queue (csi_index=%d), attempting to unbind regardless", ++ queue->csi_index); ++ else ++ reset_prevented = true; ++ ++ mutex_lock(&kctx->csf.lock); ++ kbase_csf_queue_unbind(queue); ++ mutex_unlock(&kctx->csf.lock); ++ ++ if 
(reset_prevented) ++ kbase_reset_gpu_allow(kbdev); ++ ++ /* Now as the vma is closed, drop the reference on mali device file */ ++ fput(kctx->filp); ++} ++ ++#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) ++static vm_fault_t kbase_csf_user_io_pages_vm_fault(struct vm_area_struct *vma, ++ struct vm_fault *vmf) ++{ + #else +- vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO; ++static vm_fault_t kbase_csf_user_io_pages_vm_fault(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; + #endif +- vma->vm_ops = &kbase_vm_special_ops; ++ struct kbase_queue *queue = vma->vm_private_data; ++ unsigned long doorbell_cpu_addr, input_cpu_addr, output_cpu_addr; ++ unsigned long doorbell_page_pfn, input_page_pfn, output_page_pfn; ++ pgprot_t doorbell_pgprot, input_page_pgprot, output_page_pgprot; ++ size_t nr_pages = PFN_DOWN(vma->vm_end - vma->vm_start); ++ vm_fault_t ret; ++ struct kbase_device *kbdev; ++ struct memory_group_manager_device *mgm_dev; ++ ++ /* Few sanity checks up front */ ++ if ((nr_pages != BASEP_QUEUE_NR_MMAP_USER_PAGES) || ++ (vma->vm_pgoff != queue->db_file_offset)) ++ return VM_FAULT_SIGBUS; ++ ++ mutex_lock(&queue->kctx->csf.lock); ++ kbdev = queue->kctx->kbdev; ++ mgm_dev = kbdev->mgm_dev; ++ ++ /* Always map the doorbell page as uncached */ ++ doorbell_pgprot = pgprot_device(vma->vm_page_prot); ++ ++#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \ ++ ((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \ ++ (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE))) ++ vma->vm_page_prot = doorbell_pgprot; ++ input_page_pgprot = doorbell_pgprot; ++ output_page_pgprot = doorbell_pgprot; ++#else ++ if (kbdev->system_coherency == COHERENCY_NONE) { ++ input_page_pgprot = pgprot_writecombine(vma->vm_page_prot); ++ output_page_pgprot = pgprot_writecombine(vma->vm_page_prot); ++ } else { ++ input_page_pgprot = vma->vm_page_prot; ++ output_page_pgprot = vma->vm_page_prot; ++ } ++#endif ++ ++ doorbell_cpu_addr = vma->vm_start; ++ 
++#if KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE ++ if ((unsigned long)vmf->virtual_address == doorbell_cpu_addr) { ++#else ++ if (vmf->address == doorbell_cpu_addr) { ++#endif ++ mutex_lock(&kbdev->csf.reg_lock); ++ doorbell_page_pfn = get_queue_doorbell_pfn(kbdev, queue); ++ ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, ++ KBASE_MEM_GROUP_CSF_IO, vma, doorbell_cpu_addr, ++ doorbell_page_pfn, doorbell_pgprot); ++ mutex_unlock(&kbdev->csf.reg_lock); ++ } else { ++ /* Map the Input page */ ++ input_cpu_addr = doorbell_cpu_addr + PAGE_SIZE; ++ input_page_pfn = PFN_DOWN(as_phys_addr_t(queue->phys[0])); ++ ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, ++ KBASE_MEM_GROUP_CSF_IO, vma, input_cpu_addr, ++ input_page_pfn, input_page_pgprot); ++ if (ret != VM_FAULT_NOPAGE) ++ goto exit; ++ ++ /* Map the Output page */ ++ output_cpu_addr = input_cpu_addr + PAGE_SIZE; ++ output_page_pfn = PFN_DOWN(as_phys_addr_t(queue->phys[1])); ++ ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, ++ KBASE_MEM_GROUP_CSF_IO, vma, output_cpu_addr, ++ output_page_pfn, output_page_pgprot); ++ } ++ ++exit: ++ mutex_unlock(&queue->kctx->csf.lock); ++ return ret; ++} ++ ++static const struct vm_operations_struct kbase_csf_user_io_pages_vm_ops = { ++ .open = kbase_csf_user_io_pages_vm_open, ++ .close = kbase_csf_user_io_pages_vm_close, ++ .fault = kbase_csf_user_io_pages_vm_fault ++}; ++ ++/* Program the client process's page table entries to map the pair of ++ * input/output pages & Hw doorbell page. The caller should have validated that ++ * vma->vm_pgoff maps to the range of csf cookies. 
++ */ ++static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, ++ struct vm_area_struct *vma) ++{ ++ unsigned long cookie = ++ vma->vm_pgoff - PFN_DOWN(BASEP_MEM_CSF_USER_IO_PAGES_HANDLE); ++ size_t nr_pages = vma_pages(vma); ++ struct kbase_queue *queue; ++ int err = 0; ++ ++ lockdep_assert_held(&kctx->csf.lock); ++ ++ queue = kctx->csf.user_pages_info[cookie]; ++ ++ /* Looks like the bind has been aborted */ ++ if (!queue) ++ return -EINVAL; ++ ++ if (WARN_ON(test_bit(cookie, kctx->csf.cookies))) ++ return -EINVAL; ++ ++ /* no need for the cookie anymore */ ++ kctx->csf.user_pages_info[cookie] = NULL; ++ bitmap_set(kctx->csf.cookies, cookie, 1); ++ ++ /* Reset the handle to avoid (re)freeing the cookie (which can ++ * now get re-assigned) on unbind. ++ */ ++ queue->handle = BASEP_MEM_INVALID_HANDLE; ++ ++ if (nr_pages != BASEP_QUEUE_NR_MMAP_USER_PAGES) { ++ err = -EINVAL; ++ goto map_failed; ++ } ++ ++ err = kbase_csf_alloc_command_stream_user_pages(kctx, queue); ++ if (err) ++ goto map_failed; ++ ++ vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO; ++ /* TODO use VM_MIXEDMAP, since it is more appropriate as both types of ++ * memory with and without "struct page" backing are being inserted here. ++ * Hw Doorbell pages comes from the device register area so kernel does ++ * not use "struct page" for them. ++ */ ++ vma->vm_flags |= VM_PFNMAP; ++ ++ vma->vm_ops = &kbase_csf_user_io_pages_vm_ops; ++ vma->vm_private_data = queue; ++ ++ /* Make vma point to the special internal file, but don't drop the ++ * reference on mali device file (that would be done later when the ++ * vma is closed). 
++ */ ++ vma->vm_file = kctx->kbdev->csf.db_filp; ++ get_file(vma->vm_file); ++ /* Also adjust the vm_pgoff */ ++ vma->vm_pgoff = queue->db_file_offset; ++ ++ return 0; ++ ++map_failed: ++ /* The queue cannot have got to KBASE_CSF_QUEUE_BOUND state if we ++ * reached here, so safe to use a variant of unbind that only works on ++ * stopped queues ++ * ++ * This is so we don't enter the CSF scheduler from this path. ++ */ ++ kbase_csf_queue_unbind_stopped(queue); ++ ++ return err; ++} ++ ++static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma) ++{ ++ struct kbase_context *kctx = vma->vm_private_data; ++ ++ WARN_ON(!kctx->csf.user_reg_vma); ++ ++ kctx->csf.user_reg_vma = NULL; ++} ++ ++#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) ++static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_area_struct *vma, ++ struct vm_fault *vmf) ++{ ++#else ++static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++#endif ++ struct kbase_context *kctx = vma->vm_private_data; ++ struct kbase_device *kbdev = kctx->kbdev; ++ struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev; ++ unsigned long pfn = PFN_DOWN(kbdev->reg_start + USER_BASE); ++ size_t nr_pages = PFN_DOWN(vma->vm_end - vma->vm_start); ++ vm_fault_t ret = VM_FAULT_SIGBUS; ++ ++ /* Few sanity checks up front */ ++ if (WARN_ON(nr_pages != 1) || ++ WARN_ON(vma != kctx->csf.user_reg_vma) || ++ WARN_ON(vma->vm_pgoff != ++ PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE))) ++ return VM_FAULT_SIGBUS; ++ ++ mutex_lock(&kbdev->pm.lock); ++ ++ /* Don't map in the actual register page if GPU is powered down. ++ * Always map in the dummy page in no mali builds. 
++ */ ++ if (!kbdev->pm.backend.gpu_powered) ++ pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_user_reg_page)); ++ ++ ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, ++ KBASE_MEM_GROUP_CSF_FW, vma, ++ vma->vm_start, pfn, ++ vma->vm_page_prot); ++ ++ mutex_unlock(&kbdev->pm.lock); ++ ++ return ret; ++} ++ ++static const struct vm_operations_struct kbase_csf_user_reg_vm_ops = { ++ .close = kbase_csf_user_reg_vm_close, ++ .fault = kbase_csf_user_reg_vm_fault ++}; ++ ++static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, ++ struct vm_area_struct *vma) ++{ ++ size_t nr_pages = PFN_DOWN(vma->vm_end - vma->vm_start); ++ ++ /* Few sanity checks */ ++ if (kctx->csf.user_reg_vma) ++ return -EBUSY; ++ ++ if (nr_pages != 1) ++ return -EINVAL; ++ ++ if (vma->vm_flags & (VM_WRITE | VM_MAYWRITE)) ++ return -EPERM; ++ ++ /* Map uncached */ ++ vma->vm_page_prot = pgprot_device(vma->vm_page_prot); ++ ++ vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO; ++ ++ /* User register page comes from the device register area so ++ * "struct page" isn't available for it. ++ */ ++ vma->vm_flags |= VM_PFNMAP; ++ ++ kctx->csf.user_reg_vma = vma; ++ ++ vma->vm_ops = &kbase_csf_user_reg_vm_ops; + vma->vm_private_data = kctx; + + return 0; + } + ++#endif /* MALI_USE_CSF */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h +index cd094b3..36159c1 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2010, 2012-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010, 2012-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,21 +17,16 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /** +- * @file mali_kbase_mem_linux.h + * Base kernel memory APIs, Linux implementation. + */ + + #ifndef _KBASE_MEM_LINUX_H_ + #define _KBASE_MEM_LINUX_H_ + +-/** A HWC dump mapping */ ++/* A HWC dump mapping */ + struct kbase_hwc_dma_mapping { + void *cpu_va; + dma_addr_t dma_pa; +@@ -43,7 +39,7 @@ struct kbase_hwc_dma_mapping { + * @kctx: The kernel context + * @va_pages: The number of pages of virtual address space to reserve + * @commit_pages: The number of physical pages to allocate upfront +- * @extent: The number of extra pages to allocate on each GPU fault which ++ * @extension: The number of extra pages to allocate on each GPU fault which + * grows the region. + * @flags: bitmask of BASE_MEM_* flags to convey special requirements & + * properties for the new allocation. 
+@@ -53,8 +49,8 @@ struct kbase_hwc_dma_mapping { + * Return: 0 on success or error code + */ + struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, +- u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, +- u64 *gpu_va); ++ u64 va_pages, u64 commit_pages, ++ u64 extension, u64 *flags, u64 *gpu_va); + + /** + * kbase_mem_query - Query properties of a GPU memory region +@@ -194,8 +190,8 @@ int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, + * Take the provided region and make all the physical pages within it + * reclaimable by the kernel, updating the per-process VM stats as well. + * Remove any CPU mappings (as these can't be removed in the shrinker callback +- * as mmap_sem might already be taken) but leave the GPU mapping intact as +- * and until the shrinker reclaims the allocation. ++ * as mmap_sem/mmap_lock might already be taken) but leave the GPU mapping ++ * intact as and until the shrinker reclaims the allocation. + * + * Note: Must be called with the region lock of the containing context. 
+ */ +@@ -461,4 +457,18 @@ static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, + } + #endif + ++/** ++ * kbase_mem_get_process_mmap_lock - Return the mmap lock for the current process ++ * ++ * Return: the mmap lock for the current process ++ */ ++static inline struct rw_semaphore *kbase_mem_get_process_mmap_lock(void) ++{ ++#if KERNEL_VERSION(5, 8, 0) > LINUX_VERSION_CODE ++ return &current->mm->mmap_sem; ++#else /* KERNEL_VERSION(5, 8, 0) > LINUX_VERSION_CODE */ ++ return &current->mm->mmap_lock; ++#endif /* KERNEL_VERSION(5, 8, 0) > LINUX_VERSION_CODE */ ++} ++ + #endif /* _KBASE_MEM_LINUX_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h +index 7011603..3f260bf 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2012-2014,2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2014, 2016-2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + #ifndef _KBASE_MEM_LOWLEVEL_H + #define _KBASE_MEM_LOWLEVEL_H + +@@ -31,9 +28,7 @@ + + #include + +-/** +- * @brief Flags for kbase_phy_allocator_pages_alloc +- */ ++/* Flags for kbase_phy_allocator_pages_alloc */ + #define KBASE_PHY_PAGES_FLAG_DEFAULT (0) /** Default allocation flag */ + #define KBASE_PHY_PAGES_FLAG_CLEAR (1 << 0) /** Clear the pages after allocation */ + #define KBASE_PHY_PAGES_FLAG_POISON (1 << 1) /** Fill the memory with a poison value */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c +index 0723e32..a11da82 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include +@@ -154,20 +153,12 @@ static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool, + struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool) + { + struct page *p; +- gfp_t gfp; ++ gfp_t gfp = GFP_HIGHUSER | __GFP_ZERO; + struct kbase_device *const kbdev = pool->kbdev; + struct device *const dev = kbdev->dev; + dma_addr_t dma_addr; + int i; + +-#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && \ +- LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0) +- /* DMA cache sync fails for HIGHMEM before 3.5 on ARM */ +- gfp = GFP_USER | __GFP_ZERO; +-#else +- gfp = GFP_HIGHUSER | __GFP_ZERO; +-#endif +- + /* don't warn on higher order failures */ + if (pool->order) + gfp |= __GFP_NOWARN; +@@ -318,7 +309,7 @@ void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size) + + kbase_mem_pool_unlock(pool); + } +- ++KBASE_EXPORT_TEST_API(kbase_mem_pool_set_max_size); + + static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s, + struct shrink_control *sc) +@@ -364,17 +355,6 @@ static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s, + return freed; + } + +-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) +-static int kbase_mem_pool_reclaim_shrink(struct shrinker *s, +- struct shrink_control *sc) +-{ +- if (sc->nr_to_scan == 0) +- return kbase_mem_pool_reclaim_count_objects(s, sc); +- +- return kbase_mem_pool_reclaim_scan_objects(s, sc); +-} +-#endif +- + int kbase_mem_pool_init(struct kbase_mem_pool *pool, + const struct kbase_mem_pool_config *config, + unsigned int order, +@@ -398,19 +378,13 @@ int kbase_mem_pool_init(struct kbase_mem_pool *pool, + spin_lock_init(&pool->pool_lock); + INIT_LIST_HEAD(&pool->page_list); + +- /* Register shrinker */ +-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) +- pool->reclaim.shrink = kbase_mem_pool_reclaim_shrink; +-#else + pool->reclaim.count_objects = kbase_mem_pool_reclaim_count_objects; + 
pool->reclaim.scan_objects = kbase_mem_pool_reclaim_scan_objects; +-#endif + pool->reclaim.seeks = DEFAULT_SEEKS; + /* Kernel versions prior to 3.1 : +- * struct shrinker does not define batch */ +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) ++ * struct shrinker does not define batch ++ */ + pool->reclaim.batch = 0; +-#endif + register_shrinker(&pool->reclaim); + + pool_dbg(pool, "initialized\n"); +@@ -830,8 +804,8 @@ void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool, + nr_to_pool = kbase_mem_pool_capacity(pool); + nr_to_pool = min(nr_pages, nr_to_pool); + +- kbase_mem_pool_add_array_locked(pool, nr_pages, pages, false, +- dirty); ++ kbase_mem_pool_add_array_locked(pool, nr_to_pool, pages, false, ++ dirty); + + i += nr_to_pool; + } +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c +index 5879fdf..cfb43b0 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h +index 2932945..207b585 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KBASE_MEM_POOL_DEBUGFS_H_ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.c +index aa25548..8d7bb4d 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.h +index 0484f59..38fd4ca 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KBASE_MEM_POOL_GROUP_H_ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c +index 5752d4a..ea8e34b 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2012-2017, 2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2017, 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,23 +17,23 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include + +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + +-/** Show callback for the @c mem_profile debugfs file. ++/** ++ * Show callback for the @c mem_profile debugfs file. + * + * This function is called to get the contents of the @c mem_profile debugfs + * file. This is a report of current memory usage and distribution in userspace. 
+ * +- * @param sfile The debugfs entry +- * @param data Data associated with the entry ++ * @sfile: The debugfs entry ++ * @data: Data associated with the entry + * +- * @return 0 if it successfully prints data in debugfs entry file, non-zero otherwise ++ * Return: 0 if it successfully prints data in debugfs entry file, non-zero ++ * otherwise + */ + static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data) + { +@@ -71,6 +72,11 @@ static const struct file_operations kbasep_mem_profile_debugfs_fops = { + int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, + size_t size) + { ++#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) ++ const mode_t mode = 0444; ++#else ++ const mode_t mode = 0400; ++#endif + int err = 0; + + if (IS_ERR_OR_NULL(kctx->kctx_dentry)) /* not initialized */ +@@ -84,7 +90,7 @@ int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, + if (!kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) { + if (IS_ERR_OR_NULL(kctx->kctx_dentry)) { + err = -ENOMEM; +- } else if (!debugfs_create_file("mem_profile", 0444, ++ } else if (!debugfs_create_file("mem_profile", mode, + kctx->kctx_dentry, kctx, + &kbasep_mem_profile_debugfs_fops)) { + err = -EAGAIN; +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h +index 1462247..093a65e 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2016, 2020-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,14 +17,9 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /** +- * @file mali_kbase_mem_profile_debugfs.h + * Header file for mem profiles entries in debugfs + * + */ +@@ -35,12 +31,17 @@ + #include + + /** +- * @brief Remove entry from Mali memory profile debugfs ++ * Remove entry from Mali memory profile debugfs ++ * @kctx: The context whose debugfs file @p data should be removed from + */ + void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx); + + /** +- * @brief Insert @p data to the debugfs file so it can be read by userspace ++ * Insert @p data to the debugfs file so it can be read by userspace ++ * @kctx: The context whose debugfs file @p data should be inserted to ++ * @data: A NULL-terminated string to be inserted to the debugfs file, ++ * without the trailing new line character ++ * @size: The length of the @p data string + * + * The function takes ownership of @p data and frees it later when new data + * is inserted. +@@ -48,10 +49,6 @@ void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx); + * If the debugfs entry corresponding to the @p kctx doesn't exist, + * an attempt will be made to create it. 
+ * +- * @param kctx The context whose debugfs file @p data should be inserted to +- * @param data A NULL-terminated string to be inserted to the debugfs file, +- * without the trailing new line character +- * @param size The length of the @p data string + * @return 0 if @p data inserted correctly + * -EAGAIN in case of error + * @post @ref mem_profile_initialized will be set to @c true +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h +index d55cc85..3184a98 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014, 2018-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014, 2017-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,9 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /** +- * @file mali_kbase_mem_profile_debugfs_buf_size.h + * Header file for the size of the buffer to accumulate the histogram report text in + */ + +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_gen_header.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_gen_header.h +index ec52122..f0b385e 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_gen_header.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_gen_header.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* THIS FILE IS AUTOGENERATED BY mali_trace_generator.py. +@@ -40,14 +39,14 @@ + * defined. See documentation below: + */ + +-/** ++/* + * The name of the variable where the result BLOB will be stored. + */ + #if !defined(MIPE_HEADER_BLOB_VAR_NAME) + #error "MIPE_HEADER_BLOB_VAR_NAME must be defined!" + #endif + +-/** ++/* + * A compiler attribute for the BLOB variable. + * + * e.g. __attribute__((section("my_section"))) +@@ -58,6 +57,17 @@ + #define MIPE_HEADER_BLOB_VAR_ATTRIBUTE + #endif + ++/** ++ * A compiler attribute for packing structures ++ * ++ * e.g. 
__packed ++ * ++ * Default value is __attribute__((__packed__)) ++ */ ++#if !defined(MIPE_HEADER_PACKED_ATTRIBUTE) ++#define MIPE_HEADER_PACKED_ATTRIBUTE __attribute__((__packed__)) ++#endif ++ + /** + * MIPE stream id. + * +@@ -67,7 +77,7 @@ + #error "MIPE_HEADER_STREAM_ID must be defined!" + #endif + +-/** ++/* + * MIPE packet class. + * + * See enum tl_packet_class. +@@ -76,10 +86,11 @@ + #error "MIPE_HEADER_PKT_CLASS must be defined!" + #endif + +-/** ++/* + * The list of tracepoints to process. + * + * It should be defined as follows: ++ * + * #define MIPE_HEADER_TRACEPOINT_LIST \ + * TRACEPOINT_DESC(FIRST_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \ + * TRACEPOINT_DESC(SECOND_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \ +@@ -94,17 +105,18 @@ + #error "MIPE_HEADER_TRACEPOINT_LIST must be defined!" + #endif + +-/** ++/* + * The number of entries in MIPE_HEADER_TRACEPOINT_LIST. + */ + #if !defined(MIPE_HEADER_TRACEPOINT_LIST_SIZE) + #error "MIPE_HEADER_TRACEPOINT_LIST_SIZE must be defined!" + #endif + +-/** ++/* + * The list of enums to process. + * + * It should be defined as follows: ++ * + * #define MIPE_HEADER_ENUM_LIST \ + * ENUM_DESC(enum_arg_name, enum_value) \ + * ENUM_DESC(enum_arg_name, enum_value) \ +@@ -117,7 +129,7 @@ + */ + #if defined(MIPE_HEADER_ENUM_LIST) + +-/** ++/* + * Tracepoint message ID used for enums declaration. 
+ */ + #if !defined(MIPE_HEADER_ENUM_MSG_ID) +@@ -149,7 +161,7 @@ const struct + char _arg_types[sizeof(arg_types)]; \ + u32 _size_arg_names; \ + char _arg_names[sizeof(arg_names)]; \ +- } __attribute__ ((__packed__)) __ ## name; ++ } MIPE_HEADER_PACKED_ATTRIBUTE __ ## name; + + #define ENUM_DESC(arg_name, value) \ + struct { \ +@@ -159,13 +171,13 @@ const struct + u32 _value; \ + u32 _value_str_len; \ + char _value_str[sizeof(#value)]; \ +- } __attribute__ ((__packed__)) __ ## arg_name ## _ ## value; ++ } MIPE_HEADER_PACKED_ATTRIBUTE __ ## arg_name ## _ ## value; + + MIPE_HEADER_TRACEPOINT_LIST + MIPE_HEADER_ENUM_LIST + #undef TRACEPOINT_DESC + #undef ENUM_DESC +-} __attribute__((packed)) MIPE_HEADER_BLOB_VAR_NAME MIPE_HEADER_BLOB_VAR_ATTRIBUTE = { ++} MIPE_HEADER_PACKED_ATTRIBUTE MIPE_HEADER_BLOB_VAR_NAME MIPE_HEADER_BLOB_VAR_ATTRIBUTE = { + ._mipe_w0 = MIPE_PACKET_HEADER_W0( + TL_PACKET_FAMILY_TL, + MIPE_HEADER_PKT_CLASS, +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_proto.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_proto.h +index 54667cf..c35ee61 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_proto.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_proto.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* THIS FILE IS AUTOGENERATED BY mali_trace_generator.py. +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.c +index 38ae46e..4554bee 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.h +index 431b1f4..1eae2fc 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. 
++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KBASE_NATIVE_MGM_H_ +@@ -25,7 +24,7 @@ + + #include + +-/** ++/* + * kbase_native_mgm_dev - Native memory group manager device + * + * An implementation of the memory group manager interface that is intended for +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c +index fbb090e..bf525ed 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2011-2014, 2016-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2014, 2016-2017, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include +@@ -26,7 +25,6 @@ + #include + #include + +- + /* + * This file is included only for type definitions and functions belonging to + * specific platform folders. Do not add dependencies with symbols that are +@@ -41,14 +39,13 @@ static struct platform_device *mali_device; + + #ifndef CONFIG_OF + /** +- * @brief Convert data in struct kbase_io_resources struct to Linux-specific resources ++ * Convert data in struct kbase_io_resources struct to Linux-specific resources ++ * @io_resources: Input IO resource data ++ * @linux_resources: Pointer to output array of Linux resource structures + * + * Function converts data in struct kbase_io_resources struct to an array of Linux resource structures. Note that function + * assumes that size of linux_resource array is at least PLATFORM_CONFIG_RESOURCE_COUNT. + * Resources are put in fixed order: I/O memory region, job IRQ, MMU IRQ, GPU IRQ. +- * +- * @param[in] io_resource Input IO resource data +- * @param[out] linux_resources Pointer to output array of Linux resource structures + */ + static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io_resources, struct resource *const linux_resources) + { +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c +index b9ed8c3..de100dd 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2021 ARM Limited. 
All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,15 +17,10 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /** +- * @file mali_kbase_pm.c +- * Base kernel power management APIs ++ * DOC: Base kernel power management APIs + */ + + #include +@@ -33,12 +29,14 @@ + #include + + #include +-#include ++#include + + #ifdef CONFIG_MALI_ARBITER_SUPPORT + #include + #endif /* CONFIG_MALI_ARBITER_SUPPORT */ + ++#include ++ + int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags) + { + return kbase_hwaccess_pm_powerup(kbdev, flags); +@@ -66,14 +64,14 @@ int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, + kbase_pm_lock(kbdev); + + #ifdef CONFIG_MALI_ARBITER_SUPPORT +- if (kbase_arbiter_pm_ctx_active_handle_suspend(kbdev, suspend_handler)) ++ if (kbase_arbiter_pm_ctx_active_handle_suspend(kbdev, ++ suspend_handler)) { ++ kbase_pm_unlock(kbdev); + return 1; ++ } ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + +- if (kbase_pm_is_suspending(kbdev) || +- kbase_pm_is_gpu_lost(kbdev)) { +-#else + if (kbase_pm_is_suspending(kbdev)) { +-#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + switch (suspend_handler) { + case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE: + if (kbdev->pm.active_count != 0) +@@ -101,6 +99,7 @@ int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, + #ifdef CONFIG_MALI_ARBITER_SUPPORT + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_REF_EVENT); + #endif /* CONFIG_MALI_ARBITER_SUPPORT */ ++ 
kbase_clk_rate_trace_manager_gpu_active(kbdev); + } + + kbase_pm_unlock(kbdev); +@@ -128,6 +127,7 @@ void kbase_pm_context_idle(struct kbase_device *kbdev) + if (c == 0) { + /* Last context has gone idle */ + kbase_hwaccess_pm_gpu_idle(kbdev); ++ kbase_clk_rate_trace_manager_gpu_idle(kbdev); + + /* Wake up anyone waiting for this to become 0 (e.g. suspend). + * The waiters must synchronize with us by locking the pm.lock +@@ -171,6 +171,7 @@ void kbase_pm_driver_suspend(struct kbase_device *kbdev) + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbdev->js_data.runpool_irq.submit_allowed = 0; + kbase_disjoint_state_up(kbdev); + for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) + kbase_job_slot_softstop(kbdev, i, NULL); +@@ -184,9 +185,14 @@ void kbase_pm_driver_suspend(struct kbase_device *kbdev) + * all pm references + */ + ++#if !MALI_USE_CSF + /* Suspend job scheduler and associated components, so that it releases all +- * the PM active count references */ ++ * the PM active count references ++ */ + kbasep_js_suspend(kbdev); ++#else ++ kbase_csf_scheduler_pm_suspend(kbdev); ++#endif + + /* Wait for the active count to reach zero. This is not the same as + * waiting for a power down, since not all policies power down when this +@@ -221,14 +227,16 @@ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start) + + /* Initial active call, to power on the GPU/cores if needed */ + #ifdef CONFIG_MALI_ARBITER_SUPPORT +- (void)kbase_pm_context_active_handle_suspend(kbdev, +- (arb_gpu_start ? +- KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED : +- KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE)); ++ if (kbase_pm_context_active_handle_suspend(kbdev, ++ (arb_gpu_start ? 
++ KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED : ++ KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE))) ++ return; + #else + kbase_pm_context_active(kbdev); + #endif + ++#if !MALI_USE_CSF + /* Resume any blocked atoms (which may cause contexts to be scheduled in + * and dependent atoms to run) + */ +@@ -238,6 +246,9 @@ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start) + * atoms + */ + kbasep_js_resume(kbdev); ++#else ++ kbase_csf_scheduler_pm_resume(kbdev); ++#endif + + /* Matching idle call, to power off the GPU/cores if we didn't actually + * need it and the policy doesn't want it on +@@ -245,9 +256,15 @@ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start) + kbase_pm_context_idle(kbdev); + + /* Re-enable GPU hardware counters */ ++#if MALI_USE_CSF ++ kbase_csf_scheduler_spin_lock(kbdev, &flags); ++ kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); ++ kbase_csf_scheduler_spin_unlock(kbdev, flags); ++#else + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++#endif + + /* Resume vinstr */ + kbase_vinstr_resume(kbdev->vinstr_ctx); +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h +index 257f959..980a8d1 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,14 +17,9 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /** +- * @file mali_kbase_pm.h + * Power management API definitions + */ + +@@ -35,6 +31,13 @@ + #define PM_ENABLE_IRQS 0x01 + #define PM_HW_ISSUES_DETECT 0x02 + ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++/* In the case that the GPU was granted by the Arbiter, it will have ++ * already been reset. The following flag ensures it is not reset ++ * twice. ++ */ ++#define PM_NO_RESET 0x04 ++#endif + + /** Initialize the power management framework. + * +@@ -59,12 +62,12 @@ int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags); + + /** + * Halt the power management framework. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * + * Should ensure that no new interrupts are generated, + * but allow any currently running interrupt handlers to complete successfully. + * The GPU is forced off by the time this function returns, regardless of + * whether or not the active power policy asks for the GPU to be powered off. +- * +- * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ + void kbase_pm_halt(struct kbase_device *kbdev); + +@@ -154,6 +157,7 @@ void kbase_pm_context_idle(struct kbase_device *kbdev); + /** + * Suspend the GPU and prevent any further register accesses to it from Kernel + * threads. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This is called in response to an OS suspend event, and calls into the various + * kbase components to complete the suspend. 
+@@ -161,21 +165,18 @@ void kbase_pm_context_idle(struct kbase_device *kbdev); + * @note the mechanisms used here rely on all user-space threads being frozen + * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up + * the GPU e.g. via atom submission. +- * +- * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ + void kbase_pm_suspend(struct kbase_device *kbdev); + + /** + * Resume the GPU, allow register accesses to it, and resume running atoms on + * the GPU. ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This is called in response to an OS resume event, and calls into the various + * kbase components to complete the resume. + * + * Also called when using VM arbiter, when GPU access has been granted. +- * +- * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ + void kbase_pm_resume(struct kbase_device *kbdev); + +@@ -192,8 +193,7 @@ void kbase_pm_vsync_callback(int buffer_updated, void *data); + + /** + * kbase_pm_driver_suspend() - Put GPU and driver in suspend state +- * @param kbdev The kbase device structure for the device +- * (must be a valid pointer) ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Suspend the GPU and prevent any further register accesses to it from Kernel + * threads. +@@ -212,8 +212,8 @@ void kbase_pm_driver_suspend(struct kbase_device *kbdev); + + /** + * kbase_pm_driver_resume() - Put GPU and driver in resume +- * @param kbdev The kbase device structure for the device +- * (must be a valid pointer) ++ * @kbdev: The kbase device structure for the device (must be a valid pointer) ++ * @arb_gpu_start: Arbiter has notified we can use GPU + * + * Resume the GPU, allow register accesses to it, and resume running atoms on + * the GPU. 
+diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c +index 1d114a6..1e807d7 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2016, 2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014, 2016, 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,18 +17,130 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include "mali_kbase.h" +- + #include "mali_kbase_regs_history_debugfs.h" + +-#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) ++#if defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_NO_MALI) + + #include + ++/** ++ * kbase_io_history_resize - resize the register access history buffer. ++ * ++ * @h: Pointer to a valid register history to resize ++ * @new_size: Number of accesses the buffer could hold ++ * ++ * A successful resize will clear all recent register accesses. ++ * If resizing fails for any reason (e.g., could not allocate memory, invalid ++ * buffer size) then the original buffer will be kept intact. 
++ * ++ * @return 0 if the buffer was resized, failure otherwise ++ */ ++static int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size) ++{ ++ struct kbase_io_access *old_buf; ++ struct kbase_io_access *new_buf; ++ unsigned long flags; ++ ++ if (!new_size) ++ goto out_err; /* The new size must not be 0 */ ++ ++ new_buf = vmalloc(new_size * sizeof(*h->buf)); ++ if (!new_buf) ++ goto out_err; ++ ++ spin_lock_irqsave(&h->lock, flags); ++ ++ old_buf = h->buf; ++ ++ /* Note: we won't bother with copying the old data over. The dumping ++ * logic wouldn't work properly as it relies on 'count' both as a ++ * counter and as an index to the buffer which would have changed with ++ * the new array. This is a corner case that we don't need to support. ++ */ ++ h->count = 0; ++ h->size = new_size; ++ h->buf = new_buf; ++ ++ spin_unlock_irqrestore(&h->lock, flags); ++ ++ vfree(old_buf); ++ ++ return 0; ++ ++out_err: ++ return -1; ++} ++ ++int kbase_io_history_init(struct kbase_io_history *h, u16 n) ++{ ++ h->enabled = false; ++ spin_lock_init(&h->lock); ++ h->count = 0; ++ h->size = 0; ++ h->buf = NULL; ++ if (kbase_io_history_resize(h, n)) ++ return -1; ++ ++ return 0; ++} ++ ++void kbase_io_history_term(struct kbase_io_history *h) ++{ ++ vfree(h->buf); ++ h->buf = NULL; ++} ++ ++void kbase_io_history_add(struct kbase_io_history *h, ++ void __iomem const *addr, u32 value, u8 write) ++{ ++ struct kbase_io_access *io; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&h->lock, flags); ++ ++ io = &h->buf[h->count % h->size]; ++ io->addr = (uintptr_t)addr | write; ++ io->value = value; ++ ++h->count; ++ /* If count overflows, move the index by the buffer size so the entire ++ * buffer will still be dumped later ++ */ ++ if (unlikely(!h->count)) ++ h->count = h->size; ++ ++ spin_unlock_irqrestore(&h->lock, flags); ++} ++ ++void kbase_io_history_dump(struct kbase_device *kbdev) ++{ ++ struct kbase_io_history *const h = &kbdev->io_history; ++ size_t i; ++ size_t iters; ++ 
unsigned long flags; ++ ++ if (!unlikely(h->enabled)) ++ return; ++ ++ spin_lock_irqsave(&h->lock, flags); ++ ++ dev_err(kbdev->dev, "Register IO History:"); ++ iters = (h->size > h->count) ? h->count : h->size; ++ dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters, ++ h->count); ++ for (i = 0; i < iters; ++i) { ++ struct kbase_io_access *io = ++ &h->buf[(h->count - iters + i) % h->size]; ++ char const access = (io->addr & 1) ? 'w' : 'r'; ++ ++ dev_err(kbdev->dev, "%6zu: %c: reg 0x%016lx val %08x\n", i, ++ access, (unsigned long)(io->addr & ~0x1), io->value); ++ } ++ ++ spin_unlock_irqrestore(&h->lock, flags); ++} + + static int regs_history_size_get(void *data, u64 *val) + { +@@ -66,7 +179,7 @@ DEFINE_SIMPLE_ATTRIBUTE(regs_history_size_fops, + static int regs_history_show(struct seq_file *sfile, void *data) + { + struct kbase_io_history *const h = sfile->private; +- u16 i; ++ size_t i; + size_t iters; + unsigned long flags; + +@@ -85,8 +198,8 @@ static int regs_history_show(struct seq_file *sfile, void *data) + &h->buf[(h->count - iters + i) % h->size]; + char const access = (io->addr & 1) ? 
'w' : 'r'; + +- seq_printf(sfile, "%6i: %c: reg 0x%016lx val %08x\n", i, access, +- (unsigned long)(io->addr & ~0x1), io->value); ++ seq_printf(sfile, "%6zu: %c: reg 0x%016lx val %08x\n", i, ++ access, (unsigned long)(io->addr & ~0x1), io->value); + } + + spin_unlock_irqrestore(&h->lock, flags); +@@ -95,7 +208,6 @@ out: + return 0; + } + +- + /** + * regs_history_open - open operation for regs_history debugfs file + * +@@ -109,7 +221,6 @@ static int regs_history_open(struct inode *in, struct file *file) + return single_open(file, ®s_history_show, in->i_private); + } + +- + static const struct file_operations regs_history_fops = { + .owner = THIS_MODULE, + .open = ®s_history_open, +@@ -118,7 +229,6 @@ static const struct file_operations regs_history_fops = { + .release = single_release, + }; + +- + void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev) + { + debugfs_create_bool("regs_history_enabled", S_IRUGO | S_IWUSR, +@@ -131,6 +241,4 @@ void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev) + kbdev->mali_debugfs_directory, &kbdev->io_history, + ®s_history_fops); + } +- +- +-#endif /* CONFIG_DEBUG_FS */ ++#endif /* defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_NO_MALI) */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h +index a0078cb..3b181d3 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014, 2016, 2020-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /** +@@ -37,7 +36,31 @@ + + struct kbase_device; + +-#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) ++#if defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_NO_MALI) ++ ++/** ++ * kbase_io_history_init - initialize data struct for register access history ++ * ++ * @h: The register history to initialize ++ * @n: The number of register accesses that the buffer could hold ++ * ++ * @return 0 if successfully initialized, failure otherwise ++ */ ++int kbase_io_history_init(struct kbase_io_history *h, u16 n); ++ ++/** ++ * kbase_io_history_term - uninit all resources for the register access history ++ * ++ * @h: The register history to terminate ++ */ ++void kbase_io_history_term(struct kbase_io_history *h); ++ ++/** ++ * kbase_io_history_dump - print the register history to the kernel ring buffer ++ * ++ * @kbdev: Pointer to kbase_device containing the register history to dump ++ */ ++void kbase_io_history_dump(struct kbase_device *kbdev); + + /** + * kbasep_regs_history_debugfs_init - add debugfs entries for register history +@@ -46,10 +69,7 @@ struct kbase_device; + */ + void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev); + +-#else /* CONFIG_DEBUG_FS */ +- +-#define kbasep_regs_history_debugfs_init CSTD_NOP +- +-#endif /* CONFIG_DEBUG_FS */ ++#else /* defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_NO_MALI) */ ++#endif /* 
defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_NO_MALI) */ + + #endif /*_KBASE_REGS_HISTORY_DEBUGFS_H*/ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_reset_gpu.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_reset_gpu.h +index df72eec..292a29c 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_reset_gpu.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_reset_gpu.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,16 +17,142 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KBASE_RESET_GPU_H_ + #define _KBASE_RESET_GPU_H_ + ++/** ++ * kbase_reset_gpu_prevent_and_wait - Prevent GPU resets from starting whilst ++ * the current thread is accessing the GPU, ++ * and wait for any in-flight reset to ++ * finish. ++ * @kbdev: Device pointer ++ * ++ * This should be used when a potential access to the HW is going to be made ++ * from a non-atomic context. ++ * ++ * It will wait for any in-flight reset to finish before returning. Hence, ++ * correct lock ordering must be observed with respect to the calling thread ++ * and the reset worker thread. ++ * ++ * This does not synchronize general access to the HW, and so multiple threads ++ * can prevent GPU reset concurrently, whilst not being serialized. 
This is ++ * advantageous as the threads can make this call at points where they do not ++ * know for sure yet whether they will indeed access the GPU (for example, to ++ * respect lock ordering), without unnecessarily blocking others. ++ * ++ * Threads must still use other synchronization to ensure they access the HW ++ * consistently, at a point where they are certain it needs to be accessed. ++ * ++ * On success, ensure that when access to the GPU by the caller thread has ++ * finished, that it calls kbase_reset_gpu_allow() again to allow resets to ++ * happen. ++ * ++ * This may return a failure in cases such as a previous failure to reset the ++ * GPU within a reasonable time. If that happens, the GPU might be ++ * non-operational and the caller should not attempt any further access. ++ * ++ * Note: ++ * For atomic context, instead check kbase_reset_gpu_is_active(). ++ * ++ * Return: 0 on success, or negative error code on failure. ++ */ ++int kbase_reset_gpu_prevent_and_wait(struct kbase_device *kbdev); ++ ++/** ++ * kbase_reset_gpu_try_prevent - Attempt to prevent GPU resets from starting ++ * whilst the current thread is accessing the ++ * GPU, unless a reset is already in progress. ++ * @kbdev: Device pointer ++ * ++ * Similar to kbase_reset_gpu_prevent_and_wait(), but it does not wait for an ++ * existing reset to complete. This can be used on codepaths that the Reset ++ * worker waits on, where use of kbase_reset_gpu_prevent_and_wait() would ++ * otherwise deadlock. ++ * ++ * Instead, a reset that is currently happening will cause this function to ++ * return an error code indicating that, and further resets will not have been ++ * prevented. ++ * ++ * In such cases, the caller must check for -EAGAIN, and take similar actions ++ * as for handling reset in atomic context. That is, they must cancel any ++ * actions that depended on reset being prevented, possibly deferring them ++ * until after the reset. 
++ * ++ * Otherwise a successful return means that the caller can continue its actions ++ * safely in the knowledge that reset is prevented, and the reset worker will ++ * correctly wait instead of deadlocking against this thread. ++ * ++ * On success, ensure that when access to the GPU by the caller thread has ++ * finished, that it calls kbase_reset_gpu_allow() again to allow resets to ++ * happen. ++ * ++ * Refer to kbase_reset_gpu_prevent_and_wait() for more information. ++ * ++ * Return: 0 on success. -EAGAIN if a reset is currently happening. Other ++ * negative error codes on failure. ++ */ ++int kbase_reset_gpu_try_prevent(struct kbase_device *kbdev); ++ ++/** ++ * kbase_reset_gpu_allow - Allow GPU resets to happen again after having been ++ * previously prevented. ++ * @kbdev: Device pointer ++ * ++ * This should be used when a potential access to the HW has finished from a ++ * non-atomic context. ++ * ++ * It must be used from the same thread that originally made a previous call ++ * to kbase_reset_gpu_prevent_and_wait(). It must not be deferred to another ++ * thread. ++ */ ++void kbase_reset_gpu_allow(struct kbase_device *kbdev); ++ ++/** ++ * kbase_reset_gpu_assert_prevented - Make debugging checks that GPU reset is ++ * currently prevented by the current ++ * thread. ++ * @kbdev: Device pointer ++ * ++ * Make debugging checks that the current thread has made a call to ++ * kbase_reset_gpu_prevent_and_wait(), but has yet to make a subsequent call to ++ * kbase_reset_gpu_allow(). ++ * ++ * CONFIG_LOCKDEP is required to prove that reset is indeed ++ * prevented. Otherwise only limited debugging checks can be made. ++ */ ++void kbase_reset_gpu_assert_prevented(struct kbase_device *kbdev); ++ ++/** ++ * kbase_reset_gpu_assert_failed_or_prevented - Make debugging checks that ++ * either GPU reset previously ++ * failed, or is currently ++ * prevented.
++ * ++ * @kbdev: Device pointer ++ * ++ * As with kbase_reset_gpu_assert_prevented(), but also allow for paths where ++ * reset was not prevented due to a failure, yet we still need to execute the ++ * cleanup code following. ++ * ++ * Cleanup code following this call must handle any inconsistent state modified ++ * by the failed GPU reset, and must timeout any blocking operations instead of ++ * waiting forever. ++ */ ++void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev); ++ ++/** ++ * Flags for kbase_prepare_to_reset_gpu ++ */ ++#define RESET_FLAGS_NONE ((unsigned int)0) ++/* This reset should be treated as an unrecoverable error by HW counter logic */ ++#define RESET_FLAGS_HWC_UNRECOVERABLE_ERROR ((unsigned int)(1 << 0)) ++ + /** + * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU. + * @kbdev: Device pointer ++ * @flags: Bitfield indicating impact of reset (see flag defines) + * + * Caller is expected to hold the kbdev->hwaccess_lock. + * +@@ -34,18 +161,20 @@ + * - false - Another thread is performing a reset, kbase_reset_gpu should + * not be called. + */ +-bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev); ++bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, ++ unsigned int flags); + + /** + * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU. + * @kbdev: Device pointer +- * ++ * @flags: Bitfield indicating impact of reset (see flag defines) ++ * + * Return: a boolean which should be interpreted as follows: + * - true - Prepared for reset, kbase_reset_gpu should be called. + * - false - Another thread is performing a reset, kbase_reset_gpu should + * not be called.
+ */ +-bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev); ++bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags); + + /** + * kbase_reset_gpu - Reset the GPU +@@ -95,8 +224,13 @@ int kbase_reset_gpu_silent(struct kbase_device *kbdev); + * kbase_reset_gpu_is_active - Reports if the GPU is being reset + * @kbdev: Device pointer + * +- * Return: True if the GPU is in the process of being reset (or if the reset of +- * GPU failed, not applicable to Job Manager GPUs). ++ * Any changes made to the HW when this returns true may be lost, overwritten ++ * or corrupted. ++ * ++ * Note that unless appropriate locks are held when using this function, the ++ * state could change immediately afterwards. ++ * ++ * Return: True if the GPU is in the process of being reset. + */ + bool kbase_reset_gpu_is_active(struct kbase_device *kbdev); + +@@ -126,14 +260,4 @@ int kbase_reset_gpu_init(struct kbase_device *kbdev); + */ + void kbase_reset_gpu_term(struct kbase_device *kbdev); + +-/** +- * kbase_reset_gpu_register_complete_cb - Register the callback function to be +- * invoked on completion of GPU reset. +- * +- * @kbdev: Device pointer +- * @complete_callback: Pointer to the callback function +- */ +-void kbase_reset_gpu_register_complete_cb(struct kbase_device *kbdev, +- int (*complete_callback)(struct kbase_device *kbdev)); +- + #endif +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c +index b5c7b12..abbe8d5 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2015, 2018, 2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2015, 2018, 2020-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,11 +17,9 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +-#ifdef CONFIG_ARM64 ++#if IS_ENABLED(CONFIG_ARM64) + + #include + #include +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h +index 221eb21..d0086db 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2015, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,16 +17,12 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + #ifndef _KBASE_SMC_H_ + #define _KBASE_SMC_H_ + +-#ifdef CONFIG_ARM64 ++#if IS_ENABLED(CONFIG_ARM64) + + #include + +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c +index cbb0c76..bee3513 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + #include + + #include +@@ -30,8 +27,9 @@ + #include + #endif + #include +-#include ++#include + #include ++#include + #include + #include + #include +@@ -42,10 +40,9 @@ + #include + #include + ++#if !MALI_USE_CSF + /** +- * @file mali_kbase_softjobs.c +- * +- * This file implements the logic behind software only jobs that are ++ * DOC: This file implements the logic behind software only jobs that are + * executed within the driver rather than being handed over to the GPU. 
+ */ + +@@ -136,7 +133,7 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) + void *user_result; + struct timespec64 ts; + struct base_dump_cpu_gpu_counters data; +- u64 system_time; ++ u64 system_time = 0ULL; + u64 cycle_counter; + u64 jc = katom->jc; + struct kbase_context *kctx = katom->kctx; +@@ -146,7 +143,11 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) + + /* Take the PM active reference as late as possible - otherwise, it could + * delay suspend until we process the atom (which may be at the end of a +- * long chain of dependencies */ ++ * long chain of dependencies ++ */ ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ atomic_inc(&kctx->kbdev->pm.gpu_users_waiting); ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); + if (pm_active_err) { + struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data; +@@ -164,6 +165,10 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) + + return pm_active_err; + } ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ else ++ atomic_dec(&kctx->kbdev->pm.gpu_users_waiting); ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + + kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time, + &ts); +@@ -181,7 +186,8 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) + /* GPU_WR access is checked on the range for returning the result to + * userspace for the following reasons: + * - security, this is currently how imported user bufs are checked. 
+- * - userspace ddk guaranteed to assume region was mapped as GPU_WR */ ++ * - userspace ddk guaranteed to assume region was mapped as GPU_WR ++ */ + user_result = kbase_vmap_prot(kctx, jc, sizeof(data), KBASE_REG_GPU_WR, &map); + if (!user_result) + return 0; +@@ -292,7 +298,7 @@ static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom) + + if (!kbase_sync_fence_in_info_get(dep, &info)) { + dev_warn(dev, +- "\tVictim trigger atom %d fence [%p] %s: %s\n", ++ "\tVictim trigger atom %d fence [%pK] %s: %s\n", + kbase_jd_atom_id(kctx, dep), + info.fence, + info.name, +@@ -321,11 +327,11 @@ static void kbase_fence_debug_wait_timeout(struct kbase_jd_atom *katom) + return; + } + +- dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%p] after %dms\n", ++ dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%pK] after %dms\n", + kctx->tgid, kctx->id, + kbase_jd_atom_id(kctx, katom), + info.fence, timeout_ms); +- dev_warn(dev, "\tGuilty fence [%p] %s: %s\n", ++ dev_warn(dev, "\tGuilty fence [%pK] %s: %s\n", + info.fence, info.name, + kbase_sync_status_string(info.status)); + +@@ -713,14 +719,16 @@ out_unlock: + + out_cleanup: + /* Frees allocated memory for kbase_debug_copy_job struct, including +- * members, and sets jc to 0 */ ++ * members, and sets jc to 0 ++ */ + kbase_debug_copy_finish(katom); + kfree(user_buffers); + + return ret; + } ++#endif /* !MALI_USE_CSF */ + +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) ++#if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE + static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc, + unsigned long page_num, struct page **page) + { +@@ -801,16 +809,16 @@ int kbase_mem_copy_from_extres(struct kbase_context *kctx, + dma_to_copy = min(dma_buf->size, + (size_t)(buf_data->nr_extres_pages * PAGE_SIZE)); + ret = dma_buf_begin_cpu_access(dma_buf, +-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS) +- 0, dma_to_copy, ++#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE 
&& !defined(CONFIG_CHROMEOS) ++ 0, dma_to_copy, + #endif +- DMA_FROM_DEVICE); ++ DMA_FROM_DEVICE); + if (ret) + goto out_unlock; + + for (i = 0; i < dma_to_copy/PAGE_SIZE && + target_page_nr < buf_data->nr_pages; i++) { +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) ++#if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE + struct page *pg; + void *extres_page = dma_buf_kmap_page(gpu_alloc, i, &pg); + #else +@@ -822,20 +830,20 @@ int kbase_mem_copy_from_extres(struct kbase_context *kctx, + buf_data->nr_pages, + &target_page_nr, offset); + +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) ++#if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE + kunmap(pg); + #else + dma_buf_kunmap(dma_buf, i, extres_page); + #endif + if (ret) +- goto out_unlock; ++ break; + } + } + dma_buf_end_cpu_access(dma_buf, +-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS) +- 0, dma_to_copy, ++#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS) ++ 0, dma_to_copy, + #endif +- DMA_FROM_DEVICE); ++ DMA_FROM_DEVICE); + break; + } + default: +@@ -846,6 +854,7 @@ out_unlock: + return ret; + } + ++#if !MALI_USE_CSF + static int kbase_debug_copy(struct kbase_jd_atom *katom) + { + struct kbase_debug_copy_buffer *buffers = katom->softjob_data; +@@ -863,6 +872,7 @@ static int kbase_debug_copy(struct kbase_jd_atom *katom) + + return 0; + } ++#endif /* !MALI_USE_CSF */ + + #define KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT ((u32)0x7) + +@@ -899,7 +909,7 @@ int kbasep_jit_alloc_validate(struct kbase_context *kctx, + if (info->flags & ~(BASE_JIT_ALLOC_VALID_FLAGS)) + return -EINVAL; + +-#if !MALI_JIT_PRESSURE_LIMIT ++#if !MALI_JIT_PRESSURE_LIMIT_BASE + /* If just-in-time memory allocation pressure limit feature is disabled, + * heap_info_gpu_addr must be zeroed-out + */ +@@ -907,21 +917,19 @@ int kbasep_jit_alloc_validate(struct kbase_context *kctx, + return -EINVAL; + #endif + ++#if !MALI_USE_CSF + /* If BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE is set, 
heap_info_gpu_addr + * cannot be 0 + */ + if ((info->flags & BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) && + !info->heap_info_gpu_addr) + return -EINVAL; ++#endif /* !MALI_USE_CSF */ + + return 0; + } + +- +-#if (KERNEL_VERSION(3, 18, 63) > LINUX_VERSION_CODE) +-#define offsetofend(TYPE, MEMBER) \ +- (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER)) +-#endif ++#if !MALI_USE_CSF + + /* + * Sizes of user data to copy for each just-in-time memory interface version +@@ -998,10 +1006,10 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) + ret = kbasep_jit_alloc_validate(kctx, info); + if (ret) + goto free_info; +- KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO(kbdev, katom, +- info->va_pages, info->commit_pages, info->extent, +- info->id, info->bin_id, info->max_allocations, +- info->flags, info->usage_id); ++ KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO( ++ kbdev, katom, info->va_pages, info->commit_pages, ++ info->extension, info->id, info->bin_id, ++ info->max_allocations, info->flags, info->usage_id); + } + + katom->jit_blocked = false; +@@ -1016,7 +1024,7 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) + * though the region is valid it doesn't represent the + * same thing it used to. + * +- * Complete validation of va_pages, commit_pages and extent ++ * Complete validation of va_pages, commit_pages and extension + * isn't done here as it will be done during the call to + * kbase_mem_alloc. + */ +@@ -1091,14 +1099,19 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) + } + } + +-#if MALI_JIT_PRESSURE_LIMIT +- /** +- * If this is the only JIT_ALLOC atom in-flight then allow it to exceed +- * the defined pressure limit. ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ /* ++ * If this is the only JIT_ALLOC atom in-flight or if JIT pressure limit ++ * is disabled at the context scope, then bypass JIT pressure limit ++ * logic in kbase_jit_allocate(). 
+ */ +- if (kctx->jit_current_allocations == 0) ++ if (!kbase_ctx_flag(kctx, KCTX_JPL_ENABLED) ++ || (kctx->jit_current_allocations == 0)) { + ignore_pressure_limit = true; +-#endif /* MALI_JIT_PRESSURE_LIMIT */ ++ } ++#else ++ ignore_pressure_limit = true; ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + for (i = 0, info = katom->softjob_data; i < count; i++, info++) { + if (kctx->jit_alloc[info->id]) { +@@ -1215,10 +1228,10 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) + MIDGARD_MMU_BOTTOMLEVEL, kctx->jit_group_id); + #endif + +- KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(kbdev, katom, +- info->gpu_alloc_addr, new_addr, info->flags, +- entry_mmu_flags, info->id, info->commit_pages, +- info->extent, info->va_pages); ++ KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT( ++ kbdev, katom, info->gpu_alloc_addr, new_addr, ++ info->flags, entry_mmu_flags, info->id, ++ info->commit_pages, info->extension, info->va_pages); + kbase_vunmap(kctx, &mapping); + + kbase_trace_jit_report_gpu_mem(kctx, reg, +@@ -1358,12 +1371,16 @@ void kbase_jit_retry_pending_alloc(struct kbase_context *kctx) + list_for_each_safe(i, tmp, &jit_pending_alloc_list) { + struct kbase_jd_atom *pending_atom = list_entry(i, + struct kbase_jd_atom, queue); ++ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(kctx->kbdev, pending_atom); ++ kbase_kinstr_jm_atom_sw_start(pending_atom); + if (kbase_jit_allocate_process(pending_atom) == 0) { + /* Atom has completed */ + INIT_WORK(&pending_atom->work, + kbasep_jit_finish_worker); + queue_work(kctx->jctx.job_done_wq, &pending_atom->work); + } ++ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(kctx->kbdev, pending_atom); ++ kbase_kinstr_jm_atom_sw_stop(pending_atom); + } + } + +@@ -1412,41 +1429,27 @@ static int kbase_ext_res_prepare(struct kbase_jd_atom *katom) + struct base_external_resource_list *ext_res; + u64 count = 0; + size_t copy_size; +- int ret; + + user_ext_res = (__user struct base_external_resource_list *) + (uintptr_t) katom->jc; + + /* Fail the job if there is 
no info structure */ +- if (!user_ext_res) { +- ret = -EINVAL; +- goto fail; +- } ++ if (!user_ext_res) ++ return -EINVAL; + +- if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) { +- ret = -EINVAL; +- goto fail; +- } ++ if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) ++ return -EINVAL; + + /* Is the number of external resources in range? */ +- if (!count || count > BASE_EXT_RES_COUNT_MAX) { +- ret = -EINVAL; +- goto fail; +- } ++ if (!count || count > BASE_EXT_RES_COUNT_MAX) ++ return -EINVAL; + + /* Copy the information for safe access and future storage */ + copy_size = sizeof(*ext_res); + copy_size += sizeof(struct base_external_resource) * (count - 1); +- ext_res = kzalloc(copy_size, GFP_KERNEL); +- if (!ext_res) { +- ret = -ENOMEM; +- goto fail; +- } +- +- if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) { +- ret = -EINVAL; +- goto free_info; +- } ++ ext_res = memdup_user(user_ext_res, copy_size); ++ if (IS_ERR(ext_res)) ++ return PTR_ERR(ext_res); + + /* + * Overwrite the count with the first value incase it was changed +@@ -1457,11 +1460,6 @@ static int kbase_ext_res_prepare(struct kbase_jd_atom *katom) + katom->softjob_data = ext_res; + + return 0; +- +-free_info: +- kfree(ext_res); +-fail: +- return ret; + } + + static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map) +@@ -1538,6 +1536,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) + struct kbase_device *kbdev = kctx->kbdev; + + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(kbdev, katom); ++ kbase_kinstr_jm_atom_sw_start(katom); + + trace_sysgraph(SGR_SUBMIT, kctx->id, + kbase_jd_atom_id(kctx, katom)); +@@ -1600,6 +1599,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) + + /* Atom is complete */ + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(kbdev, katom); ++ kbase_kinstr_jm_atom_sw_stop(katom); + return ret; + } + +@@ -1635,7 +1635,9 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom) + struct base_fence fence; + int 
fd; + +- if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence))) ++ if (copy_from_user(&fence, ++ (__user void *)(uintptr_t)katom->jc, ++ sizeof(fence)) != 0) + return -EINVAL; + + fd = kbase_sync_fence_out_create(katom, +@@ -1644,7 +1646,8 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom) + return -EINVAL; + + fence.basep.fd = fd; +- if (0 != copy_to_user((__user void *)(uintptr_t) katom->jc, &fence, sizeof(fence))) { ++ if (copy_to_user((__user void *)(uintptr_t)katom->jc, ++ &fence, sizeof(fence)) != 0) { + kbase_sync_fence_out_remove(katom); + kbase_sync_fence_close_fd(fd); + fence.basep.fd = -EINVAL; +@@ -1657,7 +1660,9 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom) + struct base_fence fence; + int ret; + +- if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence))) ++ if (copy_from_user(&fence, ++ (__user void *)(uintptr_t)katom->jc, ++ sizeof(fence)) != 0) + return -EINVAL; + + /* Get a reference to the fence object */ +@@ -1776,6 +1781,9 @@ void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev) + if (kbase_process_soft_job(katom_iter) == 0) { + kbase_finish_soft_job(katom_iter); + resched |= jd_done_nolock(katom_iter, NULL); ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ atomic_dec(&kbdev->pm.gpu_users_waiting); ++#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + } + mutex_unlock(&kctx->jctx.lock); + } +@@ -1783,3 +1791,4 @@ void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev) + if (resched) + kbase_js_sched_all(kbdev); + } ++#endif /* !MALI_USE_CSF */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c +index 22caa4a..84784be 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c +@@ -1,11 +1,12 @@ +- /* ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* + * +- * (C) COPYRIGHT 2010-2016 ARM 
Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2016, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,9 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ ++ + #include "mali_kbase_strings.h" + + #define KBASE_DRV_NAME "mali" +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h +index d2f1825..c3f94f9 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2016, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + extern const char kbase_drv_name[]; +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h +index 80b54d0..ad05cdf 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2012-2016, 2018-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,14 +17,10 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /** +- * @file mali_kbase_sync.h +- * +- * This file contains our internal "API" for explicit fences. ++ * DOC: This file contains our internal "API" for explicit fences. + * It hides the implementation details of the actual explicit fence mechanism + * used (Android fences or sync file with DMA fences). 
+ */ +@@ -31,11 +28,12 @@ + #ifndef MALI_KBASE_SYNC_H + #define MALI_KBASE_SYNC_H + ++#include + #include +-#ifdef CONFIG_SYNC ++#if IS_ENABLED(CONFIG_SYNC) + #include + #endif +-#ifdef CONFIG_SYNC_FILE ++#if IS_ENABLED(CONFIG_SYNC_FILE) + #include "mali_kbase_fence_defs.h" + #include + #endif +@@ -72,6 +70,7 @@ struct kbase_sync_fence_info { + */ + int kbase_sync_fence_stream_create(const char *name, int *const out_fd); + ++#if !MALI_USE_CSF + /** + * kbase_sync_fence_out_create Create an explicit output fence to specified atom + * @katom: Atom to assign the new explicit fence to +@@ -92,6 +91,7 @@ int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd); + * return: 0 on success, < 0 on error + */ + int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd); ++#endif /* !MALI_USE_CSF */ + + /** + * kbase_sync_fence_validate() - Validate a fd to be a valid fence +@@ -104,6 +104,7 @@ int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd); + */ + int kbase_sync_fence_validate(int fd); + ++#if !MALI_USE_CSF + /** + * kbase_sync_fence_out_trigger - Signal explicit output fence attached on katom + * @katom: Atom with an explicit fence to signal +@@ -154,6 +155,7 @@ void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom); + * This will also release the corresponding reference. 
+ */ + void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom); ++#endif /* !MALI_USE_CSF */ + + /** + * kbase_sync_fence_close_fd() - Close a file descriptor representing a fence +@@ -161,13 +163,16 @@ void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom); + */ + static inline void kbase_sync_fence_close_fd(int fd) + { +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) ++#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE ++ close_fd(fd); ++#elif KERNEL_VERSION(4, 17, 0) <= LINUX_VERSION_CODE + ksys_close(fd); + #else + sys_close(fd); + #endif + } + ++#if !MALI_USE_CSF + /** + * kbase_sync_fence_in_info_get() - Retrieves information about input fence + * @katom: Atom to get fence information from +@@ -187,6 +192,7 @@ int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, + */ + int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, + struct kbase_sync_fence_info *info); ++#endif /* !MALI_USE_CSF */ + + #if defined(CONFIG_SYNC_FILE) + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +@@ -207,6 +213,7 @@ void kbase_sync_fence_info_get(struct dma_fence *fence, + const char *kbase_sync_status_string(int status); + + ++#if !MALI_USE_CSF + /* + * Internal worker used to continue processing of atom. + */ +@@ -219,5 +226,6 @@ void kbase_sync_fence_wait_worker(struct work_struct *data); + */ + void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom); + #endif ++#endif /* !MALI_USE_CSF */ + + #endif /* MALI_KBASE_SYNC_H */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_android.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_android.c +index 75940fb..8af2584 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_android.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_android.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2017, 2020-2021 ARM Limited. 
All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +@@ -50,15 +49,6 @@ struct mali_sync_pt { + int result; + }; + +-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) +-/* For backwards compatibility with kernels before 3.17. After 3.17 +- * sync_pt_parent is included in the kernel. */ +-static inline struct sync_timeline *sync_pt_parent(struct sync_pt *pt) +-{ +- return pt->parent; +-} +-#endif +- + static struct mali_sync_timeline *to_mali_sync_timeline( + struct sync_timeline *timeline) + { +@@ -196,6 +186,7 @@ int kbase_sync_fence_stream_create(const char *name, int *const out_fd) + return 0; + } + ++#if !MALI_USE_CSF + /* Allocates a sync point within the timeline. 
+ * + * The timeline must be the one allocated by kbase_sync_timeline_alloc +@@ -225,10 +216,6 @@ int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int tl_fd) + struct sync_timeline *tl; + struct sync_pt *pt; + struct sync_fence *fence; +-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0) +- struct files_struct *files; +- struct fdtable *fdt; +-#endif + int fd; + struct file *tl_file; + +@@ -259,29 +246,11 @@ int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int tl_fd) + /* from here the fence owns the sync_pt */ + + /* create a fd representing the fence */ +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) + fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC); + if (fd < 0) { + sync_fence_put(fence); + goto out; + } +-#else +- fd = get_unused_fd(); +- if (fd < 0) { +- sync_fence_put(fence); +- goto out; +- } +- +- files = current->files; +- spin_lock(&files->file_lock); +- fdt = files_fdtable(files); +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) +- __set_close_on_exec(fd, fdt); +-#else +- FD_SET(fd, fdt->close_on_exec); +-#endif +- spin_unlock(&files->file_lock); +-#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */ + + /* bind fence to the new fd */ + sync_fence_install(fence, fd); +@@ -289,7 +258,8 @@ int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int tl_fd) + katom->fence = sync_fence_fdget(fd); + if (katom->fence == NULL) { + /* The only way the fence can be NULL is if userspace closed it +- * for us, so we don't need to clear it up */ ++ * for us, so we don't need to clear it up ++ */ + fd = -EINVAL; + goto out; + } +@@ -305,6 +275,7 @@ int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd) + katom->fence = sync_fence_fdget(fd); + return katom->fence ? 
0 : -ENOENT; + } ++#endif /* !MALI_USE_CSF */ + + int kbase_sync_fence_validate(int fd) + { +@@ -318,6 +289,7 @@ int kbase_sync_fence_validate(int fd) + return 0; + } + ++#if !MALI_USE_CSF + /* Returns true if the specified timeline is allocated by Mali */ + static int kbase_sync_timeline_is_ours(struct sync_timeline *timeline) + { +@@ -376,22 +348,14 @@ kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result) + if (!katom->fence) + return BASE_JD_EVENT_JOB_CANCELLED; + +-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) +- if (!list_is_singular(&katom->fence->pt_list_head)) { +-#else + if (katom->fence->num_fences != 1) { +-#endif + /* Not exactly one item in the list - so it didn't (directly) +- * come from us */ ++ * come from us ++ */ + return BASE_JD_EVENT_JOB_CANCELLED; + } + +-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) +- pt = list_first_entry(&katom->fence->pt_list_head, +- struct sync_pt, pt_list); +-#else + pt = container_of(katom->fence->cbs[0].sync_pt, struct sync_pt, base); +-#endif + timeline = sync_pt_parent(pt); + + if (!kbase_sync_timeline_is_ours(timeline)) { +@@ -413,11 +377,7 @@ static inline int kbase_fence_get_status(struct sync_fence *fence) + if (!fence) + return -ENOENT; + +-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) +- return fence->status; +-#else + return atomic_read(&fence->status); +-#endif + } + + static void kbase_fence_wait_callback(struct sync_fence *fence, +@@ -461,7 +421,8 @@ int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom) + if (ret < 0) { + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + /* We should cause the dependent jobs in the bag to be failed, +- * to do this we schedule the work queue to complete this job */ ++ * to do this we schedule the work queue to complete this job ++ */ + INIT_WORK(&katom->work, kbase_sync_fence_wait_worker); + queue_work(katom->kctx->jctx.job_done_wq, &katom->work); + } +@@ -473,7 +434,8 @@ void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom) + 
{ + if (sync_fence_cancel_async(katom->fence, &katom->sync_waiter) != 0) { + /* The wait wasn't cancelled - leave the cleanup for +- * kbase_fence_wait_callback */ ++ * kbase_fence_wait_callback ++ */ + return; + } + +@@ -540,3 +502,4 @@ void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom) + sync_fence_wait(katom->fence, 1); + } + #endif ++#endif /* !MALI_USE_CSF */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_common.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_common.c +index 2e1ede5..5ee7fc3 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_common.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_common.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2012-2016, 2018-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,10 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +- * @file mali_kbase_sync_common.c ++ * @file + * + * Common code for our explicit fence functionality + */ +@@ -30,6 +29,7 @@ + #include "mali_kbase.h" + #include "mali_kbase_sync.h" + ++#if !MALI_USE_CSF + void kbase_sync_fence_wait_worker(struct work_struct *data) + { + struct kbase_jd_atom *katom; +@@ -37,6 +37,7 @@ void kbase_sync_fence_wait_worker(struct work_struct *data) + katom = container_of(data, struct kbase_jd_atom, work); + kbase_soft_event_wait_callback(katom); + } ++#endif /* !MALI_USE_CSF */ + + const char *kbase_sync_status_string(int status) + { +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_file.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_file.c +index 0679c48..25670c4 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_file.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_file.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +@@ -59,9 +58,10 @@ int kbase_sync_fence_stream_create(const char *name, int *const out_fd) + return 0; + } + ++#if !MALI_USE_CSF + int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd) + { +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence; + #else + struct dma_fence *fence; +@@ -106,7 +106,7 @@ int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd) + + int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd) + { +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence = sync_file_get_fence(fd); + #else + struct dma_fence *fence = sync_file_get_fence(fd); +@@ -119,10 +119,11 @@ int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd) + + return 0; + } ++#endif /* !MALI_USE_CSF */ + + int kbase_sync_fence_validate(int fd) + { +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence = sync_file_get_fence(fd); + #else + struct dma_fence *fence = sync_file_get_fence(fd); +@@ -136,6 +137,7 @@ int kbase_sync_fence_validate(int fd) + return 0; /* valid */ + } + ++#if !MALI_USE_CSF + enum base_jd_event_code + kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result) + { +@@ -157,7 +159,7 @@ kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result) + return (result != 0) ? 
BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE; + } + +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + static void kbase_fence_wait_callback(struct fence *fence, + struct fence_cb *cb) + #else +@@ -175,7 +177,7 @@ static void kbase_fence_wait_callback(struct dma_fence *fence, + #if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \ + (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ + KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)) +- if (dma_fence_is_signaled(kcb->fence) && kcb->fence->error) ++ if (dma_fence_is_signaled(kcb->fence) && kcb->fence->error < 0) + #else + if (dma_fence_is_signaled(kcb->fence) && kcb->fence->status < 0) + #endif +@@ -200,7 +202,7 @@ static void kbase_fence_wait_callback(struct dma_fence *fence, + int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom) + { + int err; +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence; + #else + struct dma_fence *fence; +@@ -233,8 +235,8 @@ int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom) + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + + /* We should cause the dependent jobs in the bag to be failed, +- * to do this we schedule the work queue to complete this job */ +- ++ * to do this we schedule the work queue to complete this job ++ */ + INIT_WORK(&katom->work, kbase_sync_fence_wait_worker); + queue_work(katom->kctx->jctx.job_done_wq, &katom->work); + } +@@ -246,7 +248,8 @@ void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom) + { + if (!kbase_fence_free_callbacks(katom)) { + /* The wait wasn't cancelled - +- * leave the cleanup for kbase_fence_wait_callback */ ++ * leave the cleanup for kbase_fence_wait_callback ++ */ + return; + } + +@@ -273,6 +276,7 @@ void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom) + kbase_fence_free_callbacks(katom); + kbase_fence_in_remove(katom); + } ++#endif /* !MALI_USE_CSF */ + + #if 
(KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + void kbase_sync_fence_info_get(struct fence *fence, +@@ -317,10 +321,11 @@ void kbase_sync_fence_info_get(struct dma_fence *fence, + #endif + } + ++#if !MALI_USE_CSF + int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, + struct kbase_sync_fence_info *info) + { +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence; + #else + struct dma_fence *fence; +@@ -340,7 +345,7 @@ int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, + int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, + struct kbase_sync_fence_info *info) + { +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) ++#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence; + #else + struct dma_fence *fence; +@@ -364,3 +369,4 @@ void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom) + /* Not implemented */ + } + #endif ++#endif /* !MALI_USE_CSF*/ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_gpu_mem.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_gpu_mem.c +new file mode 100644 +index 0000000..3088c41 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_gpu_mem.c +@@ -0,0 +1,221 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++ ++/** ++ * struct kbase_dma_buf - Object instantiated when a dma-buf imported allocation ++ * is mapped to GPU for the first time within a process. ++ * Another instantiation is done for the case when that ++ * allocation is mapped for the first time to GPU. ++ * ++ * @dma_buf: Reference to the dma_buf being imported. ++ * @dma_buf_node: Link node to maintain a rb_tree of kbase_dma_buf. ++ * @import_count: The number of times the dma_buf was imported. ++ */ ++struct kbase_dma_buf { ++ struct dma_buf *dma_buf; ++ struct rb_node dma_buf_node; ++ u32 import_count; ++}; ++ ++/** ++ * kbase_delete_dma_buf_mapping - Delete a dma buffer mapping. ++ * ++ * @kctx: Pointer to kbase context. ++ * @dma_buf: Pointer to a dma buffer mapping. ++ * @tree: Pointer to root of rb_tree containing the mapped dma_bufs. ++ * ++ * When we un-map any dma mapping we need to remove it from the rb_tree. ++ * An rb_tree is maintained at kbase_device level and kbase_process level; ++ * by passing the root of kbase_device or kbase_process we can remove ++ * the node from the tree. 
++ */ ++static bool kbase_delete_dma_buf_mapping(struct kbase_context *kctx, ++ struct dma_buf *dma_buf, ++ struct rb_root *tree) ++{ ++ struct kbase_dma_buf *buf_node = NULL; ++ struct rb_node *node = tree->rb_node; ++ bool mapping_removed = false; ++ ++ lockdep_assert_held(&kctx->kbdev->dma_buf_lock); ++ ++ while (node) { ++ buf_node = rb_entry(node, struct kbase_dma_buf, dma_buf_node); ++ ++ if (dma_buf == buf_node->dma_buf) { ++ WARN_ON(!buf_node->import_count); ++ ++ buf_node->import_count--; ++ ++ if (!buf_node->import_count) { ++ rb_erase(&buf_node->dma_buf_node, tree); ++ kfree(buf_node); ++ mapping_removed = true; ++ } ++ ++ break; ++ } ++ ++ if (dma_buf < buf_node->dma_buf) ++ node = node->rb_left; ++ else ++ node = node->rb_right; ++ } ++ ++ WARN_ON(!buf_node); ++ return mapping_removed; ++} ++ ++/** ++ * kbase_capture_dma_buf_mapping - capture a dma buffer mapping. ++ * ++ * @kctx: Pointer to kbase context. ++ * @dma_buf: Pointer to a dma buffer mapping. ++ * @root: Pointer to root of rb_tree containing the dma_buf's. ++ * ++ * We maintain a kbase_device level and kbase_process level rb_tree ++ * of all unique dma_buf's mapped to gpu memory. So when attach any ++ * dma_buf add it the rb_tree's. To add the unique mapping we need ++ * check if the mapping is not a duplicate and then add them. 
++ */ ++static bool kbase_capture_dma_buf_mapping(struct kbase_context *kctx, ++ struct dma_buf *dma_buf, ++ struct rb_root *root) ++{ ++ struct kbase_dma_buf *buf_node = NULL; ++ struct rb_node *node = root->rb_node; ++ bool unique_buf_imported = true; ++ ++ lockdep_assert_held(&kctx->kbdev->dma_buf_lock); ++ ++ while (node) { ++ buf_node = rb_entry(node, struct kbase_dma_buf, dma_buf_node); ++ ++ if (dma_buf == buf_node->dma_buf) { ++ unique_buf_imported = false; ++ break; ++ } ++ ++ if (dma_buf < buf_node->dma_buf) ++ node = node->rb_left; ++ else ++ node = node->rb_right; ++ } ++ ++ if (unique_buf_imported) { ++ struct kbase_dma_buf *new_buf_node = ++ kzalloc(sizeof(*new_buf_node), GFP_KERNEL); ++ ++ if (new_buf_node == NULL) { ++ dev_err(kctx->kbdev->dev, "Error allocating memory for kbase_dma_buf\n"); ++ /* Dont account for it if we fail to allocate memory */ ++ unique_buf_imported = false; ++ } else { ++ struct rb_node **new = &(root->rb_node), *parent = NULL; ++ ++ new_buf_node->dma_buf = dma_buf; ++ new_buf_node->import_count = 1; ++ while (*new) { ++ struct kbase_dma_buf *new_node; ++ ++ parent = *new; ++ new_node = rb_entry(parent, struct kbase_dma_buf, ++ dma_buf_node); ++ if (dma_buf < new_node->dma_buf) ++ new = &(*new)->rb_left; ++ else ++ new = &(*new)->rb_right; ++ } ++ rb_link_node(&new_buf_node->dma_buf_node, parent, new); ++ rb_insert_color(&new_buf_node->dma_buf_node, root); ++ } ++ } else if (!WARN_ON(!buf_node)) { ++ buf_node->import_count++; ++ } ++ ++ return unique_buf_imported; ++} ++ ++void kbase_remove_dma_buf_usage(struct kbase_context *kctx, ++ struct kbase_mem_phy_alloc *alloc) ++{ ++ struct kbase_device *kbdev = kctx->kbdev; ++ bool dev_mapping_removed, prcs_mapping_removed; ++ ++ mutex_lock(&kbdev->dma_buf_lock); ++ ++ dev_mapping_removed = kbase_delete_dma_buf_mapping( ++ kctx, alloc->imported.umm.dma_buf, &kbdev->dma_buf_root); ++ ++ prcs_mapping_removed = kbase_delete_dma_buf_mapping( ++ kctx, alloc->imported.umm.dma_buf, 
&kctx->kprcs->dma_buf_root); ++ ++ WARN_ON(dev_mapping_removed && !prcs_mapping_removed); ++ ++ spin_lock(&kbdev->gpu_mem_usage_lock); ++ if (dev_mapping_removed) ++ kbdev->total_gpu_pages -= alloc->nents; ++ ++ if (prcs_mapping_removed) ++ kctx->kprcs->total_gpu_pages -= alloc->nents; ++ ++ if (dev_mapping_removed || prcs_mapping_removed) ++ kbase_trace_gpu_mem_usage(kbdev, kctx); ++ spin_unlock(&kbdev->gpu_mem_usage_lock); ++ ++ mutex_unlock(&kbdev->dma_buf_lock); ++} ++ ++void kbase_add_dma_buf_usage(struct kbase_context *kctx, ++ struct kbase_mem_phy_alloc *alloc) ++{ ++ struct kbase_device *kbdev = kctx->kbdev; ++ bool unique_dev_dmabuf, unique_prcs_dmabuf; ++ ++ mutex_lock(&kbdev->dma_buf_lock); ++ ++ /* add dma_buf to device and process. */ ++ unique_dev_dmabuf = kbase_capture_dma_buf_mapping( ++ kctx, alloc->imported.umm.dma_buf, &kbdev->dma_buf_root); ++ ++ unique_prcs_dmabuf = kbase_capture_dma_buf_mapping( ++ kctx, alloc->imported.umm.dma_buf, &kctx->kprcs->dma_buf_root); ++ ++ WARN_ON(unique_dev_dmabuf && !unique_prcs_dmabuf); ++ ++ spin_lock(&kbdev->gpu_mem_usage_lock); ++ if (unique_dev_dmabuf) ++ kbdev->total_gpu_pages += alloc->nents; ++ ++ if (unique_prcs_dmabuf) ++ kctx->kprcs->total_gpu_pages += alloc->nents; ++ ++ if (unique_prcs_dmabuf || unique_dev_dmabuf) ++ kbase_trace_gpu_mem_usage(kbdev, kctx); ++ spin_unlock(&kbdev->gpu_mem_usage_lock); ++ ++ mutex_unlock(&kbdev->dma_buf_lock); ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_gpu_mem.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_gpu_mem.h +new file mode 100644 +index 0000000..fd871fc +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_gpu_mem.h +@@ -0,0 +1,100 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#ifndef _KBASE_TRACE_GPU_MEM_H_ ++#define _KBASE_TRACE_GPU_MEM_H_ ++ ++#if IS_ENABLED(CONFIG_TRACE_GPU_MEM) ++#include ++#endif ++ ++#define DEVICE_TGID ((u32) 0U) ++ ++static void kbase_trace_gpu_mem_usage(struct kbase_device *kbdev, ++ struct kbase_context *kctx) ++{ ++#if IS_ENABLED(CONFIG_TRACE_GPU_MEM) ++ lockdep_assert_held(&kbdev->gpu_mem_usage_lock); ++ ++ trace_gpu_mem_total(kbdev->id, DEVICE_TGID, ++ kbdev->total_gpu_pages << PAGE_SHIFT); ++ ++ if (likely(kctx)) ++ trace_gpu_mem_total(kbdev->id, kctx->kprcs->tgid, ++ kctx->kprcs->total_gpu_pages << PAGE_SHIFT); ++#endif ++} ++ ++static inline void kbase_trace_gpu_mem_usage_dec(struct kbase_device *kbdev, ++ struct kbase_context *kctx, size_t pages) ++{ ++ spin_lock(&kbdev->gpu_mem_usage_lock); ++ ++ if (likely(kctx)) ++ kctx->kprcs->total_gpu_pages -= pages; ++ ++ kbdev->total_gpu_pages -= pages; ++ ++ kbase_trace_gpu_mem_usage(kbdev, kctx); ++ ++ spin_unlock(&kbdev->gpu_mem_usage_lock); ++} ++ ++static inline void kbase_trace_gpu_mem_usage_inc(struct kbase_device *kbdev, ++ struct kbase_context *kctx, size_t pages) ++{ ++ spin_lock(&kbdev->gpu_mem_usage_lock); ++ ++ if (likely(kctx)) ++ kctx->kprcs->total_gpu_pages += pages; ++ ++ kbdev->total_gpu_pages += pages; ++ ++ 
kbase_trace_gpu_mem_usage(kbdev, kctx); ++ ++ spin_unlock(&kbdev->gpu_mem_usage_lock); ++} ++ ++/** ++ * kbase_remove_dma_buf_usage - Remove a dma-buf entry captured. ++ * ++ * @kctx: Pointer to the kbase context ++ * @alloc: Pointer to the alloc to unmap ++ * ++ * Remove reference to dma buf being unmapped from kbase_device level ++ * rb_tree and kbase_process level dma buf rb_tree. ++ */ ++void kbase_remove_dma_buf_usage(struct kbase_context *kctx, ++ struct kbase_mem_phy_alloc *alloc); ++ ++/** ++ * kbase_add_dma_buf_usage - Add a dma-buf entry captured. ++ * ++ * @kctx: Pointer to the kbase context ++ * @alloc: Pointer to the alloc to map in ++ * ++ * Add reference to dma buf being mapped to kbase_device level ++ * rb_tree and kbase_process level dma buf rb_tree. ++ */ ++void kbase_add_dma_buf_usage(struct kbase_context *kctx, ++ struct kbase_mem_phy_alloc *alloc); ++ ++#endif /* _KBASE_TRACE_GPU_MEM_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h +index 8d4f044..2dad49b 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2012-2013, 2015, 2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2013, 2015, 2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + #ifndef _KBASE_UTILITY_H + #define _KBASE_UTILITY_H + +@@ -32,7 +29,7 @@ + static inline void kbase_timer_setup(struct timer_list *timer, + void (*callback)(struct timer_list *timer)) + { +-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0) ++#if KERNEL_VERSION(4, 14, 0) > LINUX_VERSION_CODE + setup_timer(timer, (void (*)(unsigned long)) callback, + (unsigned long) timer); + #else +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c +index d96b565..64405af 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,16 +17,14 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include "mali_kbase_vinstr.h" + #include "mali_kbase_hwcnt_virtualizer.h" + #include "mali_kbase_hwcnt_types.h" +-#include "mali_kbase_hwcnt_reader.h" ++#include + #include "mali_kbase_hwcnt_gpu.h" +-#include "mali_kbase_ioctl.h" ++#include + #include "mali_malisw.h" + #include "mali_kbase_debug.h" + +@@ -33,6 +32,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -53,6 +53,10 @@ + * counters. + * @hvirt: Hardware counter virtualizer used by vinstr. + * @metadata: Hardware counter metadata provided by virtualizer. ++ * @metadata_user: API compatible hardware counter metadata provided by vinstr. ++ * For compatibility with the user driver interface, this ++ * contains a "truncated" version of the HWCNT metadata limited ++ * to 64 entries per block. NULL when not required. + * @lock: Lock protecting all vinstr state. + * @suspend_count: Suspend reference count. If non-zero, timer and worker are + * prevented from being re-scheduled. +@@ -64,6 +68,7 @@ + struct kbase_vinstr_context { + struct kbase_hwcnt_virtualizer *hvirt; + const struct kbase_hwcnt_metadata *metadata; ++ const struct kbase_hwcnt_metadata *metadata_user; + struct mutex lock; + size_t suspend_count; + size_t client_count; +@@ -83,6 +88,7 @@ struct kbase_vinstr_context { + * @next_dump_time_ns: Time in ns when this client's next periodic dump must + * occur. If 0, not a periodic client. + * @enable_map: Counters enable map. ++ * @tmp_buf: Temporary buffer to use before handing dump to client. + * @dump_bufs: Array of dump buffers allocated by this client. + * @dump_bufs_meta: Metadata of dump buffers. + * @meta_idx: Index of metadata being accessed by userspace. 
+@@ -97,6 +103,7 @@ struct kbase_vinstr_client { + u64 next_dump_time_ns; + u32 dump_interval_ns; + struct kbase_hwcnt_enable_map enable_map; ++ struct kbase_hwcnt_dump_buffer tmp_buf; + struct kbase_hwcnt_dump_buffer_array dump_bufs; + struct kbase_hwcnt_reader_metadata *dump_bufs_meta; + atomic_t meta_idx; +@@ -182,8 +189,10 @@ static int kbasep_vinstr_client_dump( + u64 ts_end_ns; + unsigned int write_idx; + unsigned int read_idx; ++ struct kbase_hwcnt_dump_buffer *tmp_buf; + struct kbase_hwcnt_dump_buffer *dump_buf; + struct kbase_hwcnt_reader_metadata *meta; ++ u8 clk_cnt; + + WARN_ON(!vcli); + lockdep_assert_held(&vcli->vctx->lock); +@@ -198,23 +207,42 @@ static int kbasep_vinstr_client_dump( + + dump_buf = &vcli->dump_bufs.bufs[write_idx]; + meta = &vcli->dump_bufs_meta[write_idx]; ++ tmp_buf = &vcli->tmp_buf; + + errcode = kbase_hwcnt_virtualizer_client_dump( +- vcli->hvcli, &ts_start_ns, &ts_end_ns, dump_buf); ++ vcli->hvcli, &ts_start_ns, &ts_end_ns, tmp_buf); + if (errcode) + return errcode; + + /* Patch the dump buf headers, to hide the counters that other hwcnt + * clients are using. + */ +- kbase_hwcnt_gpu_patch_dump_headers(dump_buf, &vcli->enable_map); ++ kbase_hwcnt_gpu_patch_dump_headers(tmp_buf, &vcli->enable_map); ++ ++ /* Copy the temp buffer to the userspace visible buffer. The strict ++ * variant will explicitly zero any non-enabled counters to ensure ++ * nothing except exactly what the user asked for is made visible. ++ * ++ * If the metadata in vinstr (vctx->metadata_user) is not NULL, it means ++ * vinstr has the truncated metadata, so do a narrow copy since ++ * virtualizer has a bigger buffer but user only needs part of it. ++ * otherwise we do a full copy. 
++ */ ++ if (vcli->vctx->metadata_user) ++ kbase_hwcnt_dump_buffer_copy_strict_narrow(dump_buf, tmp_buf, ++ &vcli->enable_map); ++ else ++ kbase_hwcnt_dump_buffer_copy_strict(dump_buf, tmp_buf, ++ &vcli->enable_map); + +- /* Zero all non-enabled counters (current values are undefined) */ +- kbase_hwcnt_dump_buffer_zero_non_enabled(dump_buf, &vcli->enable_map); ++ clk_cnt = vcli->vctx->metadata->clk_cnt; + + meta->timestamp = ts_end_ns; + meta->event_id = event_id; + meta->buffer_idx = write_idx; ++ meta->cycles.top = (clk_cnt > 0) ? dump_buf->clk_cnt_buf[0] : 0; ++ meta->cycles.shader_cores = ++ (clk_cnt > 1) ? dump_buf->clk_cnt_buf[1] : 0; + + /* Notify client. Make sure all changes to memory are visible. */ + wmb(); +@@ -345,11 +373,7 @@ static enum hrtimer_restart kbasep_vinstr_dump_timer(struct hrtimer *timer) + * cancelled, and the worker itself won't reschedule this timer if + * suspend_count != 0. + */ +-#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE +- queue_work(system_wq, &vctx->dump_work); +-#else +- queue_work(system_highpri_wq, &vctx->dump_work); +-#endif ++ kbase_hwcnt_virtualizer_queue_work(vctx->hvirt, &vctx->dump_work); + return HRTIMER_NORESTART; + } + +@@ -365,6 +389,7 @@ static void kbasep_vinstr_client_destroy(struct kbase_vinstr_client *vcli) + kbase_hwcnt_virtualizer_client_destroy(vcli->hvcli); + kfree(vcli->dump_bufs_meta); + kbase_hwcnt_dump_buffer_array_free(&vcli->dump_bufs); ++ kbase_hwcnt_dump_buffer_free(&vcli->tmp_buf); + kbase_hwcnt_enable_map_free(&vcli->enable_map); + kfree(vcli); + } +@@ -374,7 +399,7 @@ static void kbasep_vinstr_client_destroy(struct kbase_vinstr_client *vcli) + * the vinstr context. + * @vctx: Non-NULL pointer to vinstr context. + * @setup: Non-NULL pointer to hardware counter ioctl setup structure. +- * setup->buffer_count must not be 0. ++ * setup->buffer_count must not be 0 and must be a power of 2. + * @out_vcli: Non-NULL pointer to where created client will be stored on + * success. 
+ * +@@ -392,6 +417,7 @@ static int kbasep_vinstr_client_create( + WARN_ON(!vctx); + WARN_ON(!setup); + WARN_ON(setup->buffer_count == 0); ++ WARN_ON(!is_power_of_2(setup->buffer_count)); + + vcli = kzalloc(sizeof(*vcli), GFP_KERNEL); + if (!vcli) +@@ -404,14 +430,36 @@ static int kbasep_vinstr_client_create( + if (errcode) + goto error; + +- phys_em.jm_bm = setup->jm_bm; ++ phys_em.fe_bm = setup->fe_bm; + phys_em.shader_bm = setup->shader_bm; + phys_em.tiler_bm = setup->tiler_bm; + phys_em.mmu_l2_bm = setup->mmu_l2_bm; + kbase_hwcnt_gpu_enable_map_from_physical(&vcli->enable_map, &phys_em); + +- errcode = kbase_hwcnt_dump_buffer_array_alloc( +- vctx->metadata, setup->buffer_count, &vcli->dump_bufs); ++ /* Use virtualizer's metadata to alloc tmp buffer which interacts with ++ * the HWC virtualizer. ++ */ ++ errcode = kbase_hwcnt_dump_buffer_alloc(vctx->metadata, &vcli->tmp_buf); ++ if (errcode) ++ goto error; ++ ++ /* Enable all the available clk_enable_map. */ ++ vcli->enable_map.clk_enable_map = (1ull << vctx->metadata->clk_cnt) - 1; ++ ++ if (vctx->metadata_user) ++ /* Use vinstr's truncated metadata to alloc dump buffers which ++ * interact with clients. ++ */ ++ errcode = ++ kbase_hwcnt_dump_buffer_array_alloc(vctx->metadata_user, ++ setup->buffer_count, ++ &vcli->dump_bufs); ++ else ++ /* Use metadata from virtualizer to allocate dump buffers if ++ * vinstr doesn't have the truncated metadata. 
++ */ ++ errcode = kbase_hwcnt_dump_buffer_array_alloc( ++ vctx->metadata, setup->buffer_count, &vcli->dump_bufs); + if (errcode) + goto error; + +@@ -439,6 +487,7 @@ int kbase_vinstr_init( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_vinstr_context **out_vctx) + { ++ int errcode; + struct kbase_vinstr_context *vctx; + const struct kbase_hwcnt_metadata *metadata; + +@@ -455,6 +504,11 @@ int kbase_vinstr_init( + + vctx->hvirt = hvirt; + vctx->metadata = metadata; ++ vctx->metadata_user = NULL; ++ errcode = kbase_hwcnt_gpu_metadata_create_truncate_64( ++ &vctx->metadata_user, metadata); ++ if (errcode) ++ goto err_metadata_create; + + mutex_init(&vctx->lock); + INIT_LIST_HEAD(&vctx->clients); +@@ -464,6 +518,11 @@ int kbase_vinstr_init( + + *out_vctx = vctx; + return 0; ++ ++err_metadata_create: ++ kfree(vctx); ++ ++ return errcode; + } + + void kbase_vinstr_term(struct kbase_vinstr_context *vctx) +@@ -484,6 +543,9 @@ void kbase_vinstr_term(struct kbase_vinstr_context *vctx) + } + } + ++ if (vctx->metadata_user) ++ kbase_hwcnt_metadata_destroy(vctx->metadata_user); ++ + WARN_ON(vctx->client_count != 0); + kfree(vctx); + } +@@ -543,11 +605,8 @@ void kbase_vinstr_resume(struct kbase_vinstr_context *vctx) + } + + if (has_periodic_clients) +-#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE +- queue_work(system_wq, &vctx->dump_work); +-#else +- queue_work(system_highpri_wq, &vctx->dump_work); +-#endif ++ kbase_hwcnt_virtualizer_queue_work( ++ vctx->hvirt, &vctx->dump_work); + } + } + +@@ -564,7 +623,8 @@ int kbase_vinstr_hwcnt_reader_setup( + + if (!vctx || !setup || + (setup->buffer_count == 0) || +- (setup->buffer_count > MAX_BUFFER_COUNT)) ++ (setup->buffer_count > MAX_BUFFER_COUNT) || ++ !is_power_of_2(setup->buffer_count)) + return -EINVAL; + + errcode = kbasep_vinstr_client_create(vctx, setup, &vcli); +@@ -675,26 +735,31 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( + unsigned int idx = meta_idx % cli->dump_bufs.buf_cnt; + + struct 
kbase_hwcnt_reader_metadata *meta = &cli->dump_bufs_meta[idx]; ++ const size_t meta_size = sizeof(struct kbase_hwcnt_reader_metadata); ++ const size_t min_size = min(size, meta_size); + + /* Metadata sanity check. */ + WARN_ON(idx != meta->buffer_idx); + +- if (sizeof(struct kbase_hwcnt_reader_metadata) != size) +- return -EINVAL; +- + /* Check if there is any buffer available. */ +- if (atomic_read(&cli->write_idx) == meta_idx) ++ if (unlikely(atomic_read(&cli->write_idx) == meta_idx)) + return -EAGAIN; + + /* Check if previously taken buffer was put back. */ +- if (atomic_read(&cli->read_idx) != meta_idx) ++ if (unlikely(atomic_read(&cli->read_idx) != meta_idx)) + return -EBUSY; + ++ /* Clear user buffer to zero. */ ++ if (unlikely(meta_size < size && clear_user(buffer, size))) ++ return -EFAULT; ++ + /* Copy next available buffer's metadata to user. */ +- if (copy_to_user(buffer, meta, size)) ++ if (unlikely(copy_to_user(buffer, meta, min_size))) + return -EFAULT; + +- atomic_inc(&cli->meta_idx); ++ /* Compare exchange meta idx to protect against concurrent getters */ ++ if (meta_idx != atomic_cmpxchg(&cli->meta_idx, meta_idx, meta_idx + 1)) ++ return -EBUSY; + + return 0; + } +@@ -715,24 +780,68 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( + unsigned int read_idx = atomic_read(&cli->read_idx); + unsigned int idx = read_idx % cli->dump_bufs.buf_cnt; + +- struct kbase_hwcnt_reader_metadata meta; +- +- if (sizeof(struct kbase_hwcnt_reader_metadata) != size) +- return -EINVAL; ++ struct kbase_hwcnt_reader_metadata *meta; ++ const size_t meta_size = sizeof(struct kbase_hwcnt_reader_metadata); ++ const size_t max_size = max(size, meta_size); ++ int ret = 0; ++ u8 stack_kbuf[64]; ++ u8 *kbuf = NULL; ++ size_t i; + + /* Check if any buffer was taken. 
*/ +- if (atomic_read(&cli->meta_idx) == read_idx) ++ if (unlikely(atomic_read(&cli->meta_idx) == read_idx)) + return -EPERM; + ++ if (likely(max_size <= sizeof(stack_kbuf))) { ++ /* Use stack buffer when the size is small enough. */ ++ if (unlikely(meta_size > size)) ++ memset(stack_kbuf, 0, sizeof(stack_kbuf)); ++ kbuf = stack_kbuf; ++ } else { ++ kbuf = kzalloc(max_size, GFP_KERNEL); ++ if (unlikely(!kbuf)) ++ return -ENOMEM; ++ } ++ ++ /* ++ * Copy user buffer to zero cleared kernel buffer which has enough ++ * space for both user buffer and kernel metadata. ++ */ ++ if (unlikely(copy_from_user(kbuf, buffer, size))) { ++ ret = -EFAULT; ++ goto out; ++ } ++ ++ /* ++ * Make sure any "extra" data passed from userspace is zero. ++ * It's meaningful only in case meta_size < size. ++ */ ++ for (i = meta_size; i < size; i++) { ++ /* Check if user data beyond meta size is zero. */ ++ if (unlikely(kbuf[i] != 0)) { ++ ret = -EINVAL; ++ goto out; ++ } ++ } ++ + /* Check if correct buffer is put back. */ +- if (copy_from_user(&meta, buffer, size)) +- return -EFAULT; +- if (idx != meta.buffer_idx) +- return -EINVAL; ++ meta = (struct kbase_hwcnt_reader_metadata *)kbuf; ++ if (unlikely(idx != meta->buffer_idx)) { ++ ret = -EINVAL; ++ goto out; ++ } + +- atomic_inc(&cli->read_idx); ++ /* Compare exchange read idx to protect against concurrent putters */ ++ if (read_idx != ++ atomic_cmpxchg(&cli->read_idx, read_idx, read_idx + 1)) { ++ ret = -EPERM; ++ goto out; ++ } + +- return 0; ++out: ++ if (unlikely(kbuf != stack_kbuf)) ++ kfree(kbuf); ++ return ret; + } + + /** +@@ -760,11 +869,8 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval( + * worker is already queued. 
+ */ + if ((interval != 0) && (cli->vctx->suspend_count == 0)) +-#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE +- queue_work(system_wq, &cli->vctx->dump_work); +-#else +- queue_work(system_highpri_wq, &cli->vctx->dump_work); +-#endif ++ kbase_hwcnt_virtualizer_queue_work(cli->vctx->hvirt, ++ &cli->vctx->dump_work); + + mutex_unlock(&cli->vctx->lock); + +@@ -813,26 +919,58 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( + struct kbase_vinstr_client *cli, + u32 __user *hwver) + { +- u32 ver = 0; ++ u32 ver = 5; + const enum kbase_hwcnt_gpu_group_type type = + kbase_hwcnt_metadata_group_type(cli->vctx->metadata, 0); + +- switch (type) { +- case KBASE_HWCNT_GPU_GROUP_TYPE_V4: +- ver = 4; +- break; +- case KBASE_HWCNT_GPU_GROUP_TYPE_V5: +- ver = 5; +- break; +- default: +- WARN_ON(true); +- } +- +- if (ver != 0) { +- return put_user(ver, hwver); +- } else { ++ if (WARN_ON(type != KBASE_HWCNT_GPU_GROUP_TYPE_V5)) + return -EINVAL; ++ ++ return put_user(ver, hwver); ++} ++ ++/** ++ * The hwcnt reader's ioctl command - get API version. ++ * @cli: The non-NULL pointer to the client ++ * @arg: Command's argument. ++ * @size: Size of arg. ++ * ++ * @return 0 on success, else error code. 
++ */ ++static long kbasep_vinstr_hwcnt_reader_ioctl_get_api_version( ++ struct kbase_vinstr_client *cli, unsigned long arg, size_t size) ++{ ++ long ret = -EINVAL; ++ ++ if (size == sizeof(u32)) { ++ ret = put_user(HWCNT_READER_API, (u32 __user *)arg); ++ } else if (size == sizeof(struct kbase_hwcnt_reader_api_version)) { ++ u8 clk_cnt = cli->vctx->metadata->clk_cnt; ++ unsigned long bytes = 0; ++ struct kbase_hwcnt_reader_api_version api_version = { ++ .version = HWCNT_READER_API, ++ .features = KBASE_HWCNT_READER_API_VERSION_NO_FEATURE, ++ }; ++ ++ if (clk_cnt > 0) ++ api_version.features |= ++ KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_TOP; ++ if (clk_cnt > 1) ++ api_version.features |= ++ KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES; ++ ++ bytes = copy_to_user( ++ (void __user *)arg, &api_version, sizeof(api_version)); ++ ++ /* copy_to_user returns zero in case of success. ++ * If it fails, it returns the number of bytes that could NOT be copied ++ */ ++ if (bytes == 0) ++ ret = 0; ++ else ++ ret = -EFAULT; + } ++ return ret; + } + + /** +@@ -858,42 +996,48 @@ static long kbasep_vinstr_hwcnt_reader_ioctl( + if (!cli) + return -EINVAL; + +- switch (cmd) { +- case KBASE_HWCNT_READER_GET_API_VERSION: +- rcode = put_user(HWCNT_READER_API, (u32 __user *)arg); ++ switch (_IOC_NR(cmd)) { ++ case _IOC_NR(KBASE_HWCNT_READER_GET_API_VERSION): ++ rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_api_version( ++ cli, arg, _IOC_SIZE(cmd)); + break; +- case KBASE_HWCNT_READER_GET_HWVER: ++ case _IOC_NR(KBASE_HWCNT_READER_GET_HWVER): + rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( + cli, (u32 __user *)arg); + break; +- case KBASE_HWCNT_READER_GET_BUFFER_SIZE: +- rcode = put_user( +- (u32)cli->vctx->metadata->dump_buf_bytes, +- (u32 __user *)arg); ++ case _IOC_NR(KBASE_HWCNT_READER_GET_BUFFER_SIZE): ++ if (cli->vctx->metadata_user) ++ rcode = put_user( ++ (u32)cli->vctx->metadata_user->dump_buf_bytes, ++ (u32 __user *)arg); ++ else ++ rcode = put_user( ++ 
(u32)cli->vctx->metadata->dump_buf_bytes, ++ (u32 __user *)arg); + break; +- case KBASE_HWCNT_READER_DUMP: ++ case _IOC_NR(KBASE_HWCNT_READER_DUMP): + rcode = kbasep_vinstr_hwcnt_reader_ioctl_dump(cli); + break; +- case KBASE_HWCNT_READER_CLEAR: ++ case _IOC_NR(KBASE_HWCNT_READER_CLEAR): + rcode = kbasep_vinstr_hwcnt_reader_ioctl_clear(cli); + break; +- case KBASE_HWCNT_READER_GET_BUFFER: ++ case _IOC_NR(KBASE_HWCNT_READER_GET_BUFFER): + rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( + cli, (void __user *)arg, _IOC_SIZE(cmd)); + break; +- case KBASE_HWCNT_READER_PUT_BUFFER: ++ case _IOC_NR(KBASE_HWCNT_READER_PUT_BUFFER): + rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( + cli, (void __user *)arg, _IOC_SIZE(cmd)); + break; +- case KBASE_HWCNT_READER_SET_INTERVAL: ++ case _IOC_NR(KBASE_HWCNT_READER_SET_INTERVAL): + rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval( + cli, (u32)arg); + break; +- case KBASE_HWCNT_READER_ENABLE_EVENT: ++ case _IOC_NR(KBASE_HWCNT_READER_ENABLE_EVENT): + rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event( + cli, (enum base_hwcnt_reader_event)arg); + break; +- case KBASE_HWCNT_READER_DISABLE_EVENT: ++ case _IOC_NR(KBASE_HWCNT_READER_DISABLE_EVENT): + rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event( + cli, (enum base_hwcnt_reader_event)arg); + break; +@@ -955,7 +1099,16 @@ static int kbasep_vinstr_hwcnt_reader_mmap( + return -EINVAL; + + vm_size = vma->vm_end - vma->vm_start; +- size = cli->dump_bufs.buf_cnt * cli->vctx->metadata->dump_buf_bytes; ++ ++ /* The mapping is allowed to span the entirety of the page allocation, ++ * not just the chunk where the dump buffers are allocated. ++ * This accommodates the corner case where the combined size of the ++ * dump buffers is smaller than a single page. ++ * This does not pose a security risk as the pages are zeroed on ++ * allocation, and anything out of bounds of the dump buffers is never ++ * written to. 
++ */ ++ size = (1ull << cli->dump_bufs.page_order) * PAGE_SIZE; + + if (vma->vm_pgoff > (size >> PAGE_SHIFT)) + return -EINVAL; +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h +index 81d315f..6747ec7 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2015-2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h +index f618755..2a243dd 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2011-2016, 2018-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2016, 2018-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #undef TRACE_SYSTEM +@@ -31,7 +30,7 @@ + #if defined(CONFIG_MALI_GATOR_SUPPORT) + #define MALI_JOB_SLOTS_EVENT_CHANGED + +-/** ++/* + * mali_job_slots_event - Reports change of job slot status. + * @gpu_id: Kbase device id + * @event_id: ORed together bitfields representing a type of event, +@@ -181,6 +180,23 @@ TRACE_EVENT(mali_total_alloc_pages_change, + __print_symbolic(KBASE_MMU_FAULT_STATUS_ACCESS(status), \ + KBASE_MMU_FAULT_ACCESS_SYMBOLIC_STRINGS) + ++#if MALI_USE_CSF ++#define KBASE_MMU_FAULT_CODE_VALID(code) \ ++ ((code >= 0xC0 && code <= 0xEB) && \ ++ (!(code >= 0xC5 && code <= 0xC7)) && \ ++ (!(code >= 0xCC && code <= 0xD8)) && \ ++ (!(code >= 0xDC && code <= 0xDF)) && \ ++ (!(code >= 0xE1 && code <= 0xE3))) ++#define KBASE_MMU_FAULT_CODE_SYMBOLIC_STRINGS _ENSURE_PARENTHESIS(\ ++ {0xC0, "TRANSLATION_FAULT_" }, \ ++ {0xC4, "TRANSLATION_FAULT_" }, \ ++ {0xC8, "PERMISSION_FAULT_" }, \ ++ {0xD0, "TRANSTAB_BUS_FAULT_" }, \ ++ {0xD8, "ACCESS_FLAG_" }, \ ++ {0xE0, "ADDRESS_SIZE_FAULT_IN" }, \ ++ {0xE4, "ADDRESS_SIZE_FAULT_OUT" }, \ ++ {0xE8, "MEMORY_ATTRIBUTES_FAULT_" }) ++#else /* MALI_USE_CSF */ + #define KBASE_MMU_FAULT_CODE_VALID(code) \ + ((code >= 0xC0 && code <= 0xEF) && \ + (!(code >= 0xC5 && code <= 0xC6)) && \ +@@ -197,6 +213,7 @@ TRACE_EVENT(mali_total_alloc_pages_change, + {0xE4, "ADDRESS_SIZE_FAULT_OUT" }, \ + {0xE8, "MEMORY_ATTRIBUTES_FAULT_" }, \ 
+ {0xEC, "MEMORY_ATTRIBUTES_NONCACHEABLE_" }) ++#endif /* MALI_USE_CSF */ + #endif /* __TRACE_MALI_MMU_HELPERS */ + + /* trace_mali_mmu_page_fault_grow +@@ -288,7 +305,8 @@ DEFINE_EVENT_PRINT(mali_jit_softjob_template, mali_jit_free, + TP_printk("start=0x%llx va_pages=0x%zx backed_size=0x%zx", + __entry->start_addr, __entry->nr_pages, __entry->backed_pages)); + +-#if MALI_JIT_PRESSURE_LIMIT ++#if !MALI_USE_CSF ++#if MALI_JIT_PRESSURE_LIMIT_BASE + /* trace_mali_jit_report + * + * Tracepoint about the GPU data structure read to form a just-in-time memory +@@ -326,13 +344,11 @@ TRACE_EVENT(mali_jit_report, + ), + __entry->read_val, __entry->used_pages) + ); +-#endif /* MALI_JIT_PRESSURE_LIMIT */ ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ ++#endif /* !MALI_USE_CSF */ + +-#if (KERNEL_VERSION(4, 1, 0) <= LINUX_VERSION_CODE) + TRACE_DEFINE_ENUM(KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); +-#endif +- +-#if MALI_JIT_PRESSURE_LIMIT ++#if MALI_JIT_PRESSURE_LIMIT_BASE + /* trace_mali_jit_report_pressure + * + * Tracepoint about change in physical memory pressure, due to the information +@@ -366,14 +382,13 @@ TRACE_EVENT(mali_jit_report_pressure, + { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE, + "HAPPENED_ON_ALLOC_OR_FREE" })) + ); +-#endif /* MALI_JIT_PRESSURE_LIMIT */ ++#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + #ifndef __TRACE_SYSGRAPH_ENUM + #define __TRACE_SYSGRAPH_ENUM + /* Enum of sysgraph message IDs */ + enum sysgraph_msg { + SGR_ARRIVE, +- SGR_DEP_RES, + SGR_SUBMIT, + SGR_COMPLETE, + SGR_POST, +@@ -401,7 +416,7 @@ TRACE_EVENT(sysgraph, + __entry->message = message; + __entry->atom_id = atom_id; + ), +- TP_printk("msg=%u proc_id=%u, param1=%d\n", __entry->message, ++ TP_printk("msg=%u proc_id=%u, param1=%d", __entry->message, + __entry->proc_id, __entry->atom_id) + ); + +@@ -427,7 +442,7 @@ TRACE_EVENT(sysgraph_gpu, + __entry->atom_id = atom_id; + __entry->js = js; + ), +- TP_printk("msg=%u proc_id=%u, param1=%d, param2=%d\n", ++ TP_printk("msg=%u proc_id=%u, param1=%d, param2=%d", 
+ __entry->message, __entry->proc_id, + __entry->atom_id, __entry->js) + ); +@@ -516,7 +531,7 @@ TRACE_EVENT(mali_jit_trim, + TP_printk("freed_pages=%zu", __entry->freed_pages) + ); + +-#include "mali_kbase_debug_linux_ktrace.h" ++#include "debug/mali_kbase_debug_linux_ktrace.h" + + #endif /* _TRACE_MALI_H */ + +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_malisw.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_malisw.h +index 3a4db10..c0649f2 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_malisw.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_malisw.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014-2015, 2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2015, 2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /** +@@ -28,26 +27,11 @@ + #define _MALISW_H_ + + #include +-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) +-#define U8_MAX ((u8)~0U) +-#define S8_MAX ((s8)(U8_MAX>>1)) +-#define S8_MIN ((s8)(-S8_MAX - 1)) +-#define U16_MAX ((u16)~0U) +-#define S16_MAX ((s16)(U16_MAX>>1)) +-#define S16_MIN ((s16)(-S16_MAX - 1)) +-#define U32_MAX ((u32)~0U) +-#define S32_MAX ((s32)(U32_MAX>>1)) +-#define S32_MIN ((s32)(-S32_MAX - 1)) +-#define U64_MAX ((u64)~0ULL) +-#define S64_MAX ((s64)(U64_MAX>>1)) +-#define S64_MIN ((s64)(-S64_MAX - 1)) +-#endif /* LINUX_VERSION_CODE */ +-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0) +-#define SIZE_MAX (~(size_t)0) +-#endif /* LINUX_VERSION_CODE */ + + /** + * MIN - Return the lesser of two values. ++ * @x: value1 ++ * @y: value2 + * + * As a macro it may evaluate its arguments more than once. + * Refer to MAX macro for more details +@@ -55,7 +39,9 @@ + #define MIN(x, y) ((x) < (y) ? (x) : (y)) + + /** +- * MAX - Return the greater of two values. ++ * MAX - Return the greater of two values. ++ * @x: value1 ++ * @y: value2 + * + * As a macro it may evaluate its arguments more than once. + * If called on the same two arguments as MIN it is guaranteed to return +@@ -67,24 +53,27 @@ + #define MAX(x, y) ((x) < (y) ? (y) : (x)) + + /** +- * @hideinitializer +- * Function-like macro for suppressing unused variable warnings. Where possible +- * such variables should be removed; this macro is present for cases where we +- * much support API backwards compatibility. ++ * Function-like macro for suppressing unused variable warnings. ++ * @x: unused variable ++ * ++ * Where possible such variables should be removed; this macro is present for ++ * cases where we much support API backwards compatibility. + */ + #define CSTD_UNUSED(x) ((void)(x)) + + /** +- * @hideinitializer +- * Function-like macro for use where "no behavior" is desired. 
This is useful +- * when compile time macros turn a function-like macro in to a no-op, but +- * where having no statement is otherwise invalid. ++ * Function-like macro for use where "no behavior" is desired. ++ * @...: no-op ++ * ++ * This is useful when compile time macros turn a function-like macro in to a ++ * no-op, but where having no statement is otherwise invalid. + */ + #define CSTD_NOP(...) ((void)#__VA_ARGS__) + + /** +- * @hideinitializer + * Function-like macro for stringizing a single level macro. ++ * @x: macro's value ++ * + * @code + * #define MY_MACRO 32 + * CSTD_STR1( MY_MACRO ) +@@ -94,10 +83,11 @@ + #define CSTD_STR1(x) #x + + /** +- * @hideinitializer +- * Function-like macro for stringizing a macro's value. This should not be used +- * if the macro is defined in a way which may have no value; use the +- * alternative @c CSTD_STR2N macro should be used instead. ++ * Function-like macro for stringizing a macro's value. ++ * @x: macro's value ++ * ++ * This should not be used if the macro is defined in a way which may have no ++ * value; use the alternative @c CSTD_STR2N macro should be used instead. + * @code + * #define MY_MACRO 32 + * CSTD_STR2( MY_MACRO ) +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_power_gpu_frequency_trace.c +similarity index 67% +rename from dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu.h +rename to dvalin/kernel/drivers/gpu/arm/midgard/mali_power_gpu_frequency_trace.c +index 9516e56..1db3abe 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_power_gpu_frequency_trace.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,16 +17,10 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +-#ifndef _KBASE_GPU_H_ +-#define _KBASE_GPU_H_ +- +-#include "mali_kbase_gpu_regmap.h" +-#include "mali_kbase_gpu_fault.h" +-#include "mali_kbase_gpu_coherency.h" +-#include "mali_kbase_gpu_id.h" +- +-#endif /* _KBASE_GPU_H_ */ ++/* Create the trace point if not configured in kernel */ ++#ifndef CONFIG_TRACE_POWER_GPU_FREQUENCY ++#define CREATE_TRACE_POINTS ++#include "mali_power_gpu_frequency_trace.h" ++#endif +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_power_gpu_frequency_trace.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_power_gpu_frequency_trace.h +new file mode 100644 +index 0000000..ea18fcd +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_power_gpu_frequency_trace.h +@@ -0,0 +1,68 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#ifndef _TRACE_POWER_GPU_FREQUENCY_MALI ++#define _TRACE_POWER_GPU_FREQUENCY_MALI ++#endif ++ ++#undef TRACE_SYSTEM ++#define TRACE_SYSTEM power ++#undef TRACE_INCLUDE_FILE ++#define TRACE_INCLUDE_FILE mali_power_gpu_frequency_trace ++#undef TRACE_INCLUDE_PATH ++#define TRACE_INCLUDE_PATH . ++ ++#if !defined(_TRACE_POWER_GPU_FREQUENCY_H) || defined(TRACE_HEADER_MULTI_READ) ++#define _TRACE_POWER_GPU_FREQUENCY_H ++ ++#include ++ ++DECLARE_EVENT_CLASS(gpu, ++ ++ TP_PROTO(unsigned int state, unsigned int gpu_id), ++ ++ TP_ARGS(state, gpu_id), ++ ++ TP_STRUCT__entry( ++ __field( u32, state ) ++ __field( u32, gpu_id ) ++ ), ++ ++ TP_fast_assign( ++ __entry->state = state; ++ __entry->gpu_id = gpu_id; ++ ), ++ ++ TP_printk("state=%lu gpu_id=%lu", (unsigned long)__entry->state, ++ (unsigned long)__entry->gpu_id) ++); ++ ++DEFINE_EVENT(gpu, gpu_frequency, ++ ++ TP_PROTO(unsigned int frequency, unsigned int gpu_id), ++ ++ TP_ARGS(frequency, gpu_id) ++); ++ ++#endif /* _TRACE_POWER_GPU_FREQUENCY_H */ ++ ++/* This part must be outside protection */ ++#include +diff --git a/dvalin/kernel/drivers/base/protected_memory_allocator/Kconfig b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/Kbuild +similarity index 61% +rename from dvalin/kernel/drivers/base/protected_memory_allocator/Kconfig +rename to dvalin/kernel/drivers/gpu/arm/midgard/mmu/Kbuild +index 7b42d6c..7f2bb26 100644 +--- a/dvalin/kernel/drivers/base/protected_memory_allocator/Kconfig ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/Kbuild +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2019 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. 
+ # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,14 +16,15 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. + # +-# SPDX-License-Identifier: GPL-2.0 +-# + # + ++mali_kbase-y += \ ++ mmu/mali_kbase_mmu.o \ ++ mmu/mali_kbase_mmu_hw_direct.o \ ++ mmu/mali_kbase_mmu_mode_aarch64.o + +-config MALI_PROTECTED_MEMORY_ALLOCATOR +- tristate "MALI_PROTECTED_MEMORY_ALLOCATOR" +- help +- This option enables an example implementation of a protected memory allocator +- for allocation and release of pages of secure memory intended to be used +- by the firmware of Mali GPU device drivers. ++ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) ++ mali_kbase-y += mmu/backend/mali_kbase_mmu_csf.o ++else ++ mali_kbase-y += mmu/backend/mali_kbase_mmu_jm.o ++endif +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/backend/mali_kbase_mmu_csf.c b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/backend/mali_kbase_mmu_csf.c +new file mode 100644 +index 0000000..05253ae +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/backend/mali_kbase_mmu_csf.c +@@ -0,0 +1,565 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++/** ++ * DOC: Base kernel MMU management specific for CSF GPU. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, ++ struct kbase_mmu_setup * const setup) ++{ ++ /* Set up the required caching policies at the correct indices ++ * in the memattr register. ++ */ ++ setup->memattr = ++ (AS_MEMATTR_IMPL_DEF_CACHE_POLICY << ++ (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | ++ (AS_MEMATTR_FORCE_TO_CACHE_ALL << ++ (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | ++ (AS_MEMATTR_WRITE_ALLOC << ++ (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | ++ (AS_MEMATTR_AARCH64_OUTER_IMPL_DEF << ++ (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | ++ (AS_MEMATTR_AARCH64_OUTER_WA << ++ (AS_MEMATTR_INDEX_OUTER_WA * 8)) | ++ (AS_MEMATTR_AARCH64_NON_CACHEABLE << ++ (AS_MEMATTR_INDEX_NON_CACHEABLE * 8)) | ++ (AS_MEMATTR_AARCH64_SHARED << ++ (AS_MEMATTR_INDEX_SHARED * 8)); ++ ++ setup->transtab = (u64)mmut->pgd & AS_TRANSTAB_BASE_MASK; ++ setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K; ++} ++ ++/** ++ * submit_work_pagefault() - Submit a work for MMU page fault. ++ * ++ * @kbdev: Kbase device pointer ++ * @as_nr: Faulty address space ++ * @fault: Data relating to the fault ++ * ++ * This function submits a work for reporting the details of MMU fault. 
++ */ ++static void submit_work_pagefault(struct kbase_device *kbdev, u32 as_nr, ++ struct kbase_fault *fault) ++{ ++ unsigned long flags; ++ struct kbase_as *const as = &kbdev->as[as_nr]; ++ struct kbase_context *kctx; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kctx = kbase_ctx_sched_as_to_ctx_nolock(kbdev, as_nr); ++ ++ if (kctx) { ++ kbase_ctx_sched_retain_ctx_refcount(kctx); ++ ++ as->pf_data = (struct kbase_fault) { ++ .status = fault->status, ++ .addr = fault->addr, ++ }; ++ ++ /* ++ * A page fault work item could already be pending for the ++ * context's address space, when the page fault occurs for ++ * MCU's address space. ++ */ ++ if (!queue_work(as->pf_wq, &as->work_pagefault)) ++ kbase_ctx_sched_release_ctx(kctx); ++ else { ++ dev_dbg(kbdev->dev, ++ "Page fault is already pending for as %u\n", ++ as_nr); ++ atomic_inc(&kbdev->faults_pending); ++ } ++ } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++} ++ ++void kbase_mmu_report_mcu_as_fault_and_reset(struct kbase_device *kbdev, ++ struct kbase_fault *fault) ++{ ++ /* decode the fault status */ ++ u32 exception_type = fault->status & 0xFF; ++ u32 access_type = (fault->status >> 8) & 0x3; ++ u32 source_id = (fault->status >> 16); ++ int as_no; ++ ++ /* terminal fault, print info about the fault */ ++ dev_err(kbdev->dev, ++ "Unexpected Page fault in firmware address space at VA 0x%016llX\n" ++ "raw fault status: 0x%X\n" ++ "exception type 0x%X: %s\n" ++ "access type 0x%X: %s\n" ++ "source id 0x%X\n", ++ fault->addr, ++ fault->status, ++ exception_type, kbase_gpu_exception_name(exception_type), ++ access_type, kbase_gpu_access_type_name(fault->status), ++ source_id); ++ ++ /* Report MMU fault for all address spaces (except MCU_AS_NR) */ ++ for (as_no = 1; as_no < kbdev->nr_hw_address_spaces; as_no++) ++ submit_work_pagefault(kbdev, as_no, fault); ++ ++ /* GPU reset is required to recover */ ++ if (kbase_prepare_to_reset_gpu(kbdev, ++ RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) ++ 
kbase_reset_gpu(kbdev); ++} ++KBASE_EXPORT_TEST_API(kbase_mmu_report_mcu_as_fault_and_reset); ++ ++void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, ++ struct kbase_as *as, struct kbase_fault *fault) ++{ ++ struct kbase_device *kbdev = kctx->kbdev; ++ u32 const status = fault->status; ++ int exception_type = (status & GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> ++ GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT; ++ int access_type = (status & GPU_FAULTSTATUS_ACCESS_TYPE_MASK) >> ++ GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT; ++ int source_id = (status & GPU_FAULTSTATUS_SOURCE_ID_MASK) >> ++ GPU_FAULTSTATUS_SOURCE_ID_SHIFT; ++ const char *addr_valid = (status & GPU_FAULTSTATUS_ADDR_VALID_FLAG) ? ++ "true" : "false"; ++ int as_no = as->number; ++ unsigned long flags; ++ ++ /* terminal fault, print info about the fault */ ++ dev_err(kbdev->dev, ++ "GPU bus fault in AS%d at VA 0x%016llX\n" ++ "VA_VALID: %s\n" ++ "raw fault status: 0x%X\n" ++ "exception type 0x%X: %s\n" ++ "access type 0x%X: %s\n" ++ "source id 0x%X\n" ++ "pid: %d\n", ++ as_no, fault->addr, ++ addr_valid, ++ status, ++ exception_type, kbase_gpu_exception_name(exception_type), ++ access_type, kbase_gpu_access_type_name(access_type), ++ source_id, ++ kctx->pid); ++ ++ /* AS transaction begin */ ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_mmu_disable(kctx); ++ kbase_ctx_flag_set(kctx, KCTX_AS_DISABLED_ON_FAULT); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ ++ /* Switching to UNMAPPED mode above would have enabled the firmware to ++ * recover from the fault (if the memory access was made by firmware) ++ * and it can then respond to CSG termination requests to be sent now. ++ * All GPU command queue groups associated with the context would be ++ * affected as they use the same GPU address space. 
++ */ ++ kbase_csf_ctx_handle_fault(kctx, fault); ++ ++ /* Now clear the GPU fault */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_CLEAR_FAULT); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++} ++ ++/* ++ * The caller must ensure it's retained the ctx to prevent it from being ++ * scheduled out whilst it's being worked on. ++ */ ++void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, ++ struct kbase_as *as, const char *reason_str, ++ struct kbase_fault *fault) ++{ ++ unsigned long flags; ++ unsigned int exception_type; ++ unsigned int access_type; ++ unsigned int source_id; ++ int as_no; ++ struct kbase_device *kbdev; ++ const u32 status = fault->status; ++ ++ as_no = as->number; ++ kbdev = kctx->kbdev; ++ ++ /* Make sure the context was active */ ++ if (WARN_ON(atomic_read(&kctx->refcount) <= 0)) ++ return; ++ ++ /* decode the fault status */ ++ exception_type = AS_FAULTSTATUS_EXCEPTION_TYPE_GET(status); ++ access_type = AS_FAULTSTATUS_ACCESS_TYPE_GET(status); ++ source_id = AS_FAULTSTATUS_SOURCE_ID_GET(status); ++ ++ /* terminal fault, print info about the fault */ ++ dev_err(kbdev->dev, ++ "Unhandled Page fault in AS%d at VA 0x%016llX\n" ++ "Reason: %s\n" ++ "raw fault status: 0x%X\n" ++ "exception type 0x%X: %s\n" ++ "access type 0x%X: %s\n" ++ "source id 0x%X\n" ++ "pid: %d\n", ++ as_no, fault->addr, ++ reason_str, ++ status, ++ exception_type, kbase_gpu_exception_name(exception_type), ++ access_type, kbase_gpu_access_type_name(status), ++ source_id, ++ kctx->pid); ++ ++ /* AS transaction begin */ ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ ++ /* switch to UNMAPPED mode, ++ * will abort all jobs and stop any hw counter dumping ++ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_mmu_disable(kctx); ++ kbase_ctx_flag_set(kctx, KCTX_AS_DISABLED_ON_FAULT); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ mutex_unlock(&kbdev->mmu_hw_mutex); 
++ /* AS transaction end */ ++ ++ /* Switching to UNMAPPED mode above would have enabled the firmware to ++ * recover from the fault (if the memory access was made by firmware) ++ * and it can then respond to CSG termination requests to be sent now. ++ * All GPU command queue groups associated with the context would be ++ * affected as they use the same GPU address space. ++ */ ++ kbase_csf_ctx_handle_fault(kctx, fault); ++ ++ /* Clear down the fault */ ++ kbase_mmu_hw_clear_fault(kbdev, as, ++ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); ++ kbase_mmu_hw_enable_fault(kbdev, as, ++ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); ++} ++ ++/** ++ * kbase_mmu_interrupt_process() - Process a bus or page fault. ++ * @kbdev: The kbase_device the fault happened on ++ * @kctx: The kbase_context for the faulting address space if one was ++ * found. ++ * @as: The address space that has the fault ++ * @fault: Data relating to the fault ++ * ++ * This function will process a fault on a specific address space ++ */ ++static void kbase_mmu_interrupt_process(struct kbase_device *kbdev, ++ struct kbase_context *kctx, struct kbase_as *as, ++ struct kbase_fault *fault) ++{ ++ lockdep_assert_held(&kbdev->hwaccess_lock); ++ ++ if (!kctx) { ++ dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Spurious IRQ or SW Design Error?\n", ++ kbase_as_has_bus_fault(as, fault) ? ++ "Bus error" : "Page fault", ++ as->number, fault->addr); ++ ++ /* Since no ctx was found, the MMU must be disabled. 
*/ ++ WARN_ON(as->current_setup.transtab); ++ ++ if (kbase_as_has_bus_fault(as, fault)) ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_CLEAR_FAULT); ++ else if (kbase_as_has_page_fault(as, fault)) { ++ kbase_mmu_hw_clear_fault(kbdev, as, ++ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); ++ kbase_mmu_hw_enable_fault(kbdev, as, ++ KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); ++ } ++ ++ return; ++ } ++ ++ if (kbase_as_has_bus_fault(as, fault)) { ++ /* ++ * We need to switch to UNMAPPED mode - but we do this in a ++ * worker so that we can sleep ++ */ ++ WARN_ON(!queue_work(as->pf_wq, &as->work_busfault)); ++ atomic_inc(&kbdev->faults_pending); ++ } else { ++ WARN_ON(!queue_work(as->pf_wq, &as->work_pagefault)); ++ atomic_inc(&kbdev->faults_pending); ++ } ++} ++ ++int kbase_mmu_bus_fault_interrupt(struct kbase_device *kbdev, ++ u32 status, u32 as_nr) ++{ ++ struct kbase_context *kctx; ++ unsigned long flags; ++ struct kbase_as *as; ++ struct kbase_fault *fault; ++ ++ if (WARN_ON(as_nr == MCU_AS_NR)) ++ return -EINVAL; ++ ++ if (WARN_ON(as_nr >= BASE_MAX_NR_AS)) ++ return -EINVAL; ++ ++ as = &kbdev->as[as_nr]; ++ fault = &as->bf_data; ++ fault->status = status; ++ fault->addr = (u64) kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_FAULTADDRESS_HI)) << 32; ++ fault->addr |= kbase_reg_read(kbdev, ++ GPU_CONTROL_REG(GPU_FAULTADDRESS_LO)); ++ fault->protected_mode = false; ++ ++ /* report the fault to debugfs */ ++ kbase_as_fault_debugfs_new(kbdev, as_nr); ++ ++ kctx = kbase_ctx_sched_as_to_ctx_refcount(kbdev, as_nr); ++ ++ /* Process the bus fault interrupt for this address space */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_mmu_interrupt_process(kbdev, kctx, as, fault); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ return 0; ++} ++ ++void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) ++{ ++ const int num_as = 16; ++ const int pf_shift = 0; ++ const unsigned long as_bit_mask = (1UL << num_as) - 1; ++ unsigned long 
flags; ++ u32 new_mask; ++ u32 tmp; ++ u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask); ++ ++ /* remember current mask */ ++ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); ++ new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); ++ /* mask interrupts for now */ ++ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); ++ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); ++ ++ while (pf_bits) { ++ struct kbase_context *kctx; ++ int as_no = ffs(pf_bits) - 1; ++ struct kbase_as *as = &kbdev->as[as_no]; ++ struct kbase_fault *fault = &as->pf_data; ++ ++ /* find faulting address */ ++ fault->addr = kbase_reg_read(kbdev, MMU_AS_REG(as_no, ++ AS_FAULTADDRESS_HI)); ++ fault->addr <<= 32; ++ fault->addr |= kbase_reg_read(kbdev, MMU_AS_REG(as_no, ++ AS_FAULTADDRESS_LO)); ++ ++ /* Mark the fault protected or not */ ++ fault->protected_mode = false; ++ ++ /* report the fault to debugfs */ ++ kbase_as_fault_debugfs_new(kbdev, as_no); ++ ++ /* record the fault status */ ++ fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, ++ AS_FAULTSTATUS)); ++ ++ fault->extra_addr = kbase_reg_read(kbdev, ++ MMU_AS_REG(as_no, AS_FAULTEXTRA_HI)); ++ fault->extra_addr <<= 32; ++ fault->extra_addr |= kbase_reg_read(kbdev, ++ MMU_AS_REG(as_no, AS_FAULTEXTRA_LO)); ++ ++ /* Mark page fault as handled */ ++ pf_bits &= ~(1UL << as_no); ++ ++ /* remove the queued PF from the mask */ ++ new_mask &= ~MMU_PAGE_FAULT(as_no); ++ ++ if (as_no == MCU_AS_NR) { ++ kbase_mmu_report_mcu_as_fault_and_reset(kbdev, fault); ++ /* Pointless to handle remaining faults */ ++ break; ++ } ++ ++ /* ++ * Refcount the kctx - it shouldn't disappear anyway, since ++ * Page faults _should_ only occur whilst GPU commands are ++ * executing, and a command causing the Page fault shouldn't ++ * complete until the MMU is updated. ++ * Reference is released at the end of bottom half of page ++ * fault handling. 
++ */ ++ kctx = kbase_ctx_sched_as_to_ctx_refcount(kbdev, as_no); ++ ++ /* Process the interrupt for this address space */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_mmu_interrupt_process(kbdev, kctx, as, fault); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ } ++ ++ /* reenable interrupts */ ++ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); ++ tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); ++ new_mask |= tmp; ++ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask); ++ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); ++} ++ ++int kbase_mmu_switch_to_ir(struct kbase_context *const kctx, ++ struct kbase_va_region *const reg) ++{ ++ /* Can't soft-stop the provoking job */ ++ return -EPERM; ++} ++ ++/** ++ * kbase_mmu_gpu_fault_worker() - Process a GPU fault for the device. ++ * ++ * @data: work_struct passed by queue_work() ++ * ++ * Report a GPU fatal error for all GPU command queue groups that are ++ * using the address space and terminate them. ++ */ ++static void kbase_mmu_gpu_fault_worker(struct work_struct *data) ++{ ++ struct kbase_as *const faulting_as = container_of(data, struct kbase_as, ++ work_gpufault); ++ const u32 as_nr = faulting_as->number; ++ struct kbase_device *const kbdev = container_of(faulting_as, struct ++ kbase_device, as[as_nr]); ++ struct kbase_fault *fault; ++ struct kbase_context *kctx; ++ u32 status; ++ u64 address; ++ u32 as_valid; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ fault = &faulting_as->gf_data; ++ status = fault->status; ++ as_valid = status & GPU_FAULTSTATUS_JASID_VALID_FLAG; ++ address = fault->addr; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ dev_warn(kbdev->dev, ++ "GPU Fault 0x%08x (%s) in AS%u at 0x%016llx\n" ++ "ASID_VALID: %s, ADDRESS_VALID: %s\n", ++ status, ++ kbase_gpu_exception_name( ++ GPU_FAULTSTATUS_EXCEPTION_TYPE_GET(status)), ++ as_nr, address, ++ as_valid ? 
"true" : "false", ++ status & GPU_FAULTSTATUS_ADDR_VALID_FLAG ? "true" : "false"); ++ ++ kctx = kbase_ctx_sched_as_to_ctx(kbdev, as_nr); ++ kbase_csf_ctx_handle_fault(kctx, fault); ++ kbase_ctx_sched_release_ctx_lock(kctx); ++ ++ atomic_dec(&kbdev->faults_pending); ++ ++ /* A work for GPU fault is complete. ++ * Till reaching here, no further GPU fault will be reported. ++ * Now clear the GPU fault to allow next GPU fault interrupt report. ++ */ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), ++ GPU_COMMAND_CLEAR_FAULT); ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++} ++ ++/** ++ * submit_work_gpufault() - Submit a work for GPU fault. ++ * ++ * @kbdev: Kbase device pointer ++ * @status: GPU fault status ++ * @as_nr: Faulty address space ++ * @address: GPU fault address ++ * ++ * This function submits a work for reporting the details of GPU fault. ++ */ ++static void submit_work_gpufault(struct kbase_device *kbdev, u32 status, ++ u32 as_nr, u64 address) ++{ ++ unsigned long flags; ++ struct kbase_as *const as = &kbdev->as[as_nr]; ++ struct kbase_context *kctx; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ kctx = kbase_ctx_sched_as_to_ctx_nolock(kbdev, as_nr); ++ ++ if (kctx) { ++ kbase_ctx_sched_retain_ctx_refcount(kctx); ++ ++ as->gf_data = (struct kbase_fault) { ++ .status = status, ++ .addr = address, ++ }; ++ ++ if (WARN_ON(!queue_work(as->pf_wq, &as->work_gpufault))) ++ kbase_ctx_sched_release_ctx(kctx); ++ else ++ atomic_inc(&kbdev->faults_pending); ++ } ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++} ++ ++void kbase_mmu_gpu_fault_interrupt(struct kbase_device *kbdev, u32 status, ++ u32 as_nr, u64 address, bool as_valid) ++{ ++ if (!as_valid || (as_nr == MCU_AS_NR)) { ++ int as; ++ ++ /* Report GPU fault for all contexts (except MCU_AS_NR) in case either ++ * the address space is invalid or it's MCU address space. 
++ */ ++ for (as = 1; as < kbdev->nr_hw_address_spaces; as++) ++ submit_work_gpufault(kbdev, status, as, address); ++ } else ++ submit_work_gpufault(kbdev, status, as_nr, address); ++} ++KBASE_EXPORT_TEST_API(kbase_mmu_gpu_fault_interrupt); ++ ++int kbase_mmu_as_init(struct kbase_device *kbdev, int i) ++{ ++ kbdev->as[i].number = i; ++ kbdev->as[i].bf_data.addr = 0ULL; ++ kbdev->as[i].pf_data.addr = 0ULL; ++ kbdev->as[i].gf_data.addr = 0ULL; ++ ++ kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", 0, 1, i); ++ if (!kbdev->as[i].pf_wq) ++ return -ENOMEM; ++ ++ INIT_WORK(&kbdev->as[i].work_pagefault, kbase_mmu_page_fault_worker); ++ INIT_WORK(&kbdev->as[i].work_busfault, kbase_mmu_bus_fault_worker); ++ INIT_WORK(&kbdev->as[i].work_gpufault, kbase_mmu_gpu_fault_worker); ++ ++ return 0; ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/backend/mali_kbase_mmu_jm.c b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/backend/mali_kbase_mmu_jm.c +index 2d8fb51..01ca419 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/backend/mali_kbase_mmu_jm.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/backend/mali_kbase_mmu_jm.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,21 +17,18 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /** +- * Base kernel MMU management specific for Job Manager GPU. ++ * DOC: Base kernel MMU management specific for Job Manager GPU. + */ + + #include + #include + #include +-#include ++#include + #include +-#include "../mali_kbase_mmu_internal.h" +-#include "mali_kbase_device_internal.h" ++#include + + void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, + struct kbase_mmu_setup * const setup) +@@ -98,7 +96,7 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, + KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); + } + +-/** ++/* + * The caller must ensure it's retained the ctx to prevent it from being + * scheduled out whilst it's being worked on. + */ +@@ -145,6 +143,7 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, + kctx->pid); + + /* hardware counters dump fault handling */ ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) && + (kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_DUMPING)) { +@@ -153,6 +152,7 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, + kbdev->hwcnt.addr_bytes))) + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT; + } ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + /* Stop the kctx from submitting more jobs and cause it to be scheduled + * out/rescheduled - this will occur on releasing the context's refcount +@@ -187,14 +187,26 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, + KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); + } + +-void kbase_mmu_interrupt_process(struct kbase_device *kbdev, ++/** ++ * kbase_mmu_interrupt_process() - Process a bus or page fault. ++ * @kbdev: The kbase_device the fault happened on ++ * @kctx: The kbase_context for the faulting address space if one was ++ * found. 
++ * @as: The address space that has the fault ++ * @fault: Data relating to the fault ++ * ++ * This function will process a fault on a specific address space ++ */ ++static void kbase_mmu_interrupt_process(struct kbase_device *kbdev, + struct kbase_context *kctx, struct kbase_as *as, + struct kbase_fault *fault) + { ++ unsigned long flags; ++ + lockdep_assert_held(&kbdev->hwaccess_lock); + + dev_dbg(kbdev->dev, +- "Entering %s kctx %p, as %p\n", ++ "Entering %s kctx %pK, as %pK\n", + __func__, (void *)kctx, (void *)as); + + if (!kctx) { +@@ -228,11 +240,13 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev, + * hw counters dumping in progress, signal the + * other thread that it failed + */ ++ spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + if ((kbdev->hwcnt.kctx == kctx) && + (kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_DUMPING)) + kbdev->hwcnt.backend.state = + KBASE_INSTR_STATE_FAULT; ++ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + /* + * Stop the kctx from submitting more jobs and cause it +@@ -241,14 +255,10 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev, + */ + kbasep_js_clear_submit_allowed(js_devdata, kctx); + +- if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) +- dev_warn(kbdev->dev, +- "Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n", +- as->number, fault->addr, +- fault->extra_addr); +- else +- dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n", +- as->number, fault->addr); ++ dev_warn(kbdev->dev, ++ "Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n", ++ as->number, fault->addr, ++ fault->extra_addr); + + /* + * We need to switch to UNMAPPED mode - but we do this in a +@@ -262,7 +272,7 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev, + } + + dev_dbg(kbdev->dev, +- "Leaving %s kctx %p, as %p\n", ++ "Leaving %s kctx %pK, as %pK\n", + __func__, (void *)kctx, (void *)as); + } + +@@ -296,7 +306,6 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) + unsigned 
long flags; + u32 new_mask; + u32 tmp, bf_bits, pf_bits; +- bool gpu_lost = false; + + dev_dbg(kbdev->dev, "Entering %s irq_stat %u\n", + __func__, irq_stat); +@@ -362,22 +371,11 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) + /* record the fault status */ + fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, + AS_FAULTSTATUS)); +- +- if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) { +- fault->extra_addr = kbase_reg_read(kbdev, +- MMU_AS_REG(as_no, AS_FAULTEXTRA_HI)); +- fault->extra_addr <<= 32; +- fault->extra_addr |= kbase_reg_read(kbdev, +- MMU_AS_REG(as_no, AS_FAULTEXTRA_LO)); +- } +- +- /* check if we still have GPU */ +- gpu_lost = kbase_is_gpu_lost(kbdev); +- if (gpu_lost) { +- if (kctx) +- kbasep_js_runpool_release_ctx(kbdev, kctx); +- return; +- } ++ fault->extra_addr = kbase_reg_read(kbdev, ++ MMU_AS_REG(as_no, AS_FAULTEXTRA_HI)); ++ fault->extra_addr <<= 32; ++ fault->extra_addr |= kbase_reg_read(kbdev, ++ MMU_AS_REG(as_no, AS_FAULTEXTRA_LO)); + + if (kbase_as_has_bus_fault(as, fault)) { + /* Mark bus fault as handled. 
+@@ -418,7 +416,23 @@ int kbase_mmu_switch_to_ir(struct kbase_context *const kctx, + struct kbase_va_region *const reg) + { + dev_dbg(kctx->kbdev->dev, +- "Switching to incremental rendering for region %p\n", ++ "Switching to incremental rendering for region %pK\n", + (void *)reg); + return kbase_job_slot_softstop_start_rp(kctx, reg); + } ++ ++int kbase_mmu_as_init(struct kbase_device *kbdev, int i) ++{ ++ kbdev->as[i].number = i; ++ kbdev->as[i].bf_data.addr = 0ULL; ++ kbdev->as[i].pf_data.addr = 0ULL; ++ ++ kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", 0, 1, i); ++ if (!kbdev->as[i].pf_wq) ++ return -ENOMEM; ++ ++ INIT_WORK(&kbdev->as[i].work_pagefault, kbase_mmu_page_fault_worker); ++ INIT_WORK(&kbdev->as[i].work_busfault, kbase_mmu_bus_fault_worker); ++ ++ return 0; ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.c b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.c +index c2c7c4b..41ee07f 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,13 +17,10 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /** +- * @file mali_kbase_mmu.c +- * Base kernel MMU management. 
++ * DOC: Base kernel MMU management. + */ + + #include +@@ -31,20 +29,20 @@ + #include + #include + #include +-#include ++#include + #include + #include + #include + #include + #include +-#include +-#include + #include + #include + #include + #include + #include ++#include + ++#include + #define KBASE_MMU_PAGE_ENTRIES 512 + + /** +@@ -82,21 +80,20 @@ static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev, + u64 vpfn, size_t nr, bool sync, int as_nr); + + /** +- * kbase_mmu_sync_pgd - sync page directory to memory ++ * kbase_mmu_sync_pgd() - sync page directory to memory when needed. + * @kbdev: Device pointer. + * @handle: Address of DMA region. + * @size: Size of the region to sync. + * + * This should be called after each page directory update. + */ +- + static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, + dma_addr_t handle, size_t size) + { +- /* If page table is not coherent then ensure the gpu can read ++ /* In non-coherent system, ensure the GPU can read + * the pages from memory + */ +- if (kbdev->system_coherency != COHERENCY_ACE) ++ if (kbdev->system_coherency == COHERENCY_NONE) + dma_sync_single_for_device(kbdev->dev, handle, size, + DMA_TO_DEVICE); + } +@@ -117,7 +114,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, + /** + * reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to + * a region on a GPU page fault +- * ++ * @kbdev: KBase device + * @reg: The region that will be backed with more pages + * @fault_rel_pfn: PFN of the fault relative to the start of the region + * +@@ -133,23 +130,32 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, + static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev, + struct kbase_va_region *reg, size_t fault_rel_pfn) + { +- size_t multiple = reg->extent; ++ size_t multiple = reg->extension; + size_t reg_current_size = kbase_reg_current_backed_size(reg); + size_t minimum_extra = 
fault_rel_pfn - reg_current_size + 1; + size_t remainder; + + if (!multiple) { +- dev_warn(kbdev->dev, +- "VA Region 0x%llx extent was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n", ++ dev_warn( ++ kbdev->dev, ++ "VA Region 0x%llx extension was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n", + ((unsigned long long)reg->start_pfn) << PAGE_SHIFT); + return minimum_extra; + } + + /* Calculate the remainder to subtract from minimum_extra to make it +- * the desired (rounded down) multiple of the extent. ++ * the desired (rounded down) multiple of the extension. + * Depending on reg's flags, the base used for calculating multiples is + * different + */ ++ ++ /* multiple is based from the current backed size, even if the ++ * current backed size/pfn for end of committed memory are not ++ * themselves aligned to multiple ++ */ ++ remainder = minimum_extra % multiple; ++ ++#if !MALI_USE_CSF + if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { + /* multiple is based from the top of the initial commit, which + * has been allocated in such a way that (start_pfn + +@@ -175,13 +181,8 @@ static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev, + + remainder = pages_after_initial % multiple; + } +- } else { +- /* multiple is based from the current backed size, even if the +- * current backed size/pfn for end of committed memory are not +- * themselves aligned to multiple +- */ +- remainder = minimum_extra % multiple; + } ++#endif /* !MALI_USE_CSF */ + + if (remainder == 0) + return minimum_extra; +@@ -522,10 +523,15 @@ static bool page_fault_try_alloc(struct kbase_context *kctx, + static void release_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx) + { ++#if MALI_USE_CSF ++ CSTD_UNUSED(kbdev); ++ kbase_ctx_sched_release_ctx_lock(kctx); ++#else /* MALI_USE_CSF */ + kbasep_js_runpool_release_ctx(kbdev, kctx); ++#endif /* MALI_USE_CSF */ + } + +-void page_fault_worker(struct work_struct *data) ++void 
kbase_mmu_page_fault_worker(struct work_struct *data) + { + u64 fault_pfn; + u32 fault_status; +@@ -544,7 +550,9 @@ void page_fault_worker(struct work_struct *data) + struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; + int i; + size_t current_backed_size; +- ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ size_t pages_trimmed = 0; ++#endif + + faulting_as = container_of(data, struct kbase_as, work_pagefault); + fault = &faulting_as->pf_data; +@@ -553,7 +561,7 @@ void page_fault_worker(struct work_struct *data) + + kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); + dev_dbg(kbdev->dev, +- "Entering %s %p, fault_pfn %lld, as_no %d\n", ++ "Entering %s %pK, fault_pfn %lld, as_no %d\n", + __func__, (void *)data, fault_pfn, as_no); + + /* Grab the context that was already refcounted in kbase_mmu_interrupt() +@@ -568,6 +576,21 @@ void page_fault_worker(struct work_struct *data) + + KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev); + ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++#if !MALI_USE_CSF ++ mutex_lock(&kctx->jctx.lock); ++#endif ++#endif ++ ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ /* check if we still have GPU */ ++ if (unlikely(kbase_is_gpu_removed(kbdev))) { ++ dev_dbg(kbdev->dev, ++ "%s: GPU has been removed\n", __func__); ++ goto fault_done; ++ } ++#endif ++ + if (unlikely(fault->protected_mode)) { + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Protected mode fault", fault); +@@ -611,21 +634,13 @@ void page_fault_worker(struct work_struct *data) + goto fault_done; + + case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT: +- if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) +- kbase_mmu_report_fault_and_kill(kctx, faulting_as, +- "Address size fault", fault); +- else +- kbase_mmu_report_fault_and_kill(kctx, faulting_as, +- "Unknown fault code", fault); ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Address size fault", fault); + goto fault_done; + + case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT: +- if 
(kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) +- kbase_mmu_report_fault_and_kill(kctx, faulting_as, +- "Memory attributes fault", fault); +- else +- kbase_mmu_report_fault_and_kill(kctx, faulting_as, +- "Unknown fault code", fault); ++ kbase_mmu_report_fault_and_kill(kctx, faulting_as, ++ "Memory attributes fault", fault); + goto fault_done; + + default: +@@ -691,6 +706,10 @@ page_fault_retry: + goto fault_done; + } + ++ if (AS_FAULTSTATUS_ACCESS_TYPE_GET(fault_status) == ++ AS_FAULTSTATUS_ACCESS_TYPE_READ) ++ dev_warn(kbdev->dev, "Grow on pagefault while reading"); ++ + /* find the size we need to grow it by + * we know the result fit in a size_t due to + * kbase_region_tracker_find_region_enclosing_address +@@ -758,6 +777,13 @@ page_fault_retry: + + pages_to_grow = 0; + ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ if ((region->flags & KBASE_REG_ACTIVE_JIT_ALLOC) && !pages_trimmed) { ++ kbase_jit_request_phys_increase(kctx, new_pages); ++ pages_trimmed = new_pages; ++ } ++#endif ++ + spin_lock(&kctx->mem_partials_lock); + grown = page_fault_try_alloc(kctx, region, new_pages, &pages_to_grow, + &grow_2mb_pool, prealloc_sas); +@@ -818,7 +844,7 @@ page_fault_retry: + + if (kbase_mmu_switch_to_ir(kctx, region) >= 0) { + dev_dbg(kctx->kbdev->dev, +- "Get region %p for IR\n", ++ "Get region %pK for IR\n", + (void *)region); + kbase_va_region_alloc_get(kctx, region); + } +@@ -872,6 +898,13 @@ page_fault_retry: + } + } + #endif ++ ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ if (pages_trimmed) { ++ kbase_jit_done_phys_increase(kctx, pages_trimmed); ++ pages_trimmed = 0; ++ } ++#endif + kbase_gpu_vm_unlock(kctx); + } else { + int ret = -ENOMEM; +@@ -918,6 +951,17 @@ page_fault_retry: + } + + fault_done: ++#if MALI_JIT_PRESSURE_LIMIT_BASE ++ if (pages_trimmed) { ++ kbase_gpu_vm_lock(kctx); ++ kbase_jit_done_phys_increase(kctx, pages_trimmed); ++ kbase_gpu_vm_unlock(kctx); ++ } ++#if !MALI_USE_CSF ++ mutex_unlock(&kctx->jctx.lock); ++#endif ++#endif ++ + for (i = 0; i != 
ARRAY_SIZE(prealloc_sas); ++i) + kfree(prealloc_sas[i]); + +@@ -928,7 +972,7 @@ fault_done: + release_ctx(kbdev, kctx); + + atomic_dec(&kbdev->faults_pending); +- dev_dbg(kbdev->dev, "Leaving page_fault_worker %p\n", (void *)data); ++ dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK\n", (void *)data); + } + + static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, +@@ -938,7 +982,11 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, + int i; + struct page *p; + ++#ifdef CONFIG_MALI_2MB_ALLOC ++ p = kbase_mem_pool_alloc(&kbdev->mem_pools.large[mmut->group_id]); ++#else /* CONFIG_MALI_2MB_ALLOC */ + p = kbase_mem_pool_alloc(&kbdev->mem_pools.small[mmut->group_id]); ++#endif /* CONFIG_MALI_2MB_ALLOC */ + if (!p) + return 0; + +@@ -964,6 +1012,8 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, + + atomic_add(1, &kbdev->memdev.used_pages); + ++ kbase_trace_gpu_mem_usage_inc(kbdev, mmut->kctx, 1); ++ + for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) + kbdev->mmu_mode->entry_invalidate(&page[i]); + +@@ -973,8 +1023,12 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, + return page_to_phys(p); + + alloc_free: +- kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, +- false); ++ ++#ifdef CONFIG_MALI_2MB_ALLOC ++ kbase_mem_pool_free(&kbdev->mem_pools.large[mmut->group_id], p, false); ++#else /* CONFIG_MALI_2MB_ALLOC */ ++ kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, false); ++#endif /* CONFIG_MALI_2MB_ALLOC */ + + return 0; + } +@@ -1200,7 +1254,11 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, + */ + mutex_unlock(&kctx->mmu.mmu_lock); + err = kbase_mem_pool_grow( ++#ifdef CONFIG_MALI_2MB_ALLOC ++ &kbdev->mem_pools.large[ ++#else + &kbdev->mem_pools.small[ ++#endif + kctx->mmu.group_id], + MIDGARD_MMU_BOTTOMLEVEL); + mutex_lock(&kctx->mmu.mmu_lock); +@@ -1279,7 +1337,11 @@ static inline void cleanup_empty_pte(struct kbase_device *kbdev, + + tmp_pgd = 
kbdev->mmu_mode->pte_to_phy_addr(*pte); + tmp_p = phys_to_page(tmp_pgd); ++#ifdef CONFIG_MALI_2MB_ALLOC ++ kbase_mem_pool_free(&kbdev->mem_pools.large[mmut->group_id], ++#else + kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], ++#endif + tmp_p, false); + + /* If the MMU tables belong to a context then we accounted the memory +@@ -1290,6 +1352,8 @@ static inline void cleanup_empty_pte(struct kbase_device *kbdev, + atomic_sub(1, &mmut->kctx->used_pages); + } + atomic_sub(1, &kbdev->memdev.used_pages); ++ ++ kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); + } + + u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, +@@ -1361,7 +1425,11 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, + */ + mutex_unlock(&mmut->mmu_lock); + err = kbase_mem_pool_grow( ++#ifdef CONFIG_MALI_2MB_ALLOC ++ &kbdev->mem_pools.large[mmut->group_id], ++#else + &kbdev->mem_pools.small[mmut->group_id], ++#endif + cur_level); + mutex_lock(&mmut->mmu_lock); + } while (!err); +@@ -1509,7 +1577,7 @@ static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx, + */ + dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); + +- if (kbase_prepare_to_reset_gpu_locked(kbdev)) ++ if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) + kbase_reset_gpu_locked(kbdev); + } + } +@@ -1522,10 +1590,29 @@ static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev, + { + int err; + u32 op; ++ bool gpu_powered; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); ++ gpu_powered = kbdev->pm.backend.gpu_powered; ++ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); ++ ++ /* GPU is off so there's no need to perform flush/invalidate. ++ * But even if GPU is not actually powered down, after gpu_powered flag ++ * was set to false, it is still safe to skip the flush/invalidate. 
++ * The TLB invalidation will anyway be performed due to AS_COMMAND_UPDATE ++ * which is sent when address spaces are restored after gpu_powered flag ++ * is set to true. Flushing of L2 cache is certainly not required as L2 ++ * cache is definitely off if gpu_powered is false. ++ */ ++ if (!gpu_powered) ++ return; + + if (kbase_pm_context_active_handle_suspend(kbdev, + KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { +- /* GPU is off so there's no need to perform flush/invalidate */ ++ /* GPU has just been powered off due to system suspend. ++ * So again, no need to perform flush/invalidate. ++ */ + return; + } + +@@ -1544,9 +1631,10 @@ static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev, + /* Flush failed to complete, assume the GPU has hung and + * perform a reset to recover + */ +- dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n"); ++ dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); + +- if (kbase_prepare_to_reset_gpu(kbdev)) ++ if (kbase_prepare_to_reset_gpu( ++ kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu(kbdev); + } + +@@ -1577,9 +1665,13 @@ static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, + return; + + kbdev = kctx->kbdev; ++#if !MALI_USE_CSF + mutex_lock(&kbdev->js_data.queue_mutex); + ctx_is_in_runpool = kbase_ctx_sched_inc_refcount(kctx); + mutex_unlock(&kbdev->js_data.queue_mutex); ++#else ++ ctx_is_in_runpool = kbase_ctx_sched_inc_refcount_if_as_valid(kctx); ++#endif /* !MALI_USE_CSF */ + + if (ctx_is_in_runpool) { + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); +@@ -1621,6 +1713,7 @@ void kbase_mmu_disable(struct kbase_context *kctx) + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); ++ lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex); + + /* + * The address space is being disabled, drain all knowledge of it out 
+@@ -1832,7 +1925,11 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, + */ + mutex_unlock(&kctx->mmu.mmu_lock); + err = kbase_mem_pool_grow( ++#ifdef CONFIG_MALI_2MB_ALLOC ++ &kbdev->mem_pools.large[ ++#else + &kbdev->mem_pools.small[ ++#endif + kctx->mmu.group_id], + MIDGARD_MMU_BOTTOMLEVEL); + mutex_lock(&kctx->mmu.mmu_lock); +@@ -1927,8 +2024,11 @@ static void mmu_teardown_level(struct kbase_device *kbdev, + } + + p = pfn_to_page(PFN_DOWN(pgd)); +- ++#ifdef CONFIG_MALI_2MB_ALLOC ++ kbase_mem_pool_free(&kbdev->mem_pools.large[mmut->group_id], ++#else + kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], ++#endif + p, true); + + atomic_sub(1, &kbdev->memdev.used_pages); +@@ -1940,6 +2040,8 @@ static void mmu_teardown_level(struct kbase_device *kbdev, + kbase_process_page_usage_dec(mmut->kctx, 1); + atomic_sub(1, &mmut->kctx->used_pages); + } ++ ++ kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); + } + + int kbase_mmu_init(struct kbase_device *const kbdev, +@@ -1969,7 +2071,11 @@ int kbase_mmu_init(struct kbase_device *const kbdev, + int err; + + err = kbase_mem_pool_grow( ++#ifdef CONFIG_MALI_2MB_ALLOC ++ &kbdev->mem_pools.large[mmut->group_id], ++#else + &kbdev->mem_pools.small[mmut->group_id], ++#endif + MIDGARD_MMU_BOTTOMLEVEL); + if (err) { + kbase_mmu_term(kbdev, mmut); +@@ -2000,6 +2106,11 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) + mutex_destroy(&mmut->mmu_lock); + } + ++void kbase_mmu_as_term(struct kbase_device *kbdev, int i) ++{ ++ destroy_workqueue(kbdev->as[i].pf_wq); ++} ++ + static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, + int level, char ** const buffer, size_t *size_left) + { +@@ -2140,7 +2251,7 @@ fail_free: + } + KBASE_EXPORT_TEST_API(kbase_mmu_dump); + +-void bus_fault_worker(struct work_struct *data) ++void kbase_mmu_bus_fault_worker(struct work_struct *data) + { + struct kbase_as *faulting_as; + int as_no; +@@ -2168,6 
+2279,17 @@ void bus_fault_worker(struct work_struct *data) + return; + } + ++#ifdef CONFIG_MALI_ARBITER_SUPPORT ++ /* check if we still have GPU */ ++ if (unlikely(kbase_is_gpu_removed(kbdev))) { ++ dev_dbg(kbdev->dev, ++ "%s: GPU has been removed\n", __func__); ++ release_ctx(kbdev, kctx); ++ atomic_dec(&kbdev->faults_pending); ++ return; ++ } ++#endif ++ + if (unlikely(fault->protected_mode)) { + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Permission failure", fault); +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.h b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.h +index c9e27b1..a2d1a8e 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,13 +17,35 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KBASE_MMU_H_ + #define _KBASE_MMU_H_ + ++/** ++ * kbase_mmu_as_init() - Initialising GPU address space object. ++ * ++ * This is called from device probe to initialise an address space object ++ * of the device. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer). ++ * @i: Array index of address space object. 
++ * ++ * Return: 0 on success and non-zero value on failure. ++ */ ++int kbase_mmu_as_init(struct kbase_device *kbdev, int i); ++ ++/** ++ * kbase_mmu_as_term() - Terminate address space object. ++ * ++ * This is called upon device termination to destroy ++ * the address space object of the device. ++ * ++ * @kbdev: The kbase device structure for the device (must be a valid pointer). ++ * @i: Array index of address space object. ++ */ ++void kbase_mmu_as_term(struct kbase_device *kbdev, int i); ++ + /** + * kbase_mmu_init - Initialise an object representing GPU page tables + * +@@ -115,4 +138,18 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, + int kbase_mmu_bus_fault_interrupt(struct kbase_device *kbdev, u32 status, + u32 as_nr); + ++/** ++ * kbase_mmu_gpu_fault_interrupt() - Report a GPU fault. ++ * @kbdev: Kbase device pointer ++ * @status: GPU fault status ++ * @as_nr: Faulty address space ++ * @address: GPU fault address ++ * @as_valid: true if address space is valid ++ * ++ * This function builds GPU fault information to submit a work ++ * for reporting the details of the fault. ++ */ ++void kbase_mmu_gpu_fault_interrupt(struct kbase_device *kbdev, u32 status, ++ u32 as_nr, u64 address, bool as_valid); ++ + #endif /* _KBASE_MMU_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_hw.h b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_hw.h +index e6eef86..d1f1ff2 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_hw.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_hw.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014-2015, 2018-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2015, 2018-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /** +@@ -39,6 +38,11 @@ struct kbase_context; + + /** + * enum kbase_mmu_fault_type - MMU fault type descriptor. ++ * @KBASE_MMU_FAULT_TYPE_UNKNOWN: unknown fault ++ * @KBASE_MMU_FAULT_TYPE_PAGE: page fault ++ * @KBASE_MMU_FAULT_TYPE_BUS: bus fault ++ * @KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED: page_unexpected fault ++ * @KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED: bus_unexpected fault + */ + enum kbase_mmu_fault_type { + KBASE_MMU_FAULT_TYPE_UNKNOWN = 0, +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_hw_direct.c b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_hw_direct.c +index f22e73e..a99b988 100755 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_hw_direct.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_hw_direct.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include +@@ -25,8 +24,7 @@ + #include + #include + #include +-#include +-#include ++#include + + /** + * lock_region() - Generate lockaddr to lock memory region in MMU +@@ -126,38 +124,33 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as) + struct kbase_mmu_setup *current_setup = &as->current_setup; + u64 transcfg = 0; + +- if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) { +- transcfg = current_setup->transcfg; ++ transcfg = current_setup->transcfg; + +- /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK +- * Clear PTW_MEMATTR bits +- */ +- transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK; +- /* Enable correct PTW_MEMATTR bits */ +- transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK; +- /* Ensure page-tables reads use read-allocate cache-policy in +- * the L2 +- */ +- transcfg |= AS_TRANSCFG_R_ALLOCATE; +- +- if (kbdev->system_coherency == COHERENCY_ACE) { +- /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) +- * Clear PTW_SH bits +- */ +- transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK); +- /* Enable correct PTW_SH bits */ +- transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS); +- } ++ /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK ++ * Clear PTW_MEMATTR bits ++ */ ++ transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK; ++ /* Enable correct PTW_MEMATTR bits */ ++ transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK; ++ /* Ensure page-tables reads use read-allocate cache-policy in ++ * the L2 ++ */ ++ transcfg |= AS_TRANSCFG_R_ALLOCATE; + +- kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO), +- transcfg); +- kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI), +- (transcfg >> 32) & 0xFFFFFFFFUL); +- } else { +- if 
(kbdev->system_coherency == COHERENCY_ACE) +- current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER; ++ if (kbdev->system_coherency != COHERENCY_NONE) { ++ /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) ++ * Clear PTW_SH bits ++ */ ++ transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK); ++ /* Enable correct PTW_SH bits */ ++ transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS); + } + ++ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO), ++ transcfg); ++ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI), ++ (transcfg >> 32) & 0xFFFFFFFFUL); ++ + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO), + current_setup->transtab & 0xFFFFFFFFUL); + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI), +@@ -230,10 +223,11 @@ void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, + + /* Clear the page (and bus fault IRQ as well in case one occurred) */ + pf_bf_mask = MMU_PAGE_FAULT(as->number); ++#if !MALI_USE_CSF + if (type == KBASE_MMU_FAULT_TYPE_BUS || + type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) + pf_bf_mask |= MMU_BUS_ERROR(as->number); +- ++#endif + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask); + + unlock: +@@ -261,10 +255,11 @@ void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, + irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)) | + MMU_PAGE_FAULT(as->number); + ++#if !MALI_USE_CSF + if (type == KBASE_MMU_FAULT_TYPE_BUS || + type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) + irq_mask |= MMU_BUS_ERROR(as->number); +- ++#endif + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask); + + unlock: +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_internal.h b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_internal.h +index 28bd341..b8cd55f 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_internal.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_internal.h +@@ -1,11 +1,12 @@ ++/* 
SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KBASE_MMU_INTERNAL_H_ +@@ -26,6 +25,15 @@ + void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, + struct kbase_mmu_setup * const setup); + ++/** ++ * kbase_mmu_report_mcu_as_fault_and_reset - Report page fault for all ++ * address spaces and reset the GPU. ++ * @kbdev: The kbase_device the fault happened on ++ * @fault: Data relating to the fault ++ */ ++void kbase_mmu_report_mcu_as_fault_and_reset(struct kbase_device *kbdev, ++ struct kbase_fault *fault); ++ + void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, + struct kbase_as *as, struct kbase_fault *fault); + +@@ -33,24 +41,11 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, + struct kbase_as *as, const char *reason_str, + struct kbase_fault *fault); + +-/** +- * kbase_mmu_interrupt_process - Process a bus or page fault. +- * @kbdev The kbase_device the fault happened on +- * @kctx The kbase_context for the faulting address space if one was found. 
+- * @as The address space that has the fault +- * @fault Data relating to the fault +- * +- * This function will process a fault on a specific address space +- */ +-void kbase_mmu_interrupt_process(struct kbase_device *kbdev, +- struct kbase_context *kctx, struct kbase_as *as, +- struct kbase_fault *fault); +- + /** + * kbase_mmu_switch_to_ir() - Switch to incremental rendering if possible +- * @kctx The kbase_context for the faulting address space. +- * @reg Reference of a growable GPU memory region in the same context. +- * Takes ownership of the reference if successful. ++ * @kctx: kbase_context for the faulting address space. ++ * @reg: Reference of a growable GPU memory region in the same context. ++ * Takes ownership of the reference if successful. + * + * Used to switch to incremental rendering if we have nearly run out of + * virtual address space in a growable memory region. +@@ -60,4 +55,18 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev, + int kbase_mmu_switch_to_ir(struct kbase_context *kctx, + struct kbase_va_region *reg); + ++/** ++ * kbase_mmu_page_fault_worker() - Process a page fault. ++ * ++ * @data: work_struct passed by queue_work() ++ */ ++void kbase_mmu_page_fault_worker(struct work_struct *data); ++ ++/** ++ * kbase_mmu_bus_fault_worker() - Process a bus fault. ++ * ++ * @data: work_struct passed by queue_work() ++ */ ++void kbase_mmu_bus_fault_worker(struct work_struct *data); ++ + #endif /* _KBASE_MMU_INTERNAL_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_mode_aarch64.c b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_mode_aarch64.c +index 02493e9..16b928d 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_mode_aarch64.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_mode_aarch64.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2010-2014, 2016-2019 ARM Limited. All rights reserved.
++ * (C) COPYRIGHT 2010-2014, 2016-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include "mali_kbase.h" +@@ -48,25 +47,7 @@ + */ + static inline void page_table_entry_set(u64 *pte, u64 phy) + { +-#if KERNEL_VERSION(3, 18, 13) <= LINUX_VERSION_CODE + WRITE_ONCE(*pte, phy); +-#else +-#ifdef CONFIG_64BIT +- barrier(); +- *pte = phy; +- barrier(); +-#elif defined(CONFIG_ARM) +- barrier(); +- asm volatile("ldrd r0, [%1]\n\t" +- "strd r0, %0\n\t" +- : "=m" (*pte) +- : "r" (&phy) +- : "r0", "r1"); +- barrier(); +-#else +-#error "64-bit atomic write must be implemented for your architecture" +-#endif +-#endif + } + + static void mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_mode_lpae.c b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_mode_lpae.c +deleted file mode 100644 +index 91a2d7a..0000000 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_mode_lpae.c ++++ /dev/null +@@ -1,215 +0,0 @@ +-/* +- * +- * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. +- * +- * This program is free software and is provided to you under the terms of the +- * GNU General Public License version 2 as published by the Free Software +- * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. 
+- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. +- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, you can access it online at +- * http://www.gnu.org/licenses/gpl-2.0.html. +- * +- * SPDX-License-Identifier: GPL-2.0 +- * +- */ +- +- +-#include "mali_kbase.h" +-#include +-#include "mali_kbase_defs.h" +- +-#define ENTRY_TYPE_MASK 3ULL +-#define ENTRY_IS_ATE 1ULL +-#define ENTRY_IS_INVAL 2ULL +-#define ENTRY_IS_PTE 3ULL +- +-#define ENTRY_ATTR_BITS (7ULL << 2) /* bits 4:2 */ +-#define ENTRY_RD_BIT (1ULL << 6) +-#define ENTRY_WR_BIT (1ULL << 7) +-#define ENTRY_SHARE_BITS (3ULL << 8) /* bits 9:8 */ +-#define ENTRY_ACCESS_BIT (1ULL << 10) +-#define ENTRY_NX_BIT (1ULL << 54) +- +-#define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | \ +- ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT) +- +-/* Helper Function to perform assignment of page table entries, to +- * ensure the use of strd, which is required on LPAE systems. +- */ +-static inline void page_table_entry_set(u64 *pte, u64 phy) +-{ +-#if KERNEL_VERSION(3, 18, 13) <= LINUX_VERSION_CODE +- WRITE_ONCE(*pte, phy); +-#else +-#ifdef CONFIG_64BIT +- barrier(); +- *pte = phy; +- barrier(); +-#elif defined(CONFIG_ARM) +- barrier(); +- asm volatile("ldrd r0, [%1]\n\t" +- "strd r0, %0\n\t" +- : "=m" (*pte) +- : "r" (&phy) +- : "r0", "r1"); +- barrier(); +-#else +-#error "64-bit atomic write must be implemented for your architecture" +-#endif +-#endif +-} +- +-static void mmu_get_as_setup(struct kbase_mmu_table *mmut, +- struct kbase_mmu_setup * const setup) +-{ +- /* Set up the required caching policies at the correct indices +- * in the memattr register. 
+- */ +- setup->memattr = +- (AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY << +- (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | +- (AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL << +- (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | +- (AS_MEMATTR_LPAE_WRITE_ALLOC << +- (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | +- (AS_MEMATTR_LPAE_OUTER_IMPL_DEF << +- (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | +- (AS_MEMATTR_LPAE_OUTER_WA << +- (AS_MEMATTR_INDEX_OUTER_WA * 8)) | +- 0; /* The other indices are unused for now */ +- +- setup->transtab = ((u64)mmut->pgd & +- ((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) | +- AS_TRANSTAB_LPAE_ADRMODE_TABLE | +- AS_TRANSTAB_LPAE_READ_INNER; +- +- setup->transcfg = 0; +-} +- +-static void mmu_update(struct kbase_device *kbdev, +- struct kbase_mmu_table *mmut, +- int as_nr) +-{ +- struct kbase_as *as; +- struct kbase_mmu_setup *current_setup; +- +- if (WARN_ON(as_nr == KBASEP_AS_NR_INVALID)) +- return; +- +- as = &kbdev->as[as_nr]; +- current_setup = &as->current_setup; +- +- mmu_get_as_setup(mmut, current_setup); +- +- /* Apply the address space setting */ +- kbase_mmu_hw_configure(kbdev, as); +-} +- +-static void mmu_disable_as(struct kbase_device *kbdev, int as_nr) +-{ +- struct kbase_as * const as = &kbdev->as[as_nr]; +- struct kbase_mmu_setup * const current_setup = &as->current_setup; +- +- current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED; +- +- /* Apply the address space setting */ +- kbase_mmu_hw_configure(kbdev, as); +-} +- +-static phys_addr_t pte_to_phy_addr(u64 entry) +-{ +- if (!(entry & 1)) +- return 0; +- +- return entry & ~0xFFF; +-} +- +-static int ate_is_valid(u64 ate, int const level) +-{ +- return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE); +-} +- +-static int pte_is_valid(u64 pte, int const level) +-{ +- return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE); +-} +- +-/* +- * Map KBASE_REG flags to MMU flags +- */ +-static u64 get_mmu_flags(unsigned long flags) +-{ +- u64 mmu_flags; +- unsigned long memattr_idx; +- +- 
memattr_idx = KBASE_REG_MEMATTR_VALUE(flags); +- if (WARN(memattr_idx == AS_MEMATTR_INDEX_NON_CACHEABLE, +- "Legacy Mode MMU cannot honor GPU non-cachable memory, will use default instead\n")) +- memattr_idx = AS_MEMATTR_INDEX_DEFAULT; +- /* store mem_attr index as 4:2, noting that: +- * - macro called above ensures 3 bits already +- * - all AS_MEMATTR_INDEX_<...> macros only use 3 bits +- */ +- mmu_flags = memattr_idx << 2; +- +- /* write perm if requested */ +- mmu_flags |= (flags & KBASE_REG_GPU_WR) ? ENTRY_WR_BIT : 0; +- /* read perm if requested */ +- mmu_flags |= (flags & KBASE_REG_GPU_RD) ? ENTRY_RD_BIT : 0; +- /* nx if requested */ +- mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0; +- +- if (flags & KBASE_REG_SHARE_BOTH) { +- /* inner and outer shareable */ +- mmu_flags |= SHARE_BOTH_BITS; +- } else if (flags & KBASE_REG_SHARE_IN) { +- /* inner shareable coherency */ +- mmu_flags |= SHARE_INNER_BITS; +- } +- +- return mmu_flags; +-} +- +-static void entry_set_ate(u64 *entry, +- struct tagged_addr phy, +- unsigned long flags, +- int const level) +-{ +- page_table_entry_set(entry, as_phys_addr_t(phy) | get_mmu_flags(flags) | +- ENTRY_IS_ATE); +-} +- +-static void entry_set_pte(u64 *entry, phys_addr_t phy) +-{ +- page_table_entry_set(entry, (phy & ~0xFFF) | ENTRY_IS_PTE); +-} +- +-static void entry_invalidate(u64 *entry) +-{ +- page_table_entry_set(entry, ENTRY_IS_INVAL); +-} +- +-static struct kbase_mmu_mode const lpae_mode = { +- .update = mmu_update, +- .get_as_setup = mmu_get_as_setup, +- .disable_as = mmu_disable_as, +- .pte_to_phy_addr = pte_to_phy_addr, +- .ate_is_valid = ate_is_valid, +- .pte_is_valid = pte_is_valid, +- .entry_set_ate = entry_set_ate, +- .entry_set_pte = entry_set_pte, +- .entry_invalidate = entry_invalidate, +- .flags = 0 +-}; +- +-struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void) +-{ +- return &lpae_mode; +-} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/Kconfig 
b/dvalin/kernel/drivers/gpu/arm/midgard/platform/Kconfig +index ef9fb96..5f0118d 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/Kconfig ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/Kconfig +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2012-2013, 2017 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2012-2013, 2017, 2021 ARM Limited. All rights reserved. + # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,11 +16,7 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. + # +-# SPDX-License-Identifier: GPL-2.0 + # +-# +- +- + + # Add your platform specific Kconfig file here + # +@@ -27,4 +24,3 @@ + # + # Where xxx is the platform name is the name set in MALI_PLATFORM_NAME + # +- +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild +old mode 100644 +new mode 100755 +index 0a82eaf..4f3fc0d +--- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2012-2017, 2020-2021 ARM Limited. All rights reserved. 
+ # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,8 +16,6 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. + # +-# SPDX-License-Identifier: GPL-2.0 +-# + # + + +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild.rej b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild.rej +new file mode 100644 +index 0000000..dbc2029 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild.rej +@@ -0,0 +1,17 @@ ++--- drivers/gpu/arm/midgard/platform/devicetree/Kbuild +++++ drivers/gpu/arm/midgard/platform/devicetree/Kbuild ++@@ -16,10 +17,10 @@ ++ # along with this program; if not, you can access it online at ++ # http://www.gnu.org/licenses/gpl-2.0.html. 
++ # ++-# SPDX-License-Identifier: GPL-2.0 ++-# ++ # ++ ++ mali_kbase-y += \ ++- $(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o \ ++- $(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o +++ platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o \ +++ platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_platform.o \ +++ platform/$(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o \ +++ platform/$(MALI_PLATFORM_DIR)/mali_kbase_clk_rate_trace.o +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_clk_rate_trace.c b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_clk_rate_trace.c +new file mode 100644 +index 0000000..4bcd585 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_clk_rate_trace.c +@@ -0,0 +1,105 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2015, 2017-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include "mali_kbase_config_platform.h" ++ ++#if MALI_USE_CSF ++#include ++#endif ++ ++static void *enumerate_gpu_clk(struct kbase_device *kbdev, ++ unsigned int index) ++{ ++ if (index >= kbdev->nr_clocks) ++ return NULL; ++ ++#if MALI_USE_CSF ++ if (of_machine_is_compatible("arm,juno")) ++ WARN_ON(kbdev->nr_clocks != 1); ++#endif ++ ++ return kbdev->clocks[index]; ++} ++ ++static unsigned long get_gpu_clk_rate(struct kbase_device *kbdev, ++ void *gpu_clk_handle) ++{ ++#if MALI_USE_CSF ++ /* On Juno fpga platforms, the GPU clock rate is reported as 600 MHZ at ++ * the boot time. Then after the first call to kbase_devfreq_target() ++ * the clock rate is reported as 450 MHZ and the frequency does not ++ * change after that. But the actual frequency at which GPU operates ++ * is always 50 MHz, which is equal to the frequency of system counter ++ * and HW counters also increment at the same rate. ++ * DVFS, which is a client of kbase_ipa_control, needs normalization of ++ * GPU_ACTIVE counter to calculate the time for which GPU has been busy. ++ * So for the correct normalization need to return the system counter ++ * frequency value. ++ * This is a reasonable workaround as the frequency value remains same ++ * throughout. It can be removed after GPUCORE-25693. 
++ */ ++ if (of_machine_is_compatible("arm,juno")) ++ return arch_timer_get_cntfrq(); ++#endif ++ ++ return clk_get_rate((struct clk *)gpu_clk_handle); ++} ++ ++static int gpu_clk_notifier_register(struct kbase_device *kbdev, ++ void *gpu_clk_handle, struct notifier_block *nb) ++{ ++ compiletime_assert(offsetof(struct clk_notifier_data, clk) == ++ offsetof(struct kbase_gpu_clk_notifier_data, gpu_clk_handle), ++ "mismatch in the offset of clk member"); ++ ++ compiletime_assert(sizeof(((struct clk_notifier_data *)0)->clk) == ++ sizeof(((struct kbase_gpu_clk_notifier_data *)0)->gpu_clk_handle), ++ "mismatch in the size of clk member"); ++ ++#if MALI_USE_CSF ++ /* Frequency is fixed on Juno platforms */ ++ if (of_machine_is_compatible("arm,juno")) ++ return 0; ++#endif ++ ++ return clk_notifier_register((struct clk *)gpu_clk_handle, nb); ++} ++ ++static void gpu_clk_notifier_unregister(struct kbase_device *kbdev, ++ void *gpu_clk_handle, struct notifier_block *nb) ++{ ++#if MALI_USE_CSF ++ if (of_machine_is_compatible("arm,juno")) ++ return; ++#endif ++ ++ clk_notifier_unregister((struct clk *)gpu_clk_handle, nb); ++} ++ ++struct kbase_clk_rate_trace_op_conf clk_rate_trace_ops = { ++ .get_gpu_clk_rate = get_gpu_clk_rate, ++ .enumerate_gpu_clk = enumerate_gpu_clk, ++ .gpu_clk_notifier_register = gpu_clk_notifier_register, ++ .gpu_clk_notifier_unregister = gpu_clk_notifier_unregister, ++}; +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c +old mode 100644 +new mode 100755 +index 55a50c4..d729ffb +--- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c +@@ -1,20 +1,28 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* +- * mali_kbase_config_devicetree.c + * +- * Copyright (C) 
2017 Amlogic, Inc. All rights reserved. ++ * (C) COPYRIGHT 2015, 2017, 2020-2021 ARM Limited. All rights reserved. + * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License as published by +- * the Free Software Foundation; either version 2 of the License, or +- * (at your option) any later version. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. + * +- * This program is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +- * more details. ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + ++#include ++#include ++#include ++ + #ifdef CONFIG_DEVFREQ_THERMAL + #include + #include +@@ -82,3 +90,14 @@ void kbase_platform_unregister(void) + { + } + #endif ++ ++#ifdef CONFIG_MALI_MIDGARD_DVFS ++#if MALI_USE_CSF ++int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation) ++#else ++int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, u32 util_cl_share[2]) ++#endif ++{ ++ return 1; ++} ++#endif /* CONFIG_MALI_MIDGARD_DVFS */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.c b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.c +new file mode 100644 +index 0000000..63aa33f +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.c +@@ -0,0 +1,43 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include "mali_kbase_config_platform.h" ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct kbase_platform_funcs_conf platform_funcs = { ++ .platform_init_func = NULL, ++ .platform_term_func = NULL, ++ .platform_late_init_func = NULL, ++ .platform_late_term_func = NULL, ++}; +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h +old mode 100644 +new mode 100755 +index 233a18e..6cb3b53 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h +@@ -1,45 +1,22 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* +- * mali_kbase_config_platform.h + * +- * Copyright (C) 2017 Amlogic, Inc. All rights reserved. ++ * (C) COPYRIGHT 2014-2017, 2020-2021 ARM Limited. All rights reserved. + * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License as published by +- * the Free Software Foundation; either version 2 of the License, or +- * (at your option) any later version. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. + * +- * This program is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +- * more details. 
++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. + * +- */ +- +-/** +- * Maximum frequency GPU will be clocked at. Given in kHz. +- * This must be specified as there is no default value. +- * +- * Attached value: number in kHz +- * Default value: NA +- */ +-#define GPU_FREQ_KHZ_MAX (750000) +-/** +- * Minimum frequency GPU will be clocked at. Given in kHz. +- * This must be specified as there is no default value. ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * Attached value: number in kHz +- * Default value: NA +- */ +-#define GPU_FREQ_KHZ_MIN (100000) +- +-/** +- * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock +- * +- * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func +- * for the function prototype. +- * +- * Attached value: A kbase_cpu_clk_speed_func. 
+- * Default Value: NA + */ + #define CPU_SPEED_FUNC (NULL) + +@@ -81,7 +58,12 @@ extern struct devfreq_cooling_ops t83x_model_ops; + #else + #define POWER_MODEL_CALLBACKS (NULL) + #endif ++ ++#define CLK_RATE_TRACE_OPS (&clk_rate_trace_ops) ++ + extern struct kbase_pm_callback_conf pm_callbacks; ++extern struct kbase_clk_rate_trace_op_conf clk_rate_trace_ops; ++extern struct kbase_platform_funcs_conf platform_funcs; + + void mali_dev_freeze(void); + void mali_dev_restore(void); +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h.rej b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h.rej +new file mode 100644 +index 0000000..277acaa +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h.rej +@@ -0,0 +1,42 @@ ++--- drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h +++++ drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h ++@@ -1,11 +1,12 @@ +++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++ /* ++ * ++- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. +++ * (C) COPYRIGHT 2014-2017, 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++- * of such GNU licence. +++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++@@ -16,8 +17,6 @@ ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++- * SPDX-License-Identifier: GPL-2.0 ++- * ++ */ ++ ++ /** ++@@ -34,10 +33,13 @@ ++ * Attached value: pointer to @ref kbase_platform_funcs_conf ++ * Default value: See @ref kbase_platform_funcs_conf ++ */ ++-#define PLATFORM_FUNCS (NULL) +++#define PLATFORM_FUNCS (&platform_funcs) ++ ++-extern struct kbase_pm_callback_conf pm_callbacks; +++#define CLK_RATE_TRACE_OPS (&clk_rate_trace_ops) ++ +++extern struct kbase_pm_callback_conf pm_callbacks; +++extern struct kbase_clk_rate_trace_op_conf clk_rate_trace_ops; +++extern struct kbase_platform_funcs_conf platform_funcs; ++ /** ++ * Autosuspend delay ++ * +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c +old mode 100644 +new mode 100755 +index ee18718..4a0b72c +--- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c +@@ -1,21 +1,24 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* +- * mali_kbase_runtime_pm.c + * +- * Copyright (C) 2017 Amlogic, Inc. All rights reserved. ++ * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. + * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License as published by +- * the Free Software Foundation; either version 2 of the License, or +- * (at your option) any later version. ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. + * +- * This program is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for +- * more details. ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +-//#define DEBUG + #include + #include + #include +@@ -25,6 +28,7 @@ + #include + #include + #include ++ + #include "mali_kbase_config_platform.h" + #include "mali_scaling.h" + +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild +index 6780e4c..b547366 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2012-2013, 2016-2017 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2012-2013, 2016-2017, 2020-2021 ARM Limited. All rights reserved. + # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,10 +16,8 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. 
+ # +-# SPDX-License-Identifier: GPL-2.0 +-# + # + + mali_kbase-y += \ +- $(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \ +- mali_kbase_platform_fake.o ++ platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \ ++ mali_kbase_platform_fake.o +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h +index fac3cd5..d8682db 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2017, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /** +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c +index d165ce2..ff1ee65 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2017, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,18 +17,16 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + #include + #include + #include + #include + #include "mali_kbase_config_platform.h" + ++#include ++ + #ifndef CONFIG_OF + static struct kbase_io_resources io_resources = { + .job_irq_number = 68, +@@ -67,3 +66,14 @@ struct kbase_platform_config *kbase_get_platform_config(void) + { + return &versatile_platform_config; + } ++ ++#ifdef CONFIG_MALI_MIDGARD_DVFS ++#if MALI_USE_CSF ++int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation) ++#else ++int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, u32 util_cl_share[2]) ++#endif ++{ ++ return 1; ++} ++#endif /* CONFIG_MALI_MIDGARD_DVFS */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild +index 51b408e..b547366 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2013-2014, 2016-2017 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2012-2013, 2016-2017, 2020-2021 ARM Limited. All rights reserved. + # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,10 +16,8 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. 
+ # +-# SPDX-License-Identifier: GPL-2.0 +-# + # + + mali_kbase-y += \ +- $(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \ +- mali_kbase_platform_fake.o ++ platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \ ++ mali_kbase_platform_fake.o +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h +index fac3cd5..d8682db 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2017, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /** +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c +index efca0a5..fee6a36 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2011-2014, 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2014, 2017, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include +@@ -25,6 +24,8 @@ + #include + #include + ++#include ++ + #ifndef CONFIG_OF + static struct kbase_io_resources io_resources = { + .job_irq_number = 68, +@@ -63,3 +64,14 @@ struct kbase_platform_config *kbase_get_platform_config(void) + { + return &versatile_platform_config; + } ++ ++#ifdef CONFIG_MALI_MIDGARD_DVFS ++#if MALI_USE_CSF ++int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation) ++#else ++int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, u32 util_cl_share[2]) ++#endif ++{ ++ return 1; ++} ++#endif /* CONFIG_MALI_MIDGARD_DVFS */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild +index e07709c..bb92c47 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2012-2013, 2016-2017 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2012-2013, 2016-2017, 2021 ARM Limited. All rights reserved. + # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,11 +16,9 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. 
+ # +-# SPDX-License-Identifier: GPL-2.0 +-# + # + + mali_kbase-y += \ +- $(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \ +- $(MALI_PLATFORM_DIR)/mali_kbase_cpu_vexpress.o \ +- mali_kbase_platform_fake.o ++ platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \ ++ platform/$(MALI_PLATFORM_DIR)/mali_kbase_cpu_vexpress.o \ ++ mali_kbase_platform_fake.o +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h +index fac3cd5..d8682db 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014-2017, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /** +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c +index b6714b9..f6fb9aa 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2011-2014, 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2011-2014, 2017, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,17 +17,15 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + #include + #include + #include + #include + ++#include ++ + #ifndef CONFIG_OF + static struct kbase_io_resources io_resources = { + .job_irq_number = 75, +@@ -65,3 +64,14 @@ struct kbase_platform_config *kbase_get_platform_config(void) + { + return &versatile_platform_config; + } ++ ++#ifdef CONFIG_MALI_MIDGARD_DVFS ++#if MALI_USE_CSF ++int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation) ++#else ++int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, u32 util_cl_share[2]) ++#endif ++{ ++ return 1; ++} ++#endif /* CONFIG_MALI_MIDGARD_DVFS */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h b/dvalin/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h +index 8778d81..9dd9253 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _PROTECTED_MODE_SWITCH_H_ +@@ -28,35 +27,23 @@ struct protected_mode_device; + /** + * struct protected_mode_ops - Callbacks for protected mode switch operations + * +- * @protected_mode_enable: Callback to enable protected mode for device ++ * @protected_mode_enable: Callback to enable protected mode for device, and ++ * reset device ++ * Returns 0 on success, non-zero on error + * @protected_mode_disable: Callback to disable protected mode for device ++ * Returns 0 on success, non-zero on error + */ + struct protected_mode_ops { +- /** +- * protected_mode_enable() - Enable protected mode on device +- * @dev: The struct device +- * +- * Return: 0 on success, non-zero on error +- */ + int (*protected_mode_enable)( + struct protected_mode_device *protected_dev); +- +- /** +- * protected_mode_disable() - Disable protected mode on device, and +- * reset device +- * @dev: The struct device +- * +- * Return: 0 on success, non-zero on error +- */ + int (*protected_mode_disable)( + struct protected_mode_device *protected_dev); + }; + + /** + * struct protected_mode_device - Device structure for protected mode devices +- * +- * @ops - Callbacks associated with this device +- * @data - Pointer to device private data ++ * @ops: Callbacks associated with this device ++ * @data: Pointer to device private data + * + * This structure should be registered with the platform device using + * platform_set_drvdata(). +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/tests/Kbuild +index df16a77..ee3de7b 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/Kbuild ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/Kbuild +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. 
+ # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,9 +16,15 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. + # +-# SPDX-License-Identifier: GPL-2.0 +-# + # + ++ccflags-y += -I$(src)/include \ ++ -I$(src) ++ ++subdir-ccflags-y += -I$(src)/include \ ++ -I$(src) ++ + obj-$(CONFIG_MALI_KUTF) += kutf/ +-obj-$(CONFIG_MALI_IRQ_LATENCY) += mali_kutf_irq_test/ ++obj-$(CONFIG_MALI_KUTF_IRQ_TEST) += mali_kutf_irq_test/ ++obj-$(CONFIG_MALI_KUTF_CLK_RATE_TRACE) += mali_kutf_clk_rate_trace/kernel/ ++ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/Kconfig b/dvalin/kernel/drivers/gpu/arm/midgard/tests/Kconfig +index fa91aea..a86e1ce 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/Kconfig ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/Kconfig +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. + # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,9 +16,42 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. 
+ # +-# SPDX-License-Identifier: GPL-2.0 +-# + # + +-source "drivers/gpu/arm/midgard/tests/kutf/Kconfig" +-source "drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig" ++menuconfig MALI_KUTF ++ bool "Build Mali Kernel Unit Test Framework modules" ++ depends on MALI_MIDGARD && MALI_DEBUG ++ default y if MALI_DEBUG ++ help ++ This option will build the Mali testing framework modules. ++ ++ Modules: ++ - kutf.ko ++ - kutf_test.ko ++ ++config MALI_KUTF_IRQ_TEST ++ bool "Build Mali KUTF IRQ test module" ++ depends on MALI_KUTF ++ default y ++ help ++ This option will build the IRQ latency measurement test module. ++ It can determine the latency of the Mali GPU IRQ on your system. ++ ++ Modules: ++ - mali_kutf_irq_test.ko ++ ++config MALI_KUTF_CLK_RATE_TRACE ++ bool "Build Mali KUTF Clock rate trace test module" ++ depends on MALI_KUTF ++ default y ++ help ++ This option will build the clock rate trace portal test module. ++ It can test the clocks integration into the platform and exercise some ++ basic trace test in the system. ++ ++ Modules: ++ - mali_kutf_clk_rate_trace_test_portal.ko ++ ++ ++comment "Enable MALI_DEBUG for KUTF modules support" ++ depends on MALI_MIDGARD && !MALI_DEBUG && MALI_KUTF +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/Mconfig b/dvalin/kernel/drivers/gpu/arm/midgard/tests/Mconfig +index be3fedb..167facd 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/Mconfig ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/Mconfig +@@ -1,38 +1,61 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. 
++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. + # +-# A copy of the licence is included with the program, and can also be obtained +-# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +-# Boston, MA 02110-1301, USA. + # + +-config UNIT_TEST_KERNEL_MODULES +- bool +- default y if UNIT_TEST_CODE && BUILD_KERNEL_MODULES +- default n +- +-config BUILD_IPA_TESTS +- bool +- default y if UNIT_TEST_KERNEL_MODULES && MALI_DEVFREQ +- default n +- +-config BUILD_IPA_UNIT_TESTS +- bool +- default y if NO_MALI && BUILD_IPA_TESTS +- default n +- +-config BUILD_CSF_TESTS +- bool +- default y if UNIT_TEST_KERNEL_MODULES && GPU_HAS_CSF +- default n +- +-config BUILD_ARBIF_TESTS +- bool +- default y if UNIT_TEST_KERNEL_MODULES && MALI_ARBITER_SUPPORT +- default n ++menuconfig MALI_KUTF ++ bool "Build Mali Kernel Unit Test Framework modules" ++ depends on MALI_MIDGARD && MALI_DEBUG ++ default y if BACKEND_KERNEL && MALI_DEBUG ++ help ++ This option will build the Mali testing framework modules. ++ ++ Modules: ++ - kutf.ko ++ - kutf_test.ko ++ ++config MALI_KUTF_IRQ_TEST ++ bool "Build Mali KUTF IRQ test module" ++ depends on MALI_KUTF ++ default y ++ help ++ This option will build the IRQ latency measurement test module. ++ It can determine the latency of the Mali GPU IRQ on your system. ++ ++ Modules: ++ - mali_kutf_irq_test.ko + ++config MALI_KUTF_CLK_RATE_TRACE ++ bool "Build Mali KUTF Clock rate trace test module" ++ depends on MALI_KUTF ++ default y ++ help ++ This option will build the clock rate trace portal test module. 
++ It can test the clocks integration into the platform and exercise some ++ basic trace test in the system. ++ ++ Modules: ++ - mali_kutf_clk_rate_trace_test_portal.ko ++ ++ ++# Enable MALI_DEBUG for KUTF modules support ++ ++config UNIT_TEST_KERNEL_MODULES ++ bool ++ default y if UNIT_TEST_CODE && BACKEND_KERNEL ++ default n +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/build.bp b/dvalin/kernel/drivers/gpu/arm/midgard/tests/build.bp +new file mode 100644 +index 0000000..9d6137d +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/build.bp +@@ -0,0 +1,40 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ ++ ++bob_defaults { ++ name: "kernel_test_includes", ++ local_include_dirs: [ ++ "include", ++ "./../../", ++ "./../", ++ "./" ++ ], ++} ++ ++bob_defaults { ++ name: "kernel_test_configs", ++ mali_kutf: { ++ kbuild_options: ["CONFIG_MALI_KUTF=y"], ++ }, ++ unit_test_kernel_modules: { ++ kbuild_options: ["CONFIG_UNIT_TEST_KERNEL_MODULES=y"], ++ }, ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers.h b/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers.h +index 15e168c..c4c713c 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KERNEL_UTF_HELPERS_H_ +@@ -33,6 +32,14 @@ + + #include + ++/** ++ * kutf_helper_pending_input() - Check any pending lines sent by user space ++ * @context: KUTF context ++ * ++ * Return: true if there are pending lines, otherwise false ++ */ ++bool kutf_helper_pending_input(struct kutf_context *context); ++ + /** + * kutf_helper_input_dequeue() - Dequeue a line sent by user space + * @context: KUTF context +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers_user.h b/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers_user.h +index 3b1300e..e147cbb 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers_user.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers_user.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KERNEL_UTF_HELPERS_USER_H_ +@@ -63,7 +62,8 @@ struct kutf_helper_named_val { + * unrecoverable) + * + * Positive values indicate correct access but invalid parsing (can be +- * recovered from assuming data in the future is correct) */ ++ * recovered from assuming data in the future is correct) ++ */ + enum kutf_helper_err { + /* No error - must be zero */ + KUTF_HELPER_ERR_NONE = 0, +@@ -71,14 +71,16 @@ enum kutf_helper_err { + KUTF_HELPER_ERR_INVALID_NAME, + /* Named value parsing of string or u64 type encountered extra + * characters after the value (after the last digit for a u64 type or +- * after the string end delimiter for string type) */ ++ * after the string end delimiter for string type) ++ */ + KUTF_HELPER_ERR_CHARS_AFTER_VAL, + /* Named value parsing of string type couldn't find the string end + * delimiter. + * + * This cannot be encountered when the NAME="value" message exceeds the + * textbuf's maximum line length, because such messages are not checked +- * for an end string delimiter */ ++ * for an end string delimiter ++ */ + KUTF_HELPER_ERR_NO_END_DELIMITER, + /* Named value didn't parse as any of the known types */ + KUTF_HELPER_ERR_INVALID_VALUE, +@@ -122,7 +124,8 @@ int kutf_helper_max_str_len_for_kern(const char *val_name, int kern_buf_sz); + * + * Any failure will be logged on the suite's current test fixture + * +- * Returns 0 on success, non-zero on failure */ ++ * Returns 0 on success, non-zero on failure ++ */ + int kutf_helper_send_named_str(struct kutf_context *context, + const char *val_name, const char *val_str); + +@@ -138,7 +141,8 @@ int kutf_helper_send_named_str(struct kutf_context *context, + * + * Returns 0 on success. 
Negative value on failure to receive from the 'run' + * file, positive value indicates an enum kutf_helper_err value for correct +- * reception of data but invalid parsing */ ++ * reception of data but invalid parsing ++ */ + int kutf_helper_receive_named_val( + struct kutf_context *context, + struct kutf_helper_named_val *named_val); +@@ -165,7 +169,8 @@ int kutf_helper_receive_named_val( + * - return value will be 0 to indicate success + * + * The rationale behind this is that we'd prefer to continue the rest of the +- * test with failures propagated, rather than hitting a timeout */ ++ * test with failures propagated, rather than hitting a timeout ++ */ + int kutf_helper_receive_check_val( + struct kutf_helper_named_val *named_val, + struct kutf_context *context, +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h b/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h +index 988559d..5d4d96e 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KERNEL_UTF_MEM_H_ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h b/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h +index 49ebeb4..2fb1a47 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KERNEL_UTF_RESULTSET_H_ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h b/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h +index 8d75f50..b9c333b 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KERNEL_UTF_SUITE_H_ +@@ -264,9 +263,10 @@ struct kutf_suite { + struct list_head test_list; + }; + +-/* ============================================================================ +- Application functions +-============================================================================ */ ++/** =========================================================================== ++ * Application functions ++ * ============================================================================ ++ */ + + /** + * kutf_create_application() - Create an in kernel test application. +@@ -284,9 +284,10 @@ struct kutf_application *kutf_create_application(const char *name); + */ + void kutf_destroy_application(struct kutf_application *app); + +-/* ============================================================================ +- Suite functions +-============================================================================ */ ++/**============================================================================ ++ * Suite functions ++ * ============================================================================ ++ */ + + /** + * kutf_create_suite() - Create a kernel test suite. 
+@@ -416,10 +417,10 @@ void kutf_add_test_with_filters_and_data( + unsigned int filters, + union kutf_callback_data test_data); + +- +-/* ============================================================================ +- Test functions +-============================================================================ */ ++/** =========================================================================== ++ * Test functions ++ * ============================================================================ ++ */ + /** + * kutf_test_log_result_external() - Log a result which has been created + * externally into a in a standard form +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h b/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h +index 25b8285..18dcc3d 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _KERNEL_UTF_UTILS_H_ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/Kbuild +index 2531d41..c4790bc 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/Kbuild ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/Kbuild +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. + # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,12 +16,16 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. + # +-# SPDX-License-Identifier: GPL-2.0 + # +-# +- +-ccflags-y += -I$(src)/../include + +-obj-$(CONFIG_MALI_KUTF) += kutf.o ++ifeq ($(CONFIG_MALI_KUTF),y) ++obj-m += kutf.o + +-kutf-y := kutf_mem.o kutf_resultset.o kutf_suite.o kutf_utils.o kutf_helpers.o kutf_helpers_user.o ++kutf-y := \ ++ kutf_mem.o \ ++ kutf_resultset.o \ ++ kutf_suite.o \ ++ kutf_utils.o \ ++ kutf_helpers.o \ ++ kutf_helpers_user.o ++endif +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/build.bp b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/build.bp +index 32eab14..89edae9 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/build.bp ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/build.bp +@@ -1,23 +1,30 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. 
++ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * +- * A copy of the licence is included with the program, and can also be obtained +- * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +- * Boston, MA 02110-1301, USA. ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + + bob_kernel_module { + name: "kutf", + defaults: [ +- "kernel_defaults", +- "kutf_includes", ++ "mali_kbase_shared_config_defaults", ++ "kernel_test_configs", ++ "kernel_test_includes", + ], + srcs: [ + "Kbuild", +@@ -28,9 +35,8 @@ bob_kernel_module { + "kutf_suite.c", + "kutf_utils.c", + ], +- kbuild_options: ["CONFIG_MALI_KUTF=m"], + enabled: false, +- base_build_kutf: { ++ mali_kutf: { + enabled: true, + }, + } +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers.c b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers.c +index cab5add..c075428 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* Kernel UTF test helpers */ +@@ -29,10 +28,11 @@ + #include + #include + #include ++#include + + static DEFINE_SPINLOCK(kutf_input_lock); + +-static bool pending_input(struct kutf_context *context) ++bool kutf_helper_pending_input(struct kutf_context *context) + { + bool input_pending; + +@@ -44,6 +44,7 @@ static bool pending_input(struct kutf_context *context) + + return input_pending; + } ++EXPORT_SYMBOL(kutf_helper_pending_input); + + char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size) + { +@@ -59,7 +60,7 @@ char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size) + spin_unlock(&kutf_input_lock); + + err = wait_event_interruptible(context->userdata.input_waitq, +- pending_input(context)); ++ kutf_helper_pending_input(context)); + + if (err) + return ERR_PTR(-EINTR); +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers_user.c b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers_user.c +index 108fa82..a8b59f7 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers_user.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers_user.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* Kernel UTF test helpers that mirror those for kutf-userside */ +@@ -42,7 +41,8 @@ static const char *get_val_type_name(enum kutf_helper_valtype valtype) + * a) "<0 comparison on unsigned type" warning - if we did both upper + * and lower bound check + * b) incorrect range checking if it was a signed type - if we did +- * upper bound check only */ ++ * upper bound check only ++ */ + unsigned int type_idx = (unsigned int)valtype; + + if (type_idx >= (unsigned int)KUTF_HELPER_VALTYPE_COUNT) +@@ -54,7 +54,8 @@ static const char *get_val_type_name(enum kutf_helper_valtype valtype) + /* Check up to str_len chars of val_str to see if it's a valid value name: + * + * - Has between 1 and KUTF_HELPER_MAX_VAL_NAME_LEN characters before the \0 terminator +- * - And, each char is in the character set [A-Z0-9_] */ ++ * - And, each char is in the character set [A-Z0-9_] ++ */ + static int validate_val_name(const char *val_str, int str_len) + { + int i = 0; +@@ -87,7 +88,8 @@ static int validate_val_name(const char *val_str, int str_len) + * e.g. "str" + * + * That is, before any '\\', '\n' or '"' characters. 
This is so we don't have +- * to escape the string */ ++ * to escape the string ++ */ + static int find_quoted_string_valid_len(const char *str) + { + char *ptr; +@@ -207,7 +209,8 @@ int kutf_helper_send_named_str(struct kutf_context *context, + str_buf_sz = val_name_len + start_delim_len + val_str_len + end_delim_len + 1; + + /* Using kmalloc() here instead of mempool since we know we need to free +- * before we return */ ++ * before we return ++ */ + str_buf = kmalloc(str_buf_sz, GFP_KERNEL); + if (!str_buf) { + errmsg = kutf_dsprintf(&context->fixture_pool, +@@ -218,7 +221,8 @@ int kutf_helper_send_named_str(struct kutf_context *context, + copy_ptr = str_buf; + + /* Manually copy each string component instead of snprintf because +- * val_str may need to end early, and less error path handling */ ++ * val_str may need to end early, and less error path handling ++ */ + + /* name */ + memcpy(copy_ptr, val_name, val_name_len); +@@ -331,7 +335,8 @@ int kutf_helper_receive_named_val( + /* possibly a number value - strtoull will parse it */ + err = kstrtoull(recv_str, 0, &u64val); + /* unlike userspace can't get an end ptr, but if kstrtoull() +- * reads characters after the number it'll report -EINVAL */ ++ * reads characters after the number it'll report -EINVAL ++ */ + if (!err) { + int len_remain = strnlen(recv_str, recv_sz); + +@@ -399,7 +404,8 @@ int kutf_helper_receive_check_val( + goto out_fail_and_fixup; + } + +- if (strcmp(named_val->val_name, expect_val_name) != 0) { ++ if (named_val->val_name != NULL && ++ strcmp(named_val->val_name, expect_val_name) != 0) { + const char *msg = kutf_dsprintf(&context->fixture_pool, + "Expecting to receive value named '%s' but got '%s'", + expect_val_name, named_val->val_name); +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c +index fd98bea..716970a 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c ++++ 
b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* Kernel UTF memory management functions */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c +index 94ecfa4..c7572bd 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* Kernel UTF result management functions */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c +index 3f15669..6745299 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2014, 2017-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014, 2017-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,11 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* Kernel UTF suite, test and fixture management including user to kernel +- * interaction */ ++ * interaction ++ */ + + #include + #include +@@ -598,7 +598,7 @@ static int create_fixture_variant(struct kutf_test_function *test_func, + goto fail_file; + } + +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) ++#if KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE + tmp = debugfs_create_file_unsafe( + #else + tmp = debugfs_create_file( +@@ -634,7 +634,7 @@ static void kutf_remove_test_variant(struct kutf_test_fixture *test_fix) + kfree(test_fix); + } + +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0) ++#if KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE + /* Adapting to the upstream debugfs_create_x32() change */ + static int ktufp_u32_get(void *data, u64 *val) + { +@@ -679,7 +679,7 @@ void kutf_add_test_with_filters_and_data( + } + + test_func->filters = filters; +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0) ++#if KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE + tmp = debugfs_create_file_unsafe("filters", S_IROTH, test_func->dir, + &test_func->filters, &kutfp_fops_x32_ro); + #else +@@ -692,12 +692,17 @@ void kutf_add_test_with_filters_and_data( + } + + test_func->test_id = id; ++#if KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE ++ debugfs_create_u32("test_id", S_IROTH, test_func->dir, ++ &test_func->test_id); ++#else + tmp = debugfs_create_u32("test_id", S_IROTH, test_func->dir, + &test_func->test_id); + if (!tmp) { + pr_err("Failed to create debugfs file \"test_id\" when adding test %s\n", name); + goto fail_file; + } ++#endif + + for (i = 0; i < suite->fixture_variants; i++) { + if (create_fixture_variant(test_func, i)) { +@@ -1153,7 +1158,7 @@ void kutf_test_abort(struct kutf_context *context) + } + EXPORT_SYMBOL(kutf_test_abort); + +-#ifdef CONFIG_DEBUG_FS ++#if IS_ENABLED(CONFIG_DEBUG_FS) + + /** + * init_kutf_core() - Module entry point. 
+diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c +index 7f5ac51..c0fb3ba 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* Kernel UTF utility functions */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/kernel/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/kernel/Kbuild +new file mode 100644 +index 0000000..027bc27 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/kernel/Kbuild +@@ -0,0 +1,25 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. 
++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# ++ ++ifeq ($(CONFIG_MALI_KUTF_CLK_RATE_TRACE),y) ++obj-m += mali_kutf_clk_rate_trace_test_portal.o ++ ++mali_kutf_clk_rate_trace_test_portal-y := mali_kutf_clk_rate_trace_test.o ++endif +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/kernel/build.bp b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/kernel/build.bp +new file mode 100644 +index 0000000..225ad69 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/kernel/build.bp +@@ -0,0 +1,43 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ ++ ++bob_kernel_module { ++ name: "mali_kutf_clk_rate_trace_test_portal", ++ defaults: [ ++ "mali_kbase_shared_config_defaults", ++ "kernel_test_configs", ++ "kernel_test_includes", ++ ], ++ srcs: [ ++ "Kbuild", ++ "mali_kutf_clk_rate_trace_test.c", ++ "../mali_kutf_clk_rate_trace_test.h", ++ ], ++ extra_symbols: [ ++ "mali_kbase", ++ "kutf", ++ ], ++ enabled: false, ++ mali_kutf_clk_rate_trace: { ++ kbuild_options: ["CONFIG_MALI_KUTF_CLK_RATE_TRACE=y"], ++ enabled: true, ++ }, ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c +new file mode 100644 +index 0000000..f9410a5 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c +@@ -0,0 +1,957 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE) ++#include ++#else ++#include ++#endif ++#include "mali_kbase.h" ++#include "backend/gpu/mali_kbase_irq_internal.h" ++#include "backend/gpu/mali_kbase_pm_internal.h" ++#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" ++ ++#include ++#include ++#include ++#include ++ ++#include "../mali_kutf_clk_rate_trace_test.h" ++ ++#define MINOR_FOR_FIRST_KBASE_DEV (-1) ++ ++/* KUTF test application pointer for this test */ ++struct kutf_application *kutf_app; ++ ++enum portal_server_state { ++ PORTAL_STATE_NO_CLK, ++ PORTAL_STATE_LIVE, ++ PORTAL_STATE_CLOSING, ++}; ++ ++/** ++ * struct clk_trace_snapshot - Trace info data on a clock. ++ * @previous_rate: Snapshot start point clock rate. ++ * @current_rate: End point clock rate. It becomes the start rate of the ++ * next trace snapshot. ++ * @rate_up_cnt: Count in the snapshot duration when the clock trace ++ * write is a rate of higher value than the last. ++ * @rate_down_cnt: Count in the snapshot duration when the clock trace write ++ * is a rate of lower value than the last. ++ */ ++struct clk_trace_snapshot { ++ unsigned long previous_rate; ++ unsigned long current_rate; ++ u32 rate_up_cnt; ++ u32 rate_down_cnt; ++}; ++ ++/** ++ * struct kutf_clk_rate_trace_fixture_data - Fixture data for the test. ++ * @kbdev: kbase device for the GPU. ++ * @listener: Clock rate change listener structure. ++ * @invoke_notify: When true, invoke notify command is being executed. ++ * @snapshot: Clock trace update snapshot data array. A snapshot ++ * for each clock contains info accumulated between two ++ * GET_TRACE_SNAPSHOT requests. ++ * @nclks: Number of clocks visible to the trace portal. ++ * @pm_ctx_cnt: Net count of PM (Power Management) context INC/DEC ++ * PM_CTX_CNT requests made to the portal. On change from ++ * 0 to 1 (INC), or, 1 to 0 (DEC), a PM context action is ++ * triggered.
++ * @total_update_cnt: Total number of received trace write callbacks. ++ * @server_state: Portal server operational state. ++ * @result_msg: Message for the test result. ++ * @test_status: Portal test result status. ++ */ ++struct kutf_clk_rate_trace_fixture_data { ++ struct kbase_device *kbdev; ++ struct kbase_clk_rate_listener listener; ++ bool invoke_notify; ++ struct clk_trace_snapshot snapshot[BASE_MAX_NR_CLOCKS_REGULATORS]; ++ unsigned int nclks; ++ unsigned int pm_ctx_cnt; ++ unsigned int total_update_cnt; ++ enum portal_server_state server_state; ++ char const *result_msg; ++ enum kutf_result_status test_status; ++}; ++ ++struct clk_trace_portal_input { ++ struct kutf_helper_named_val cmd_input; ++ enum kbasep_clk_rate_trace_req portal_cmd; ++ int named_val_err; ++}; ++ ++struct kbasep_cmd_name_pair { ++ enum kbasep_clk_rate_trace_req cmd; ++ const char *name; ++}; ++ ++struct kbasep_cmd_name_pair kbasep_portal_cmd_name_map[] = { ++ { PORTAL_CMD_GET_PLATFORM, GET_PLATFORM }, ++ { PORTAL_CMD_GET_CLK_RATE_MGR, GET_CLK_RATE_MGR }, ++ { PORTAL_CMD_GET_CLK_RATE_TRACE, GET_CLK_RATE_TRACE }, ++ { PORTAL_CMD_GET_TRACE_SNAPSHOT, GET_TRACE_SNAPSHOT }, ++ { PORTAL_CMD_INC_PM_CTX_CNT, INC_PM_CTX_CNT }, ++ { PORTAL_CMD_DEC_PM_CTX_CNT, DEC_PM_CTX_CNT }, ++ { PORTAL_CMD_CLOSE_PORTAL, CLOSE_PORTAL }, ++ { PORTAL_CMD_INVOKE_NOTIFY_42KHZ, INVOKE_NOTIFY_42KHZ }, ++}; ++ ++/* Global pointer for the kutf_portal_trace_write() to use. When ++ * this pointer is engaged, new requests for create fixture will fail ++ * hence limiting the use of the portal at any time to a singleton.
++ */ ++struct kutf_clk_rate_trace_fixture_data *g_ptr_portal_data; ++ ++#define PORTAL_MSG_LEN (KUTF_MAX_LINE_LENGTH - MAX_REPLY_NAME_LEN) ++static char portal_msg_buf[PORTAL_MSG_LEN]; ++ ++static void kutf_portal_trace_write( ++ struct kbase_clk_rate_listener *listener, ++ u32 index, u32 new_rate) ++{ ++ struct clk_trace_snapshot *snapshot; ++ struct kutf_clk_rate_trace_fixture_data *data; ++ ++ if (listener == NULL) { ++ pr_err("%s - index: %u, new_rate: %u, listener is NULL\n", ++ __func__, index, new_rate); ++ return; ++ } ++ ++ data = container_of(listener, struct kutf_clk_rate_trace_fixture_data, ++ listener); ++ ++ lockdep_assert_held(&data->kbdev->pm.clk_rtm.lock); ++ ++ if (WARN_ON(g_ptr_portal_data == NULL)) ++ return; ++ if (WARN_ON(index >= g_ptr_portal_data->nclks)) ++ return; ++ ++ /* This callback is triggered by invoke notify command, skipping */ ++ if (data->invoke_notify) ++ return; ++ ++ snapshot = &g_ptr_portal_data->snapshot[index]; ++ if (new_rate > snapshot->current_rate) ++ snapshot->rate_up_cnt++; ++ else ++ snapshot->rate_down_cnt++; ++ snapshot->current_rate = new_rate; ++ g_ptr_portal_data->total_update_cnt++; ++} ++ ++static void kutf_set_pm_ctx_active(struct kutf_context *context) ++{ ++ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; ++ ++ if (WARN_ON(data->pm_ctx_cnt != 1)) ++ return; ++ ++ kbase_pm_context_active(data->kbdev); ++ kbase_pm_wait_for_desired_state(data->kbdev); ++#if !MALI_USE_CSF ++ kbase_pm_request_gpu_cycle_counter(data->kbdev); ++#endif ++} ++ ++static void kutf_set_pm_ctx_idle(struct kutf_context *context) ++{ ++ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; ++ ++ if (WARN_ON(data->pm_ctx_cnt > 0)) ++ return; ++#if !MALI_USE_CSF ++ kbase_pm_release_gpu_cycle_counter(data->kbdev); ++#endif ++ kbase_pm_context_idle(data->kbdev); ++} ++ ++static char const *kutf_clk_trace_do_change_pm_ctx(struct kutf_context *context, ++ struct clk_trace_portal_input *cmd) ++{ ++ struct 
kutf_clk_rate_trace_fixture_data *data = context->fixture; ++ int seq = cmd->cmd_input.u.val_u64 & 0xFF; ++ const unsigned int cnt = data->pm_ctx_cnt; ++ const enum kbasep_clk_rate_trace_req req = cmd->portal_cmd; ++ char const *errmsg = NULL; ++ ++ WARN_ON(req != PORTAL_CMD_INC_PM_CTX_CNT && ++ req != PORTAL_CMD_DEC_PM_CTX_CNT); ++ ++ if (req == PORTAL_CMD_INC_PM_CTX_CNT && cnt < UINT_MAX) { ++ data->pm_ctx_cnt++; ++ if (data->pm_ctx_cnt == 1) ++ kutf_set_pm_ctx_active(context); ++ } ++ ++ if (req == PORTAL_CMD_DEC_PM_CTX_CNT && cnt > 0) { ++ data->pm_ctx_cnt--; ++ if (data->pm_ctx_cnt == 0) ++ kutf_set_pm_ctx_idle(context); ++ } ++ ++ /* Skip the length check, no chance of overflow for two ints */ ++ snprintf(portal_msg_buf, PORTAL_MSG_LEN, ++ "{SEQ:%d, PM_CTX_CNT:%u}", seq, data->pm_ctx_cnt); ++ ++ if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { ++ pr_warn("Error in sending ack for adjusting pm_ctx_cnt\n"); ++ errmsg = kutf_dsprintf(&context->fixture_pool, ++ "Error in sending ack for adjusting pm_ctx_cnt"); ++ } ++ ++ return errmsg; ++} ++ ++static char const *kutf_clk_trace_do_get_rate(struct kutf_context *context, ++ struct clk_trace_portal_input *cmd) ++{ ++ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; ++ struct kbase_device *kbdev = data->kbdev; ++ int seq = cmd->cmd_input.u.val_u64 & 0xFF; ++ unsigned long rate; ++ bool idle; ++ int ret; ++ int i; ++ char const *errmsg = NULL; ++ ++ WARN_ON((cmd->portal_cmd != PORTAL_CMD_GET_CLK_RATE_MGR) && ++ (cmd->portal_cmd != PORTAL_CMD_GET_CLK_RATE_TRACE)); ++ ++ ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN, ++ "{SEQ:%d, RATE:[", seq); ++ ++ for (i = 0; i < data->nclks; i++) { ++ spin_lock(&kbdev->pm.clk_rtm.lock); ++ if (cmd->portal_cmd == PORTAL_CMD_GET_CLK_RATE_MGR) ++ rate = kbdev->pm.clk_rtm.clks[i]->clock_val; ++ else ++ rate = data->snapshot[i].current_rate; ++ idle = kbdev->pm.clk_rtm.gpu_idle; ++ spin_unlock(&kbdev->pm.clk_rtm.lock); ++ ++ if ((i + 1) == data->nclks) 
++ ret += snprintf(portal_msg_buf + ret, ++ PORTAL_MSG_LEN - ret, "0x%lx], GPU_IDLE:%d}", ++ rate, idle); ++ else ++ ret += snprintf(portal_msg_buf + ret, ++ PORTAL_MSG_LEN - ret, "0x%lx, ", rate); ++ ++ if (ret >= PORTAL_MSG_LEN) { ++ pr_warn("Message buf overflow with rate array data\n"); ++ return kutf_dsprintf(&context->fixture_pool, ++ "Message buf overflow with rate array data"); ++ } ++ } ++ ++ if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { ++ pr_warn("Error in sending back rate array\n"); ++ errmsg = kutf_dsprintf(&context->fixture_pool, ++ "Error in sending rate array"); ++ } ++ ++ return errmsg; ++} ++ ++/** ++ * kutf_clk_trace_do_get_snapshot() - Send back the current snapshot ++ * @context: KUTF context ++ * @cmd: The decoded portal input request ++ * ++ * The accumulated clock rate trace information is kept inside as an snapshot ++ * record. A user request of getting the snapshot marks the closure of the ++ * current snapshot record, and the start of the next one. The response ++ * message contains the current snapshot record, with each clock's ++ * data sequentially placed inside (array marker) [ ]. 
++ */ ++static char const *kutf_clk_trace_do_get_snapshot(struct kutf_context *context, ++ struct clk_trace_portal_input *cmd) ++{ ++ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; ++ struct clk_trace_snapshot snapshot; ++ int seq = cmd->cmd_input.u.val_u64 & 0xFF; ++ int ret; ++ int i; ++ char const *fmt; ++ char const *errmsg = NULL; ++ ++ WARN_ON(cmd->portal_cmd != PORTAL_CMD_GET_TRACE_SNAPSHOT); ++ ++ ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN, ++ "{SEQ:%d, SNAPSHOT_ARRAY:[", seq); ++ ++ for (i = 0; i < data->nclks; i++) { ++ spin_lock(&data->kbdev->pm.clk_rtm.lock); ++ /* copy out the snapshot of the clock */ ++ snapshot = data->snapshot[i]; ++ /* Set the next snapshot start condition */ ++ data->snapshot[i].previous_rate = snapshot.current_rate; ++ data->snapshot[i].rate_up_cnt = 0; ++ data->snapshot[i].rate_down_cnt = 0; ++ spin_unlock(&data->kbdev->pm.clk_rtm.lock); ++ ++ /* Check i corresponding to the last clock */ ++ if ((i + 1) == data->nclks) ++ fmt = "(0x%lx, 0x%lx, %u, %u)]}"; ++ else ++ fmt = "(0x%lx, 0x%lx, %u, %u), "; ++ ret += snprintf(portal_msg_buf + ret, PORTAL_MSG_LEN - ret, ++ fmt, snapshot.previous_rate, snapshot.current_rate, ++ snapshot.rate_up_cnt, snapshot.rate_down_cnt); ++ if (ret >= PORTAL_MSG_LEN) { ++ pr_warn("Message buf overflow with snapshot data\n"); ++ return kutf_dsprintf(&context->fixture_pool, ++ "Message buf overflow with snapshot data"); ++ } ++ } ++ ++ if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { ++ pr_warn("Error in sending back snapshot array\n"); ++ errmsg = kutf_dsprintf(&context->fixture_pool, ++ "Error in sending snapshot array"); ++ } ++ ++ return errmsg; ++} ++ ++/** ++ * kutf_clk_trace_do_invoke_notify_42k() - Invokes the stored notification callback ++ * @context: KUTF context ++ * @cmd: The decoded portal input request ++ * ++ * Invokes frequency change notification callbacks with a fake ++ * GPU frequency 42 kHz for the top clock domain. 
++ */ ++static char const *kutf_clk_trace_do_invoke_notify_42k( ++ struct kutf_context *context, ++ struct clk_trace_portal_input *cmd) ++{ ++ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; ++ int seq = cmd->cmd_input.u.val_u64 & 0xFF; ++ const unsigned long new_rate_hz = 42000; ++ int ret; ++ char const *errmsg = NULL; ++ struct kbase_clk_rate_trace_manager *clk_rtm = &data->kbdev->pm.clk_rtm; ++ ++ WARN_ON(cmd->portal_cmd != PORTAL_CMD_INVOKE_NOTIFY_42KHZ); ++ ++ spin_lock(&clk_rtm->lock); ++ ++ /* Flag the notification as self-invoked: kutf_portal_trace_write() ++ * returns early while invoke_notify is set, so the fake rate is not ++ * accumulated into the snapshot data. ++ */ ++ data->invoke_notify = true; ++ kbase_clk_rate_trace_manager_notify_all( ++ clk_rtm, 0, new_rate_hz); ++ data->invoke_notify = false; ++ ++ spin_unlock(&clk_rtm->lock); ++ ++ ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN, ++ "{SEQ:%d, HZ:%lu}", seq, new_rate_hz); ++ ++ if (ret >= PORTAL_MSG_LEN) { ++ pr_warn("Message buf overflow with invoked data\n"); ++ return kutf_dsprintf(&context->fixture_pool, ++ "Message buf overflow with invoked data"); ++ } ++ ++ if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { ++ pr_warn("Error in sending ack for " INVOKE_NOTIFY_42KHZ " request\n"); ++ errmsg = kutf_dsprintf(&context->fixture_pool, ++ "Error in sending ack for " INVOKE_NOTIFY_42KHZ " request"); ++ } ++ ++ return errmsg; ++} ++ ++/** ++ * kutf_clk_trace_do_close_portal() - Acknowledge a CLOSE_PORTAL request ++ * @context: KUTF context ++ * @cmd: The decoded portal input request ++ * ++ * Moves the portal server state to PORTAL_STATE_CLOSING and sends an ACK ++ * carrying the request sequence number and the current PM_CTX_CNT. ++ * ++ * Return: NULL on success, otherwise an error message string. ++ */ ++static char const *kutf_clk_trace_do_close_portal(struct kutf_context *context, ++ struct clk_trace_portal_input *cmd) ++{ ++ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; ++ int seq = cmd->cmd_input.u.val_u64 & 0xFF; ++ char const *errmsg = NULL; ++ ++ WARN_ON(cmd->portal_cmd != PORTAL_CMD_CLOSE_PORTAL); ++ ++ data->server_state = PORTAL_STATE_CLOSING; ++ ++ /* Skip the length check, no chance of overflow for two ints */ ++ snprintf(portal_msg_buf, PORTAL_MSG_LEN, ++ "{SEQ:%d, PM_CTX_CNT:%u}", seq, data->pm_ctx_cnt); ++ ++ if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { ++ pr_warn("Error in sending ack for " CLOSE_PORTAL " request\n"); ++ errmsg =
kutf_dsprintf(&context->fixture_pool, ++ "Error in sending ack for " CLOSE_PORTAL " request"); ++ } ++ ++ return errmsg; ++} ++ ++/** ++ * kutf_clk_trace_do_get_platform() - Gets platform information ++ * @context: KUTF context ++ * @cmd: The decoded portal input request ++ * ++ * Checks the gpu node in the device tree to see if arbitration is enabled. ++ * If so, searches the device tree to determine whether the platform is PV ++ * or PTM. The platform string (PV/PTM/GPU/UNKNOWN) is sent back in the ACK. ++ * ++ * Return: NULL on success, otherwise an error message string. ++ */ ++static char const *kutf_clk_trace_do_get_platform( ++ struct kutf_context *context, ++ struct clk_trace_portal_input *cmd) ++{ ++ int seq = cmd->cmd_input.u.val_u64 & 0xFF; ++ char const *errmsg = NULL; ++ const void *arbiter_if_node = NULL; ++ struct device_node *power_node = NULL; ++ const char *platform = "GPU"; ++#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) ++ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; ++ ++ arbiter_if_node = ++ of_get_property(data->kbdev->dev->of_node, "arbiter_if", NULL); ++#endif ++ if (arbiter_if_node) { ++ power_node = of_find_compatible_node(NULL, NULL, ++ "arm,mali-gpu-power"); ++ if (power_node) { ++ platform = "PV"; ++ } else { ++ power_node = of_find_compatible_node(NULL, NULL, ++ "arm,mali-ptm"); ++ if (power_node) ++ platform = "PTM"; ++ else ++ platform = "UNKNOWN"; ++ } ++ /* The node is only used as an existence check; drop the ++ * reference returned by of_find_compatible_node(). ++ */ ++ of_node_put(power_node); ++ } else { ++ platform = "GPU"; ++ } ++ ++ pr_debug("%s - platform is %s\n", __func__, platform); ++ snprintf(portal_msg_buf, PORTAL_MSG_LEN, ++ "{SEQ:%d, PLATFORM:%s}", seq, platform); ++ ++ WARN_ON(cmd->portal_cmd != PORTAL_CMD_GET_PLATFORM); ++ ++ if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { ++ pr_warn("Error in sending ack for " GET_PLATFORM " request\n"); ++ errmsg = kutf_dsprintf(&context->fixture_pool, ++ "Error in sending ack for " GET_PLATFORM " request"); ++ } ++ ++ return errmsg; ++} ++ ++static bool kutf_clk_trace_dequeue_portal_cmd(struct kutf_context *context, ++ struct clk_trace_portal_input *cmd) ++{ ++
int i; ++ int err = kutf_helper_receive_named_val(context, &cmd->cmd_input); ++ ++ cmd->named_val_err = err; ++ if (err == KUTF_HELPER_ERR_NONE && ++ cmd->cmd_input.type == KUTF_HELPER_VALTYPE_U64) { ++ /* All portal request commands are of format (named u64): ++ * CMD_NAME=1234 ++ * where, 1234 is a (variable) sequence number tag. ++ */ ++ for (i = 0; i < PORTAL_TOTAL_CMDS; i++) { ++ if (strcmp(cmd->cmd_input.val_name, ++ kbasep_portal_cmd_name_map[i].name)) ++ continue; ++ ++ cmd->portal_cmd = kbasep_portal_cmd_name_map[i].cmd; ++ return true; ++ } ++ } ++ ++ cmd->portal_cmd = PORTAL_CMD_INVALID; ++ return false; ++} ++ ++static void kutf_clk_trace_flag_result(struct kutf_context *context, ++ enum kutf_result_status result, char const *msg) ++{ ++ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; ++ ++ if (result > data->test_status) { ++ data->test_status = result; ++ if (msg) ++ data->result_msg = msg; ++ if (data->server_state == PORTAL_STATE_LIVE && ++ result > KUTF_RESULT_WARN) { ++ data->server_state = PORTAL_STATE_CLOSING; ++ } ++ } ++} ++ ++static bool kutf_clk_trace_process_portal_cmd(struct kutf_context *context, ++ struct clk_trace_portal_input *cmd) ++{ ++ char const *errmsg = NULL; ++ ++ BUILD_BUG_ON(ARRAY_SIZE(kbasep_portal_cmd_name_map) != ++ PORTAL_TOTAL_CMDS); ++ WARN_ON(cmd->portal_cmd == PORTAL_CMD_INVALID); ++ ++ switch (cmd->portal_cmd) { ++ case PORTAL_CMD_GET_PLATFORM: ++ errmsg = kutf_clk_trace_do_get_platform(context, cmd); ++ break; ++ case PORTAL_CMD_GET_CLK_RATE_MGR: ++ /* Fall through */ ++ case PORTAL_CMD_GET_CLK_RATE_TRACE: ++ errmsg = kutf_clk_trace_do_get_rate(context, cmd); ++ break; ++ case PORTAL_CMD_GET_TRACE_SNAPSHOT: ++ errmsg = kutf_clk_trace_do_get_snapshot(context, cmd); ++ break; ++ case PORTAL_CMD_INC_PM_CTX_CNT: ++ /* Fall through */ ++ case PORTAL_CMD_DEC_PM_CTX_CNT: ++ errmsg = kutf_clk_trace_do_change_pm_ctx(context, cmd); ++ break; ++ case PORTAL_CMD_CLOSE_PORTAL: ++ errmsg = 
kutf_clk_trace_do_close_portal(context, cmd); ++ break; ++ case PORTAL_CMD_INVOKE_NOTIFY_42KHZ: ++ errmsg = kutf_clk_trace_do_invoke_notify_42k(context, cmd); ++ break; ++ default: ++ pr_warn("Don't know how to handle portal_cmd: %d, abort session.\n", ++ cmd->portal_cmd); ++ errmsg = kutf_dsprintf(&context->fixture_pool, ++ "Don't know how to handle portal_cmd: %d", ++ cmd->portal_cmd); ++ break; ++ } ++ ++ if (errmsg) ++ kutf_clk_trace_flag_result(context, KUTF_RESULT_FAIL, errmsg); ++ ++ return (errmsg == NULL); ++} ++ ++/** ++ * kutf_clk_trace_do_nack_response() - respond a NACK to erroneous input ++ * @context: KUTF context ++ * @cmd: The erroneous input request ++ * ++ * This function deal with an erroneous input request, and respond with ++ * a proper 'NACK' message. ++ */ ++static int kutf_clk_trace_do_nack_response(struct kutf_context *context, ++ struct clk_trace_portal_input *cmd) ++{ ++ int seq; ++ int err; ++ char const *errmsg = NULL; ++ ++ WARN_ON(cmd->portal_cmd != PORTAL_CMD_INVALID); ++ ++ if (cmd->named_val_err == KUTF_HELPER_ERR_NONE && ++ cmd->cmd_input.type == KUTF_HELPER_VALTYPE_U64) { ++ /* Keep seq number as % 256 */ ++ seq = cmd->cmd_input.u.val_u64 & 255; ++ snprintf(portal_msg_buf, PORTAL_MSG_LEN, ++ "{SEQ:%d, MSG: Unknown command '%s'.}", seq, ++ cmd->cmd_input.val_name); ++ err = kutf_helper_send_named_str(context, "NACK", ++ portal_msg_buf); ++ } else ++ err = kutf_helper_send_named_str(context, "NACK", ++ "Wrong portal cmd format (Ref example: CMD_NAME=0X16)"); ++ ++ if (err) { ++ errmsg = kutf_dsprintf(&context->fixture_pool, ++ "Failed to send portal NACK response"); ++ kutf_clk_trace_flag_result(context, KUTF_RESULT_FAIL, errmsg); ++ } ++ ++ return err; ++} ++ ++/** ++ * kutf_clk_trace_barebone_check() - Sanity test on the clock tracing ++ * @context: KUTF context ++ * ++ * This function carries out some basic test on the tracing operation: ++ * 1). GPU idle on test start, trace rate should be 0 (low power state) ++ * 2). 
Make sure GPU is powered up, the trace rate should match ++ * that from the clcok manager's internal recorded rate ++ * 3). If the GPU active transition occurs following 2), there ++ * must be rate change event from tracing. ++ */ ++void kutf_clk_trace_barebone_check(struct kutf_context *context) ++{ ++ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; ++ struct kbase_device *kbdev = data->kbdev; ++ bool fail = false; ++ bool idle[2] = { false }; ++ char const *msg = NULL; ++ int i; ++ ++ /* Check consistency if gpu happens to be idle */ ++ spin_lock(&kbdev->pm.clk_rtm.lock); ++ idle[0] = kbdev->pm.clk_rtm.gpu_idle; ++ if (kbdev->pm.clk_rtm.gpu_idle) { ++ for (i = 0; i < data->nclks; i++) { ++ if (data->snapshot[i].current_rate) { ++ /* Idle should have a rate 0 */ ++ fail = true; ++ break; ++ } ++ } ++ } ++ spin_unlock(&kbdev->pm.clk_rtm.lock); ++ if (fail) { ++ msg = kutf_dsprintf(&context->fixture_pool, ++ "GPU Idle not yielding 0-rate"); ++ pr_err("Trace did not see idle rate\n"); ++ } else { ++ /* Make local PM active if not done so yet */ ++ if (data->pm_ctx_cnt == 0) { ++ /* Ensure the GPU is powered */ ++ data->pm_ctx_cnt++; ++ kutf_set_pm_ctx_active(context); ++ } ++ /* Checking the rate is consistent */ ++ spin_lock(&kbdev->pm.clk_rtm.lock); ++ idle[1] = kbdev->pm.clk_rtm.gpu_idle; ++ for (i = 0; i < data->nclks; i++) { ++ /* Rate match between the manager and the trace */ ++ if (kbdev->pm.clk_rtm.clks[i]->clock_val != ++ data->snapshot[i].current_rate) { ++ fail = true; ++ break; ++ } ++ } ++ spin_unlock(&kbdev->pm.clk_rtm.lock); ++ ++ if (idle[1]) { ++ msg = kutf_dsprintf(&context->fixture_pool, ++ "GPU still idle after set_pm_ctx_active"); ++ pr_err("GPU still idle after set_pm_ctx_active\n"); ++ } ++ ++ if (!msg && fail) { ++ msg = kutf_dsprintf(&context->fixture_pool, ++ "Trace rate not matching Clk manager's read"); ++ pr_err("Trace rate not matching Clk manager's read\n"); ++ } ++ } ++ ++ if (!msg && idle[0] && !idle[1] && 
!data->total_update_cnt) { ++ msg = kutf_dsprintf(&context->fixture_pool, ++ "Trace update did not occur"); ++ pr_err("Trace update did not occur\n"); ++ } ++ if (msg) ++ kutf_clk_trace_flag_result(context, KUTF_RESULT_FAIL, msg); ++ else if (!data->total_update_cnt) { ++ msg = kutf_dsprintf(&context->fixture_pool, ++ "No trace update seen during the test!"); ++ kutf_clk_trace_flag_result(context, KUTF_RESULT_WARN, msg); ++ } ++} ++ ++static bool kutf_clk_trace_end_of_stream(struct clk_trace_portal_input *cmd) ++{ ++ return (cmd->named_val_err == -EBUSY); ++} ++ ++void kutf_clk_trace_no_clks_dummy(struct kutf_context *context) ++{ ++ struct clk_trace_portal_input cmd; ++ unsigned long timeout = jiffies + HZ * 2; ++ bool has_cmd; ++ ++ while (time_before(jiffies, timeout)) { ++ if (kutf_helper_pending_input(context)) { ++ has_cmd = kutf_clk_trace_dequeue_portal_cmd(context, ++ &cmd); ++ if (!has_cmd && kutf_clk_trace_end_of_stream(&cmd)) ++ break; ++ ++ kutf_helper_send_named_str(context, "NACK", ++ "Fatal! No clocks visible, aborting"); ++ } ++ msleep(20); ++ } ++ ++ kutf_clk_trace_flag_result(context, KUTF_RESULT_FATAL, ++ "No clocks visble to the portal"); ++} ++ ++/** ++ * mali_kutf_clk_rate_trace_test_portal() - Service portal input ++ * @context: KUTF context ++ * ++ * The test portal operates on input requests. If the input request is one ++ * of the recognized portal commands, it handles it accordingly. Otherwise ++ * a negative response 'NACK' is returned. The portal service terminates ++ * when a 'CLOSE_PORTAL' request is received, or due to an internal error. ++ * Both case would result in the server_state transitioned to CLOSING. 
++ * ++ * If the portal is closed on request, a sanity test on the clock rate ++ * trace operation is undertaken via function: ++ * kutf_clk_trace_barebone_check(); ++ */ ++static void mali_kutf_clk_rate_trace_test_portal(struct kutf_context *context) ++{ ++ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; ++ struct clk_trace_portal_input new_cmd; ++ ++ pr_debug("Test portal service start\n"); ++ ++ while (data->server_state == PORTAL_STATE_LIVE) { ++ if (kutf_clk_trace_dequeue_portal_cmd(context, &new_cmd)) ++ kutf_clk_trace_process_portal_cmd(context, &new_cmd); ++ else if (kutf_clk_trace_end_of_stream(&new_cmd)) ++ /* Dequeue on portal input, end of stream */ ++ data->server_state = PORTAL_STATE_CLOSING; ++ else ++ kutf_clk_trace_do_nack_response(context, &new_cmd); ++ } ++ ++ /* Closing, exhausting all the pending inputs with NACKs. */ ++ if (data->server_state == PORTAL_STATE_CLOSING) { ++ while (kutf_helper_pending_input(context) && ++ (kutf_clk_trace_dequeue_portal_cmd(context, &new_cmd) || ++ !kutf_clk_trace_end_of_stream(&new_cmd))) { ++ kutf_helper_send_named_str(context, "NACK", ++ "Portal closing down"); ++ } ++ } ++ ++ /* If no portal error, do a barebone test here irrespective ++ * whatever the portal live session has been testing, which ++ * is entirely driven by the user-side via portal requests. ++ */ ++ if (data->test_status <= KUTF_RESULT_WARN) { ++ if (data->server_state != PORTAL_STATE_NO_CLK) ++ kutf_clk_trace_barebone_check(context); ++ else { ++ /* No clocks case, NACK 2-sec for the fatal situation */ ++ kutf_clk_trace_no_clks_dummy(context); ++ } ++ } ++ ++ /* If we have changed pm_ctx count, drop it back */ ++ if (data->pm_ctx_cnt) { ++ /* Although we count on portal requests, it only has material ++ * impact when from 0 -> 1. So the reverse is a simple one off. 
++ */ ++ data->pm_ctx_cnt = 0; ++ kutf_set_pm_ctx_idle(context); ++ } ++ ++ /* Finally log the test result line */ ++ if (data->test_status < KUTF_RESULT_WARN) ++ kutf_test_pass(context, data->result_msg); ++ else if (data->test_status == KUTF_RESULT_WARN) ++ kutf_test_warn(context, data->result_msg); ++ else if (data->test_status == KUTF_RESULT_FATAL) ++ kutf_test_fatal(context, data->result_msg); ++ else ++ kutf_test_fail(context, data->result_msg); ++ ++ pr_debug("Test end\n"); ++} ++ ++/** ++ * mali_kutf_clk_rate_trace_create_fixture() - Creates the fixture data ++ * required for mali_kutf_clk_rate_trace_test_portal. ++ * @context: KUTF context. ++ * ++ * Looks up the first kbase device, allocates zeroed fixture data from the ++ * fixture pool and, under the clock rate trace manager lock, records the ++ * starting rate of every clock present (0 when the GPU is idle). Only one ++ * portal session may exist at a time; a second request fails. ++ * ++ * Return: Fixture data created on success or NULL on failure ++ */ ++static void *mali_kutf_clk_rate_trace_create_fixture( ++ struct kutf_context *context) ++{ ++ struct kutf_clk_rate_trace_fixture_data *data; ++ struct kbase_device *kbdev; ++ unsigned long rate; ++ int i; ++ ++ /* Acquire the kbase device */ ++ pr_debug("Finding device\n"); ++ kbdev = kbase_find_device(MINOR_FOR_FIRST_KBASE_DEV); ++ if (kbdev == NULL) { ++ kutf_test_fail(context, "Failed to find kbase device"); ++ return NULL; ++ } ++ ++ pr_debug("Creating fixture\n"); ++ data = kutf_mempool_alloc(&context->fixture_pool, ++ sizeof(struct kutf_clk_rate_trace_fixture_data)); ++ if (!data) { ++ /* No fixture means no remove_fixture cleanup: release the ++ * device acquired above here. ++ */ ++ kbase_release_device(kbdev); ++ return NULL; ++ } ++ ++ *data = (const struct kutf_clk_rate_trace_fixture_data) { 0 }; ++ pr_debug("Hooking up the test portal to kbdev clk rate trace\n"); ++ spin_lock(&kbdev->pm.clk_rtm.lock); ++ ++ if (g_ptr_portal_data != NULL) { ++ pr_warn("Test portal is already in use, run aborted\n"); ++ kutf_test_fail(context, "Portal allows single session only"); ++ spin_unlock(&kbdev->pm.clk_rtm.lock); ++ /* Release the device acquired above on this failure path */ ++ kbase_release_device(kbdev); ++ return NULL; ++ } ++ ++ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { ++ if (kbdev->pm.clk_rtm.clks[i]) { ++ data->nclks++; ++ if (kbdev->pm.clk_rtm.gpu_idle) ++ rate = 0; ++ else ++ rate = kbdev->pm.clk_rtm.clks[i]->clock_val; ++
data->snapshot[i].previous_rate = rate; ++ data->snapshot[i].current_rate = rate; ++ } ++ } ++ ++ spin_unlock(&kbdev->pm.clk_rtm.lock); ++ ++ /* kutf_portal_trace_write() reads data->kbdev, so set it up before ++ * the listener is subscribed. ++ */ ++ data->kbdev = kbdev; ++ data->result_msg = NULL; ++ data->test_status = KUTF_RESULT_PASS; ++ ++ if (data->nclks) { ++ /* Subscribe this test server portal */ ++ data->listener.notify = kutf_portal_trace_write; ++ data->invoke_notify = false; ++ ++ /* Update the kutf_server_portal fixture_data pointer first, ++ * the trace write callback expects it to be non-NULL. ++ */ ++ g_ptr_portal_data = data; ++ kbase_clk_rate_trace_manager_subscribe( ++ &kbdev->pm.clk_rtm, &data->listener); ++ } ++ ++ if (data->nclks == 0) { ++ data->server_state = PORTAL_STATE_NO_CLK; ++ pr_debug("Kbdev has no clocks for rate trace\n"); ++ } else ++ data->server_state = PORTAL_STATE_LIVE; ++ ++ pr_debug("Created fixture\n"); ++ ++ return data; ++} ++ ++/** ++ * mali_kutf_clk_rate_trace_remove_fixture() - Destroy fixture data previously ++ * created by mali_kutf_clk_rate_trace_create_fixture. ++ * @context: KUTF context. ++ */ ++static void mali_kutf_clk_rate_trace_remove_fixture( ++ struct kutf_context *context) ++{ ++ struct kutf_clk_rate_trace_fixture_data *data = context->fixture; ++ struct kbase_device *kbdev = data->kbdev; ++ ++ if (data->nclks) { ++ /* Clean up the portal trace write arrangement: unsubscribe ++ * before clearing the pointer the callback relies on. ++ */ ++ kbase_clk_rate_trace_manager_unsubscribe( ++ &kbdev->pm.clk_rtm, &data->listener); ++ g_ptr_portal_data = NULL; ++ } ++ pr_debug("Destroying fixture\n"); ++ kbase_release_device(kbdev); ++ pr_debug("Destroyed fixture\n"); ++} ++ ++/** ++ * mali_kutf_clk_rate_trace_test_module_init() - Entry point for test module.
++ */ ++int mali_kutf_clk_rate_trace_test_module_init(void) ++{ ++ struct kutf_suite *suite; ++ unsigned int filters; ++ union kutf_callback_data suite_data = { 0 }; ++ ++ pr_debug("Creating app\n"); ++ ++ g_ptr_portal_data = NULL; ++ kutf_app = kutf_create_application(CLK_RATE_TRACE_APP_NAME); ++ ++ if (!kutf_app) { ++ pr_warn("Creation of app " CLK_RATE_TRACE_APP_NAME ++ " failed!\n"); ++ return -ENOMEM; ++ } ++ ++ pr_debug("Create suite %s\n", CLK_RATE_TRACE_SUITE_NAME); ++ suite = kutf_create_suite_with_filters_and_data( ++ kutf_app, CLK_RATE_TRACE_SUITE_NAME, 1, ++ mali_kutf_clk_rate_trace_create_fixture, ++ mali_kutf_clk_rate_trace_remove_fixture, ++ KUTF_F_TEST_GENERIC, ++ suite_data); ++ ++ if (!suite) { ++ pr_warn("Creation of suite %s failed!\n", ++ CLK_RATE_TRACE_SUITE_NAME); ++ kutf_destroy_application(kutf_app); ++ return -ENOMEM; ++ } ++ ++ filters = suite->suite_default_flags; ++ kutf_add_test_with_filters( ++ suite, 0x0, CLK_RATE_TRACE_PORTAL, ++ mali_kutf_clk_rate_trace_test_portal, ++ filters); ++ ++ pr_debug("Init complete\n"); ++ return 0; ++} ++ ++/** ++ * mali_kutf_clk_rate_trace_test_module_exit() - Module exit point for this ++ * test. 
++ */ ++void mali_kutf_clk_rate_trace_test_module_exit(void) ++{ ++ pr_debug("Exit start\n"); ++ kutf_destroy_application(kutf_app); ++ pr_debug("Exit complete\n"); ++} ++ ++ ++module_init(mali_kutf_clk_rate_trace_test_module_init); ++module_exit(mali_kutf_clk_rate_trace_test_module_exit); ++ ++MODULE_LICENSE("GPL"); +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h +new file mode 100644 +index 0000000..f37efa8 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h +@@ -0,0 +1,151 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#ifndef _KUTF_CLK_RATE_TRACE_TEST_H_ ++#define _KUTF_CLK_RATE_TRACE_TEST_H_ ++ ++#define CLK_RATE_TRACE_APP_NAME "clk_rate_trace" ++#define CLK_RATE_TRACE_SUITE_NAME "rate_trace" ++#define CLK_RATE_TRACE_PORTAL "portal" ++ ++/** ++ * enum kbasep_clk_rate_trace_req - request command to the clock rate trace ++ * service portal. 
++ * ++ * @PORTAL_CMD_GET_PLATFORM: Request the platform that the tests are ++ * to be run on. ++ * @PORTAL_CMD_GET_CLK_RATE_MGR: Request the clock trace manager internal ++ * data record. On a positive acknowledgement ++ * the prevailing clock rates and the GPU idle ++ * condition flag are returned. ++ * @PORTAL_CMD_GET_CLK_RATE_TRACE: Request the clock trace portal to return its ++ * data record. On a positive acknowledgement ++ * the last trace recorded clock rates and the ++ * GPU idle condition flag are returned. ++ * @PORTAL_CMD_GET_TRACE_SNAPSHOT: Request the clock trace portal to return its ++ * current snapshot data record. On a positive ++ * acknowledgement the snapshot array matching ++ * the number of clocks is returned. It also ++ * starts a fresh snapshot inside the clock ++ * trace portal. ++ * @PORTAL_CMD_INC_PM_CTX_CNT: Request the clock trace portal to increase ++ * its internal PM_CTX_COUNT. If this increase ++ * yielded a count of 0 -> 1 change, the portal ++ * will initiate a PM_CTX_ACTIVE call to the ++ * Kbase power management. Further increase ++ * requests only affect the portal's ++ * internal count value. ++ * @PORTAL_CMD_DEC_PM_CTX_CNT: Request the clock trace portal to decrease ++ * its internal PM_CTX_COUNT. If this decrease ++ * yielded a count of 1 -> 0 change, the portal ++ * will initiate a PM_CTX_IDLE call to the ++ * Kbase power management. ++ * @PORTAL_CMD_CLOSE_PORTAL: Inform the clock trace portal service the ++ * client has completed its session. The portal ++ * will start the close down action. If no ++ * error has occurred during the dynamic ++ * interactive session, an inherent basic test ++ * carrying out some sanity check on the clock ++ * trace is undertaken. ++ * @PORTAL_CMD_INVOKE_NOTIFY_42KHZ: Invokes all clock rate trace manager callbacks ++ * for the top clock domain with a new GPU frequency ++ * set to 42 kHz. ++ * @PORTAL_CMD_INVALID: Valid commands termination marker.
Must be ++ * the highest enumeration value, as it ++ * represents valid command array size. ++ * @PORTAL_TOTAL_CMDS: Alias of PORTAL_CMD_INVALID. ++ */ ++/* PORTAL_CMD_INVALID must be the last one, serving the size */ ++enum kbasep_clk_rate_trace_req { ++ PORTAL_CMD_GET_PLATFORM, ++ PORTAL_CMD_GET_CLK_RATE_MGR, ++ PORTAL_CMD_GET_CLK_RATE_TRACE, ++ PORTAL_CMD_GET_TRACE_SNAPSHOT, ++ PORTAL_CMD_INC_PM_CTX_CNT, ++ PORTAL_CMD_DEC_PM_CTX_CNT, ++ PORTAL_CMD_CLOSE_PORTAL, ++ PORTAL_CMD_INVOKE_NOTIFY_42KHZ, ++ PORTAL_CMD_INVALID, ++ PORTAL_TOTAL_CMDS = PORTAL_CMD_INVALID, ++}; ++ ++/** ++ * Portal service request command names. The portal request consists of a kutf ++ * named u64-value. For those above enumerated PORTAL_CMD, the names defined ++ * here are used to mark the name and then followed with a sequence number ++ * value. Example (manual script here for illustration): ++ * exec 5<>run # open the portal kutf run as fd-5 ++ * echo GET_CLK_RATE_MGR=1 >&5 # send the cmd and sequence number 1 ++ * head -n 1 <&5 # read back the 1-line server response ++ * ACK="{SEQ:1, RATE:[0x1ad27480], GPU_IDLE:1}" # response string ++ * echo GET_TRACE_SNAPSHOT=1 >&5 # send the cmd and sequence number 1 ++ * head -n 1 <&5 # read back the 1-line server response ++ * ACK="{SEQ:1, SNAPSHOT_ARRAY:[(0x0, 0x1ad27480, 1, 0)]}" ++ * echo CLOSE_PORTAL=1 >&5 # close the portal ++ * cat <&5 # read back all the response lines ++ * ACK="{SEQ:1, PM_CTX_CNT:0}" # response to close command ++ * KUTF_RESULT_PASS:(explicit pass) # internal sanity test passed. ++ * exec 5>&- # close the service portal fd.
++ * ++ * Expected request command return format: ++ * GET_CLK_RATE_MGR: ACK="{SEQ:12, RATE:[0x438, 0x500], GPU_IDLE:1}" ++ * Note, the above contains 2 clocks with rates in [], GPU idle ++ * GET_CLK_RATE_TRACE: ACK="{SEQ:6, RATE:[0x1ad27480], GPU_IDLE:0}" ++ * Note, 1 clock with rate in [], GPU not idle ++ * GET_TRACE_SNAPSHOT: ACK="{SEQ:8, SNAPSHOT_ARRAY:[(0x0, 0x1ad27480, 1, 0)]}" ++ * Note, 1 clock, (start_rate : 0, last_rate : 0x1ad27480, ++ * trace_rate_up_count: 1, trace_rate_down_count : 0) ++ * For the specific sample case here, there is a single rate_trace event ++ * that yielded a rate increase change. No rate drop event recorded in the ++ * reporting snapshot duration. ++ * INC_PM_CTX_CNT: ACK="{SEQ:1, PM_CTX_CNT:1}" ++ * Note, after the increment, PM_CTX_CNT is 1. (i.e. 0 -> 1) ++ * DEC_PM_CTX_CNT: ACK="{SEQ:3, PM_CTX_CNT:0}" ++ * Note, after the decrement, PM_CTX_CNT is 0. (i.e. 1 -> 0) ++ * CLOSE_PORTAL: ACK="{SEQ:1, PM_CTX_CNT:1}" ++ * Note, at the close, PM_CTX_CNT is 1. The PM_CTX_CNT will internally be ++ * dropped down to 0 as part of the portal close clean up. ++ */ ++#define GET_PLATFORM "GET_PLATFORM" ++#define GET_CLK_RATE_MGR "GET_CLK_RATE_MGR" ++#define GET_CLK_RATE_TRACE "GET_CLK_RATE_TRACE" ++#define GET_TRACE_SNAPSHOT "GET_TRACE_SNAPSHOT" ++#define INC_PM_CTX_CNT "INC_PM_CTX_CNT" ++#define DEC_PM_CTX_CNT "DEC_PM_CTX_CNT" ++#define CLOSE_PORTAL "CLOSE_PORTAL" ++#define INVOKE_NOTIFY_42KHZ "INVOKE_NOTIFY_42KHZ" ++ ++/** ++ * Portal service response tag names. The response consists of a kutf ++ * named string-value. In case of a 'NACK' (negative acknowledgement), it ++ * can be one of the two formats: ++ * 1. NACK="{SEQ:2, MSG:xyzed}" # NACK on command with sequence tag-2. ++ * Note, the portal has received a valid name and valid sequence number ++ * but can't carry out the request, reason in the MSG field. ++ * 2. NACK="Failing-message" ++ * Note, unable to parse a valid name or valid sequence number, ++ * or some internal error condition.
Reason in the quoted string. ++ */ ++#define ACK "ACK" ++#define NACK "NACK" ++#define MAX_REPLY_NAME_LEN 32 ++ ++#endif /* _KUTF_CLK_RATE_TRACE_TEST_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild +index ca8c512..213d6d5 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + # +-# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. + # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,12 +16,10 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. + # +-# SPDX-License-Identifier: GPL-2.0 + # +-# +- +-ccflags-y += -I$(src)/../include -I$(src)/../../../ -I$(src)/../../ -I$(src)/../../backend/gpu -I$(srctree)/drivers/staging/android + +-obj-$(CONFIG_MALI_IRQ_LATENCY) += mali_kutf_irq_test.o ++ifeq ($(CONFIG_MALI_KUTF_IRQ_TEST),y) ++obj-m += mali_kutf_irq_test.o + + mali_kutf_irq_test-y := mali_kutf_irq_test_main.o ++endif +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig +deleted file mode 100644 +index 4a3863a..0000000 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig ++++ /dev/null +@@ -1,29 +0,0 @@ +-# +-# (C) COPYRIGHT 2017 ARM Limited. 
All rights reserved. +-# +-# This program is free software and is provided to you under the terms of the +-# GNU General Public License version 2 as published by the Free Software +-# Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. +-# +-# This program is distributed in the hope that it will be useful, +-# but WITHOUT ANY WARRANTY; without even the implied warranty of +-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-# GNU General Public License for more details. +-# +-# You should have received a copy of the GNU General Public License +-# along with this program; if not, you can access it online at +-# http://www.gnu.org/licenses/gpl-2.0.html. +-# +-# SPDX-License-Identifier: GPL-2.0 +-# +-# +- +-config MALI_IRQ_LATENCY +- tristate "Mali GPU IRQ latency measurement" +- depends on MALI_MIDGARD && MALI_DEBUG && MALI_KUTF +- default m +- help +- This option will build a test module mali_kutf_irq_test that +- can determine the latency of the Mali GPU IRQ on your system. +- Choosing M here will generate a single module called mali_kutf_irq_test. +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile +deleted file mode 100644 +index bc4d654..0000000 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile ++++ /dev/null +@@ -1,51 +0,0 @@ +-# +-# (C) COPYRIGHT 2015, 2017-2018, 2020 ARM Limited. All rights reserved. +-# +-# This program is free software and is provided to you under the terms of the +-# GNU General Public License version 2 as published by the Free Software +-# Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. +-# +-# This program is distributed in the hope that it will be useful, +-# but WITHOUT ANY WARRANTY; without even the implied warranty of +-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +-# GNU General Public License for more details. +-# +-# You should have received a copy of the GNU General Public License +-# along with this program; if not, you can access it online at +-# http://www.gnu.org/licenses/gpl-2.0.html. +-# +-# SPDX-License-Identifier: GPL-2.0 +-# +-# +- +-# linux build system bootstrap for out-of-tree module +- +-# default to building for the host +-ARCH ?= $(shell uname -m) +- +-ifeq ($(KDIR),) +-$(error Must specify KDIR to point to the kernel to target)) +-endif +- +-TEST_CCFLAGS := \ +- -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ +- -DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \ +- -DMALI_USE_CSF=$(MALI_USE_CSF) \ +- $(SCONS_CFLAGS) \ +- -I$(CURDIR)/../include \ +- -I$(CURDIR)/../../../../../../include \ +- -I$(CURDIR)/../../../ \ +- -I$(CURDIR)/../../ \ +- -I$(CURDIR)/../../backend/gpu \ +- -I$(CURDIR)/../../debug \ +- -I$(CURDIR)/../../debug/backend \ +- -I$(CURDIR)/ \ +- -I$(srctree)/drivers/staging/android \ +- -I$(srctree)/include/linux +- +-all: +- $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) $(SCONS_CONFIGS) EXTRA_CFLAGS="$(TEST_CCFLAGS)" KBUILD_EXTRA_SYMBOLS="$(CURDIR)/../kutf/Module.symvers $(CURDIR)/../../Module.symvers" modules +- +-clean: +- $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp +index 90efdcf..155875b 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp +@@ -1,15 +1,21 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * +- * A copy of the licence is included with the program, and can also be obtained +- * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +- * Boston, MA 02110-1301, USA. ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +@@ -17,6 +23,7 @@ bob_kernel_module { + name: "mali_kutf_irq_test", + defaults: [ + "mali_kbase_shared_config_defaults", ++ "kernel_test_configs", + "kernel_test_includes", + ], + srcs: [ +@@ -28,8 +35,8 @@ bob_kernel_module { + "kutf", + ], + enabled: false, +- base_build_kutf: { ++ mali_kutf_irq_test: { ++ kbuild_options: ["CONFIG_MALI_KUTF_IRQ_TEST=y"], + enabled: true, +- kbuild_options: ["CONFIG_MALI_IRQ_LATENCY=m"], + }, + } +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c +index 26b442a..fdc5437 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2016-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2016-2018, 2020-2021 ARM Limited. 
All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include +@@ -25,8 +24,8 @@ + #include + + #include "mali_kbase.h" +-#include +-#include ++#include ++#include + + #include + #include +@@ -242,7 +241,7 @@ int mali_kutf_irq_test_main_init(void) + + irq_app = kutf_create_application("irq"); + +- if (NULL == irq_app) { ++ if (irq_app == NULL) { + pr_warn("Creation of test application failed!\n"); + return -ENOMEM; + } +@@ -251,7 +250,7 @@ int mali_kutf_irq_test_main_init(void) + 1, mali_kutf_irq_default_create_fixture, + mali_kutf_irq_default_remove_fixture); + +- if (NULL == suite) { ++ if (suite == NULL) { + pr_warn("Creation of test suite failed!\n"); + kutf_destroy_application(irq_app); + return -ENOMEM; +diff --git a/dvalin/kernel/drivers/base/memory_group_manager/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/thirdparty/Kbuild +similarity index 82% +rename from dvalin/kernel/drivers/base/memory_group_manager/Kbuild +rename to dvalin/kernel/drivers/gpu/arm/midgard/thirdparty/Kbuild +index a049bed..c723f3a 100644 +--- a/dvalin/kernel/drivers/base/memory_group_manager/Kbuild ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/thirdparty/Kbuild +@@ -1,10 +1,11 @@ ++# SPDX-License-Identifier: GPL-2.0 + # +-# (C) COPYRIGHT 2019 ARM Limited. All rights reserved. ++# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. 
+ # + # This program is free software and is provided to you under the terms of the + # GNU General Public License version 2 as published by the Free Software + # Foundation, and any use by you of this program is subject to the terms +-# of such GNU licence. ++# of such GNU license. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -15,8 +16,6 @@ + # along with this program; if not, you can access it online at + # http://www.gnu.org/licenses/gpl-2.0.html. + # +-# SPDX-License-Identifier: GPL-2.0 +-# + # + +-obj-$(CONFIG_MALI_MEMORY_GROUP_MANAGER) := memory_group_manager.o +\ No newline at end of file ++mali_kbase-y += thirdparty/mali_kbase_mmap.o +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c b/dvalin/kernel/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c +index f266d8e..de1199a 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c +@@ -1,24 +1,4 @@ + /* +- * +- * (C) COPYRIGHT ARM Limited. All rights reserved. +- * +- * This program is free software and is provided to you under the terms of the +- * GNU General Public License version 2 as published by the Free Software +- * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. +- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, you can access it online at +- * http://www.gnu.org/licenses/gpl-2.0.html. 
+- * +- * SPDX-License-Identifier: GPL-2.0 +- * +- *//* + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +@@ -30,7 +10,7 @@ + */ + + #include "linux/mman.h" +-#include "../mali_kbase.h" ++#include + + /* mali_kbase_mmap.c + * +@@ -209,7 +189,8 @@ check_current: + return -ENOMEM; + if (gap_start <= high_limit && gap_end - gap_start >= length) { + /* We found a suitable gap. Clip it with the original +- * high_limit. */ ++ * high_limit. ++ */ + if (gap_end > info->high_limit) + gap_end = info->high_limit; + +@@ -270,19 +251,38 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx, + bool is_same_4gb_page = false; + unsigned long ret; + ++ /* the 'nolock' form is used here: ++ * - the base_pfn of the SAME_VA zone does not change ++ * - in normal use, va_size_pages is constant once the first allocation ++ * begins ++ * ++ * However, in abnormal use this function could be processing whilst ++ * another new zone is being setup in a different thread (e.g. to ++ * borrow part of the SAME_VA zone). In the worst case, this path may ++ * witness a higher SAME_VA end_pfn than the code setting up the new ++ * zone. ++ * ++ * This is safe because once we reach the main allocation functions, ++ * we'll see the updated SAME_VA end_pfn and will determine that there ++ * is no free region at the address found originally by too large a ++ * same_va_end_addr here, and will fail the allocation gracefully. ++ */ ++ struct kbase_reg_zone *zone = ++ kbase_ctx_reg_zone_get_nolock(kctx, KBASE_REG_ZONE_SAME_VA); ++ u64 same_va_end_addr = kbase_reg_zone_end_pfn(zone) << PAGE_SHIFT; ++ + /* err on fixed address */ + if ((flags & MAP_FIXED) || addr) + return -EINVAL; + +-#ifdef CONFIG_64BIT ++#if IS_ENABLED(CONFIG_64BIT) + /* too big? 
*/ + if (len > TASK_SIZE - SZ_2M) + return -ENOMEM; + + if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { +- +- high_limit = min_t(unsigned long, mm->mmap_base, +- (kctx->same_va_end << PAGE_SHIFT)); ++ high_limit = ++ min_t(unsigned long, mm->mmap_base, same_va_end_addr); + + /* If there's enough (> 33 bits) of GPU VA space, align + * to 2MB boundaries. +@@ -317,19 +317,25 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx, + align_mask = align_offset - 1; + is_shader_code = true; + } ++#if !MALI_USE_CSF + } else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { +- unsigned long extent_bytes = +- (unsigned long)(reg->extent << PAGE_SHIFT); ++ unsigned long extension_bytes = ++ (unsigned long)(reg->extension ++ << PAGE_SHIFT); + /* kbase_check_alloc_sizes() already satisfies + * these checks, but they're here to avoid + * maintenance hazards due to the assumptions +- * involved */ +- WARN_ON(reg->extent > (ULONG_MAX >> PAGE_SHIFT)); ++ * involved ++ */ ++ WARN_ON(reg->extension > ++ (ULONG_MAX >> PAGE_SHIFT)); + WARN_ON(reg->initial_commit > (ULONG_MAX >> PAGE_SHIFT)); +- WARN_ON(!is_power_of_2(extent_bytes)); +- align_mask = extent_bytes - 1; ++ WARN_ON(!is_power_of_2(extension_bytes)); ++ align_mask = extension_bytes - 1; + align_offset = +- extent_bytes - (reg->initial_commit << PAGE_SHIFT); ++ extension_bytes - ++ (reg->initial_commit << PAGE_SHIFT); ++#endif /* !MALI_USE_CSF */ + } else if (reg->flags & KBASE_REG_GPU_VA_SAME_4GB_PAGE) { + is_same_4gb_page = true; + } +@@ -352,11 +358,10 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx, + is_same_4gb_page); + + if (IS_ERR_VALUE(ret) && high_limit == mm->mmap_base && +- high_limit < (kctx->same_va_end << PAGE_SHIFT)) { ++ high_limit < same_va_end_addr) { + /* Retry above mmap_base */ + info.low_limit = mm->mmap_base; +- info.high_limit = min_t(u64, TASK_SIZE, +- (kctx->same_va_end << PAGE_SHIFT)); ++ info.high_limit = min_t(u64, TASK_SIZE, same_va_end_addr); + 
+ ret = kbase_unmapped_area_topdown(&info, is_shader_code, + is_same_4gb_page); +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tl/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/tl/Kbuild +new file mode 100644 +index 0000000..4344850 +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tl/Kbuild +@@ -0,0 +1,32 @@ ++# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++# ++# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. ++# ++# This program is free software and is provided to you under the terms of the ++# GNU General Public License version 2 as published by the Free Software ++# Foundation, and any use by you of this program is subject to the terms ++# of such GNU license. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, you can access it online at ++# http://www.gnu.org/licenses/gpl-2.0.html. ++# ++# ++ ++mali_kbase-y += \ ++ tl/mali_kbase_timeline.o \ ++ tl/mali_kbase_timeline_io.o \ ++ tl/mali_kbase_tlstream.o \ ++ tl/mali_kbase_tracepoints.o ++ ++ ++ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) ++ mali_kbase-y += tl/backend/mali_kbase_timeline_csf.o ++else ++ mali_kbase-y += tl/backend/mali_kbase_timeline_jm.o ++endif +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tl/backend/mali_kbase_timeline_csf.c b/dvalin/kernel/drivers/gpu/arm/midgard/tl/backend/mali_kbase_timeline_csf.c +new file mode 100644 +index 0000000..a2868da +--- /dev/null ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tl/backend/mali_kbase_timeline_csf.c +@@ -0,0 +1,171 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#include ++#include ++#include ++ ++#include ++ ++void kbase_create_timeline_objects(struct kbase_device *kbdev) ++{ ++ unsigned int as_nr; ++ unsigned int slot_i; ++ struct kbase_context *kctx; ++ struct kbase_timeline *timeline = kbdev->timeline; ++ struct kbase_tlstream *summary = ++ &kbdev->timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY]; ++ ++ /* Summarize the Address Space objects. */ ++ for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) ++ __kbase_tlstream_tl_new_as(summary, &kbdev->as[as_nr], as_nr); ++ ++ /* Create Legacy GPU object to track in AOM for dumping */ ++ __kbase_tlstream_tl_new_gpu(summary, ++ kbdev, ++ kbdev->gpu_props.props.raw_props.gpu_id, ++ kbdev->gpu_props.num_cores); ++ ++ ++ for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) ++ __kbase_tlstream_tl_lifelink_as_gpu(summary, ++ &kbdev->as[as_nr], ++ kbdev); ++ ++ /* Trace the creation of a new kbase device and set its properties. */ ++ __kbase_tlstream_tl_kbase_new_device(summary, ++ kbdev->gpu_props.props.raw_props.gpu_id, ++ kbdev->gpu_props.num_cores, kbdev->csf.global_iface.group_num, ++ kbdev->nr_hw_address_spaces); ++ ++ /* Lock the context list, to ensure no changes to the list are made ++ * while we're summarizing the contexts and their contents. 
++ */ ++ mutex_lock(&timeline->tl_kctx_list_lock); ++ ++ /* Hold the scheduler lock while we emit the current state ++ * We also need to continue holding the lock until after the first body ++ * stream tracepoints are emitted to ensure we don't change the ++ * scheduler until after then ++ */ ++ mutex_lock(&kbdev->csf.scheduler.lock); ++ ++ for (slot_i = 0; slot_i < kbdev->csf.global_iface.group_num; slot_i++) { ++ ++ struct kbase_queue_group *group = ++ kbdev->csf.scheduler.csg_slots[slot_i].resident_group; ++ ++ if (group) ++ __kbase_tlstream_tl_kbase_device_program_csg(summary, ++ kbdev->gpu_props.props.raw_props.gpu_id, ++ group->handle, slot_i); ++ } ++ ++ /* Reset body stream buffers while holding the kctx lock. ++ * As we are holding the lock, we can guarantee that no kctx creation or ++ * deletion tracepoints can be fired from outside of this function by ++ * some other thread. ++ */ ++ kbase_timeline_streams_body_reset(timeline); ++ ++ mutex_unlock(&kbdev->csf.scheduler.lock); ++ ++ /* For each context in the device... */ ++ list_for_each_entry(kctx, &timeline->tl_kctx_list, tl_kctx_list_node) { ++ size_t i; ++ struct kbase_tlstream *body = ++ &timeline->streams[TL_STREAM_TYPE_OBJ]; ++ ++ /* Lock the context's KCPU queues, to ensure no KCPU-queue ++ * related actions can occur in this context from now on. ++ */ ++ mutex_lock(&kctx->csf.kcpu_queues.lock); ++ ++ /* Acquire the MMU lock, to ensure we don't get a concurrent ++ * address space assignment while summarizing this context's ++ * address space. ++ */ ++ mutex_lock(&kbdev->mmu_hw_mutex); ++ ++ /* Trace the context itself into the body stream, not the ++ * summary stream. ++ * We place this in the body to ensure it is ordered after any ++ * other tracepoints related to the contents of the context that ++ * might have been fired before acquiring all of the per-context ++ * locks. 
++ * This ensures that those tracepoints will not actually affect ++ * the object model state, as they reference a context that ++ * hasn't been traced yet. They may, however, cause benign ++ * errors to be emitted. ++ */ ++ __kbase_tlstream_tl_kbase_new_ctx(body, kctx->id, ++ kbdev->gpu_props.props.raw_props.gpu_id); ++ ++ /* Also trace with the legacy AOM tracepoint for dumping */ ++ __kbase_tlstream_tl_new_ctx(body, ++ kctx, ++ kctx->id, ++ (u32)(kctx->tgid)); ++ ++ /* Trace the currently assigned address space */ ++ if (kctx->as_nr != KBASEP_AS_NR_INVALID) ++ __kbase_tlstream_tl_kbase_ctx_assign_as(body, kctx->id, ++ kctx->as_nr); ++ ++ ++ /* Trace all KCPU queues in the context into the body stream. ++ * As we acquired the KCPU lock after resetting the body stream, ++ * it's possible that some KCPU-related events for this context ++ * occurred between that reset and now. ++ * These will cause errors to be emitted when parsing the ++ * timeline, but they will not affect the correctness of the ++ * object model. ++ */ ++ for (i = 0; i < KBASEP_MAX_KCPU_QUEUES; i++) { ++ const struct kbase_kcpu_command_queue *kcpu_queue = ++ kctx->csf.kcpu_queues.array[i]; ++ ++ if (kcpu_queue) ++ __kbase_tlstream_tl_kbase_new_kcpuqueue( ++ body, kcpu_queue, kcpu_queue->kctx->id, ++ kcpu_queue->num_pending_cmds); ++ } ++ ++ mutex_unlock(&kbdev->mmu_hw_mutex); ++ mutex_unlock(&kctx->csf.kcpu_queues.lock); ++ ++ /* Now that all per-context locks for this context have been ++ * released, any per-context tracepoints that are fired from ++ * any other threads will go into the body stream after ++ * everything that was just summarised into the body stream in ++ * this iteration of the loop, so will start to correctly update ++ * the object model state. ++ */ ++ } ++ ++ mutex_unlock(&timeline->tl_kctx_list_lock); ++ ++ /* Static object are placed into summary packet that needs to be ++ * transmitted first. Flush all streams to make it available to ++ * user space. 
++ */ ++ kbase_timeline_streams_flush(timeline); ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tl/backend/mali_kbase_timeline_jm.c b/dvalin/kernel/drivers/gpu/arm/midgard/tl/backend/mali_kbase_timeline_jm.c +index c368ac7..9ba89f5 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tl/backend/mali_kbase_timeline_jm.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tl/backend/mali_kbase_timeline_jm.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,13 +17,11 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +-#include "../mali_kbase_tracepoints.h" +-#include "../mali_kbase_timeline.h" +-#include "../mali_kbase_timeline_priv.h" ++#include ++#include ++#include + + #include + +@@ -66,16 +65,16 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev) + /* Lock the context list, to ensure no changes to the list are made + * while we're summarizing the contexts and their contents. + */ +- mutex_lock(&kbdev->kctx_list_lock); ++ mutex_lock(&timeline->tl_kctx_list_lock); + + /* For each context in the device... 
*/ +- list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { ++ list_for_each_entry(kctx, &timeline->tl_kctx_list, tl_kctx_list_node) { + /* Summarize the context itself */ + __kbase_tlstream_tl_new_ctx(summary, + kctx, + kctx->id, + (u32)(kctx->tgid)); +- }; ++ } + + /* Reset body stream buffers while holding the kctx lock. + * This ensures we can't fire both summary and normal tracepoints for +@@ -87,11 +86,11 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev) + */ + kbase_timeline_streams_body_reset(timeline); + +- mutex_unlock(&kbdev->kctx_list_lock); ++ mutex_unlock(&timeline->tl_kctx_list_lock); + + /* Static object are placed into summary packet that needs to be + * transmitted first. Flush all streams to make it available to + * user space. + */ + kbase_timeline_streams_flush(timeline); +-} +\ No newline at end of file ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline.c b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline.c +index 88fba83..09818a5 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include "mali_kbase_timeline.h" +@@ -109,11 +108,14 @@ int kbase_timeline_init(struct kbase_timeline **timeline, + { + enum tl_stream_type i; + struct kbase_timeline *result; ++#if MALI_USE_CSF ++ struct kbase_tlstream *csffw_stream; ++#endif + + if (!timeline || !timeline_flags) + return -EINVAL; + +- result = kzalloc(sizeof(*result), GFP_KERNEL); ++ result = vzalloc(sizeof(*result)); + if (!result) + return -ENOMEM; + +@@ -125,12 +127,20 @@ int kbase_timeline_init(struct kbase_timeline **timeline, + kbase_tlstream_init(&result->streams[i], i, + &result->event_queue); + ++ /* Initialize the kctx list */ ++ mutex_init(&result->tl_kctx_list_lock); ++ INIT_LIST_HEAD(&result->tl_kctx_list); ++ + /* Initialize autoflush timer. */ + atomic_set(&result->autoflush_timer_active, 0); + kbase_timer_setup(&result->autoflush_timer, + kbasep_timeline_autoflush_timer_callback); + result->timeline_flags = timeline_flags; + ++#if MALI_USE_CSF ++ csffw_stream = &result->streams[TL_STREAM_TYPE_CSFFW]; ++ kbase_csf_tl_reader_init(&result->csf_tl_reader, csffw_stream); ++#endif + + *timeline = result; + return 0; +@@ -143,11 +153,16 @@ void kbase_timeline_term(struct kbase_timeline *timeline) + if (!timeline) + return; + ++#if MALI_USE_CSF ++ kbase_csf_tl_reader_term(&timeline->csf_tl_reader); ++#endif ++ ++ WARN_ON(!list_empty(&timeline->tl_kctx_list)); + + for (i = (enum tl_stream_type)0; i < TL_STREAM_TYPE_COUNT; i++) + kbase_tlstream_term(&timeline->streams[i]); + +- kfree(timeline); ++ vfree(timeline); + } + + #ifdef CONFIG_MALI_DEVFREQ +@@ -162,11 +177,7 @@ static void kbase_tlstream_current_devfreq_target(struct kbase_device *kbdev) + unsigned long cur_freq = 0; + + mutex_lock(&devfreq->lock); +-#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE +- cur_freq = kbdev->current_nominal_freq; +-#else + cur_freq = devfreq->last_status.current_frequency; +-#endif + KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(kbdev, (u64)cur_freq); + 
mutex_unlock(&devfreq->lock); + } +@@ -175,13 +186,24 @@ static void kbase_tlstream_current_devfreq_target(struct kbase_device *kbdev) + + int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags) + { +- int ret; ++ int ret = 0; + u32 timeline_flags = TLSTREAM_ENABLED | flags; + struct kbase_timeline *timeline = kbdev->timeline; + + if (!atomic_cmpxchg(timeline->timeline_flags, 0, timeline_flags)) { + int rcode; + ++#if MALI_USE_CSF ++ if (flags & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) { ++ ret = kbase_csf_tl_reader_start( ++ &timeline->csf_tl_reader, kbdev); ++ if (ret) ++ { ++ atomic_set(timeline->timeline_flags, 0); ++ return ret; ++ } ++ } ++#endif + ret = anon_inode_getfd( + "[mali_tlstream]", + &kbasep_tlstream_fops, +@@ -189,6 +211,9 @@ int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags) + O_RDONLY | O_CLOEXEC); + if (ret < 0) { + atomic_set(timeline->timeline_flags, 0); ++#if MALI_USE_CSF ++ kbase_csf_tl_reader_stop(&timeline->csf_tl_reader); ++#endif + return ret; + } + +@@ -206,6 +231,7 @@ int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags) + jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); + CSTD_UNUSED(rcode); + ++#if !MALI_USE_CSF + /* If job dumping is enabled, readjust the software event's + * timeout as the default value of 3 seconds is often + * insufficient. +@@ -216,6 +242,7 @@ int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags) + atomic_set(&kbdev->js_data.soft_job_timeout_ms, + 1800000); + } ++#endif /* !MALI_USE_CSF */ + + /* Summary stream was cleared during acquire. 
+ * Create static timeline objects that will be +@@ -235,15 +262,30 @@ int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags) + ret = -EBUSY; + } + ++ if (ret >= 0) ++ timeline->last_acquire_time = ktime_get(); ++ + return ret; + } + +-void kbase_timeline_streams_flush(struct kbase_timeline *timeline) ++int kbase_timeline_streams_flush(struct kbase_timeline *timeline) + { + enum tl_stream_type stype; ++ bool has_bytes = false; ++ size_t nbytes = 0; ++#if MALI_USE_CSF ++ int ret = kbase_csf_tl_reader_flush_buffer(&timeline->csf_tl_reader); ++ ++ if (ret > 0) ++ has_bytes = true; ++#endif + +- for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) +- kbase_tlstream_flush_stream(&timeline->streams[stype]); ++ for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) { ++ nbytes = kbase_tlstream_flush_stream(&timeline->streams[stype]); ++ if (nbytes > 0) ++ has_bytes = true; ++ } ++ return has_bytes ? 0 : -EIO; + } + + void kbase_timeline_streams_body_reset(struct kbase_timeline *timeline) +@@ -252,6 +294,78 @@ void kbase_timeline_streams_body_reset(struct kbase_timeline *timeline) + &timeline->streams[TL_STREAM_TYPE_OBJ]); + kbase_tlstream_reset( + &timeline->streams[TL_STREAM_TYPE_AUX]); ++#if MALI_USE_CSF ++ kbase_tlstream_reset( ++ &timeline->streams[TL_STREAM_TYPE_CSFFW]); ++#endif ++} ++ ++void kbase_timeline_pre_kbase_context_destroy(struct kbase_context *kctx) ++{ ++ struct kbase_device *const kbdev = kctx->kbdev; ++ struct kbase_timeline *timeline = kbdev->timeline; ++ ++ /* Remove the context from the list to ensure we don't try and ++ * summarize a context that is being destroyed. ++ * ++ * It's unsafe to try and summarize a context being destroyed as the ++ * locks we might normally attempt to acquire, and the data structures ++ * we would normally attempt to traverse could already be destroyed. 
++ * ++ * In the case where the tlstream is acquired between this pre destroy ++ * call and the post destroy call, we will get a context destroy ++ * tracepoint without the corresponding context create tracepoint, ++ * but this will not affect the correctness of the object model. ++ */ ++ mutex_lock(&timeline->tl_kctx_list_lock); ++ list_del_init(&kctx->tl_kctx_list_node); ++ mutex_unlock(&timeline->tl_kctx_list_lock); ++} ++ ++void kbase_timeline_post_kbase_context_create(struct kbase_context *kctx) ++{ ++ struct kbase_device *const kbdev = kctx->kbdev; ++ struct kbase_timeline *timeline = kbdev->timeline; ++ ++ /* On context create, add the context to the list to ensure it is ++ * summarized when timeline is acquired ++ */ ++ mutex_lock(&timeline->tl_kctx_list_lock); ++ ++ list_add(&kctx->tl_kctx_list_node, &timeline->tl_kctx_list); ++ ++ /* Fire the tracepoints with the lock held to ensure the tracepoints ++ * are either fired before or after the summarization, ++ * never in parallel with it. If fired in parallel, we could get ++ * duplicate creation tracepoints. ++ */ ++#if MALI_USE_CSF ++ KBASE_TLSTREAM_TL_KBASE_NEW_CTX( ++ kbdev, kctx->id, kbdev->gpu_props.props.raw_props.gpu_id); ++#endif ++ /* Trace with the AOM tracepoint even in CSF for dumping */ ++ KBASE_TLSTREAM_TL_NEW_CTX(kbdev, kctx, kctx->id, 0); ++ ++ mutex_unlock(&timeline->tl_kctx_list_lock); ++} ++ ++void kbase_timeline_post_kbase_context_destroy(struct kbase_context *kctx) ++{ ++ struct kbase_device *const kbdev = kctx->kbdev; ++ ++ /* Trace with the AOM tracepoint even in CSF for dumping */ ++ KBASE_TLSTREAM_TL_DEL_CTX(kbdev, kctx); ++#if MALI_USE_CSF ++ KBASE_TLSTREAM_TL_KBASE_DEL_CTX(kbdev, kctx->id); ++#endif ++ ++ /* Flush the timeline stream, so the user can see the termination ++ * tracepoints being fired. ++ * The "if" statement below is for optimization. It is safe to call ++ * kbase_timeline_streams_flush when timeline is disabled. 
++ */ ++ if (atomic_read(&kbdev->timeline_flags) != 0) ++ kbase_timeline_streams_flush(kbdev->timeline); + } + + #if MALI_UNIT_TEST +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline.h b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline.h +index cd48411..63926eb 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #if !defined(_KBASE_TIMELINE_H) +@@ -70,8 +69,10 @@ int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags); + * @timeline: Timeline instance + * + * Function will flush pending data in all timeline streams. ++ * ++ * Return: Zero on success, errno on failure. + */ +-void kbase_timeline_streams_flush(struct kbase_timeline *timeline); ++int kbase_timeline_streams_flush(struct kbase_timeline *timeline); + + /** + * kbase_timeline_streams_body_reset - reset timeline body streams. 
+@@ -81,33 +82,31 @@ void kbase_timeline_streams_flush(struct kbase_timeline *timeline); + */ + void kbase_timeline_streams_body_reset(struct kbase_timeline *timeline); + +-#if MALI_UNIT_TEST + /** +- * kbase_timeline_test - start timeline stream data generator +- * @kbdev: Kernel common context +- * @tpw_count: Number of trace point writers in each context +- * @msg_delay: Time delay in milliseconds between trace points written by one +- * writer +- * @msg_count: Number of trace points written by one writer +- * @aux_msg: If non-zero aux messages will be included ++ * kbase_timeline_post_kbase_context_create - Inform timeline that a new KBase ++ * Context has been created. ++ * @kctx: KBase Context ++ */ ++void kbase_timeline_post_kbase_context_create(struct kbase_context *kctx); ++ ++/** ++ * kbase_timeline_pre_kbase_context_destroy - Inform timeline that a KBase ++ * Context is about to be destroyed. ++ * @kctx: KBase Context ++ */ ++void kbase_timeline_pre_kbase_context_destroy(struct kbase_context *kctx); ++ ++/** ++ * kbase_timeline_post_kbase_context_destroy - Inform timeline that a KBase ++ * Context has been destroyed. ++ * @kctx: KBase Context + * +- * This test starts a requested number of asynchronous writers in both IRQ and +- * thread context. Each writer will generate required number of test +- * tracepoints (tracepoints with embedded information about writer that +- * should be verified by user space reader). Tracepoints will be emitted in +- * all timeline body streams. If aux_msg is non-zero writer will also +- * generate not testable tracepoints (tracepoints without information about +- * writer). These tracepoints are used to check correctness of remaining +- * timeline message generating functions. Writer will wait requested time +- * between generating another set of messages. This call blocks until all +- * writers finish. 
++ * Should be called immediately before the memory is freed, and the context ID ++ * and kbdev pointer should still be valid. + */ +-void kbase_timeline_test( +- struct kbase_device *kbdev, +- unsigned int tpw_count, +- unsigned int msg_delay, +- unsigned int msg_count, +- int aux_msg); ++void kbase_timeline_post_kbase_context_destroy(struct kbase_context *kctx); ++ ++#if MALI_UNIT_TEST + + /** + * kbase_timeline_stats - read timeline stream statistics +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline_io.c b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline_io.c +index cdde928..eff30a0 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline_io.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline_io.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,35 +17,38 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include "mali_kbase_timeline_priv.h" + #include "mali_kbase_tlstream.h" + #include "mali_kbase_tracepoints.h" ++#include "mali_kbase_timeline.h" + ++#include + #include + + /* The timeline stream file operations functions. 
*/ +-static ssize_t kbasep_timeline_io_read( +- struct file *filp, +- char __user *buffer, +- size_t size, +- loff_t *f_pos); +-static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait); ++static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, ++ size_t size, loff_t *f_pos); ++static unsigned int kbasep_timeline_io_poll(struct file *filp, ++ poll_table *wait); + static int kbasep_timeline_io_release(struct inode *inode, struct file *filp); ++static int kbasep_timeline_io_fsync(struct file *filp, loff_t start, loff_t end, ++ int datasync); + + /* The timeline stream file operations structure. */ + const struct file_operations kbasep_tlstream_fops = { + .owner = THIS_MODULE, + .release = kbasep_timeline_io_release, +- .read = kbasep_timeline_io_read, +- .poll = kbasep_timeline_io_poll, ++ .read = kbasep_timeline_io_read, ++ .poll = kbasep_timeline_io_poll, ++ .fsync = kbasep_timeline_io_fsync, + }; + + /** +- * kbasep_timeline_io_packet_pending - check timeline streams for pending packets ++ * kbasep_timeline_io_packet_pending - check timeline streams for pending ++ * packets ++ * + * @timeline: Timeline instance + * @ready_stream: Pointer to variable where stream will be placed + * @rb_idx_raw: Pointer to variable where read buffer index will be placed +@@ -56,10 +60,10 @@ const struct file_operations kbasep_tlstream_fops = { + * + * Return: non-zero if any of timeline streams has at last one packet ready + */ +-static int kbasep_timeline_io_packet_pending( +- struct kbase_timeline *timeline, +- struct kbase_tlstream **ready_stream, +- unsigned int *rb_idx_raw) ++static int ++kbasep_timeline_io_packet_pending(struct kbase_timeline *timeline, ++ struct kbase_tlstream **ready_stream, ++ unsigned int *rb_idx_raw) + { + enum tl_stream_type i; + +@@ -78,12 +82,28 @@ static int kbasep_timeline_io_packet_pending( + *ready_stream = stream; + return 1; + } +- + } + + return 0; + } + ++/** ++ * kbasep_timeline_has_header_data() - 
check timeline headers for pending ++ * packets ++ * ++ * @timeline: Timeline instance ++ * ++ * Return: non-zero if any of timeline headers has at least one packet ready. ++ */ ++static int kbasep_timeline_has_header_data(struct kbase_timeline *timeline) ++{ ++ return timeline->obj_header_btc || timeline->aux_header_btc ++#if MALI_USE_CSF ++ || timeline->csf_tl_reader.tl_header.btc ++#endif ++ ; ++} ++ + /** + * copy_stream_header() - copy timeline stream header. + * +@@ -97,11 +117,9 @@ static int kbasep_timeline_io_packet_pending( + * + * Returns: 0 if success, -1 otherwise. + */ +-static inline int copy_stream_header( +- char __user *buffer, size_t size, ssize_t *copy_len, +- const char *hdr, +- size_t hdr_size, +- size_t *hdr_btc) ++static inline int copy_stream_header(char __user *buffer, size_t size, ++ ssize_t *copy_len, const char *hdr, ++ size_t hdr_size, size_t *hdr_btc) + { + const size_t offset = hdr_size - *hdr_btc; + const size_t copy_size = MIN(size - *copy_len, *hdr_btc); +@@ -123,6 +141,7 @@ static inline int copy_stream_header( + + /** + * kbasep_timeline_copy_header - copy timeline headers to the user ++ * + * @timeline: Timeline instance + * @buffer: Pointer to the buffer provided by user + * @size: Maximum amount of data that can be stored in the buffer +@@ -135,29 +154,30 @@ static inline int copy_stream_header( + * + * Returns: 0 if success, -1 if copy_to_user has failed.
+ */ +-static inline int kbasep_timeline_copy_headers( +- struct kbase_timeline *timeline, +- char __user *buffer, +- size_t size, +- ssize_t *copy_len) ++static inline int kbasep_timeline_copy_headers(struct kbase_timeline *timeline, ++ char __user *buffer, size_t size, ++ ssize_t *copy_len) + { +- if (copy_stream_header(buffer, size, copy_len, +- obj_desc_header, +- obj_desc_header_size, +- &timeline->obj_header_btc)) ++ if (copy_stream_header(buffer, size, copy_len, obj_desc_header, ++ obj_desc_header_size, &timeline->obj_header_btc)) + return -1; + ++ if (copy_stream_header(buffer, size, copy_len, aux_desc_header, ++ aux_desc_header_size, &timeline->aux_header_btc)) ++ return -1; ++#if MALI_USE_CSF + if (copy_stream_header(buffer, size, copy_len, +- aux_desc_header, +- aux_desc_header_size, +- &timeline->aux_header_btc)) ++ timeline->csf_tl_reader.tl_header.data, ++ timeline->csf_tl_reader.tl_header.size, ++ &timeline->csf_tl_reader.tl_header.btc)) + return -1; ++#endif + return 0; + } + +- + /** + * kbasep_timeline_io_read - copy data from streams to buffer provided by user ++ * + * @filp: Pointer to file structure + * @buffer: Pointer to the buffer provided by user + * @size: Maximum amount of data that can be stored in the buffer +@@ -165,11 +185,8 @@ static inline int kbasep_timeline_copy_headers( + * + * Return: number of bytes stored in the buffer + */ +-static ssize_t kbasep_timeline_io_read( +- struct file *filp, +- char __user *buffer, +- size_t size, +- loff_t *f_pos) ++static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, ++ size_t size, loff_t *f_pos) + { + ssize_t copy_len = 0; + struct kbase_timeline *timeline; +@@ -180,25 +197,25 @@ static ssize_t kbasep_timeline_io_read( + if (WARN_ON(!filp->private_data)) + return -EFAULT; + +- timeline = (struct kbase_timeline *) filp->private_data; ++ timeline = (struct kbase_timeline *)filp->private_data; + + if (!buffer) + return -EINVAL; + +- if ((*f_pos < 0) || (size < 
PACKET_SIZE)) ++ if (*f_pos < 0) + return -EINVAL; + + mutex_lock(&timeline->reader_lock); + + while (copy_len < size) { + struct kbase_tlstream *stream = NULL; +- unsigned int rb_idx_raw = 0; +- unsigned int wb_idx_raw; +- unsigned int rb_idx; +- size_t rb_size; ++ unsigned int rb_idx_raw = 0; ++ unsigned int wb_idx_raw; ++ unsigned int rb_idx; ++ size_t rb_size; + +- if (kbasep_timeline_copy_headers( +- timeline, buffer, size, &copy_len)) { ++ if (kbasep_timeline_copy_headers(timeline, buffer, size, ++ &copy_len)) { + copy_len = -EFAULT; + break; + } +@@ -210,17 +227,13 @@ static ssize_t kbasep_timeline_io_read( + */ + if (copy_len > 0) { + if (!kbasep_timeline_io_packet_pending( +- timeline, +- &stream, +- &rb_idx_raw)) ++ timeline, &stream, &rb_idx_raw)) + break; + } else { + if (wait_event_interruptible( +- timeline->event_queue, +- kbasep_timeline_io_packet_pending( +- timeline, +- &stream, +- &rb_idx_raw))) { ++ timeline->event_queue, ++ kbasep_timeline_io_packet_pending( ++ timeline, &stream, &rb_idx_raw))) { + copy_len = -ERESTARTSYS; + break; + } +@@ -238,10 +251,8 @@ static ssize_t kbasep_timeline_io_read( + rb_size = atomic_read(&stream->buffer[rb_idx].size); + if (rb_size > size - copy_len) + break; +- if (copy_to_user( +- &buffer[copy_len], +- stream->buffer[rb_idx].data, +- rb_size)) { ++ if (copy_to_user(&buffer[copy_len], stream->buffer[rb_idx].data, ++ rb_size)) { + copy_len = -EFAULT; + break; + } +@@ -283,7 +294,7 @@ static ssize_t kbasep_timeline_io_read( + static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait) + { + struct kbase_tlstream *stream; +- unsigned int rb_idx; ++ unsigned int rb_idx; + struct kbase_timeline *timeline; + + KBASE_DEBUG_ASSERT(filp); +@@ -292,7 +303,11 @@ static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait) + if (WARN_ON(!filp->private_data)) + return -EFAULT; + +- timeline = (struct kbase_timeline *) filp->private_data; ++ timeline = (struct kbase_timeline
*)filp->private_data; ++ ++ /* If there are header bytes to copy, read will not block */ ++ if (kbasep_timeline_has_header_data(timeline)) ++ return POLLIN; + + poll_wait(filp, &timeline->event_queue, wait); + if (kbasep_timeline_io_packet_pending(timeline, &stream, &rb_idx)) +@@ -310,6 +325,8 @@ static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait) + static int kbasep_timeline_io_release(struct inode *inode, struct file *filp) + { + struct kbase_timeline *timeline; ++ ktime_t elapsed_time; ++ s64 elapsed_time_ms, time_to_sleep; + + KBASE_DEBUG_ASSERT(inode); + KBASE_DEBUG_ASSERT(filp); +@@ -317,8 +334,23 @@ static int kbasep_timeline_io_release(struct inode *inode, struct file *filp) + + CSTD_UNUSED(inode); + +- timeline = (struct kbase_timeline *) filp->private_data; ++ timeline = (struct kbase_timeline *)filp->private_data; ++ ++ /* Get the amount of time passed since the timeline was acquired and ensure ++ * we sleep for long enough such that it has been at least ++ * TIMELINE_HYSTERESIS_TIMEOUT_MS amount of time between acquire and release. ++ * This prevents userspace from spamming acquire and release too quickly. ++ */ ++ elapsed_time = ktime_sub(ktime_get(), timeline->last_acquire_time); ++ elapsed_time_ms = ktime_to_ms(elapsed_time); ++ time_to_sleep = MIN(TIMELINE_HYSTERESIS_TIMEOUT_MS, ++ TIMELINE_HYSTERESIS_TIMEOUT_MS - elapsed_time_ms); ++ if (time_to_sleep > 0) ++ msleep(time_to_sleep); + ++#if MALI_USE_CSF ++ kbase_csf_tl_reader_stop(&timeline->csf_tl_reader); ++#endif + + /* Stop autoflush timer before releasing access to streams. 
*/ + atomic_set(&timeline->autoflush_timer_active, 0); +@@ -327,3 +359,20 @@ static int kbasep_timeline_io_release(struct inode *inode, struct file *filp) + atomic_set(timeline->timeline_flags, 0); + return 0; + } ++ ++static int kbasep_timeline_io_fsync(struct file *filp, loff_t start, loff_t end, ++ int datasync) ++{ ++ struct kbase_timeline *timeline; ++ ++ CSTD_UNUSED(start); ++ CSTD_UNUSED(end); ++ CSTD_UNUSED(datasync); ++ ++ if (WARN_ON(!filp->private_data)) ++ return -EFAULT; ++ ++ timeline = (struct kbase_timeline *)filp->private_data; ++ ++ return kbase_timeline_streams_flush(timeline); ++} +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline_priv.h b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline_priv.h +index 3596584..bf2c385 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline_priv.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline_priv.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #if !defined(_KBASE_TIMELINE_PRIV_H) +@@ -26,14 +25,25 @@ + #include + #include "mali_kbase_tlstream.h" + ++#if MALI_USE_CSF ++#include "csf/mali_kbase_csf_tl_reader.h" ++#include "csf/mali_kbase_csf_trace_buffer.h" ++#endif + + #include + #include + #include + ++/* The minimum amount of time timeline must be acquired for before release is ++ * allowed, to prevent DoS attacks. ++ */ ++#define TIMELINE_HYSTERESIS_TIMEOUT_MS ((s64)500) ++ + /** + * struct kbase_timeline - timeline state structure + * @streams: The timeline streams generated by kernel ++ * @tl_kctx_list: List of contexts for timeline. ++ * @tl_kctx_list_lock: Lock to protect @tl_kctx_list. + * @autoflush_timer: Autoflush timer + * @autoflush_timer_active: If non-zero autoflush timer is active + * @reader_lock: Reader lock. Only one reader is allowed to +@@ -44,9 +54,13 @@ + * otherwise. See kbase_timeline_io_acquire(). + * @obj_header_btc: Remaining bytes to copy for the object stream header + * @aux_header_btc: Remaining bytes to copy for the aux stream header ++ * @last_acquire_time: The time at which timeline was last acquired. 
++ * @csf_tl_reader: CSFFW timeline reader + */ + struct kbase_timeline { + struct kbase_tlstream streams[TL_STREAM_TYPE_COUNT]; ++ struct list_head tl_kctx_list; ++ struct mutex tl_kctx_list_lock; + struct timer_list autoflush_timer; + atomic_t autoflush_timer_active; + struct mutex reader_lock; +@@ -57,6 +71,10 @@ struct kbase_timeline { + atomic_t *timeline_flags; + size_t obj_header_btc; + size_t aux_header_btc; ++ ktime_t last_acquire_time; ++#if MALI_USE_CSF ++ struct kbase_csf_tl_reader csf_tl_reader; ++#endif + }; + + extern const struct file_operations kbasep_tlstream_fops; +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tl_serialize.h b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tl_serialize.h +index 3e37827..30d120d 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tl_serialize.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tl_serialize.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #if !defined(_KBASE_TL_SERIALIZE_H) +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tlstream.c b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tlstream.c +index bec4be7..0275bad 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tlstream.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tlstream.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #include "mali_kbase_tlstream.h" +@@ -57,20 +56,19 @@ static void kbasep_packet_header_setup( + * @numbered: non-zero if the stream is numbered + * + * Function updates mutable part of packet header in the given buffer. +- * Note that value of data_size must not including size of the header. ++ * Note that value of data_size must not include size of the header. 
+ */ + static void kbasep_packet_header_update( + char *buffer, + size_t data_size, + int numbered) + { +- u32 word0; + u32 word1 = MIPE_PACKET_HEADER_W1((u32)data_size, !!numbered); + + KBASE_DEBUG_ASSERT(buffer); +- CSTD_UNUSED(word0); + +- memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1)); ++ /* we copy the contents of word1 to its respective position in the buffer */ ++ memcpy(&buffer[sizeof(u32)], &word1, sizeof(word1)); + } + + /** +@@ -131,6 +129,14 @@ static const struct { + TL_PACKET_TYPE_BODY, + TL_STREAM_ID_KERNEL, + }, ++#if MALI_USE_CSF ++ { ++ TL_PACKET_FAMILY_TL, ++ TL_PACKET_CLASS_OBJ, ++ TL_PACKET_TYPE_BODY, ++ TL_STREAM_ID_CSFFW, ++ }, ++#endif + }; + + void kbase_tlstream_init( +@@ -141,12 +147,12 @@ void kbase_tlstream_init( + unsigned int i; + + KBASE_DEBUG_ASSERT(stream); +- KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type); ++ KBASE_DEBUG_ASSERT(stream_type < TL_STREAM_TYPE_COUNT); + + spin_lock_init(&stream->lock); + + /* All packets carrying tracepoints shall be numbered. */ +- if (TL_PACKET_TYPE_BODY == tl_stream_cfg[stream_type].pkt_type) ++ if (tl_stream_cfg[stream_type].pkt_type == TL_PACKET_TYPE_BODY) + stream->numbered = 1; + else + stream->numbered = 0; +@@ -209,7 +215,8 @@ static size_t kbasep_tlstream_msgbuf_submit( + + /* Increasing write buffer index will expose this packet to the reader. + * As stream->lock is not taken on reader side we must make sure memory +- * is updated correctly before this will happen. */ ++ * is updated correctly before this will happen. ++ */ + smp_wmb(); + atomic_inc(&stream->wbi); + +@@ -243,7 +250,7 @@ char *kbase_tlstream_msgbuf_acquire( + wb_size = atomic_read(&stream->buffer[wb_idx].size); + + /* Select next buffer if data will not fit into current one. 
*/ +- if (PACKET_SIZE < wb_size + msg_size) { ++ if (wb_size + msg_size > PACKET_SIZE) { + wb_size = kbasep_tlstream_msgbuf_submit( + stream, wb_idx_raw, wb_size); + wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; +@@ -269,7 +276,7 @@ void kbase_tlstream_msgbuf_release( + spin_unlock_irqrestore(&stream->lock, flags); + } + +-void kbase_tlstream_flush_stream( ++size_t kbase_tlstream_flush_stream( + struct kbase_tlstream *stream) + { + unsigned long flags; +@@ -278,6 +285,7 @@ void kbase_tlstream_flush_stream( + size_t wb_size; + size_t min_size = PACKET_HEADER_SIZE; + ++ + if (stream->numbered) + min_size += PACKET_NUMBER_SIZE; + +@@ -292,7 +300,14 @@ void kbase_tlstream_flush_stream( + stream, wb_idx_raw, wb_size); + wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; + atomic_set(&stream->buffer[wb_idx].size, wb_size); ++ } else { ++ /* we return that there are no bytes to be read.*/ ++ /* Timeline io fsync will use this info to decide whether ++ * fsync should return an error ++ */ ++ wb_size = 0; + } ++ + spin_unlock_irqrestore(&stream->lock, flags); ++ return wb_size; + } +- +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tlstream.h b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tlstream.h +index 427bb09..6660cf5 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tlstream.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tlstream.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #if !defined(_KBASE_TLSTREAM_H) +@@ -44,6 +43,8 @@ + * struct kbase_tlstream - timeline stream structure + * @lock: Message order lock + * @buffer: Array of buffers ++ * @buffer.size: Number of bytes in buffer ++ * @buffer.data: Buffer's data + * @wbi: Write buffer index + * @rbi: Read buffer index + * @numbered: If non-zero stream's packets are sequentially numbered +@@ -76,8 +77,8 @@ struct kbase_tlstream { + spinlock_t lock; + + struct { +- atomic_t size; /* number of bytes in buffer */ +- char data[PACKET_SIZE]; /* buffer's data */ ++ atomic_t size; ++ char data[PACKET_SIZE]; + } buffer[PACKET_COUNT]; + + atomic_t wbi; +@@ -97,6 +98,9 @@ enum tl_stream_type { + TL_STREAM_TYPE_OBJ_SUMMARY = TL_STREAM_TYPE_FIRST, + TL_STREAM_TYPE_OBJ, + TL_STREAM_TYPE_AUX, ++#if MALI_USE_CSF ++ TL_STREAM_TYPE_CSFFW, ++#endif + TL_STREAM_TYPE_COUNT + }; + +@@ -159,8 +163,10 @@ void kbase_tlstream_msgbuf_release(struct kbase_tlstream *stream, + * @stream: Pointer to the stream structure + * + * Flush pending data in the timeline stream. 
++ * ++ * Return: Number of bytes flushed and available to be read ++ * + */ +-void kbase_tlstream_flush_stream(struct kbase_tlstream *stream); ++size_t kbase_tlstream_flush_stream(struct kbase_tlstream *stream); + + #endif /* _KBASE_TLSTREAM_H */ +- +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tracepoints.c b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tracepoints.c +index e445a3a..2c0de01 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tracepoints.c ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tracepoints.c +@@ -1,11 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + /* + * +- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html.
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +@@ -66,10 +65,11 @@ enum tl_msg_id_obj { + KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, + KBASE_TL_EVENT_ATOM_SOFTJOB_START, + KBASE_TL_EVENT_ATOM_SOFTJOB_END, +- KBASE_TL_EVENT_ARB_GRANTED, +- KBASE_TL_EVENT_ARB_STARTED, +- KBASE_TL_EVENT_ARB_STOP_REQUESTED, +- KBASE_TL_EVENT_ARB_STOPPED, ++ KBASE_TL_ARBITER_GRANTED, ++ KBASE_TL_ARBITER_STARTED, ++ KBASE_TL_ARBITER_STOP_REQUESTED, ++ KBASE_TL_ARBITER_STOPPED, ++ KBASE_TL_ARBITER_REQUESTED, + KBASE_JD_GPU_SOFT_RESET, + KBASE_TL_KBASE_NEW_DEVICE, + KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, +@@ -82,18 +82,13 @@ enum tl_msg_id_obj { + KBASE_TL_KBASE_DEL_KCPUQUEUE, + KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL, + KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT, +- KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT, +- KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT, +- KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT, +- KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET, +- KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET, +- KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET, +- KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY, +- KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY, +- KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY, ++ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT, ++ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET, + KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT, + KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT, + KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE, ++ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER, ++ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND, + KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC, + KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC, + KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC, +@@ -107,8 +102,6 @@ enum tl_msg_id_obj { + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET, +- 
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START, +- KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START, +@@ -123,8 +116,11 @@ enum tl_msg_id_obj { + KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END, + KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END, + KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END, +- KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER, ++ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER, ++ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START, ++ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END, + KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW, ++ KBASE_TL_KBASE_CSFFW_RESET, + KBASE_OBJ_MSG_COUNT, + }; + +@@ -139,6 +135,7 @@ enum tl_msg_id_aux { + KBASE_AUX_PROTECTED_LEAVE_START, + KBASE_AUX_PROTECTED_LEAVE_END, + KBASE_AUX_JIT_STATS, ++ KBASE_AUX_TILER_HEAP_STATS, + KBASE_AUX_EVENT_JOB_SLOT, + KBASE_AUX_MSG_COUNT, + }; +@@ -276,22 +273,26 @@ enum tl_msg_id_aux { + "atom soft job has completed", \ + "@p", \ + "atom") \ +- TRACEPOINT_DESC(KBASE_TL_EVENT_ARB_GRANTED, \ ++ TRACEPOINT_DESC(KBASE_TL_ARBITER_GRANTED, \ + "Arbiter has granted gpu access", \ + "@p", \ + "gpu") \ +- TRACEPOINT_DESC(KBASE_TL_EVENT_ARB_STARTED, \ ++ TRACEPOINT_DESC(KBASE_TL_ARBITER_STARTED, \ + "Driver is running again and able to process jobs", \ + "@p", \ + "gpu") \ +- TRACEPOINT_DESC(KBASE_TL_EVENT_ARB_STOP_REQUESTED, \ ++ TRACEPOINT_DESC(KBASE_TL_ARBITER_STOP_REQUESTED, \ + "Arbiter has requested driver to stop using gpu", \ + "@p", \ + "gpu") \ +- TRACEPOINT_DESC(KBASE_TL_EVENT_ARB_STOPPED, \ ++ TRACEPOINT_DESC(KBASE_TL_ARBITER_STOPPED, \ + "Driver has stopped using gpu", \ + "@p", \ + "gpu") \ ++ TRACEPOINT_DESC(KBASE_TL_ARBITER_REQUESTED, \ ++ "Driver has requested the arbiter for gpu access", \ ++ "@p", \ ++ "gpu") \ + TRACEPOINT_DESC(KBASE_JD_GPU_SOFT_RESET, \ + "gpu soft reset", \ + "@p", \ +@@ 
-340,42 +341,14 @@ enum tl_msg_id_aux { + "KCPU Queue enqueues Wait on Fence", \ + "@pp", \ + "kcpu_queue,fence") \ +- TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ +- "Begin array of KCPU Queue enqueues Wait on Cross Queue Sync Object", \ +- "@p", \ +- "kcpu_queue") \ +- TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ +- "Array item of KCPU Queue enqueues Wait on Cross Queue Sync Object", \ +- "@pLI", \ +- "kcpu_queue,cqs_obj_gpu_addr,cqs_obj_compare_value") \ +- TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ +- "End array of KCPU Queue enqueues Wait on Cross Queue Sync Object", \ +- "@p", \ +- "kcpu_queue") \ +- TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET, \ +- "Begin array of KCPU Queue enqueues Set on Cross Queue Sync Object", \ +- "@p", \ +- "kcpu_queue") \ +- TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET, \ +- "Array item of KCPU Queue enqueues Set on Cross Queue Sync Object", \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ ++ "KCPU Queue enqueues Wait on Cross Queue Sync Object", \ ++ "@pLII", \ ++ "kcpu_queue,cqs_obj_gpu_addr,cqs_obj_compare_value,cqs_obj_inherit_error") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET, \ ++ "KCPU Queue enqueues Set on Cross Queue Sync Object", \ + "@pL", \ + "kcpu_queue,cqs_obj_gpu_addr") \ +- TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET, \ +- "End array of KCPU Queue enqueues Set on Cross Queue Sync Object", \ +- "@p", \ +- "kcpu_queue") \ +- TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \ +- "Begin array of KCPU Queue enqueues Debug Copy", \ +- "@p", \ +- "kcpu_queue") \ +- TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \ +- "Array item of KCPU Queue enqueues Debug Copy", \ +- "@pL", \ +- "kcpu_queue,debugcopy_dst_size") \ +- TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \ +- 
"End array of KCPU Queue enqueues Debug Copy", \ +- "@p", \ +- "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT, \ + "KCPU Queue enqueues Map Import", \ + "@pL", \ +@@ -388,6 +361,14 @@ enum tl_msg_id_aux { + "KCPU Queue enqueues Unmap Import ignoring reference count", \ + "@pL", \ + "kcpu_queue,map_import_buf_gpu_addr") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER, \ ++ "KCPU Queue enqueues Error Barrier", \ ++ "@p", \ ++ "kcpu_queue") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND, \ ++ "KCPU Queue enqueues Group Suspend", \ ++ "@ppI", \ ++ "kcpu_queue,group_suspend_buf,gpu_cmdq_grp_handle") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \ + "Begin array of KCPU Queue enqueues JIT Alloc", \ + "@p", \ +@@ -418,60 +399,52 @@ enum tl_msg_id_aux { + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END, \ + "KCPU Queue ends a Signal on Fence", \ +- "@p", \ +- "kcpu_queue") \ ++ "@pI", \ ++ "kcpu_queue,execute_error") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START, \ + "KCPU Queue starts a Wait on Fence", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END, \ + "KCPU Queue ends a Wait on Fence", \ +- "@p", \ +- "kcpu_queue") \ ++ "@pI", \ ++ "kcpu_queue,execute_error") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START, \ + "KCPU Queue starts a Wait on an array of Cross Queue Sync Objects", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END, \ + "KCPU Queue ends a Wait on an array of Cross Queue Sync Objects", \ +- "@p", \ +- "kcpu_queue") \ ++ "@pI", \ ++ "kcpu_queue,execute_error") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET, \ + "KCPU Queue executes a Set on an array of Cross Queue Sync Objects", \ +- "@p", \ +- "kcpu_queue") \ +- 
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START, \ +- "KCPU Queue starts an array of Debug Copys", \ +- "@p", \ +- "kcpu_queue") \ +- TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END, \ +- "KCPU Queue ends an array of Debug Copys", \ +- "@p", \ +- "kcpu_queue") \ ++ "@pI", \ ++ "kcpu_queue,execute_error") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, \ + "KCPU Queue starts a Map Import", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END, \ + "KCPU Queue ends a Map Import", \ +- "@p", \ +- "kcpu_queue") \ ++ "@pI", \ ++ "kcpu_queue,execute_error") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START, \ + "KCPU Queue starts an Unmap Import", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END, \ + "KCPU Queue ends an Unmap Import", \ +- "@p", \ +- "kcpu_queue") \ ++ "@pI", \ ++ "kcpu_queue,execute_error") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START, \ + "KCPU Queue starts an Unmap Import ignoring reference count", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END, \ + "KCPU Queue ends an Unmap Import ignoring reference count", \ +- "@p", \ +- "kcpu_queue") \ ++ "@pI", \ ++ "kcpu_queue,execute_error") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START, \ + "KCPU Queue starts an array of JIT Allocs", \ + "@p", \ +@@ -482,8 +455,8 @@ enum tl_msg_id_aux { + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ + "Array item of KCPU Queue ends an array of JIT Allocs", \ +- "@pLL", \ +- "kcpu_queue,jit_alloc_gpu_alloc_addr,jit_alloc_mmu_flags") \ ++ "@pILL", \ ++ "kcpu_queue,execute_error,jit_alloc_gpu_alloc_addr,jit_alloc_mmu_flags") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ + "End array of KCPU Queue ends an array of JIT 
Allocs", \ + "@p", \ +@@ -498,20 +471,32 @@ enum tl_msg_id_aux { + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ + "Array item of KCPU Queue ends an array of JIT Frees", \ +- "@pL", \ +- "kcpu_queue,jit_free_pages_used") \ ++ "@pIL", \ ++ "kcpu_queue,execute_error,jit_free_pages_used") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ + "End array of KCPU Queue ends an array of JIT Frees", \ + "@p", \ + "kcpu_queue") \ +- TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER, \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER, \ + "KCPU Queue executes an Error Barrier", \ + "@p", \ + "kcpu_queue") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START, \ ++ "KCPU Queue starts a group suspend", \ ++ "@p", \ ++ "kcpu_queue") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END, \ ++ "KCPU Queue ends a group suspend", \ ++ "@pI", \ ++ "kcpu_queue,execute_error") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW, \ + "An overflow has happened with the CSFFW Timeline stream", \ + "@LL", \ + "csffw_timestamp,csffw_cycle") \ ++ TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_RESET, \ ++ "A reset has happened with the CSFFW", \ ++ "@L", \ ++ "csffw_cycle") \ + + #define MIPE_HEADER_BLOB_VAR_NAME __obj_desc_header + #define MIPE_HEADER_STREAM_ID TL_STREAM_ID_KERNEL +@@ -561,6 +546,10 @@ const size_t obj_desc_header_size = sizeof(__obj_desc_header); + "per-bin JIT statistics", \ + "@IIIIII", \ + "ctx_nr,bid,max_allocs,allocs,va_pages,ph_pages") \ ++ TRACEPOINT_DESC(KBASE_AUX_TILER_HEAP_STATS, \ ++ "Tiler Heap statistics", \ ++ "@ILIIIIIII", \ ++ "ctx_nr,heap_id,va_pages,ph_pages,max_chunks,chunk_size,chunk_count,target_in_flight,nr_in_flight") \ + TRACEPOINT_DESC(KBASE_AUX_EVENT_JOB_SLOT, \ + "event on a given job slot", \ + "@pIII", \ +@@ -1493,11 +1482,33 @@ void __kbase_tlstream_tl_event_atom_softjob_end( + 
kbase_tlstream_msgbuf_release(stream, acq_flags); + } + +-void __kbase_tlstream_tl_event_arb_granted( ++void __kbase_tlstream_tl_arbiter_granted( ++ struct kbase_tlstream *stream, ++ const void *gpu) ++{ ++ const u32 msg_id = KBASE_TL_ARBITER_GRANTED; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(gpu) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; ++ ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &gpu, sizeof(gpu)); ++ ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} ++ ++void __kbase_tlstream_tl_arbiter_started( + struct kbase_tlstream *stream, + const void *gpu) + { +- const u32 msg_id = KBASE_TL_EVENT_ARB_GRANTED; ++ const u32 msg_id = KBASE_TL_ARBITER_STARTED; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; +@@ -1515,11 +1526,11 @@ void __kbase_tlstream_tl_event_arb_granted( + kbase_tlstream_msgbuf_release(stream, acq_flags); + } + +-void __kbase_tlstream_tl_event_arb_started( ++void __kbase_tlstream_tl_arbiter_stop_requested( + struct kbase_tlstream *stream, + const void *gpu) + { +- const u32 msg_id = KBASE_TL_EVENT_ARB_STARTED; ++ const u32 msg_id = KBASE_TL_ARBITER_STOP_REQUESTED; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; +@@ -1537,11 +1548,11 @@ void __kbase_tlstream_tl_event_arb_started( + kbase_tlstream_msgbuf_release(stream, acq_flags); + } + +-void __kbase_tlstream_tl_event_arb_stop_requested( ++void __kbase_tlstream_tl_arbiter_stopped( + struct kbase_tlstream *stream, + const void *gpu) + { +- const u32 msg_id = KBASE_TL_EVENT_ARB_STOP_REQUESTED; ++ const u32 msg_id = KBASE_TL_ARBITER_STOPPED; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; +@@ -1559,11 +1570,11 @@ void __kbase_tlstream_tl_event_arb_stop_requested( + 
kbase_tlstream_msgbuf_release(stream, acq_flags); + } + +-void __kbase_tlstream_tl_event_arb_stopped( ++void __kbase_tlstream_tl_arbiter_requested( + struct kbase_tlstream *stream, + const void *gpu) + { +- const u32 msg_id = KBASE_TL_EVENT_ARB_STOPPED; ++ const u32 msg_id = KBASE_TL_ARBITER_REQUESTED; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; +@@ -1837,6 +1848,60 @@ void __kbase_tlstream_aux_jit_stats( + kbase_tlstream_msgbuf_release(stream, acq_flags); + } + ++void __kbase_tlstream_aux_tiler_heap_stats( ++ struct kbase_tlstream *stream, ++ u32 ctx_nr, ++ u64 heap_id, ++ u32 va_pages, ++ u32 ph_pages, ++ u32 max_chunks, ++ u32 chunk_size, ++ u32 chunk_count, ++ u32 target_in_flight, ++ u32 nr_in_flight) ++{ ++ const u32 msg_id = KBASE_AUX_TILER_HEAP_STATS; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(ctx_nr) ++ + sizeof(heap_id) ++ + sizeof(va_pages) ++ + sizeof(ph_pages) ++ + sizeof(max_chunks) ++ + sizeof(chunk_size) ++ + sizeof(chunk_count) ++ + sizeof(target_in_flight) ++ + sizeof(nr_in_flight) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; ++ ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &ctx_nr, sizeof(ctx_nr)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &heap_id, sizeof(heap_id)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &va_pages, sizeof(va_pages)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &ph_pages, sizeof(ph_pages)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &max_chunks, sizeof(max_chunks)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &chunk_size, sizeof(chunk_size)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &chunk_count, sizeof(chunk_count)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &target_in_flight, sizeof(target_in_flight)); ++ pos = 
kbasep_serialize_bytes(buffer, ++ pos, &nr_in_flight, sizeof(nr_in_flight)); ++ ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} ++ + void __kbase_tlstream_aux_event_job_slot( + struct kbase_tlstream *stream, + const void *ctx, +@@ -2161,39 +2226,19 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_wait( + kbase_tlstream_msgbuf_release(stream, acq_flags); + } + +-void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_cqs_wait( +- struct kbase_tlstream *stream, +- const void *kcpu_queue) +-{ +- const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT; +- const size_t msg_size = sizeof(msg_id) + sizeof(u64) +- + sizeof(kcpu_queue) +- ; +- char *buffer; +- unsigned long acq_flags; +- size_t pos = 0; +- +- buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); +- +- pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); +- pos = kbasep_serialize_timestamp(buffer, pos); +- pos = kbasep_serialize_bytes(buffer, +- pos, &kcpu_queue, sizeof(kcpu_queue)); +- +- kbase_tlstream_msgbuf_release(stream, acq_flags); +-} +- +-void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_cqs_wait( ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 cqs_obj_gpu_addr, +- u32 cqs_obj_compare_value) ++ u32 cqs_obj_compare_value, ++ u32 cqs_obj_inherit_error) + { +- const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT; ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(cqs_obj_gpu_addr) + + sizeof(cqs_obj_compare_value) ++ + sizeof(cqs_obj_inherit_error) + ; + char *buffer; + unsigned long acq_flags; +@@ -2209,60 +2254,18 @@ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_cqs_wait( + pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr)); + pos = kbasep_serialize_bytes(buffer, + pos, &cqs_obj_compare_value, 
sizeof(cqs_obj_compare_value)); +- +- kbase_tlstream_msgbuf_release(stream, acq_flags); +-} +- +-void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_cqs_wait( +- struct kbase_tlstream *stream, +- const void *kcpu_queue) +-{ +- const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT; +- const size_t msg_size = sizeof(msg_id) + sizeof(u64) +- + sizeof(kcpu_queue) +- ; +- char *buffer; +- unsigned long acq_flags; +- size_t pos = 0; +- +- buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); +- +- pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); +- pos = kbasep_serialize_timestamp(buffer, pos); +- pos = kbasep_serialize_bytes(buffer, +- pos, &kcpu_queue, sizeof(kcpu_queue)); +- +- kbase_tlstream_msgbuf_release(stream, acq_flags); +-} +- +-void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_cqs_set( +- struct kbase_tlstream *stream, +- const void *kcpu_queue) +-{ +- const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET; +- const size_t msg_size = sizeof(msg_id) + sizeof(u64) +- + sizeof(kcpu_queue) +- ; +- char *buffer; +- unsigned long acq_flags; +- size_t pos = 0; +- +- buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); +- +- pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); +- pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, +- pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos, &cqs_obj_inherit_error, sizeof(cqs_obj_inherit_error)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); + } + +-void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_cqs_set( ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 cqs_obj_gpu_addr) + { +- const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET; ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + 
sizeof(kcpu_queue) + + sizeof(cqs_obj_gpu_addr) +@@ -2283,13 +2286,15 @@ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_cqs_set( + kbase_tlstream_msgbuf_release(stream, acq_flags); + } + +-void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_cqs_set( ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import( + struct kbase_tlstream *stream, +- const void *kcpu_queue) ++ const void *kcpu_queue, ++ u64 map_import_buf_gpu_addr) + { +- const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET; ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) ++ + sizeof(map_import_buf_gpu_addr) + ; + char *buffer; + unsigned long acq_flags; +@@ -2301,41 +2306,21 @@ void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_cqs_set( + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); +- +- kbase_tlstream_msgbuf_release(stream, acq_flags); +-} +- +-void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_debugcopy( +- struct kbase_tlstream *stream, +- const void *kcpu_queue) +-{ +- const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY; +- const size_t msg_size = sizeof(msg_id) + sizeof(u64) +- + sizeof(kcpu_queue) +- ; +- char *buffer; +- unsigned long acq_flags; +- size_t pos = 0; +- +- buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); +- +- pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); +- pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, +- pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos, &map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); + } + +-void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_debugcopy( ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import( + struct kbase_tlstream *stream, + const 
void *kcpu_queue, +- u64 debugcopy_dst_size) ++ u64 map_import_buf_gpu_addr) + { +- const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY; ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) +- + sizeof(debugcopy_dst_size) ++ + sizeof(map_import_buf_gpu_addr) + ; + char *buffer; + unsigned long acq_flags; +@@ -2348,39 +2333,17 @@ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_debugcopy( + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, +- pos, &debugcopy_dst_size, sizeof(debugcopy_dst_size)); +- +- kbase_tlstream_msgbuf_release(stream, acq_flags); +-} +- +-void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_debugcopy( +- struct kbase_tlstream *stream, +- const void *kcpu_queue) +-{ +- const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY; +- const size_t msg_size = sizeof(msg_id) + sizeof(u64) +- + sizeof(kcpu_queue) +- ; +- char *buffer; +- unsigned long acq_flags; +- size_t pos = 0; +- +- buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); +- +- pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); +- pos = kbasep_serialize_timestamp(buffer, pos); +- pos = kbasep_serialize_bytes(buffer, +- pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos, &map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); + } + +-void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import( ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 map_import_buf_gpu_addr) + { +- const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT; ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + 
sizeof(map_import_buf_gpu_addr) +@@ -2401,15 +2364,13 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import( + kbase_tlstream_msgbuf_release(stream, acq_flags); + } + +-void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import( ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier( + struct kbase_tlstream *stream, +- const void *kcpu_queue, +- u64 map_import_buf_gpu_addr) ++ const void *kcpu_queue) + { +- const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT; ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) +- + sizeof(map_import_buf_gpu_addr) + ; + char *buffer; + unsigned long acq_flags; +@@ -2421,21 +2382,21 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import( + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); +- pos = kbasep_serialize_bytes(buffer, +- pos, &map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); + } + +-void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force( ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend( + struct kbase_tlstream *stream, + const void *kcpu_queue, +- u64 map_import_buf_gpu_addr) ++ const void *group_suspend_buf, ++ u32 gpu_cmdq_grp_handle) + { +- const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE; ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) +- + sizeof(map_import_buf_gpu_addr) ++ + sizeof(group_suspend_buf) ++ + sizeof(gpu_cmdq_grp_handle) + ; + char *buffer; + unsigned long acq_flags; +@@ -2448,7 +2409,9 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force( + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, +- pos, 
&map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr)); ++ pos, &group_suspend_buf, sizeof(group_suspend_buf)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &gpu_cmdq_grp_handle, sizeof(gpu_cmdq_grp_handle)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); + } +@@ -2649,11 +2612,13 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start( + + void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end( + struct kbase_tlstream *stream, +- const void *kcpu_queue) ++ const void *kcpu_queue, ++ u32 execute_error) + { + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) ++ + sizeof(execute_error) + ; + char *buffer; + unsigned long acq_flags; +@@ -2665,6 +2630,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end( + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &execute_error, sizeof(execute_error)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); + } +@@ -2693,11 +2660,13 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_start( + + void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end( + struct kbase_tlstream *stream, +- const void *kcpu_queue) ++ const void *kcpu_queue, ++ u32 execute_error) + { + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) ++ + sizeof(execute_error) + ; + char *buffer; + unsigned long acq_flags; +@@ -2709,6 +2678,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end( + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &execute_error, sizeof(execute_error)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); + } 
+@@ -2737,11 +2708,13 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_start( + + void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end( + struct kbase_tlstream *stream, +- const void *kcpu_queue) ++ const void *kcpu_queue, ++ u32 execute_error) + { + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) ++ + sizeof(execute_error) + ; + char *buffer; + unsigned long acq_flags; +@@ -2753,17 +2726,21 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end( + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &execute_error, sizeof(execute_error)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); + } + + void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set( + struct kbase_tlstream *stream, +- const void *kcpu_queue) ++ const void *kcpu_queue, ++ u32 execute_error) + { + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) ++ + sizeof(execute_error) + ; + char *buffer; + unsigned long acq_flags; +@@ -2775,50 +2752,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set( + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); +- +- kbase_tlstream_msgbuf_release(stream, acq_flags); +-} +- +-void __kbase_tlstream_tl_kbase_kcpuqueue_execute_debugcopy_start( +- struct kbase_tlstream *stream, +- const void *kcpu_queue) +-{ +- const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START; +- const size_t msg_size = sizeof(msg_id) + sizeof(u64) +- + sizeof(kcpu_queue) +- ; +- char *buffer; +- unsigned long acq_flags; +- size_t pos = 0; +- +- buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); +- +- pos = kbasep_serialize_bytes(buffer, pos, 
&msg_id, sizeof(msg_id)); +- pos = kbasep_serialize_timestamp(buffer, pos); +- pos = kbasep_serialize_bytes(buffer, +- pos, &kcpu_queue, sizeof(kcpu_queue)); +- +- kbase_tlstream_msgbuf_release(stream, acq_flags); +-} +- +-void __kbase_tlstream_tl_kbase_kcpuqueue_execute_debugcopy_end( +- struct kbase_tlstream *stream, +- const void *kcpu_queue) +-{ +- const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END; +- const size_t msg_size = sizeof(msg_id) + sizeof(u64) +- + sizeof(kcpu_queue) +- ; +- char *buffer; +- unsigned long acq_flags; +- size_t pos = 0; +- +- buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); +- +- pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); +- pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, +- pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos, &execute_error, sizeof(execute_error)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); + } +@@ -2847,11 +2782,13 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start( + + void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end( + struct kbase_tlstream *stream, +- const void *kcpu_queue) ++ const void *kcpu_queue, ++ u32 execute_error) + { + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) ++ + sizeof(execute_error) + ; + char *buffer; + unsigned long acq_flags; +@@ -2863,6 +2800,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end( + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &execute_error, sizeof(execute_error)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); + } +@@ -2891,11 +2830,13 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_start( + + void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end( + struct 
kbase_tlstream *stream, +- const void *kcpu_queue) ++ const void *kcpu_queue, ++ u32 execute_error) + { + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) ++ + sizeof(execute_error) + ; + char *buffer; + unsigned long acq_flags; +@@ -2907,6 +2848,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end( + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &execute_error, sizeof(execute_error)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); + } +@@ -2935,11 +2878,13 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_start( + + void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end( + struct kbase_tlstream *stream, +- const void *kcpu_queue) ++ const void *kcpu_queue, ++ u32 execute_error) + { + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) ++ + sizeof(execute_error) + ; + char *buffer; + unsigned long acq_flags; +@@ -2951,6 +2896,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end( + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &execute_error, sizeof(execute_error)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); + } +@@ -3002,12 +2949,14 @@ void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_alloc_end( + void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end( + struct kbase_tlstream *stream, + const void *kcpu_queue, ++ u32 execute_error, + u64 jit_alloc_gpu_alloc_addr, + u64 jit_alloc_mmu_flags) + { + const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END; + const 
size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) ++ + sizeof(execute_error) + + sizeof(jit_alloc_gpu_alloc_addr) + + sizeof(jit_alloc_mmu_flags) + ; +@@ -3021,6 +2970,8 @@ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end( + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &execute_error, sizeof(execute_error)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_alloc_gpu_alloc_addr, sizeof(jit_alloc_gpu_alloc_addr)); + pos = kbasep_serialize_bytes(buffer, +@@ -3098,11 +3049,13 @@ void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_free_end( + void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end( + struct kbase_tlstream *stream, + const void *kcpu_queue, ++ u32 execute_error, + u64 jit_free_pages_used) + { + const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) ++ + sizeof(execute_error) + + sizeof(jit_free_pages_used) + ; + char *buffer; +@@ -3115,6 +3068,8 @@ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end( + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &execute_error, sizeof(execute_error)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_free_pages_used, sizeof(jit_free_pages_used)); + +@@ -3143,13 +3098,59 @@ void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end( + kbase_tlstream_msgbuf_release(stream, acq_flags); + } + +-void __kbase_tlstream_tl_kbase_kcpuqueue_execute_errorbarrier( ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_error_barrier( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER; ++ 
const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; ++ ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} ++ ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_start( + struct kbase_tlstream *stream, + const void *kcpu_queue) + { +- const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER; ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(kcpu_queue) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; ++ ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &kcpu_queue, sizeof(kcpu_queue)); ++ ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} ++ ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 execute_error) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) ++ + sizeof(execute_error) + ; + char *buffer; + unsigned long acq_flags; +@@ -3161,6 +3162,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_errorbarrier( + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &execute_error, sizeof(execute_error)); + + kbase_tlstream_msgbuf_release(stream, 
acq_flags); + } +@@ -3191,4 +3194,26 @@ void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( + kbase_tlstream_msgbuf_release(stream, acq_flags); + } + ++void __kbase_tlstream_tl_kbase_csffw_reset( ++ struct kbase_tlstream *stream, ++ u64 csffw_cycle) ++{ ++ const u32 msg_id = KBASE_TL_KBASE_CSFFW_RESET; ++ const size_t msg_size = sizeof(msg_id) + sizeof(u64) ++ + sizeof(csffw_cycle) ++ ; ++ char *buffer; ++ unsigned long acq_flags; ++ size_t pos = 0; ++ ++ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); ++ ++ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); ++ pos = kbasep_serialize_timestamp(buffer, pos); ++ pos = kbasep_serialize_bytes(buffer, ++ pos, &csffw_cycle, sizeof(csffw_cycle)); ++ ++ kbase_tlstream_msgbuf_release(stream, acq_flags); ++} ++ + /* clang-format on */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tracepoints.h b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tracepoints.h +index 7ea8ba8..887a1aa 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tracepoints.h ++++ b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tracepoints.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + /* +@@ -226,16 +225,19 @@ void __kbase_tlstream_tl_event_atom_softjob_start( + void __kbase_tlstream_tl_event_atom_softjob_end( + struct kbase_tlstream *stream, + const void *atom); +-void __kbase_tlstream_tl_event_arb_granted( ++void __kbase_tlstream_tl_arbiter_granted( ++ struct kbase_tlstream *stream, ++ const void *gpu); ++void __kbase_tlstream_tl_arbiter_started( + struct kbase_tlstream *stream, + const void *gpu); +-void __kbase_tlstream_tl_event_arb_started( ++void __kbase_tlstream_tl_arbiter_stop_requested( + struct kbase_tlstream *stream, + const void *gpu); +-void __kbase_tlstream_tl_event_arb_stop_requested( ++void __kbase_tlstream_tl_arbiter_stopped( + struct kbase_tlstream *stream, + const void *gpu); +-void __kbase_tlstream_tl_event_arb_stopped( ++void __kbase_tlstream_tl_arbiter_requested( + struct kbase_tlstream *stream, + const void *gpu); + void __kbase_tlstream_jd_gpu_soft_reset( +@@ -277,6 +279,17 @@ void __kbase_tlstream_aux_jit_stats( + u32 allocs, + u32 va_pages, + u32 ph_pages); ++void __kbase_tlstream_aux_tiler_heap_stats( ++ struct kbase_tlstream *stream, ++ u32 ctx_nr, ++ u64 heap_id, ++ u32 va_pages, ++ u32 ph_pages, ++ u32 max_chunks, ++ u32 chunk_size, ++ u32 chunk_count, ++ u32 target_in_flight, ++ u32 nr_in_flight); + void __kbase_tlstream_aux_event_job_slot( + struct kbase_tlstream *stream, + const void *ctx, +@@ -328,37 +341,16 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_wait( + struct kbase_tlstream *stream, + const void *kcpu_queue, + const void *fence); +-void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_cqs_wait( +- struct kbase_tlstream *stream, +- const void *kcpu_queue); +-void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_cqs_wait( ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 cqs_obj_gpu_addr, +- u32 cqs_obj_compare_value); +-void 
__kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_cqs_wait( +- struct kbase_tlstream *stream, +- const void *kcpu_queue); +-void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_cqs_set( +- struct kbase_tlstream *stream, +- const void *kcpu_queue); +-void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_cqs_set( ++ u32 cqs_obj_compare_value, ++ u32 cqs_obj_inherit_error); ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 cqs_obj_gpu_addr); +-void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_cqs_set( +- struct kbase_tlstream *stream, +- const void *kcpu_queue); +-void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_debugcopy( +- struct kbase_tlstream *stream, +- const void *kcpu_queue); +-void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_debugcopy( +- struct kbase_tlstream *stream, +- const void *kcpu_queue, +- u64 debugcopy_dst_size); +-void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_debugcopy( +- struct kbase_tlstream *stream, +- const void *kcpu_queue); + void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import( + struct kbase_tlstream *stream, + const void *kcpu_queue, +@@ -371,6 +363,14 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 map_import_buf_gpu_addr); ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue); ++void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ const void *group_suspend_buf, ++ u32 gpu_cmdq_grp_handle); + void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_alloc( + struct kbase_tlstream *stream, + const void *kcpu_queue); +@@ -404,46 +404,47 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start( + const void *kcpu_queue); + void 
__kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end( + struct kbase_tlstream *stream, +- const void *kcpu_queue); ++ const void *kcpu_queue, ++ u32 execute_error); + void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); + void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end( + struct kbase_tlstream *stream, +- const void *kcpu_queue); ++ const void *kcpu_queue, ++ u32 execute_error); + void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); + void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end( + struct kbase_tlstream *stream, +- const void *kcpu_queue); ++ const void *kcpu_queue, ++ u32 execute_error); + void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set( + struct kbase_tlstream *stream, +- const void *kcpu_queue); +-void __kbase_tlstream_tl_kbase_kcpuqueue_execute_debugcopy_start( +- struct kbase_tlstream *stream, +- const void *kcpu_queue); +-void __kbase_tlstream_tl_kbase_kcpuqueue_execute_debugcopy_end( +- struct kbase_tlstream *stream, +- const void *kcpu_queue); ++ const void *kcpu_queue, ++ u32 execute_error); + void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); + void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end( + struct kbase_tlstream *stream, +- const void *kcpu_queue); ++ const void *kcpu_queue, ++ u32 execute_error); + void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); + void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end( + struct kbase_tlstream *stream, +- const void *kcpu_queue); ++ const void *kcpu_queue, ++ u32 execute_error); + void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); + void 
__kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end( + struct kbase_tlstream *stream, +- const void *kcpu_queue); ++ const void *kcpu_queue, ++ u32 execute_error); + void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_alloc_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +@@ -453,6 +454,7 @@ void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_alloc_end( + void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end( + struct kbase_tlstream *stream, + const void *kcpu_queue, ++ u32 execute_error, + u64 jit_alloc_gpu_alloc_addr, + u64 jit_alloc_mmu_flags); + void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_alloc_end( +@@ -467,17 +469,28 @@ void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_free_end( + void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end( + struct kbase_tlstream *stream, + const void *kcpu_queue, ++ u32 execute_error, + u64 jit_free_pages_used); + void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +-void __kbase_tlstream_tl_kbase_kcpuqueue_execute_errorbarrier( ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_error_barrier( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue); ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); ++void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_end( ++ struct kbase_tlstream *stream, ++ const void *kcpu_queue, ++ u32 execute_error); + void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( + struct kbase_tlstream *stream, + u64 csffw_timestamp, + u64 csffw_cycle); ++void __kbase_tlstream_tl_kbase_csffw_reset( ++ struct kbase_tlstream *stream, ++ u64 csffw_cycle); + + struct kbase_tlstream; + +@@ -1215,77 +1228,96 @@ struct kbase_tlstream; + } while (0) + + /** +- * KBASE_TLSTREAM_TL_EVENT_ARB_GRANTED - ++ * 
KBASE_TLSTREAM_TL_ARBITER_GRANTED - + * Arbiter has granted gpu access + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +-#define KBASE_TLSTREAM_TL_EVENT_ARB_GRANTED( \ ++#define KBASE_TLSTREAM_TL_ARBITER_GRANTED( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ +- __kbase_tlstream_tl_event_arb_granted( \ ++ __kbase_tlstream_tl_arbiter_granted( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + gpu); \ + } while (0) + + /** +- * KBASE_TLSTREAM_TL_EVENT_ARB_STARTED - ++ * KBASE_TLSTREAM_TL_ARBITER_STARTED - + * Driver is running again and able to process jobs + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +-#define KBASE_TLSTREAM_TL_EVENT_ARB_STARTED( \ ++#define KBASE_TLSTREAM_TL_ARBITER_STARTED( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ +- __kbase_tlstream_tl_event_arb_started( \ ++ __kbase_tlstream_tl_arbiter_started( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + gpu); \ + } while (0) + + /** +- * KBASE_TLSTREAM_TL_EVENT_ARB_STOP_REQUESTED - ++ * KBASE_TLSTREAM_TL_ARBITER_STOP_REQUESTED - + * Arbiter has requested driver to stop using gpu + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +-#define KBASE_TLSTREAM_TL_EVENT_ARB_STOP_REQUESTED( \ ++#define KBASE_TLSTREAM_TL_ARBITER_STOP_REQUESTED( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ +- __kbase_tlstream_tl_event_arb_stop_requested( \ ++ __kbase_tlstream_tl_arbiter_stop_requested( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + gpu); \ + } while (0) + + /** +- * KBASE_TLSTREAM_TL_EVENT_ARB_STOPPED - ++ * KBASE_TLSTREAM_TL_ARBITER_STOPPED - + * Driver has stopped using gpu + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +-#define KBASE_TLSTREAM_TL_EVENT_ARB_STOPPED( \ ++#define KBASE_TLSTREAM_TL_ARBITER_STOPPED( \ + 
kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ +- __kbase_tlstream_tl_event_arb_stopped( \ ++ __kbase_tlstream_tl_arbiter_stopped( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ gpu); \ ++ } while (0) ++ ++/** ++ * KBASE_TLSTREAM_TL_ARBITER_REQUESTED - ++ * Driver has requested the arbiter for gpu access ++ * ++ * @kbdev: Kbase device ++ * @gpu: Name of the GPU object ++ */ ++#define KBASE_TLSTREAM_TL_ARBITER_REQUESTED( \ ++ kbdev, \ ++ gpu \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_tl_arbiter_requested( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + gpu); \ + } while (0) +@@ -1499,6 +1531,42 @@ struct kbase_tlstream; + ctx_nr, bid, max_allocs, allocs, va_pages, ph_pages); \ + } while (0) + ++/** ++ * KBASE_TLSTREAM_AUX_TILER_HEAP_STATS - ++ * Tiler Heap statistics ++ * ++ * @kbdev: Kbase device ++ * @ctx_nr: Kernel context number ++ * @heap_id: Unique id used to represent a heap under a context ++ * @va_pages: Number of virtual pages allocated in this bin ++ * @ph_pages: Number of physical pages allocated in this bin ++ * @max_chunks: The maximum number of chunks that the heap should be allowed to use ++ * @chunk_size: Size of each chunk in tiler heap, in bytes ++ * @chunk_count: The number of chunks currently allocated in the tiler heap ++ * @target_in_flight: Number of render-passes that the driver should attempt ++ * to keep in flight for which allocation of new chunks is allowed ++ * @nr_in_flight: Number of render-passes that are in flight ++ */ ++#define KBASE_TLSTREAM_AUX_TILER_HEAP_STATS( \ ++ kbdev, \ ++ ctx_nr, \ ++ heap_id, \ ++ va_pages, \ ++ ph_pages, \ ++ max_chunks, \ ++ chunk_size, \ ++ chunk_count, \ ++ target_in_flight, \ ++ nr_in_flight \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & TLSTREAM_ENABLED) \ ++ __kbase_tlstream_aux_tiler_heap_stats( \ 
++ __TL_DISPATCH_STREAM(kbdev, aux), \ ++ ctx_nr, heap_id, va_pages, ph_pages, max_chunks, chunk_size, chunk_count, target_in_flight, nr_in_flight); \ ++ } while (0) ++ + /** + * KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT - + * event on a given job slot +@@ -1534,6 +1602,22 @@ struct kbase_tlstream; + * @kbase_device_max_num_csgs: The max number of CSGs the physical hardware supports + * @kbase_device_as_count: The number of address spaces the physical hardware has available + */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_NEW_DEVICE( \ ++ kbdev, \ ++ kbase_device_id, \ ++ kbase_device_gpu_core_count, \ ++ kbase_device_max_num_csgs, \ ++ kbase_device_as_count \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_new_device( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kbase_device_id, kbase_device_gpu_core_count, kbase_device_max_num_csgs, kbase_device_as_count); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_NEW_DEVICE( \ + kbdev, \ + kbase_device_id, \ +@@ -1542,6 +1626,7 @@ struct kbase_tlstream; + kbase_device_as_count \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG - +@@ -1552,6 +1637,21 @@ struct kbase_tlstream; + * @gpu_cmdq_grp_handle: GPU Command Queue Group handle which will match userspace + * @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed + */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( \ ++ kbdev, \ ++ kbase_device_id, \ ++ gpu_cmdq_grp_handle, \ ++ kbase_device_csg_slot_index \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_device_program_csg( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kbase_device_id, gpu_cmdq_grp_handle, kbase_device_csg_slot_index); \ ++ } while (0) ++#else + #define 
KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( \ + kbdev, \ + kbase_device_id, \ +@@ -1559,6 +1659,7 @@ struct kbase_tlstream; + kbase_device_csg_slot_index \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG - +@@ -1568,12 +1669,27 @@ struct kbase_tlstream; + * @kbase_device_id: The id of the physical hardware + * @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed + */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG( \ ++ kbdev, \ ++ kbase_device_id, \ ++ kbase_device_csg_slot_index \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_device_deprogram_csg( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kbase_device_id, kbase_device_csg_slot_index); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG( \ + kbdev, \ + kbase_device_id, \ + kbase_device_csg_slot_index \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_NEW_CTX - +@@ -1583,12 +1699,27 @@ struct kbase_tlstream; + * @kernel_ctx_id: Unique ID for the KBase Context + * @kbase_device_id: The id of the physical hardware + */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_NEW_CTX( \ ++ kbdev, \ ++ kernel_ctx_id, \ ++ kbase_device_id \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_new_ctx( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kernel_ctx_id, kbase_device_id); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_NEW_CTX( \ + kbdev, \ + kernel_ctx_id, \ + kbase_device_id \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_DEL_CTX - +@@ -1597,11 +1728,25 @@ struct kbase_tlstream; + * @kbdev: Kbase device + * @kernel_ctx_id: Unique ID for the KBase Context + */ 
++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_DEL_CTX( \ ++ kbdev, \ ++ kernel_ctx_id \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_del_ctx( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kernel_ctx_id); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_DEL_CTX( \ + kbdev, \ + kernel_ctx_id \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS - +@@ -1611,12 +1756,27 @@ struct kbase_tlstream; + * @kernel_ctx_id: Unique ID for the KBase Context + * @kbase_device_as_index: The index of the device address space being assigned + */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS( \ ++ kbdev, \ ++ kernel_ctx_id, \ ++ kbase_device_as_index \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_ctx_assign_as( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kernel_ctx_id, kbase_device_as_index); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS( \ + kbdev, \ + kernel_ctx_id, \ + kbase_device_as_index \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS - +@@ -1625,11 +1785,25 @@ struct kbase_tlstream; + * @kbdev: Kbase device + * @kernel_ctx_id: Unique ID for the KBase Context + */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( \ ++ kbdev, \ ++ kernel_ctx_id \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_ctx_unassign_as( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kernel_ctx_id); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( \ + kbdev, \ + kernel_ctx_id \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * 
KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE - +@@ -1641,6 +1815,21 @@ struct kbase_tlstream; + * @kcpuq_num_pending_cmds: Number of commands already enqueued + * in the KCPU queue + */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE( \ ++ kbdev, \ ++ kcpu_queue, \ ++ kernel_ctx_id, \ ++ kcpuq_num_pending_cmds \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_new_kcpuqueue( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, kernel_ctx_id, kcpuq_num_pending_cmds); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE( \ + kbdev, \ + kcpu_queue, \ +@@ -1648,6 +1837,7 @@ struct kbase_tlstream; + kcpuq_num_pending_cmds \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE - +@@ -1656,11 +1846,25 @@ struct kbase_tlstream; + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_del_kcpuqueue( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL - +@@ -1670,12 +1874,27 @@ struct kbase_tlstream; + * @kcpu_queue: KCPU queue + * @fence: Fence object handle + */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL( \ ++ kbdev, \ ++ kcpu_queue, \ ++ fence \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_signal( \ ++ __TL_DISPATCH_STREAM(kbdev, 
obj), \ ++ kcpu_queue, fence); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL( \ + kbdev, \ + kcpu_queue, \ + fence \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT - +@@ -1685,183 +1904,244 @@ struct kbase_tlstream; + * @kcpu_queue: KCPU queue + * @fence: Fence object handle + */ ++#if MALI_USE_CSF + #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT( \ + kbdev, \ + kcpu_queue, \ + fence \ + ) \ +- do { } while (0) +- +-/** +- * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT - +- * Begin array of KCPU Queue enqueues Wait on Cross Queue Sync Object +- * +- * @kbdev: Kbase device +- * @kcpu_queue: KCPU queue +- */ +-#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT( \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_wait( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, fence); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT( \ + kbdev, \ +- kcpu_queue \ ++ kcpu_queue, \ ++ fence \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** +- * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT - +- * Array item of KCPU Queue enqueues Wait on Cross Queue Sync Object ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT - ++ * KCPU Queue enqueues Wait on Cross Queue Sync Object + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue +- * @cqs_obj_gpu_addr: CQS Object GPU ptr ++ * @cqs_obj_gpu_addr: CQS Object GPU pointer + * @cqs_obj_compare_value: Semaphore value that should be exceeded + * for the WAIT to pass ++ * @cqs_obj_inherit_error: Indicates the error state should be inherited into the queue or not + */ +-#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT( \ ++#if MALI_USE_CSF ++#define 
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT( \ ++ kbdev, \ ++ kcpu_queue, \ ++ cqs_obj_gpu_addr, \ ++ cqs_obj_compare_value, \ ++ cqs_obj_inherit_error \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, cqs_obj_gpu_addr, cqs_obj_compare_value, cqs_obj_inherit_error); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT( \ + kbdev, \ + kcpu_queue, \ + cqs_obj_gpu_addr, \ +- cqs_obj_compare_value \ ++ cqs_obj_compare_value, \ ++ cqs_obj_inherit_error \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** +- * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT - +- * End array of KCPU Queue enqueues Wait on Cross Queue Sync Object ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET - ++ * KCPU Queue enqueues Set on Cross Queue Sync Object + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue ++ * @cqs_obj_gpu_addr: CQS Object GPU pointer + */ +-#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT( \ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET( \ + kbdev, \ +- kcpu_queue \ ++ kcpu_queue, \ ++ cqs_obj_gpu_addr \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, cqs_obj_gpu_addr); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET( \ ++ kbdev, \ ++ kcpu_queue, \ ++ cqs_obj_gpu_addr \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** +- * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET - +- * Begin array of KCPU Queue enqueues Set on Cross Queue Sync Object ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT - ++ * 
KCPU Queue enqueues Map Import + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue ++ * @map_import_buf_gpu_addr: Map import buffer GPU pointer + */ +-#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET( \ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT( \ + kbdev, \ +- kcpu_queue \ ++ kcpu_queue, \ ++ map_import_buf_gpu_addr \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, map_import_buf_gpu_addr); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT( \ ++ kbdev, \ ++ kcpu_queue, \ ++ map_import_buf_gpu_addr \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** +- * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET - +- * Array item of KCPU Queue enqueues Set on Cross Queue Sync Object ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT - ++ * KCPU Queue enqueues Unmap Import + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue +- * @cqs_obj_gpu_addr: CQS Object GPU ptr ++ * @map_import_buf_gpu_addr: Map import buffer GPU pointer + */ +-#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET( \ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT( \ + kbdev, \ + kcpu_queue, \ +- cqs_obj_gpu_addr \ ++ map_import_buf_gpu_addr \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, map_import_buf_gpu_addr); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT( \ ++ kbdev, \ ++ kcpu_queue, \ ++ map_import_buf_gpu_addr \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** +- 
* KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET - +- * End array of KCPU Queue enqueues Set on Cross Queue Sync Object ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE - ++ * KCPU Queue enqueues Unmap Import ignoring reference count + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue ++ * @map_import_buf_gpu_addr: Map import buffer GPU pointer + */ +-#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET( \ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE( \ + kbdev, \ +- kcpu_queue \ ++ kcpu_queue, \ ++ map_import_buf_gpu_addr \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, map_import_buf_gpu_addr); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE( \ ++ kbdev, \ ++ kcpu_queue, \ ++ map_import_buf_gpu_addr \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** +- * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY - +- * Begin array of KCPU Queue enqueues Debug Copy ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER - ++ * KCPU Queue enqueues Error Barrier + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +-#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY( \ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) 
++#endif /* MALI_USE_CSF */ + + /** +- * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY - +- * Array item of KCPU Queue enqueues Debug Copy ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND - ++ * KCPU Queue enqueues Group Suspend + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue +- * @debugcopy_dst_size: Debug Copy destination size +- */ +-#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY( \ +- kbdev, \ +- kcpu_queue, \ +- debugcopy_dst_size \ +- ) \ +- do { } while (0) +- +-/** +- * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY - +- * End array of KCPU Queue enqueues Debug Copy +- * +- * @kbdev: Kbase device +- * @kcpu_queue: KCPU queue +- */ +-#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY( \ +- kbdev, \ +- kcpu_queue \ +- ) \ +- do { } while (0) +- +-/** +- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT - +- * KCPU Queue enqueues Map Import +- * +- * @kbdev: Kbase device +- * @kcpu_queue: KCPU queue +- * @map_import_buf_gpu_addr: Map import buffer GPU ptr +- */ +-#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT( \ +- kbdev, \ +- kcpu_queue, \ +- map_import_buf_gpu_addr \ +- ) \ +- do { } while (0) +- +-/** +- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT - +- * KCPU Queue enqueues Unmap Import +- * +- * @kbdev: Kbase device +- * @kcpu_queue: KCPU queue +- * @map_import_buf_gpu_addr: Map import buffer GPU ptr ++ * @group_suspend_buf: Pointer to the suspend buffer structure ++ * @gpu_cmdq_grp_handle: GPU Command Queue Group handle which will match userspace + */ +-#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT( \ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND( \ + kbdev, \ + kcpu_queue, \ +- map_import_buf_gpu_addr \ ++ group_suspend_buf, \ ++ gpu_cmdq_grp_handle \ + ) \ +- do { } while (0) +- +-/** +- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE - +- * KCPU Queue 
enqueues Unmap Import ignoring reference count +- * +- * @kbdev: Kbase device +- * @kcpu_queue: KCPU queue +- * @map_import_buf_gpu_addr: Map import buffer GPU ptr +- */ +-#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE( \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, group_suspend_buf, gpu_cmdq_grp_handle); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND( \ + kbdev, \ + kcpu_queue, \ +- map_import_buf_gpu_addr \ ++ group_suspend_buf, \ ++ gpu_cmdq_grp_handle \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC - +@@ -1870,11 +2150,25 @@ struct kbase_tlstream; + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_alloc( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC - +@@ -1902,6 +2196,28 @@ struct kbase_tlstream; + * reused. 
The kernel should attempt to use a previous allocation with the same + * usage_id + */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ ++ kbdev, \ ++ kcpu_queue, \ ++ jit_alloc_gpu_alloc_addr_dest, \ ++ jit_alloc_va_pages, \ ++ jit_alloc_commit_pages, \ ++ jit_alloc_extent, \ ++ jit_alloc_jit_id, \ ++ jit_alloc_bin_id, \ ++ jit_alloc_max_allocations, \ ++ jit_alloc_flags, \ ++ jit_alloc_usage_id \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_alloc( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, jit_alloc_gpu_alloc_addr_dest, jit_alloc_va_pages, jit_alloc_commit_pages, jit_alloc_extent, jit_alloc_jit_id, jit_alloc_bin_id, jit_alloc_max_allocations, jit_alloc_flags, jit_alloc_usage_id); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ + kbdev, \ + kcpu_queue, \ +@@ -1916,6 +2232,7 @@ struct kbase_tlstream; + jit_alloc_usage_id \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC - +@@ -1924,11 +2241,25 @@ struct kbase_tlstream; + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_alloc( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE - +@@ -1937,11 +2268,25 @@ struct 
kbase_tlstream; + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_free( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE - +@@ -1952,12 +2297,27 @@ struct kbase_tlstream; + * @jit_alloc_jit_id: Unique ID provided by the caller, this is used + * to pair allocation and free requests. Zero is not a valid value + */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE( \ ++ kbdev, \ ++ kcpu_queue, \ ++ jit_alloc_jit_id \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_free( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, jit_alloc_jit_id); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE( \ + kbdev, \ + kcpu_queue, \ + jit_alloc_jit_id \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE - +@@ -1966,11 +2326,25 @@ struct kbase_tlstream; + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ 
__kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_free( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START - +@@ -1979,11 +2353,25 @@ struct kbase_tlstream; + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END - +@@ -1991,12 +2379,29 @@ struct kbase_tlstream; + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue ++ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero + */ ++#if MALI_USE_CSF + #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END( \ + kbdev, \ +- kcpu_queue \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, execute_error); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START - 
+@@ -2005,11 +2410,25 @@ struct kbase_tlstream; + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_start( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END - +@@ -2017,12 +2436,29 @@ struct kbase_tlstream; + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue ++ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero + */ ++#if MALI_USE_CSF + #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END( \ + kbdev, \ +- kcpu_queue \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, execute_error); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START - +@@ -2031,11 +2467,25 @@ struct kbase_tlstream; + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ 
__kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_start( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END - +@@ -2043,12 +2493,29 @@ struct kbase_tlstream; + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue ++ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero + */ ++#if MALI_USE_CSF + #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END( \ + kbdev, \ +- kcpu_queue \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, execute_error); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET - +@@ -2056,51 +2523,56 @@ struct kbase_tlstream; + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue ++ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero + */ ++#if MALI_USE_CSF + #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET( \ + kbdev, \ +- kcpu_queue \ ++ kcpu_queue, \ ++ execute_error \ + ) \ +- do { } while (0) +- +-/** +- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START - +- * KCPU Queue starts an array of Debug Copys +- * +- * @kbdev: Kbase device +- * @kcpu_queue: KCPU queue +- */ +-#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START( \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ 
__kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, execute_error); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET( \ + kbdev, \ +- kcpu_queue \ ++ kcpu_queue, \ ++ execute_error \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** +- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END - +- * KCPU Queue ends an array of Debug Copys ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START - ++ * KCPU Queue starts a Map Import + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +-#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END( \ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START( \ + kbdev, \ + kcpu_queue \ + ) \ +- do { } while (0) +- +-/** +- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START - +- * KCPU Queue starts a Map Import +- * +- * @kbdev: Kbase device +- * @kcpu_queue: KCPU queue +- */ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END - +@@ -2108,12 +2580,29 @@ struct kbase_tlstream; + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue ++ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero + */ ++#if MALI_USE_CSF + #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END( \ + kbdev, \ +- kcpu_queue \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ 
__kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, execute_error); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START - +@@ -2122,11 +2611,25 @@ struct kbase_tlstream; + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_start( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END - +@@ -2134,12 +2637,29 @@ struct kbase_tlstream; + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue ++ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero + */ ++#if MALI_USE_CSF + #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END( \ + kbdev, \ +- kcpu_queue \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, execute_error); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * 
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START - +@@ -2148,11 +2668,25 @@ struct kbase_tlstream; + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_start( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END - +@@ -2160,12 +2694,29 @@ struct kbase_tlstream; + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue ++ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero + */ ++#if MALI_USE_CSF + #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END( \ + kbdev, \ +- kcpu_queue \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, execute_error); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START - +@@ -2174,11 +2725,25 @@ struct kbase_tlstream; + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = 
atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_alloc_start( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END - +@@ -2187,11 +2752,25 @@ struct kbase_tlstream; + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_alloc_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END - +@@ -2199,16 +2778,35 @@ struct kbase_tlstream; + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue ++ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero + * @jit_alloc_gpu_alloc_addr: The JIT allocated GPU virtual address + * @jit_alloc_mmu_flags: The MMU flags for the JIT allocation + */ ++#if MALI_USE_CSF + #define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ + kbdev, \ + kcpu_queue, \ ++ execute_error, \ ++ jit_alloc_gpu_alloc_addr, \ ++ jit_alloc_mmu_flags \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end( \ ++ 
__TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, execute_error, jit_alloc_gpu_alloc_addr, jit_alloc_mmu_flags); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error, \ + jit_alloc_gpu_alloc_addr, \ + jit_alloc_mmu_flags \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END - +@@ -2217,11 +2815,25 @@ struct kbase_tlstream; + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_alloc_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START - +@@ -2230,11 +2842,25 @@ struct kbase_tlstream; + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_free_start( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END - +@@ -2243,11 +2869,25 @@ struct kbase_tlstream; + * @kbdev: Kbase 
device + * @kcpu_queue: KCPU queue + */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_free_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END - +@@ -2255,15 +2895,33 @@ struct kbase_tlstream; + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue ++ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero + * @jit_free_pages_used: The actual number of pages used by the JIT + * allocation + */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error, \ ++ jit_free_pages_used \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, execute_error, jit_free_pages_used); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ + kbdev, \ + kcpu_queue, \ ++ execute_error, \ + jit_free_pages_used \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END - +@@ -2272,24 +2930,109 @@ struct kbase_tlstream; + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ 
++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue); \ ++ } while (0) ++#else + #define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** +- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER - ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER - + * KCPU Queue executes an Error Barrier + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +-#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER( \ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_error_barrier( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ ++ ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START - ++ * KCPU Queue starts a group suspend ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START( \ ++ kbdev, \ ++ kcpu_queue \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_start( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) ++#endif /* 
MALI_USE_CSF */ ++ ++/** ++ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END - ++ * KCPU Queue ends a group suspend ++ * ++ * @kbdev: Kbase device ++ * @kcpu_queue: KCPU queue ++ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_end( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ kcpu_queue, execute_error); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END( \ ++ kbdev, \ ++ kcpu_queue, \ ++ execute_error \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ + + /** + * KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW - +@@ -2299,12 +3042,54 @@ struct kbase_tlstream; + * @csffw_timestamp: Timestamp of a CSFFW event + * @csffw_cycle: Cycle number of a CSFFW event + */ ++#if MALI_USE_CSF + #define KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW( \ + kbdev, \ + csffw_timestamp, \ + csffw_cycle \ + ) \ ++ do { \ ++ int enabled = atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ csffw_timestamp, csffw_cycle); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW( \ ++ kbdev, \ ++ csffw_timestamp, \ ++ csffw_cycle \ ++ ) \ ++ do { } while (0) ++#endif /* MALI_USE_CSF */ ++ ++/** ++ * KBASE_TLSTREAM_TL_KBASE_CSFFW_RESET - ++ * A reset has happened with the CSFFW ++ * ++ * @kbdev: Kbase device ++ * @csffw_cycle: Cycle number of a CSFFW event ++ */ ++#if MALI_USE_CSF ++#define KBASE_TLSTREAM_TL_KBASE_CSFFW_RESET( \ ++ kbdev, \ ++ csffw_cycle \ ++ ) \ ++ do { \ ++ int enabled = 
atomic_read(&kbdev->timeline_flags); \ ++ if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ ++ __kbase_tlstream_tl_kbase_csffw_reset( \ ++ __TL_DISPATCH_STREAM(kbdev, obj), \ ++ csffw_cycle); \ ++ } while (0) ++#else ++#define KBASE_TLSTREAM_TL_KBASE_CSFFW_RESET( \ ++ kbdev, \ ++ csffw_cycle \ ++ ) \ + do { } while (0) ++#endif /* MALI_USE_CSF */ + + + /* Gator tracepoints are hooked into TLSTREAM interface. +diff --git a/dvalin/kernel/include/linux/dma-buf-test-exporter.h b/dvalin/kernel/include/linux/dma-buf-test-exporter.h +index 95bc6f8..5a310f6 100644 +--- a/dvalin/kernel/include/linux/dma-buf-test-exporter.h ++++ b/dvalin/kernel/include/linux/dma-buf-test-exporter.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2012-2013, 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2012-2013, 2017, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,11 +17,8 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- + #ifndef _LINUX_DMA_BUF_TEST_EXPORTER_H_ + #define _LINUX_DMA_BUF_TEST_EXPORTER_H_ + +diff --git a/dvalin/kernel/include/linux/memory_group_manager.h b/dvalin/kernel/include/linux/memory_group_manager.h +index b1ac253..c0e808b 100644 +--- a/dvalin/kernel/include/linux/memory_group_manager.h ++++ b/dvalin/kernel/include/linux/memory_group_manager.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _MEMORY_GROUP_MANAGER_H_ +diff --git a/dvalin/kernel/include/linux/priority_control_manager.h b/dvalin/kernel/include/linux/priority_control_manager.h +new file mode 100644 +index 0000000..002e78b +--- /dev/null ++++ b/dvalin/kernel/include/linux/priority_control_manager.h +@@ -0,0 +1,77 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#ifndef _PRIORITY_CONTROL_MANAGER_H_ ++#define _PRIORITY_CONTROL_MANAGER_H_ ++ ++#include <linux/mm.h> ++#include <linux/of.h> ++#include <linux/version.h> ++ ++struct priority_control_manager_device; ++ ++/** ++ * struct priority_control_manager_ops - Callbacks for priority control manager operations ++ * ++ * @pcm_scheduler_priority_check: Callback to check if scheduling priority level can be requested ++ */ ++struct priority_control_manager_ops { ++ /** ++ * pcm_scheduler_priority_check: This function can be used to check what priority its work ++ * would be treated as based on the requested_priority value. ++ * ++ * @pcm_dev: The priority control manager through which the request is ++ * being made. ++ * @task: The task struct of the process requesting the priority check. ++ * @requested_priority: The priority level being requested.
++ * ++ * The returned value will be: ++ * The same as requested_priority if the process has permission to use requested_priority ++ * A lower priority value if the process does not have permission to use requested_priority ++ * ++ * requested_priority has the following value range: ++ * 0-3 : Priority level, 0 being highest and 3 being lowest ++ * ++ * Return: The priority that would actually be given, could be lower than requested_priority ++ */ ++ int (*pcm_scheduler_priority_check)( ++ struct priority_control_manager_device *pcm_dev, ++ struct task_struct *task, int requested_priority); ++}; ++ ++/** ++ * struct priority_control_manager_device - Device structure for priority ++ * control manager ++ * ++ * @ops: Callbacks associated with this device ++ * @data: Pointer to device private data ++ * @owner: Pointer to the module owner ++ * ++ * This structure should be registered with the platform device using ++ * platform_set_drvdata(). ++ */ ++struct priority_control_manager_device { ++ struct priority_control_manager_ops ops; ++ void *data; ++ struct module *owner; ++}; ++ ++#endif /* _PRIORITY_CONTROL_MANAGER_H_ */ +diff --git a/dvalin/kernel/include/linux/protected_memory_allocator.h b/dvalin/kernel/include/linux/protected_memory_allocator.h +index 3b9205b..1e05bb8 100644 +--- a/dvalin/kernel/include/linux/protected_memory_allocator.h ++++ b/dvalin/kernel/include/linux/protected_memory_allocator.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _PROTECTED_MEMORY_ALLOCATOR_H_ +diff --git a/dvalin/kernel/include/linux/protected_mode_switcher.h b/dvalin/kernel/include/linux/protected_mode_switcher.h +index 8778d81..1a3c5f7 100644 +--- a/dvalin/kernel/include/linux/protected_mode_switcher.h ++++ b/dvalin/kernel/include/linux/protected_mode_switcher.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,8 +17,6 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + + #ifndef _PROTECTED_MODE_SWITCH_H_ +diff --git a/dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h +new file mode 100644 +index 0000000..72572e5 +--- /dev/null ++++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h +@@ -0,0 +1,765 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. 
++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#ifndef _UAPI_BASE_CSF_KERNEL_H_ ++#define _UAPI_BASE_CSF_KERNEL_H_ ++ ++#include ++ ++/* Memory allocation, access/hint flags. ++ * ++ * See base_mem_alloc_flags. ++ */ ++ ++/* IN */ ++/* Read access CPU side ++ */ ++#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0) ++ ++/* Write access CPU side ++ */ ++#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1) ++ ++/* Read access GPU side ++ */ ++#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2) ++ ++/* Write access GPU side ++ */ ++#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3) ++ ++/* Execute allowed on the GPU side ++ */ ++#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4) ++ ++/* Will be permanently mapped in kernel space. ++ * Flag is only allowed on allocations originating from kbase. ++ */ ++#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5) ++ ++/* The allocation will completely reside within the same 4GB chunk in the GPU ++ * virtual space. ++ * Since this flag is primarily required only for the TLS memory which will ++ * not be used to contain executable code and also not used for Tiler heap, ++ * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags. 
++ */ ++#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6) ++ ++/* Userspace is not allowed to free this memory. ++ * Flag is only allowed on allocations originating from kbase. ++ */ ++#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7) ++ ++#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8) ++ ++/* Grow backing store on GPU Page Fault ++ */ ++#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9) ++ ++/* Page coherence Outer shareable, if available ++ */ ++#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10) ++ ++/* Page coherence Inner shareable ++ */ ++#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11) ++ ++/* IN/OUT */ ++/* Should be cached on the CPU, returned if actually cached ++ */ ++#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12) ++ ++/* IN/OUT */ ++/* Must have same VA on both the GPU and the CPU ++ */ ++#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13) ++ ++/* OUT */ ++/* Must call mmap to acquire a GPU address for the alloc ++ */ ++#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14) ++ ++/* IN */ ++/* Page coherence Outer shareable, required. ++ */ ++#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15) ++ ++/* Protected memory ++ */ ++#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16) ++ ++/* Not needed physical memory ++ */ ++#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17) ++ ++/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the ++ * addresses to be the same ++ */ ++#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18) ++ ++/* CSF event memory ++ * ++ * If Outer shareable coherence is not specified or not available, then on ++ * allocation kbase will automatically use the uncached GPU mapping. ++ * There is no need for the client to specify BASE_MEM_UNCACHED_GPU ++ * themselves when allocating memory with the BASE_MEM_CSF_EVENT flag. 
++ * ++ * This memory requires a permanent mapping ++ * ++ * See also kbase_reg_needs_kernel_mapping() ++ */ ++#define BASE_MEM_CSF_EVENT ((base_mem_alloc_flags)1 << 19) ++ ++#define BASE_MEM_RESERVED_BIT_20 ((base_mem_alloc_flags)1 << 20) ++ ++/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu ++ * mode. Some components within the GPU might only be able to access memory ++ * that is GPU cacheable. Refer to the specific GPU implementation for more ++ * details. The 3 shareability flags will be ignored for GPU uncached memory. ++ * If used while importing USER_BUFFER type memory, then the import will fail ++ * if the memory is not aligned to GPU and CPU cache line width. ++ */ ++#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21) ++ ++/* ++ * Bits [22:25] for group_id (0~15). ++ * ++ * base_mem_group_id_set() should be used to pack a memory group ID into a ++ * base_mem_alloc_flags value instead of accessing the bits directly. ++ * base_mem_group_id_get() should be used to extract the memory group ID from ++ * a base_mem_alloc_flags value. ++ */ ++#define BASEP_MEM_GROUP_ID_SHIFT 22 ++#define BASE_MEM_GROUP_ID_MASK \ ++ ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT) ++ ++/* Must do CPU cache maintenance when imported memory is mapped/unmapped ++ * on GPU. Currently applicable to dma-buf type only. ++ */ ++#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26) ++ ++/* OUT */ ++/* Kernel side cache sync ops required */ ++#define BASE_MEM_KERNEL_SYNC ((base_mem_alloc_flags)1 << 28) ++ ++/* Number of bits used as flags for base memory management ++ * ++ * Must be kept in sync with the base_mem_alloc_flags flags ++ */ ++#define BASE_MEM_FLAGS_NR_BITS 29 ++ ++/* A mask of all the flags which are only valid for allocations within kbase, ++ * and may not be passed from user space. 
++ */ ++#define BASEP_MEM_FLAGS_KERNEL_ONLY \ ++ (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE) ++ ++/* A mask for all output bits, excluding IN/OUT bits. ++ */ ++#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP ++ ++/* A mask for all input bits, including IN/OUT bits. ++ */ ++#define BASE_MEM_FLAGS_INPUT_MASK \ ++ (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK) ++ ++/* A mask of all currently reserved flags (parenthesized so that '&' and '~' apply to the whole mask) ++ */ ++#define BASE_MEM_FLAGS_RESERVED \ ++ (BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_20) ++ ++#define BASEP_MEM_INVALID_HANDLE (0ull << 12) ++#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12) ++#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12) ++#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12) ++#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12) ++/* reserved handles ..-47< for future special handles */ ++#define BASEP_MEM_CSF_USER_REG_PAGE_HANDLE (47ul << 12) ++#define BASEP_MEM_CSF_USER_IO_PAGES_HANDLE (48ul << 12) ++#define BASE_MEM_COOKIE_BASE (64ul << 12) ++#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \ ++ BASE_MEM_COOKIE_BASE) ++ ++#define KBASE_CSF_NUM_USER_IO_PAGES_HANDLE \ ++ ((BASE_MEM_COOKIE_BASE - BASEP_MEM_CSF_USER_IO_PAGES_HANDLE) >> \ ++ LOCAL_PAGE_SHIFT) ++ ++/** ++ * Valid set of just-in-time memory allocation flags ++ */ ++#define BASE_JIT_ALLOC_VALID_FLAGS ((__u8)0) ++ ++/* Flags to pass to ::base_context_init. ++ * Flags can be ORed together to enable multiple things. ++ * ++ * These share the same space as BASEP_CONTEXT_FLAG_*, and so must ++ * not collide with them. ++ */ ++typedef __u32 base_context_create_flags; ++ ++/* No flags set */ ++#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0) ++ ++/* Base context is embedded in a cctx object (flag used for CINSTR ++ * software counter macros) ++ */ ++#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0) ++ ++/* Base context is a 'System Monitor' context for Hardware counters.
++ * ++ * One important side effect of this is that job submission is disabled. ++ */ ++#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \ ++ ((base_context_create_flags)1 << 1) ++ ++/* Base context creates a CSF event notification thread. ++ * ++ * The creation of a CSF event notification thread is conditional but ++ * mandatory for the handling of CSF events. ++ */ ++#define BASE_CONTEXT_CSF_EVENT_THREAD ((base_context_create_flags)1 << 2) ++ ++/* Bit-shift used to encode a memory group ID in base_context_create_flags ++ */ ++#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3) ++ ++/* Bitmask used to encode a memory group ID in base_context_create_flags ++ */ ++#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \ ++ ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) ++ ++/* Bitpattern describing the base_context_create_flags that can be ++ * passed to the kernel ++ */ ++#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \ ++ (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \ ++ BASEP_CONTEXT_MMU_GROUP_ID_MASK) ++ ++/* Bitpattern describing the ::base_context_create_flags that can be ++ * passed to base_context_init() ++ */ ++#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS \ ++ (BASE_CONTEXT_CCTX_EMBEDDED | \ ++ BASE_CONTEXT_CSF_EVENT_THREAD | \ ++ BASEP_CONTEXT_CREATE_KERNEL_FLAGS) ++ ++/* Enable additional tracepoints for latency measurements (TL_ATOM_READY, ++ * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) ++ */ ++#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0) ++ ++/* Indicate that job dumping is enabled. This could affect certain timers ++ * to account for the performance impact. 
++ */ ++#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1) ++ ++/* Enable KBase tracepoints for CSF builds */ ++#define BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS (1 << 2) ++ ++/* Enable additional CSF Firmware side tracepoints */ ++#define BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS (1 << 3) ++ ++#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \ ++ BASE_TLSTREAM_JOB_DUMPING_ENABLED | \ ++ BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS | \ ++ BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) ++ ++/* Number of pages mapped into the process address space for a bound GPU ++ * command queue. A pair of input/output pages and a Hw doorbell page ++ * are mapped to enable direct submission of commands to Hw. ++ */ ++#define BASEP_QUEUE_NR_MMAP_USER_PAGES ((size_t)3) ++ ++#define BASE_QUEUE_MAX_PRIORITY (15U) ++ ++/* CQS Sync object is an array of __u32 event_mem[2], error field index is 1 */ ++#define BASEP_EVENT_VAL_INDEX (0U) ++#define BASEP_EVENT_ERR_INDEX (1U) ++ ++/* The upper limit for number of objects that could be waited/set per command. ++ * This limit is now enforced as internally the error inherit inputs are ++ * converted to 32-bit flags in a __u32 variable occupying a previously padding ++ * field. ++ */ ++#define BASEP_KCPU_CQS_MAX_NUM_OBJS ((size_t)32) ++ ++#if MALI_UNIT_TEST ++/** ++ * enum base_kcpu_command_type - Kernel CPU queue command type. 
++ * @BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: fence_signal, ++ * @BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: fence_wait, ++ * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT: cqs_wait, ++ * @BASE_KCPU_COMMAND_TYPE_CQS_SET: cqs_set, ++ * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: cqs_wait_operation, ++ * @BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: cqs_set_operation, ++ * @BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: map_import, ++ * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: unmap_import, ++ * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: unmap_import_force, ++ * @BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: jit_alloc, ++ * @BASE_KCPU_COMMAND_TYPE_JIT_FREE: jit_free, ++ * @BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: group_suspend, ++ * @BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: error_barrier, ++ * @BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME: sample_time, ++ */ ++enum base_kcpu_command_type { ++ BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL, ++ BASE_KCPU_COMMAND_TYPE_FENCE_WAIT, ++ BASE_KCPU_COMMAND_TYPE_CQS_WAIT, ++ BASE_KCPU_COMMAND_TYPE_CQS_SET, ++ BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION, ++ BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION, ++ BASE_KCPU_COMMAND_TYPE_MAP_IMPORT, ++ BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT, ++ BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE, ++ BASE_KCPU_COMMAND_TYPE_JIT_ALLOC, ++ BASE_KCPU_COMMAND_TYPE_JIT_FREE, ++ BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND, ++ BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER, ++ BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME, ++}; ++#else ++/** ++ * enum base_kcpu_command_type - Kernel CPU queue command type. 
++ * @BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: fence_signal, ++ * @BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: fence_wait, ++ * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT: cqs_wait, ++ * @BASE_KCPU_COMMAND_TYPE_CQS_SET: cqs_set, ++ * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: cqs_wait_operation, ++ * @BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: cqs_set_operation, ++ * @BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: map_import, ++ * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: unmap_import, ++ * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: unmap_import_force, ++ * @BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: jit_alloc, ++ * @BASE_KCPU_COMMAND_TYPE_JIT_FREE: jit_free, ++ * @BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: group_suspend, ++ * @BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: error_barrier, ++ */ ++enum base_kcpu_command_type { ++ BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL, ++ BASE_KCPU_COMMAND_TYPE_FENCE_WAIT, ++ BASE_KCPU_COMMAND_TYPE_CQS_WAIT, ++ BASE_KCPU_COMMAND_TYPE_CQS_SET, ++ BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION, ++ BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION, ++ BASE_KCPU_COMMAND_TYPE_MAP_IMPORT, ++ BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT, ++ BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE, ++ BASE_KCPU_COMMAND_TYPE_JIT_ALLOC, ++ BASE_KCPU_COMMAND_TYPE_JIT_FREE, ++ BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND, ++ BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER, ++}; ++#endif /* MALI_UNIT_TEST */ ++ ++/** ++ * enum base_queue_group_priority - Priority of a GPU Command Queue Group. ++ * @BASE_QUEUE_GROUP_PRIORITY_HIGH: GPU Command Queue Group is of high ++ * priority. ++ * @BASE_QUEUE_GROUP_PRIORITY_MEDIUM: GPU Command Queue Group is of medium ++ * priority. ++ * @BASE_QUEUE_GROUP_PRIORITY_LOW: GPU Command Queue Group is of low ++ * priority. ++ * @BASE_QUEUE_GROUP_PRIORITY_REALTIME: GPU Command Queue Group is of real-time ++ * priority. ++ * @BASE_QUEUE_GROUP_PRIORITY_COUNT: Number of GPU Command Queue Group ++ * priority levels. 
++ * ++ * Currently this is in order of highest to lowest, but if new levels are added ++ * then those new levels may be out of order to preserve the ABI compatibility ++ * with previous releases. At that point, ensure assignment to ++ * the 'priority' member in &kbase_queue_group is updated to ensure it remains ++ * a linear ordering. ++ * ++ * There should be no gaps in the enum, otherwise use of ++ * BASE_QUEUE_GROUP_PRIORITY_COUNT in kbase must be updated. ++ */ ++enum base_queue_group_priority { ++ BASE_QUEUE_GROUP_PRIORITY_HIGH = 0, ++ BASE_QUEUE_GROUP_PRIORITY_MEDIUM, ++ BASE_QUEUE_GROUP_PRIORITY_LOW, ++ BASE_QUEUE_GROUP_PRIORITY_REALTIME, ++ BASE_QUEUE_GROUP_PRIORITY_COUNT ++}; ++ ++struct base_kcpu_command_fence_info { ++ __u64 fence; ++}; ++ ++struct base_cqs_wait_info { ++ __u64 addr; ++ __u32 val; ++ __u32 padding; ++}; ++ ++struct base_kcpu_command_cqs_wait_info { ++ __u64 objs; ++ __u32 nr_objs; ++ __u32 inherit_err_flags; ++}; ++ ++struct base_cqs_set { ++ __u64 addr; ++}; ++ ++struct base_kcpu_command_cqs_set_info { ++ __u64 objs; ++ __u32 nr_objs; ++ __u32 padding; ++}; ++ ++/** ++ * basep_cqs_data_type - Enumeration of CQS Data Types ++ * ++ * @BASEP_CQS_DATA_TYPE_U32: The Data Type of a CQS Object's value ++ * is an unsigned 32-bit integer ++ * @BASEP_CQS_DATA_TYPE_U64: The Data Type of a CQS Object's value ++ * is an unsigned 64-bit integer ++ */ ++typedef enum PACKED { ++ BASEP_CQS_DATA_TYPE_U32 = 0, ++ BASEP_CQS_DATA_TYPE_U64 = 1, ++} basep_cqs_data_type; ++ ++/** ++ * basep_cqs_wait_operation_op - Enumeration of CQS Object Wait ++ * Operation conditions ++ * ++ * @BASEP_CQS_WAIT_OPERATION_LE: CQS Wait Operation indicating that a ++ * wait will be satisfied when a CQS Object's ++ * value is Less than or Equal to ++ * the Wait Operation value ++ * @BASEP_CQS_WAIT_OPERATION_GT: CQS Wait Operation indicating that a ++ * wait will be satisfied when a CQS Object's ++ * value is Greater than the Wait Operation value ++ */ ++typedef enum { ++ 
BASEP_CQS_WAIT_OPERATION_LE = 0, ++ BASEP_CQS_WAIT_OPERATION_GT = 1, ++} basep_cqs_wait_operation_op; ++ ++struct base_cqs_wait_operation_info { ++ __u64 addr; ++ __u64 val; ++ __u8 operation; ++ __u8 data_type; ++ __u8 padding[6]; ++}; ++ ++/** ++ * struct base_kcpu_command_cqs_wait_operation_info - structure which contains information ++ * about the Timeline CQS wait objects ++ * ++ * @objs: An array of Timeline CQS waits. ++ * @nr_objs: Number of Timeline CQS waits in the array. ++ * @inherit_err_flags: Bit-pattern for the CQSs in the array whose error field ++ * is to be served as the source for importing into the ++ * queue's error-state. ++ */ ++struct base_kcpu_command_cqs_wait_operation_info { ++ __u64 objs; ++ __u32 nr_objs; ++ __u32 inherit_err_flags; ++}; ++ ++/** ++ * basep_cqs_set_operation_op - Enumeration of CQS Set Operations ++ * ++ * @BASEP_CQS_SET_OPERATION_ADD: CQS Set operation for adding a value ++ * to a synchronization object ++ * @BASEP_CQS_SET_OPERATION_SET: CQS Set operation for setting the value ++ * of a synchronization object ++ */ ++typedef enum { ++ BASEP_CQS_SET_OPERATION_ADD = 0, ++ BASEP_CQS_SET_OPERATION_SET = 1, ++} basep_cqs_set_operation_op; ++ ++struct base_cqs_set_operation_info { ++ __u64 addr; ++ __u64 val; ++ __u8 operation; ++ __u8 data_type; ++ __u8 padding[6]; ++}; ++ ++/** ++ * struct base_kcpu_command_cqs_set_operation_info - structure which contains information ++ * about the Timeline CQS set objects ++ * ++ * @objs: An array of Timeline CQS sets. ++ * @nr_objs: Number of Timeline CQS sets in the array. ++ * @padding: Structure padding, unused bytes. ++ */ ++struct base_kcpu_command_cqs_set_operation_info { ++ __u64 objs; ++ __u32 nr_objs; ++ __u32 padding; ++}; ++ ++/** ++ * struct base_kcpu_command_import_info - structure which contains information ++ * about the imported buffer. ++ * ++ * @handle: Address of imported user buffer.
++ */ ++struct base_kcpu_command_import_info { ++ __u64 handle; ++}; ++ ++/** ++ * struct base_kcpu_command_jit_alloc_info - structure which contains ++ * information about jit memory allocation. ++ * ++ * @info: An array of elements of the ++ * struct base_jit_alloc_info type. ++ * @count: The number of elements in the info array. ++ * @padding: Padding to a multiple of 64 bits. ++ */ ++struct base_kcpu_command_jit_alloc_info { ++ __u64 info; ++ __u8 count; ++ __u8 padding[7]; ++}; ++ ++/** ++ * struct base_kcpu_command_jit_free_info - structure which contains ++ * information about jit memory which is to be freed. ++ * ++ * @ids: An array containing the JIT IDs to free. ++ * @count: The number of elements in the ids array. ++ * @padding: Padding to a multiple of 64 bits. ++ */ ++struct base_kcpu_command_jit_free_info { ++ __u64 ids; ++ __u8 count; ++ __u8 padding[7]; ++}; ++ ++/** ++ * struct base_kcpu_command_group_suspend_info - structure which contains ++ * suspend buffer data captured for a suspended queue group. ++ * ++ * @buffer: Pointer to an array of elements of the type char. ++ * @size: Number of elements in the @buffer array. ++ * @group_handle: Handle to the mapping of CSG. ++ * @padding: padding to a multiple of 64 bits. ++ */ ++struct base_kcpu_command_group_suspend_info { ++ __u64 buffer; ++ __u32 size; ++ __u8 group_handle; ++ __u8 padding[3]; ++}; ++ ++#if MALI_UNIT_TEST ++struct base_kcpu_command_sample_time_info { ++ __u64 time; ++}; ++#endif /* MALI_UNIT_TEST */ ++ ++/** ++ * struct base_kcpu_command - kcpu command. 
++ * @type: type of the kcpu command, one enum base_kcpu_command_type ++ * @padding: padding to a multiple of 64 bits ++ * @info: structure which contains information about the kcpu command; ++ * actual type is determined by @p type ++ * @info.fence: Fence ++ * @info.cqs_wait: CQS wait ++ * @info.cqs_set: CQS set ++ * @info.import: import ++ * @info.jit_alloc: jit allocation ++ * @info.jit_free: jit deallocation ++ * @info.suspend_buf_copy: suspend buffer copy ++ * @info.sample_time: sample time ++ * @info.padding: padding ++ */ ++struct base_kcpu_command { ++ __u8 type; ++ __u8 padding[sizeof(__u64) - sizeof(__u8)]; ++ union { ++ struct base_kcpu_command_fence_info fence; ++ struct base_kcpu_command_cqs_wait_info cqs_wait; ++ struct base_kcpu_command_cqs_set_info cqs_set; ++ struct base_kcpu_command_cqs_wait_operation_info cqs_wait_operation; ++ struct base_kcpu_command_cqs_set_operation_info cqs_set_operation; ++ struct base_kcpu_command_import_info import; ++ struct base_kcpu_command_jit_alloc_info jit_alloc; ++ struct base_kcpu_command_jit_free_info jit_free; ++ struct base_kcpu_command_group_suspend_info suspend_buf_copy; ++#if MALI_UNIT_TEST ++ struct base_kcpu_command_sample_time_info sample_time; ++#endif /* MALI_UNIT_TEST */ ++ __u64 padding[2]; /* No sub-struct should be larger */ ++ } info; ++}; ++ ++/** ++ * struct basep_cs_stream_control - CSI capabilities. ++ * ++ * @features: Features of this stream ++ * @padding: Padding to a multiple of 64 bits. ++ */ ++struct basep_cs_stream_control { ++ __u32 features; ++ __u32 padding; ++}; ++ ++/** ++ * struct basep_cs_group_control - CSG interface capabilities. ++ * ++ * @features: Features of this group ++ * @stream_num: Number of streams in this group ++ * @suspend_size: Size in bytes of the suspend buffer for this group ++ * @padding: Padding to a multiple of 64 bits. 
++ */ ++struct basep_cs_group_control { ++ __u32 features; ++ __u32 stream_num; ++ __u32 suspend_size; ++ __u32 padding; ++}; ++ ++/** ++ * struct base_gpu_queue_group_error_fatal_payload - Unrecoverable fault ++ * error information associated with GPU command queue group. ++ * ++ * @sideband: Additional information of the unrecoverable fault. ++ * @status: Unrecoverable fault information. ++ * This consists of exception type (least significant byte) and ++ * data (remaining bytes). One example of exception type is ++ * CS_INVALID_INSTRUCTION (0x49). ++ * @padding: Padding to make multiple of 64bits ++ */ ++struct base_gpu_queue_group_error_fatal_payload { ++ __u64 sideband; ++ __u32 status; ++ __u32 padding; ++}; ++ ++/** ++ * struct base_gpu_queue_error_fatal_payload - Unrecoverable fault ++ * error information related to GPU command queue. ++ * ++ * @sideband: Additional information about this unrecoverable fault. ++ * @status: Unrecoverable fault information. ++ * This consists of exception type (least significant byte) and ++ * data (remaining bytes). One example of exception type is ++ * CS_INVALID_INSTRUCTION (0x49). ++ * @csi_index: Index of the CSF interface the queue is bound to. ++ * @padding: Padding to make multiple of 64bits ++ */ ++struct base_gpu_queue_error_fatal_payload { ++ __u64 sideband; ++ __u32 status; ++ __u8 csi_index; ++ __u8 padding[3]; ++}; ++ ++/** ++ * enum base_gpu_queue_group_error_type - GPU Fatal error type. ++ * ++ * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL: Fatal error associated with GPU ++ * command queue group. ++ * @BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL: Fatal error associated with GPU ++ * command queue. ++ * @BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT: Fatal error associated with ++ * progress timeout. ++ * @BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM: Fatal error due to running out ++ * of tiler heap memory. 
++ * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT: The number of fatal error types ++ * ++ * This type is used for &struct_base_gpu_queue_group_error.error_type. ++ */ ++enum base_gpu_queue_group_error_type { ++ BASE_GPU_QUEUE_GROUP_ERROR_FATAL = 0, ++ BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL, ++ BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT, ++ BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM, ++ BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT ++}; ++ ++/** ++ * struct base_gpu_queue_group_error - Unrecoverable fault information ++ * @error_type: Error type of @base_gpu_queue_group_error_type ++ * indicating which field in union payload is filled ++ * @padding: Unused bytes for 64bit boundary ++ * @payload: Input Payload ++ * @payload.fatal_group: Unrecoverable fault error associated with ++ * GPU command queue group ++ * @payload.fatal_queue: Unrecoverable fault error associated with command queue ++ */ ++struct base_gpu_queue_group_error { ++ __u8 error_type; ++ __u8 padding[7]; ++ union { ++ struct base_gpu_queue_group_error_fatal_payload fatal_group; ++ struct base_gpu_queue_error_fatal_payload fatal_queue; ++ } payload; ++}; ++ ++/** ++ * enum base_csf_notification_type - Notification type ++ * ++ * @BASE_CSF_NOTIFICATION_EVENT: Notification with kernel event ++ * @BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR: Notification with GPU fatal ++ * error ++ * @BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP: Notification with dumping cpu ++ * queue ++ * @BASE_CSF_NOTIFICATION_COUNT: The number of notification type ++ * ++ * This type is used for &struct_base_csf_notification.type. 
++ */ ++enum base_csf_notification_type { ++ BASE_CSF_NOTIFICATION_EVENT = 0, ++ BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, ++ BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP, ++ BASE_CSF_NOTIFICATION_COUNT ++}; ++ ++/** ++ * struct base_csf_notification - Event or error notification ++ * ++ * @type: Notification type of @base_csf_notification_type ++ * @padding: Padding for 64bit boundary ++ * @payload: Input Payload ++ * @payload.align: To fit the struct into a 64-byte cache line ++ * @payload.csg_error: CSG error ++ * @payload.csg_error.handle: Handle of GPU command queue group associated with ++ * fatal error ++ * @payload.csg_error.padding: Padding ++ * @payload.csg_error.error: Unrecoverable fault error ++ * ++ */ ++struct base_csf_notification { ++ __u8 type; ++ __u8 padding[7]; ++ union { ++ struct { ++ __u8 handle; ++ __u8 padding[7]; ++ struct base_gpu_queue_group_error error; ++ } csg_error; ++ ++ __u8 align[56]; ++ } payload; ++}; ++ ++#endif /* _UAPI_BASE_CSF_KERNEL_H_ */ +diff --git a/dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_control_registers.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_control_registers.h +new file mode 100644 +index 0000000..b62a8b0 +--- /dev/null ++++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_control_registers.h +@@ -0,0 +1,32 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++/* ++ * This header was autogenerated, it should not be edited. ++ */ ++ ++#ifndef _UAPI_GPU_CSF_CONTROL_REGISTERS_H_ ++#define _UAPI_GPU_CSF_CONTROL_REGISTERS_H_ ++ ++/* GPU_REGISTERS register offsets */ ++#define GPU_CONTROL_MCU 0x3000 /* () MCU control registers */ ++ ++#endif /* _UAPI_GPU_CSF_CONTROL_REGISTERS_H_ */ +diff --git a/dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h +new file mode 100644 +index 0000000..06cc4c2 +--- /dev/null ++++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h +@@ -0,0 +1,1488 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++/* ++ * This header was autogenerated, it should not be edited. 
++ */ ++ ++#ifndef _UAPI_GPU_CSF_REGISTERS_H_ ++#define _UAPI_GPU_CSF_REGISTERS_H_ ++ ++/* ++ * Begin register sets ++ */ ++ ++/* DOORBELLS base address */ ++#define DOORBELLS_BASE 0x0080000 ++#define DOORBELLS_REG(r) (DOORBELLS_BASE + (r)) ++ ++/* CS_KERNEL_INPUT_BLOCK base address */ ++#define CS_KERNEL_INPUT_BLOCK_BASE 0x0000 ++#define CS_KERNEL_INPUT_BLOCK_REG(r) (CS_KERNEL_INPUT_BLOCK_BASE + (r)) ++ ++/* CS_KERNEL_OUTPUT_BLOCK base address */ ++#define CS_KERNEL_OUTPUT_BLOCK_BASE 0x0000 ++#define CS_KERNEL_OUTPUT_BLOCK_REG(r) (CS_KERNEL_OUTPUT_BLOCK_BASE + (r)) ++ ++/* CS_USER_INPUT_BLOCK base address */ ++#define CS_USER_INPUT_BLOCK_BASE 0x0000 ++#define CS_USER_INPUT_BLOCK_REG(r) (CS_USER_INPUT_BLOCK_BASE + (r)) ++ ++/* CS_USER_OUTPUT_BLOCK base address */ ++#define CS_USER_OUTPUT_BLOCK_BASE 0x0000 ++#define CS_USER_OUTPUT_BLOCK_REG(r) (CS_USER_OUTPUT_BLOCK_BASE + (r)) ++ ++/* CSG_INPUT_BLOCK base address */ ++#define CSG_INPUT_BLOCK_BASE 0x0000 ++#define CSG_INPUT_BLOCK_REG(r) (CSG_INPUT_BLOCK_BASE + (r)) ++ ++/* CSG_OUTPUT_BLOCK base address */ ++#define CSG_OUTPUT_BLOCK_BASE 0x0000 ++#define CSG_OUTPUT_BLOCK_REG(r) (CSG_OUTPUT_BLOCK_BASE + (r)) ++ ++/* GLB_CONTROL_BLOCK base address */ ++#define GLB_CONTROL_BLOCK_BASE 0x04000000 ++#define GLB_CONTROL_BLOCK_REG(r) (GLB_CONTROL_BLOCK_BASE + (r)) ++ ++/* GLB_INPUT_BLOCK base address */ ++#define GLB_INPUT_BLOCK_BASE 0x0000 ++#define GLB_INPUT_BLOCK_REG(r) (GLB_INPUT_BLOCK_BASE + (r)) ++ ++/* GLB_OUTPUT_BLOCK base address */ ++#define GLB_OUTPUT_BLOCK_BASE 0x0000 ++#define GLB_OUTPUT_BLOCK_REG(r) (GLB_OUTPUT_BLOCK_BASE + (r)) ++ ++/* USER base address */ ++#define USER_BASE 0x0010000 ++#define USER_REG(r) (USER_BASE + (r)) ++ ++/* End register sets */ ++ ++/* ++ * Begin register offsets ++ */ ++ ++/* DOORBELLS register offsets */ ++#define DOORBELL_0 0x0000 /* () Doorbell 0 register */ ++#define DOORBELL(n) (DOORBELL_0 + (n)*65536) ++#define DOORBELL_REG(n, r) (DOORBELL(n) + DOORBELL_BLOCK_REG(r)) ++#define 
DOORBELL_COUNT 1024 ++ ++/* DOORBELL_BLOCK register offsets */ ++#define DB_BLK_DOORBELL 0x0000 /* (WO) Doorbell request */ ++ ++/* CS_KERNEL_INPUT_BLOCK register offsets */ ++#define CS_REQ 0x0000 /* () CS request flags */ ++#define CS_CONFIG 0x0004 /* () CS configuration */ ++#define CS_ACK_IRQ_MASK 0x000C /* () Command stream interrupt mask */ ++#define CS_BASE_LO 0x0010 /* () Base pointer for the ring buffer, low word */ ++#define CS_BASE_HI 0x0014 /* () Base pointer for the ring buffer, high word */ ++#define CS_SIZE 0x0018 /* () Size of the ring buffer */ ++#define CS_TILER_HEAP_START_LO 0x0020 /* () Pointer to heap start, low word */ ++#define CS_TILER_HEAP_START_HI 0x0024 /* () Pointer to heap start, high word */ ++#define CS_TILER_HEAP_END_LO 0x0028 /* () Tiler heap descriptor address, low word */ ++#define CS_TILER_HEAP_END_HI 0x002C /* () Tiler heap descriptor address, high word */ ++#define CS_USER_INPUT_LO 0x0030 /* () CS user mode input page address, low word */ ++#define CS_USER_INPUT_HI 0x0034 /* () CS user mode input page address, high word */ ++#define CS_USER_OUTPUT_LO 0x0038 /* () CS user mode output page address, low word */ ++#define CS_USER_OUTPUT_HI 0x003C /* () CS user mode output page address, high word */ ++#define CS_INSTR_CONFIG 0x0040 /* () Instrumentation buffer configuration */ ++#define CS_INSTR_BUFFER_SIZE 0x0044 /* () Instrumentation buffer size */ ++#define CS_INSTR_BUFFER_BASE_LO 0x0048 /* () Instrumentation buffer base pointer, low word */ ++#define CS_INSTR_BUFFER_BASE_HI 0x004C /* () Instrumentation buffer base pointer, high word */ ++#define CS_INSTR_BUFFER_OFFSET_POINTER_LO 0x0050 /* () Instrumentation buffer pointer to insert offset, low word */ ++#define CS_INSTR_BUFFER_OFFSET_POINTER_HI 0x0054 /* () Instrumentation buffer pointer to insert offset, high word */ ++ ++/* CS_KERNEL_OUTPUT_BLOCK register offsets */ ++#define CS_ACK 0x0000 /* () CS acknowledge flags */ ++#define CS_STATUS_CMD_PTR_LO 0x0040 /* () Program pointer
current value, low word */ ++#define CS_STATUS_CMD_PTR_HI 0x0044 /* () Program pointer current value, high word */ ++#define CS_STATUS_WAIT 0x0048 /* () Wait condition status register */ ++#define CS_STATUS_REQ_RESOURCE 0x004C /* () Indicates the resources requested by the CS */ ++#define CS_STATUS_WAIT_SYNC_POINTER_LO 0x0050 /* () Sync object pointer, low word */ ++#define CS_STATUS_WAIT_SYNC_POINTER_HI 0x0054 /* () Sync object pointer, high word */ ++#define CS_STATUS_WAIT_SYNC_VALUE 0x0058 /* () Sync object test value */ ++#define CS_STATUS_SCOREBOARDS 0x005C /* () Scoreboard status */ ++#define CS_STATUS_BLOCKED_REASON 0x0060 /* () Blocked reason */ ++#define CS_FAULT 0x0080 /* () Recoverable fault information */ ++#define CS_FATAL 0x0084 /* () Unrecoverable fault information */ ++#define CS_FAULT_INFO_LO 0x0088 /* () Additional information about a recoverable fault, low word */ ++#define CS_FAULT_INFO_HI 0x008C /* () Additional information about a recoverable fault, high word */ ++#define CS_FATAL_INFO_LO 0x0090 /* () Additional information about a non-recoverable fault, low word */ ++#define CS_FATAL_INFO_HI 0x0094 /* () Additional information about a non-recoverable fault, high word */ ++#define CS_HEAP_VT_START 0x00C0 /* () Number of vertex/tiling operations started */ ++#define CS_HEAP_VT_END 0x00C4 /* () Number of vertex/tiling operations completed */ ++#define CS_HEAP_FRAG_END 0x00CC /* () Number of fragment completed */ ++#define CS_HEAP_ADDRESS_LO 0x00D0 /* () Heap address, low word */ ++#define CS_HEAP_ADDRESS_HI 0x00D4 /* () Heap address, high word */ ++ ++/* CS_USER_INPUT_BLOCK register offsets */ ++#define CS_INSERT_LO 0x0000 /* () Current insert offset for ring buffer, low word */ ++#define CS_INSERT_HI 0x0004 /* () Current insert offset for ring buffer, high word */ ++#define CS_EXTRACT_INIT_LO 0x0008 /* () Initial extract offset for ring buffer, low word */ ++#define CS_EXTRACT_INIT_HI 0x000C /* () Initial extract offset for ring buffer, high 
word */ ++ ++/* CS_USER_OUTPUT_BLOCK register offsets */ ++#define CS_EXTRACT_LO 0x0000 /* () Current extract offset for ring buffer, low word */ ++#define CS_EXTRACT_HI 0x0004 /* () Current extract offset for ring buffer, high word */ ++#define CS_ACTIVE 0x0008 /* () Initial extract offset when the CS is started */ ++ ++/* CSG_INPUT_BLOCK register offsets */ ++#define CSG_REQ 0x0000 /* () CSG request */ ++#define CSG_ACK_IRQ_MASK 0x0004 /* () Global acknowledge interrupt mask */ ++#define CSG_DB_REQ 0x0008 /* () Global doorbell request */ ++#define CSG_IRQ_ACK 0x000C /* () CS IRQ acknowledge */ ++#define CSG_ALLOW_COMPUTE_LO 0x0020 /* () Allowed compute endpoints, low word */ ++#define CSG_ALLOW_COMPUTE_HI 0x0024 /* () Allowed compute endpoints, high word */ ++#define CSG_ALLOW_FRAGMENT_LO 0x0028 /* () Allowed fragment endpoints, low word */ ++#define CSG_ALLOW_FRAGMENT_HI 0x002C /* () Allowed fragment endpoints, high word */ ++#define CSG_ALLOW_OTHER 0x0030 /* () Allowed other endpoints */ ++#define CSG_EP_REQ 0x0034 /* () Maximum number of endpoints allowed */ ++#define CSG_SUSPEND_BUF_LO 0x0040 /* () Normal mode suspend buffer, low word */ ++#define CSG_SUSPEND_BUF_HI 0x0044 /* () Normal mode suspend buffer, high word */ ++#define CSG_PROTM_SUSPEND_BUF_LO 0x0048 /* () Protected mode suspend buffer, low word */ ++#define CSG_PROTM_SUSPEND_BUF_HI 0x004C /* () Protected mode suspend buffer, high word */ ++#define CSG_CONFIG 0x0050 /* () CSG configuration options */ ++#define CSG_ITER_TRACE_CONFIG 0x0054 /* () CSG trace configuration */ ++ ++/* CSG_OUTPUT_BLOCK register offsets */ ++#define CSG_ACK 0x0000 /* () CSG acknowledge flags */ ++#define CSG_DB_ACK 0x0008 /* () CS kernel doorbell acknowledge flags */ ++#define CSG_IRQ_REQ 0x000C /* () CS interrupt request flags */ ++#define CSG_STATUS_EP_CURRENT 0x0010 /* () Endpoint allocation status register */ ++#define CSG_STATUS_EP_REQ 0x0014 /* () Endpoint request status register */ ++#define CSG_RESOURCE_DEP 0x001C 
/* () Current resource dependencies */ ++ ++/* GLB_CONTROL_BLOCK register offsets */ ++#define GLB_VERSION 0x0000 /* () Global interface version */ ++#define GLB_FEATURES 0x0004 /* () Global interface features */ ++#define GLB_INPUT_VA 0x0008 /* () Address of GLB_INPUT_BLOCK */ ++#define GLB_OUTPUT_VA 0x000C /* () Address of GLB_OUTPUT_BLOCK */ ++#define GLB_GROUP_NUM 0x0010 /* () Number of CSG interfaces */ ++#define GLB_GROUP_STRIDE 0x0014 /* () Stride between CSG interfaces */ ++#define GLB_PRFCNT_SIZE 0x0018 /* () Size of CSF performance counters */ ++#define GLB_INSTR_FEATURES \ ++ 0x001C /* () TRACE_POINT instrumentation. (csf >= 1.1.0) */ ++#define GROUP_CONTROL_0 0x1000 /* () CSG control and capabilities */ ++#define GROUP_CONTROL(n) (GROUP_CONTROL_0 + (n)*256) ++#define GROUP_CONTROL_REG(n, r) (GROUP_CONTROL(n) + GROUP_CONTROL_BLOCK_REG(r)) ++#define GROUP_CONTROL_COUNT 16 ++ ++/* STREAM_CONTROL_BLOCK register offsets */ ++#define STREAM_FEATURES 0x0000 /* () CSI features */ ++#define STREAM_INPUT_VA 0x0004 /* () Address of CS_KERNEL_INPUT_BLOCK */ ++#define STREAM_OUTPUT_VA 0x0008 /* () Address of CS_KERNEL_OUTPUT_BLOCK */ ++ ++/* GROUP_CONTROL_BLOCK register offsets */ ++#define GROUP_FEATURES 0x0000 /* () CSG interface features */ ++#define GROUP_INPUT_VA 0x0004 /* () Address of CSG_INPUT_BLOCK */ ++#define GROUP_OUTPUT_VA 0x0008 /* () Address of CSG_OUTPUT_BLOCK */ ++#define GROUP_SUSPEND_SIZE 0x000C /* () Size of CSG suspend buffer */ ++#define GROUP_PROTM_SUSPEND_SIZE 0x0010 /* () Size of CSG protected-mode suspend buffer */ ++#define GROUP_STREAM_NUM 0x0014 /* () Number of CS interfaces */ ++#define GROUP_STREAM_STRIDE 0x0018 /* () Stride between CS interfaces */ ++#define STREAM_CONTROL_0 0x0040 /* () CS control and capabilities */ ++#define STREAM_CONTROL(n) (STREAM_CONTROL_0 + (n)*12) ++#define STREAM_CONTROL_REG(n, r) (STREAM_CONTROL(n) + STREAM_CONTROL_BLOCK_REG(r)) ++#define STREAM_CONTROL_COUNT 16 ++ ++/* GLB_INPUT_BLOCK register offsets */ 
++#define GLB_REQ 0x0000 /* () Global request */ ++#define GLB_ACK_IRQ_MASK 0x0004 /* () Global acknowledge interrupt mask */ ++#define GLB_DB_REQ 0x0008 /* () Global doorbell request */ ++#define GLB_PROGRESS_TIMER 0x0010 /* () Global progress timeout */ ++#define GLB_PWROFF_TIMER 0x0014 /* () Global shader core power off timer */ ++#define GLB_ALLOC_EN_LO 0x0018 /* () Global shader core allocation enable mask, low word */ ++#define GLB_ALLOC_EN_HI 0x001C /* () Global shader core allocation enable mask, high word */ ++#define GLB_PROTM_COHERENCY 0x0020 /* () Configure COHERENCY_ENABLE register value to use in protected mode execution */ ++ ++#define GLB_PRFCNT_JASID 0x0024 /* () Performance counter address space */ ++#define GLB_PRFCNT_BASE_LO 0x0028 /* () Performance counter buffer address, low word */ ++#define GLB_PRFCNT_BASE_HI 0x002C /* () Performance counter buffer address, high word */ ++#define GLB_PRFCNT_EXTRACT 0x0030 /* () Performance counter buffer extract index */ ++#define GLB_PRFCNT_CONFIG 0x0040 /* () Performance counter configuration */ ++#define GLB_PRFCNT_CSG_SELECT 0x0044 /* () CSG performance counting enable */ ++#define GLB_PRFCNT_FW_EN 0x0048 /* () Performance counter enable for firmware */ ++#define GLB_PRFCNT_CSG_EN 0x004C /* () Performance counter enable for CSG */ ++#define GLB_PRFCNT_CSF_EN 0x0050 /* () Performance counter enable for CSF */ ++#define GLB_PRFCNT_SHADER_EN 0x0054 /* () Performance counter enable for shader cores */ ++#define GLB_PRFCNT_TILER_EN 0x0058 /* () Performance counter enable for tiler */ ++#define GLB_PRFCNT_MMU_L2_EN 0x005C /* () Performance counter enable for MMU/L2 cache */ ++ ++#define GLB_DEBUG_FWUTF_DESTROY 0x0FE0 /* () Test fixture destroy function address */ ++#define GLB_DEBUG_FWUTF_TEST 0x0FE4 /* () Test index */ ++#define GLB_DEBUG_FWUTF_FIXTURE 0x0FE8 /* () Test fixture index */ ++#define GLB_DEBUG_FWUTF_CREATE 0x0FEC /* () Test fixture create function address */ ++#define GLB_DEBUG_ACK_IRQ_MASK 
0x0FF8 /* () Global debug acknowledge interrupt mask */ ++#define GLB_DEBUG_REQ 0x0FFC /* () Global debug request */ ++ ++/* GLB_OUTPUT_BLOCK register offsets */ ++#define GLB_ACK 0x0000 /* () Global acknowledge */ ++#define GLB_DB_ACK 0x0008 /* () Global doorbell acknowledge */ ++#define GLB_HALT_STATUS 0x0010 /* () Global halt status */ ++#define GLB_PRFCNT_STATUS 0x0014 /* () Performance counter status */ ++#define GLB_PRFCNT_INSERT 0x0018 /* () Performance counter buffer insert index */ ++#define GLB_DEBUG_FWUTF_RESULT 0x0FE0 /* () Firmware debug test result */ ++#define GLB_DEBUG_ACK 0x0FFC /* () Global debug acknowledge */ ++ ++/* USER register offsets */ ++#define LATEST_FLUSH 0x0000 /* () Flush ID of latest clean-and-invalidate operation */ ++ ++/* End register offsets */ ++ ++/* CS_KERNEL_INPUT_BLOCK register set definitions */ ++/* GLB_VERSION register */ ++#define GLB_VERSION_PATCH_SHIFT (0) ++#define GLB_VERSION_MINOR_SHIFT (16) ++#define GLB_VERSION_MAJOR_SHIFT (24) ++ ++/* CS_REQ register */ ++#define CS_REQ_STATE_SHIFT 0 ++#define CS_REQ_STATE_MASK (0x7 << CS_REQ_STATE_SHIFT) ++#define CS_REQ_STATE_GET(reg_val) (((reg_val)&CS_REQ_STATE_MASK) >> CS_REQ_STATE_SHIFT) ++#define CS_REQ_STATE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_REQ_STATE_MASK) | (((value) << CS_REQ_STATE_SHIFT) & CS_REQ_STATE_MASK)) ++/* CS_REQ_STATE values */ ++#define CS_REQ_STATE_STOP 0x0 ++#define CS_REQ_STATE_START 0x1 ++/* End of CS_REQ_STATE values */ ++#define CS_REQ_EXTRACT_EVENT_SHIFT 4 ++#define CS_REQ_EXTRACT_EVENT_MASK (0x1 << CS_REQ_EXTRACT_EVENT_SHIFT) ++#define CS_REQ_EXTRACT_EVENT_GET(reg_val) (((reg_val)&CS_REQ_EXTRACT_EVENT_MASK) >> CS_REQ_EXTRACT_EVENT_SHIFT) ++#define CS_REQ_EXTRACT_EVENT_SET(reg_val, value) \ ++ (((reg_val) & ~CS_REQ_EXTRACT_EVENT_MASK) | (((value) << CS_REQ_EXTRACT_EVENT_SHIFT) & CS_REQ_EXTRACT_EVENT_MASK)) ++ ++#define CS_REQ_IDLE_SYNC_WAIT_SHIFT 8 ++#define CS_REQ_IDLE_SYNC_WAIT_MASK (0x1 << CS_REQ_IDLE_SYNC_WAIT_SHIFT) ++#define 
CS_REQ_IDLE_SYNC_WAIT_GET(reg_val) (((reg_val)&CS_REQ_IDLE_SYNC_WAIT_MASK) >> CS_REQ_IDLE_SYNC_WAIT_SHIFT) ++#define CS_REQ_IDLE_SYNC_WAIT_SET(reg_val, value) \ ++ (((reg_val) & ~CS_REQ_IDLE_SYNC_WAIT_MASK) | \ ++ (((value) << CS_REQ_IDLE_SYNC_WAIT_SHIFT) & CS_REQ_IDLE_SYNC_WAIT_MASK)) ++#define CS_REQ_IDLE_PROTM_PEND_SHIFT 9 ++#define CS_REQ_IDLE_PROTM_PEND_MASK (0x1 << CS_REQ_IDLE_PROTM_PEND_SHIFT) ++#define CS_REQ_IDLE_PROTM_PEND_GET(reg_val) (((reg_val)&CS_REQ_IDLE_PROTM_PEND_MASK) >> CS_REQ_IDLE_PROTM_PEND_SHIFT) ++#define CS_REQ_IDLE_PROTM_PEND_SET(reg_val, value) \ ++ (((reg_val) & ~CS_REQ_IDLE_PROTM_PEND_MASK) | \ ++ (((value) << CS_REQ_IDLE_PROTM_PEND_SHIFT) & CS_REQ_IDLE_PROTM_PEND_MASK)) ++#define CS_REQ_IDLE_EMPTY_SHIFT 10 ++#define CS_REQ_IDLE_EMPTY_MASK (0x1 << CS_REQ_IDLE_EMPTY_SHIFT) ++#define CS_REQ_IDLE_EMPTY_GET(reg_val) (((reg_val)&CS_REQ_IDLE_EMPTY_MASK) >> CS_REQ_IDLE_EMPTY_SHIFT) ++#define CS_REQ_IDLE_EMPTY_SET(reg_val, value) \ ++ (((reg_val) & ~CS_REQ_IDLE_EMPTY_MASK) | (((value) << CS_REQ_IDLE_EMPTY_SHIFT) & CS_REQ_IDLE_EMPTY_MASK)) ++#define CS_REQ_IDLE_RESOURCE_REQ_SHIFT 11 ++#define CS_REQ_IDLE_RESOURCE_REQ_MASK (0x1 << CS_REQ_IDLE_RESOURCE_REQ_SHIFT) ++#define CS_REQ_IDLE_RESOURCE_REQ_GET(reg_val) \ ++ (((reg_val)&CS_REQ_IDLE_RESOURCE_REQ_MASK) >> CS_REQ_IDLE_RESOURCE_REQ_SHIFT) ++#define CS_REQ_IDLE_RESOURCE_REQ_SET(reg_val, value) \ ++ (((reg_val) & ~CS_REQ_IDLE_RESOURCE_REQ_MASK) | \ ++ (((value) << CS_REQ_IDLE_RESOURCE_REQ_SHIFT) & CS_REQ_IDLE_RESOURCE_REQ_MASK)) ++#define CS_REQ_TILER_OOM_SHIFT 26 ++#define CS_REQ_TILER_OOM_MASK (0x1 << CS_REQ_TILER_OOM_SHIFT) ++#define CS_REQ_TILER_OOM_GET(reg_val) (((reg_val)&CS_REQ_TILER_OOM_MASK) >> CS_REQ_TILER_OOM_SHIFT) ++#define CS_REQ_TILER_OOM_SET(reg_val, value) \ ++ (((reg_val) & ~CS_REQ_TILER_OOM_MASK) | (((value) << CS_REQ_TILER_OOM_SHIFT) & CS_REQ_TILER_OOM_MASK)) ++#define CS_REQ_PROTM_PEND_SHIFT 27 ++#define CS_REQ_PROTM_PEND_MASK (0x1 << CS_REQ_PROTM_PEND_SHIFT) ++#define 
CS_REQ_PROTM_PEND_GET(reg_val) (((reg_val)&CS_REQ_PROTM_PEND_MASK) >> CS_REQ_PROTM_PEND_SHIFT) ++#define CS_REQ_PROTM_PEND_SET(reg_val, value) \ ++ (((reg_val) & ~CS_REQ_PROTM_PEND_MASK) | (((value) << CS_REQ_PROTM_PEND_SHIFT) & CS_REQ_PROTM_PEND_MASK)) ++#define CS_REQ_FATAL_SHIFT 30 ++#define CS_REQ_FATAL_MASK (0x1 << CS_REQ_FATAL_SHIFT) ++#define CS_REQ_FATAL_GET(reg_val) (((reg_val)&CS_REQ_FATAL_MASK) >> CS_REQ_FATAL_SHIFT) ++#define CS_REQ_FATAL_SET(reg_val, value) \ ++ (((reg_val) & ~CS_REQ_FATAL_MASK) | (((value) << CS_REQ_FATAL_SHIFT) & CS_REQ_FATAL_MASK)) ++#define CS_REQ_FAULT_SHIFT 31 ++#define CS_REQ_FAULT_MASK (0x1U << CS_REQ_FAULT_SHIFT) /* unsigned literal: a bit-31 shift must not land in int's sign bit */ ++#define CS_REQ_FAULT_GET(reg_val) (((reg_val)&CS_REQ_FAULT_MASK) >> CS_REQ_FAULT_SHIFT) ++#define CS_REQ_FAULT_SET(reg_val, value) \ ++ (((reg_val) & ~CS_REQ_FAULT_MASK) | (((value) << CS_REQ_FAULT_SHIFT) & CS_REQ_FAULT_MASK)) ++ ++/* CS_CONFIG register */ ++#define CS_CONFIG_PRIORITY_SHIFT 0 ++#define CS_CONFIG_PRIORITY_MASK (0xF << CS_CONFIG_PRIORITY_SHIFT) ++#define CS_CONFIG_PRIORITY_GET(reg_val) (((reg_val)&CS_CONFIG_PRIORITY_MASK) >> CS_CONFIG_PRIORITY_SHIFT) ++#define CS_CONFIG_PRIORITY_SET(reg_val, value) \ ++ (((reg_val) & ~CS_CONFIG_PRIORITY_MASK) | (((value) << CS_CONFIG_PRIORITY_SHIFT) & CS_CONFIG_PRIORITY_MASK)) ++#define CS_CONFIG_USER_DOORBELL_SHIFT 8 ++#define CS_CONFIG_USER_DOORBELL_MASK (0xFF << CS_CONFIG_USER_DOORBELL_SHIFT) ++#define CS_CONFIG_USER_DOORBELL_GET(reg_val) (((reg_val)&CS_CONFIG_USER_DOORBELL_MASK) >> CS_CONFIG_USER_DOORBELL_SHIFT) ++#define CS_CONFIG_USER_DOORBELL_SET(reg_val, value) \ ++ (((reg_val) & ~CS_CONFIG_USER_DOORBELL_MASK) | \ ++ (((value) << CS_CONFIG_USER_DOORBELL_SHIFT) & CS_CONFIG_USER_DOORBELL_MASK)) ++ ++/* CS_ACK_IRQ_MASK register */ ++#define CS_ACK_IRQ_MASK_STATE_SHIFT 0 ++#define CS_ACK_IRQ_MASK_STATE_MASK (0x7 << CS_ACK_IRQ_MASK_STATE_SHIFT) ++#define CS_ACK_IRQ_MASK_STATE_GET(reg_val) (((reg_val)&CS_ACK_IRQ_MASK_STATE_MASK) >> CS_ACK_IRQ_MASK_STATE_SHIFT)
++#define CS_ACK_IRQ_MASK_STATE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_ACK_IRQ_MASK_STATE_MASK) | \ ++ (((value) << CS_ACK_IRQ_MASK_STATE_SHIFT) & CS_ACK_IRQ_MASK_STATE_MASK)) ++/* CS_ACK_IRQ_MASK_STATE values */ ++#define CS_ACK_IRQ_MASK_STATE_DISABLED 0x0 ++#define CS_ACK_IRQ_MASK_STATE_ENABLED 0x7 ++/* End of CS_ACK_IRQ_MASK_STATE values */ ++#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT 4 ++#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK (0x1 << CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT) ++#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_GET(reg_val) \ ++ (((reg_val)&CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK) >> CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT) ++#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_SET(reg_val, value) \ ++ (((reg_val) & ~CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK) | \ ++ (((value) << CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT) & CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK)) ++#define CS_ACK_IRQ_MASK_TILER_OOM_SHIFT 26 ++#define CS_ACK_IRQ_MASK_TILER_OOM_MASK (0x1 << CS_ACK_IRQ_MASK_TILER_OOM_SHIFT) ++#define CS_ACK_IRQ_MASK_TILER_OOM_GET(reg_val) \ ++ (((reg_val)&CS_ACK_IRQ_MASK_TILER_OOM_MASK) >> CS_ACK_IRQ_MASK_TILER_OOM_SHIFT) ++#define CS_ACK_IRQ_MASK_TILER_OOM_SET(reg_val, value) \ ++ (((reg_val) & ~CS_ACK_IRQ_MASK_TILER_OOM_MASK) | \ ++ (((value) << CS_ACK_IRQ_MASK_TILER_OOM_SHIFT) & CS_ACK_IRQ_MASK_TILER_OOM_MASK)) ++#define CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT 27 ++#define CS_ACK_IRQ_MASK_PROTM_PEND_MASK (0x1 << CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT) ++#define CS_ACK_IRQ_MASK_PROTM_PEND_GET(reg_val) \ ++ (((reg_val)&CS_ACK_IRQ_MASK_PROTM_PEND_MASK) >> CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT) ++#define CS_ACK_IRQ_MASK_PROTM_PEND_SET(reg_val, value) \ ++ (((reg_val) & ~CS_ACK_IRQ_MASK_PROTM_PEND_MASK) | \ ++ (((value) << CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT) & CS_ACK_IRQ_MASK_PROTM_PEND_MASK)) ++#define CS_ACK_IRQ_MASK_FATAL_SHIFT 30 ++#define CS_ACK_IRQ_MASK_FATAL_MASK (0x1 << CS_ACK_IRQ_MASK_FATAL_SHIFT) ++#define CS_ACK_IRQ_MASK_FATAL_GET(reg_val) (((reg_val)&CS_ACK_IRQ_MASK_FATAL_MASK) >> 
CS_ACK_IRQ_MASK_FATAL_SHIFT) ++#define CS_ACK_IRQ_MASK_FATAL_SET(reg_val, value) \ ++ (((reg_val) & ~CS_ACK_IRQ_MASK_FATAL_MASK) | \ ++ (((value) << CS_ACK_IRQ_MASK_FATAL_SHIFT) & CS_ACK_IRQ_MASK_FATAL_MASK)) ++#define CS_ACK_IRQ_MASK_FAULT_SHIFT 31 ++#define CS_ACK_IRQ_MASK_FAULT_MASK (0x1U << CS_ACK_IRQ_MASK_FAULT_SHIFT) /* unsigned literal: a bit-31 shift must not land in int's sign bit */ ++#define CS_ACK_IRQ_MASK_FAULT_GET(reg_val) (((reg_val)&CS_ACK_IRQ_MASK_FAULT_MASK) >> CS_ACK_IRQ_MASK_FAULT_SHIFT) ++#define CS_ACK_IRQ_MASK_FAULT_SET(reg_val, value) \ ++ (((reg_val) & ~CS_ACK_IRQ_MASK_FAULT_MASK) | \ ++ (((value) << CS_ACK_IRQ_MASK_FAULT_SHIFT) & CS_ACK_IRQ_MASK_FAULT_MASK)) ++ ++/* CS_BASE register */ ++#define CS_BASE_POINTER_SHIFT 0 ++#define CS_BASE_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_BASE_POINTER_SHIFT) ++#define CS_BASE_POINTER_GET(reg_val) (((reg_val)&CS_BASE_POINTER_MASK) >> CS_BASE_POINTER_SHIFT) ++#define CS_BASE_POINTER_SET(reg_val, value) \ ++ (((reg_val) & ~CS_BASE_POINTER_MASK) | (((value) << CS_BASE_POINTER_SHIFT) & CS_BASE_POINTER_MASK)) ++ ++/* CS_SIZE register */ ++#define CS_SIZE_SIZE_SHIFT 0 ++#define CS_SIZE_SIZE_MASK (0xFFFFFFFF << CS_SIZE_SIZE_SHIFT) ++#define CS_SIZE_SIZE_GET(reg_val) (((reg_val)&CS_SIZE_SIZE_MASK) >> CS_SIZE_SIZE_SHIFT) ++#define CS_SIZE_SIZE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_SIZE_SIZE_MASK) | (((value) << CS_SIZE_SIZE_SHIFT) & CS_SIZE_SIZE_MASK)) ++ ++/* CS_TILER_HEAP_START register */ ++#define CS_TILER_HEAP_START_POINTER_SHIFT 0 ++#define CS_TILER_HEAP_START_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_TILER_HEAP_START_POINTER_SHIFT) ++#define CS_TILER_HEAP_START_POINTER_GET(reg_val) \ ++ (((reg_val)&CS_TILER_HEAP_START_POINTER_MASK) >> CS_TILER_HEAP_START_POINTER_SHIFT) ++#define CS_TILER_HEAP_START_POINTER_SET(reg_val, value) \ ++ (((reg_val) & ~CS_TILER_HEAP_START_POINTER_MASK) | \ ++ (((value) << CS_TILER_HEAP_START_POINTER_SHIFT) & CS_TILER_HEAP_START_POINTER_MASK)) ++/* HeapChunkPointer nested in CS_TILER_HEAP_START_POINTER */ ++/* End of HeapChunkPointer nested in
CS_TILER_HEAP_START_POINTER */ ++ ++/* CS_TILER_HEAP_END register */ ++#define CS_TILER_HEAP_END_POINTER_SHIFT 0 ++#define CS_TILER_HEAP_END_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_TILER_HEAP_END_POINTER_SHIFT) ++#define CS_TILER_HEAP_END_POINTER_GET(reg_val) \ ++ (((reg_val)&CS_TILER_HEAP_END_POINTER_MASK) >> CS_TILER_HEAP_END_POINTER_SHIFT) ++#define CS_TILER_HEAP_END_POINTER_SET(reg_val, value) \ ++ (((reg_val) & ~CS_TILER_HEAP_END_POINTER_MASK) | \ ++ (((value) << CS_TILER_HEAP_END_POINTER_SHIFT) & CS_TILER_HEAP_END_POINTER_MASK)) ++/* HeapChunkPointer nested in CS_TILER_HEAP_END_POINTER */ ++/* End of HeapChunkPointer nested in CS_TILER_HEAP_END_POINTER */ ++ ++/* CS_USER_INPUT register */ ++#define CS_USER_INPUT_POINTER_SHIFT 0 ++#define CS_USER_INPUT_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_USER_INPUT_POINTER_SHIFT) ++#define CS_USER_INPUT_POINTER_GET(reg_val) (((reg_val)&CS_USER_INPUT_POINTER_MASK) >> CS_USER_INPUT_POINTER_SHIFT) ++#define CS_USER_INPUT_POINTER_SET(reg_val, value) \ ++ (((reg_val) & ~CS_USER_INPUT_POINTER_MASK) | \ ++ (((value) << CS_USER_INPUT_POINTER_SHIFT) & CS_USER_INPUT_POINTER_MASK)) ++ ++/* CS_USER_OUTPUT register */ ++#define CS_USER_OUTPUT_POINTER_SHIFT 0 ++#define CS_USER_OUTPUT_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_USER_OUTPUT_POINTER_SHIFT) ++#define CS_USER_OUTPUT_POINTER_GET(reg_val) (((reg_val)&CS_USER_OUTPUT_POINTER_MASK) >> CS_USER_OUTPUT_POINTER_SHIFT) ++#define CS_USER_OUTPUT_POINTER_SET(reg_val, value) \ ++ (((reg_val) & ~CS_USER_OUTPUT_POINTER_MASK) | \ ++ (((value) << CS_USER_OUTPUT_POINTER_SHIFT) & CS_USER_OUTPUT_POINTER_MASK)) ++ ++/* CS_INSTR_CONFIG register */ ++#define CS_INSTR_CONFIG_JASID_SHIFT (0) ++#define CS_INSTR_CONFIG_JASID_MASK ((u32)0xF << CS_INSTR_CONFIG_JASID_SHIFT) ++#define CS_INSTR_CONFIG_JASID_GET(reg_val) (((reg_val)&CS_INSTR_CONFIG_JASID_MASK) >> CS_INSTR_CONFIG_JASID_SHIFT) ++#define CS_INSTR_CONFIG_JASID_SET(reg_val, value) \ ++ (((reg_val) & ~CS_INSTR_CONFIG_JASID_MASK) | \ ++ (((value) << 
CS_INSTR_CONFIG_JASID_SHIFT) & CS_INSTR_CONFIG_JASID_MASK)) ++#define CS_INSTR_CONFIG_EVENT_SIZE_SHIFT (4) ++#define CS_INSTR_CONFIG_EVENT_SIZE_MASK ((u32)0xF << CS_INSTR_CONFIG_EVENT_SIZE_SHIFT) ++#define CS_INSTR_CONFIG_EVENT_SIZE_GET(reg_val) \ ++ (((reg_val)&CS_INSTR_CONFIG_EVENT_SIZE_MASK) >> CS_INSTR_CONFIG_EVENT_SIZE_SHIFT) ++#define CS_INSTR_CONFIG_EVENT_SIZE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_INSTR_CONFIG_EVENT_SIZE_MASK) | \ ++ (((value) << CS_INSTR_CONFIG_EVENT_SIZE_SHIFT) & CS_INSTR_CONFIG_EVENT_SIZE_MASK)) ++#define CS_INSTR_CONFIG_EVENT_STATE_SHIFT (16) ++#define CS_INSTR_CONFIG_EVENT_STATE_MASK ((u32)0xFF << CS_INSTR_CONFIG_EVENT_STATE_SHIFT) ++#define CS_INSTR_CONFIG_EVENT_STATE_GET(reg_val) \ ++ (((reg_val)&CS_INSTR_CONFIG_EVENT_STATE_MASK) >> CS_INSTR_CONFIG_EVENT_STATE_SHIFT) ++#define CS_INSTR_CONFIG_EVENT_STATE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_INSTR_CONFIG_EVENT_STATE_MASK) | \ ++ (((value) << CS_INSTR_CONFIG_EVENT_STATE_SHIFT) & CS_INSTR_CONFIG_EVENT_STATE_MASK)) ++ ++/* CS_INSTR_BUFFER_SIZE register */ ++#define CS_INSTR_BUFFER_SIZE_SIZE_SHIFT (0) ++#define CS_INSTR_BUFFER_SIZE_SIZE_MASK ((u32)0xFFFFFFFF << CS_INSTR_BUFFER_SIZE_SIZE_SHIFT) ++#define CS_INSTR_BUFFER_SIZE_SIZE_GET(reg_val) \ ++ (((reg_val)&CS_INSTR_BUFFER_SIZE_SIZE_MASK) >> CS_INSTR_BUFFER_SIZE_SIZE_SHIFT) ++#define CS_INSTR_BUFFER_SIZE_SIZE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_INSTR_BUFFER_SIZE_SIZE_MASK) | \ ++ (((value) << CS_INSTR_BUFFER_SIZE_SIZE_SHIFT) & CS_INSTR_BUFFER_SIZE_SIZE_MASK)) ++ ++/* CS_INSTR_BUFFER_BASE register */ ++#define CS_INSTR_BUFFER_BASE_POINTER_SHIFT (0) ++#define CS_INSTR_BUFFER_BASE_POINTER_MASK ((u64)0xFFFFFFFFFFFFFFFF << CS_INSTR_BUFFER_BASE_POINTER_SHIFT) ++#define CS_INSTR_BUFFER_BASE_POINTER_GET(reg_val) \ ++ (((reg_val)&CS_INSTR_BUFFER_BASE_POINTER_MASK) >> CS_INSTR_BUFFER_BASE_POINTER_SHIFT) ++#define CS_INSTR_BUFFER_BASE_POINTER_SET(reg_val, value) \ ++ (((reg_val) & ~CS_INSTR_BUFFER_BASE_POINTER_MASK) | \ ++ 
(((value) << CS_INSTR_BUFFER_BASE_POINTER_SHIFT) & CS_INSTR_BUFFER_BASE_POINTER_MASK)) ++ ++/* CS_INSTR_BUFFER_OFFSET_POINTER register */ ++#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT (0) ++#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK \ ++ ((u64)0xFFFFFFFFFFFFFFFF << CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT) /* all 64 bits; same form as CS_INSTR_BUFFER_BASE_POINTER_MASK */ ++#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_GET(reg_val) \ ++ (((reg_val)&CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK) >> CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT) ++#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SET(reg_val, value) \ ++ (((reg_val) & ~CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK) | \ ++ (((value) << CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT) & CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK)) ++ ++/* End of CS_KERNEL_INPUT_BLOCK register set definitions */ ++ ++/* CS_KERNEL_OUTPUT_BLOCK register set definitions */ ++ ++/* CS_ACK register */ ++#define CS_ACK_STATE_SHIFT 0 ++#define CS_ACK_STATE_MASK (0x7 << CS_ACK_STATE_SHIFT) ++#define CS_ACK_STATE_GET(reg_val) (((reg_val)&CS_ACK_STATE_MASK) >> CS_ACK_STATE_SHIFT) ++#define CS_ACK_STATE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_ACK_STATE_MASK) | (((value) << CS_ACK_STATE_SHIFT) & CS_ACK_STATE_MASK)) ++/* CS_ACK_STATE values */ ++#define CS_ACK_STATE_STOP 0x0 ++#define CS_ACK_STATE_START 0x1 ++/* End of CS_ACK_STATE values */ ++#define CS_ACK_EXTRACT_EVENT_SHIFT 4 ++#define CS_ACK_EXTRACT_EVENT_MASK (0x1 << CS_ACK_EXTRACT_EVENT_SHIFT) ++#define CS_ACK_EXTRACT_EVENT_GET(reg_val) (((reg_val)&CS_ACK_EXTRACT_EVENT_MASK) >> CS_ACK_EXTRACT_EVENT_SHIFT) ++#define CS_ACK_EXTRACT_EVENT_SET(reg_val, value) \ ++ (((reg_val) & ~CS_ACK_EXTRACT_EVENT_MASK) | (((value) << CS_ACK_EXTRACT_EVENT_SHIFT) & CS_ACK_EXTRACT_EVENT_MASK)) ++#define CS_ACK_TILER_OOM_SHIFT 26 ++#define CS_ACK_TILER_OOM_MASK (0x1 << CS_ACK_TILER_OOM_SHIFT) ++#define CS_ACK_TILER_OOM_GET(reg_val) (((reg_val)&CS_ACK_TILER_OOM_MASK) >> CS_ACK_TILER_OOM_SHIFT) ++#define CS_ACK_TILER_OOM_SET(reg_val, value) \ ++
(((reg_val) & ~CS_ACK_TILER_OOM_MASK) | (((value) << CS_ACK_TILER_OOM_SHIFT) & CS_ACK_TILER_OOM_MASK)) ++#define CS_ACK_PROTM_PEND_SHIFT 27 ++#define CS_ACK_PROTM_PEND_MASK (0x1 << CS_ACK_PROTM_PEND_SHIFT) ++#define CS_ACK_PROTM_PEND_GET(reg_val) (((reg_val)&CS_ACK_PROTM_PEND_MASK) >> CS_ACK_PROTM_PEND_SHIFT) ++#define CS_ACK_PROTM_PEND_SET(reg_val, value) \ ++ (((reg_val) & ~CS_ACK_PROTM_PEND_MASK) | (((value) << CS_ACK_PROTM_PEND_SHIFT) & CS_ACK_PROTM_PEND_MASK)) ++#define CS_ACK_FATAL_SHIFT 30 ++#define CS_ACK_FATAL_MASK (0x1 << CS_ACK_FATAL_SHIFT) ++#define CS_ACK_FATAL_GET(reg_val) (((reg_val)&CS_ACK_FATAL_MASK) >> CS_ACK_FATAL_SHIFT) ++#define CS_ACK_FATAL_SET(reg_val, value) \ ++ (((reg_val) & ~CS_ACK_FATAL_MASK) | (((value) << CS_ACK_FATAL_SHIFT) & CS_ACK_FATAL_MASK)) ++#define CS_ACK_FAULT_SHIFT 31 ++#define CS_ACK_FAULT_MASK (0x1U << CS_ACK_FAULT_SHIFT) /* unsigned literal: a bit-31 shift must not land in int's sign bit */ ++#define CS_ACK_FAULT_GET(reg_val) (((reg_val)&CS_ACK_FAULT_MASK) >> CS_ACK_FAULT_SHIFT) ++#define CS_ACK_FAULT_SET(reg_val, value) \ ++ (((reg_val) & ~CS_ACK_FAULT_MASK) | (((value) << CS_ACK_FAULT_SHIFT) & CS_ACK_FAULT_MASK)) ++ ++/* CS_STATUS_CMD_PTR register */ ++#define CS_STATUS_CMD_PTR_POINTER_SHIFT 0 ++#define CS_STATUS_CMD_PTR_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_STATUS_CMD_PTR_POINTER_SHIFT) ++#define CS_STATUS_CMD_PTR_POINTER_GET(reg_val) \ ++ (((reg_val)&CS_STATUS_CMD_PTR_POINTER_MASK) >> CS_STATUS_CMD_PTR_POINTER_SHIFT) ++#define CS_STATUS_CMD_PTR_POINTER_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_CMD_PTR_POINTER_MASK) | \ ++ (((value) << CS_STATUS_CMD_PTR_POINTER_SHIFT) & CS_STATUS_CMD_PTR_POINTER_MASK)) ++ ++/* CS_STATUS_WAIT register */ ++#define CS_STATUS_WAIT_SB_MASK_SHIFT 0 ++#define CS_STATUS_WAIT_SB_MASK_MASK (0xFFFF << CS_STATUS_WAIT_SB_MASK_SHIFT) ++#define CS_STATUS_WAIT_SB_MASK_GET(reg_val) (((reg_val)&CS_STATUS_WAIT_SB_MASK_MASK) >> CS_STATUS_WAIT_SB_MASK_SHIFT) ++#define CS_STATUS_WAIT_SB_MASK_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_WAIT_SB_MASK_MASK) | \ ++
(((value) << CS_STATUS_WAIT_SB_MASK_SHIFT) & CS_STATUS_WAIT_SB_MASK_MASK)) ++#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT 24 ++#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK (0xF << CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT) ++#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(reg_val) \ ++ (((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK) >> CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT) ++#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK) | \ ++ (((value) << CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK)) ++/* CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */ ++#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE 0x0 ++#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT 0x1 ++/* End of CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */ ++#define CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT 28 ++#define CS_STATUS_WAIT_PROGRESS_WAIT_MASK (0x1 << CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT) ++#define CS_STATUS_WAIT_PROGRESS_WAIT_GET(reg_val) \ ++ (((reg_val)&CS_STATUS_WAIT_PROGRESS_WAIT_MASK) >> CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT) ++#define CS_STATUS_WAIT_PROGRESS_WAIT_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_WAIT_PROGRESS_WAIT_MASK) | \ ++ (((value) << CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT) & CS_STATUS_WAIT_PROGRESS_WAIT_MASK)) ++#define CS_STATUS_WAIT_PROTM_PEND_SHIFT 29 ++#define CS_STATUS_WAIT_PROTM_PEND_MASK (0x1 << CS_STATUS_WAIT_PROTM_PEND_SHIFT) ++#define CS_STATUS_WAIT_PROTM_PEND_GET(reg_val) \ ++ (((reg_val)&CS_STATUS_WAIT_PROTM_PEND_MASK) >> CS_STATUS_WAIT_PROTM_PEND_SHIFT) ++#define CS_STATUS_WAIT_PROTM_PEND_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_WAIT_PROTM_PEND_MASK) | \ ++ (((value) << CS_STATUS_WAIT_PROTM_PEND_SHIFT) & CS_STATUS_WAIT_PROTM_PEND_MASK)) ++#define CS_STATUS_WAIT_SYNC_WAIT_SHIFT 31 ++#define CS_STATUS_WAIT_SYNC_WAIT_MASK (0x1U << CS_STATUS_WAIT_SYNC_WAIT_SHIFT) /* unsigned literal: a bit-31 shift must not land in int's sign bit */ ++#define CS_STATUS_WAIT_SYNC_WAIT_GET(reg_val) \ ++ (((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_MASK) >>
CS_STATUS_WAIT_SYNC_WAIT_SHIFT) ++#define CS_STATUS_WAIT_SYNC_WAIT_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_MASK) | \ ++ (((value) << CS_STATUS_WAIT_SYNC_WAIT_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_MASK)) ++ ++/* CS_STATUS_REQ_RESOURCE register */ ++#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT 0 ++#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT) ++#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_GET(reg_val) \ ++ (((reg_val)&CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT) ++#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK) | \ ++ (((value) << CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK)) ++#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT 1 ++#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT) ++#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_GET(reg_val) \ ++ (((reg_val)&CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT) ++#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK) | \ ++ (((value) << CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK)) ++#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT 2 ++#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT) ++#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_GET(reg_val) \ ++ (((reg_val)&CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT) ++#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK) | \ ++ (((value) << 
CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK)) ++#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT 3 ++#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) ++#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_GET(reg_val) \ ++ (((reg_val)&CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) ++#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK) | \ ++ (((value) << CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK)) ++ ++/* CS_STATUS_WAIT_SYNC_POINTER register */ ++#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT 0 ++#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT) ++#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_GET(reg_val) \ ++ (((reg_val)&CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK) >> CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT) ++#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK) | \ ++ (((value) << CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT) & CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK)) ++ ++/* CS_STATUS_WAIT_SYNC_VALUE register */ ++#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT 0 ++#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK (0xFFFFFFFF << CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT) ++#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_GET(reg_val) \ ++ (((reg_val)&CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK) >> CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT) ++#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK) | \ ++ (((value) << CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT) & CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK)) ++ ++/* CS_STATUS_SCOREBOARDS register */ ++#define CS_STATUS_SCOREBOARDS_NONZERO_SHIFT (0) ++#define CS_STATUS_SCOREBOARDS_NONZERO_MASK \ ++ 
((0xFFFF) << CS_STATUS_SCOREBOARDS_NONZERO_SHIFT) ++#define CS_STATUS_SCOREBOARDS_NONZERO_GET(reg_val) \ ++ (((reg_val)&CS_STATUS_SCOREBOARDS_NONZERO_MASK) >> \ ++ CS_STATUS_SCOREBOARDS_NONZERO_SHIFT) ++#define CS_STATUS_SCOREBOARDS_NONZERO_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_SCOREBOARDS_NONZERO_MASK) | \ ++ (((value) << CS_STATUS_SCOREBOARDS_NONZERO_SHIFT) & \ ++ CS_STATUS_SCOREBOARDS_NONZERO_MASK)) ++ ++/* CS_STATUS_BLOCKED_REASON register */ ++#define CS_STATUS_BLOCKED_REASON_REASON_SHIFT (0) ++#define CS_STATUS_BLOCKED_REASON_REASON_MASK \ ++ ((0xF) << CS_STATUS_BLOCKED_REASON_REASON_SHIFT) ++#define CS_STATUS_BLOCKED_REASON_REASON_GET(reg_val) \ ++ (((reg_val)&CS_STATUS_BLOCKED_REASON_REASON_MASK) >> \ ++ CS_STATUS_BLOCKED_REASON_REASON_SHIFT) ++#define CS_STATUS_BLOCKED_REASON_REASON_SET(reg_val, value) \ ++ (((reg_val) & ~CS_STATUS_BLOCKED_REASON_REASON_MASK) | \ ++ (((value) << CS_STATUS_BLOCKED_REASON_REASON_SHIFT) & \ ++ CS_STATUS_BLOCKED_REASON_REASON_MASK)) ++/* CS_STATUS_BLOCKED_REASON_reason values */ ++#define CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED 0x0 ++#define CS_STATUS_BLOCKED_REASON_REASON_WAIT 0x1 ++#define CS_STATUS_BLOCKED_REASON_REASON_PROGRESS_WAIT 0x2 ++#define CS_STATUS_BLOCKED_REASON_REASON_SYNC_WAIT 0x3 ++#define CS_STATUS_BLOCKED_REASON_REASON_DEFERRED 0x4 ++#define CS_STATUS_BLOCKED_REASON_REASON_RESOURCE 0x5 ++#define CS_STATUS_BLOCKED_REASON_REASON_FLUSH 0x6 ++/* End of CS_STATUS_BLOCKED_REASON_reason values */ ++ ++/* CS_FAULT register */ ++#define CS_FAULT_EXCEPTION_TYPE_SHIFT 0 ++#define CS_FAULT_EXCEPTION_TYPE_MASK (0xFF << CS_FAULT_EXCEPTION_TYPE_SHIFT) ++#define CS_FAULT_EXCEPTION_TYPE_GET(reg_val) (((reg_val)&CS_FAULT_EXCEPTION_TYPE_MASK) >> CS_FAULT_EXCEPTION_TYPE_SHIFT) ++#define CS_FAULT_EXCEPTION_TYPE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_FAULT_EXCEPTION_TYPE_MASK) | \ ++ (((value) << CS_FAULT_EXCEPTION_TYPE_SHIFT) & CS_FAULT_EXCEPTION_TYPE_MASK)) ++/* CS_FAULT_EXCEPTION_TYPE values */ ++#define 
CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED 0x0F ++#define CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT 0x4B ++#define CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_PC 0x50 ++#define CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_ENC 0x51 ++#define CS_FAULT_EXCEPTION_TYPE_INSTR_BARRIER_FAULT 0x55 ++#define CS_FAULT_EXCEPTION_TYPE_DATA_INVALID_FAULT 0x58 ++#define CS_FAULT_EXCEPTION_TYPE_TILE_RANGE_FAULT 0x59 ++#define CS_FAULT_EXCEPTION_TYPE_ADDR_RANGE_FAULT 0x5A ++#define CS_FAULT_EXCEPTION_TYPE_IMPRECISE_FAULT 0x5B ++#define CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT 0x69 ++/* End of CS_FAULT_EXCEPTION_TYPE values */ ++#define CS_FAULT_EXCEPTION_DATA_SHIFT 8 ++#define CS_FAULT_EXCEPTION_DATA_MASK (0xFFFFFF << CS_FAULT_EXCEPTION_DATA_SHIFT) ++#define CS_FAULT_EXCEPTION_DATA_GET(reg_val) (((reg_val)&CS_FAULT_EXCEPTION_DATA_MASK) >> CS_FAULT_EXCEPTION_DATA_SHIFT) ++#define CS_FAULT_EXCEPTION_DATA_SET(reg_val, value) \ ++ (((reg_val) & ~CS_FAULT_EXCEPTION_DATA_MASK) | \ ++ (((value) << CS_FAULT_EXCEPTION_DATA_SHIFT) & CS_FAULT_EXCEPTION_DATA_MASK)) ++ ++/* CS_FATAL register */ ++#define CS_FATAL_EXCEPTION_TYPE_SHIFT 0 ++#define CS_FATAL_EXCEPTION_TYPE_MASK (0xFF << CS_FATAL_EXCEPTION_TYPE_SHIFT) ++#define CS_FATAL_EXCEPTION_TYPE_GET(reg_val) (((reg_val)&CS_FATAL_EXCEPTION_TYPE_MASK) >> CS_FATAL_EXCEPTION_TYPE_SHIFT) ++#define CS_FATAL_EXCEPTION_TYPE_SET(reg_val, value) \ ++ (((reg_val) & ~CS_FATAL_EXCEPTION_TYPE_MASK) | \ ++ (((value) << CS_FATAL_EXCEPTION_TYPE_SHIFT) & CS_FATAL_EXCEPTION_TYPE_MASK)) ++/* CS_FATAL_EXCEPTION_TYPE values */ ++#define CS_FATAL_EXCEPTION_TYPE_CS_CONFIG_FAULT 0x40 ++#define CS_FATAL_EXCEPTION_TYPE_CS_ENDPOINT_FAULT 0x44 ++#define CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT 0x48 ++#define CS_FATAL_EXCEPTION_TYPE_CS_INVALID_INSTRUCTION 0x49 ++#define CS_FATAL_EXCEPTION_TYPE_CS_CALL_STACK_OVERFLOW 0x4A ++#define CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR 0x68 ++/* End of CS_FATAL_EXCEPTION_TYPE values */ ++#define CS_FATAL_EXCEPTION_DATA_SHIFT 8 
/*
 * Register field accessors.
 *
 * Every field FOO of a register is described by four macros:
 *   FOO_SHIFT               - bit position of the field's least significant bit
 *   FOO_MASK                - field-wide mask, already shifted into position
 *   FOO_GET(reg_val)        - extract the field from reg_val, shifted down to bit 0
 *   FOO_SET(reg_val, value) - yield reg_val with the field replaced by value;
 *                             bits of value that do not fit the field are dropped
 *
 * GET and SET are pure expressions: they evaluate each argument exactly once
 * and do not modify reg_val.
 *
 * Masks whose shifted value reaches bit 31 are built from an unsigned literal.
 * Left-shifting a signed int into the sign bit is undefined behaviour
 * (C11 6.5.7), and where it happens to yield a negative int the mask is
 * sign-extended when combined with a 64-bit operand. All other masks keep
 * their original literal types.
 *
 * NOTE(review): for fields that include bit 31, callers should pass an
 * unsigned `value` to FOO_SET, since `(value) << FOO_SHIFT` is evaluated in
 * the (promoted) type of `value`.
 */

/* CS_FATAL EXCEPTION_DATA field, bits 31:8 (CS_FATAL_EXCEPTION_DATA_SHIFT is defined just above) */
#define CS_FATAL_EXCEPTION_DATA_MASK (0xFFFFFFU << CS_FATAL_EXCEPTION_DATA_SHIFT)
#define CS_FATAL_EXCEPTION_DATA_GET(reg_val) \
	(((reg_val) & CS_FATAL_EXCEPTION_DATA_MASK) >> CS_FATAL_EXCEPTION_DATA_SHIFT)
#define CS_FATAL_EXCEPTION_DATA_SET(reg_val, value) \
	(((reg_val) & ~CS_FATAL_EXCEPTION_DATA_MASK) | \
	 (((value) << CS_FATAL_EXCEPTION_DATA_SHIFT) & CS_FATAL_EXCEPTION_DATA_MASK))

/* CS_FAULT_INFO register */
#define CS_FAULT_INFO_EXCEPTION_DATA_SHIFT 0
#define CS_FAULT_INFO_EXCEPTION_DATA_MASK (0xFFFFFFFFFFFFFFFF << CS_FAULT_INFO_EXCEPTION_DATA_SHIFT)
#define CS_FAULT_INFO_EXCEPTION_DATA_GET(reg_val) \
	(((reg_val) & CS_FAULT_INFO_EXCEPTION_DATA_MASK) >> CS_FAULT_INFO_EXCEPTION_DATA_SHIFT)
#define CS_FAULT_INFO_EXCEPTION_DATA_SET(reg_val, value) \
	(((reg_val) & ~CS_FAULT_INFO_EXCEPTION_DATA_MASK) | \
	 (((value) << CS_FAULT_INFO_EXCEPTION_DATA_SHIFT) & CS_FAULT_INFO_EXCEPTION_DATA_MASK))

/* CS_FATAL_INFO register */
#define CS_FATAL_INFO_EXCEPTION_DATA_SHIFT 0
#define CS_FATAL_INFO_EXCEPTION_DATA_MASK (0xFFFFFFFFFFFFFFFF << CS_FATAL_INFO_EXCEPTION_DATA_SHIFT)
#define CS_FATAL_INFO_EXCEPTION_DATA_GET(reg_val) \
	(((reg_val) & CS_FATAL_INFO_EXCEPTION_DATA_MASK) >> CS_FATAL_INFO_EXCEPTION_DATA_SHIFT)
#define CS_FATAL_INFO_EXCEPTION_DATA_SET(reg_val, value) \
	(((reg_val) & ~CS_FATAL_INFO_EXCEPTION_DATA_MASK) | \
	 (((value) << CS_FATAL_INFO_EXCEPTION_DATA_SHIFT) & CS_FATAL_INFO_EXCEPTION_DATA_MASK))

/* CS_HEAP_VT_START register */
#define CS_HEAP_VT_START_VALUE_SHIFT 0
#define CS_HEAP_VT_START_VALUE_MASK (0xFFFFFFFF << CS_HEAP_VT_START_VALUE_SHIFT)
#define CS_HEAP_VT_START_VALUE_GET(reg_val) \
	(((reg_val) & CS_HEAP_VT_START_VALUE_MASK) >> CS_HEAP_VT_START_VALUE_SHIFT)
#define CS_HEAP_VT_START_VALUE_SET(reg_val, value) \
	(((reg_val) & ~CS_HEAP_VT_START_VALUE_MASK) | \
	 (((value) << CS_HEAP_VT_START_VALUE_SHIFT) & CS_HEAP_VT_START_VALUE_MASK))

/* CS_HEAP_VT_END register */
#define CS_HEAP_VT_END_VALUE_SHIFT 0
#define CS_HEAP_VT_END_VALUE_MASK (0xFFFFFFFF << CS_HEAP_VT_END_VALUE_SHIFT)
#define CS_HEAP_VT_END_VALUE_GET(reg_val) \
	(((reg_val) & CS_HEAP_VT_END_VALUE_MASK) >> CS_HEAP_VT_END_VALUE_SHIFT)
#define CS_HEAP_VT_END_VALUE_SET(reg_val, value) \
	(((reg_val) & ~CS_HEAP_VT_END_VALUE_MASK) | \
	 (((value) << CS_HEAP_VT_END_VALUE_SHIFT) & CS_HEAP_VT_END_VALUE_MASK))

/* CS_HEAP_FRAG_END register */
#define CS_HEAP_FRAG_END_VALUE_SHIFT 0
#define CS_HEAP_FRAG_END_VALUE_MASK (0xFFFFFFFF << CS_HEAP_FRAG_END_VALUE_SHIFT)
#define CS_HEAP_FRAG_END_VALUE_GET(reg_val) \
	(((reg_val) & CS_HEAP_FRAG_END_VALUE_MASK) >> CS_HEAP_FRAG_END_VALUE_SHIFT)
#define CS_HEAP_FRAG_END_VALUE_SET(reg_val, value) \
	(((reg_val) & ~CS_HEAP_FRAG_END_VALUE_MASK) | \
	 (((value) << CS_HEAP_FRAG_END_VALUE_SHIFT) & CS_HEAP_FRAG_END_VALUE_MASK))

/* CS_HEAP_ADDRESS register */
#define CS_HEAP_ADDRESS_POINTER_SHIFT 0
#define CS_HEAP_ADDRESS_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_HEAP_ADDRESS_POINTER_SHIFT)
#define CS_HEAP_ADDRESS_POINTER_GET(reg_val) \
	(((reg_val) & CS_HEAP_ADDRESS_POINTER_MASK) >> CS_HEAP_ADDRESS_POINTER_SHIFT)
#define CS_HEAP_ADDRESS_POINTER_SET(reg_val, value) \
	(((reg_val) & ~CS_HEAP_ADDRESS_POINTER_MASK) | \
	 (((value) << CS_HEAP_ADDRESS_POINTER_SHIFT) & CS_HEAP_ADDRESS_POINTER_MASK))
/* End of CS_KERNEL_OUTPUT_BLOCK register set definitions */

/* CS_USER_INPUT_BLOCK register set definitions */

/* CS_INSERT register */
#define CS_INSERT_VALUE_SHIFT 0
#define CS_INSERT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_INSERT_VALUE_SHIFT)
#define CS_INSERT_VALUE_GET(reg_val) \
	(((reg_val) & CS_INSERT_VALUE_MASK) >> CS_INSERT_VALUE_SHIFT)
#define CS_INSERT_VALUE_SET(reg_val, value) \
	(((reg_val) & ~CS_INSERT_VALUE_MASK) | \
	 (((value) << CS_INSERT_VALUE_SHIFT) & CS_INSERT_VALUE_MASK))

/* CS_EXTRACT_INIT register */
#define CS_EXTRACT_INIT_VALUE_SHIFT 0
#define CS_EXTRACT_INIT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_EXTRACT_INIT_VALUE_SHIFT)
#define CS_EXTRACT_INIT_VALUE_GET(reg_val) \
	(((reg_val) & CS_EXTRACT_INIT_VALUE_MASK) >> CS_EXTRACT_INIT_VALUE_SHIFT)
#define CS_EXTRACT_INIT_VALUE_SET(reg_val, value) \
	(((reg_val) & ~CS_EXTRACT_INIT_VALUE_MASK) | \
	 (((value) << CS_EXTRACT_INIT_VALUE_SHIFT) & CS_EXTRACT_INIT_VALUE_MASK))
/* End of CS_USER_INPUT_BLOCK register set definitions */

/* CS_USER_OUTPUT_BLOCK register set definitions */

/* CS_EXTRACT register */
#define CS_EXTRACT_VALUE_SHIFT 0
#define CS_EXTRACT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_EXTRACT_VALUE_SHIFT)
#define CS_EXTRACT_VALUE_GET(reg_val) \
	(((reg_val) & CS_EXTRACT_VALUE_MASK) >> CS_EXTRACT_VALUE_SHIFT)
#define CS_EXTRACT_VALUE_SET(reg_val, value) \
	(((reg_val) & ~CS_EXTRACT_VALUE_MASK) | \
	 (((value) << CS_EXTRACT_VALUE_SHIFT) & CS_EXTRACT_VALUE_MASK))

/* CS_ACTIVE register */
#define CS_ACTIVE_HW_ACTIVE_SHIFT 0
#define CS_ACTIVE_HW_ACTIVE_MASK (0x1 << CS_ACTIVE_HW_ACTIVE_SHIFT)
#define CS_ACTIVE_HW_ACTIVE_GET(reg_val) \
	(((reg_val) & CS_ACTIVE_HW_ACTIVE_MASK) >> CS_ACTIVE_HW_ACTIVE_SHIFT)
#define CS_ACTIVE_HW_ACTIVE_SET(reg_val, value) \
	(((reg_val) & ~CS_ACTIVE_HW_ACTIVE_MASK) | \
	 (((value) << CS_ACTIVE_HW_ACTIVE_SHIFT) & CS_ACTIVE_HW_ACTIVE_MASK))
/* End of CS_USER_OUTPUT_BLOCK register set definitions */

/* CSG_INPUT_BLOCK register set definitions */

/* CSG_REQ register */
#define CSG_REQ_STATE_SHIFT 0
#define CSG_REQ_STATE_MASK (0x7 << CSG_REQ_STATE_SHIFT)
#define CSG_REQ_STATE_GET(reg_val) \
	(((reg_val) & CSG_REQ_STATE_MASK) >> CSG_REQ_STATE_SHIFT)
#define CSG_REQ_STATE_SET(reg_val, value) \
	(((reg_val) & ~CSG_REQ_STATE_MASK) | \
	 (((value) << CSG_REQ_STATE_SHIFT) & CSG_REQ_STATE_MASK))
/* CSG_REQ_STATE values */
#define CSG_REQ_STATE_TERMINATE 0x0
#define CSG_REQ_STATE_START 0x1
#define CSG_REQ_STATE_SUSPEND 0x2
#define CSG_REQ_STATE_RESUME 0x3
/* End of CSG_REQ_STATE values */
#define CSG_REQ_EP_CFG_SHIFT 4
#define CSG_REQ_EP_CFG_MASK (0x1 << CSG_REQ_EP_CFG_SHIFT)
#define CSG_REQ_EP_CFG_GET(reg_val) \
	(((reg_val) & CSG_REQ_EP_CFG_MASK) >> CSG_REQ_EP_CFG_SHIFT)
#define CSG_REQ_EP_CFG_SET(reg_val, value) \
	(((reg_val) & ~CSG_REQ_EP_CFG_MASK) | \
	 (((value) << CSG_REQ_EP_CFG_SHIFT) & CSG_REQ_EP_CFG_MASK))
#define CSG_REQ_STATUS_UPDATE_SHIFT 5
#define CSG_REQ_STATUS_UPDATE_MASK (0x1 << CSG_REQ_STATUS_UPDATE_SHIFT)
#define CSG_REQ_STATUS_UPDATE_GET(reg_val) \
	(((reg_val) & CSG_REQ_STATUS_UPDATE_MASK) >> CSG_REQ_STATUS_UPDATE_SHIFT)
#define CSG_REQ_STATUS_UPDATE_SET(reg_val, value) \
	(((reg_val) & ~CSG_REQ_STATUS_UPDATE_MASK) | \
	 (((value) << CSG_REQ_STATUS_UPDATE_SHIFT) & CSG_REQ_STATUS_UPDATE_MASK))
#define CSG_REQ_SYNC_UPDATE_SHIFT 28
#define CSG_REQ_SYNC_UPDATE_MASK (0x1 << CSG_REQ_SYNC_UPDATE_SHIFT)
#define CSG_REQ_SYNC_UPDATE_GET(reg_val) \
	(((reg_val) & CSG_REQ_SYNC_UPDATE_MASK) >> CSG_REQ_SYNC_UPDATE_SHIFT)
#define CSG_REQ_SYNC_UPDATE_SET(reg_val, value) \
	(((reg_val) & ~CSG_REQ_SYNC_UPDATE_MASK) | \
	 (((value) << CSG_REQ_SYNC_UPDATE_SHIFT) & CSG_REQ_SYNC_UPDATE_MASK))
#define CSG_REQ_IDLE_SHIFT 29
#define CSG_REQ_IDLE_MASK (0x1 << CSG_REQ_IDLE_SHIFT)
#define CSG_REQ_IDLE_GET(reg_val) \
	(((reg_val) & CSG_REQ_IDLE_MASK) >> CSG_REQ_IDLE_SHIFT)
#define CSG_REQ_IDLE_SET(reg_val, value) \
	(((reg_val) & ~CSG_REQ_IDLE_MASK) | \
	 (((value) << CSG_REQ_IDLE_SHIFT) & CSG_REQ_IDLE_MASK))
#define CSG_REQ_DOORBELL_SHIFT 30
#define CSG_REQ_DOORBELL_MASK (0x1 << CSG_REQ_DOORBELL_SHIFT)
#define CSG_REQ_DOORBELL_GET(reg_val) \
	(((reg_val) & CSG_REQ_DOORBELL_MASK) >> CSG_REQ_DOORBELL_SHIFT)
#define CSG_REQ_DOORBELL_SET(reg_val, value) \
	(((reg_val) & ~CSG_REQ_DOORBELL_MASK) | \
	 (((value) << CSG_REQ_DOORBELL_SHIFT) & CSG_REQ_DOORBELL_MASK))
#define CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT 31
/* Bit 31: unsigned literal, a signed 0x1 << 31 would be undefined behaviour. */
#define CSG_REQ_PROGRESS_TIMER_EVENT_MASK (0x1U << CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT)
#define CSG_REQ_PROGRESS_TIMER_EVENT_GET(reg_val) \
	(((reg_val) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK) >> CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT)
#define CSG_REQ_PROGRESS_TIMER_EVENT_SET(reg_val, value) \
	(((reg_val) & ~CSG_REQ_PROGRESS_TIMER_EVENT_MASK) | \
	 (((value) << CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK))

/* CSG_ACK_IRQ_MASK register */
#define CSG_ACK_IRQ_MASK_STATE_SHIFT 0
#define CSG_ACK_IRQ_MASK_STATE_MASK (0x7 << CSG_ACK_IRQ_MASK_STATE_SHIFT)
#define CSG_ACK_IRQ_MASK_STATE_GET(reg_val) \
	(((reg_val) & CSG_ACK_IRQ_MASK_STATE_MASK) >> CSG_ACK_IRQ_MASK_STATE_SHIFT)
#define CSG_ACK_IRQ_MASK_STATE_SET(reg_val, value) \
	(((reg_val) & ~CSG_ACK_IRQ_MASK_STATE_MASK) | \
	 (((value) << CSG_ACK_IRQ_MASK_STATE_SHIFT) & CSG_ACK_IRQ_MASK_STATE_MASK))
/* CSG_ACK_IRQ_MASK_STATE values */
#define CSG_ACK_IRQ_MASK_STATE_DISABLED 0x0
#define CSG_ACK_IRQ_MASK_STATE_ENABLED 0x7
/* End of CSG_ACK_IRQ_MASK_STATE values */
#define CSG_ACK_IRQ_MASK_EP_CFG_SHIFT 4
#define CSG_ACK_IRQ_MASK_EP_CFG_MASK (0x1 << CSG_ACK_IRQ_MASK_EP_CFG_SHIFT)
#define CSG_ACK_IRQ_MASK_EP_CFG_GET(reg_val) \
	(((reg_val) & CSG_ACK_IRQ_MASK_EP_CFG_MASK) >> CSG_ACK_IRQ_MASK_EP_CFG_SHIFT)
#define CSG_ACK_IRQ_MASK_EP_CFG_SET(reg_val, value) \
	(((reg_val) & ~CSG_ACK_IRQ_MASK_EP_CFG_MASK) | \
	 (((value) << CSG_ACK_IRQ_MASK_EP_CFG_SHIFT) & CSG_ACK_IRQ_MASK_EP_CFG_MASK))
#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT 5
#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK (0x1 << CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT)
#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_GET(reg_val) \
	(((reg_val) & CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK) >> CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT)
#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_SET(reg_val, value) \
	(((reg_val) & ~CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK) | \
	 (((value) << CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT) & CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK))
#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT 28
#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK (0x1 << CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT)
#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_GET(reg_val) \
	(((reg_val) & CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK) >> CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT)
#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_SET(reg_val, value) \
	(((reg_val) & ~CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK) | \
	 (((value) << CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT) & CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK))
#define CSG_ACK_IRQ_MASK_IDLE_SHIFT 29
#define CSG_ACK_IRQ_MASK_IDLE_MASK (0x1 << CSG_ACK_IRQ_MASK_IDLE_SHIFT)
#define CSG_ACK_IRQ_MASK_IDLE_GET(reg_val) \
	(((reg_val) & CSG_ACK_IRQ_MASK_IDLE_MASK) >> CSG_ACK_IRQ_MASK_IDLE_SHIFT)
#define CSG_ACK_IRQ_MASK_IDLE_SET(reg_val, value) \
	(((reg_val) & ~CSG_ACK_IRQ_MASK_IDLE_MASK) | \
	 (((value) << CSG_ACK_IRQ_MASK_IDLE_SHIFT) & CSG_ACK_IRQ_MASK_IDLE_MASK))
#define CSG_ACK_IRQ_MASK_DOORBELL_SHIFT 30
#define CSG_ACK_IRQ_MASK_DOORBELL_MASK (0x1 << CSG_ACK_IRQ_MASK_DOORBELL_SHIFT)
#define CSG_ACK_IRQ_MASK_DOORBELL_GET(reg_val) \
	(((reg_val) & CSG_ACK_IRQ_MASK_DOORBELL_MASK) >> CSG_ACK_IRQ_MASK_DOORBELL_SHIFT)
#define CSG_ACK_IRQ_MASK_DOORBELL_SET(reg_val, value) \
	(((reg_val) & ~CSG_ACK_IRQ_MASK_DOORBELL_MASK) | \
	 (((value) << CSG_ACK_IRQ_MASK_DOORBELL_SHIFT) & CSG_ACK_IRQ_MASK_DOORBELL_MASK))
#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT 31
/* Bit 31: unsigned literal, a signed 0x1 << 31 would be undefined behaviour. */
#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK (0x1U << CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT)
#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_GET(reg_val) \
	(((reg_val) & CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK) >> \
	 CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT)
#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SET(reg_val, value) \
	(((reg_val) & ~CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK) | \
	 (((value) << CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT) & \
	  CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK))

/* CSG_EP_REQ register */
#define CSG_EP_REQ_COMPUTE_EP_SHIFT 0
#define CSG_EP_REQ_COMPUTE_EP_MASK (0xFF << CSG_EP_REQ_COMPUTE_EP_SHIFT)
#define CSG_EP_REQ_COMPUTE_EP_GET(reg_val) \
	(((reg_val) & CSG_EP_REQ_COMPUTE_EP_MASK) >> CSG_EP_REQ_COMPUTE_EP_SHIFT)
#define CSG_EP_REQ_COMPUTE_EP_SET(reg_val, value) \
	(((reg_val) & ~CSG_EP_REQ_COMPUTE_EP_MASK) | \
	 (((value) << CSG_EP_REQ_COMPUTE_EP_SHIFT) & CSG_EP_REQ_COMPUTE_EP_MASK))
#define CSG_EP_REQ_FRAGMENT_EP_SHIFT 8
#define CSG_EP_REQ_FRAGMENT_EP_MASK (0xFF << CSG_EP_REQ_FRAGMENT_EP_SHIFT)
#define CSG_EP_REQ_FRAGMENT_EP_GET(reg_val) \
	(((reg_val) & CSG_EP_REQ_FRAGMENT_EP_MASK) >> CSG_EP_REQ_FRAGMENT_EP_SHIFT)
#define CSG_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \
	(((reg_val) & ~CSG_EP_REQ_FRAGMENT_EP_MASK) | \
	 (((value) << CSG_EP_REQ_FRAGMENT_EP_SHIFT) & CSG_EP_REQ_FRAGMENT_EP_MASK))
#define CSG_EP_REQ_TILER_EP_SHIFT 16
#define CSG_EP_REQ_TILER_EP_MASK (0xF << CSG_EP_REQ_TILER_EP_SHIFT)
#define CSG_EP_REQ_TILER_EP_GET(reg_val) \
	(((reg_val) & CSG_EP_REQ_TILER_EP_MASK) >> CSG_EP_REQ_TILER_EP_SHIFT)
#define CSG_EP_REQ_TILER_EP_SET(reg_val, value) \
	(((reg_val) & ~CSG_EP_REQ_TILER_EP_MASK) | \
	 (((value) << CSG_EP_REQ_TILER_EP_SHIFT) & CSG_EP_REQ_TILER_EP_MASK))
#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT 20
#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK (0x1 << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT)
#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_GET(reg_val) \
	(((reg_val) & CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) >> CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT)
#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \
	(((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \
	 (((value) << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK))
#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT 21
#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK (0x1 << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT)
#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_GET(reg_val) \
	(((reg_val) & CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) >> CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT)
#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \
	(((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \
	 (((value) << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK))
#define CSG_EP_REQ_PRIORITY_SHIFT 28
/* Bits 31:28: unsigned literal, a signed 0xF << 28 would overflow int. */
#define CSG_EP_REQ_PRIORITY_MASK (0xFU << CSG_EP_REQ_PRIORITY_SHIFT)
#define CSG_EP_REQ_PRIORITY_GET(reg_val) \
	(((reg_val) & CSG_EP_REQ_PRIORITY_MASK) >> CSG_EP_REQ_PRIORITY_SHIFT)
#define CSG_EP_REQ_PRIORITY_SET(reg_val, value) \
	(((reg_val) & ~CSG_EP_REQ_PRIORITY_MASK) | \
	 (((value) << CSG_EP_REQ_PRIORITY_SHIFT) & CSG_EP_REQ_PRIORITY_MASK))

/* CSG_SUSPEND_BUF register */
#define CSG_SUSPEND_BUF_POINTER_SHIFT 0
#define CSG_SUSPEND_BUF_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CSG_SUSPEND_BUF_POINTER_SHIFT)
#define CSG_SUSPEND_BUF_POINTER_GET(reg_val) \
	(((reg_val) & CSG_SUSPEND_BUF_POINTER_MASK) >> CSG_SUSPEND_BUF_POINTER_SHIFT)
#define CSG_SUSPEND_BUF_POINTER_SET(reg_val, value) \
	(((reg_val) & ~CSG_SUSPEND_BUF_POINTER_MASK) | \
	 (((value) << CSG_SUSPEND_BUF_POINTER_SHIFT) & CSG_SUSPEND_BUF_POINTER_MASK))

/* CSG_PROTM_SUSPEND_BUF register */
#define CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT 0
#define CSG_PROTM_SUSPEND_BUF_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT)
#define CSG_PROTM_SUSPEND_BUF_POINTER_GET(reg_val) \
	(((reg_val) & CSG_PROTM_SUSPEND_BUF_POINTER_MASK) >> CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT)
#define CSG_PROTM_SUSPEND_BUF_POINTER_SET(reg_val, value) \
	(((reg_val) & ~CSG_PROTM_SUSPEND_BUF_POINTER_MASK) | \
	 (((value) << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) & CSG_PROTM_SUSPEND_BUF_POINTER_MASK))

/* End of CSG_INPUT_BLOCK register set definitions */

/* CSG_OUTPUT_BLOCK register set definitions */

/* CSG_ACK register */
#define CSG_ACK_STATE_SHIFT 0
#define CSG_ACK_STATE_MASK (0x7 << CSG_ACK_STATE_SHIFT)
#define CSG_ACK_STATE_GET(reg_val) \
	(((reg_val) & CSG_ACK_STATE_MASK) >> CSG_ACK_STATE_SHIFT)
#define CSG_ACK_STATE_SET(reg_val, value) \
	(((reg_val) & ~CSG_ACK_STATE_MASK) | \
	 (((value) << CSG_ACK_STATE_SHIFT) & CSG_ACK_STATE_MASK))
/* CSG_ACK_STATE values */
++#define CSG_ACK_STATE_TERMINATE 0x0 ++#define CSG_ACK_STATE_START 0x1 ++#define CSG_ACK_STATE_SUSPEND 0x2 ++#define CSG_ACK_STATE_RESUME 0x3 ++/* End of CSG_ACK_STATE values */ ++#define CSG_ACK_EP_CFG_SHIFT 4 ++#define CSG_ACK_EP_CFG_MASK (0x1 << CSG_ACK_EP_CFG_SHIFT) ++#define CSG_ACK_EP_CFG_GET(reg_val) (((reg_val)&CSG_ACK_EP_CFG_MASK) >> CSG_ACK_EP_CFG_SHIFT) ++#define CSG_ACK_EP_CFG_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_ACK_EP_CFG_MASK) | (((value) << CSG_ACK_EP_CFG_SHIFT) & CSG_ACK_EP_CFG_MASK)) ++#define CSG_ACK_STATUS_UPDATE_SHIFT 5 ++#define CSG_ACK_STATUS_UPDATE_MASK (0x1 << CSG_ACK_STATUS_UPDATE_SHIFT) ++#define CSG_ACK_STATUS_UPDATE_GET(reg_val) (((reg_val)&CSG_ACK_STATUS_UPDATE_MASK) >> CSG_ACK_STATUS_UPDATE_SHIFT) ++#define CSG_ACK_STATUS_UPDATE_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_ACK_STATUS_UPDATE_MASK) | \ ++ (((value) << CSG_ACK_STATUS_UPDATE_SHIFT) & CSG_ACK_STATUS_UPDATE_MASK)) ++#define CSG_ACK_SYNC_UPDATE_SHIFT 28 ++#define CSG_ACK_SYNC_UPDATE_MASK (0x1 << CSG_ACK_SYNC_UPDATE_SHIFT) ++#define CSG_ACK_SYNC_UPDATE_GET(reg_val) (((reg_val)&CSG_ACK_SYNC_UPDATE_MASK) >> CSG_ACK_SYNC_UPDATE_SHIFT) ++#define CSG_ACK_SYNC_UPDATE_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_ACK_SYNC_UPDATE_MASK) | (((value) << CSG_ACK_SYNC_UPDATE_SHIFT) & CSG_ACK_SYNC_UPDATE_MASK)) ++#define CSG_ACK_IDLE_SHIFT 29 ++#define CSG_ACK_IDLE_MASK (0x1 << CSG_ACK_IDLE_SHIFT) ++#define CSG_ACK_IDLE_GET(reg_val) (((reg_val)&CSG_ACK_IDLE_MASK) >> CSG_ACK_IDLE_SHIFT) ++#define CSG_ACK_IDLE_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_ACK_IDLE_MASK) | (((value) << CSG_ACK_IDLE_SHIFT) & CSG_ACK_IDLE_MASK)) ++#define CSG_ACK_DOORBELL_SHIFT 30 ++#define CSG_ACK_DOORBELL_MASK (0x1 << CSG_ACK_DOORBELL_SHIFT) ++#define CSG_ACK_DOORBELL_GET(reg_val) (((reg_val)&CSG_ACK_DOORBELL_MASK) >> CSG_ACK_DOORBELL_SHIFT) ++#define CSG_ACK_DOORBELL_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_ACK_DOORBELL_MASK) | (((value) << CSG_ACK_DOORBELL_SHIFT) & CSG_ACK_DOORBELL_MASK)) ++#define 
CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT 31 ++#define CSG_ACK_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT) ++#define CSG_ACK_PROGRESS_TIMER_EVENT_GET(reg_val) \ ++ (((reg_val)&CSG_ACK_PROGRESS_TIMER_EVENT_MASK) >> CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT) ++#define CSG_ACK_PROGRESS_TIMER_EVENT_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_ACK_PROGRESS_TIMER_EVENT_MASK) | \ ++ (((value) << CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT) & CSG_ACK_PROGRESS_TIMER_EVENT_MASK)) ++ ++/* CSG_STATUS_EP_CURRENT register */ ++#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT 0 ++#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK (0xFF << CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT) ++#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_GET(reg_val) \ ++ (((reg_val)&CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK) >> CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT) ++#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK) | \ ++ (((value) << CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT) & CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK)) ++#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT 8 ++#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK (0xFF << CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT) ++#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_GET(reg_val) \ ++ (((reg_val)&CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK) >> CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT) ++#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK) | \ ++ (((value) << CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT) & CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK)) ++#define CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT 16 ++#define CSG_STATUS_EP_CURRENT_TILER_EP_MASK (0xF << CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) ++#define CSG_STATUS_EP_CURRENT_TILER_EP_GET(reg_val) \ ++ (((reg_val)&CSG_STATUS_EP_CURRENT_TILER_EP_MASK) >> CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) ++#define CSG_STATUS_EP_CURRENT_TILER_EP_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_STATUS_EP_CURRENT_TILER_EP_MASK) | \ ++ 
(((value) << CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) & CSG_STATUS_EP_CURRENT_TILER_EP_MASK)) ++ ++/* CSG_STATUS_EP_REQ register */ ++#define CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT 0 ++#define CSG_STATUS_EP_REQ_COMPUTE_EP_MASK (0xFF << CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT) ++#define CSG_STATUS_EP_REQ_COMPUTE_EP_GET(reg_val) \ ++ (((reg_val)&CSG_STATUS_EP_REQ_COMPUTE_EP_MASK) >> CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT) ++#define CSG_STATUS_EP_REQ_COMPUTE_EP_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_STATUS_EP_REQ_COMPUTE_EP_MASK) | \ ++ (((value) << CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT) & CSG_STATUS_EP_REQ_COMPUTE_EP_MASK)) ++#define CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT 8 ++#define CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK (0xFF << CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT) ++#define CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(reg_val) \ ++ (((reg_val)&CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK) >> CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT) ++#define CSG_STATUS_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK) | \ ++ (((value) << CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT) & CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK)) ++#define CSG_STATUS_EP_REQ_TILER_EP_SHIFT 16 ++#define CSG_STATUS_EP_REQ_TILER_EP_MASK (0xF << CSG_STATUS_EP_REQ_TILER_EP_SHIFT) ++#define CSG_STATUS_EP_REQ_TILER_EP_GET(reg_val) \ ++ (((reg_val)&CSG_STATUS_EP_REQ_TILER_EP_MASK) >> CSG_STATUS_EP_REQ_TILER_EP_SHIFT) ++#define CSG_STATUS_EP_REQ_TILER_EP_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_STATUS_EP_REQ_TILER_EP_MASK) | \ ++ (((value) << CSG_STATUS_EP_REQ_TILER_EP_SHIFT) & CSG_STATUS_EP_REQ_TILER_EP_MASK)) ++#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT 20 ++#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK (0x1 << CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) ++#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_GET(reg_val) \ ++ (((reg_val)&CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK) >> CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) ++#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \ ++ (((reg_val) & 
~CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \ ++ (((value) << CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK)) ++#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT 21 ++#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK (0x1 << CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) ++#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_GET(reg_val) \ ++ (((reg_val)&CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) >> CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) ++#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \ ++ (((value) << CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK)) ++ ++/* End of CSG_OUTPUT_BLOCK register set definitions */ ++ ++/* STREAM_CONTROL_BLOCK register set definitions */ ++ ++/* STREAM_FEATURES register */ ++#define STREAM_FEATURES_WORK_REGISTERS_SHIFT 0 ++#define STREAM_FEATURES_WORK_REGISTERS_MASK (0xFF << STREAM_FEATURES_WORK_REGISTERS_SHIFT) ++#define STREAM_FEATURES_WORK_REGISTERS_GET(reg_val) \ ++ (((reg_val)&STREAM_FEATURES_WORK_REGISTERS_MASK) >> STREAM_FEATURES_WORK_REGISTERS_SHIFT) ++#define STREAM_FEATURES_WORK_REGISTERS_SET(reg_val, value) \ ++ (((reg_val) & ~STREAM_FEATURES_WORK_REGISTERS_MASK) | \ ++ (((value) << STREAM_FEATURES_WORK_REGISTERS_SHIFT) & STREAM_FEATURES_WORK_REGISTERS_MASK)) ++#define STREAM_FEATURES_SCOREBOARDS_SHIFT 8 ++#define STREAM_FEATURES_SCOREBOARDS_MASK (0xFF << STREAM_FEATURES_SCOREBOARDS_SHIFT) ++#define STREAM_FEATURES_SCOREBOARDS_GET(reg_val) \ ++ (((reg_val)&STREAM_FEATURES_SCOREBOARDS_MASK) >> STREAM_FEATURES_SCOREBOARDS_SHIFT) ++#define STREAM_FEATURES_SCOREBOARDS_SET(reg_val, value) \ ++ (((reg_val) & ~STREAM_FEATURES_SCOREBOARDS_MASK) | \ ++ (((value) << STREAM_FEATURES_SCOREBOARDS_SHIFT) & STREAM_FEATURES_SCOREBOARDS_MASK)) ++#define STREAM_FEATURES_COMPUTE_SHIFT 16 ++#define STREAM_FEATURES_COMPUTE_MASK (0x1 << STREAM_FEATURES_COMPUTE_SHIFT) ++#define 
STREAM_FEATURES_COMPUTE_GET(reg_val) (((reg_val)&STREAM_FEATURES_COMPUTE_MASK) >> STREAM_FEATURES_COMPUTE_SHIFT) ++#define STREAM_FEATURES_COMPUTE_SET(reg_val, value) \ ++ (((reg_val) & ~STREAM_FEATURES_COMPUTE_MASK) | \ ++ (((value) << STREAM_FEATURES_COMPUTE_SHIFT) & STREAM_FEATURES_COMPUTE_MASK)) ++#define STREAM_FEATURES_FRAGMENT_SHIFT 17 ++#define STREAM_FEATURES_FRAGMENT_MASK (0x1 << STREAM_FEATURES_FRAGMENT_SHIFT) ++#define STREAM_FEATURES_FRAGMENT_GET(reg_val) \ ++ (((reg_val)&STREAM_FEATURES_FRAGMENT_MASK) >> STREAM_FEATURES_FRAGMENT_SHIFT) ++#define STREAM_FEATURES_FRAGMENT_SET(reg_val, value) \ ++ (((reg_val) & ~STREAM_FEATURES_FRAGMENT_MASK) | \ ++ (((value) << STREAM_FEATURES_FRAGMENT_SHIFT) & STREAM_FEATURES_FRAGMENT_MASK)) ++#define STREAM_FEATURES_TILER_SHIFT 18 ++#define STREAM_FEATURES_TILER_MASK (0x1 << STREAM_FEATURES_TILER_SHIFT) ++#define STREAM_FEATURES_TILER_GET(reg_val) (((reg_val)&STREAM_FEATURES_TILER_MASK) >> STREAM_FEATURES_TILER_SHIFT) ++#define STREAM_FEATURES_TILER_SET(reg_val, value) \ ++ (((reg_val) & ~STREAM_FEATURES_TILER_MASK) | \ ++ (((value) << STREAM_FEATURES_TILER_SHIFT) & STREAM_FEATURES_TILER_MASK)) ++ ++/* STREAM_INPUT_VA register */ ++#define STREAM_INPUT_VA_VALUE_SHIFT 0 ++#define STREAM_INPUT_VA_VALUE_MASK (0xFFFFFFFF << STREAM_INPUT_VA_VALUE_SHIFT) ++#define STREAM_INPUT_VA_VALUE_GET(reg_val) (((reg_val)&STREAM_INPUT_VA_VALUE_MASK) >> STREAM_INPUT_VA_VALUE_SHIFT) ++#define STREAM_INPUT_VA_VALUE_SET(reg_val, value) \ ++ (((reg_val) & ~STREAM_INPUT_VA_VALUE_MASK) | \ ++ (((value) << STREAM_INPUT_VA_VALUE_SHIFT) & STREAM_INPUT_VA_VALUE_MASK)) ++ ++/* STREAM_OUTPUT_VA register */ ++#define STREAM_OUTPUT_VA_VALUE_SHIFT 0 ++#define STREAM_OUTPUT_VA_VALUE_MASK (0xFFFFFFFF << STREAM_OUTPUT_VA_VALUE_SHIFT) ++#define STREAM_OUTPUT_VA_VALUE_GET(reg_val) (((reg_val)&STREAM_OUTPUT_VA_VALUE_MASK) >> STREAM_OUTPUT_VA_VALUE_SHIFT) ++#define STREAM_OUTPUT_VA_VALUE_SET(reg_val, value) \ ++ (((reg_val) & ~STREAM_OUTPUT_VA_VALUE_MASK) | 
\ ++ (((value) << STREAM_OUTPUT_VA_VALUE_SHIFT) & STREAM_OUTPUT_VA_VALUE_MASK)) ++/* End of STREAM_CONTROL_BLOCK register set definitions */ ++ ++/* GLB_INPUT_BLOCK register set definitions */ ++ ++/* GLB_REQ register */ ++#define GLB_REQ_HALT_SHIFT 0 ++#define GLB_REQ_HALT_MASK (0x1 << GLB_REQ_HALT_SHIFT) ++#define GLB_REQ_HALT_GET(reg_val) (((reg_val)&GLB_REQ_HALT_MASK) >> GLB_REQ_HALT_SHIFT) ++#define GLB_REQ_HALT_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_HALT_MASK) | (((value) << GLB_REQ_HALT_SHIFT) & GLB_REQ_HALT_MASK)) ++#define GLB_REQ_CFG_PROGRESS_TIMER_SHIFT 1 ++#define GLB_REQ_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_REQ_CFG_PROGRESS_TIMER_SHIFT) ++#define GLB_REQ_CFG_PROGRESS_TIMER_GET(reg_val) \ ++ (((reg_val)&GLB_REQ_CFG_PROGRESS_TIMER_MASK) >> GLB_REQ_CFG_PROGRESS_TIMER_SHIFT) ++#define GLB_REQ_CFG_PROGRESS_TIMER_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_CFG_PROGRESS_TIMER_MASK) | \ ++ (((value) << GLB_REQ_CFG_PROGRESS_TIMER_SHIFT) & GLB_REQ_CFG_PROGRESS_TIMER_MASK)) ++#define GLB_REQ_CFG_ALLOC_EN_SHIFT 2 ++#define GLB_REQ_CFG_ALLOC_EN_MASK (0x1 << GLB_REQ_CFG_ALLOC_EN_SHIFT) ++#define GLB_REQ_CFG_ALLOC_EN_GET(reg_val) (((reg_val)&GLB_REQ_CFG_ALLOC_EN_MASK) >> GLB_REQ_CFG_ALLOC_EN_SHIFT) ++#define GLB_REQ_CFG_ALLOC_EN_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_CFG_ALLOC_EN_MASK) | (((value) << GLB_REQ_CFG_ALLOC_EN_SHIFT) & GLB_REQ_CFG_ALLOC_EN_MASK)) ++#define GLB_REQ_CFG_PWROFF_TIMER_SHIFT 3 ++#define GLB_REQ_CFG_PWROFF_TIMER_MASK (0x1 << GLB_REQ_CFG_PWROFF_TIMER_SHIFT) ++#define GLB_REQ_CFG_PWROFF_TIMER_GET(reg_val) \ ++ (((reg_val)&GLB_REQ_CFG_PWROFF_TIMER_MASK) >> GLB_REQ_CFG_PWROFF_TIMER_SHIFT) ++#define GLB_REQ_CFG_PWROFF_TIMER_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_CFG_PWROFF_TIMER_MASK) | \ ++ (((value) << GLB_REQ_CFG_PWROFF_TIMER_SHIFT) & GLB_REQ_CFG_PWROFF_TIMER_MASK)) ++#define GLB_REQ_PROTM_ENTER_SHIFT 4 ++#define GLB_REQ_PROTM_ENTER_MASK (0x1 << GLB_REQ_PROTM_ENTER_SHIFT) ++#define GLB_REQ_PROTM_ENTER_GET(reg_val) 
(((reg_val)&GLB_REQ_PROTM_ENTER_MASK) >> GLB_REQ_PROTM_ENTER_SHIFT) ++#define GLB_REQ_PROTM_ENTER_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_PROTM_ENTER_MASK) | (((value) << GLB_REQ_PROTM_ENTER_SHIFT) & GLB_REQ_PROTM_ENTER_MASK)) ++#define GLB_REQ_PRFCNT_ENABLE_SHIFT 5 ++#define GLB_REQ_PRFCNT_ENABLE_MASK (0x1 << GLB_REQ_PRFCNT_ENABLE_SHIFT) ++#define GLB_REQ_PRFCNT_ENABLE_GET(reg_val) (((reg_val)&GLB_REQ_PRFCNT_ENABLE_MASK) >> GLB_REQ_PRFCNT_ENABLE_SHIFT) ++#define GLB_REQ_PRFCNT_ENABLE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_PRFCNT_ENABLE_MASK) | \ ++ (((value) << GLB_REQ_PRFCNT_ENABLE_SHIFT) & GLB_REQ_PRFCNT_ENABLE_MASK)) ++#define GLB_REQ_PRFCNT_SAMPLE_SHIFT 6 ++#define GLB_REQ_PRFCNT_SAMPLE_MASK (0x1 << GLB_REQ_PRFCNT_SAMPLE_SHIFT) ++#define GLB_REQ_PRFCNT_SAMPLE_GET(reg_val) (((reg_val)&GLB_REQ_PRFCNT_SAMPLE_MASK) >> GLB_REQ_PRFCNT_SAMPLE_SHIFT) ++#define GLB_REQ_PRFCNT_SAMPLE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_PRFCNT_SAMPLE_MASK) | \ ++ (((value) << GLB_REQ_PRFCNT_SAMPLE_SHIFT) & GLB_REQ_PRFCNT_SAMPLE_MASK)) ++#define GLB_REQ_COUNTER_ENABLE_SHIFT 7 ++#define GLB_REQ_COUNTER_ENABLE_MASK (0x1 << GLB_REQ_COUNTER_ENABLE_SHIFT) ++#define GLB_REQ_COUNTER_ENABLE_GET(reg_val) (((reg_val)&GLB_REQ_COUNTER_ENABLE_MASK) >> GLB_REQ_COUNTER_ENABLE_SHIFT) ++#define GLB_REQ_COUNTER_ENABLE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_COUNTER_ENABLE_MASK) | \ ++ (((value) << GLB_REQ_COUNTER_ENABLE_SHIFT) & GLB_REQ_COUNTER_ENABLE_MASK)) ++#define GLB_REQ_PING_SHIFT 8 ++#define GLB_REQ_PING_MASK (0x1 << GLB_REQ_PING_SHIFT) ++#define GLB_REQ_PING_GET(reg_val) (((reg_val)&GLB_REQ_PING_MASK) >> GLB_REQ_PING_SHIFT) ++#define GLB_REQ_PING_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_PING_MASK) | (((value) << GLB_REQ_PING_SHIFT) & GLB_REQ_PING_MASK)) ++#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT 9 ++#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK \ ++ (0x1 << GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) ++#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_GET(reg_val) \ ++ 
(((reg_val)&GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK) >> \ ++ GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) ++#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK) | \ ++ (((value) << GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) & \ ++ GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK)) ++#define GLB_REQ_INACTIVE_COMPUTE_SHIFT 20 ++#define GLB_REQ_INACTIVE_COMPUTE_MASK (0x1 << GLB_REQ_INACTIVE_COMPUTE_SHIFT) ++#define GLB_REQ_INACTIVE_COMPUTE_GET(reg_val) \ ++ (((reg_val)&GLB_REQ_INACTIVE_COMPUTE_MASK) >> GLB_REQ_INACTIVE_COMPUTE_SHIFT) ++#define GLB_REQ_INACTIVE_COMPUTE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_INACTIVE_COMPUTE_MASK) | \ ++ (((value) << GLB_REQ_INACTIVE_COMPUTE_SHIFT) & GLB_REQ_INACTIVE_COMPUTE_MASK)) ++#define GLB_REQ_INACTIVE_FRAGMENT_SHIFT 21 ++#define GLB_REQ_INACTIVE_FRAGMENT_MASK (0x1 << GLB_REQ_INACTIVE_FRAGMENT_SHIFT) ++#define GLB_REQ_INACTIVE_FRAGMENT_GET(reg_val) \ ++ (((reg_val)&GLB_REQ_INACTIVE_FRAGMENT_MASK) >> GLB_REQ_INACTIVE_FRAGMENT_SHIFT) ++#define GLB_REQ_INACTIVE_FRAGMENT_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_INACTIVE_FRAGMENT_MASK) | \ ++ (((value) << GLB_REQ_INACTIVE_FRAGMENT_SHIFT) & GLB_REQ_INACTIVE_FRAGMENT_MASK)) ++#define GLB_REQ_INACTIVE_TILER_SHIFT 22 ++#define GLB_REQ_INACTIVE_TILER_MASK (0x1 << GLB_REQ_INACTIVE_TILER_SHIFT) ++#define GLB_REQ_INACTIVE_TILER_GET(reg_val) (((reg_val)&GLB_REQ_INACTIVE_TILER_MASK) >> GLB_REQ_INACTIVE_TILER_SHIFT) ++#define GLB_REQ_INACTIVE_TILER_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_INACTIVE_TILER_MASK) | \ ++ (((value) << GLB_REQ_INACTIVE_TILER_SHIFT) & GLB_REQ_INACTIVE_TILER_MASK)) ++#define GLB_REQ_PROTM_EXIT_SHIFT 23 ++#define GLB_REQ_PROTM_EXIT_MASK (0x1 << GLB_REQ_PROTM_EXIT_SHIFT) ++#define GLB_REQ_PROTM_EXIT_GET(reg_val) (((reg_val)&GLB_REQ_PROTM_EXIT_MASK) >> GLB_REQ_PROTM_EXIT_SHIFT) ++#define GLB_REQ_PROTM_EXIT_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_PROTM_EXIT_MASK) | (((value) << GLB_REQ_PROTM_EXIT_SHIFT) & 
GLB_REQ_PROTM_EXIT_MASK)) ++#define GLB_REQ_PRFCNT_THRESHOLD_SHIFT 24 ++#define GLB_REQ_PRFCNT_THRESHOLD_MASK (0x1 << GLB_REQ_PRFCNT_THRESHOLD_SHIFT) ++#define GLB_REQ_PRFCNT_THRESHOLD_GET(reg_val) \ ++ (((reg_val)&GLB_REQ_PRFCNT_THRESHOLD_MASK) >> \ ++ GLB_REQ_PRFCNT_THRESHOLD_SHIFT) ++#define GLB_REQ_PRFCNT_THRESHOLD_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_PRFCNT_THRESHOLD_MASK) | \ ++ (((value) << GLB_REQ_PRFCNT_THRESHOLD_SHIFT) & \ ++ GLB_REQ_PRFCNT_THRESHOLD_MASK)) ++#define GLB_REQ_PRFCNT_OVERFLOW_SHIFT 25 ++#define GLB_REQ_PRFCNT_OVERFLOW_MASK (0x1 << GLB_REQ_PRFCNT_OVERFLOW_SHIFT) ++#define GLB_REQ_PRFCNT_OVERFLOW_GET(reg_val) \ ++ (((reg_val)&GLB_REQ_PRFCNT_OVERFLOW_MASK) >> \ ++ GLB_REQ_PRFCNT_OVERFLOW_SHIFT) ++#define GLB_REQ_PRFCNT_OVERFLOW_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_PRFCNT_OVERFLOW_MASK) | \ ++ (((value) << GLB_REQ_PRFCNT_OVERFLOW_SHIFT) & \ ++ GLB_REQ_PRFCNT_OVERFLOW_MASK)) ++#define GLB_REQ_DEBUG_CSF_REQ_SHIFT 30 ++#define GLB_REQ_DEBUG_CSF_REQ_MASK (0x1 << GLB_REQ_DEBUG_CSF_REQ_SHIFT) ++#define GLB_REQ_DEBUG_CSF_REQ_GET(reg_val) (((reg_val)&GLB_REQ_DEBUG_CSF_REQ_MASK) >> GLB_REQ_DEBUG_CSF_REQ_SHIFT) ++#define GLB_REQ_DEBUG_CSF_REQ_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_DEBUG_CSF_REQ_MASK) | \ ++ (((value) << GLB_REQ_DEBUG_CSF_REQ_SHIFT) & GLB_REQ_DEBUG_CSF_REQ_MASK)) ++#define GLB_REQ_DEBUG_HOST_REQ_SHIFT 31 ++#define GLB_REQ_DEBUG_HOST_REQ_MASK (0x1U << GLB_REQ_DEBUG_HOST_REQ_SHIFT) /* unsigned: bit 31 must not be shifted into the sign bit of an int */ ++#define GLB_REQ_DEBUG_HOST_REQ_GET(reg_val) (((reg_val)&GLB_REQ_DEBUG_HOST_REQ_MASK) >> GLB_REQ_DEBUG_HOST_REQ_SHIFT) ++#define GLB_REQ_DEBUG_HOST_REQ_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_REQ_DEBUG_HOST_REQ_MASK) | \ ++ (((value) << GLB_REQ_DEBUG_HOST_REQ_SHIFT) & GLB_REQ_DEBUG_HOST_REQ_MASK)) ++ ++/* GLB_ACK_IRQ_MASK register */ ++#define GLB_ACK_IRQ_MASK_HALT_SHIFT 0 ++#define GLB_ACK_IRQ_MASK_HALT_MASK (0x1 << GLB_ACK_IRQ_MASK_HALT_SHIFT) ++#define GLB_ACK_IRQ_MASK_HALT_GET(reg_val) (((reg_val)&GLB_ACK_IRQ_MASK_HALT_MASK)
>> GLB_ACK_IRQ_MASK_HALT_SHIFT) ++#define GLB_ACK_IRQ_MASK_HALT_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_HALT_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_HALT_SHIFT) & GLB_ACK_IRQ_MASK_HALT_MASK)) ++#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT 1 ++#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT) ++#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK) >> GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT) ++#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT) & GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK)) ++#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT 2 ++#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT) ++#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK) >> GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT) ++#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT) & GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK)) ++#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT 3 ++#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT) ++#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK) >> GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT) ++#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT) & GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK)) ++#define GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT 4 ++#define GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK (0x1 << GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT) ++#define GLB_ACK_IRQ_MASK_PROTM_ENTER_GET(reg_val) \ ++ 
(((reg_val)&GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK) >> GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT) ++#define GLB_ACK_IRQ_MASK_PROTM_ENTER_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT) & GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK)) ++#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT 5 ++#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT) ++#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT) ++#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT) & GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK)) ++#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT 6 ++#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT) ++#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT) ++#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT) & GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK)) ++#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT 7 ++#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK (0x1 << GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT) ++#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK) >> GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT) ++#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT) & GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK)) ++#define GLB_ACK_IRQ_MASK_PING_SHIFT 8 ++#define GLB_ACK_IRQ_MASK_PING_MASK (0x1 << GLB_ACK_IRQ_MASK_PING_SHIFT) ++#define GLB_ACK_IRQ_MASK_PING_GET(reg_val) 
(((reg_val)&GLB_ACK_IRQ_MASK_PING_MASK) >> GLB_ACK_IRQ_MASK_PING_SHIFT) ++#define GLB_ACK_IRQ_MASK_PING_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_PING_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_PING_SHIFT) & GLB_ACK_IRQ_MASK_PING_MASK)) ++#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT 9 ++#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK \ ++ (0x1 << GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT) ++#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK) >> \ ++ GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT) ++#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT) & \ ++ GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK)) ++#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT 20 ++#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT) ++#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT) ++#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK)) ++#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT 21 ++#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT) ++#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT) ++#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK)) ++#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT 22 
++#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT) ++#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT) ++#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK)) ++#define GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT 23 ++#define GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK (0x1 << GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT) ++#define GLB_ACK_IRQ_MASK_PROTM_EXIT_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK) >> GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT) ++#define GLB_ACK_IRQ_MASK_PROTM_EXIT_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT) & GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK)) ++#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT 24 ++#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK \ ++ (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) ++#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK) >> \ ++ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) ++#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) & \ ++ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK)) ++#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT 25 ++#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK \ ++ (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) ++#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK) >> \ ++ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) ++#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) & \ ++ 
GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK)) ++#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT 30 ++#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK (0x1 << GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT) ++#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK) >> GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT) ++#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT) & GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK)) ++#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT 31 ++#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK (0x1U << GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT) /* unsigned: bit 31 must not be shifted into the sign bit of an int */ ++#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK) >> GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT) ++#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK) | \ ++ (((value) << GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT) & GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK)) ++ ++/* GLB_PROGRESS_TIMER register */ ++#define GLB_PROGRESS_TIMER_TIMEOUT_SHIFT 0 ++#define GLB_PROGRESS_TIMER_TIMEOUT_MASK (0xFFFFFFFF << GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) ++#define GLB_PROGRESS_TIMER_TIMEOUT_GET(reg_val) \ ++ (((reg_val)&GLB_PROGRESS_TIMER_TIMEOUT_MASK) >> GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) ++#define GLB_PROGRESS_TIMER_TIMEOUT_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_PROGRESS_TIMER_TIMEOUT_MASK) | \ ++ (((value) << GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) & GLB_PROGRESS_TIMER_TIMEOUT_MASK)) ++ ++/* GLB_PWROFF_TIMER register */ ++#define GLB_PWROFF_TIMER_TIMEOUT_SHIFT 0 ++#define GLB_PWROFF_TIMER_TIMEOUT_MASK (0x7FFFFFFF << GLB_PWROFF_TIMER_TIMEOUT_SHIFT) ++#define GLB_PWROFF_TIMER_TIMEOUT_GET(reg_val) \ ++ (((reg_val)&GLB_PWROFF_TIMER_TIMEOUT_MASK) >> GLB_PWROFF_TIMER_TIMEOUT_SHIFT) ++#define GLB_PWROFF_TIMER_TIMEOUT_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_PWROFF_TIMER_TIMEOUT_MASK) | \ ++ (((value) <<
GLB_PWROFF_TIMER_TIMEOUT_SHIFT) & GLB_PWROFF_TIMER_TIMEOUT_MASK)) ++#define GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT 31 ++#define GLB_PWROFF_TIMER_TIMER_SOURCE_MASK (0x1U << GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT) /* unsigned: bit 31 must not be shifted into the sign bit of an int */ ++#define GLB_PWROFF_TIMER_TIMER_SOURCE_GET(reg_val) \ ++ (((reg_val)&GLB_PWROFF_TIMER_TIMER_SOURCE_MASK) >> GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT) ++#define GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_PWROFF_TIMER_TIMER_SOURCE_MASK) | \ ++ (((value) << GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT) & GLB_PWROFF_TIMER_TIMER_SOURCE_MASK)) ++/* GLB_PWROFF_TIMER_TIMER_SOURCE values */ ++#define GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP 0x0 ++#define GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1 ++/* End of GLB_PWROFF_TIMER_TIMER_SOURCE values */ ++ ++/* GLB_ALLOC_EN register */ ++#define GLB_ALLOC_EN_MASK_SHIFT 0 ++#define GLB_ALLOC_EN_MASK_MASK (0xFFFFFFFFFFFFFFFF << GLB_ALLOC_EN_MASK_SHIFT) ++#define GLB_ALLOC_EN_MASK_GET(reg_val) (((reg_val)&GLB_ALLOC_EN_MASK_MASK) >> GLB_ALLOC_EN_MASK_SHIFT) ++#define GLB_ALLOC_EN_MASK_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ALLOC_EN_MASK_MASK) | (((value) << GLB_ALLOC_EN_MASK_SHIFT) & GLB_ALLOC_EN_MASK_MASK)) ++ ++/* GLB_PROTM_COHERENCY register */ ++#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT 0 ++#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK \ ++ (0xFFFFFFFF << GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT) ++#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_GET(reg_val) \ ++ (((reg_val)&GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK) >> \ ++ GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT) ++#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK) | \ ++ (((value) << GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT) & \ ++ GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK)) ++/* End of GLB_INPUT_BLOCK register set definitions */ ++ ++/* GLB_OUTPUT_BLOCK register set
definitions */ ++ ++/* GLB_ACK register */ ++#define GLB_ACK_CFG_PROGRESS_TIMER_SHIFT 1 ++#define GLB_ACK_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_ACK_CFG_PROGRESS_TIMER_SHIFT) ++#define GLB_ACK_CFG_PROGRESS_TIMER_GET(reg_val) \ ++ (((reg_val)&GLB_ACK_CFG_PROGRESS_TIMER_MASK) >> GLB_ACK_CFG_PROGRESS_TIMER_SHIFT) ++#define GLB_ACK_CFG_PROGRESS_TIMER_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_CFG_PROGRESS_TIMER_MASK) | \ ++ (((value) << GLB_ACK_CFG_PROGRESS_TIMER_SHIFT) & GLB_ACK_CFG_PROGRESS_TIMER_MASK)) ++#define GLB_ACK_CFG_ALLOC_EN_SHIFT 2 ++#define GLB_ACK_CFG_ALLOC_EN_MASK (0x1 << GLB_ACK_CFG_ALLOC_EN_SHIFT) ++#define GLB_ACK_CFG_ALLOC_EN_GET(reg_val) (((reg_val)&GLB_ACK_CFG_ALLOC_EN_MASK) >> GLB_ACK_CFG_ALLOC_EN_SHIFT) ++#define GLB_ACK_CFG_ALLOC_EN_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_ACK_CFG_ALLOC_EN_MASK) | (((value) << GLB_ACK_CFG_ALLOC_EN_SHIFT) & GLB_ACK_CFG_ALLOC_EN_MASK)) ++/* End of GLB_OUTPUT_BLOCK register set definitions */ ++ ++/* The following register and fields are for headers before 10.x.7/11.x.4 */ ++#define GLB_REQ_IDLE_ENABLE_SHIFT (10) ++#define GLB_REQ_REQ_IDLE_ENABLE (1 << GLB_REQ_IDLE_ENABLE_SHIFT) ++#define GLB_REQ_REQ_IDLE_DISABLE (0 << GLB_REQ_IDLE_ENABLE_SHIFT) ++#define GLB_REQ_IDLE_ENABLE_MASK (0x1 << GLB_REQ_IDLE_ENABLE_SHIFT) ++#define GLB_REQ_IDLE_DISABLE_MASK (0x1 << GLB_REQ_IDLE_ENABLE_SHIFT) ++#define GLB_REQ_IDLE_EVENT_SHIFT (26) ++#define GLB_REQ_IDLE_EVENT_MASK (0x1 << GLB_REQ_IDLE_EVENT_SHIFT) ++#define GLB_ACK_IDLE_ENABLE_SHIFT (10) ++#define GLB_ACK_ACK_IDLE_ENABLE (1 << GLB_ACK_IDLE_ENABLE_SHIFT) ++#define GLB_ACK_ACK_IDLE_DISABLE (0 << GLB_ACK_IDLE_ENABLE_SHIFT) ++#define GLB_ACK_IDLE_ENABLE_MASK (0x1 << GLB_ACK_IDLE_ENABLE_SHIFT) ++#define GLB_ACK_IDLE_EVENT_SHIFT (26) ++#define GLB_ACK_IDLE_EVENT_MASK (0x1 << GLB_ACK_IDLE_EVENT_SHIFT) ++ ++#define GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT (26) ++#define GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK (0x1 << GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT) ++ ++#define GLB_IDLE_TIMER (0x0080)
++/* GLB_IDLE_TIMER register */ ++#define GLB_IDLE_TIMER_TIMEOUT_SHIFT (0) ++#define GLB_IDLE_TIMER_TIMEOUT_MASK ((0x7FFFFFFF) << GLB_IDLE_TIMER_TIMEOUT_SHIFT) ++#define GLB_IDLE_TIMER_TIMEOUT_GET(reg_val) (((reg_val)&GLB_IDLE_TIMER_TIMEOUT_MASK) >> GLB_IDLE_TIMER_TIMEOUT_SHIFT) ++#define GLB_IDLE_TIMER_TIMEOUT_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_IDLE_TIMER_TIMEOUT_MASK) | \ ++ (((value) << GLB_IDLE_TIMER_TIMEOUT_SHIFT) & GLB_IDLE_TIMER_TIMEOUT_MASK)) ++#define GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT (31) ++#define GLB_IDLE_TIMER_TIMER_SOURCE_MASK ((0x1U) << GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT) /* unsigned: bit 31 must not be shifted into the sign bit of an int */ ++#define GLB_IDLE_TIMER_TIMER_SOURCE_GET(reg_val) \ ++ (((reg_val)&GLB_IDLE_TIMER_TIMER_SOURCE_MASK) >> GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT) ++#define GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_IDLE_TIMER_TIMER_SOURCE_MASK) | \ ++ (((value) << GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT) & GLB_IDLE_TIMER_TIMER_SOURCE_MASK)) ++/* GLB_IDLE_TIMER_TIMER_SOURCE values */ ++#define GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP 0x0 ++#define GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1 ++/* End of GLB_IDLE_TIMER_TIMER_SOURCE values */ ++ ++/* GLB_INSTR_FEATURES register */ ++#define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT (0) ++#define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK ((u32)0xF << GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT) ++#define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_GET(reg_val) \ ++ (((reg_val)&GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK) >> GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT) ++#define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK) | \ ++ (((value) << GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT) & GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK)) ++#define GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SHIFT (4) ++#define GLB_INSTR_FEATURES_EVENT_SIZE_MAX_MASK ((u32)0xF << GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SHIFT) ++#define GLB_INSTR_FEATURES_EVENT_SIZE_MAX_GET(reg_val) \ ++
(((reg_val)&GLB_INSTR_FEATURES_EVENT_SIZE_MAX_MASK) >> GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SHIFT) ++#define GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SET(reg_val, value) \ ++ (((reg_val) & ~GLB_INSTR_FEATURES_EVENT_SIZE_MAX_MASK) | \ ++ (((value) << GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SHIFT) & GLB_INSTR_FEATURES_EVENT_SIZE_MAX_MASK)) ++ ++#define CSG_STATUS_STATE (0x0018) /* CSG state status register */ ++/* CSG_STATUS_STATE register */ ++#define CSG_STATUS_STATE_IDLE_SHIFT (0) ++#define CSG_STATUS_STATE_IDLE_MASK ((0x1) << CSG_STATUS_STATE_IDLE_SHIFT) ++#define CSG_STATUS_STATE_IDLE_GET(reg_val) \ ++ (((reg_val)&CSG_STATUS_STATE_IDLE_MASK) >> CSG_STATUS_STATE_IDLE_SHIFT) ++#define CSG_STATUS_STATE_IDLE_SET(reg_val, value) \ ++ (((reg_val) & ~CSG_STATUS_STATE_IDLE_MASK) | \ ++ (((value) << CSG_STATUS_STATE_IDLE_SHIFT) & CSG_STATUS_STATE_IDLE_MASK)) ++ ++#endif /* _UAPI_GPU_CSF_REGISTERS_H_ */ +diff --git a/dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h +new file mode 100644 +index 0000000..d2d7ce2 +--- /dev/null ++++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h +@@ -0,0 +1,433 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#ifndef _UAPI_KBASE_CSF_IOCTL_H_ ++#define _UAPI_KBASE_CSF_IOCTL_H_ ++ ++#include <asm-generic/ioctl.h> ++#include <linux/types.h> ++ ++/* ++ * 1.0: ++ * - CSF IOCTL header separated from JM ++ * 1.1: ++ * - Add a new priority level BASE_QUEUE_GROUP_PRIORITY_REALTIME ++ * - Add ioctl 54: This controls the priority setting. ++ * 1.2: ++ * - Add new CSF GPU_FEATURES register into the property structure ++ * returned by KBASE_IOCTL_GET_GPUPROPS ++ * 1.3: ++ * - Add __u32 group_uid member to ++ * &struct_kbase_ioctl_cs_queue_group_create.out ++ * 1.4: ++ * - Replace padding in kbase_ioctl_cs_get_glb_iface with ++ * instr_features member of same size ++ * 1.5: ++ * - Add ioctl 40: kbase_ioctl_cs_queue_register_ex, this is a new ++ * queue registration call with extended format for supporting CS ++ * trace configurations with CSF trace_command. ++ */ ++ ++#define BASE_UK_VERSION_MAJOR 1 ++#define BASE_UK_VERSION_MINOR 5 ++ ++/** ++ * struct kbase_ioctl_version_check - Check version compatibility between ++ * kernel and userspace ++ * ++ * @major: Major version number ++ * @minor: Minor version number ++ */ ++struct kbase_ioctl_version_check { ++ __u16 major; ++ __u16 minor; ++}; ++ ++#define KBASE_IOCTL_VERSION_CHECK_RESERVED \ ++ _IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check) ++ ++ ++/** ++ * struct kbase_ioctl_cs_queue_register - Register a GPU command queue with the ++ * base back-end ++ * ++ * @buffer_gpu_addr: GPU address of the buffer backing the queue ++ * @buffer_size: Size of the buffer in bytes ++ * @priority: Priority of the queue within a group when run within a process ++ * @padding: Currently unused, must be zero ++ * ++ * @Note: There is an identical sub-section in kbase_ioctl_cs_queue_register_ex. ++ * Any change of this struct should also be mirrored to the latter.
++ */ ++struct kbase_ioctl_cs_queue_register { ++ __u64 buffer_gpu_addr; ++ __u32 buffer_size; ++ __u8 priority; ++ __u8 padding[3]; ++}; ++ ++#define KBASE_IOCTL_CS_QUEUE_REGISTER \ ++ _IOW(KBASE_IOCTL_TYPE, 36, struct kbase_ioctl_cs_queue_register) ++ ++/** ++ * struct kbase_ioctl_cs_queue_kick - Kick the GPU command queue group scheduler ++ * to notify that a queue has been updated ++ * ++ * @buffer_gpu_addr: GPU address of the buffer backing the queue ++ */ ++struct kbase_ioctl_cs_queue_kick { ++ __u64 buffer_gpu_addr; ++}; ++ ++#define KBASE_IOCTL_CS_QUEUE_KICK \ ++ _IOW(KBASE_IOCTL_TYPE, 37, struct kbase_ioctl_cs_queue_kick) ++ ++/** ++ * union kbase_ioctl_cs_queue_bind - Bind a GPU command queue to a group ++ * ++ * @in: Input parameters ++ * @in.buffer_gpu_addr: GPU address of the buffer backing the queue ++ * @in.group_handle: Handle of the group to which the queue should be bound ++ * @in.csi_index: Index of the CSF interface the queue should be bound to ++ * @in.padding: Currently unused, must be zero ++ * @out: Output parameters ++ * @out.mmap_handle: Handle to be used for creating the mapping of CS ++ * input/output pages ++ */ ++union kbase_ioctl_cs_queue_bind { ++ struct { ++ __u64 buffer_gpu_addr; ++ __u8 group_handle; ++ __u8 csi_index; ++ __u8 padding[6]; ++ } in; ++ struct { ++ __u64 mmap_handle; ++ } out; ++}; ++ ++#define KBASE_IOCTL_CS_QUEUE_BIND \ ++ _IOWR(KBASE_IOCTL_TYPE, 39, union kbase_ioctl_cs_queue_bind) ++ ++/** ++ * struct kbase_ioctl_cs_queue_register_ex - Register a GPU command queue with the ++ * base back-end in extended format, ++ * involving trace buffer configuration ++ * ++ * @buffer_gpu_addr: GPU address of the buffer backing the queue ++ * @buffer_size: Size of the buffer in bytes ++ * @priority: Priority of the queue within a group when run within a process ++ * @padding: Currently unused, must be zero ++ * @ex_offset_var_addr: GPU address of the trace buffer write offset variable ++ * @ex_buffer_base: Trace buffer GPU base 
address for the queue ++ * @ex_buffer_size: Size of the trace buffer in bytes ++ * @ex_event_size: Trace event write size, in log2 designation ++ * @ex_event_state: Trace event states configuration ++ * @ex_padding: Currently unused, must be zero ++ * ++ * @Note: There is an identical sub-section at the start of this struct to that ++ * of @ref kbase_ioctl_cs_queue_register. Any change of this sub-section ++ * must also be mirrored to the latter. Following the said sub-section, ++ * the remaining fields forms the extension, marked with ex_*. ++ */ ++struct kbase_ioctl_cs_queue_register_ex { ++ __u64 buffer_gpu_addr; ++ __u32 buffer_size; ++ __u8 priority; ++ __u8 padding[3]; ++ __u64 ex_offset_var_addr; ++ __u64 ex_buffer_base; ++ __u32 ex_buffer_size; ++ __u8 ex_event_size; ++ __u8 ex_event_state; ++ __u8 ex_padding[2]; ++}; ++ ++#define KBASE_IOCTL_CS_QUEUE_REGISTER_EX \ ++ _IOW(KBASE_IOCTL_TYPE, 40, struct kbase_ioctl_cs_queue_register_ex) ++ ++/** ++ * struct kbase_ioctl_cs_queue_terminate - Terminate a GPU command queue ++ * ++ * @buffer_gpu_addr: GPU address of the buffer backing the queue ++ */ ++struct kbase_ioctl_cs_queue_terminate { ++ __u64 buffer_gpu_addr; ++}; ++ ++#define KBASE_IOCTL_CS_QUEUE_TERMINATE \ ++ _IOW(KBASE_IOCTL_TYPE, 41, struct kbase_ioctl_cs_queue_terminate) ++ ++/** ++ * union kbase_ioctl_cs_queue_group_create - Create a GPU command queue group ++ * @in: Input parameters ++ * @in.tiler_mask: Mask of tiler endpoints the group is allowed to use. ++ * @in.fragment_mask: Mask of fragment endpoints the group is allowed to use. ++ * @in.compute_mask: Mask of compute endpoints the group is allowed to use. ++ * @in.cs_min: Minimum number of CSs required. ++ * @in.priority: Queue group's priority within a process. ++ * @in.tiler_max: Maximum number of tiler endpoints the group is allowed ++ * to use. ++ * @in.fragment_max: Maximum number of fragment endpoints the group is ++ * allowed to use. 
++ * @in.compute_max: Maximum number of compute endpoints the group is allowed ++ * to use. ++ * @in.padding: Currently unused, must be zero ++ * @out: Output parameters ++ * @out.group_handle: Handle of a newly created queue group. ++ * @out.padding: Currently unused, must be zero ++ * @out.group_uid: UID of the queue group available to base. ++ */ ++union kbase_ioctl_cs_queue_group_create { ++ struct { ++ __u64 tiler_mask; ++ __u64 fragment_mask; ++ __u64 compute_mask; ++ __u8 cs_min; ++ __u8 priority; ++ __u8 tiler_max; ++ __u8 fragment_max; ++ __u8 compute_max; ++ __u8 padding[3]; ++ ++ } in; ++ struct { ++ __u8 group_handle; ++ __u8 padding[3]; ++ __u32 group_uid; ++ } out; ++}; ++ ++#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE \ ++ _IOWR(KBASE_IOCTL_TYPE, 42, union kbase_ioctl_cs_queue_group_create) ++ ++/** ++ * struct kbase_ioctl_cs_queue_group_term - Terminate a GPU command queue group ++ * ++ * @group_handle: Handle of the queue group to be terminated ++ * @padding: Padding to round up to a multiple of 8 bytes, must be zero ++ */ ++struct kbase_ioctl_cs_queue_group_term { ++ __u8 group_handle; ++ __u8 padding[7]; ++}; ++ ++#define KBASE_IOCTL_CS_QUEUE_GROUP_TERMINATE \ ++ _IOW(KBASE_IOCTL_TYPE, 43, struct kbase_ioctl_cs_queue_group_term) ++ ++#define KBASE_IOCTL_CS_EVENT_SIGNAL \ ++ _IO(KBASE_IOCTL_TYPE, 44) ++ ++typedef __u8 base_kcpu_queue_id; /* We support up to 256 active KCPU queues */ ++ ++/** ++ * struct kbase_ioctl_kcpu_queue_new - Create a KCPU command queue ++ * ++ * @id: ID of the new command queue returned by the kernel ++ * @padding: Padding to round up to a multiple of 8 bytes, must be zero ++ */ ++struct kbase_ioctl_kcpu_queue_new { ++ base_kcpu_queue_id id; ++ __u8 padding[7]; ++}; ++ ++#define KBASE_IOCTL_KCPU_QUEUE_CREATE \ ++ _IOR(KBASE_IOCTL_TYPE, 45, struct kbase_ioctl_kcpu_queue_new) ++ ++/** ++ * struct kbase_ioctl_kcpu_queue_delete - Destroy a KCPU command queue ++ * ++ * @id: ID of the command queue to be destroyed ++ * @padding: 
Padding to round up to a multiple of 8 bytes, must be zero ++ */ ++struct kbase_ioctl_kcpu_queue_delete { ++ base_kcpu_queue_id id; ++ __u8 padding[7]; ++}; ++ ++#define KBASE_IOCTL_KCPU_QUEUE_DELETE \ ++ _IOW(KBASE_IOCTL_TYPE, 46, struct kbase_ioctl_kcpu_queue_delete) ++ ++/** ++ * struct kbase_ioctl_kcpu_queue_enqueue - Enqueue commands into the KCPU queue ++ * ++ * @addr: Memory address of an array of struct base_kcpu_queue_command ++ * @nr_commands: Number of commands in the array ++ * @id: kcpu queue identifier, returned by KBASE_IOCTL_KCPU_QUEUE_CREATE ioctl ++ * @padding: Padding to round up to a multiple of 8 bytes, must be zero ++ */ ++struct kbase_ioctl_kcpu_queue_enqueue { ++ __u64 addr; ++ __u32 nr_commands; ++ base_kcpu_queue_id id; ++ __u8 padding[3]; ++}; ++ ++#define KBASE_IOCTL_KCPU_QUEUE_ENQUEUE \ ++ _IOW(KBASE_IOCTL_TYPE, 47, struct kbase_ioctl_kcpu_queue_enqueue) ++ ++/** ++ * union kbase_ioctl_cs_tiler_heap_init - Initialize chunked tiler memory heap ++ * @in: Input parameters ++ * @in.chunk_size: Size of each chunk. ++ * @in.initial_chunks: Initial number of chunks that heap will be created with. ++ * @in.max_chunks: Maximum number of chunks that the heap is allowed to use. ++ * @in.target_in_flight: Number of render-passes that the driver should attempt to ++ * keep in flight for which allocation of new chunks is ++ * allowed. ++ * @in.group_id: Group ID to be used for physical allocations. ++ * @in.padding: Padding ++ * @out: Output parameters ++ * @out.gpu_heap_va: GPU VA (virtual address) of Heap context that was set up ++ * for the heap. ++ * @out.first_chunk_va: GPU VA of the first chunk allocated for the heap, ++ * actually points to the header of heap chunk and not to ++ * the low address of free memory in the chunk. 
++ */ ++union kbase_ioctl_cs_tiler_heap_init { ++ struct { ++ __u32 chunk_size; ++ __u32 initial_chunks; ++ __u32 max_chunks; ++ __u16 target_in_flight; ++ __u8 group_id; ++ __u8 padding; ++ } in; ++ struct { ++ __u64 gpu_heap_va; ++ __u64 first_chunk_va; ++ } out; ++}; ++ ++#define KBASE_IOCTL_CS_TILER_HEAP_INIT \ ++ _IOWR(KBASE_IOCTL_TYPE, 48, union kbase_ioctl_cs_tiler_heap_init) ++ ++/** ++ * struct kbase_ioctl_cs_tiler_heap_term - Terminate a chunked tiler heap ++ * instance ++ * ++ * @gpu_heap_va: GPU VA of Heap context that was set up for the heap. ++ */ ++struct kbase_ioctl_cs_tiler_heap_term { ++ __u64 gpu_heap_va; ++}; ++ ++#define KBASE_IOCTL_CS_TILER_HEAP_TERM \ ++ _IOW(KBASE_IOCTL_TYPE, 49, struct kbase_ioctl_cs_tiler_heap_term) ++ ++/** ++ * union kbase_ioctl_cs_get_glb_iface - Request the global control block ++ * of CSF interface capabilities ++ * ++ * @in: Input parameters ++ * @in.max_group_num: The maximum number of groups to be read. Can be 0, in ++ * which case groups_ptr is unused. ++ * @in.max_total_stream_num: The maximum number of CSs to be read. Can be 0, in ++ * which case streams_ptr is unused. ++ * @in.groups_ptr: Pointer where to store all the group data (sequentially). ++ * @in.streams_ptr: Pointer where to store all the CS data (sequentially). ++ * @out: Output parameters ++ * @out.glb_version: Global interface version. ++ * @out.features: Bit mask of features (e.g. whether certain types of job ++ * can be suspended). ++ * @out.group_num: Number of CSGs supported. ++ * @out.prfcnt_size: Size of CSF performance counters, in bytes. Bits 31:16 ++ * hold the size of firmware performance counter data ++ * and 15:0 hold the size of hardware performance counter ++ * data. ++ * @out.total_stream_num: Total number of CSs, summed across all groups. ++ * @out.instr_features: Instrumentation features. Bits 7:4 hold the maximum ++ * size of events. Bits 3:0 hold the offset update rate. 
++ * (csf >= 1.1.0) ++ * ++ */ ++union kbase_ioctl_cs_get_glb_iface { ++ struct { ++ __u32 max_group_num; ++ __u32 max_total_stream_num; ++ __u64 groups_ptr; ++ __u64 streams_ptr; ++ } in; ++ struct { ++ __u32 glb_version; ++ __u32 features; ++ __u32 group_num; ++ __u32 prfcnt_size; ++ __u32 total_stream_num; ++ __u32 instr_features; ++ } out; ++}; ++ ++#define KBASE_IOCTL_CS_GET_GLB_IFACE \ ++ _IOWR(KBASE_IOCTL_TYPE, 51, union kbase_ioctl_cs_get_glb_iface) ++ ++struct kbase_ioctl_cs_cpu_queue_info { ++ __u64 buffer; ++ __u64 size; ++}; ++ ++#define KBASE_IOCTL_VERSION_CHECK \ ++ _IOWR(KBASE_IOCTL_TYPE, 52, struct kbase_ioctl_version_check) ++ ++#define KBASE_IOCTL_CS_CPU_QUEUE_DUMP \ ++ _IOW(KBASE_IOCTL_TYPE, 53, struct kbase_ioctl_cs_cpu_queue_info) ++ ++/*************** ++ * test ioctls * ++ ***************/ ++#if MALI_UNIT_TEST ++/* These ioctls are purely for test purposes and are not used in the production ++ * driver, they therefore may change without notice ++ */ ++ ++/** ++ * struct kbase_ioctl_cs_event_memory_write - Write an event memory address ++ * @cpu_addr: Memory address to write ++ * @value: Value to write ++ * @padding: Currently unused, must be zero ++ */ ++struct kbase_ioctl_cs_event_memory_write { ++ __u64 cpu_addr; ++ __u8 value; ++ __u8 padding[7]; ++}; ++ ++/** ++ * union kbase_ioctl_cs_event_memory_read - Read an event memory address ++ * @in: Input parameters ++ * @in.cpu_addr: Memory address to read ++ * @out: Output parameters ++ * @out.value: Value read ++ * @out.padding: Currently unused, must be zero ++ */ ++union kbase_ioctl_cs_event_memory_read { ++ struct { ++ __u64 cpu_addr; ++ } in; ++ struct { ++ __u8 value; ++ __u8 padding[7]; ++ } out; ++}; ++ ++#endif /* MALI_UNIT_TEST */ ++ ++#endif /* _UAPI_KBASE_CSF_IOCTL_H_ */ +diff --git a/dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h +new file mode 100644 +index 
0000000..2041739 +--- /dev/null ++++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h +@@ -0,0 +1,335 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++#ifndef _UAPI_KBASE_GPU_REGMAP_CSF_H_ ++#define _UAPI_KBASE_GPU_REGMAP_CSF_H_ ++ ++#include ++ ++#if !MALI_USE_CSF && defined(__KERNEL__) ++#error "Cannot be compiled with JM" ++#endif ++ ++/* IPA control registers */ ++ ++#define IPA_CONTROL_BASE 0x40000 ++#define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE+(r)) ++#define COMMAND 0x000 /* (WO) Command register */ ++#define STATUS 0x004 /* (RO) Status register */ ++#define TIMER 0x008 /* (RW) Timer control register */ ++ ++#define SELECT_CSHW_LO 0x010 /* (RW) Counter select for CS hardware, low word */ ++#define SELECT_CSHW_HI 0x014 /* (RW) Counter select for CS hardware, high word */ ++#define SELECT_MEMSYS_LO 0x018 /* (RW) Counter select for Memory system, low word */ ++#define SELECT_MEMSYS_HI 0x01C /* (RW) Counter select for Memory system, high word */ ++#define SELECT_TILER_LO 0x020 /* (RW) Counter select for Tiler cores, low word */ ++#define SELECT_TILER_HI 0x024 /* (RW) Counter select for Tiler cores, high word */ ++#define 
SELECT_SHADER_LO 0x028 /* (RW) Counter select for Shader cores, low word */ ++#define SELECT_SHADER_HI 0x02C /* (RW) Counter select for Shader cores, high word */ ++ ++/* Accumulated counter values for CS hardware */ ++#define VALUE_CSHW_BASE 0x100 ++#define VALUE_CSHW_REG_LO(n) (VALUE_CSHW_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ ++#define VALUE_CSHW_REG_HI(n) (VALUE_CSHW_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ ++ ++/* Accumulated counter values for memory system */ ++#define VALUE_MEMSYS_BASE 0x140 ++#define VALUE_MEMSYS_REG_LO(n) (VALUE_MEMSYS_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ ++#define VALUE_MEMSYS_REG_HI(n) (VALUE_MEMSYS_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ ++ ++#define VALUE_TILER_BASE 0x180 ++#define VALUE_TILER_REG_LO(n) (VALUE_TILER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ ++#define VALUE_TILER_REG_HI(n) (VALUE_TILER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ ++ ++#define VALUE_SHADER_BASE 0x1C0 ++#define VALUE_SHADER_REG_LO(n) (VALUE_SHADER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ ++#define VALUE_SHADER_REG_HI(n) (VALUE_SHADER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ ++ ++#include "../../csf/mali_gpu_csf_control_registers.h" ++ ++/* Set to implementation defined, outer caching */ ++#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull ++/* Set to write back memory, outer caching */ ++#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull ++/* Set to inner non-cacheable, outer-non-cacheable ++ * Setting defined by the alloc bits is ignored, but set to a valid encoding: ++ * - no-alloc on read ++ * - no alloc on write ++ */ ++#define AS_MEMATTR_AARCH64_NON_CACHEABLE 0x4Cull ++/* Set to shared memory, that is inner cacheable on ACE and inner or outer ++ * shared, otherwise inner non-cacheable. ++ * Outer cacheable if inner or outer shared, otherwise outer non-cacheable. 
++ */ ++#define AS_MEMATTR_AARCH64_SHARED 0x8ull ++ ++/* Symbols for default MEMATTR to use ++ * Default is - HW implementation defined caching ++ */ ++#define AS_MEMATTR_INDEX_DEFAULT 0 ++#define AS_MEMATTR_INDEX_DEFAULT_ACE 3 ++ ++/* HW implementation defined caching */ ++#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0 ++/* Force cache on */ ++#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1 ++/* Write-alloc */ ++#define AS_MEMATTR_INDEX_WRITE_ALLOC 2 ++/* Outer coherent, inner implementation defined policy */ ++#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3 ++/* Outer coherent, write alloc inner */ ++#define AS_MEMATTR_INDEX_OUTER_WA 4 ++/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */ ++#define AS_MEMATTR_INDEX_NON_CACHEABLE 5 ++/* Normal memory, shared between MCU and Host */ ++#define AS_MEMATTR_INDEX_SHARED 6 ++ ++/* Configuration bits for the CSF. */ ++#define CSF_CONFIG 0xF00 ++ ++/* CSF_CONFIG register */ ++#define CSF_CONFIG_FORCE_COHERENCY_FEATURES_SHIFT 2 ++ ++/* GPU control registers */ ++#define CORE_FEATURES 0x008 /* () Shader Core Features */ ++#define MCU_CONTROL 0x700 ++#define MCU_STATUS 0x704 ++ ++#define MCU_CNTRL_ENABLE (1 << 0) ++#define MCU_CNTRL_AUTO (1 << 1) ++#define MCU_CNTRL_DISABLE (0) ++ ++#define MCU_STATUS_HALTED (1 << 1) ++ ++#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory ++ * region base address, low word ++ */ ++#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory ++ * region base address, high word ++ */ ++#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter ++ * configuration ++ */ ++ ++#define PRFCNT_CSHW_EN 0x06C /* (RW) Performance counter ++ * enable for CS Hardware ++ */ ++ ++#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable ++ * flags for shader cores ++ */ ++#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable ++ * flags for tiler ++ */ ++#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable ++ * flags for MMU/L2 cache ++ */ 
++ ++/* JOB IRQ flags */ ++#define JOB_IRQ_GLOBAL_IF (1 << 31) /* Global interface interrupt received */ ++ ++/* GPU_COMMAND codes */ ++#define GPU_COMMAND_CODE_NOP 0x00 /* No operation, nothing happens */ ++#define GPU_COMMAND_CODE_RESET 0x01 /* Reset the GPU */ ++#define GPU_COMMAND_CODE_PRFCNT 0x02 /* Clear or sample performance counters */ ++#define GPU_COMMAND_CODE_TIME 0x03 /* Configure time sources */ ++#define GPU_COMMAND_CODE_FLUSH_CACHES 0x04 /* Flush caches */ ++#define GPU_COMMAND_CODE_SET_PROTECTED_MODE 0x05 /* Places the GPU in protected mode */ ++#define GPU_COMMAND_CODE_FINISH_HALT 0x06 /* Halt CSF */ ++#define GPU_COMMAND_CODE_CLEAR_FAULT 0x07 /* Clear GPU_FAULTSTATUS and GPU_FAULTADDRESS, TODX */ ++ ++/* GPU_COMMAND_RESET payloads */ ++ ++/* This will leave the state of active jobs UNDEFINED, but will leave the external bus in a defined and idle state. ++ * Power domains will remain powered on. ++ */ ++#define GPU_COMMAND_RESET_PAYLOAD_FAST_RESET 0x00 ++ ++/* This will leave the state of active CSs UNDEFINED, but will leave the external bus in a defined and ++ * idle state. ++ */ ++#define GPU_COMMAND_RESET_PAYLOAD_SOFT_RESET 0x01 ++ ++/* This reset will leave the state of currently active streams UNDEFINED, will likely lose data, and may leave ++ * the system bus in an inconsistent state. Use only as a last resort when nothing else works. 
++ */ ++#define GPU_COMMAND_RESET_PAYLOAD_HARD_RESET 0x02 ++ ++/* GPU_COMMAND_PRFCNT payloads */ ++#define GPU_COMMAND_PRFCNT_PAYLOAD_SAMPLE 0x01 /* Sample performance counters */ ++#define GPU_COMMAND_PRFCNT_PAYLOAD_CLEAR 0x02 /* Clear performance counters */ ++ ++/* GPU_COMMAND_TIME payloads */ ++#define GPU_COMMAND_TIME_DISABLE 0x00 /* Disable cycle counter */ ++#define GPU_COMMAND_TIME_ENABLE 0x01 /* Enable cycle counter */ ++ ++/* GPU_COMMAND_FLUSH_CACHES payloads */ ++#define GPU_COMMAND_FLUSH_PAYLOAD_NONE 0x00 /* No flush */ ++#define GPU_COMMAND_FLUSH_PAYLOAD_CLEAN 0x01 /* Clean the caches */ ++#define GPU_COMMAND_FLUSH_PAYLOAD_INVALIDATE 0x02 /* Invalidate the caches */ ++#define GPU_COMMAND_FLUSH_PAYLOAD_CLEAN_INVALIDATE 0x03 /* Clean and invalidate the caches */ ++ ++/* GPU_COMMAND command + payload */ ++#define GPU_COMMAND_CODE_PAYLOAD(opcode, payload) \ ++ ((__u32)opcode | ((__u32)payload << 8)) ++ ++/* Final GPU_COMMAND form */ ++/* No operation, nothing happens */ ++#define GPU_COMMAND_NOP \ ++ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_NOP, 0) ++ ++/* Stop all external bus interfaces, and then reset the entire GPU. */ ++#define GPU_COMMAND_SOFT_RESET \ ++ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_RESET, GPU_COMMAND_RESET_PAYLOAD_SOFT_RESET) ++ ++/* Immediately reset the entire GPU. */ ++#define GPU_COMMAND_HARD_RESET \ ++ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_RESET, GPU_COMMAND_RESET_PAYLOAD_HARD_RESET) ++ ++/* Clear all performance counters, setting them all to zero. 
*/ ++#define GPU_COMMAND_PRFCNT_CLEAR \ ++ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_PRFCNT, GPU_COMMAND_PRFCNT_PAYLOAD_CLEAR) ++ ++/* Sample all performance counters, writing them out to memory */ ++#define GPU_COMMAND_PRFCNT_SAMPLE \ ++ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_PRFCNT, GPU_COMMAND_PRFCNT_PAYLOAD_SAMPLE) ++ ++/* Starts the cycle counter, and system timestamp propagation */ ++#define GPU_COMMAND_CYCLE_COUNT_START \ ++ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_ENABLE) ++ ++/* Stops the cycle counter, and system timestamp propagation */ ++#define GPU_COMMAND_CYCLE_COUNT_STOP \ ++ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_DISABLE) ++ ++/* Clean all caches */ ++#define GPU_COMMAND_CLEAN_CACHES \ ++ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, GPU_COMMAND_FLUSH_PAYLOAD_CLEAN) ++ ++/* Clean and invalidate all caches */ ++#define GPU_COMMAND_CLEAN_INV_CACHES \ ++ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, GPU_COMMAND_FLUSH_PAYLOAD_CLEAN_INVALIDATE) ++ ++/* Places the GPU in protected mode */ ++#define GPU_COMMAND_SET_PROTECTED_MODE \ ++ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_SET_PROTECTED_MODE, 0) ++ ++/* Halt CSF */ ++#define GPU_COMMAND_FINISH_HALT \ ++ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FINISH_HALT, 0) ++ ++/* Clear GPU faults */ ++#define GPU_COMMAND_CLEAR_FAULT \ ++ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_CLEAR_FAULT, 0) ++ ++/* End Command Values */ ++ ++/* GPU_FAULTSTATUS register */ ++#define GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0 ++#define GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFFul) ++#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \ ++ (((reg_val)&GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK) \ ++ >> GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) ++#define GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT 8 ++#define GPU_FAULTSTATUS_ACCESS_TYPE_MASK \ ++ (0x3ul << GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT) ++ ++#define GPU_FAULTSTATUS_ADDR_VALID_SHIFT 10 ++#define GPU_FAULTSTATUS_ADDR_VALID_FLAG \ ++ 
(1ul << GPU_FAULTSTATUS_ADDR_VALID_SHIFT) ++ ++#define GPU_FAULTSTATUS_JASID_VALID_SHIFT 11 ++#define GPU_FAULTSTATUS_JASID_VALID_FLAG \ ++ (1ul << GPU_FAULTSTATUS_JASID_VALID_SHIFT) ++ ++#define GPU_FAULTSTATUS_JASID_SHIFT 12 ++#define GPU_FAULTSTATUS_JASID_MASK (0xF << GPU_FAULTSTATUS_JASID_SHIFT) ++#define GPU_FAULTSTATUS_JASID_GET(reg_val) \ ++ (((reg_val)&GPU_FAULTSTATUS_JASID_MASK) >> GPU_FAULTSTATUS_JASID_SHIFT) ++#define GPU_FAULTSTATUS_JASID_SET(reg_val, value) \ ++ (((reg_val) & ~GPU_FAULTSTATUS_JASID_MASK) | \ ++ (((value) << GPU_FAULTSTATUS_JASID_SHIFT) & GPU_FAULTSTATUS_JASID_MASK)) ++ ++#define GPU_FAULTSTATUS_SOURCE_ID_SHIFT 16 ++#define GPU_FAULTSTATUS_SOURCE_ID_MASK \ ++ (0xFFFFul << GPU_FAULTSTATUS_SOURCE_ID_SHIFT) ++/* End GPU_FAULTSTATUS register */ ++ ++/* GPU_FAULTSTATUS_ACCESS_TYPE values */ ++#define GPU_FAULTSTATUS_ACCESS_TYPE_ATOMIC 0x0 ++#define GPU_FAULTSTATUS_ACCESS_TYPE_EXECUTE 0x1 ++#define GPU_FAULTSTATUS_ACCESS_TYPE_READ 0x2 ++#define GPU_FAULTSTATUS_ACCESS_TYPE_WRITE 0x3 ++/* End of GPU_FAULTSTATUS_ACCESS_TYPE values */ ++ ++/* Implementation-dependent exception codes used to indicate CSG ++ * and CS errors that are not specified in the specs. 
++ */ ++#define GPU_EXCEPTION_TYPE_SW_FAULT_0 ((__u8)0x70) ++#define GPU_EXCEPTION_TYPE_SW_FAULT_1 ((__u8)0x71) ++#define GPU_EXCEPTION_TYPE_SW_FAULT_2 ((__u8)0x72) ++ ++/* GPU_FAULTSTATUS_EXCEPTION_TYPE values */ ++#define GPU_FAULTSTATUS_EXCEPTION_TYPE_OK 0x00 ++#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_BUS_FAULT 0x80 ++#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_SHAREABILITY_FAULT 0x88 ++#define GPU_FAULTSTATUS_EXCEPTION_TYPE_SYSTEM_SHAREABILITY_FAULT 0x89 ++#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_CACHEABILITY_FAULT 0x8A ++/* End of GPU_FAULTSTATUS_EXCEPTION_TYPE values */ ++ ++#define GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT GPU_U(10) ++#define GPU_FAULTSTATUS_ADDRESS_VALID_MASK (GPU_U(0x1) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) ++#define GPU_FAULTSTATUS_ADDRESS_VALID_GET(reg_val) \ ++ (((reg_val)&GPU_FAULTSTATUS_ADDRESS_VALID_MASK) >> GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) ++#define GPU_FAULTSTATUS_ADDRESS_VALID_SET(reg_val, value) \ ++ (((reg_val) & ~GPU_FAULTSTATUS_ADDRESS_VALID_MASK) | \ ++ (((value) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) & GPU_FAULTSTATUS_ADDRESS_VALID_MASK)) ++ ++/* IRQ flags */ ++#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ ++#define GPU_PROTECTED_FAULT (1 << 1) /* A GPU fault has occurred in protected mode */ ++#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */ ++#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */ ++#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */ ++#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ ++#define DOORBELL_MIRROR (1 << 18) /* Mirrors the doorbell interrupt line to the CPU */ ++#define MCU_STATUS_GPU_IRQ (1 << 19) /* MCU requires attention */ ++ ++/* ++ * In Debug build, ++ * GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE is used to clear and unmask interrupt sources of GPU_IRQ ++ * by writing it onto GPU_IRQ_CLEAR/MASK registers. 
++ * ++ * In Release build, ++ * GPU_IRQ_REG_COMMON is used. ++ * ++ * Note: ++ * CLEAN_CACHES_COMPLETED - Used separately for cache operation. ++ * DOORBELL_MIRROR - Do not have it included for GPU_IRQ_REG_COMMON ++ * as it can't be cleared by GPU_IRQ_CLEAR, thus interrupt storm might happen ++ */ ++#define GPU_IRQ_REG_COMMON (GPU_FAULT | GPU_PROTECTED_FAULT | RESET_COMPLETED \ ++ | POWER_CHANGED_ALL | MCU_STATUS_GPU_IRQ) ++ ++/* GPU_CONTROL_MCU.GPU_IRQ_RAWSTAT */ ++#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when performance count sample has completed */ ++ ++#endif /* _UAPI_KBASE_GPU_REGMAP_CSF_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h +similarity index 89% +rename from dvalin/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h +rename to dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h +index 258ff33..1be3541 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h ++++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,13 +17,14 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +-#ifndef _KBASE_GPU_REGMAP_JM_H_ +-#define _KBASE_GPU_REGMAP_JM_H_ ++#ifndef _UAPI_KBASE_GPU_REGMAP_JM_H_ ++#define _UAPI_KBASE_GPU_REGMAP_JM_H_ + ++#if MALI_USE_CSF && defined(__KERNEL__) ++#error "Cannot be compiled with CSF" ++#endif + + /* Set to implementation defined, outer caching */ + #define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull +@@ -136,8 +138,8 @@ + #define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */ + #define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */ + #define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */ +-#define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job +- slot n */ ++/* (RO) Extended affinity mask for job slot n*/ ++#define JS_XAFFINITY 0x1C + + #define JS_COMMAND 0x20 /* (WO) Command register for job slot n */ + #define JS_STATUS 0x24 /* (RO) Status register for job slot n */ +@@ -148,8 +150,8 @@ + #define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */ + #define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */ + #define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */ +-#define JS_XAFFINITY_NEXT 0x5C /* (RW) Next extended affinity mask for +- job slot n */ ++/* (RW) Next extended affinity mask for job slot n */ ++#define JS_XAFFINITY_NEXT 0x5C + + #define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */ + +@@ -259,4 +261,27 @@ + #define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */ + #define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */ + +-#endif /* _KBASE_GPU_REGMAP_JM_H_ */ ++/* IRQ flags */ ++#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ ++#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */ ++#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. 
*/ ++#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */ ++#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */ ++#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */ ++#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ ++ ++/* ++ * In Debug build, ++ * GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE is used to clear and enable interrupt sources of GPU_IRQ ++ * by writing it onto GPU_IRQ_CLEAR/MASK registers. ++ * ++ * In Release build, ++ * GPU_IRQ_REG_COMMON is used. ++ * ++ * Note: ++ * CLEAN_CACHES_COMPLETED - Used separately for cache operation. ++ */ ++#define GPU_IRQ_REG_COMMON (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \ ++ | POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED) ++ ++#endif /* _UAPI_KBASE_GPU_REGMAP_JM_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h +similarity index 75% +rename from dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h +rename to dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h +index bb2b161..83d8413 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h ++++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,16 +17,14 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +-#ifndef _KBASE_GPU_COHERENCY_H_ +-#define _KBASE_GPU_COHERENCY_H_ ++#ifndef _UAPI_KBASE_GPU_COHERENCY_H_ ++#define _UAPI_KBASE_GPU_COHERENCY_H_ + + #define COHERENCY_ACE_LITE 0 + #define COHERENCY_ACE 1 + #define COHERENCY_NONE 31 + #define COHERENCY_FEATURE_BIT(x) (1 << (x)) + +-#endif /* _KBASE_GPU_COHERENCY_H_ */ ++#endif /* _UAPI_KBASE_GPU_COHERENCY_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h +similarity index 74% +rename from dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h +rename to dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h +index 9f3d6b1..d093ce4 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h ++++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,12 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +-#ifndef _KBASE_GPU_ID_H_ +-#define _KBASE_GPU_ID_H_ ++#ifndef _UAPI_KBASE_GPU_ID_H_ ++#define _UAPI_KBASE_GPU_ID_H_ ++ ++#include + + /* GPU_ID register */ + #define GPU_ID_VERSION_STATUS_SHIFT 0 +@@ -53,19 +54,21 @@ + GPU_ID2_VERSION_STATUS) + + /* Helper macro to create a partial GPU_ID (new format) that defines +- a product ignoring its version. */ ++ * a product ignoring its version. ++ */ + #define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \ +- ((((u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ +- (((u32)arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT) | \ +- (((u32)arch_rev) << GPU_ID2_ARCH_REV_SHIFT) | \ +- (((u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) ++ ((((__u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ ++ (((__u32)arch_minor) << GPU_ID2_ARCH_MINOR_SHIFT) | \ ++ (((__u32)arch_rev) << GPU_ID2_ARCH_REV_SHIFT) | \ ++ (((__u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) + + /* Helper macro to create a partial GPU_ID (new format) that specifies the +- revision (major, minor, status) of a product */ ++ * revision (major, minor, status) of a product ++ */ + #define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \ +- ((((u32)version_major) << GPU_ID2_VERSION_MAJOR_SHIFT) | \ +- (((u32)version_minor) << GPU_ID2_VERSION_MINOR_SHIFT) | \ +- (((u32)version_status) << GPU_ID2_VERSION_STATUS_SHIFT)) ++ ((((__u32)version_major) << GPU_ID2_VERSION_MAJOR_SHIFT) | \ ++ (((__u32)version_minor) << GPU_ID2_VERSION_MINOR_SHIFT) | \ ++ (((__u32)version_status) << GPU_ID2_VERSION_STATUS_SHIFT)) + + /* Helper macro to create a complete GPU_ID (new format) */ + #define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \ +@@ -76,16 +79,18 @@ + version_status)) + + /* Helper macro to create a partial GPU_ID (new format) that identifies +- a particular GPU model by its arch_major and product_major. 
*/ ++ * a particular GPU model by its arch_major and product_major. ++ */ + #define GPU_ID2_MODEL_MAKE(arch_major, product_major) \ +- ((((u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ +- (((u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) ++ ((((__u32)arch_major) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ ++ (((__u32)product_major) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) + + /* Strip off the non-relevant bits from a product_id value and make it suitable +- for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU +- model. */ ++ * for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU ++ * model. ++ */ + #define GPU_ID2_MODEL_MATCH_VALUE(product_id) \ +- ((((u32)product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \ ++ ((((__u32)product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \ + GPU_ID2_PRODUCT_MODEL) + + #define GPU_ID2_PRODUCT_TMIX GPU_ID2_MODEL_MAKE(6, 0) +@@ -98,21 +103,20 @@ + #define GPU_ID2_PRODUCT_TNAX GPU_ID2_MODEL_MAKE(9, 1) + #define GPU_ID2_PRODUCT_TBEX GPU_ID2_MODEL_MAKE(9, 2) + #define GPU_ID2_PRODUCT_LBEX GPU_ID2_MODEL_MAKE(9, 4) ++#define GPU_ID2_PRODUCT_TBAX GPU_ID2_MODEL_MAKE(9, 5) + #define GPU_ID2_PRODUCT_TDUX GPU_ID2_MODEL_MAKE(10, 1) + #define GPU_ID2_PRODUCT_TODX GPU_ID2_MODEL_MAKE(10, 2) + #define GPU_ID2_PRODUCT_TGRX GPU_ID2_MODEL_MAKE(10, 3) + #define GPU_ID2_PRODUCT_TVAX GPU_ID2_MODEL_MAKE(10, 4) + #define GPU_ID2_PRODUCT_LODX GPU_ID2_MODEL_MAKE(10, 7) +-#define GPU_ID2_PRODUCT_TTUX GPU_ID2_MODEL_MAKE(11, 2) +-#define GPU_ID2_PRODUCT_LTUX GPU_ID2_MODEL_MAKE(11, 3) +-#define GPU_ID2_PRODUCT_TE2X GPU_ID2_MODEL_MAKE(11, 1) + + /* Helper macro to create a GPU_ID assuming valid values for id, major, +- minor, status */ ++ * minor, status ++ */ + #define GPU_ID_MAKE(id, major, minor, status) \ +- ((((u32)id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \ +- (((u32)major) << GPU_ID_VERSION_MAJOR_SHIFT) | \ +- (((u32)minor) << GPU_ID_VERSION_MINOR_SHIFT) | \ +- (((u32)status) << GPU_ID_VERSION_STATUS_SHIFT)) ++ ((((__u32)id) << 
GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \ ++ (((__u32)major) << GPU_ID_VERSION_MAJOR_SHIFT) | \ ++ (((__u32)minor) << GPU_ID_VERSION_MINOR_SHIFT) | \ ++ (((__u32)status) << GPU_ID_VERSION_STATUS_SHIFT)) + +-#endif /* _KBASE_GPU_ID_H_ */ ++#endif /* _UAPI_KBASE_GPU_ID_H_ */ +diff --git a/dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h +new file mode 100644 +index 0000000..84fad8d +--- /dev/null ++++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h +@@ -0,0 +1,434 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. 
++ * ++ */ ++ ++#ifndef _UAPI_KBASE_GPU_REGMAP_H_ ++#define _UAPI_KBASE_GPU_REGMAP_H_ ++ ++#include "mali_kbase_gpu_coherency.h" ++#include "mali_kbase_gpu_id.h" ++#if MALI_USE_CSF ++#include "backend/mali_kbase_gpu_regmap_csf.h" ++#else ++#include "backend/mali_kbase_gpu_regmap_jm.h" ++#endif ++ ++/* Begin Register Offsets */ ++/* GPU control registers */ ++ ++#define GPU_CONTROL_BASE 0x0000 ++#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r)) ++#define GPU_ID 0x000 /* (RO) GPU and revision identifier */ ++#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */ ++#define TILER_FEATURES 0x00C /* (RO) Tiler Features */ ++#define MEM_FEATURES 0x010 /* (RO) Memory system features */ ++#define MMU_FEATURES 0x014 /* (RO) MMU features */ ++#define AS_PRESENT 0x018 /* (RO) Address space slots present */ ++#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */ ++#define GPU_IRQ_CLEAR 0x024 /* (WO) */ ++#define GPU_IRQ_MASK 0x028 /* (RW) */ ++#define GPU_IRQ_STATUS 0x02C /* (RO) */ ++ ++#define GPU_COMMAND 0x030 /* (WO) */ ++#define GPU_STATUS 0x034 /* (RO) */ ++ ++#define GPU_DBGEN (1 << 8) /* DBGEN wire status */ ++ ++#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */ ++#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */ ++#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */ ++ ++#define L2_CONFIG 0x048 /* (RW) Level 2 cache configuration */ ++ ++#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ ++#define SUPER_L2_COHERENT (1 << 1) /* Shader cores within a core ++ * supergroup are l2 coherent ++ */ ++ ++#define PWR_KEY 0x050 /* (WO) Power manager key register */ ++#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */ ++#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */ ++#define GPU_FEATURES_LO 0x060 /* (RO) GPU features, low word */ ++#define GPU_FEATURES_HI 0x064 /* (RO) GPU features, high word */ ++#define PRFCNT_FEATURES 0x068 /* 
(RO) Performance counter features */ ++#define TIMESTAMP_OFFSET_LO 0x088 /* (RW) Global time stamp offset, low word */ ++#define TIMESTAMP_OFFSET_HI 0x08C /* (RW) Global time stamp offset, high word */ ++#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */ ++#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */ ++#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */ ++#define TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, high word */ ++ ++#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */ ++#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */ ++#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */ ++#define THREAD_FEATURES 0x0AC /* (RO) Thread features */ ++#define THREAD_TLS_ALLOC 0x310 /* (RO) Number of threads per core that TLS must be allocated for */ ++ ++#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */ ++#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */ ++#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */ ++#define TEXTURE_FEATURES_3 0x0BC /* (RO) Support flags for texture order */ ++ ++#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2)) ++ ++#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ ++#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */ ++ ++#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */ ++#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */ ++ ++#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ ++#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ ++ ++#define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */ ++#define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high 
word */ ++ ++#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */ ++#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */ ++ ++#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */ ++#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */ ++ ++#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */ ++#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */ ++ ++#define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */ ++#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */ ++ ++#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */ ++#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */ ++ ++#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */ ++#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */ ++ ++#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */ ++#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */ ++ ++#define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */ ++#define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */ ++ ++#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */ ++#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */ ++ ++#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */ ++#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */ ++ ++#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */ ++#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */ ++ ++#define STACK_PWROFF_LO 0xE30 /* (RO) Core stack power off bitmap, low word */ ++#define STACK_PWROFF_HI 0xE34 /* (RO) Core stack power off bitmap, high word */ ++ ++#define SHADER_PWRTRANS_LO 0x200 /* 
(RO) Shader core power transition bitmap, low word */ ++#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */ ++ ++#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */ ++#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */ ++ ++#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */ ++#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */ ++ ++#define ASN_HASH_0 0x02C0 /* (RW) ASN hash function argument 0 */ ++#define ASN_HASH(n) (ASN_HASH_0 + (n)*4) ++#define ASN_HASH_COUNT 3 ++ ++#define STACK_PWRTRANS_LO 0xE40 /* (RO) Core stack power transition bitmap, low word */ ++#define STACK_PWRTRANS_HI 0xE44 /* (RO) Core stack power transition bitmap, high word */ ++ ++#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */ ++#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */ ++ ++#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */ ++#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */ ++ ++#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */ ++#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */ ++ ++#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ ++#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */ ++ ++#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration (implementation-specific) */ ++#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration (implementation-specific) */ ++#define L2_MMU_CONFIG 0xF0C /* (RW) L2 cache and MMU configuration (implementation-specific) */ ++ ++/* Job control registers */ ++ ++#define JOB_CONTROL_BASE 0x1000 ++ ++#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r)) ++ ++#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */ ++#define JOB_IRQ_CLEAR 0x004 /* 
Interrupt clear register */ ++#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */ ++#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */ ++ ++/* MMU control registers */ ++ ++#define MEMORY_MANAGEMENT_BASE 0x2000 ++#define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r)) ++ ++#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */ ++#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */ ++#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */ ++#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */ ++ ++#define MMU_AS0 0x400 /* Configuration registers for address space 0 */ ++#define MMU_AS1 0x440 /* Configuration registers for address space 1 */ ++#define MMU_AS2 0x480 /* Configuration registers for address space 2 */ ++#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */ ++#define MMU_AS4 0x500 /* Configuration registers for address space 4 */ ++#define MMU_AS5 0x540 /* Configuration registers for address space 5 */ ++#define MMU_AS6 0x580 /* Configuration registers for address space 6 */ ++#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */ ++#define MMU_AS8 0x600 /* Configuration registers for address space 8 */ ++#define MMU_AS9 0x640 /* Configuration registers for address space 9 */ ++#define MMU_AS10 0x680 /* Configuration registers for address space 10 */ ++#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */ ++#define MMU_AS12 0x700 /* Configuration registers for address space 12 */ ++#define MMU_AS13 0x740 /* Configuration registers for address space 13 */ ++#define MMU_AS14 0x780 /* Configuration registers for address space 14 */ ++#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */ ++ ++/* MMU address space control registers */ ++ ++#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r)) ++ ++#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */ ++#define AS_TRANSTAB_HI 
0x04 /* (RW) Translation Table Base Address for address space n, high word */ ++#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */ ++#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. */ ++#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */ ++#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */ ++#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */ ++#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */ ++#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */ ++#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */ ++#define AS_STATUS 0x28 /* (RO) Status flags for address space n */ ++ ++/* (RW) Translation table configuration for address space n, low word */ ++#define AS_TRANSCFG_LO 0x30 ++/* (RW) Translation table configuration for address space n, high word */ ++#define AS_TRANSCFG_HI 0x34 ++/* (RO) Secondary fault address for address space n, low word */ ++#define AS_FAULTEXTRA_LO 0x38 ++/* (RO) Secondary fault address for address space n, high word */ ++#define AS_FAULTEXTRA_HI 0x3C ++ ++/* End Register Offsets */ ++ ++#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON) ++ ++/* ++ * MMU_IRQ_RAWSTAT register values. Values are valid also for ++ * MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers. 
++ */ ++ ++#define MMU_PAGE_FAULT_FLAGS 16 ++ ++/* Macros returning a bitmask to retrieve page fault or bus error flags from ++ * MMU registers ++ */ ++#define MMU_PAGE_FAULT(n) (1UL << (n)) ++#define MMU_BUS_ERROR(n) (1UL << ((n) + MMU_PAGE_FAULT_FLAGS)) ++ ++/* ++ * Begin AARCH64 MMU TRANSTAB register values ++ */ ++#define MMU_HW_OUTA_BITS 40 ++#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4)) ++ ++/* ++ * Begin MMU STATUS register values ++ */ ++#define AS_STATUS_AS_ACTIVE 0x01 ++ ++#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK (0x7<<3) ++#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT (0x0<<3) ++#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT (0x1<<3) ++#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3) ++#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3) ++#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3) ++#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3) ++ ++#define AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0 ++#define AS_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFF << AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) ++#define AS_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \ ++ (((reg_val)&AS_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) ++#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_0 0xC0 ++ ++#define AS_FAULTSTATUS_ACCESS_TYPE_SHIFT 8 ++#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) ++#define AS_FAULTSTATUS_ACCESS_TYPE_GET(reg_val) \ ++ (((reg_val)&AS_FAULTSTATUS_ACCESS_TYPE_MASK) >> AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) ++ ++#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0) ++#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1) ++#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2) ++#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3) ++ ++#define AS_FAULTSTATUS_SOURCE_ID_SHIFT 16 ++#define AS_FAULTSTATUS_SOURCE_ID_MASK (0xFFFF << AS_FAULTSTATUS_SOURCE_ID_SHIFT) ++#define AS_FAULTSTATUS_SOURCE_ID_GET(reg_val) \ ++ 
(((reg_val)&AS_FAULTSTATUS_SOURCE_ID_MASK) >> AS_FAULTSTATUS_SOURCE_ID_SHIFT) ++ ++#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT (0) ++#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_MASK \ ++ ((0xFF) << PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT) ++#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(reg_val) \ ++ (((reg_val)&PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_MASK) >> \ ++ PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT) ++ ++/* ++ * Begin MMU TRANSCFG register values ++ */ ++#define AS_TRANSCFG_ADRMODE_LEGACY 0 ++#define AS_TRANSCFG_ADRMODE_UNMAPPED 1 ++#define AS_TRANSCFG_ADRMODE_IDENTITY 2 ++#define AS_TRANSCFG_ADRMODE_AARCH64_4K 6 ++#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8 ++ ++#define AS_TRANSCFG_ADRMODE_MASK 0xF ++ ++/* ++ * Begin TRANSCFG register values ++ */ ++#define AS_TRANSCFG_PTW_MEMATTR_MASK (3ull << 24) ++#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1ull << 24) ++#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2ull << 24) ++ ++#define AS_TRANSCFG_PTW_SH_MASK ((3ull << 28)) ++#define AS_TRANSCFG_PTW_SH_OS (2ull << 28) ++#define AS_TRANSCFG_PTW_SH_IS (3ull << 28) ++#define AS_TRANSCFG_R_ALLOCATE (1ull << 30) ++ ++/* ++ * Begin Command Values ++ */ ++ ++/* AS_COMMAND register commands */ ++#define AS_COMMAND_NOP 0x00 /* NOP Operation */ ++#define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */ ++#define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */ ++#define AS_COMMAND_UNLOCK 0x03 /* Issue a flush region command to all MMUs */ ++/* Flush all L2 caches then issue a flush region command to all MMUs ++ * (deprecated - only for use with T60x) ++ */ ++#define AS_COMMAND_FLUSH 0x04 ++/* Flush all L2 caches then issue a flush region command to all MMUs */ ++#define AS_COMMAND_FLUSH_PT 0x04 ++/* Wait for memory accesses to complete, flush all the L1s cache then flush all ++ * L2 caches then issue a flush region command to all MMUs ++ */ ++#define AS_COMMAND_FLUSH_MEM 0x05 ++ ++/* GPU_STATUS values */ ++#define 
GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ ++#define GPU_STATUS_CYCLE_COUNT_ACTIVE (1 << 6) /* Set if the cycle counter is active. */ ++#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */ ++ ++/* PRFCNT_CONFIG register values */ ++#define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */ ++#define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. */ ++#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */ ++ ++/* The performance counters are disabled. */ ++#define PRFCNT_CONFIG_MODE_OFF 0 ++/* The performance counters are enabled, but are only written out when a ++ * PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. ++ */ ++#define PRFCNT_CONFIG_MODE_MANUAL 1 ++/* The performance counters are enabled, and are written out each time a tile ++ * finishes rendering. ++ */ ++#define PRFCNT_CONFIG_MODE_TILE 2 ++ ++/* AS_MEMATTR values from MMU_MEMATTR_STAGE1: */ ++/* Use GPU implementation-defined caching policy. */ ++#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull ++/* The attribute set to force all resources to be cached. */ ++#define AS_MEMATTR_FORCE_TO_CACHE_ALL 0x8Full ++/* Inner write-alloc cache setup, no outer caching */ ++#define AS_MEMATTR_WRITE_ALLOC 0x8Dull ++ ++/* Use GPU implementation-defined caching policy. */ ++#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull ++/* The attribute set to force all resources to be cached. */ ++#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL 0x4Full ++/* Inner write-alloc cache setup, no outer caching */ ++#define AS_MEMATTR_LPAE_WRITE_ALLOC 0x4Dull ++/* Set to implementation defined, outer caching */ ++#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF 0x88ull ++/* Set to write back memory, outer caching */ ++#define AS_MEMATTR_LPAE_OUTER_WA 0x8Dull ++/* There is no LPAE support for non-cacheable, since the memory type is always ++ * write-back. 
++ * Marking this setting as reserved for LPAE ++ */ ++#define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED ++ ++/* L2_MMU_CONFIG register */ ++#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT (23) ++#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT) ++ ++/* End L2_MMU_CONFIG register */ ++ ++/* THREAD_* registers */ ++ ++/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */ ++#define IMPLEMENTATION_UNSPECIFIED 0 ++#define IMPLEMENTATION_SILICON 1 ++#define IMPLEMENTATION_FPGA 2 ++#define IMPLEMENTATION_MODEL 3 ++ ++/* Default values when registers are not supported by the implemented hardware */ ++#define THREAD_MT_DEFAULT 256 ++#define THREAD_MWS_DEFAULT 256 ++#define THREAD_MBS_DEFAULT 256 ++#define THREAD_MR_DEFAULT 1024 ++#define THREAD_MTQ_DEFAULT 4 ++#define THREAD_MTGS_DEFAULT 10 ++ ++/* End THREAD_* registers */ ++ ++/* SHADER_CONFIG register */ ++#define SC_LS_ALLOW_ATTR_TYPES (1ul << 16) ++#define SC_TLS_HASH_ENABLE (1ul << 17) ++#define SC_LS_ATTR_CHECK_DISABLE (1ul << 18) ++#define SC_VAR_ALGORITHM (1ul << 29) ++/* End SHADER_CONFIG register */ ++ ++/* TILER_CONFIG register */ ++#define TC_CLOCK_GATE_OVERRIDE (1ul << 0) ++/* End TILER_CONFIG register */ ++ ++/* L2_CONFIG register */ ++#define L2_CONFIG_SIZE_SHIFT 16 ++#define L2_CONFIG_SIZE_MASK (0xFFul << L2_CONFIG_SIZE_SHIFT) ++#define L2_CONFIG_HASH_SHIFT 24 ++#define L2_CONFIG_HASH_MASK (0xFFul << L2_CONFIG_HASH_SHIFT) ++#define L2_CONFIG_ASN_HASH_ENABLE_SHIFT 24 ++#define L2_CONFIG_ASN_HASH_ENABLE_MASK (1ul << L2_CONFIG_ASN_HASH_ENABLE_SHIFT) ++/* End L2_CONFIG register */ ++ ++/* IDVS_GROUP register */ ++#define IDVS_GROUP_SIZE_SHIFT (16) ++#define IDVS_GROUP_MAX_SIZE (0x3F) ++ ++#endif /* _UAPI_KBASE_GPU_REGMAP_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_base_jm_kernel.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h +similarity index 75% +rename from 
dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_base_jm_kernel.h +rename to dvalin/kernel/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h +index 879a436..749e1fa 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_base_jm_kernel.h ++++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,11 +17,12 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ +-#ifndef _BASE_JM_KERNEL_H_ +-#define _BASE_JM_KERNEL_H_ ++ ++#ifndef _UAPI_BASE_JM_KERNEL_H_ ++#define _UAPI_BASE_JM_KERNEL_H_ ++ ++#include + + /* Memory allocation, access/hint flags. 
+ * +@@ -80,7 +82,8 @@ + */ + #define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11) + +-/* Should be cached on the CPU ++/* IN/OUT */ ++/* Should be cached on the CPU, returned if actually cached + */ + #define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12) + +@@ -120,9 +123,9 @@ + #define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19) + + /** +- * Memory starting from the end of the initial commit is aligned to 'extent' +- * pages, where 'extent' must be a power of 2 and no more than +- * BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES ++ * Memory starting from the end of the initial commit is aligned to 'extension' ++ * pages, where 'extension' must be a power of 2 and no more than ++ * BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES + */ + #define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20) + +@@ -155,18 +158,25 @@ + /* Use the GPU VA chosen by the kernel client */ + #define BASE_MEM_FLAG_MAP_FIXED ((base_mem_alloc_flags)1 << 27) + ++/* OUT */ ++/* Kernel side cache sync ops required */ ++#define BASE_MEM_KERNEL_SYNC ((base_mem_alloc_flags)1 << 28) ++ ++/* Force trimming of JIT allocations when creating a new allocation */ ++#define BASEP_MEM_PERFORM_JIT_TRIM ((base_mem_alloc_flags)1 << 29) ++ + /* Number of bits used as flags for base memory management + * + * Must be kept in sync with the base_mem_alloc_flags flags + */ +-#define BASE_MEM_FLAGS_NR_BITS 28 ++#define BASE_MEM_FLAGS_NR_BITS 30 + + /* A mask of all the flags which are only valid for allocations within kbase, + * and may not be passed from user space. + */ + #define BASEP_MEM_FLAGS_KERNEL_ONLY \ + (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE | \ +- BASE_MEM_FLAG_MAP_FIXED) ++ BASE_MEM_FLAG_MAP_FIXED | BASEP_MEM_PERFORM_JIT_TRIM) + + /* A mask for all output bits, excluding IN/OUT bits. 
+ */ +@@ -192,6 +202,28 @@ + #define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \ + BASE_MEM_COOKIE_BASE) + ++/* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the ++ * initial commit is aligned to 'extension' pages, where 'extension' must be a power ++ * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES ++ */ ++#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP (1 << 0) ++ ++/** ++ * If set, the heap info address points to a __u32 holding the used size in bytes; ++ * otherwise it points to a __u64 holding the lowest address of unused memory. ++ */ ++#define BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE (1 << 1) ++ ++/** ++ * Valid set of just-in-time memory allocation flags ++ * ++ * Note: BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE cannot be set if heap_info_gpu_addr ++ * in %base_jit_alloc_info is 0 (atom with BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE set ++ * and heap_info_gpu_addr being 0 will be rejected). ++ */ ++#define BASE_JIT_ALLOC_VALID_FLAGS \ ++ (BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP | BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) ++ + /** + * typedef base_context_create_flags - Flags to pass to ::base_context_init. + * +@@ -200,7 +232,7 @@ + * These share the same space as BASEP_CONTEXT_FLAG_*, and so must + * not collide with them. + */ +-typedef u32 base_context_create_flags; ++typedef __u32 base_context_create_flags; + + /* No flags set */ + #define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0) +@@ -290,7 +322,7 @@ typedef u32 base_context_create_flags; + * @blob: per-job data array + */ + struct base_jd_udata { +- u64 blob[2]; ++ __u64 blob[2]; + }; + + /** +@@ -303,7 +335,7 @@ struct base_jd_udata { + * When the flag is set for a particular dependency to signal that it is an + * ordering only dependency then errors will not be propagated. 
+ */ +-typedef u8 base_jd_dep_type; ++typedef __u8 base_jd_dep_type; + + #define BASE_JD_DEP_TYPE_INVALID (0) /**< Invalid dependency */ + #define BASE_JD_DEP_TYPE_DATA (1U << 0) /**< Data dependency */ +@@ -319,7 +351,7 @@ typedef u8 base_jd_dep_type; + * Special case is ::BASE_JD_REQ_DEP, which is used to express complex + * dependencies, and that doesn't execute anything on the hardware. + */ +-typedef u32 base_jd_core_req; ++typedef __u32 base_jd_core_req; + + /* Requirements that come from the HW */ + +@@ -551,6 +583,13 @@ typedef u32 base_jd_core_req; + */ + #define BASE_JD_REQ_END_RENDERPASS ((base_jd_core_req)1 << 19) + ++/* SW-only requirement: The atom needs to run on a limited core mask affinity. ++ * ++ * If this bit is set then the kbase_context.limited_core_mask will be applied ++ * to the affinity. ++ */ ++#define BASE_JD_REQ_LIMITED_CORE_MASK ((base_jd_core_req)1 << 20) ++ + /* These requirement bits are currently unused in base_jd_core_req + */ + #define BASEP_JD_REQ_RESERVED \ +@@ -561,7 +600,7 @@ typedef u32 base_jd_core_req; + BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \ + BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END | \ + BASE_JD_REQ_JOB_SLOT | BASE_JD_REQ_START_RENDERPASS | \ +- BASE_JD_REQ_END_RENDERPASS)) ++ BASE_JD_REQ_END_RENDERPASS | BASE_JD_REQ_LIMITED_CORE_MASK)) + + /* Mask of all bits in base_jd_core_req that control the type of the atom. + * +@@ -606,7 +645,7 @@ enum kbase_jd_atom_state { + /** + * typedef base_atom_id - Type big enough to store an atom number in. + */ +-typedef u8 base_atom_id; ++typedef __u8 base_atom_id; + + /** + * struct base_dependency - +@@ -669,10 +708,10 @@ struct base_dependency { + * BASE_JD_REQ_END_RENDERPASS is set in the base_jd_core_req. 
+ */ + struct base_jd_fragment { +- u64 norm_read_norm_write; +- u64 norm_read_forced_write; +- u64 forced_read_forced_write; +- u64 forced_read_norm_write; ++ __u64 norm_read_norm_write; ++ __u64 norm_read_forced_write; ++ __u64 forced_read_forced_write; ++ __u64 forced_read_norm_write; + }; + + /** +@@ -712,7 +751,7 @@ struct base_jd_fragment { + * the same context. See KBASE_JS_SYSTEM_PRIORITY_MODE and + * KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE for more details. + */ +-typedef u8 base_jd_prio; ++typedef __u8 base_jd_prio; + + /* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */ + #define BASE_JD_PRIO_MEDIUM ((base_jd_prio)0) +@@ -722,11 +761,15 @@ typedef u8 base_jd_prio; + #define BASE_JD_PRIO_HIGH ((base_jd_prio)1) + /* Low atom priority. */ + #define BASE_JD_PRIO_LOW ((base_jd_prio)2) ++/* Real-Time atom priority. This is a priority higher than BASE_JD_PRIO_HIGH, ++ * BASE_JD_PRIO_MEDIUM, and BASE_JD_PRIO_LOW ++ */ ++#define BASE_JD_PRIO_REALTIME ((base_jd_prio)3) + + /* Count of the number of priority levels. This itself is not a valid + * base_jd_prio setting + */ +-#define BASE_JD_NR_PRIO_LEVELS 3 ++#define BASE_JD_NR_PRIO_LEVELS 4 + + /** + * struct base_jd_atom_v2 - Node of a dependency graph used to submit a +@@ -759,34 +802,82 @@ typedef u8 base_jd_prio; + * @padding: Unused. Must be zero. + * + * This structure has changed since UK 10.2 for which base_jd_core_req was a +- * u16 value. ++ * __u16 value. + * +- * In UK 10.3 a core_req field of a u32 type was added to the end of the +- * structure, and the place in the structure previously occupied by u16 ++ * In UK 10.3 a core_req field of a __u32 type was added to the end of the ++ * structure, and the place in the structure previously occupied by __u16 + * core_req was kept but renamed to compat_core_req. + * +- * From UK 11.20 - compat_core_req is now occupied by u8 jit_id[2]. ++ * From UK 11.20 - compat_core_req is now occupied by __u8 jit_id[2]. 
+ * Compatibility with UK 10.x from UK 11.y is not handled because + * the major version increase prevents this. + * + * For UK 11.20 jit_id[2] must be initialized to zero. + */ + struct base_jd_atom_v2 { +- u64 jc; ++ __u64 jc; + struct base_jd_udata udata; +- u64 extres_list; +- u16 nr_extres; +- u8 jit_id[2]; ++ __u64 extres_list; ++ __u16 nr_extres; ++ __u8 jit_id[2]; + struct base_dependency pre_dep[2]; + base_atom_id atom_number; + base_jd_prio prio; +- u8 device_nr; +- u8 jobslot; ++ __u8 device_nr; ++ __u8 jobslot; + base_jd_core_req core_req; +- u8 renderpass_id; +- u8 padding[7]; ++ __u8 renderpass_id; ++ __u8 padding[7]; + }; + ++/** ++ * struct base_jd_atom - Same as base_jd_atom_v2, but has an extra seq_nr ++ * at the beginning. ++ * ++ * @seq_nr: Sequence number of logical grouping of atoms. ++ * @jc: GPU address of a job chain or (if BASE_JD_REQ_END_RENDERPASS ++ * is set in the base_jd_core_req) the CPU address of a ++ * base_jd_fragment object. ++ * @udata: User data. ++ * @extres_list: List of external resources. ++ * @nr_extres: Number of external resources or JIT allocations. ++ * @jit_id: Zero-terminated array of IDs of just-in-time memory ++ * allocations written to by the atom. When the atom ++ * completes, the value stored at the ++ * &struct_base_jit_alloc_info.heap_info_gpu_addr of ++ * each allocation is read in order to enforce an ++ * overall physical memory usage limit. ++ * @pre_dep: Pre-dependencies. One need to use SETTER function to assign ++ * this field; this is done in order to reduce possibility of ++ * improper assignment of a dependency field. ++ * @atom_number: Unique number to identify the atom. ++ * @prio: Atom priority. Refer to base_jd_prio for more details. ++ * @device_nr: Core group when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ++ * specified. ++ * @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified. ++ * @core_req: Core requirements. 
++ * @renderpass_id: Renderpass identifier used to associate an atom that has ++ * BASE_JD_REQ_START_RENDERPASS set in its core requirements ++ * with an atom that has BASE_JD_REQ_END_RENDERPASS set. ++ * @padding: Unused. Must be zero. ++ */ ++typedef struct base_jd_atom { ++ __u64 seq_nr; ++ __u64 jc; ++ struct base_jd_udata udata; ++ __u64 extres_list; ++ __u16 nr_extres; ++ __u8 jit_id[2]; ++ struct base_dependency pre_dep[2]; ++ base_atom_id atom_number; ++ base_jd_prio prio; ++ __u8 device_nr; ++ __u8 jobslot; ++ base_jd_core_req core_req; ++ __u8 renderpass_id; ++ __u8 padding[7]; ++} base_jd_atom; ++ + /* Job chain event code bits + * Defines the bits used to create ::base_jd_event_code + */ +@@ -834,6 +925,109 @@ enum { + * Such codes are never returned to + * user-space. + * @BASE_JD_EVENT_RANGE_KERNEL_ONLY_END: End of kernel-only status codes. ++ * @BASE_JD_EVENT_DONE: atom has completed successfully ++ * @BASE_JD_EVENT_JOB_CONFIG_FAULT: Atom dependencies configuration error which ++ * shall result in a failed atom ++ * @BASE_JD_EVENT_JOB_POWER_FAULT: The job could not be executed because the ++ * part of the memory system required to access ++ * job descriptors was not powered on ++ * @BASE_JD_EVENT_JOB_READ_FAULT: Reading a job descriptor into the Job ++ * manager failed ++ * @BASE_JD_EVENT_JOB_WRITE_FAULT: Writing a job descriptor from the Job ++ * manager failed ++ * @BASE_JD_EVENT_JOB_AFFINITY_FAULT: The job could not be executed because the ++ * specified affinity mask does not intersect ++ * any available cores ++ * @BASE_JD_EVENT_JOB_BUS_FAULT: A bus access failed while executing a job ++ * @BASE_JD_EVENT_INSTR_INVALID_PC: A shader instruction with an illegal program ++ * counter was executed. ++ * @BASE_JD_EVENT_INSTR_INVALID_ENC: A shader instruction with an illegal ++ * encoding was executed.
++ * @BASE_JD_EVENT_INSTR_TYPE_MISMATCH: A shader instruction was executed where ++ * the instruction encoding did not match the ++ * instruction type encoded in the program ++ * counter. ++ * @BASE_JD_EVENT_INSTR_OPERAND_FAULT: A shader instruction was executed that ++ * contained invalid combinations of operands. ++ * @BASE_JD_EVENT_INSTR_TLS_FAULT: A shader instruction was executed that tried ++ * to access the thread local storage section ++ * of another thread. ++ * @BASE_JD_EVENT_INSTR_ALIGN_FAULT: A shader instruction was executed that ++ * tried to do an unsupported unaligned memory ++ * access. ++ * @BASE_JD_EVENT_INSTR_BARRIER_FAULT: A shader instruction was executed that ++ * failed to complete an instruction barrier. ++ * @BASE_JD_EVENT_DATA_INVALID_FAULT: Any data structure read as part of the job ++ * contains invalid combinations of data. ++ * @BASE_JD_EVENT_TILE_RANGE_FAULT: Tile or fragment shading was asked to ++ * process a tile that is entirely outside the ++ * bounding box of the frame. ++ * @BASE_JD_EVENT_STATE_FAULT: Matches ADDR_RANGE_FAULT. A virtual address ++ * has been found that exceeds the virtual ++ * address range. ++ * @BASE_JD_EVENT_OUT_OF_MEMORY: The tiler ran out of memory when executing a job. ++ * @BASE_JD_EVENT_UNKNOWN: If multiple jobs in a job chain fail, only ++ * the first one that reports an error will set ++ * and return full error information. ++ * Subsequent failing jobs will not update the ++ * error status registers, and may write an ++ * error status of UNKNOWN. ++ * @BASE_JD_EVENT_DELAYED_BUS_FAULT: The GPU received a bus fault for access to ++ * physical memory where the original virtual ++ * address is no longer available. ++ * @BASE_JD_EVENT_SHAREABILITY_FAULT: Matches GPU_SHAREABILITY_FAULT. A cache ++ * has detected that the same line has been ++ * accessed as both shareable and non-shareable ++ * memory from inside the GPU.
++ * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1: A memory access hit an invalid table ++ * entry at level 1 of the translation table. ++ * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2: A memory access hit an invalid table ++ * entry at level 2 of the translation table. ++ * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3: A memory access hit an invalid table ++ * entry at level 3 of the translation table. ++ * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4: A memory access hit an invalid table ++ * entry at level 4 of the translation table. ++ * @BASE_JD_EVENT_PERMISSION_FAULT: A memory access could not be allowed due to ++ * the permission flags set in translation ++ * table ++ * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1: A bus fault occurred while reading ++ * level 0 of the translation tables. ++ * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2: A bus fault occurred while reading ++ * level 1 of the translation tables. ++ * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3: A bus fault occurred while reading ++ * level 2 of the translation tables. ++ * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4: A bus fault occurred while reading ++ * level 3 of the translation tables. ++ * @BASE_JD_EVENT_ACCESS_FLAG: Matches ACCESS_FLAG_0. A memory access hit a ++ * translation table entry with the ACCESS_FLAG ++ * bit set to zero in level 0 of the ++ * page table, and the DISABLE_AF_FAULT flag ++ * was not set. ++ * @BASE_JD_EVENT_MEM_GROWTH_FAILED: raised for JIT_ALLOC atoms that failed to ++ * grow memory on demand ++ * @BASE_JD_EVENT_JOB_CANCELLED: raised when this atom was hard-stopped or its ++ * dependencies failed ++ * @BASE_JD_EVENT_JOB_INVALID: raised for many reasons, including invalid data ++ * in the atom which overlaps with ++ * BASE_JD_EVENT_JOB_CONFIG_FAULT, or if the ++ * platform doesn't support the feature specified in ++ * the atom. 
++ * @BASE_JD_EVENT_PM_EVENT: TODO: remove as it's not used ++ * @BASE_JD_EVENT_TIMED_OUT: TODO: remove as it's not used ++ * @BASE_JD_EVENT_BAG_INVALID: TODO: remove as it's not used ++ * @BASE_JD_EVENT_PROGRESS_REPORT: TODO: remove as it's not used ++ * @BASE_JD_EVENT_BAG_DONE: TODO: remove as it's not used ++ * @BASE_JD_EVENT_DRV_TERMINATED: this is a special event generated to indicate ++ * to userspace that the KBase context has been ++ * destroyed and Base should stop listening for ++ * further events ++ * @BASE_JD_EVENT_REMOVED_FROM_NEXT: raised when an atom that was configured in ++ * the GPU has to be retried (but it has not ++ * started) due to e.g., GPU reset ++ * @BASE_JD_EVENT_END_RP_DONE: this is used for incremental rendering to signal ++ * the completion of a renderpass. This value ++ * shouldn't be returned to userspace but I haven't ++ * seen where it is reset back to JD_EVENT_DONE. + * + * HW and low-level SW events are represented by event codes. + * The status of jobs which succeeded are also represented by +@@ -980,9 +1174,14 @@ struct base_jd_event_v2 { + * struct base_dump_cpu_gpu_counters - Structure for + * BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS + * jobs. ++ * @system_time: gpu timestamp ++ * @cycle_counter: gpu cycle count ++ * @sec: cpu time(sec) ++ * @usec: cpu time(usec) ++ * @padding: padding + * + * This structure is stored into the memory pointed to by the @jc field +- * of &struct base_jd_atom_v2. ++ * of &struct base_jd_atom. + * + * It must not occupy the same CPU cache line(s) as any neighboring data. 
+ * This is to avoid cases where access to pages containing the structure +@@ -991,11 +1190,11 @@ struct base_jd_event_v2 { + */ + + struct base_dump_cpu_gpu_counters { +- u64 system_time; +- u64 cycle_counter; +- u64 sec; +- u32 usec; +- u8 padding[36]; ++ __u64 system_time; ++ __u64 cycle_counter; ++ __u64 sec; ++ __u32 usec; ++ __u8 padding[36]; + }; + +-#endif /* _BASE_JM_KERNEL_H_ */ ++#endif /* _UAPI_BASE_JM_KERNEL_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h +similarity index 57% +rename from dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h +rename to dvalin/kernel/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h +index 408e98e..72d75cb 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h ++++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,10 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +-#ifndef _KBASE_JM_IOCTL_H_ +-#define _KBASE_JM_IOCTL_H_ ++#ifndef _UAPI_KBASE_JM_IOCTL_H_ ++#define _UAPI_KBASE_JM_IOCTL_H_ + + #include + #include +@@ -94,16 +93,58 @@ + * - The above changes are checked for safe values in usual builds + * 11.21: + * - v2.0 of mali_trace debugfs file, which now versions the file separately ++ * 11.22: ++ * - Added base_jd_atom (v3), which is seq_nr + base_jd_atom_v2. ++ * KBASE_IOCTL_JOB_SUBMIT supports both in parallel. ++ * 11.23: ++ * - Modified KBASE_IOCTL_MEM_COMMIT behavior to reject requests to modify ++ * the physical memory backing of JIT allocations. This was not supposed ++ * to be a valid use case, but it was allowed by the previous implementation. ++ * 11.24: ++ * - Added a sysfs file 'serialize_jobs' inside a new sub-directory ++ * 'scheduling'. ++ * 11.25: ++ * - Enabled JIT pressure limit in base/kbase by default ++ * 11.26 ++ * - Added kinstr_jm API ++ * 11.27 ++ * - Backwards compatible extension to HWC ioctl. ++ * 11.28: ++ * - Added kernel side cache ops needed hint ++ * 11.29: ++ * - Reserve ioctl 52 ++ * 11.30: ++ * - Add a new priority level BASE_JD_PRIO_REALTIME ++ * - Add ioctl 54: This controls the priority setting. ++ * 11.31: ++ * - Added BASE_JD_REQ_LIMITED_CORE_MASK. ++ * - Added ioctl 55: set_limited_core_count. 
+ */ + #define BASE_UK_VERSION_MAJOR 11 +-#define BASE_UK_VERSION_MINOR 21 ++#define BASE_UK_VERSION_MINOR 31 ++ ++/** ++ * struct kbase_ioctl_version_check - Check version compatibility between ++ * kernel and userspace ++ * ++ * @major: Major version number ++ * @minor: Minor version number ++ */ ++struct kbase_ioctl_version_check { ++ __u16 major; ++ __u16 minor; ++}; ++ ++#define KBASE_IOCTL_VERSION_CHECK \ ++ _IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check) ++ + + /** + * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel + * +- * @addr: Memory address of an array of struct base_jd_atom_v2 ++ * @addr: Memory address of an array of struct base_jd_atom_v2 or v3 + * @nr_atoms: Number of entries in the array +- * @stride: sizeof(struct base_jd_atom_v2) ++ * @stride: sizeof(struct base_jd_atom_v2) or sizeof(struct base_jd_atom) + */ + struct kbase_ioctl_job_submit { + __u64 addr; +@@ -132,5 +173,51 @@ struct kbase_ioctl_soft_event_update { + #define KBASE_IOCTL_SOFT_EVENT_UPDATE \ + _IOW(KBASE_IOCTL_TYPE, 28, struct kbase_ioctl_soft_event_update) + ++/** ++ * struct kbase_kinstr_jm_fd_out - Explains the compatibility information for ++ * the `struct kbase_kinstr_jm_atom_state_change` structure returned from the ++ * kernel ++ * ++ * @size: The size of the `struct kbase_kinstr_jm_atom_state_change` ++ * @version: Represents a breaking change in the ++ * `struct kbase_kinstr_jm_atom_state_change` ++ * @padding: Explicit padding to get the structure up to 64bits. See ++ * https://www.kernel.org/doc/Documentation/ioctl/botching-up-ioctls.rst ++ * ++ * The `struct kbase_kinstr_jm_atom_state_change` may have extra members at the ++ * end of the structure that older user space might not understand. If the ++ * `version` is the same, the structure is still compatible with newer kernels. ++ * The `size` can be used to cast the opaque memory returned from the kernel. 
++ */ ++struct kbase_kinstr_jm_fd_out { ++ __u16 size; ++ __u8 version; ++ __u8 padding[5]; ++}; ++ ++/** ++ * struct kbase_kinstr_jm_fd_in - Options when creating the file descriptor ++ * ++ * @count: Number of atom states that can be stored in the kernel circular ++ * buffer. Must be a power of two ++ * @padding: Explicit padding to get the structure up to 64bits. See ++ * https://www.kernel.org/doc/Documentation/ioctl/botching-up-ioctls.rst ++ */ ++struct kbase_kinstr_jm_fd_in { ++ __u16 count; ++ __u8 padding[6]; ++}; ++ ++union kbase_kinstr_jm_fd { ++ struct kbase_kinstr_jm_fd_in in; ++ struct kbase_kinstr_jm_fd_out out; ++}; ++ ++#define KBASE_IOCTL_KINSTR_JM_FD \ ++ _IOWR(KBASE_IOCTL_TYPE, 51, union kbase_kinstr_jm_fd) ++ ++ ++#define KBASE_IOCTL_VERSION_CHECK_RESERVED \ ++ _IOWR(KBASE_IOCTL_TYPE, 52, struct kbase_ioctl_version_check) + +-#endif /* _KBASE_JM_IOCTL_H_ */ ++#endif /* _UAPI_KBASE_JM_IOCTL_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_base_kernel.h +similarity index 72% +rename from dvalin/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h +rename to dvalin/kernel/include/uapi/gpu/arm/midgard/mali_base_kernel.h +index 1e2744d..a46c41f 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h ++++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_base_kernel.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,48 +17,52 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /* + * Base structures shared with the kernel. + */ + +-#ifndef _BASE_KERNEL_H_ +-#define _BASE_KERNEL_H_ ++#ifndef _UAPI_BASE_KERNEL_H_ ++#define _UAPI_BASE_KERNEL_H_ ++ ++#include + + struct base_mem_handle { + struct { +- u64 handle; ++ __u64 handle; + } basep; + }; + + #include "mali_base_mem_priv.h" +-#include "gpu/mali_kbase_gpu_coherency.h" + #include "gpu/mali_kbase_gpu_id.h" ++#include "gpu/mali_kbase_gpu_coherency.h" + + #define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4 + + #define BASE_MAX_COHERENT_GROUPS 16 + +-#if defined CDBG_ASSERT ++#if defined(CDBG_ASSERT) + #define LOCAL_ASSERT CDBG_ASSERT +-#elif defined KBASE_DEBUG_ASSERT ++#elif defined(KBASE_DEBUG_ASSERT) + #define LOCAL_ASSERT KBASE_DEBUG_ASSERT + #else ++#if defined(__KERNEL__) + #error assert macro not defined! ++#else ++#define LOCAL_ASSERT(...) ((void)#__VA_ARGS__) ++#endif + #endif + + #if defined(PAGE_MASK) && defined(PAGE_SHIFT) + #define LOCAL_PAGE_SHIFT PAGE_SHIFT + #define LOCAL_PAGE_LSB ~PAGE_MASK + #else +-#include ++#ifndef OSU_CONFIG_CPU_PAGE_SIZE_LOG2 ++#define OSU_CONFIG_CPU_PAGE_SIZE_LOG2 12 ++#endif + +-#if defined OSU_CONFIG_CPU_PAGE_SIZE_LOG2 ++#if defined(OSU_CONFIG_CPU_PAGE_SIZE_LOG2) + #define LOCAL_PAGE_SHIFT OSU_CONFIG_CPU_PAGE_SIZE_LOG2 + #define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1) + #else +@@ -85,7 +90,7 @@ struct base_mem_handle { + * More flags can be added to this list, as long as they don't clash + * (see BASE_MEM_FLAGS_NR_BITS for the number of the first free bit). 
+ */ +-typedef u32 base_mem_alloc_flags; ++typedef __u32 base_mem_alloc_flags; + + /* A mask for all the flags which are modifiable via the base_mem_set_flags + * interface. +@@ -121,7 +126,7 @@ typedef u32 base_mem_alloc_flags; + */ + enum base_mem_import_type { + BASE_MEM_IMPORT_TYPE_INVALID = 0, +- /** ++ /* + * Import type with value 1 is deprecated. + */ + BASE_MEM_IMPORT_TYPE_UMM = 2, +@@ -138,8 +143,8 @@ enum base_mem_import_type { + */ + + struct base_mem_import_user_buffer { +- u64 ptr; +- u64 length; ++ __u64 ptr; ++ __u64 length; + }; + + /* Mask to detect 4GB boundary alignment */ +@@ -147,15 +152,15 @@ struct base_mem_import_user_buffer { + /* Mask to detect 4GB boundary (in page units) alignment */ + #define BASE_MEM_PFN_MASK_4GB (BASE_MEM_MASK_4GB >> LOCAL_PAGE_SHIFT) + +-/* Limit on the 'extent' parameter for an allocation with the ++/* Limit on the 'extension' parameter for an allocation with the + * BASE_MEM_TILER_ALIGN_TOP flag set + * + * This is the same as the maximum limit for a Buffer Descriptor's chunk size + */ +-#define BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES_LOG2 \ +- (21u - (LOCAL_PAGE_SHIFT)) +-#define BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES \ +- (1ull << (BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES_LOG2)) ++#define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2 \ ++ (21u - (LOCAL_PAGE_SHIFT)) ++#define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES \ ++ (1ull << (BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2)) + + /* Bit mask of cookies used for for memory allocation setup */ + #define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */ +@@ -163,7 +168,7 @@ struct base_mem_import_user_buffer { + /* Maximum size allowed in a single KBASE_IOCTL_MEM_ALLOC call */ + #define KBASE_MEM_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */ + +-/** ++/* + * struct base_fence - Cross-device synchronisation fence. 
+ * + * A fence is used to signal when the GPU has finished accessing a resource that +@@ -200,8 +205,8 @@ struct base_fence { + */ + struct base_mem_aliasing_info { + struct base_mem_handle handle; +- u64 offset; +- u64 length; ++ __u64 offset; ++ __u64 length; + }; + + /* Maximum percentage of just-in-time memory allocation trimming to perform +@@ -213,28 +218,6 @@ struct base_mem_aliasing_info { + */ + #define BASE_JIT_ALLOC_COUNT (255) + +-/* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the +- * initial commit is aligned to 'extent' pages, where 'extent' must be a power +- * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES +- */ +-#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP (1 << 0) +- +-/** +- * If set, the heap info address points to a u32 holding the used size in bytes; +- * otherwise it points to a u64 holding the lowest address of unused memory. +- */ +-#define BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE (1 << 1) +- +-/** +- * Valid set of just-in-time memory allocation flags +- * +- * Note: BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE cannot be set if heap_info_gpu_addr +- * in %base_jit_alloc_info is 0 (atom with BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE set +- * and heap_info_gpu_addr being 0 will be rejected). 
+- */ +-#define BASE_JIT_ALLOC_VALID_FLAGS \ +- (BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP | BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) +- + /* base_jit_alloc_info in use for kernel driver versions 10.2 to early 11.5 + * + * jit_version is 1 +@@ -245,11 +228,11 @@ struct base_mem_aliasing_info { + * An array of structures was not supported + */ + struct base_jit_alloc_info_10_2 { +- u64 gpu_alloc_addr; +- u64 va_pages; +- u64 commit_pages; +- u64 extent; +- u8 id; ++ __u64 gpu_alloc_addr; ++ __u64 va_pages; ++ __u64 commit_pages; ++ __u64 extension; ++ __u8 id; + }; + + /* base_jit_alloc_info introduced by kernel driver version 11.5, and in use up +@@ -272,16 +255,16 @@ struct base_jit_alloc_info_10_2 { + * 11.10: Arrays of this structure are supported + */ + struct base_jit_alloc_info_11_5 { +- u64 gpu_alloc_addr; +- u64 va_pages; +- u64 commit_pages; +- u64 extent; +- u8 id; +- u8 bin_id; +- u8 max_allocations; +- u8 flags; +- u8 padding[2]; +- u16 usage_id; ++ __u64 gpu_alloc_addr; ++ __u64 va_pages; ++ __u64 commit_pages; ++ __u64 extension; ++ __u8 id; ++ __u8 bin_id; ++ __u8 max_allocations; ++ __u8 flags; ++ __u8 padding[2]; ++ __u16 usage_id; + }; + + /** +@@ -292,7 +275,7 @@ struct base_jit_alloc_info_11_5 { + * @va_pages: The minimum number of virtual pages required. + * @commit_pages: The minimum number of physical pages which + * should back the allocation. +- * @extent: Granularity of physical pages to grow the ++ * @extension: Granularity of physical pages to grow the + * allocation by during a fault. + * @id: Unique ID provided by the caller, this is used + * to pair allocation and free requests. 
+@@ -327,17 +310,17 @@ struct base_jit_alloc_info_11_5 { + * 11.20: added @heap_info_gpu_addr + */ + struct base_jit_alloc_info { +- u64 gpu_alloc_addr; +- u64 va_pages; +- u64 commit_pages; +- u64 extent; +- u8 id; +- u8 bin_id; +- u8 max_allocations; +- u8 flags; +- u8 padding[2]; +- u16 usage_id; +- u64 heap_info_gpu_addr; ++ __u64 gpu_alloc_addr; ++ __u64 va_pages; ++ __u64 commit_pages; ++ __u64 extension; ++ __u8 id; ++ __u8 bin_id; ++ __u8 max_allocations; ++ __u8 flags; ++ __u8 padding[2]; ++ __u16 usage_id; ++ __u64 heap_info_gpu_addr; + }; + + enum base_external_resource_access { +@@ -346,7 +329,7 @@ enum base_external_resource_access { + }; + + struct base_external_resource { +- u64 ext_resource; ++ __u64 ext_resource; + }; + + +@@ -364,13 +347,13 @@ struct base_external_resource { + * sized at allocation time. + */ + struct base_external_resource_list { +- u64 count; ++ __u64 count; + struct base_external_resource ext_res[1]; + }; + + struct base_jd_debug_copy_buffer { +- u64 address; +- u64 size; ++ __u64 address; ++ __u64 size; + struct base_external_resource extres; + }; + +@@ -482,7 +465,7 @@ struct base_jd_debug_copy_buffer { + * population count, since faulty cores may be disabled during production, + * producing a non-contiguous mask. + * +- * The memory requirements for this algorithm can be determined either by a u64 ++ * The memory requirements for this algorithm can be determined either by a __u64 + * population count on the L2_PRESENT mask (a LUT helper already is + * required for the above), or simple assumption that there can be no more than + * 16 coherent groups, since core groups are typically 4 cores. +@@ -491,165 +474,170 @@ struct base_jd_debug_copy_buffer { + #define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4 + + #define BASE_MAX_COHERENT_GROUPS 16 +- ++/** ++ * struct mali_base_gpu_core_props - GPU core props info ++ * @product_id: Product specific value. ++ * @version_status: Status of the GPU release.
No defined values, but starts at ++ * 0 and increases by one for each release status (alpha, beta, EAC, etc.). ++ * 4 bit values (0-15). ++ * @minor_revision: Minor release number of the GPU. "P" part of an "RnPn" ++ * release number. ++ * 8 bit values (0-255). ++ * @major_revision: Major release number of the GPU. "R" part of an "RnPn" ++ * release number. ++ * 4 bit values (0-15). ++ * @padding: padding to align to 8 bytes ++ * @gpu_freq_khz_max: The maximum GPU frequency. Reported to applications by ++ * clGetDeviceInfo() ++ * @log2_program_counter_size: Size of the shader program counter, in bits. ++ * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU. This ++ * is a bitpattern where a set bit indicates that the format is supported. ++ * Before using a texture format, it is recommended that the corresponding ++ * bit be checked. ++ * @gpu_available_memory_size: Theoretical maximum memory available to the GPU. ++ * It is unlikely that a client will be able to allocate all of this memory ++ * for their own purposes, but this at least provides an upper bound on the ++ * memory available to the GPU. ++ * This is required for OpenCL's clGetDeviceInfo() call when ++ * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The ++ * client will not be expecting to allocate anywhere near this value. ++ * @num_exec_engines: The number of execution engines. ++ */ + struct mali_base_gpu_core_props { +- /** +- * Product specific value. +- */ +- u32 product_id; +- +- /** +- * Status of the GPU release. +- * No defined values, but starts at 0 and increases by one for each +- * release status (alpha, beta, EAC, etc.). +- * 4 bit values (0-15). +- */ +- u16 version_status; +- +- /** +- * Minor release number of the GPU. "P" part of an "RnPn" release number. +- * 8 bit values (0-255). +- */ +- u16 minor_revision; +- +- /** +- * Major release number of the GPU. "R" part of an "RnPn" release number. +- * 4 bit values (0-15).
+- */ +- u16 major_revision; +- +- u16 padding; +- +- /* The maximum GPU frequency. Reported to applications by +- * clGetDeviceInfo() +- */ +- u32 gpu_freq_khz_max; +- +- /** +- * Size of the shader program counter, in bits. +- */ +- u32 log2_program_counter_size; +- +- /** +- * TEXTURE_FEATURES_x registers, as exposed by the GPU. This is a +- * bitpattern where a set bit indicates that the format is supported. +- * +- * Before using a texture format, it is recommended that the corresponding +- * bit be checked. +- */ +- u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; +- +- /** +- * Theoretical maximum memory available to the GPU. It is unlikely that a +- * client will be able to allocate all of this memory for their own +- * purposes, but this at least provides an upper bound on the memory +- * available to the GPU. +- * +- * This is required for OpenCL's clGetDeviceInfo() call when +- * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The +- * client will not be expecting to allocate anywhere near this value. +- */ +- u64 gpu_available_memory_size; +- +- /** +- * The number of execution engines. +- */ +- u8 num_exec_engines; ++ __u32 product_id; ++ __u16 version_status; ++ __u16 minor_revision; ++ __u16 major_revision; ++ __u16 padding; ++ __u32 gpu_freq_khz_max; ++ __u32 log2_program_counter_size; ++ __u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; ++ __u64 gpu_available_memory_size; ++ __u8 num_exec_engines; + }; + +-/** +- * ++/* + * More information is possible - but associativity and bus width are not + * required by upper-level apis. + */ + struct mali_base_gpu_l2_cache_props { +- u8 log2_line_size; +- u8 log2_cache_size; +- u8 num_l2_slices; /* Number of L2C slices. 1 or higher */ +- u8 padding[5]; ++ __u8 log2_line_size; ++ __u8 log2_cache_size; ++ __u8 num_l2_slices; /* Number of L2C slices. 
1 or higher */ ++ __u8 padding[5]; + }; + + struct mali_base_gpu_tiler_props { +- u32 bin_size_bytes; /* Max is 4*2^15 */ +- u32 max_active_levels; /* Max is 2^15 */ ++ __u32 bin_size_bytes; /* Max is 4*2^15 */ ++ __u32 max_active_levels; /* Max is 2^15 */ + }; + + /** +- * GPU threading system details. ++ * struct mali_base_gpu_thread_props - GPU threading system details. ++ * @max_threads: Max. number of threads per core ++ * @max_workgroup_size: Max. number of threads per workgroup ++ * @max_barrier_size: Max. number of threads that can synchronize on a ++ * simple barrier ++ * @max_registers: Total size [1..65535] of the register file available ++ * per core. ++ * @max_task_queue: Max. tasks [1..255] which may be sent to a core ++ * before it becomes blocked. ++ * @max_thread_group_split: Max. allowed value [1..15] of the Thread Group Split ++ * field. ++ * @impl_tech: 0 = Not specified, 1 = Silicon, 2 = FPGA, ++ * 3 = SW Model/Emulation ++ * @padding: padding to align to 8 bytes ++ * @tls_alloc: Number of threads per core that TLS must be ++ * allocated for + */ + struct mali_base_gpu_thread_props { +- u32 max_threads; /* Max. number of threads per core */ +- u32 max_workgroup_size; /* Max. number of threads per workgroup */ +- u32 max_barrier_size; /* Max. number of threads that can synchronize on a simple barrier */ +- u16 max_registers; /* Total size [1..65535] of the register file available per core. */ +- u8 max_task_queue; /* Max. tasks [1..255] which may be sent to a core before it becomes blocked. */ +- u8 max_thread_group_split; /* Max. allowed value [1..15] of the Thread Group Split field.
*/ +- u8 impl_tech; /* 0 = Not specified, 1 = Silicon, 2 = FPGA, 3 = SW Model/Emulation */ +- u8 padding[3]; +- u32 tls_alloc; /* Number of threads per core that TLS must +- * be allocated for +- */ ++ __u32 max_threads; ++ __u32 max_workgroup_size; ++ __u32 max_barrier_size; ++ __u16 max_registers; ++ __u8 max_task_queue; ++ __u8 max_thread_group_split; ++ __u8 impl_tech; ++ __u8 padding[3]; ++ __u32 tls_alloc; + }; + + /** + * struct mali_base_gpu_coherent_group - descriptor for a coherent group ++ * @core_mask: Core restriction mask required for the group ++ * @num_cores: Number of cores in the group ++ * @padding: padding to align to 8 bytes + * + * \c core_mask exposes all cores in that coherent group, and \c num_cores +- * provides a cached population-count for that mask. ++ * provides a cached population-count for that mask. + * + * @note Whilst all cores are exposed in the mask, not all may be available to +- * the application, depending on the Kernel Power policy. ++ * the application, depending on the Kernel Power policy. + * +- * @note if u64s must be 8-byte aligned, then this structure has 32-bits of wastage. ++ * @note if u64s must be 8-byte aligned, then this structure has 32-bits of ++ * wastage. + */ + struct mali_base_gpu_coherent_group { +- u64 core_mask; /**< Core restriction mask required for the group */ +- u16 num_cores; /**< Number of cores in the group */ +- u16 padding[3]; ++ __u64 core_mask; ++ __u16 num_cores; ++ __u16 padding[3]; + }; + + /** + * struct mali_base_gpu_coherent_group_info - Coherency group information ++ * @num_groups: Number of coherent groups in the GPU. ++ * @num_core_groups: Number of core groups (coherent or not) in the GPU. ++ * Equivalent to the number of L2 Caches. ++ * The GPU Counter dumping writes 2048 bytes per core group, regardless ++ * of whether the core groups are coherent or not. Hence this member is ++ * needed to calculate how much memory is required for dumping.
++ * @note Do not use it to work out how many valid elements are in the ++ * group[] member. Use num_groups instead. ++ * @coherency: Coherency features of the memory, accessed by gpu_mem_features ++ * methods ++ * @padding: padding to align to 8 bytes ++ * @group: Descriptors of coherent groups + * + * Note that the sizes of the members could be reduced. However, the \c group +- * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte ++ * member might be 8-byte aligned to ensure the __u64 core_mask is 8-byte + * aligned, thus leading to wastage if the other members sizes were reduced. + * + * The groups are sorted by core mask. The core masks are non-repeating and do + * not intersect. + */ + struct mali_base_gpu_coherent_group_info { +- u32 num_groups; +- +- /** +- * Number of core groups (coherent or not) in the GPU. Equivalent to the number of L2 Caches. +- * +- * The GPU Counter dumping writes 2048 bytes per core group, regardless of +- * whether the core groups are coherent or not. Hence this member is needed +- * to calculate how much memory is required for dumping. +- * +- * @note Do not use it to work out how many valid elements are in the +- * group[] member. Use num_groups instead. +- */ +- u32 num_core_groups; +- +- /** +- * Coherency features of the memory, accessed by gpu_mem_features +- * methods +- */ +- u32 coherency; +- +- u32 padding; +- +- /** +- * Descriptors of coherent groups +- */ ++ __u32 num_groups; ++ __u32 num_core_groups; ++ __u32 coherency; ++ __u32 padding; + struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS]; + }; + + /** + * struct gpu_raw_gpu_props - A complete description of the GPU's Hardware + * Configuration Discovery registers.
++ * @shader_present: Shader core present bitmap ++ * @tiler_present: Tiler core present bitmap ++ * @l2_present: Level 2 cache present bitmap ++ * @stack_present: Core stack present bitmap ++ * @l2_features: L2 features ++ * @core_features: Core features ++ * @mem_features: Mem features ++ * @mmu_features: Mmu features ++ * @as_present: Bitmap of address spaces present ++ * @js_present: Job slots present ++ * @js_features: Array of job slot features. ++ * @tiler_features: Tiler features ++ * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU ++ * @gpu_id: GPU and revision identifier ++ * @thread_max_threads: Maximum number of threads per core ++ * @thread_max_workgroup_size: Maximum number of threads per workgroup ++ * @thread_max_barrier_size: Maximum number of threads per barrier ++ * @thread_features: Thread features ++ * @coherency_mode: Note: This is the _selected_ coherency mode rather than the ++ * available modes as exposed in the coherency_features register ++ * @thread_tls_alloc: Number of threads per core that TLS must be allocated for ++ * @gpu_features: GPU features + * + * The information is presented inefficiently for access. 
For frequent access, + * the values should be better expressed in an unpacked form in the +@@ -664,62 +652,69 @@ struct mali_base_gpu_coherent_group_info { + * + */ + struct gpu_raw_gpu_props { +- u64 shader_present; +- u64 tiler_present; +- u64 l2_present; +- u64 stack_present; ++ __u64 shader_present; ++ __u64 tiler_present; ++ __u64 l2_present; ++ __u64 stack_present; ++ __u32 l2_features; ++ __u32 core_features; ++ __u32 mem_features; ++ __u32 mmu_features; + +- u32 l2_features; +- u32 core_features; +- u32 mem_features; +- u32 mmu_features; ++ __u32 as_present; + +- u32 as_present; ++ __u32 js_present; ++ __u32 js_features[GPU_MAX_JOB_SLOTS]; ++ __u32 tiler_features; ++ __u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; + +- u32 js_present; +- u32 js_features[GPU_MAX_JOB_SLOTS]; +- u32 tiler_features; +- u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; ++ __u32 gpu_id; + +- u32 gpu_id; +- +- u32 thread_max_threads; +- u32 thread_max_workgroup_size; +- u32 thread_max_barrier_size; +- u32 thread_features; ++ __u32 thread_max_threads; ++ __u32 thread_max_workgroup_size; ++ __u32 thread_max_barrier_size; ++ __u32 thread_features; + + /* + * Note: This is the _selected_ coherency mode rather than the + * available modes as exposed in the coherency_features register. + */ +- u32 coherency_mode; ++ __u32 coherency_mode; + +- u32 thread_tls_alloc; ++ __u32 thread_tls_alloc; ++ __u64 gpu_features; + }; + + /** + * struct base_gpu_props - Return structure for base_get_gpu_props(). ++ * @core_props: Core props. ++ * @l2_props: L2 props. ++ * @unused_1: Keep for backwards compatibility. ++ * @tiler_props: Tiler props. ++ * @thread_props: Thread props. ++ * @raw_props: This member is large, likely to be 128 bytes. ++ * @coherency_info: This must be last member of the structure. + * + * NOTE: the raw_props member in this data structure contains the register + * values from which the value of the other members are derived. 
The derived + * members exist to allow for efficient access and/or shielding the details + * of the layout of the registers. +- * +- * @unused_1: Keep for backwards compatibility. +- * @raw_props: This member is large, likely to be 128 bytes. +- * @coherency_info: This must be last member of the structure. +- */ ++ * */ + struct base_gpu_props { + struct mali_base_gpu_core_props core_props; + struct mali_base_gpu_l2_cache_props l2_props; +- u64 unused_1; ++ __u64 unused_1; + struct mali_base_gpu_tiler_props tiler_props; + struct mali_base_gpu_thread_props thread_props; + struct gpu_raw_gpu_props raw_props; + struct mali_base_gpu_coherent_group_info coherency_info; + }; + ++#if MALI_USE_CSF ++#include "csf/mali_base_csf_kernel.h" ++#else + #include "jm/mali_base_jm_kernel.h" ++#endif + + /** + * base_mem_group_id_get() - Get group ID from flags +@@ -730,7 +725,7 @@ struct base_gpu_props { + * + * Return: group ID(0~15) extracted from the parameter + */ +-static inline int base_mem_group_id_get(base_mem_alloc_flags flags) ++static __inline__ int base_mem_group_id_get(base_mem_alloc_flags flags) + { + LOCAL_ASSERT((flags & ~BASE_MEM_FLAGS_INPUT_MASK) == 0); + return (int)((flags & BASE_MEM_GROUP_ID_MASK) >> +@@ -749,10 +744,12 @@ static inline int base_mem_group_id_get(base_mem_alloc_flags flags) + * The return value can be combined with other flags against base_mem_alloc + * to identify a specific memory group. + */ +-static inline base_mem_alloc_flags base_mem_group_id_set(int id) ++static __inline__ base_mem_alloc_flags base_mem_group_id_set(int id) + { +- LOCAL_ASSERT(id >= 0); +- LOCAL_ASSERT(id < BASE_MEM_GROUP_COUNT); ++ if ((id < 0) || (id >= BASE_MEM_GROUP_COUNT)) { ++ /* Set to default value when id is out of range. 
*/ ++ id = BASE_MEM_GROUP_DEFAULT; ++ } + + return ((base_mem_alloc_flags)id << BASEP_MEM_GROUP_ID_SHIFT) & + BASE_MEM_GROUP_ID_MASK; +@@ -768,7 +765,7 @@ static inline base_mem_alloc_flags base_mem_group_id_set(int id) + * + * Return: Bitmask of flags to pass to base_context_init. + */ +-static inline base_context_create_flags base_context_mmu_group_id_set( ++static __inline__ base_context_create_flags base_context_mmu_group_id_set( + int const group_id) + { + LOCAL_ASSERT(group_id >= 0); +@@ -788,7 +785,7 @@ static inline base_context_create_flags base_context_mmu_group_id_set( + * + * Return: Physical memory group ID. Valid range is 0..(BASE_MEM_GROUP_COUNT-1). + */ +-static inline int base_context_mmu_group_id_get( ++static __inline__ int base_context_mmu_group_id_get( + base_context_create_flags const flags) + { + LOCAL_ASSERT(flags == (flags & BASEP_CONTEXT_CREATE_ALLOWED_FLAGS)); +@@ -820,4 +817,10 @@ static inline int base_context_mmu_group_id_get( + BASE_TIMEINFO_KERNEL_SOURCE_FLAG | \ + BASE_TIMEINFO_USER_SOURCE_FLAG) + +-#endif /* _BASE_KERNEL_H_ */ ++/* Maximum number of source allocations allowed to create an alias allocation. ++ * This needs to be 4096 * 6 to allow cube map arrays with up to 4096 array ++ * layers, since each cube map in the array will have 6 faces. 
++ */ ++#define BASE_MEM_ALIAS_MAX_ENTS ((size_t)24576) ++ ++#endif /* _UAPI_BASE_KERNEL_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h +similarity index 80% +rename from dvalin/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h +rename to dvalin/kernel/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h +index 844a025..304a334 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h ++++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2010-2015, 2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010-2015, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,14 +17,14 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + ++#ifndef _UAPI_BASE_MEM_PRIV_H_ ++#define _UAPI_BASE_MEM_PRIV_H_ + ++#include + +-#ifndef _BASE_MEM_PRIV_H_ +-#define _BASE_MEM_PRIV_H_ ++#include "mali_base_kernel.h" + + #define BASE_SYNCSET_OP_MSYNC (1U << 0) + #define BASE_SYNCSET_OP_CSYNC (1U << 1) +@@ -48,10 +49,10 @@ + */ + struct basep_syncset { + struct base_mem_handle mem_handle; +- u64 user_addr; +- u64 size; +- u8 type; +- u8 padding[7]; ++ __u64 user_addr; ++ __u64 size; ++ __u8 type; ++ __u8 padding[7]; + }; + +-#endif ++#endif /* _UAPI_BASE_MEM_PRIV_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h +similarity index 52% +rename from dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h +rename to dvalin/kernel/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h +index 10706b8..9baaec1 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h ++++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2015, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,38 +17,59 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +-#ifndef _KBASE_HWCNT_READER_H_ +-#define _KBASE_HWCNT_READER_H_ ++#ifndef _UAPI_KBASE_HWCNT_READER_H_ ++#define _UAPI_KBASE_HWCNT_READER_H_ ++ ++#include ++#include + + /* The ids of ioctl commands. */ + #define KBASE_HWCNT_READER 0xBE +-#define KBASE_HWCNT_READER_GET_HWVER _IOR(KBASE_HWCNT_READER, 0x00, u32) +-#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, u32) +-#define KBASE_HWCNT_READER_DUMP _IOW(KBASE_HWCNT_READER, 0x10, u32) +-#define KBASE_HWCNT_READER_CLEAR _IOW(KBASE_HWCNT_READER, 0x11, u32) +-#define KBASE_HWCNT_READER_GET_BUFFER _IOR(KBASE_HWCNT_READER, 0x20,\ ++#define KBASE_HWCNT_READER_GET_HWVER _IOR(KBASE_HWCNT_READER, 0x00, __u32) ++#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, __u32) ++#define KBASE_HWCNT_READER_DUMP _IOW(KBASE_HWCNT_READER, 0x10, __u32) ++#define KBASE_HWCNT_READER_CLEAR _IOW(KBASE_HWCNT_READER, 0x11, __u32) ++#define KBASE_HWCNT_READER_GET_BUFFER _IOC(_IOC_READ, KBASE_HWCNT_READER, 0x20,\ ++ offsetof(struct kbase_hwcnt_reader_metadata, cycles)) ++#define KBASE_HWCNT_READER_GET_BUFFER_WITH_CYCLES _IOR(KBASE_HWCNT_READER, 0x20,\ + struct kbase_hwcnt_reader_metadata) +-#define KBASE_HWCNT_READER_PUT_BUFFER _IOW(KBASE_HWCNT_READER, 0x21,\ ++#define KBASE_HWCNT_READER_PUT_BUFFER _IOC(_IOC_WRITE, KBASE_HWCNT_READER, 0x21,\ ++ offsetof(struct kbase_hwcnt_reader_metadata, cycles)) ++#define KBASE_HWCNT_READER_PUT_BUFFER_WITH_CYCLES _IOW(KBASE_HWCNT_READER, 0x21,\ + struct kbase_hwcnt_reader_metadata) +-#define KBASE_HWCNT_READER_SET_INTERVAL _IOW(KBASE_HWCNT_READER, 0x30, u32) +-#define KBASE_HWCNT_READER_ENABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x40, u32) +-#define KBASE_HWCNT_READER_DISABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x41, u32) +-#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, u32) ++#define KBASE_HWCNT_READER_SET_INTERVAL _IOW(KBASE_HWCNT_READER, 0x30, __u32) ++#define 
KBASE_HWCNT_READER_ENABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x40, __u32) ++#define KBASE_HWCNT_READER_DISABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x41, __u32) ++#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, __u32) ++#define KBASE_HWCNT_READER_GET_API_VERSION_WITH_FEATURES \ ++ _IOW(KBASE_HWCNT_READER, 0xFF, \ ++ struct kbase_hwcnt_reader_api_version) ++ ++/** ++ * struct kbase_hwcnt_reader_metadata_cycles - GPU clock cycles ++ * @top: the number of cycles associated with the main clock for the ++ * GPU ++ * @shader_cores: the cycles that have elapsed on the GPU shader cores ++ */ ++struct kbase_hwcnt_reader_metadata_cycles { ++ __u64 top; ++ __u64 shader_cores; ++}; + + /** + * struct kbase_hwcnt_reader_metadata - hwcnt reader sample buffer metadata + * @timestamp: time when sample was collected + * @event_id: id of an event that triggered sample collection + * @buffer_idx: position in sampling area where sample buffer was stored ++ * @cycles: the GPU cycles that occurred since the last sample + */ + struct kbase_hwcnt_reader_metadata { +- u64 timestamp; +- u32 event_id; +- u32 buffer_idx; ++ __u64 timestamp; ++ __u32 event_id; ++ __u32 buffer_idx; ++ struct kbase_hwcnt_reader_metadata_cycles cycles; + }; + + /** +@@ -63,9 +85,21 @@ enum base_hwcnt_reader_event { + BASE_HWCNT_READER_EVENT_PERIODIC, + BASE_HWCNT_READER_EVENT_PREJOB, + BASE_HWCNT_READER_EVENT_POSTJOB, +- + BASE_HWCNT_READER_EVENT_COUNT + }; + +-#endif /* _KBASE_HWCNT_READER_H_ */ ++#define KBASE_HWCNT_READER_API_VERSION_NO_FEATURE (0) ++#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_TOP (1 << 0) ++#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES (1 << 1) ++/** ++ * struct kbase_hwcnt_reader_api_version - hwcnt reader API version ++ * @version: API version ++ * @features: available features in this API version ++ */ ++struct kbase_hwcnt_reader_api_version { ++ __u32 version; ++ __u32 features; ++}; ++ ++#endif /* _UAPI_KBASE_HWCNT_READER_H_ */ + +diff --git 
a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ioctl.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h +similarity index 83% +rename from dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ioctl.h +rename to dvalin/kernel/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h +index 977b194..29ff32a 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ioctl.h ++++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2017-2020 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2017-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,12 +17,10 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +-#ifndef _KBASE_IOCTL_H_ +-#define _KBASE_IOCTL_H_ ++#ifndef _UAPI_KBASE_IOCTL_H_ ++#define _UAPI_KBASE_IOCTL_H_ + + #ifdef __cpluscplus + extern "C" { +@@ -30,24 +29,14 @@ extern "C" { + #include + #include + ++#if MALI_USE_CSF ++#include "csf/mali_kbase_csf_ioctl.h" ++#else + #include "jm/mali_kbase_jm_ioctl.h" ++#endif /* MALI_USE_CSF */ + + #define KBASE_IOCTL_TYPE 0x80 + +-/** +- * struct kbase_ioctl_version_check - Check version compatibility with kernel +- * +- * @major: Major version number +- * @minor: Minor version number +- */ +-struct kbase_ioctl_version_check { +- __u16 major; +- __u16 minor; +-}; +- +-#define KBASE_IOCTL_VERSION_CHECK \ +- _IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check) +- + /** + * struct kbase_ioctl_set_flags - Set kernel context creation flags + * +@@ -75,16 +64,16 @@ struct kbase_ioctl_set_flags { + * @flags may be used in the future to request a different format for the + * buffer. With @flags == 0 the following format is used. + * +- * The buffer will be filled with pairs of values, a u32 key identifying the ++ * The buffer will be filled with pairs of values, a __u32 key identifying the + * property followed by the value. The size of the value is identified using + * the bottom bits of the key. The value then immediately followed the key and + * is tightly packed (there is no padding). All keys and values are + * little-endian. 
+ * +- * 00 = u8 +- * 01 = u16 +- * 10 = u32 +- * 11 = u64 ++ * 00 = __u8 ++ * 01 = __u16 ++ * 10 = __u32 ++ * 11 = __u64 + */ + struct kbase_ioctl_get_gpuprops { + __u64 buffer; +@@ -97,22 +86,20 @@ struct kbase_ioctl_get_gpuprops { + + /** + * union kbase_ioctl_mem_alloc - Allocate memory on the GPU +- * +- * @va_pages: The number of pages of virtual address space to reserve +- * @commit_pages: The number of physical pages to allocate +- * @extent: The number of extra pages to allocate on each GPU fault which grows +- * the region +- * @flags: Flags +- * @gpu_va: The GPU virtual address which is allocated +- * + * @in: Input parameters ++ * @in.va_pages: The number of pages of virtual address space to reserve ++ * @in.commit_pages: The number of physical pages to allocate ++ * @in.extension: The number of extra pages to allocate on each GPU fault which grows the region ++ * @in.flags: Flags + * @out: Output parameters ++ * @out.flags: Flags ++ * @out.gpu_va: The GPU virtual address which is allocated + */ + union kbase_ioctl_mem_alloc { + struct { + __u64 va_pages; + __u64 commit_pages; +- __u64 extent; ++ __u64 extension; + __u64 flags; + } in; + struct { +@@ -126,14 +113,13 @@ union kbase_ioctl_mem_alloc { + + /** + * struct kbase_ioctl_mem_query - Query properties of a GPU memory region +- * @gpu_addr: A GPU address contained within the region +- * @query: The type of query +- * @value: The result of the query +- * +- * Use a %KBASE_MEM_QUERY_xxx flag as input for @query. +- * + * @in: Input parameters ++ * @in.gpu_addr: A GPU address contained within the region ++ * @in.query: The type of query + * @out: Output parameters ++ * @out.value: The result of the query ++ * ++ * Use a %KBASE_MEM_QUERY_xxx flag as input for @query. 
+ */ + union kbase_ioctl_mem_query { + struct { +@@ -148,9 +134,9 @@ union kbase_ioctl_mem_query { + #define KBASE_IOCTL_MEM_QUERY \ + _IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query) + +-#define KBASE_MEM_QUERY_COMMIT_SIZE ((u64)1) +-#define KBASE_MEM_QUERY_VA_SIZE ((u64)2) +-#define KBASE_MEM_QUERY_FLAGS ((u64)3) ++#define KBASE_MEM_QUERY_COMMIT_SIZE ((__u64)1) ++#define KBASE_MEM_QUERY_VA_SIZE ((__u64)2) ++#define KBASE_MEM_QUERY_FLAGS ((__u64)3) + + /** + * struct kbase_ioctl_mem_free - Free a memory region +@@ -166,7 +152,7 @@ struct kbase_ioctl_mem_free { + /** + * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader + * @buffer_count: requested number of dumping buffers +- * @jm_bm: counters selection bitmask (JM) ++ * @fe_bm: counters selection bitmask (Front end) + * @shader_bm: counters selection bitmask (Shader) + * @tiler_bm: counters selection bitmask (Tiler) + * @mmu_l2_bm: counters selection bitmask (MMU_L2) +@@ -175,7 +161,7 @@ struct kbase_ioctl_mem_free { + */ + struct kbase_ioctl_hwcnt_reader_setup { + __u32 buffer_count; +- __u32 jm_bm; ++ __u32 fe_bm; + __u32 shader_bm; + __u32 tiler_bm; + __u32 mmu_l2_bm; +@@ -187,14 +173,14 @@ struct kbase_ioctl_hwcnt_reader_setup { + /** + * struct kbase_ioctl_hwcnt_enable - Enable hardware counter collection + * @dump_buffer: GPU address to write counters to +- * @jm_bm: counters selection bitmask (JM) ++ * @fe_bm: counters selection bitmask (Front end) + * @shader_bm: counters selection bitmask (Shader) + * @tiler_bm: counters selection bitmask (Tiler) + * @mmu_l2_bm: counters selection bitmask (MMU_L2) + */ + struct kbase_ioctl_hwcnt_enable { + __u64 dump_buffer; +- __u32 jm_bm; ++ __u32 fe_bm; + __u32 shader_bm; + __u32 tiler_bm; + __u32 mmu_l2_bm; +@@ -353,13 +339,12 @@ struct kbase_ioctl_mem_sync { + /** + * union kbase_ioctl_mem_find_cpu_offset - Find the offset of a CPU pointer + * +- * @gpu_addr: The GPU address of the memory region +- * @cpu_addr: The CPU address to locate +- * 
@size: A size in bytes to validate is contained within the region +- * @offset: The offset from the start of the memory region to @cpu_addr +- * + * @in: Input parameters ++ * @in.gpu_addr: The GPU address of the memory region ++ * @in.cpu_addr: The CPU address to locate ++ * @in.size: A size in bytes to validate is contained within the region + * @out: Output parameters ++ * @out.offset: The offset from the start of the memory region to @cpu_addr + */ + union kbase_ioctl_mem_find_cpu_offset { + struct { +@@ -424,15 +409,15 @@ struct kbase_ioctl_mem_commit { + + /** + * union kbase_ioctl_mem_alias - Create an alias of memory regions +- * @flags: Flags, see BASE_MEM_xxx +- * @stride: Bytes between start of each memory region +- * @nents: The number of regions to pack together into the alias +- * @aliasing_info: Pointer to an array of struct base_mem_aliasing_info +- * @gpu_va: Address of the new alias +- * @va_pages: Size of the new alias +- * + * @in: Input parameters ++ * @in.flags: Flags, see BASE_MEM_xxx ++ * @in.stride: Bytes between start of each memory region ++ * @in.nents: The number of regions to pack together into the alias ++ * @in.aliasing_info: Pointer to an array of struct base_mem_aliasing_info + * @out: Output parameters ++ * @out.flags: Flags, see BASE_MEM_xxx ++ * @out.gpu_va: Address of the new alias ++ * @out.va_pages: Size of the new alias + */ + union kbase_ioctl_mem_alias { + struct { +@@ -453,15 +438,15 @@ union kbase_ioctl_mem_alias { + + /** + * union kbase_ioctl_mem_import - Import memory for use by the GPU +- * @flags: Flags, see BASE_MEM_xxx +- * @phandle: Handle to the external memory +- * @type: Type of external memory, see base_mem_import_type +- * @padding: Amount of extra VA pages to append to the imported buffer +- * @gpu_va: Address of the new alias +- * @va_pages: Size of the new alias +- * + * @in: Input parameters ++ * @in.flags: Flags, see BASE_MEM_xxx ++ * @in.phandle: Handle to the external memory ++ * @in.type: Type of 
external memory, see base_mem_import_type ++ * @in.padding: Amount of extra VA pages to append to the imported buffer + * @out: Output parameters ++ * @out.flags: Flags, see BASE_MEM_xxx ++ * @out.gpu_va: Address of the new alias ++ * @out.va_pages: Size of the new alias + */ + union kbase_ioctl_mem_import { + struct { +@@ -544,7 +529,7 @@ struct kbase_ioctl_mem_profile_add { + /** + * struct kbase_ioctl_sticky_resource_map - Permanently map an external resource + * @count: Number of resources +- * @address: Array of u64 GPU addresses of the external resources to map ++ * @address: Array of __u64 GPU addresses of the external resources to map + */ + struct kbase_ioctl_sticky_resource_map { + __u64 count; +@@ -558,7 +543,7 @@ struct kbase_ioctl_sticky_resource_map { + * struct kbase_ioctl_sticky_resource_map - Unmap a resource mapped which was + * previously permanently mapped + * @count: Number of resources +- * @address: Array of u64 GPU addresses of the external resources to unmap ++ * @address: Array of __u64 GPU addresses of the external resources to unmap + */ + struct kbase_ioctl_sticky_resource_unmap { + __u64 count; +@@ -574,15 +559,13 @@ struct kbase_ioctl_sticky_resource_unmap { + * the given gpu address and + * the offset of that address + * into the region +- * +- * @gpu_addr: GPU virtual address +- * @size: Size in bytes within the region +- * @start: Address of the beginning of the memory region enclosing @gpu_addr +- * for the length of @offset bytes +- * @offset: The offset from the start of the memory region to @gpu_addr +- * + * @in: Input parameters ++ * @in.gpu_addr: GPU virtual address ++ * @in.size: Size in bytes within the region + * @out: Output parameters ++ * @out.start: Address of the beginning of the memory region enclosing @gpu_addr ++ * for the length of @offset bytes ++ * @out.offset: The offset from the start of the memory region to @gpu_addr + */ + union kbase_ioctl_mem_find_gpu_start_and_offset { + struct { +@@ -598,7 +581,6 @@ 
union kbase_ioctl_mem_find_gpu_start_and_offset { + #define KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET \ + _IOWR(KBASE_IOCTL_TYPE, 31, union kbase_ioctl_mem_find_gpu_start_and_offset) + +- + #define KBASE_IOCTL_CINSTR_GWT_START \ + _IO(KBASE_IOCTL_TYPE, 33) + +@@ -607,14 +589,15 @@ union kbase_ioctl_mem_find_gpu_start_and_offset { + + /** + * union kbase_ioctl_gwt_dump - Used to collect all GPU write fault addresses. +- * @addr_buffer: Address of buffer to hold addresses of gpu modified areas. +- * @size_buffer: Address of buffer to hold size of modified areas (in pages) +- * @len: Number of addresses the buffers can hold. +- * @more_data_available: Status indicating if more addresses are available. +- * @no_of_addr_collected: Number of addresses collected into addr_buffer. +- * + * @in: Input parameters ++ * @in.addr_buffer: Address of buffer to hold addresses of gpu modified areas. ++ * @in.size_buffer: Address of buffer to hold size of modified areas (in pages) ++ * @in.len: Number of addresses the buffers can hold. ++ * @in.padding: padding + * @out: Output parameters ++ * @out.no_of_addr_collected: Number of addresses collected into addr_buffer. ++ * @out.more_data_available: Status indicating if more addresses are available. ++ * @out.padding: padding + * + * This structure is used when performing a call to dump GPU write fault + * addresses. +@@ -652,18 +635,15 @@ struct kbase_ioctl_mem_exec_init { + /** + * union kbase_ioctl_get_cpu_gpu_timeinfo - Request zero or more types of + * cpu/gpu time (counter values) +- * +- * @request_flags: Bit-flags indicating the requested types. +- * @paddings: Unused, size alignment matching the out. +- * @sec: Integer field of the monotonic time, unit in seconds. +- * @nsec: Fractional sec of the monotonic time, in nano-seconds. +- * @padding: Unused, for u64 alignment +- * @timestamp: System wide timestamp (counter) value. +- * @cycle_counter: GPU cycle counter value. 
+- * + * @in: Input parameters ++ * @in.request_flags: Bit-flags indicating the requested types. ++ * @in.paddings: Unused, size alignment matching the out. + * @out: Output parameters +- * ++ * @out.sec: Integer field of the monotonic time, unit in seconds. ++ * @out.nsec: Fractional sec of the monotonic time, in nano-seconds. ++ * @out.padding: Unused, for __u64 alignment ++ * @out.timestamp: System wide timestamp (counter) value. ++ * @out.cycle_counter: GPU cycle counter value. + */ + union kbase_ioctl_get_cpu_gpu_timeinfo { + struct { +@@ -682,6 +662,31 @@ union kbase_ioctl_get_cpu_gpu_timeinfo { + #define KBASE_IOCTL_GET_CPU_GPU_TIMEINFO \ + _IOWR(KBASE_IOCTL_TYPE, 50, union kbase_ioctl_get_cpu_gpu_timeinfo) + ++/** ++ * struct kbase_ioctl_context_priority_check - Check the max possible priority ++ * @priority: Input priority & output priority ++ */ ++ ++struct kbase_ioctl_context_priority_check { ++ __u8 priority; ++}; ++ ++#define KBASE_IOCTL_CONTEXT_PRIORITY_CHECK \ ++ _IOWR(KBASE_IOCTL_TYPE, 54, struct kbase_ioctl_context_priority_check) ++ ++/** ++ * struct kbase_ioctl_set_limited_core_count - Set the limited core count. 
++ * ++ * @max_core_count: Maximum core count ++ */ ++struct kbase_ioctl_set_limited_core_count { ++ __u8 max_core_count; ++}; ++ ++#define KBASE_IOCTL_SET_LIMITED_CORE_COUNT \ ++ _IOW(KBASE_IOCTL_TYPE, 55, struct kbase_ioctl_set_limited_core_count) ++ ++ + /*************** + * test ioctls * + ***************/ +@@ -692,23 +697,6 @@ union kbase_ioctl_get_cpu_gpu_timeinfo { + + #define KBASE_IOCTL_TEST_TYPE (KBASE_IOCTL_TYPE + 1) + +-/** +- * struct kbase_ioctl_tlstream_test - Start a timeline stream test +- * +- * @tpw_count: number of trace point writers in each context +- * @msg_delay: time delay between tracepoints from one writer in milliseconds +- * @msg_count: number of trace points written by one writer +- * @aux_msg: if non-zero aux messages will be included +- */ +-struct kbase_ioctl_tlstream_test { +- __u32 tpw_count; +- __u32 msg_delay; +- __u32 msg_count; +- __u32 aux_msg; +-}; +- +-#define KBASE_IOCTL_TLSTREAM_TEST \ +- _IOW(KBASE_IOCTL_TEST_TYPE, 1, struct kbase_ioctl_tlstream_test) + + /** + * struct kbase_ioctl_tlstream_stats - Read tlstream stats for test purposes +@@ -836,13 +824,13 @@ struct kbase_ioctl_tlstream_stats { + #define KBASE_GPUPROP_TEXTURE_FEATURES_3 80 + #define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_3 81 + +-#define KBASE_GPUPROP_NUM_EXEC_ENGINES 82 ++#define KBASE_GPUPROP_NUM_EXEC_ENGINES 82 + + #define KBASE_GPUPROP_RAW_THREAD_TLS_ALLOC 83 + #define KBASE_GPUPROP_TLS_ALLOC 84 +- ++#define KBASE_GPUPROP_RAW_GPU_FEATURES 85 + #ifdef __cpluscplus + } + #endif + +-#endif ++#endif /* _UAPI_KBASE_IOCTL_H_ */ +diff --git a/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_kbase_kinstr_jm_reader.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_kbase_kinstr_jm_reader.h +new file mode 100644 +index 0000000..72e1b9d +--- /dev/null ++++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_kbase_kinstr_jm_reader.h +@@ -0,0 +1,69 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * ++ * (C) COPYRIGHT 2020-2021 ARM Limited. 
All rights reserved. ++ * ++ * This program is free software and is provided to you under the terms of the ++ * GNU General Public License version 2 as published by the Free Software ++ * Foundation, and any use by you of this program is subject to the terms ++ * of such GNU license. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, you can access it online at ++ * http://www.gnu.org/licenses/gpl-2.0.html. ++ * ++ */ ++ ++/* ++ * mali_kbase_kinstr_jm_reader.h ++ * Provides an ioctl API to read kernel atom state changes. The flow of the ++ * API is: ++ * 1. Obtain the file descriptor with ``KBASE_IOCTL_KINSTR_JM_FD`` ++ * 2. Determine the buffer structure layout via the above ioctl's returned ++ * size and version fields in ``struct kbase_kinstr_jm_fd_out`` ++ * 4. Poll the file descriptor for ``POLLIN`` ++ * 5. Get data with read() on the fd ++ * 6. Use the structure version to understand how to read the data from the ++ * buffer ++ * 7. Repeat 4-6 ++ * 8. 
Close the file descriptor ++ */ ++ ++#ifndef _UAPI_KBASE_KINSTR_JM_READER_H_ ++#define _UAPI_KBASE_KINSTR_JM_READER_H_ ++ ++/** ++ * enum kbase_kinstr_jm_reader_atom_state - Determines the work state of an atom ++ * @KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE: Signifies that an atom has ++ * entered a hardware queue ++ * @KBASE_KINSTR_JM_READER_ATOM_STATE_START: Signifies that work has started ++ * on an atom ++ * @KBASE_KINSTR_JM_READER_ATOM_STATE_STOP: Signifies that work has stopped ++ * on an atom ++ * @KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE: Signifies that work has ++ * completed on an atom ++ * @KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT: The number of state enumerations ++ * ++ * We can add new states to the end of this if they do not break the existing ++ * state machine. Old user mode code can gracefully ignore states they do not ++ * understand. ++ * ++ * If we need to make a breaking change to the state machine, we can do that by ++ * changing the version reported by KBASE_IOCTL_KINSTR_JM_FD. This will ++ * mean that old user mode code will fail to understand the new state field in ++ * the structure and gracefully not use the state change API. 
++ */ ++enum kbase_kinstr_jm_reader_atom_state { ++ KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE, ++ KBASE_KINSTR_JM_READER_ATOM_STATE_START, ++ KBASE_KINSTR_JM_READER_ATOM_STATE_STOP, ++ KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE, ++ KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT ++}; ++ ++#endif /* _UAPI_KBASE_KINSTR_JM_READER_H_ */ +diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_uk.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_uk.h +similarity index 69% +rename from dvalin/kernel/drivers/gpu/arm/midgard/mali_uk.h +rename to dvalin/kernel/include/uapi/gpu/arm/midgard/mali_uk.h +index 701f390..fcb6cb8 100644 +--- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_uk.h ++++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_uk.h +@@ -1,11 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* + * +- * (C) COPYRIGHT 2010, 2012-2015, 2018 ARM Limited. All rights reserved. ++ * (C) COPYRIGHT 2010, 2012-2015, 2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms +- * of such GNU licence. ++ * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of +@@ -16,32 +17,22 @@ + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * +- * SPDX-License-Identifier: GPL-2.0 +- * + */ + +- +- + /** +- * @file mali_uk.h + * Types and definitions that are common across OSs for both the user + * and kernel side of the User-Kernel interface. 
+ */ + +-#ifndef _UK_H_ +-#define _UK_H_ ++#ifndef _UAPI_UK_H_ ++#define _UAPI_UK_H_ + + #ifdef __cplusplus + extern "C" { +-#endif /* __cplusplus */ +- +-/** +- * @addtogroup base_api +- * @{ +- */ ++#endif /* __cplusplus */ + + /** +- * @defgroup uk_api User-Kernel Interface API ++ * DOC: uk_api User-Kernel Interface API + * + * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device + * drivers developed as part of the Midgard DDK. Currently that includes the Base driver. +@@ -51,12 +42,16 @@ extern "C" { + * + * This API is internal to the Midgard DDK and is not exposed to any applications. + * +- * @{ + */ + + /** +- * These are identifiers for kernel-side drivers implementing a UK interface, aka UKK clients. The +- * UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Specify this ++ * enum uk_client_id - These are identifiers for kernel-side drivers ++ * implementing a UK interface, aka UKK clients. ++ * @UK_CLIENT_MALI_T600_BASE: Value used to identify the Base driver UK client. ++ * @UK_CLIENT_COUNT: The number of uk clients supported. This must be ++ * the last member of the enum ++ * ++ * The UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Specify this + * identifier to select a UKK client to the uku_open() function. + * + * When a new UKK client driver is created a new identifier needs to be added to the uk_client_id +@@ -65,20 +60,11 @@ extern "C" { + * + */ + enum uk_client_id { +- /** +- * Value used to identify the Base driver UK client. +- */ + UK_CLIENT_MALI_T600_BASE, +- +- /** The number of uk clients supported. 
This must be the last member of the enum */ + UK_CLIENT_COUNT + }; + +-/** @} end group uk_api */ +- +-/** @} *//* end group base_api */ +- + #ifdef __cplusplus + } +-#endif /* __cplusplus */ +-#endif /* _UK_H_ */ ++#endif /* __cplusplus */ ++#endif /* _UAPI_UK_H_ */ +-- +2.29.0 + diff --git a/bifrost/r25p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt b/bifrost/r25p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt new file mode 100644 index 0000000..dd8f733 --- /dev/null +++ b/bifrost/r25p0/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt @@ -0,0 +1,226 @@ +# +# (C) COPYRIGHT 2013-2020 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +* ARM Mali Midgard / Bifrost devices + + +Required properties: + +- compatible : Should be mali<chipversion>, replacing digits with x from the back, +until malit<Major>xx, and it must end with one of: "arm,malit6xx" or +"arm,mali-midgard" or "arm,mali-bifrost" +- reg : Physical base address of the device and length of the register area. +- interrupts : Contains the three IRQ lines required by T-6xx devices +- interrupt-names : Contains the names of IRQ resources in the order they were +provided in the interrupts property. Must contain: "JOB", "MMU", "GPU". 
+ +Optional: + +- clocks : One or more pairs of phandle to clock and clock specifier + for the Mali device. The order is important: the first clock + shall correspond to the "clk_mali" source, while the second clock + (that is optional) shall correspond to the "shadercores" source. +- clock-names : Shall be set to: "clk_mali", "shadercores". +- mali-supply : Phandle to the top level regulator for the Mali device. + Refer to +Documentation/devicetree/bindings/regulator/regulator.txt for details. +- shadercores-supply : Phandle to shader cores regulator for the Mali device. + This is optional. +- operating-points-v2 : Refer to Documentation/devicetree/bindings/power/mali-opp.txt +for details. +- quirks_jm : Used to write to the JM_CONFIG register or equivalent. + Should be used with care. Options passed here are used to override + certain default behavior. Note: This will override 'idvs-group-size' + field in devicetree and module param 'corestack_driver_control', + therefore if 'quirks_jm' is used then 'idvs-group-size' and + 'corestack_driver_control' value should be incorporated into 'quirks_jm'. +- quirks_sc : Used to write to the SHADER_CONFIG register. + Should be used with care. Options passed here are used to override + certain default behavior. +- quirks_tiler : Used to write to the TILER_CONFIG register. + Should be used with care. Options passed here are used to + disable or override certain default behavior. +- quirks_mmu : Used to write to the L2_CONFIG register. + Should be used with care. Options passed here are used to + disable or override certain default behavior. +- power_model : Sets the power model parameters. Defined power models include: + "mali-simple-power-model", "mali-g51-power-model", "mali-g52-power-model", + "mali-g52_r1-power-model", "mali-g71-power-model", "mali-g72-power-model", + "mali-g76-power-model", "mali-g77-power-model", "mali-tnax-power-model" + and "mali-tbex-power-model". 
+ - mali-simple-power-model: this model derives the GPU power usage based + on the GPU voltage scaled by the system temperature. Note: it was + designed for the Juno platform, and may not be suitable for others. + - compatible: Should be "arm,mali-simple-power-model" + - dynamic-coefficient: Coefficient, in pW/(Hz V^2), which is + multiplied by v^2*f to calculate the dynamic power consumption. + - static-coefficient: Coefficient, in uW/V^3, which is + multiplied by v^3 to calculate the static power consumption. + - ts: An array containing coefficients for the temperature + scaling factor. This is used to scale the static power by a + factor of tsf/1000000, + where tsf = ts[3]*T^3 + ts[2]*T^2 + ts[1]*T + ts[0], + and T = temperature in degrees. + - thermal-zone: A string identifying the thermal zone used for + the GPU + - temp-poll-interval-ms: the interval at which the system + temperature is polled + - mali-g*-power-model(s): unless being stated otherwise, these models derive + the GPU power usage based on performance counters, so they are more + accurate. + - compatible: Should be, as examples, "arm,mali-g51-power-model" / + "arm,mali-g72-power-model". + - scale: the dynamic power calculated by the power model is + multiplied by a factor of 'scale'. This value should be + chosen to match a particular implementation. + - min_sample_cycles: Fall back to the simple power model if the + number of GPU cycles for a given counter dump is less than + 'min_sample_cycles'. The default value of this should suffice. + * Note: when IPA is used, two separate power models (simple and counter-based) + are used at different points so care should be taken to configure + both power models in the device tree (specifically dynamic-coefficient, + static-coefficient and scale) to best match the platform. +- system-coherency : Sets the coherency protocol to be used for coherent + accesses made from the GPU. + If not set then no coherency is used. 
+ - 0 : ACE-Lite + - 1 : ACE + - 31 : No coherency +- ipa-model : Sets the IPA model to be used for power management. GPU probe will fail if the + model is not found in the registered models list. If no model is specified here, + a gpu-id based model is picked if available, otherwise the default model is used. + - mali-simple-power-model: Default model used on mali +- idvs-group-size : Override the IDVS group size value. Tasks are sent to + cores in groups of N + 1, i.e. 0xF means 16 tasks. + Valid values are between 0 and 0x3F (inclusive). +- l2-size : Override L2 cache size on GPU that supports it +- l2-hash : Override L2 hash function on GPU that supports it +- arbiter_if : Phandle to the arbif platform device, used to provide KBASE with an interface + to the Arbiter. This is required when using arbitration; setting to a non-NULL + value will enable arbitration. + If arbitration is in use, then there should be no external GPU control. + When arbiter_if is in use then the following must not be present: + - power_model (no IPA allowed with arbitration) + - #cooling-cells + - operating-points-v2 (no dvfs in kbase with arbitration) + - system-coherency with a value of 1 (no full coherency with arbitration) + + + +Example for a Mali GPU with 1 clock and no regulators: + +gpu@0xfc010000 { + compatible = "arm,malit602", "arm,malit60x", "arm,malit6xx", "arm,mali-midgard"; + reg = <0xfc010000 0x4000>; + interrupts = <0 36 4>, <0 37 4>, <0 38 4>; + interrupt-names = "JOB", "MMU", "GPU"; + + clocks = <&pclk_mali>; + clock-names = "clk_mali"; + mali-supply = <&vdd_mali>; + operating-points-v2 = <&gpu_opp_table>; + power_model@0 { + compatible = "arm,mali-simple-power-model"; + static-coefficient = <2427750>; + dynamic-coefficient = <4687>; + ts = <20000 2000 (-20) 2>; + thermal-zone = "gpu"; + }; + power_model@1 { + compatible = "arm,mali-g71-power-model"; + scale = <5>; + }; + + idvs-group-size = <0x7>; + l2-size = /bits/ 8 <0x10>; + l2-hash = /bits/ 8 <0x04>; +}; + 
+gpu_opp_table: opp_table0 { + compatible = "operating-points-v2"; + + opp@533000000 { + opp-hz = /bits/ 64 <533000000>; + opp-microvolt = <1250000>; + }; + opp@450000000 { + opp-hz = /bits/ 64 <450000000>; + opp-microvolt = <1150000>; + }; + opp@400000000 { + opp-hz = /bits/ 64 <400000000>; + opp-microvolt = <1125000>; + }; + opp@350000000 { + opp-hz = /bits/ 64 <350000000>; + opp-microvolt = <1075000>; + }; + opp@266000000 { + opp-hz = /bits/ 64 <266000000>; + opp-microvolt = <1025000>; + }; + opp@160000000 { + opp-hz = /bits/ 64 <160000000>; + opp-microvolt = <925000>; + }; + opp@100000000 { + opp-hz = /bits/ 64 <100000000>; + opp-microvolt = <912500>; + }; +}; + +Example for a Mali GPU with 2 clocks and 2 regulators: + +gpu: gpu@6e000000 { + compatible = "arm,mali-midgard"; + reg = <0x0 0x6e000000 0x0 0x200000>; + interrupts = <0 168 4>, <0 168 4>, <0 168 4>; + interrupt-names = "JOB", "MMU", "GPU"; + clocks = <&clk_mali 0>, <&clk_mali 1>; + clock-names = "clk_mali", "shadercores"; + mali-supply = <&supply0_3v3>; + shadercores-supply = <&supply1_3v3>; + system-coherency = <31>; + operating-points-v2 = <&gpu_opp_table>; +}; + +gpu_opp_table: opp_table0 { + compatible = "operating-points-v2", "operating-points-v2-mali"; + + opp@0 { + opp-hz = /bits/ 64 <50000000>; + opp-hz-real = /bits/ 64 <50000000>, /bits/ 64 <45000000>; + opp-microvolt = <820000>, <800000>; + opp-core-mask = /bits/ 64 <0xf>; + }; + opp@1 { + opp-hz = /bits/ 64 <40000000>; + opp-hz-real = /bits/ 64 <40000000>, /bits/ 64 <35000000>; + opp-microvolt = <720000>, <700000>; + opp-core-mask = /bits/ 64 <0x7>; + }; + opp@2 { + opp-hz = /bits/ 64 <30000000>; + opp-hz-real = /bits/ 64 <30000000>, /bits/ 64 <25000000>; + opp-microvolt = <620000>, <700000>; + opp-core-mask = /bits/ 64 <0x3>; + }; +}; diff --git a/bifrost/r25p0/kernel/Documentation/devicetree/bindings/arm/memory_group_manager.txt b/bifrost/r25p0/kernel/Documentation/devicetree/bindings/arm/memory_group_manager.txt new file mode 100644 
index 0000000..fda8f00 --- /dev/null +++ b/bifrost/r25p0/kernel/Documentation/devicetree/bindings/arm/memory_group_manager.txt @@ -0,0 +1,49 @@ +# +# (C) COPYRIGHT 2019 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +* Arm memory group manager for Mali GPU device drivers + +Required properties: + +- compatible: Must be "arm,physical-memory-group-manager" + +An example node: + + gpu_physical_memory_group_manager: physical-memory-group-manager { + compatible = "arm,physical-memory-group-manager"; + }; + +It must be referenced by the GPU as well, see physical-memory-group-manager: + + gpu: gpu@0x6e000000 { + compatible = "arm,mali-midgard"; + reg = <0x0 0x6e000000 0x0 0x200000>; + interrupts = <0 168 4>, <0 168 4>, <0 168 4>; + interrupt-names = "JOB", "MMU", "GPU"; + clocks = <&scpi_dvfs 2>; + clock-names = "clk_mali"; + system-coherency = <31>; + physical-memory-group-manager = <&gpu_physical_memory_group_manager>; + operating-points = < + /* KHz uV */ + 50000 820000 + >; + }; diff --git a/bifrost/r25p0/kernel/Documentation/devicetree/bindings/arm/protected_memory_allocator.txt b/bifrost/r25p0/kernel/Documentation/devicetree/bindings/arm/protected_memory_allocator.txt new file mode 100644 index 0000000..f054348 --- /dev/null +++ 
b/bifrost/r25p0/kernel/Documentation/devicetree/bindings/arm/protected_memory_allocator.txt @@ -0,0 +1,69 @@ +# +# (C) COPYRIGHT 2019 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +* Arm protected memory allocator for Mali GPU device drivers + +Required properties: + +- compatible: Must be "arm,protected-memory-allocator" + +The protected memory allocator manages allocation of physical pages of a +reserved memory region of protected memory, therefore its device node shall +reference a reserved memory region. + +In addition to that, the protected memory allocator shall be referenced +by the GPU. 
+ +A complete example configuration for the device tree: + + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + mali_protected: mali_protected@c0000000 { + compatible = "mali-reserved"; + reg = <0x0 0xc0000000 0x0 0x1000000>; + }; + }; + + gpu_protected_memory_allocator: protected-memory-allocator { + compatible = "arm,protected-memory-allocator"; + memory-region = <&mali_protected>; + }; + + gpu_fpga: gpu@0x6e000000 { + compatible = "arm,mali-midgard"; + reg = <0x0 0x6e000000 0x0 0x200000>; + interrupts = <0 168 4>, <0 168 4>, <0 168 4>; + interrupt-names = "JOB", "MMU", "GPU"; + clocks = <&scpi_dvfs 2>; + clock-names = "clk_mali"; + protected-memory-allocator = <&gpu_protected_memory_allocator>; + operating-points = < + /* KHz uV */ + 50000 820000 + >; + }; + +The protected memory allocator is gpu_protected_memory_allocator. +It references the mali_protected reserved memory region and, in turn, +it is referenced by the GPU as protected-memory-allocator. diff --git a/bifrost/r25p0/kernel/Documentation/devicetree/bindings/power/mali-opp.txt b/bifrost/r25p0/kernel/Documentation/devicetree/bindings/power/mali-opp.txt new file mode 100644 index 0000000..49ed773 --- /dev/null +++ b/bifrost/r25p0/kernel/Documentation/devicetree/bindings/power/mali-opp.txt @@ -0,0 +1,202 @@ +# +# (C) COPYRIGHT 2017, 2019 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +* ARM Mali Midgard OPP + +* OPP Table Node + +This describes the OPPs belonging to a device. This node can have the following +properties: + +Required properties: +- compatible: Allow OPPs to express their compatibility. It should be: + "operating-points-v2", "operating-points-v2-mali". + +- OPP nodes: One or more OPP nodes describing voltage-current-frequency + combinations. Their name isn't significant but their phandle can be used to + reference an OPP. + +* OPP Node + +This defines voltage-current-frequency combinations along with other related +properties. + +Required properties: +- opp-hz: Nominal frequency in Hz, expressed as a 64-bit big-endian integer. + This should be treated as a relative performance measurement, taking both GPU + frequency and core mask into account. + +Optional properties: +- opp-hz-real: List of one or two real frequencies in Hz, expressed as 64-bit + big-endian integers. They shall correspond to the clocks declared under + the Mali device node, and follow the same order. + +- opp-core-mask: Shader core mask. If neither this nor opp-core-count are present + then all shader cores will be used for this OPP. + +- opp-core-count: Number of cores to use for this OPP. If this is present then + the driver will build a core mask using the available core mask provided by + the GPU hardware. + + If neither this nor opp-core-mask are present then all shader cores will be + used for this OPP. + + If both this and opp-core-mask are present then opp-core-mask is ignored. + +- opp-microvolt: List of one or two voltages in micro Volts. They shall correspond + to the regulators declared under the Mali device node, and follow the order: + "toplevel", "shadercores". 
+ + A single regulator's voltage is specified with an array of size one or three. + Single entry is for target voltage and three entries are for <target min max> + voltages. + + Entries for multiple regulators must be present in the same order as + regulators are specified in device's DT node. + +- opp-microvolt-<name>: Named opp-microvolt property. This is exactly similar to + the above opp-microvolt property, but allows multiple voltage ranges to be + provided for the same OPP. At runtime, the platform can pick a <name> and + matching opp-microvolt-<name> property will be enabled for all OPPs. If the + platform doesn't pick a specific <name> or the <name> doesn't match with any + opp-microvolt-<name> properties, then opp-microvolt property shall be used, if + present. + +- opp-microamp: The maximum current drawn by the device in microamperes + considering system specific parameters (such as transients, process, aging, + maximum operating temperature range etc.) as necessary. This may be used to + set the most efficient regulator operating mode. + + Should only be set if opp-microvolt is set for the OPP. + + Entries for multiple regulators must be present in the same order as + regulators are specified in device's DT node. If this property isn't required + for few regulators, then this should be marked as zero for them. If it isn't + required for any regulator, then this property need not be present. + +- opp-microamp-<name>: Named opp-microamp property. Similar to + opp-microvolt-<name> property, but for microamp instead. + +- clock-latency-ns: Specifies the maximum possible transition latency (in + nanoseconds) for switching to this OPP from any other OPP. + +- turbo-mode: Marks the OPP to be used only for turbo modes. Turbo mode is + available on some platforms, where the device can run over its operating + frequency for a short duration of time limited by the device's power, current + and thermal limits. 
+ +- opp-suspend: Marks the OPP to be used during device suspend. Only one OPP in + the table should have this. 
+ +- opp-mali-errata-1485982: Marks the OPP to be selected for suspend clock. + This will be effective only if MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE is + enabled. It needs to be placed in any OPP that has proper suspend clock for + the HW workaround. + +- opp-supported-hw: This enables us to select only a subset of OPPs from the + larger OPP table, based on what version of the hardware we are running on. We + still can't have multiple nodes with the same opp-hz value in OPP table. + + It's a user-defined array containing a hierarchy of hardware version numbers, + supported by the OPP. For example: a platform with hierarchy of three levels + of versions (A, B and C), this field should be like <X Y Z>, where X + corresponds to Version hierarchy A, Y corresponds to version hierarchy B and Z + corresponds to version hierarchy C. + + Each level of hierarchy is represented by a 32 bit value, and so there can be + only 32 different supported version per hierarchy. i.e. 1 bit per version. A + value of 0xFFFFFFFF will enable the OPP for all versions for that hierarchy + level. And a value of 0x00000000 will disable the OPP completely, and so we + never want that to happen. + + If 32 values aren't sufficient for a version hierarchy, then that version + hierarchy can be contained in multiple 32 bit values. i.e. <X Y Z1 Z2> in the + above example, Z1 & Z2 refer to the version hierarchy Z. + +- status: Marks the node enabled/disabled. 
+ +Example for a Juno with 1 clock and 1 regulator: + +gpu_opp_table: opp_table0 { + compatible = "operating-points-v2", "operating-points-v2-mali"; + + opp@112500000 { + opp-hz = /bits/ 64 <112500000>; + opp-hz-real = /bits/ 64 <450000000>; + opp-microvolt = <820000>; + opp-core-mask = /bits/ 64 <0x1>; + opp-suspend; + opp-mali-errata-1485982; + }; + opp@225000000 { + opp-hz = /bits/ 64 <225000000>; + opp-hz-real = /bits/ 64 <450000000>; + opp-microvolt = <820000>; + opp-core-count = <2>; + }; + opp@450000000 { + opp-hz = /bits/ 64 <450000000>; + opp-hz-real = /bits/ 64 <450000000>; + opp-microvolt = <820000>; + opp-core-mask = /bits/ 64 <0xf>; + }; + opp@487500000 { + opp-hz = /bits/ 64 <487500000>; + opp-microvolt = <825000>; + }; + opp@525000000 { + opp-hz = /bits/ 64 <525000000>; + opp-microvolt = <850000>; + }; + opp@562500000 { + opp-hz = /bits/ 64 <562500000>; + opp-microvolt = <875000>; + }; + opp@600000000 { + opp-hz = /bits/ 64 <600000000>; + opp-microvolt = <900000>; + }; +}; + +Example for a Juno with 2 clocks and 2 regulators: + +gpu_opp_table: opp_table0 { + compatible = "operating-points-v2", "operating-points-v2-mali"; + + opp@0 { + opp-hz = /bits/ 64 <50000000>; + opp-hz-real = /bits/ 64 <50000000>, /bits/ 64 <45000000>; + opp-microvolt = <820000>, <800000>; + opp-core-mask = /bits/ 64 <0xf>; + }; + opp@1 { + opp-hz = /bits/ 64 <40000000>; + opp-hz-real = /bits/ 64 <40000000>, /bits/ 64 <35000000>; + opp-microvolt = <720000>, <700000>; + opp-core-mask = /bits/ 64 <0x7>; + }; + opp@2 { + opp-hz = /bits/ 64 <30000000>; + opp-hz-real = /bits/ 64 <30000000>, /bits/ 64 <25000000>; + opp-microvolt = <620000>, <700000>; + opp-core-mask = /bits/ 64 <0x3>; + }; +}; diff --git a/bifrost/r25p0/kernel/Documentation/dma-buf-test-exporter.txt b/bifrost/r25p0/kernel/Documentation/dma-buf-test-exporter.txt new file mode 100644 index 0000000..8d8cbc9 --- /dev/null +++ b/bifrost/r25p0/kernel/Documentation/dma-buf-test-exporter.txt @@ -0,0 +1,46 @@ +# +# (C) 
COPYRIGHT 2012-2013 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + + +===================== +dma-buf-test-exporter +===================== + +Overview +-------- + +The dma-buf-test-exporter is a simple exporter of dma_buf objects. +It has a private API to allocate and manipulate the buffers which are represented as dma_buf fds. +The private API allows: +* simple allocation of physically non-contiguous buffers +* simple allocation of physically contiguous buffers +* query kernel side API usage stats (number of attachments, number of mappings, mmaps) +* failure mode configuration (fail attach, mapping, mmap) +* kernel side memset of buffers + +The buffers support all of the dma_buf API, including mmap. + +It supports being compiled as a module both in-tree and out-of-tree. + +See include/linux/dma-buf-test-exporter.h for the ioctl interface. +See Documentation/dma-buf-sharing.txt for details on dma_buf. + + diff --git a/bifrost/r25p0/kernel/Mconfig b/bifrost/r25p0/kernel/Mconfig new file mode 100644 index 0000000..e451591 --- /dev/null +++ b/bifrost/r25p0/kernel/Mconfig @@ -0,0 +1,27 @@ +# +# +# (C) COPYRIGHT 2017-2020 ARM Limited. All rights reserved. 
+# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + +source "kernel/drivers/gpu/arm/midgard/Mconfig" +source "kernel/drivers/gpu/arm/midgard/arbitration/Mconfig" + +config DMA_BUF_SYNC_IOCTL_SUPPORTED + bool "Kernel DMA buffers support DMA_BUF_IOCTL_SYNC" + depends on BACKEND_KERNEL + default y + +config BUILD_CSF_ONLY_MODULE + bool "Build CSF GPU specific kernel modules" + depends on BUILD_KERNEL_MODULES && GPU_HAS_CSF + default y diff --git a/bifrost/r25p0/kernel/build.bp b/bifrost/r25p0/kernel/build.bp new file mode 100644 index 0000000..2bc725f --- /dev/null +++ b/bifrost/r25p0/kernel/build.bp @@ -0,0 +1,102 @@ +/* + * + * (C) COPYRIGHT 2016-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + +bob_install_group { + name: "IG_kernel_modules", + android: { + install_path: "{{.module_path}}", + }, + linux: { + install_path: "{{.install_dir}}/{{.module_path}}", + }, +} + +bob_defaults { + name: "kernel_defaults", + enabled: false, + exclude_srcs: [ + "**/*.mod.c", + ], + local_include_dirs: [ + "include", + ], + build_kernel_modules: { + enabled: true, + kernel_dir: "{{.kernel_dir}}", + kernel_cross_compile: "{{.kernel_compiler}}", + kernel_cc: "{{.kernel_cc}}", + kernel_hostcc: "{{.kernel_hostcc}}", + kernel_clang_triple: "{{.kernel_clang_triple}}", + kernel_ld: "{{.kernel_ld}}", + }, + install_group: "IG_kernel_modules", + cflags: [ + "-Wall", + ], + werror: { + cflags: ["-Werror"], + }, + allow_android_tags: { + tags: [ + "eng", + "optional", + ], + }, + kbuild_options: [ + // Start of CS experimental features definitions. + // If there is nothing below, definition should be added as follows: + // "MALI_EXPERIMENTAL_FEATURE={{.experimental_feature}}" + // experimental_feature above comes from Mconfig in + // /product/base/ + // However, in Mconfig, experimental_feature should be looked up (for + // similar explanation to this one) as ALLCAPS, i.e. + // EXPERIMENTAL_FEATURE. + // + // IMPORTANT: MALI_CS_EXPERIMENTAL should NEVER be defined below as it + // is an umbrella feature that would be open for inappropriate use + // (catch-all for experimental CS code without separating it into + // different features). 
+ "MALI_JIT_PRESSURE_LIMIT={{.jit_pressure_limit}}", + "MALI_INCREMENTAL_RENDERING={{.incremental_rendering}}", + ], +} + +bob_defaults { + name: "kutf_includes", + local_include_dirs: [ + "drivers/gpu/arm/midgard/tests/include", + ], +} + +bob_defaults { + name: "kernel_test_includes", + defaults: ["kutf_includes"], + local_include_dirs: [ + "drivers/gpu/arm", + "drivers/gpu/arm/midgard", + "drivers/gpu/arm/midgard/backend/gpu", + "drivers/gpu/arm/midgard/debug", + "drivers/gpu/arm/midgard/debug/backend", + ], +} + +bob_alias { + name: "kernel", + srcs: [ + "dma-buf-test-exporter", + "memory_group_manager", + "mali_kbase", + ], +} diff --git a/dvalin/kernel/drivers/base/dma_buf_lock/src/Kbuild b/bifrost/r25p0/kernel/drivers/base/dma_buf_lock/src/Kbuild similarity index 100% rename from dvalin/kernel/drivers/base/dma_buf_lock/src/Kbuild rename to bifrost/r25p0/kernel/drivers/base/dma_buf_lock/src/Kbuild diff --git a/dvalin/kernel/drivers/base/dma_buf_lock/src/Makefile b/bifrost/r25p0/kernel/drivers/base/dma_buf_lock/src/Makefile similarity index 100% rename from dvalin/kernel/drivers/base/dma_buf_lock/src/Makefile rename to bifrost/r25p0/kernel/drivers/base/dma_buf_lock/src/Makefile diff --git a/dvalin/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c b/bifrost/r25p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c similarity index 100% rename from dvalin/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c rename to bifrost/r25p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.c diff --git a/dvalin/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h b/bifrost/r25p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h similarity index 100% rename from dvalin/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h rename to bifrost/r25p0/kernel/drivers/base/dma_buf_lock/src/dma_buf_lock.h diff --git a/dvalin/kernel/drivers/base/dma_buf_test_exporter/Kbuild b/bifrost/r25p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild similarity index 100% rename from 
dvalin/kernel/drivers/base/dma_buf_test_exporter/Kbuild rename to bifrost/r25p0/kernel/drivers/base/dma_buf_test_exporter/Kbuild diff --git a/dvalin/kernel/drivers/base/dma_buf_test_exporter/Kconfig b/bifrost/r25p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig similarity index 100% rename from dvalin/kernel/drivers/base/dma_buf_test_exporter/Kconfig rename to bifrost/r25p0/kernel/drivers/base/dma_buf_test_exporter/Kconfig diff --git a/dvalin/kernel/drivers/base/dma_buf_test_exporter/Makefile b/bifrost/r25p0/kernel/drivers/base/dma_buf_test_exporter/Makefile similarity index 100% rename from dvalin/kernel/drivers/base/dma_buf_test_exporter/Makefile rename to bifrost/r25p0/kernel/drivers/base/dma_buf_test_exporter/Makefile diff --git a/dvalin/kernel/drivers/base/dma_buf_test_exporter/build.bp b/bifrost/r25p0/kernel/drivers/base/dma_buf_test_exporter/build.bp similarity index 100% rename from dvalin/kernel/drivers/base/dma_buf_test_exporter/build.bp rename to bifrost/r25p0/kernel/drivers/base/dma_buf_test_exporter/build.bp diff --git a/dvalin/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c b/bifrost/r25p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c similarity index 100% rename from dvalin/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c rename to bifrost/r25p0/kernel/drivers/base/dma_buf_test_exporter/dma-buf-test-exporter.c diff --git a/dvalin/kernel/drivers/base/memory_group_manager/Kbuild b/bifrost/r25p0/kernel/drivers/base/memory_group_manager/Kbuild similarity index 100% rename from dvalin/kernel/drivers/base/memory_group_manager/Kbuild rename to bifrost/r25p0/kernel/drivers/base/memory_group_manager/Kbuild diff --git a/dvalin/kernel/drivers/base/memory_group_manager/Kconfig b/bifrost/r25p0/kernel/drivers/base/memory_group_manager/Kconfig similarity index 100% rename from dvalin/kernel/drivers/base/memory_group_manager/Kconfig rename to bifrost/r25p0/kernel/drivers/base/memory_group_manager/Kconfig diff 
--git a/dvalin/kernel/drivers/base/memory_group_manager/Makefile b/bifrost/r25p0/kernel/drivers/base/memory_group_manager/Makefile similarity index 100% rename from dvalin/kernel/drivers/base/memory_group_manager/Makefile rename to bifrost/r25p0/kernel/drivers/base/memory_group_manager/Makefile diff --git a/dvalin/kernel/drivers/base/memory_group_manager/build.bp b/bifrost/r25p0/kernel/drivers/base/memory_group_manager/build.bp similarity index 100% rename from dvalin/kernel/drivers/base/memory_group_manager/build.bp rename to bifrost/r25p0/kernel/drivers/base/memory_group_manager/build.bp diff --git a/dvalin/kernel/drivers/base/memory_group_manager/memory_group_manager.c b/bifrost/r25p0/kernel/drivers/base/memory_group_manager/memory_group_manager.c similarity index 100% rename from dvalin/kernel/drivers/base/memory_group_manager/memory_group_manager.c rename to bifrost/r25p0/kernel/drivers/base/memory_group_manager/memory_group_manager.c diff --git a/dvalin/kernel/drivers/base/protected_memory_allocator/Kbuild b/bifrost/r25p0/kernel/drivers/base/protected_memory_allocator/Kbuild similarity index 100% rename from dvalin/kernel/drivers/base/protected_memory_allocator/Kbuild rename to bifrost/r25p0/kernel/drivers/base/protected_memory_allocator/Kbuild diff --git a/dvalin/kernel/drivers/base/protected_memory_allocator/Kconfig b/bifrost/r25p0/kernel/drivers/base/protected_memory_allocator/Kconfig similarity index 100% rename from dvalin/kernel/drivers/base/protected_memory_allocator/Kconfig rename to bifrost/r25p0/kernel/drivers/base/protected_memory_allocator/Kconfig diff --git a/dvalin/kernel/drivers/base/protected_memory_allocator/Makefile b/bifrost/r25p0/kernel/drivers/base/protected_memory_allocator/Makefile similarity index 100% rename from dvalin/kernel/drivers/base/protected_memory_allocator/Makefile rename to bifrost/r25p0/kernel/drivers/base/protected_memory_allocator/Makefile diff --git a/dvalin/kernel/drivers/base/protected_memory_allocator/build.bp 
b/bifrost/r25p0/kernel/drivers/base/protected_memory_allocator/build.bp similarity index 100% rename from dvalin/kernel/drivers/base/protected_memory_allocator/build.bp rename to bifrost/r25p0/kernel/drivers/base/protected_memory_allocator/build.bp diff --git a/dvalin/kernel/drivers/base/protected_memory_allocator/protected_memory_allocator.c b/bifrost/r25p0/kernel/drivers/base/protected_memory_allocator/protected_memory_allocator.c similarity index 100% rename from dvalin/kernel/drivers/base/protected_memory_allocator/protected_memory_allocator.c rename to bifrost/r25p0/kernel/drivers/base/protected_memory_allocator/protected_memory_allocator.c diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/Kbuild b/bifrost/r25p0/kernel/drivers/gpu/arm/Kbuild new file mode 100644 index 0000000..1a6fa3c --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/Kbuild @@ -0,0 +1,23 @@ +# +# (C) COPYRIGHT 2012 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + + +obj-$(CONFIG_MALI_MIDGARD) += midgard/ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/Kconfig b/bifrost/r25p0/kernel/drivers/gpu/arm/Kconfig new file mode 100644 index 0000000..693b86f --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/Kconfig @@ -0,0 +1,25 @@ +# +# (C) COPYRIGHT 2012 ARM Limited. 
All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + + +menu "ARM GPU Configuration" +source "drivers/gpu/arm/midgard/Kconfig" +endmenu diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/Kbuild b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/Kbuild new file mode 100755 index 0000000..fa52548 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/Kbuild @@ -0,0 +1,216 @@ +# +# (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. 
+# +# SPDX-License-Identifier: GPL-2.0 +# +# + + +# Driver version string which is returned to userspace via an ioctl +MALI_RELEASE_NAME ?= "r25p0-01rel0" + +# Paths required for build +# make $(src) as absolute path if it isn't already, by prefixing $(srctree) +src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src)) +KBASE_PATH = $(src) +KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy +UMP_PATH = $(src)/../../../base + +# Set up defaults if not defined by build system +MALI_CUSTOMER_RELEASE ?= 1 +MALI_USE_CSF ?= 0 +MALI_UNIT_TEST ?= 0 +MALI_KERNEL_TEST_API ?= 0 +MALI_COVERAGE ?= 0 +CONFIG_MALI_PLATFORM_NAME ?= "devicetree" +# Experimental features (corresponding -D definition should be appended to +# DEFINES below, e.g. for MALI_EXPERIMENTAL_FEATURE, +# -DMALI_EXPERIMENTAL_FEATURE=$(MALI_EXPERIMENTAL_FEATURE) should be appended) +# +# Experimental features must default to disabled, e.g.: +# MALI_EXPERIMENTAL_FEATURE ?= 0 +MALI_JIT_PRESSURE_LIMIT ?= 0 +MALI_INCREMENTAL_RENDERING ?= 0 + +# Set up our defines, which will be passed to gcc +DEFINES = \ + -DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \ + -DMALI_USE_CSF=$(MALI_USE_CSF) \ + -DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \ + -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ + -DMALI_COVERAGE=$(MALI_COVERAGE) \ + -DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \ + -DMALI_JIT_PRESSURE_LIMIT=$(MALI_JIT_PRESSURE_LIMIT) \ + -DMALI_INCREMENTAL_RENDERING=$(MALI_INCREMENTAL_RENDERING) + +ifeq ($(KBUILD_EXTMOD),) +# in-tree +DEFINES +=-DMALI_KBASE_PLATFORM_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME) +else +# out-of-tree +DEFINES +=-DMALI_KBASE_PLATFORM_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME) +endif + +DEFINES += -I$(srctree)/drivers/staging/android +#meson graphics start +ldflags-y += --strip-debug +#meson graphics end + +DEFINES += -DMALI_KBASE_BUILD + +# Use our defines when compiling +ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux 
+subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux + +SRC := \ + context/mali_kbase_context.c \ + debug/mali_kbase_debug_ktrace.c \ + device/mali_kbase_device.c \ + mali_kbase_cache_policy.c \ + mali_kbase_mem.c \ + mali_kbase_mem_pool_group.c \ + mali_kbase_native_mgm.c \ + mali_kbase_ctx_sched.c \ + mali_kbase_jm.c \ + mali_kbase_gpuprops.c \ + mali_kbase_pm.c \ + mali_kbase_config.c \ + mali_kbase_vinstr.c \ + mali_kbase_hwcnt.c \ + mali_kbase_hwcnt_backend_gpu.c \ + mali_kbase_hwcnt_gpu.c \ + mali_kbase_hwcnt_legacy.c \ + mali_kbase_hwcnt_types.c \ + mali_kbase_hwcnt_virtualizer.c \ + mali_kbase_softjobs.c \ + mali_kbase_hw.c \ + mali_kbase_debug.c \ + mali_kbase_gpu_memory_debugfs.c \ + mali_kbase_mem_linux.c \ + mali_kbase_core_linux.c \ + mali_kbase_mem_profile_debugfs.c \ + mmu/mali_kbase_mmu.c \ + mmu/mali_kbase_mmu_hw_direct.c \ + mmu/mali_kbase_mmu_mode_lpae.c \ + mmu/mali_kbase_mmu_mode_aarch64.c \ + mali_kbase_disjoint_events.c \ + mali_kbase_debug_mem_view.c \ + mali_kbase_smc.c \ + mali_kbase_mem_pool.c \ + mali_kbase_mem_pool_debugfs.c \ + mali_kbase_debugfs_helper.c \ + mali_kbase_strings.c \ + mali_kbase_as_fault_debugfs.c \ + mali_kbase_regs_history_debugfs.c \ + thirdparty/mali_kbase_mmap.c \ + tl/mali_kbase_timeline.c \ + tl/mali_kbase_timeline_io.c \ + tl/mali_kbase_tlstream.c \ + tl/mali_kbase_tracepoints.c \ + gpu/mali_kbase_gpu.c + +ifeq ($(MALI_USE_CSF),1) + SRC += \ + debug/backend/mali_kbase_debug_ktrace_csf.c \ + device/backend/mali_kbase_device_csf.c \ + gpu/backend/mali_kbase_gpu_fault_csf.c \ + tl/backend/mali_kbase_timeline_csf.c \ + mmu/backend/mali_kbase_mmu_csf.c \ + context/backend/mali_kbase_context_csf.c +else + SRC += \ + mali_kbase_dummy_job_wa.c \ + mali_kbase_debug_job_fault.c \ + mali_kbase_event.c \ + mali_kbase_jd.c \ + mali_kbase_jd_debugfs.c \ + mali_kbase_js.c \ + mali_kbase_js_ctx_attr.c \ + debug/backend/mali_kbase_debug_ktrace_jm.c \ + 
device/backend/mali_kbase_device_jm.c \ + gpu/backend/mali_kbase_gpu_fault_jm.c \ + tl/backend/mali_kbase_timeline_jm.c \ + mmu/backend/mali_kbase_mmu_jm.c \ + context/backend/mali_kbase_context_jm.c +endif + +ifeq ($(CONFIG_MALI_CINSTR_GWT),y) + SRC += mali_kbase_gwt.c +endif + +ifeq ($(MALI_UNIT_TEST),1) + SRC += tl/mali_kbase_timeline_test.c +endif + +ifeq ($(MALI_CUSTOMER_RELEASE),0) + SRC += mali_kbase_regs_dump_debugfs.c +endif + + +ccflags-y += -I$(KBASE_PATH) -I$(KBASE_PATH)/debug \ + -I$(KBASE_PATH)/debug/backend + +# Tell the Linux build system from which .o file to create the kernel module +obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o + +# Tell the Linux build system to enable building of our .c files +mali_kbase-y := $(SRC:.c=.o) + +# Kconfig passes in the name with quotes for in-tree builds - remove them. +platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_NAME)) +MALI_PLATFORM_DIR := platform/$(platform_name) +ccflags-y += -I$(src)/$(MALI_PLATFORM_DIR) +include $(src)/$(MALI_PLATFORM_DIR)/Kbuild + +ifeq ($(CONFIG_MALI_DEVFREQ),y) + ifeq ($(CONFIG_DEVFREQ_THERMAL),y) + include $(src)/ipa/Kbuild + endif +endif + +ifeq ($(MALI_USE_CSF),1) + include $(src)/csf/Kbuild +else +# empty +endif + +ifeq ($(CONFIG_MALI_ARBITER_SUPPORT),y) + include $(src)/arbiter/Kbuild +else +# empty +endif + +mali_kbase-$(CONFIG_MALI_DMA_FENCE) += \ + mali_kbase_dma_fence.o \ + mali_kbase_fence.o +mali_kbase-$(CONFIG_SYNC) += \ + mali_kbase_sync_android.o \ + mali_kbase_sync_common.o +mali_kbase-$(CONFIG_SYNC_FILE) += \ + mali_kbase_sync_file.o \ + mali_kbase_sync_common.o \ + mali_kbase_fence.o + +include $(src)/backend/gpu/Kbuild +mali_kbase-y += $(BACKEND:.c=.o) + + +ccflags-y += -I$(src)/backend/gpu +subdir-ccflags-y += -I$(src)/backend/gpu + +# For kutf and mali_kutf_irq_latency_test +obj-$(CONFIG_MALI_KUTF) += tests/ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/Kconfig b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/Kconfig new file mode 100644 index 
0000000..ca59dbb --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/Kconfig @@ -0,0 +1,309 @@ +# +# (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + + +menuconfig MALI_MIDGARD + tristate "Mali Midgard series support" + select GPU_TRACEPOINTS if ANDROID + select DMA_SHARED_BUFFER + default n + help + Enable this option to build support for an ARM Mali Midgard GPU. + + To compile this driver as a module, choose M here: + this will generate a single module, called mali_kbase. + +config MALI_GATOR_SUPPORT + bool "Enable Streamline tracing support" + depends on MALI_MIDGARD + default y + help + Enables kbase tracing used by the Arm Streamline Performance Analyzer. + The tracepoints are used to derive GPU activity charts in Streamline. + +config MALI_MIDGARD_DVFS + bool "Enable legacy DVFS" + depends on MALI_MIDGARD && !MALI_DEVFREQ + default n + help + Choose this option to enable legacy DVFS in the Mali Midgard DDK. + +config MALI_MIDGARD_ENABLE_TRACE + bool "Enable kbase tracing" + depends on MALI_MIDGARD + default y if MALI_DEBUG + default n + help + Enables tracing in kbase.
Trace log available through + the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled + +config MALI_DEVFREQ + bool "devfreq support for Mali" + depends on MALI_MIDGARD && PM_DEVFREQ + default y + help + Support devfreq for Mali. + + Using the devfreq framework and, by default, the simpleondemand + governor, the frequency of Mali will be dynamically selected from the + available OPPs. + +config MALI_DMA_FENCE + bool "DMA_BUF fence support for Mali" + depends on MALI_MIDGARD + default n + help + Support DMA_BUF fences for Mali. + + This option should only be enabled if the Linux Kernel has built in + support for DMA_BUF fences. + +config MALI_PLATFORM_NAME + depends on MALI_MIDGARD + string "Platform name" + default "devicetree" + help + Enter the name of the desired platform configuration directory to + include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must + exist. + +config MALI_ARBITER_SUPPORT + bool "Enable arbiter support for Mali" + depends on MALI_MIDGARD + default n + help + Enable support for the arbiter interface in the driver. + This allows an external arbiter to manage driver access + to GPU hardware in a virtualized environment + + If unsure, say N. + +# MALI_EXPERT configuration options + +menuconfig MALI_EXPERT + depends on MALI_MIDGARD + bool "Enable Expert Settings" + default n + help + Enabling this option and modifying the default settings may produce a driver with performance or + other limitations. + +config MALI_CORESTACK + bool "Support controlling power to the GPU core stack" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Enabling this feature on supported GPUs will let the driver powering + on/off the GPU core stack independently without involving the Power + Domain Controller. This should only be enabled on platforms which + integration of the PDC to the Mali GPU is known to be problematic. + This feature is currently only supported on t-Six and t-HEx GPUs. + + If unsure, say N. 
+ +config MALI_DEBUG + bool "Debug build" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Select this option for increased checking and reporting of errors. + +config MALI_FENCE_DEBUG + bool "Debug sync fence usage" + depends on MALI_MIDGARD && MALI_EXPERT && (SYNC || SYNC_FILE) + default y if MALI_DEBUG + help + Select this option to enable additional checking and reporting on the + use of sync fences in the Mali driver. + + This will add a 3s timeout to all sync fence waits in the Mali + driver, so that when work for Mali has been waiting on a sync fence + for a long time a debug message will be printed, detailing what fence + is causing the block, and which dependent Mali atoms are blocked as a + result of this. + + The timeout can be changed at runtime through the js_soft_timeout + device attribute, where the timeout is specified in milliseconds. + +config MALI_NO_MALI + bool "No Mali" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + This can be used to test the driver in a simulated environment + whereby the hardware is not physically present. If the hardware is physically + present it will not be used. This can be used to test the majority of the + driver without needing actual hardware or for software benchmarking. + All calls to the simulated hardware will complete immediately as if the hardware + completed the task. + +config MALI_REAL_HW + def_bool !MALI_NO_MALI + +config MALI_ERROR_INJECT + bool "Error injection" + depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI + default n + help + Enables insertion of errors to test module failure and recovery mechanisms. + +config MALI_SYSTEM_TRACE + bool "Enable system event tracing support" + depends on MALI_MIDGARD && MALI_EXPERT + default y if MALI_DEBUG + default n + help + Choose this option to enable system trace events for each + kbase event. This is typically used for debugging but has + minimal overhead when not in use. Enable only if you know what + you are doing. 
+ +config MALI_2MB_ALLOC + bool "Attempt to allocate 2MB pages" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Rather than allocating all GPU memory page-by-page, attempt to + allocate 2MB pages from the kernel. This reduces TLB pressure and + helps to prevent memory fragmentation. + + If in doubt, say N + +config MALI_PWRSOFT_765 + bool "PWRSOFT-765 ticket" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + PWRSOFT-765 fixes devfreq cooling devices issues. The fix was merged + in kernel v4.10, however if backported into the kernel then this + option must be manually selected. + + If using kernel >= v4.10 then say N, otherwise if devfreq cooling + changes have been backported say Y to avoid compilation errors. + +config MALI_MEMORY_FULLY_BACKED + bool "Memory fully physically-backed" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + This option enables full physical backing of all virtual + memory allocations in the kernel. Notice that this build + option only affects allocations of grow-on-GPU-page-fault + memory. + +config MALI_DMA_BUF_MAP_ON_DEMAND + bool "Map imported dma-bufs on demand" + depends on MALI_MIDGARD + default n + help + This option causes kbase to set up the GPU mapping of imported + dma-buf when needed to run atoms. This is the legacy behaviour. + + This is intended for testing and the option will get removed in the + future. + +config MALI_DMA_BUF_LEGACY_COMPAT + bool "Enable legacy compatibility cache flush on dma-buf map" + depends on MALI_MIDGARD && !MALI_DMA_BUF_MAP_ON_DEMAND + default n + help + This option enables compatibility with legacy dma-buf mapping + behavior, then the dma-buf is mapped on import, by adding cache + maintenance where MALI_DMA_BUF_MAP_ON_DEMAND would do the mapping, + including a cache flush. + + This option might work around issues related to missing cache + flushes in other drivers. This only has an effect for clients using + UK 11.18 or older.
For later UK versions it is not possible. + +config MALI_HW_ERRATA_1485982_NOT_AFFECTED + bool "Disable workaround for BASE_HW_ISSUE_GPU2017_1336" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + This option disables the default workaround for GPU2017-1336. The + workaround keeps the L2 cache powered up except for powerdown and reset. + + The workaround introduces a limitation that will prevent the running of + protected mode content on fully coherent platforms, as the switch to IO + coherency mode requires the L2 to be turned off. + +config MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE + bool "Use alternative workaround for BASE_HW_ISSUE_GPU2017_1336" + depends on MALI_MIDGARD && MALI_EXPERT && !MALI_HW_ERRATA_1485982_NOT_AFFECTED + default n + help + This option uses an alternative workaround for GPU2017-1336: lowering + the GPU clock to a platform-specific, known-good frequency before + powering down the L2 cache. The clock can be specified in the device + tree using the property, opp-mali-errata-1485982. Otherwise the + slowest clock will be selected. + +config MALI_GEM5_BUILD + bool "Enable build of Mali kernel driver for GEM5" + depends on MALI_MIDGARD + default n + help + This option is to do a Mali GEM5 build. + If unsure, say N. + +# Instrumentation options. + +config MALI_JOB_DUMP + bool "Enable system level support needed for job dumping" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Choose this option to enable system level support needed for + job dumping. This is typically used for instrumentation but has + minimal overhead when not in use. Enable only if you know what + you are doing. + +config MALI_PRFCNT_SET_SECONDARY + bool "Use secondary set of performance counters" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Select this option to use secondary set of performance counters. Kernel + features that depend on an access to the primary set of counters may + become unavailable.
Enabling this option will prevent power management + from working optimally and may cause instrumentation tools to return + bogus results. + + If unsure, say N. + +config MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS + bool "Use secondary set of performance counters" + depends on MALI_MIDGARD && MALI_EXPERT && !MALI_PRFCNT_SET_SECONDARY && DEBUG_FS + default n + help + Select this option to make the secondary set of performance counters + available at runtime via debugfs. Kernel features that depend on an + access to the primary set of counters may become unavailable. + + This feature is unsupported and unstable, and may break at any time. + Enabling this option will prevent power management from working + optimally and may cause instrumentation tools to return bogus results. + + If unsure, say N. + +source "drivers/gpu/arm/midgard/platform/Kconfig" +source "drivers/gpu/arm/midgard/tests/Kconfig" diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/Makefile b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/Makefile new file mode 100644 index 0000000..53a1209 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/Makefile @@ -0,0 +1,38 @@ +# +# (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. 
+# +# SPDX-License-Identifier: GPL-2.0 +# +# + + +KDIR ?= /lib/modules/$(shell uname -r)/build + +BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../.. +KBASE_PATH_RELATIVE = $(CURDIR) + +ifeq ($(CONFIG_MALI_BUSLOG),y) +#Add bus logger symbols +EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers +endif + +# we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions +all: + $(MAKE) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include -I$(CURDIR)/../../../../tests/include $(SCONS_CFLAGS)" $(SCONS_CONFIGS) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules + +clean: + $(MAKE) -C $(KDIR) M=$(CURDIR) clean diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/Makefile.kbase b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase similarity index 100% rename from dvalin/kernel/drivers/gpu/arm/midgard/Makefile.kbase rename to bifrost/r25p0/kernel/drivers/gpu/arm/midgard/Makefile.kbase diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/Mconfig b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/Mconfig new file mode 100644 index 0000000..b137793 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/Mconfig @@ -0,0 +1,278 @@ +# +# (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + + +menuconfig MALI_MIDGARD + bool "Mali Midgard series support" + default y + help + Enable this option to build support for a ARM Mali Midgard GPU. + + To compile this driver as a module, choose M here: + this will generate a single module, called mali_kbase. 
+ +config MALI_GATOR_SUPPORT + bool "Enable Streamline tracing support" + depends on MALI_MIDGARD && !BACKEND_USER + default y + help + Enables kbase tracing used by the Arm Streamline Performance Analyzer. + The tracepoints are used to derive GPU activity charts in Streamline. + +config MALI_MIDGARD_DVFS + bool "Enable legacy DVFS" + depends on MALI_MIDGARD && !MALI_DEVFREQ + default n + help + Choose this option to enable legacy DVFS in the Mali Midgard DDK. + +config MALI_MIDGARD_ENABLE_TRACE + bool "Enable kbase tracing" + depends on MALI_MIDGARD + default y if MALI_DEBUG + default n + help + Enables tracing in kbase. Trace log available through + the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled + +config MALI_DEVFREQ + bool "devfreq support for Mali" + depends on MALI_MIDGARD + default y if PLATFORM_JUNO + default y if PLATFORM_CUSTOM + help + Support devfreq for Mali. + + Using the devfreq framework and, by default, the simpleondemand + governor, the frequency of Mali will be dynamically selected from the + available OPPs. + +config MALI_DMA_FENCE + bool "DMA_BUF fence support for Mali" + depends on MALI_MIDGARD + default n + help + Support DMA_BUF fences for Mali. + + This option should only be enabled if the Linux Kernel has built in + support for DMA_BUF fences. + +config MALI_PLATFORM_NAME + depends on MALI_MIDGARD + string "Platform name" + default "hisilicon" if PLATFORM_HIKEY960 + default "hisilicon" if PLATFORM_HIKEY970 + default "devicetree" + help + Enter the name of the desired platform configuration directory to + include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must + exist. + + When PLATFORM_CUSTOM is set, this needs to be set manually to + pick up the desired platform files. + +config MALI_ARBITER_SUPPORT + bool "Enable arbiter support for Mali" + depends on MALI_MIDGARD + default n + help + Enable support for the arbiter interface in the driver. 
+ This allows an external arbiter to manage driver access + to GPU hardware in a virtualized environment + + If unsure, say N. + +# MALI_EXPERT configuration options + +menuconfig MALI_EXPERT + depends on MALI_MIDGARD + bool "Enable Expert Settings" + default y + help + Enabling this option and modifying the default settings may produce a driver with performance or + other limitations. + +config MALI_CORESTACK + bool "Support controlling power to the GPU core stack" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Enabling this feature on supported GPUs will let the driver powering + on/off the GPU core stack independently without involving the Power + Domain Controller. This should only be enabled on platforms which + integration of the PDC to the Mali GPU is known to be problematic. + This feature is currently only supported on t-Six and t-HEx GPUs. + + If unsure, say N. + +config MALI_DEBUG + bool "Debug build" + depends on MALI_MIDGARD && MALI_EXPERT + default y if DEBUG + default n + help + Select this option for increased checking and reporting of errors. + +config MALI_FENCE_DEBUG + bool "Debug sync fence usage" + depends on MALI_MIDGARD && MALI_EXPERT + default y if MALI_DEBUG + help + Select this option to enable additional checking and reporting on the + use of sync fences in the Mali driver. + + This will add a 3s timeout to all sync fence waits in the Mali + driver, so that when work for Mali has been waiting on a sync fence + for a long time a debug message will be printed, detailing what fence + is causing the block, and which dependent Mali atoms are blocked as a + result of this. + + The timeout can be changed at runtime through the js_soft_timeout + device attribute, where the timeout is specified in milliseconds. + +choice + prompt "Error injection level" + default MALI_ERROR_INJECT_NONE + help + Enables insertion of errors to test module failure and recovery mechanisms. 
+ +config MALI_ERROR_INJECT_NONE + bool "disabled" + help + Error injection is disabled. + +config MALI_ERROR_INJECT_TRACK_LIST + bool "error track list" + depends on MALI_MIDGARD && MALI_EXPERT && NO_MALI + help + Errors to inject are pre-configured by the user. + +config MALI_ERROR_INJECT_RANDOM + bool "random error injection" + depends on MALI_MIDGARD && MALI_EXPERT && NO_MALI + help + Injected errors are random, rather than user-driven. + +endchoice + +config MALI_ERROR_INJECT_ON + string + default "0" if MALI_ERROR_INJECT_NONE + default "1" if MALI_ERROR_INJECT_TRACK_LIST + default "2" if MALI_ERROR_INJECT_RANDOM + +config MALI_ERROR_INJECT + bool + default y if !MALI_ERROR_INJECT_NONE + +config MALI_SYSTEM_TRACE + bool "Enable system event tracing support" + depends on MALI_MIDGARD && MALI_EXPERT + default y if MALI_DEBUG + default n + help + Choose this option to enable system trace events for each + kbase event. This is typically used for debugging but has + minimal overhead when not in use. Enable only if you know what + you are doing. + +config MALI_2MB_ALLOC + bool "Attempt to allocate 2MB pages" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Rather than allocating all GPU memory page-by-page, attempt to + allocate 2MB pages from the kernel. This reduces TLB pressure and + helps to prevent memory fragmentation. + + If in doubt, say N + +config MALI_PWRSOFT_765 + bool "PWRSOFT-765 ticket" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + PWRSOFT-765 fixes devfreq cooling devices issues. However, they are + not merged in mainline kernel yet. So this define helps to guard those + parts of the code. + +config MALI_MEMORY_FULLY_BACKED + bool "Memory fully physically-backed" + default n + help + This option enables full backing of all virtual memory allocations + for the kernel. This only affects grow-on-GPU-page-fault memory. 
+ +config MALI_DMA_BUF_MAP_ON_DEMAND + bool "Map imported dma-bufs on demand" + depends on MALI_MIDGARD + default n + default y if !DMA_BUF_SYNC_IOCTL_SUPPORTED + help + This option causes kbase to set up the GPU mapping of imported + dma-buf when needed to run atoms. This is the legacy behaviour. + +config MALI_DMA_BUF_LEGACY_COMPAT + bool "Enable legacy compatibility cache flush on dma-buf map" + depends on MALI_MIDGARD && !MALI_DMA_BUF_MAP_ON_DEMAND + default n + help + This option enables compatibility with legacy dma-buf mapping + behavior, then the dma-buf is mapped on import, by adding cache + maintenance where MALI_DMA_BUF_MAP_ON_DEMAND would do the mapping, + including a cache flush. + +config MALI_REAL_HW + bool + default y + default n if NO_MALI + +config MALI_HW_ERRATA_1485982_NOT_AFFECTED + bool "Disable workaround for BASE_HW_ISSUE_GPU2017_1336" + depends on MALI_MIDGARD && MALI_EXPERT + default n + default y if PLATFORM_JUNO + help + This option disables the default workaround for GPU2017-1336. The + workaround keeps the L2 cache powered up except for powerdown and reset. + + The workaround introduces a limitation that will prevent the running of + protected mode content on fully coherent platforms, as the switch to IO + coherency mode requires the L2 to be turned off. + +config MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE + bool "Use alternative workaround for BASE_HW_ISSUE_GPU2017_1336" + depends on MALI_MIDGARD && MALI_EXPERT && !MALI_HW_ERRATA_1485982_NOT_AFFECTED + default n + help + This option uses an alternative workaround for GPU2017-1336: lowering + the GPU clock to a platform-specific, known-good frequency before + powering down the L2 cache. The clock can be specified in the device + tree using the property, opp-mali-errata-1485982. Otherwise the + slowest clock will be selected.
+ +config MALI_GEM5_BUILD + bool "Enable build of Mali kernel driver for GEM5" + depends on MALI_MIDGARD + default n + help + This option is to do a Mali GEM5 build. + If unsure, say N. + +# Instrumentation options. + +# config MALI_JOB_DUMP exists in the Kernel Kconfig but is configured using CINSTR_JOB_DUMP in Mconfig. +# config MALI_PRFCNT_SET_SECONDARY exists in the Kernel Kconfig but is configured using CINSTR_SECONDARY_HWC in Mconfig. + +source "kernel/drivers/gpu/arm/midgard/tests/Mconfig" diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/Kbuild b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/Kbuild new file mode 100644 index 0000000..98e47be --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/Kbuild @@ -0,0 +1,24 @@ +# +# (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. 
+# +# SPDX-License-Identifier: GPL-2.0 +# +# + +mali_kbase-y += \ + arbiter/mali_kbase_arbif.o \ + arbiter/mali_kbase_arbiter_pm.o diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbif.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbif.c new file mode 100644 index 0000000..d193cb9 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbif.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * @file mali_kbase_arbif.c + * Mali arbiter interface APIs to share GPU between Virtual Machines + */ + +#include +#include "mali_kbase_arbif.h" +#include +#include +#include +#include "mali_kbase_arbiter_interface.h" + +static void on_gpu_stop(struct device *dev) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + + KBASE_TLSTREAM_TL_EVENT_ARB_STOP_REQUESTED(kbdev, kbdev); + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_STOP_EVT); +} + +static void on_gpu_granted(struct device *dev) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + + KBASE_TLSTREAM_TL_EVENT_ARB_GRANTED(kbdev, kbdev); + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_GRANTED_EVT); +} + +static void on_gpu_lost(struct device *dev) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_LOST_EVT); +} + +int kbase_arbif_init(struct kbase_device *kbdev) +{ +#ifdef CONFIG_OF + struct arbiter_if_arb_vm_ops ops; + struct arbiter_if_dev *arb_if; + struct device_node *arbiter_if_node; + struct platform_device *pdev; + int err; + + dev_dbg(kbdev->dev, "%s\n", __func__); + + arbiter_if_node = of_parse_phandle(kbdev->dev->of_node, + "arbiter_if", 0); + if (!arbiter_if_node) { + dev_dbg(kbdev->dev, "No arbiter_if in Device Tree\n"); + /* no arbiter interface defined in device tree */ + kbdev->arb.arb_dev = NULL; + kbdev->arb.arb_if = NULL; + return 0; + } + + pdev = of_find_device_by_node(arbiter_if_node); + if (!pdev) { + dev_err(kbdev->dev, "Failed to find arbiter_if device\n"); + return -EPROBE_DEFER; + } + + if (!pdev->dev.driver || !try_module_get(pdev->dev.driver->owner)) { + dev_err(kbdev->dev, "arbiter_if driver not available\n"); + return -EPROBE_DEFER; + } + kbdev->arb.arb_dev = &pdev->dev; + arb_if = platform_get_drvdata(pdev); + if (!arb_if) { + dev_err(kbdev->dev, "arbiter_if driver not ready\n"); + module_put(pdev->dev.driver->owner); + return -EPROBE_DEFER; + } + + 
kbdev->arb.arb_if = arb_if; + ops.arb_vm_gpu_stop = on_gpu_stop; + ops.arb_vm_gpu_granted = on_gpu_granted; + ops.arb_vm_gpu_lost = on_gpu_lost; + + /* register kbase arbiter_if callbacks */ + if (arb_if->vm_ops.vm_arb_register_dev) { + err = arb_if->vm_ops.vm_arb_register_dev(arb_if, + kbdev->dev, &ops); + if (err) { + dev_err(kbdev->dev, "Arbiter registration failed.\n"); + module_put(pdev->dev.driver->owner); + return err; + } + } +#else /* CONFIG_OF */ + dev_dbg(kbdev->dev, "No arbiter without Device Tree support\n"); + kbdev->arb.arb_dev = NULL; + kbdev->arb.arb_if = NULL; +#endif + return 0; +} + +void kbase_arbif_destroy(struct kbase_device *kbdev) +{ + struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; + + if (arb_if && arb_if->vm_ops.vm_arb_unregister_dev) { + dev_dbg(kbdev->dev, "%s\n", __func__); + arb_if->vm_ops.vm_arb_unregister_dev(kbdev->arb.arb_if); + } + kbdev->arb.arb_if = NULL; + if (kbdev->arb.arb_dev) + module_put(kbdev->arb.arb_dev->driver->owner); + kbdev->arb.arb_dev = NULL; +} + +void kbase_arbif_gpu_request(struct kbase_device *kbdev) +{ + struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; + + if (arb_if && arb_if->vm_ops.vm_arb_gpu_request) { + dev_dbg(kbdev->dev, "%s\n", __func__); + arb_if->vm_ops.vm_arb_gpu_request(arb_if); + } +} + +void kbase_arbif_gpu_stopped(struct kbase_device *kbdev, u8 gpu_required) +{ + struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; + + if (arb_if && arb_if->vm_ops.vm_arb_gpu_stopped) { + dev_dbg(kbdev->dev, "%s\n", __func__); + KBASE_TLSTREAM_TL_EVENT_ARB_STOPPED(kbdev, kbdev); + arb_if->vm_ops.vm_arb_gpu_stopped(arb_if, gpu_required); + } +} + +void kbase_arbif_gpu_active(struct kbase_device *kbdev) +{ + struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; + + if (arb_if && arb_if->vm_ops.vm_arb_gpu_active) { + dev_dbg(kbdev->dev, "%s\n", __func__); + arb_if->vm_ops.vm_arb_gpu_active(arb_if); + } +} + +void kbase_arbif_gpu_idle(struct kbase_device *kbdev) +{ + struct arbiter_if_dev *arb_if = 
kbdev->arb.arb_if; + + if (arb_if && arb_if->vm_ops.vm_arb_gpu_idle) { + dev_dbg(kbdev->dev, "vm_arb_gpu_idle\n"); + arb_if->vm_ops.vm_arb_gpu_idle(arb_if); + } +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbif.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbif.h new file mode 100644 index 0000000..e7e9de7 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbif.h @@ -0,0 +1,133 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + *//* SPDX-License-Identifier: GPL-2.0 */ + +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * + */ + +/** + * @file + * Mali arbiter interface APIs to share GPU between Virtual Machines + */ + +#ifndef _MALI_KBASE_ARBIF_H_ +#define _MALI_KBASE_ARBIF_H_ + +/** + * enum kbase_arbif_evt - Internal Arbiter event. + * + * @KBASE_VM_GPU_INITIALIZED_EVT: KBase has finished initializing + * and can be stopped + * @KBASE_VM_GPU_STOP_EVT: Stop message received from Arbiter + * @KBASE_VM_GPU_GRANTED_EVT: Grant message received from Arbiter + * @KBASE_VM_GPU_LOST_EVT: Lost message received from Arbiter + * @KBASE_VM_GPU_IDLE_EVENT: KBase has transitioned into an inactive state. + * @KBASE_VM_REF_EVENT: KBase has transitioned into an active state. + * @KBASE_VM_OS_SUSPEND_EVENT: KBase is suspending + * @KBASE_VM_OS_RESUME_EVENT: KBase is resuming + */ +enum kbase_arbif_evt { + KBASE_VM_GPU_INITIALIZED_EVT = 1, + KBASE_VM_GPU_STOP_EVT, + KBASE_VM_GPU_GRANTED_EVT, + KBASE_VM_GPU_LOST_EVT, + KBASE_VM_GPU_IDLE_EVENT, + KBASE_VM_REF_EVENT, + KBASE_VM_OS_SUSPEND_EVENT, + KBASE_VM_OS_RESUME_EVENT, +}; + +/** + * kbase_arbif_init() - Initialize the arbiter interface functionality. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Initializes the arbiter interface and also determines + * if Arbiter functionality is required. + * + * Return: 0 if the Arbiter interface was successfully initialized or the + * Arbiter was not required, otherwise a negative error code (-EPROBE_DEFER + * while the arbiter_if device or its driver is not available yet). + */ +int kbase_arbif_init(struct kbase_device *kbdev); + +/** + * kbase_arbif_destroy() - Cleans up the arbiter interface functionality. 
+ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Cleans up the arbiter interface functionality and resets the reference count + * of the arbif module used + */ +void kbase_arbif_destroy(struct kbase_device *kbdev); + +/** + * kbase_arbif_gpu_request() - Send GPU request message to the arbiter + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Sends a message to Arbiter to request GPU access. + */ +void kbase_arbif_gpu_request(struct kbase_device *kbdev); + +/** + * kbase_arbif_gpu_stopped() - Send GPU stopped message to the arbiter + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @gpu_required: true if GPU access is still required + * (Arbiter will automatically send another grant message) + * + * Sends a message to Arbiter to notify that the GPU has stopped. + * @note Once this call has been made, KBase must not attempt to access the GPU + * until the #KBASE_VM_GPU_GRANTED_EVT event has been received. + */ +void kbase_arbif_gpu_stopped(struct kbase_device *kbdev, u8 gpu_required); + +/** + * kbase_arbif_gpu_active() - Send a GPU active message to the arbiter + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Sends a message to Arbiter to report that KBase has gone active. + */ +void kbase_arbif_gpu_active(struct kbase_device *kbdev); + +/** + * kbase_arbif_gpu_idle() - Send a GPU idle message to the arbiter + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Sends a message to Arbiter to report that KBase has gone idle. 
+ */ +void kbase_arbif_gpu_idle(struct kbase_device *kbdev); + +#endif /* _MALI_KBASE_ARBIF_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_defs.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_defs.h new file mode 100644 index 0000000..1f53cbf --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_defs.h @@ -0,0 +1,95 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + *//* SPDX-License-Identifier: GPL-2.0 */ + +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * + */ + +/** + * @file + * Mali structure definitions to support the arbitration feature + */ + +#ifndef _MALI_KBASE_ARBITER_DEFS_H_ +#define _MALI_KBASE_ARBITER_DEFS_H_ + +#include "mali_kbase_arbiter_pm.h" + +/** + * struct kbase_arbiter_vm_state - Struct representing the state and containing the + * data of pm work + * @kbdev: Pointer to kbase device structure (must be a valid pointer) + * @vm_state_lock: The lock protecting the VM state when arbiter is used. + * This lock must also be held whenever the VM state is being + * transitioned + * @vm_state_wait: Wait queue woken on every VM state change + * @vm_state: Current state of VM + * @vm_arb_wq: Work queue for resuming or stopping work on the GPU for use + * with the Arbiter + * @vm_suspend_work: Work item for vm_arb_wq to stop current work on GPU + * @vm_resume_work: Work item for vm_arb_wq to resume current work on GPU + * @vm_arb_starting: Work queue resume in progress + * @vm_arb_stopping: Work queue suspend in progress + * @vm_arb_users_waiting: Count of users waiting for GPU + */ +struct kbase_arbiter_vm_state { + struct kbase_device *kbdev; + struct mutex vm_state_lock; + wait_queue_head_t vm_state_wait; + enum kbase_vm_state vm_state; + struct workqueue_struct *vm_arb_wq; + struct work_struct vm_suspend_work; + struct work_struct vm_resume_work; + bool vm_arb_starting; + bool vm_arb_stopping; + int vm_arb_users_waiting; +}; + +/** + * struct kbase_arbiter_device - Representing an instance of arbiter device, + * allocated from the probe method of Mali driver + * @arb_if: Pointer to the arbiter interface device + * @arb_dev: Pointer to the arbiter device + */ +struct kbase_arbiter_device { + struct arbiter_if_dev *arb_if; + struct device *arb_dev; +}; + +#endif /* _MALI_KBASE_ARBITER_DEFS_H_ */ diff --git 
a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_interface.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_interface.h new file mode 100644 index 0000000..5d5d8a7 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_interface.h @@ -0,0 +1,181 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + *//* SPDX-License-Identifier: GPL-2.0 */ + +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * + */ + +/** + * @file + * Defines the Mali arbiter interface + */ + +#ifndef _MALI_KBASE_ARBITER_INTERFACE_H_ +#define _MALI_KBASE_ARBITER_INTERFACE_H_ + +/** + * @brief Mali arbiter interface version + * + * This specifies the current version of the configuration interface. Whenever + * the arbiter interface changes, so that integration effort is required, the + * version number will be increased. Each configuration must make an effort + * to check that it implements the correct version. + * + * Version history: + * 1 - Added the Mali arbiter configuration interface. + * 2 - Strip out reference code from header + * 3 - Removed DVFS utilization interface (DVFS moved to arbiter side) + */ +#define MALI_KBASE_ARBITER_INTERFACE_VERSION 3 + +struct arbiter_if_dev; + +/** + * struct arbiter_if_arb_vm_ops - Interface to communicate messages to VM + * + * This struct contains callbacks used to deliver messages + * from the arbiter to the corresponding VM. + * + * Note that calls into these callbacks may have synchronous calls back into + * the arbiter arbiter_if_vm_arb_ops callbacks below. + * For example vm_arb_gpu_stopped() may be called as a side effect of + * arb_vm_gpu_stop() being called here. + */ +struct arbiter_if_arb_vm_ops { + /** + * arb_vm_gpu_stop() - Ask VM to stop using GPU + * @dev: The arbif kernel module device. + * + * Informs KBase to stop using the GPU as soon as possible. + * @Note: Once the driver is no longer using the GPU, a call to + * vm_arb_gpu_stopped is expected by the arbiter. + */ + void (*arb_vm_gpu_stop)(struct device *dev); + + /** + * arb_vm_gpu_granted() - GPU has been granted to VM + * @dev: The arbif kernel module device. + * + * Informs KBase that the GPU can now be used by the VM. + */ + void (*arb_vm_gpu_granted)(struct device *dev); + + /** + * arb_vm_gpu_lost() - VM has lost the GPU + * @dev: The arbif kernel module device. + * + * This is called if KBase takes too long to respond to the arbiter + * stop request. 
+ * Once this is called, KBase will assume that access to the GPU + * has been lost and will fail all running jobs and reset its + * internal state. + * If successful, will respond with a vm_arb_gpu_stopped message. + */ + void (*arb_vm_gpu_lost)(struct device *dev); +}; + +/** + * struct arbiter_if_vm_arb_ops - Interface to communicate messages to arbiter + * + * This struct contains callbacks used to request operations + * from the VM to the arbiter + * + * Note that we must not make any synchronous calls back in to the VM + * (via arbiter_if_arb_vm_ops above) in the context of these callbacks. + */ +struct arbiter_if_vm_arb_ops { + /** + * vm_arb_register_dev() - Register VM device driver callbacks. + * @arbif_dev: The arbiter interface we are registering device callbacks + * @dev: The device structure to supply in the callbacks. + * @ops: The callbacks that the device driver supports + * (none are optional). + */ + int (*vm_arb_register_dev)(struct arbiter_if_dev *arbif_dev, + struct device *dev, struct arbiter_if_arb_vm_ops *ops); + + /** + * vm_arb_unregister_dev() - Unregister VM device driver callbacks. + * @arbif_dev: The arbiter interface we are unregistering from. + */ + void (*vm_arb_unregister_dev)(struct arbiter_if_dev *arbif_dev); + + /** + * vm_arb_gpu_request() - Ask the arbiter interface for GPU access. + * @arbif_dev: The arbiter interface we want to issue the request. + */ + void (*vm_arb_gpu_request)(struct arbiter_if_dev *arbif_dev); + + /** + * vm_arb_gpu_active() - Inform arbiter that the driver has gone active + * @arbif_dev: The arbiter interface device. + */ + void (*vm_arb_gpu_active)(struct arbiter_if_dev *arbif_dev); + + /** + * vm_arb_gpu_idle() - Inform the arbiter that the driver has gone idle + * @arbif_dev: The arbiter interface device. 
+ */ + void (*vm_arb_gpu_idle)(struct arbiter_if_dev *arbif_dev); + + /** + * vm_arb_gpu_stopped() - Inform the arbiter that the driver has stopped + * using the GPU + * @arbif_dev: The arbiter interface device. + * @gpu_required: The GPU is still needed to do more work. + */ + void (*vm_arb_gpu_stopped)(struct arbiter_if_dev *arbif_dev, + u8 gpu_required); +}; + +/** + * struct arbiter_if_dev - Arbiter Interface + * @vm_ops: Callback functions for connecting KBase with + * arbiter interface device. + * @priv_data: Internal arbif data not used by KBASE. + * + * Arbiter Interface Kernel Module State used for linking KBase + * with an arbiter interface platform device + */ +struct arbiter_if_dev { + struct arbiter_if_vm_arb_ops vm_ops; + void *priv_data; +}; + +#endif /* _MALI_KBASE_ARBITER_INTERFACE_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_pm.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_pm.c new file mode 100644 index 0000000..6c35e16 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_pm.c @@ -0,0 +1,645 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * @file mali_kbase_arbiter_pm.c + * Mali arbiter power manager state machine and APIs + */ + +#include +#include +#include +#include +#include +#include +#include + +static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev); +static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld( + struct kbase_device *kbdev); + +/** + * kbase_arbiter_pm_vm_state_str() - Name of a VM state for log messages + * @state: VM state to translate + * + * Return: the enumerator name as a string, or "[UnknownState]" (after a + * debug assert) for any value that is not handled below. + */ +static inline const char *kbase_arbiter_pm_vm_state_str( + enum kbase_vm_state state) +{ + const char *name; + + switch (state) { + case KBASE_VM_STATE_INITIALIZING: + name = "KBASE_VM_STATE_INITIALIZING"; + break; + case KBASE_VM_STATE_INITIALIZING_WITH_GPU: + name = "KBASE_VM_STATE_INITIALIZING_WITH_GPU"; + break; + case KBASE_VM_STATE_SUSPENDED: + name = "KBASE_VM_STATE_SUSPENDED"; + break; + case KBASE_VM_STATE_STOPPED: + name = "KBASE_VM_STATE_STOPPED"; + break; + case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: + name = "KBASE_VM_STATE_STOPPED_GPU_REQUESTED"; + break; + case KBASE_VM_STATE_STARTING: + name = "KBASE_VM_STATE_STARTING"; + break; + case KBASE_VM_STATE_IDLE: + name = "KBASE_VM_STATE_IDLE"; + break; + case KBASE_VM_STATE_ACTIVE: + name = "KBASE_VM_STATE_ACTIVE"; + break; + case KBASE_VM_STATE_STOPPING_IDLE: + name = "KBASE_VM_STATE_STOPPING_IDLE"; + break; + case KBASE_VM_STATE_STOPPING_ACTIVE: + name = "KBASE_VM_STATE_STOPPING_ACTIVE"; + break; + case KBASE_VM_STATE_SUSPEND_PENDING: + name = "KBASE_VM_STATE_SUSPEND_PENDING"; + break; + case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: + name = "KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT"; + break; + default: + KBASE_DEBUG_ASSERT(false); + name = "[UnknownState]"; + break; + } + + return name; +} + +static inline const char *kbase_arbiter_pm_vm_event_str( + enum kbase_arbif_evt evt) +{ + switch (evt) { + case KBASE_VM_GPU_INITIALIZED_EVT: + return "KBASE_VM_GPU_INITIALIZED_EVT"; + case KBASE_VM_GPU_STOP_EVT: + return "KBASE_VM_GPU_STOP_EVT"; + case KBASE_VM_GPU_GRANTED_EVT: + return "KBASE_VM_GPU_GRANTED_EVT"; + case KBASE_VM_GPU_LOST_EVT: + return "KBASE_VM_GPU_LOST_EVT"; + case KBASE_VM_OS_SUSPEND_EVENT: + return "KBASE_VM_OS_SUSPEND_EVENT"; + case 
KBASE_VM_OS_RESUME_EVENT: + return "KBASE_VM_OS_RESUME_EVENT"; + case KBASE_VM_GPU_IDLE_EVENT: + return "KBASE_VM_GPU_IDLE_EVENT"; + case KBASE_VM_REF_EVENT: + return "KBASE_VM_REF_EVENT"; + default: + KBASE_DEBUG_ASSERT(false); + return "[UnknownEvent]"; + } +} + +/** + * kbase_arbiter_pm_vm_set_state() - Change the VM state and wake any waiters + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @new_state: State to move to + * + * Must be called with vm_state_lock held. Every call wakes vm_state_wait, + * whether or not the new state differs from the old one. + */ +static void kbase_arbiter_pm_vm_set_state(struct kbase_device *kbdev, + enum kbase_vm_state new_state) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + dev_dbg(kbdev->dev, "VM set_state %s -> %s", + kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state), + kbase_arbiter_pm_vm_state_str(new_state)); + lockdep_assert_held(&arb_vm_state->vm_state_lock); + arb_vm_state->vm_state = new_state; + wake_up(&arb_vm_state->vm_state_wait); +} + +/** + * kbase_arbiter_pm_suspend_wq() - Work item that suspends the driver + * @data: The vm_suspend_work member of the VM state + * + * Calls kbase_pm_driver_suspend() when the VM state is STOPPING_IDLE, + * STOPPING_ACTIVE or SUSPEND_PENDING. vm_state_lock is dropped around the + * suspend call and taken again afterwards, so the unlock at the end of the + * function is balanced on both paths. + */ +static void kbase_arbiter_pm_suspend_wq(struct work_struct *data) +{ + struct kbase_arbiter_vm_state *arb_vm_state = container_of(data, + struct kbase_arbiter_vm_state, + vm_suspend_work); + struct kbase_device *kbdev = arb_vm_state->kbdev; + + mutex_lock(&arb_vm_state->vm_state_lock); + dev_dbg(kbdev->dev, ">%s\n", __func__); + if (arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_IDLE || + arb_vm_state->vm_state == + KBASE_VM_STATE_STOPPING_ACTIVE || + arb_vm_state->vm_state == + KBASE_VM_STATE_SUSPEND_PENDING) { + mutex_unlock(&arb_vm_state->vm_state_lock); + dev_dbg(kbdev->dev, ">kbase_pm_driver_suspend\n"); + kbase_pm_driver_suspend(kbdev); + dev_dbg(kbdev->dev, "<kbase_pm_driver_suspend\n"); + /* Re-take the lock released above before the common unlock */ + mutex_lock(&arb_vm_state->vm_state_lock); + } + mutex_unlock(&arb_vm_state->vm_state_lock); + dev_dbg(kbdev->dev, "<%s\n", __func__); +} + +static void kbase_arbiter_pm_resume_wq(struct work_struct *data) +{ + struct kbase_arbiter_vm_state *arb_vm_state = container_of(data, + struct kbase_arbiter_vm_state, + vm_resume_work); + struct kbase_device *kbdev = arb_vm_state->kbdev; + + mutex_lock(&arb_vm_state->vm_state_lock); + dev_dbg(kbdev->dev, ">%s\n", __func__); + arb_vm_state->vm_arb_starting = true; + if (arb_vm_state->vm_state == KBASE_VM_STATE_STARTING) { + mutex_unlock(&arb_vm_state->vm_state_lock); + 
dev_dbg(kbdev->dev, ">kbase_pm_driver_resume\n"); + kbase_pm_driver_resume(kbdev, true); + dev_dbg(kbdev->dev, "<kbase_pm_driver_resume\n"); + /* Re-take the lock released above before the common unlock */ + mutex_lock(&arb_vm_state->vm_state_lock); + } else if (arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_ACTIVE) { + kbase_arbiter_pm_vm_stopped(kbdev); + } + arb_vm_state->vm_arb_starting = false; + mutex_unlock(&arb_vm_state->vm_state_lock); + dev_dbg(kbdev->dev, "<%s\n", __func__); +} + +/** + * kbase_arbiter_pm_early_init() - Allocate the VM state and connect to the arbiter + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Allocates and initializes kbdev->pm.arb_vm_state, then calls + * kbase_arbif_init(). When an arbiter interface is present the GPU is + * requested and the call blocks until the VM state becomes + * KBASE_VM_STATE_INITIALIZING_WITH_GPU. + * + * Return: 0 on success, -ENOMEM if the VM state or its workqueue cannot be + * allocated, or the error returned by kbase_arbif_init(). On failure the + * VM state is freed and kbdev->pm.arb_vm_state is not left pointing at it. + */ +int kbase_arbiter_pm_early_init(struct kbase_device *kbdev) +{ + int err; + struct kbase_arbiter_vm_state *arb_vm_state = NULL; + + arb_vm_state = kmalloc(sizeof(struct kbase_arbiter_vm_state), + GFP_KERNEL); + if (arb_vm_state == NULL) + return -ENOMEM; + + arb_vm_state->kbdev = kbdev; + arb_vm_state->vm_state = KBASE_VM_STATE_INITIALIZING; + + mutex_init(&arb_vm_state->vm_state_lock); + init_waitqueue_head(&arb_vm_state->vm_state_wait); + arb_vm_state->vm_arb_wq = alloc_ordered_workqueue("kbase_vm_arb_wq", + WQ_HIGHPRI); + if (!arb_vm_state->vm_arb_wq) { + dev_err(kbdev->dev, "Failed to allocate vm_arb workqueue\n"); + err = -ENOMEM; + goto free_state; + } + INIT_WORK(&arb_vm_state->vm_suspend_work, kbase_arbiter_pm_suspend_wq); + INIT_WORK(&arb_vm_state->vm_resume_work, kbase_arbiter_pm_resume_wq); + /* kmalloc() does not zero the allocation: set every flag explicitly */ + arb_vm_state->vm_arb_starting = false; + arb_vm_state->vm_arb_stopping = false; + arb_vm_state->vm_arb_users_waiting = 0; + kbdev->pm.arb_vm_state = arb_vm_state; + + err = kbase_arbif_init(kbdev); + if (err) + goto arbif_init_fail; + + if (kbdev->arb.arb_if) { + kbase_arbif_gpu_request(kbdev); + dev_dbg(kbdev->dev, "Waiting for initial GPU assignment...\n"); + wait_event(arb_vm_state->vm_state_wait, + arb_vm_state->vm_state == + KBASE_VM_STATE_INITIALIZING_WITH_GPU); + dev_dbg(kbdev->dev, + "Waiting for initial GPU assignment - done\n"); + } + return 0; + +arbif_init_fail: + destroy_workqueue(arb_vm_state->vm_arb_wq); + kbdev->pm.arb_vm_state = NULL; +free_state: + /* Also reached when the workqueue allocation fails */ + kfree(arb_vm_state); + return err; +} + +void kbase_arbiter_pm_early_term(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + 
mutex_lock(&arb_vm_state->vm_state_lock); + if (arb_vm_state->vm_state > KBASE_VM_STATE_STOPPED_GPU_REQUESTED) + kbase_arbif_gpu_stopped(kbdev, false); + + mutex_unlock(&arb_vm_state->vm_state_lock); + kbase_arbif_destroy(kbdev); + destroy_workqueue(arb_vm_state->vm_arb_wq); + arb_vm_state->vm_arb_wq = NULL; + kfree(kbdev->pm.arb_vm_state); + kbdev->pm.arb_vm_state = NULL; +} + /** + * kbase_arbiter_pm_release_interrupts() - Conditionally release the GPU interrupts + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Takes vm_state_lock and calls kbase_release_interrupts() when no arbiter + * interface is in use, or when the VM state compares greater than + * KBASE_VM_STATE_STOPPED_GPU_REQUESTED. In the remaining states nothing is + * released here. + */ +void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + mutex_lock(&arb_vm_state->vm_state_lock); + if (!kbdev->arb.arb_if || + arb_vm_state->vm_state > + KBASE_VM_STATE_STOPPED_GPU_REQUESTED) + kbase_release_interrupts(kbdev); + + mutex_unlock(&arb_vm_state->vm_state_lock); +} + /** + * kbase_arbiter_pm_vm_stopped() - Complete a stop and report it to the arbiter + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Must be called with vm_state_lock held. Releases the GPU interrupts, moves + * STOPPING_ACTIVE to STOPPED_GPU_REQUESTED, STOPPING_IDLE to STOPPED and + * SUSPEND_PENDING to SUSPENDED, then calls kbase_arbif_gpu_stopped(). The + * GPU is asked back only when leaving STOPPING_ACTIVE; STOPPING_IDLE is + * first promoted to STOPPING_ACTIVE if vm_arb_users_waiting is above zero. + */ +void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev) +{ + bool request_gpu = false; + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + lockdep_assert_held(&arb_vm_state->vm_state_lock); + + if (arb_vm_state->vm_arb_users_waiting > 0 && + arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_IDLE) + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPING_ACTIVE); + + dev_dbg(kbdev->dev, "%s %s\n", __func__, + kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); + kbase_release_interrupts(kbdev); + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_STOPPING_ACTIVE: + request_gpu = true; + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPED_GPU_REQUESTED); + break; + case KBASE_VM_STATE_STOPPING_IDLE: + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPED); + break; + case KBASE_VM_STATE_SUSPEND_PENDING: + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPENDED); + break; + default: + dev_warn(kbdev->dev, "unexpected pm_stop VM state %u", + arb_vm_state->vm_state); + break; + } + + kbase_arbif_gpu_stopped(kbdev, request_gpu); +} + /** + * kbase_arbiter_pm_vm_gpu_start() - Handle the GPU being granted by the arbiter + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Must be called with vm_state_lock held. + */ +static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = 
kbdev->pm.arb_vm_state; + + lockdep_assert_held(&arb_vm_state->vm_state_lock); + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_INITIALIZING: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_INITIALIZING_WITH_GPU); + break; + case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STARTING); + kbase_install_interrupts(kbdev); + queue_work(arb_vm_state->vm_arb_wq, + &arb_vm_state->vm_resume_work); + break; + case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: /* grant arrived while suspending: hand the GPU straight back */ + kbase_arbif_gpu_stopped(kbdev, false); + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPENDED); + break; + default: + dev_warn(kbdev->dev, + "GPU_GRANTED when not expected - state %s\n", + kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); + break; + } +} + /** + * kbase_arbiter_pm_vm_gpu_stop() - Handle a GPU stop request + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Must be called with vm_state_lock held. If the state is still + * INITIALIZING_WITH_GPU the lock is dropped while waiting for the state to + * become IDLE or ACTIVE, then taken again. IDLE, ACTIVE and STARTING move to + * the matching STOPPING state; the suspend work is queued for IDLE and + * ACTIVE, and for STARTING only when vm_arb_starting is set. + */ +static void kbase_arbiter_pm_vm_gpu_stop(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + lockdep_assert_held(&arb_vm_state->vm_state_lock); + if (arb_vm_state->vm_state == KBASE_VM_STATE_INITIALIZING_WITH_GPU) { + mutex_unlock(&arb_vm_state->vm_state_lock); + kbase_arbiter_pm_vm_wait_gpu_assignment(kbdev); + mutex_lock(&arb_vm_state->vm_state_lock); + } + + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_IDLE: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPING_IDLE); + queue_work(arb_vm_state->vm_arb_wq, + &arb_vm_state->vm_suspend_work); + break; + case KBASE_VM_STATE_ACTIVE: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPING_ACTIVE); + queue_work(arb_vm_state->vm_arb_wq, + &arb_vm_state->vm_suspend_work); + break; + case KBASE_VM_STATE_STARTING: + dev_dbg(kbdev->dev, "Got GPU_STOP event while STARTING."); + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPING_ACTIVE); + if (arb_vm_state->vm_arb_starting) + queue_work(arb_vm_state->vm_arb_wq, + &arb_vm_state->vm_suspend_work); + break; + case KBASE_VM_STATE_SUSPEND_PENDING: + /* Suspend finishes with a stop so nothing else to do */ + 
break; + default: + dev_warn(kbdev->dev, "GPU_STOP when not expected - state %s\n", + kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); + break; + } +} + /** + * kbase_gpu_lost() - Handle the arbiter reporting that the GPU has been lost + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Must be called with vm_state_lock held. In the running and stopping states + * the lock is dropped around kbase_pm_handle_gpu_lost() and taken again + * before returning; the running states first go through + * kbase_arbiter_pm_vm_gpu_stop(). + */ +static void kbase_gpu_lost(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + lockdep_assert_held(&arb_vm_state->vm_state_lock); + + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_STARTING: + case KBASE_VM_STATE_ACTIVE: + case KBASE_VM_STATE_IDLE: + dev_warn(kbdev->dev, "GPU lost in state %s", + kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); + kbase_arbiter_pm_vm_gpu_stop(kbdev); + mutex_unlock(&arb_vm_state->vm_state_lock); + kbase_pm_handle_gpu_lost(kbdev); + mutex_lock(&arb_vm_state->vm_state_lock); + break; + case KBASE_VM_STATE_STOPPING_IDLE: + case KBASE_VM_STATE_STOPPING_ACTIVE: + case KBASE_VM_STATE_SUSPEND_PENDING: + dev_info(kbdev->dev, "GPU lost while stopping"); + mutex_unlock(&arb_vm_state->vm_state_lock); + kbase_pm_handle_gpu_lost(kbdev); + mutex_lock(&arb_vm_state->vm_state_lock); + break; + case KBASE_VM_STATE_SUSPENDED: + case KBASE_VM_STATE_STOPPED: + case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: + dev_info(kbdev->dev, "GPU lost while already stopped"); + break; + case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: + dev_info(kbdev->dev, "GPU lost while waiting to suspend"); + kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPENDED); + break; + default: + break; + } + +} + /** + * kbase_arbiter_pm_vm_os_suspend_ready_state() - Check for a state an OS suspend can start from + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Return: true if the VM state is SUSPENDED, STOPPED, IDLE or ACTIVE, + * false for every other state. + */ +static inline bool kbase_arbiter_pm_vm_os_suspend_ready_state( + struct kbase_device *kbdev) +{ + switch (kbdev->pm.arb_vm_state->vm_state) { + case KBASE_VM_STATE_SUSPENDED: + case KBASE_VM_STATE_STOPPED: + case KBASE_VM_STATE_IDLE: + case KBASE_VM_STATE_ACTIVE: + return true; + default: + return false; + } +} + /** + * kbase_arbiter_pm_vm_os_prepare_suspend() - Handle KBASE_VM_OS_SUSPEND_EVENT + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Must be called with vm_state_lock held; the lock is dropped while waiting + * for state changes and around kbase_pm_driver_suspend(). + */ +static void kbase_arbiter_pm_vm_os_prepare_suspend(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + enum kbase_vm_state prev_state; + + 
lockdep_assert_held(&arb_vm_state->vm_state_lock); + if (kbdev->arb.arb_if) { + if (kbdev->pm.arb_vm_state->vm_state == + KBASE_VM_STATE_SUSPENDED) + return; + } + /* Block suspend OS function until we are in a stable state + * with vm_state_lock + */ + while (!kbase_arbiter_pm_vm_os_suspend_ready_state(kbdev)) { + prev_state = arb_vm_state->vm_state; /* sampled before the switch, so a transition made below ends the wait at once */ + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_STOPPING_ACTIVE: + case KBASE_VM_STATE_STOPPING_IDLE: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_SUSPEND_PENDING); + break; + case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT); + break; + case KBASE_VM_STATE_STARTING: + if (!arb_vm_state->vm_arb_starting) { + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_SUSPEND_PENDING); + kbase_arbiter_pm_vm_stopped(kbdev); + } + break; + default: + break; + } + mutex_unlock(&arb_vm_state->vm_state_lock); + wait_event(arb_vm_state->vm_state_wait, + arb_vm_state->vm_state != prev_state); + mutex_lock(&arb_vm_state->vm_state_lock); + } + + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_STOPPED: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_SUSPENDED); + break; + case KBASE_VM_STATE_IDLE: + case KBASE_VM_STATE_ACTIVE: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_SUSPEND_PENDING); + mutex_unlock(&arb_vm_state->vm_state_lock); + /* Ensure resume has completed fully before starting suspend */ + flush_work(&arb_vm_state->vm_resume_work); + kbase_pm_driver_suspend(kbdev); + mutex_lock(&arb_vm_state->vm_state_lock); + break; + case KBASE_VM_STATE_SUSPENDED: + break; + default: + KBASE_DEBUG_ASSERT_MSG(false, "Unexpected state to suspend"); + break; + } +} + /** + * kbase_arbiter_pm_vm_os_resume() - Handle KBASE_VM_OS_RESUME_EVENT + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Must be called with vm_state_lock held; the lock is dropped while waiting + * for the GPU to be assigned again. + */ +static void kbase_arbiter_pm_vm_os_resume(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + lockdep_assert_held(&arb_vm_state->vm_state_lock); + KBASE_DEBUG_ASSERT_MSG(arb_vm_state->vm_state == + 
KBASE_VM_STATE_SUSPENDED, + "Unexpected state to resume"); + + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPED_GPU_REQUESTED); + kbase_arbif_gpu_request(kbdev); + + /* Release lock and block resume OS function until we have + * asynchronously received the GRANT message from the Arbiter and + * fully resumed + */ + mutex_unlock(&arb_vm_state->vm_state_lock); + kbase_arbiter_pm_vm_wait_gpu_assignment(kbdev); + flush_work(&arb_vm_state->vm_resume_work); + mutex_lock(&arb_vm_state->vm_state_lock); +} + /** + * kbase_arbiter_pm_vm_event() - Dispatch an event to the arbiter VM state machine + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @evt: Event to handle + * + * Returns immediately when no arbiter interface is in use. Otherwise the + * event is handled with vm_state_lock held; some of the handlers drop and + * re-take the lock while they wait. + */ +void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, + enum kbase_arbif_evt evt) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + if (!kbdev->arb.arb_if) + return; + + mutex_lock(&arb_vm_state->vm_state_lock); + dev_dbg(kbdev->dev, "%s %s\n", __func__, + kbase_arbiter_pm_vm_event_str(evt)); + + switch (evt) { + case KBASE_VM_GPU_GRANTED_EVT: + kbase_arbiter_pm_vm_gpu_start(kbdev); + break; + case KBASE_VM_GPU_STOP_EVT: + kbase_arbiter_pm_vm_gpu_stop(kbdev); + break; + case KBASE_VM_GPU_LOST_EVT: + dev_info(kbdev->dev, "KBASE_ARBIF_GPU_LOST_EVT!"); + kbase_gpu_lost(kbdev); + break; + case KBASE_VM_OS_SUSPEND_EVENT: + kbase_arbiter_pm_vm_os_prepare_suspend(kbdev); + break; + case KBASE_VM_OS_RESUME_EVENT: + kbase_arbiter_pm_vm_os_resume(kbdev); + break; + case KBASE_VM_GPU_IDLE_EVENT: /* only ACTIVE reacts; every other state ignores the event */ + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_ACTIVE: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_IDLE); + kbase_arbif_gpu_idle(kbdev); + break; + default: + break; + } + break; + + case KBASE_VM_REF_EVENT: + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_STARTING: + KBASE_TLSTREAM_TL_EVENT_ARB_STARTED(kbdev, kbdev); + /* FALL THROUGH */ + case KBASE_VM_STATE_IDLE: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_ACTIVE); + kbase_arbif_gpu_active(kbdev); + break; + case KBASE_VM_STATE_STOPPING_IDLE: + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPING_ACTIVE); + break; + default: + break; + 
} + break; + + case KBASE_VM_GPU_INITIALIZED_EVT: + lockdep_assert_held(&kbdev->pm.lock); + if (kbdev->pm.active_count > 0) { + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_ACTIVE); + kbase_arbif_gpu_active(kbdev); + } else { + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_IDLE); + kbase_arbif_gpu_idle(kbdev); + } + break; + + default: + dev_alert(kbdev->dev, "Got Unknown Event!"); + break; + } + mutex_unlock(&arb_vm_state->vm_state_lock); +} + +static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + dev_dbg(kbdev->dev, "Waiting for GPU assignment...\n"); + wait_event(arb_vm_state->vm_state_wait, + arb_vm_state->vm_state == KBASE_VM_STATE_IDLE || + arb_vm_state->vm_state == KBASE_VM_STATE_ACTIVE); + dev_dbg(kbdev->dev, "Waiting for GPU assignment - done\n"); +} + +static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld( + struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + lockdep_assert_held(&arb_vm_state->vm_state_lock); + return (arb_vm_state->vm_state == KBASE_VM_STATE_IDLE || + arb_vm_state->vm_state == KBASE_VM_STATE_ACTIVE); +} + +int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, + enum kbase_pm_suspend_handler suspend_handler) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + if (kbdev->arb.arb_if) { + mutex_lock(&arb_vm_state->vm_state_lock); + while (!kbase_arbiter_pm_vm_gpu_assigned_lockheld(kbdev)) { + /* Update VM state since we have GPU work to do */ + if (arb_vm_state->vm_state == + KBASE_VM_STATE_STOPPING_IDLE) + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPING_ACTIVE); + else if (arb_vm_state->vm_state == + KBASE_VM_STATE_STOPPED) { + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_STOPPED_GPU_REQUESTED); + kbase_arbif_gpu_request(kbdev); 
+ } else if (arb_vm_state->vm_state == + KBASE_VM_STATE_INITIALIZING_WITH_GPU) + break; + + if (suspend_handler != + KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE) { + if (suspend_handler == + KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED + || + kbdev->pm.active_count > 0) + break; + + mutex_unlock(&arb_vm_state->vm_state_lock); + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&js_devdata->runpool_mutex); + return 1; + } + + if (arb_vm_state->vm_state == + KBASE_VM_STATE_INITIALIZING_WITH_GPU) + break; + + /* Need to synchronously wait for GPU assignment */ + arb_vm_state->vm_arb_users_waiting++; + mutex_unlock(&arb_vm_state->vm_state_lock); + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&js_devdata->runpool_mutex); + kbase_arbiter_pm_vm_wait_gpu_assignment(kbdev); + mutex_lock(&js_devdata->runpool_mutex); + mutex_lock(&kbdev->pm.lock); + mutex_lock(&arb_vm_state->vm_state_lock); + arb_vm_state->vm_arb_users_waiting--; + } + mutex_unlock(&arb_vm_state->vm_state_lock); + } + return 0; +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_pm.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_pm.h new file mode 100644 index 0000000..3c49eb1 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_pm.h @@ -0,0 +1,159 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + *//* SPDX-License-Identifier: GPL-2.0 */ + +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * @file + * Mali arbiter power manager state machine and APIs + */ + +#ifndef _MALI_KBASE_ARBITER_PM_H_ +#define _MALI_KBASE_ARBITER_PM_H_ + +#include "mali_kbase_arbif.h" + +/** + * enum kbase_vm_state - Current PM Arbitration state. + * + * @KBASE_VM_STATE_INITIALIZING: Special state before arbiter is initialized. + * @KBASE_VM_STATE_INITIALIZING_WITH_GPU: Initialization after GPU + * has been granted. + * @KBASE_VM_STATE_SUSPENDED: KBase is suspended by OS and GPU is not assigned. + * @KBASE_VM_STATE_STOPPED: GPU is not assigned to KBase and is not required. + * @KBASE_VM_STATE_STOPPED_GPU_REQUESTED: GPU is not assigned to KBase + * but a request has been made. + * @KBASE_VM_STATE_STARTING: GPU is assigned and KBase is getting ready to run. 
+ * @KBASE_VM_STATE_IDLE: GPU is assigned but KBase has no work to do + * @KBASE_VM_STATE_ACTIVE: GPU is assigned and KBase is busy using it + * @KBASE_VM_STATE_SUSPEND_PENDING: OS is going into suspend mode. + * @KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: OS is going into suspend mode but GPU + * has already been requested. + * In this situation we must wait for + * the Arbiter to send a GRANTED message + * and respond immediately with + * a STOPPED message before entering + * the suspend mode. + * @KBASE_VM_STATE_STOPPING_IDLE: Arbiter has sent a stopped message and there + * is currently no work to do on the GPU. + * @KBASE_VM_STATE_STOPPING_ACTIVE: Arbiter has sent a stopped message when + * KBase has work to do. + */ +enum kbase_vm_state { + KBASE_VM_STATE_INITIALIZING, + KBASE_VM_STATE_INITIALIZING_WITH_GPU, + KBASE_VM_STATE_SUSPENDED, + KBASE_VM_STATE_STOPPED, + KBASE_VM_STATE_STOPPED_GPU_REQUESTED, + KBASE_VM_STATE_STARTING, + KBASE_VM_STATE_IDLE, + KBASE_VM_STATE_ACTIVE, + KBASE_VM_STATE_SUSPEND_PENDING, + KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT, + KBASE_VM_STATE_STOPPING_IDLE, + KBASE_VM_STATE_STOPPING_ACTIVE +}; + +/** + * kbase_arbiter_pm_early_init() - Initialize arbiter for VM Paravirtualized use + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Initialize the arbiter and other required resources during the runtime + * and request the GPU for the VM for the first time. + * + * Return: 0 if successful, otherwise a standard Linux error code + */ +int kbase_arbiter_pm_early_init(struct kbase_device *kbdev); + +/** + * kbase_arbiter_pm_early_term() - Shutdown arbiter and free resources. 
+ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Clean up all the resources + */ +void kbase_arbiter_pm_early_term(struct kbase_device *kbdev); + +/** + * kbase_arbiter_pm_release_interrupts() - Release the GPU interrupts + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Releases interrupts if needed (GPU is available) otherwise does nothing + */ +void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev); + +/** + * kbase_arbiter_pm_vm_event() - Dispatch VM event to the state machine + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @event: The event to dispatch + * + * The state machine function. Receives events and transitions states + * according to the event received and the current state. + */ +void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, + enum kbase_arbif_evt event); + +/** + * kbase_arbiter_pm_ctx_active_handle_suspend() - Handle suspend operation for + * arbitration mode + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @suspend_handler: The handler code for how to handle a suspend + * that might occur + * + * This function handles a suspend event from the driver, + * communicating with the arbiter and waiting synchronously for the GPU + * to be granted again depending on the VM state. + * + * When 1 is returned, kbdev->pm.lock and the job scheduler runpool_mutex have + * been unlocked by this function. While it waits for the GPU it also drops and + * re-takes both. NOTE(review): this implies callers enter with both locks + * held - confirm. + * + * Return: 0 if success, 1 if failure due to system suspending/suspended + */ +int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, + enum kbase_pm_suspend_handler suspend_handler); + + +/** + * kbase_arbiter_pm_vm_stopped() - Handle stop event for the VM + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This function handles a stop event for the VM. + * It will update the VM state and forward the stop event to the driver. 
+ */ +void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev); + +#endif /*_MALI_KBASE_ARBITER_PM_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild new file mode 100644 index 0000000..2449e80 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild @@ -0,0 +1,65 @@ +# +# (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. 
+# +# SPDX-License-Identifier: GPL-2.0 +# +# + +BACKEND += \ + backend/gpu/mali_kbase_cache_policy_backend.c \ + backend/gpu/mali_kbase_device_hw.c \ + backend/gpu/mali_kbase_gpuprops_backend.c \ + backend/gpu/mali_kbase_irq_linux.c \ + backend/gpu/mali_kbase_instr_backend.c \ + backend/gpu/mali_kbase_js_backend.c \ + backend/gpu/mali_kbase_pm_backend.c \ + backend/gpu/mali_kbase_pm_driver.c \ + backend/gpu/mali_kbase_pm_metrics.c \ + backend/gpu/mali_kbase_pm_ca.c \ + backend/gpu/mali_kbase_pm_always_on.c \ + backend/gpu/mali_kbase_pm_coarse_demand.c \ + backend/gpu/mali_kbase_pm_policy.c \ + backend/gpu/mali_kbase_time.c \ + backend/gpu/mali_kbase_l2_mmu_config.c + +ifeq ($(MALI_USE_CSF),1) +# empty +else + BACKEND += \ + backend/gpu/mali_kbase_jm_as.c \ + backend/gpu/mali_kbase_debug_job_fault_backend.c \ + backend/gpu/mali_kbase_jm_hw.c \ + backend/gpu/mali_kbase_jm_rb.c +endif + +ifeq ($(MALI_CUSTOMER_RELEASE),0) +BACKEND += \ + backend/gpu/mali_kbase_pm_always_on_demand.c +endif + +ifeq ($(CONFIG_MALI_DEVFREQ),y) +BACKEND += \ + backend/gpu/mali_kbase_devfreq.c +endif + +ifeq ($(CONFIG_MALI_NO_MALI),y) + # Dummy model + BACKEND += backend/gpu/mali_kbase_model_dummy.c + BACKEND += backend/gpu/mali_kbase_model_linux.c + # HW error simulation + BACKEND += backend/gpu/mali_kbase_model_error_generator.c +endif diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h new file mode 100644 index 0000000..4a61f96 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h @@ -0,0 +1,31 @@ +/* + * + * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Backend specific configuration + */ + +#ifndef _KBASE_BACKEND_CONFIG_H_ +#define _KBASE_BACKEND_CONFIG_H_ + +#endif /* _KBASE_BACKEND_CONFIG_H_ */ + diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c new file mode 100644 index 0000000..7378bfd --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c @@ -0,0 +1,34 @@ +/* + * + * (C) COPYRIGHT 2015-2016,2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "backend/gpu/mali_kbase_cache_policy_backend.h" +#include + +void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, + u32 mode) +{ + kbdev->current_gpu_coherency_mode = mode; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) + kbase_reg_write(kbdev, COHERENCY_ENABLE, mode); +} + diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h new file mode 100644 index 0000000..f78ada7 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h @@ -0,0 +1,39 @@ +/* + * + * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +#ifndef _KBASE_CACHE_POLICY_BACKEND_H_ +#define _KBASE_CACHE_POLICY_BACKEND_H_ + +#include "mali_kbase.h" +#include "mali_base_kernel.h" + +/** + * kbase_cache_set_coherency_mode() - Sets the system coherency mode + * in the GPU. 
+ * @kbdev: Device pointer + * @mode: Coherency mode. COHERENCY_ACE/ACE_LITE + */ +void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, + u32 mode); + +#endif /* _KBASE_CACHE_POLICY_BACKEND_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c new file mode 100644 index 0000000..b05844e --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c @@ -0,0 +1,164 @@ +/* + * + * (C) COPYRIGHT 2012-2015,2018-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include "mali_kbase_debug_job_fault.h" + +#ifdef CONFIG_DEBUG_FS + +/* Register offsets passed to GPU_CONTROL_REG(r) when the snapshot list is built */ +static int gpu_control_reg_snapshot[] = { + GPU_ID, + SHADER_READY_LO, + SHADER_READY_HI, + TILER_READY_LO, + TILER_READY_HI, + L2_READY_LO, + L2_READY_HI +}; + +/* Register offsets passed to JOB_CONTROL_REG(r) */ +static int job_control_reg_snapshot[] = { + JOB_IRQ_MASK, + JOB_IRQ_STATUS +}; + +/* Register offsets passed to JOB_SLOT_REG(n,r), once per job slot */ +static int job_slot_reg_snapshot[] = { + JS_HEAD_LO, + JS_HEAD_HI, + JS_TAIL_LO, + JS_TAIL_HI, + JS_AFFINITY_LO, + JS_AFFINITY_HI, + JS_CONFIG, + JS_STATUS, + JS_HEAD_NEXT_LO, + JS_HEAD_NEXT_HI, + JS_AFFINITY_NEXT_LO, + JS_AFFINITY_NEXT_HI, + JS_CONFIG_NEXT +}; + +/* Register offsets passed to MMU_REG(r) */ +static int mmu_reg_snapshot[] = { + MMU_IRQ_MASK, + MMU_IRQ_STATUS +}; + +/* Register offsets passed to MMU_AS_REG(n,r), once per address space */ +static int as_reg_snapshot[] = { + AS_TRANSTAB_LO, + AS_TRANSTAB_HI, + AS_TRANSCFG_LO, + AS_TRANSCFG_HI, + AS_MEMATTR_LO, + AS_MEMATTR_HI, + AS_FAULTSTATUS, + AS_FAULTADDRESS_LO, + AS_FAULTADDRESS_HI, + AS_STATUS +}; + /* Fill kctx->reg_dump with the list of registers to capture. Entries are (address, value) pairs: only the address slot (even index) is written here, so offset advances by 2 per register, and the list is closed by a pair of REGISTER_DUMP_TERMINATION_FLAG. Returns false when reg_dump is NULL, true otherwise. */ +bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, + int reg_range) +{ + int i, j; + int offset = 0; + int slot_number; + int as_number; + + if (kctx->reg_dump == NULL) + return false; + + slot_number = kctx->kbdev->gpu_props.num_job_slots; + as_number = kctx->kbdev->gpu_props.num_address_spaces; + + /* get the GPU control registers (sizeof/4 is the element count, assuming 4-byte int) */ + for (i = 0; i < sizeof(gpu_control_reg_snapshot)/4; i++) { + kctx->reg_dump[offset] = + GPU_CONTROL_REG(gpu_control_reg_snapshot[i]); + offset += 2; + } + + /* get the Job control registers */ + for (i = 0; i < sizeof(job_control_reg_snapshot)/4; i++) { + kctx->reg_dump[offset] = + JOB_CONTROL_REG(job_control_reg_snapshot[i]); + offset += 2; + } + + /* get the Job Slot registers, for every job slot */ + for (j = 0; j < slot_number; j++) { + for (i = 0; i < sizeof(job_slot_reg_snapshot)/4; i++) { + kctx->reg_dump[offset] = + JOB_SLOT_REG(j, job_slot_reg_snapshot[i]); + offset += 2; + } + } + + 
/* get the MMU registers */ + for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) { + kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]); + offset += 2; + } + + /* get the Address space registers, for every address space */ + for (j = 0; j < as_number; j++) { + for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) { + kctx->reg_dump[offset] = + MMU_AS_REG(j, as_reg_snapshot[i]); + offset += 2; + } + } + /* NOTE(review): this bound check only warns and runs after the writes above; the terminator below is written regardless - confirm reg_range always covers the list */ + WARN_ON(offset >= (reg_range*2/4)); + + /* set the termination flag in both slots of the final pair */ + kctx->reg_dump[offset] = REGISTER_DUMP_TERMINATION_FLAG; + kctx->reg_dump[offset + 1] = REGISTER_DUMP_TERMINATION_FLAG; + + dev_dbg(kctx->kbdev->dev, "kbase_job_fault_reg_snapshot_init:%d\n", + offset); + + return true; +} + /* Walk the (address, value) pairs in kctx->reg_dump up to the termination flag, reading each listed register and storing the result in the value slot. Returns false when reg_dump is NULL, true otherwise. */ +bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx) +{ + int offset = 0; + + if (kctx->reg_dump == NULL) + return false; + + while (kctx->reg_dump[offset] != REGISTER_DUMP_TERMINATION_FLAG) { + kctx->reg_dump[offset+1] = + kbase_reg_read(kctx->kbdev, + kctx->reg_dump[offset]); + offset += 2; + } + return true; +} + + +#endif diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c new file mode 100644 index 0000000..2806f05 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c @@ -0,0 +1,731 @@ +/* + * + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include + +#include +#include +#include +#ifdef CONFIG_DEVFREQ_THERMAL +#include +#endif + +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) +#include +#else /* Linux >= 3.13 */ +/* In 3.13 the OPP include header file, types, and functions were all + * renamed. Use the old filename for the include, and define the new names to + * the old, when an old kernel is detected. + */ +#include +#define dev_pm_opp opp +#define dev_pm_opp_get_voltage opp_get_voltage +#define dev_pm_opp_get_opp_count opp_get_opp_count +#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil +#define dev_pm_opp_find_freq_floor opp_find_freq_floor +#endif /* Linux >= 3.13 */ + +/** + * opp_translate - Translate nominal OPP frequency from devicetree into real + * frequency and core mask + * @kbdev: Device pointer + * @freq: Nominal frequency + * @core_mask: Pointer to u64 to store core mask to + * @freqs: Pointer to array of frequencies + * @volts: Pointer to array of voltages + * + * This function will only perform translation if an operating-points-v2-mali + * table is present in devicetree. If one is not present then it will return an + * untranslated frequency and all cores enabled. 
+ */ +static void opp_translate(struct kbase_device *kbdev, unsigned long freq, + u64 *core_mask, unsigned long *freqs, unsigned long *volts) +{ + unsigned int i; + + for (i = 0; i < kbdev->num_opps; i++) { + if (kbdev->devfreq_table[i].opp_freq == freq) { + unsigned int j; + + *core_mask = kbdev->devfreq_table[i].core_mask; + for (j = 0; j < kbdev->nr_clocks; j++) { + freqs[j] = + kbdev->devfreq_table[i].real_freqs[j]; + volts[j] = + kbdev->devfreq_table[i].opp_volts[j]; + } + + break; + } + } + + /* If failed to find OPP, return all cores enabled + * and nominal frequency + */ + if (i == kbdev->num_opps) { + *core_mask = kbdev->gpu_props.props.raw_props.shader_present; + for (i = 0; i < kbdev->nr_clocks; i++) + freqs[i] = freq; + } +} + +static int +kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + struct dev_pm_opp *opp; + unsigned long nominal_freq; + unsigned long freqs[BASE_MAX_NR_CLOCKS_REGULATORS] = {0}; + unsigned long volts[BASE_MAX_NR_CLOCKS_REGULATORS] = {0}; + unsigned int i; + u64 core_mask; + + nominal_freq = *target_freq; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) + rcu_read_lock(); +#endif + opp = devfreq_recommended_opp(dev, &nominal_freq, flags); +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) + rcu_read_unlock(); +#endif + if (IS_ERR_OR_NULL(opp)) { + dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp)); + return PTR_ERR(opp); + } +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) + dev_pm_opp_put(opp); +#endif + + /* + * Only update if there is a change of frequency + */ + if (kbdev->current_nominal_freq == nominal_freq) { + *target_freq = nominal_freq; + return 0; + } + + opp_translate(kbdev, nominal_freq, &core_mask, freqs, volts); + +#ifdef CONFIG_REGULATOR + /* Regulators and clocks work in pairs: every clock has a regulator, + * and we never expect to have more regulators than clocks. 
+ * + * We always need to increase the voltage before increasing + * the frequency of a regulator/clock pair, otherwise the clock + * wouldn't have enough power to perform the transition. + * + * It's always safer to decrease the frequency before decreasing + * voltage of a regulator/clock pair, otherwise the clock could have + * problems operating if it is deprived of the necessary power + * to sustain its current frequency (even if that happens for a short + * transition interval). + */ + for (i = 0; i < kbdev->nr_clocks; i++) { + if (kbdev->regulators[i] && + kbdev->current_voltages[i] != volts[i] && + kbdev->current_freqs[i] < freqs[i]) { + int err; + + err = regulator_set_voltage(kbdev->regulators[i], + volts[i], volts[i]); + if (!err) { + kbdev->current_voltages[i] = volts[i]; + } else { + dev_err(dev, "Failed to increase voltage (%d) (target %lu)\n", + err, volts[i]); + return err; + } + } + } +#endif + + for (i = 0; i < kbdev->nr_clocks; i++) { + if (kbdev->clocks[i]) { + int err; + + err = clk_set_rate(kbdev->clocks[i], freqs[i]); + if (!err) { + kbdev->current_freqs[i] = freqs[i]; + } else { + dev_err(dev, "Failed to set clock %lu (target %lu)\n", + freqs[i], *target_freq); + return err; + } + } + } + +#ifdef CONFIG_REGULATOR + for (i = 0; i < kbdev->nr_clocks; i++) { + if (kbdev->regulators[i] && + kbdev->current_voltages[i] != volts[i] && + kbdev->current_freqs[i] > freqs[i]) { + int err; + + err = regulator_set_voltage(kbdev->regulators[i], + volts[i], volts[i]); + if (!err) { + kbdev->current_voltages[i] = volts[i]; + } else { + dev_err(dev, "Failed to decrease voltage (%d) (target %lu)\n", + err, volts[i]); + return err; + } + } + } +#endif + + kbase_devfreq_set_core_mask(kbdev, core_mask); + + *target_freq = nominal_freq; + kbdev->current_nominal_freq = nominal_freq; + kbdev->current_core_mask = core_mask; + + KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(kbdev, (u64)nominal_freq); + + return 0; +} + +void kbase_devfreq_force_freq(struct kbase_device *kbdev, 
unsigned long freq) +{ + unsigned long target_freq = freq; + + kbase_devfreq_target(kbdev->dev, &target_freq, 0); +} + +static int +kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + + *freq = kbdev->current_nominal_freq; + + return 0; +} + +static int +kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + struct kbasep_pm_metrics diff; + + kbase_pm_get_dvfs_metrics(kbdev, &kbdev->last_devfreq_metrics, &diff); + + stat->busy_time = diff.time_busy; + stat->total_time = diff.time_busy + diff.time_idle; + stat->current_frequency = kbdev->current_nominal_freq; + stat->private_data = NULL; + + return 0; +} + +static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev, + struct devfreq_dev_profile *dp) +{ + int count; + int i = 0; + unsigned long freq; + struct dev_pm_opp *opp; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) + rcu_read_lock(); +#endif + count = dev_pm_opp_get_opp_count(kbdev->dev); +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) + rcu_read_unlock(); +#endif + if (count < 0) + return count; + + dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]), + GFP_KERNEL); + if (!dp->freq_table) + return -ENOMEM; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) + rcu_read_lock(); +#endif + for (i = 0, freq = ULONG_MAX; i < count; i++, freq--) { + opp = dev_pm_opp_find_freq_floor(kbdev->dev, &freq); + if (IS_ERR(opp)) + break; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) + dev_pm_opp_put(opp); +#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) */ + + dp->freq_table[i] = freq; + } +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) + rcu_read_unlock(); +#endif + + if (count != i) + dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n", + count, i); + + dp->max_state = i; + + /* Have the lowest clock as suspend clock. 
+ * It may be overridden by 'opp-mali-errata-1485982'. + */ + if (kbdev->pm.backend.gpu_clock_slow_down_wa) { + freq = 0; + opp = dev_pm_opp_find_freq_ceil(kbdev->dev, &freq); + if (IS_ERR(opp)) { + dev_err(kbdev->dev, "failed to find slowest clock"); + return 0; + } + dev_info(kbdev->dev, "suspend clock %lu from slowest", freq); + kbdev->pm.backend.gpu_clock_suspend_freq = freq; + } + + return 0; +} + +static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev) +{ + struct devfreq_dev_profile *dp = &kbdev->devfreq_profile; + + kfree(dp->freq_table); +} + +static void kbase_devfreq_term_core_mask_table(struct kbase_device *kbdev) +{ + kfree(kbdev->devfreq_table); +} + +static void kbase_devfreq_exit(struct device *dev) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + + kbase_devfreq_term_freq_table(kbdev); +} + +static void kbasep_devfreq_read_suspend_clock(struct kbase_device *kbdev, + struct device_node *node) +{ + u64 freq = 0; + int err = 0; + + /* Check if this node is the opp entry having 'opp-mali-errata-1485982' + * to get the suspend clock, otherwise skip it. + */ + if (!of_property_read_bool(node, "opp-mali-errata-1485982")) + return; + + /* In kbase DevFreq, the clock will be read from 'opp-hz' + * and translated into the actual clock by opp_translate. + * + * In customer DVFS, the clock will be read from 'opp-hz-real' + * for clk driver. If 'opp-hz-real' does not exist, + * read from 'opp-hz'. 
+ */ + if (IS_ENABLED(CONFIG_MALI_DEVFREQ)) + err = of_property_read_u64(node, "opp-hz", &freq); + else { + if (of_property_read_u64(node, "opp-hz-real", &freq)) + err = of_property_read_u64(node, "opp-hz", &freq); + } + + if (WARN_ON(err || !freq)) + return; + + kbdev->pm.backend.gpu_clock_suspend_freq = freq; + dev_info(kbdev->dev, + "suspend clock %llu by opp-mali-errata-1485982", freq); +} + +static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) +{ +#if KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE || !defined(CONFIG_OF) + /* OPP table initialization requires at least the capability to get + * regulators and clocks from the device tree, as well as parsing + * arrays of unsigned integer values. + * + * The whole initialization process shall simply be skipped if the + * minimum capability is not available. + */ + return 0; +#else + struct device_node *opp_node = of_parse_phandle(kbdev->dev->of_node, + "operating-points-v2", 0); + struct device_node *node; + int i = 0; + int count; + u64 shader_present = kbdev->gpu_props.props.raw_props.shader_present; + + if (!opp_node) + return 0; + if (!of_device_is_compatible(opp_node, "operating-points-v2-mali")) + return 0; + + count = dev_pm_opp_get_opp_count(kbdev->dev); + kbdev->devfreq_table = kmalloc_array(count, + sizeof(struct kbase_devfreq_opp), GFP_KERNEL); + if (!kbdev->devfreq_table) + return -ENOMEM; + + for_each_available_child_of_node(opp_node, node) { + const void *core_count_p; + u64 core_mask, opp_freq, + real_freqs[BASE_MAX_NR_CLOCKS_REGULATORS]; + int err; +#ifdef CONFIG_REGULATOR + u32 opp_volts[BASE_MAX_NR_CLOCKS_REGULATORS]; +#endif + + /* Read suspend clock from opp table */ + if (kbdev->pm.backend.gpu_clock_slow_down_wa) + kbasep_devfreq_read_suspend_clock(kbdev, node); + + err = of_property_read_u64(node, "opp-hz", &opp_freq); + if (err) { + dev_warn(kbdev->dev, "Failed to read opp-hz property with error %d\n", + err); + continue; + } + + +#if BASE_MAX_NR_CLOCKS_REGULATORS > 1 + 
err = of_property_read_u64_array(node, "opp-hz-real", + real_freqs, kbdev->nr_clocks); +#else + WARN_ON(kbdev->nr_clocks != 1); + err = of_property_read_u64(node, "opp-hz-real", real_freqs); +#endif + if (err < 0) { + dev_warn(kbdev->dev, "Failed to read opp-hz-real property with error %d\n", + err); + continue; + } +#ifdef CONFIG_REGULATOR + err = of_property_read_u32_array(node, + "opp-microvolt", opp_volts, kbdev->nr_regulators); + if (err < 0) { + dev_warn(kbdev->dev, "Failed to read opp-microvolt property with error %d\n", + err); + continue; + } +#endif + + if (of_property_read_u64(node, "opp-core-mask", &core_mask)) + core_mask = shader_present; + if (core_mask != shader_present && corestack_driver_control) { + + dev_warn(kbdev->dev, "Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU\n", + opp_freq); + continue; + } + + core_count_p = of_get_property(node, "opp-core-count", NULL); + if (core_count_p) { + u64 remaining_core_mask = + kbdev->gpu_props.props.raw_props.shader_present; + int core_count = be32_to_cpup(core_count_p); + + core_mask = 0; + + for (; core_count > 0; core_count--) { + int core = ffs(remaining_core_mask); + + if (!core) { + dev_err(kbdev->dev, "OPP has more cores than GPU\n"); + return -ENODEV; + } + + core_mask |= (1ull << (core-1)); + remaining_core_mask &= ~(1ull << (core-1)); + } + } + + if (!core_mask) { + dev_err(kbdev->dev, "OPP has invalid core mask of 0\n"); + return -ENODEV; + } + + kbdev->devfreq_table[i].opp_freq = opp_freq; + kbdev->devfreq_table[i].core_mask = core_mask; + if (kbdev->nr_clocks > 0) { + int j; + + for (j = 0; j < kbdev->nr_clocks; j++) + kbdev->devfreq_table[i].real_freqs[j] = + real_freqs[j]; + } +#ifdef CONFIG_REGULATOR + if (kbdev->nr_regulators > 0) { + int j; + + for (j = 0; j < kbdev->nr_regulators; j++) + kbdev->devfreq_table[i].opp_volts[j] = + opp_volts[j]; + } +#endif + + dev_info(kbdev->dev, "OPP %d : opp_freq=%llu core_mask=%llx\n", + i, opp_freq, core_mask); + + i++; + } + + 
kbdev->num_opps = i; + + return 0; +#endif /* KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE */ +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) + +static const char *kbase_devfreq_req_type_name(enum kbase_devfreq_work_type type) +{ + const char *p; + + switch (type) { + case DEVFREQ_WORK_NONE: + p = "devfreq_none"; + break; + case DEVFREQ_WORK_SUSPEND: + p = "devfreq_suspend"; + break; + case DEVFREQ_WORK_RESUME: + p = "devfreq_resume"; + break; + default: + p = "Unknown devfreq_type"; + } + return p; +} + +static void kbase_devfreq_suspend_resume_worker(struct work_struct *work) +{ + struct kbase_devfreq_queue_info *info = container_of(work, + struct kbase_devfreq_queue_info, work); + struct kbase_device *kbdev = container_of(info, struct kbase_device, + devfreq_queue); + unsigned long flags; + enum kbase_devfreq_work_type type, acted_type; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + type = kbdev->devfreq_queue.req_type; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + acted_type = kbdev->devfreq_queue.acted_type; + dev_dbg(kbdev->dev, "Worker handles queued req: %s (acted: %s)\n", + kbase_devfreq_req_type_name(type), + kbase_devfreq_req_type_name(acted_type)); + switch (type) { + case DEVFREQ_WORK_SUSPEND: + case DEVFREQ_WORK_RESUME: + if (type != acted_type) { + if (type == DEVFREQ_WORK_RESUME) + devfreq_resume_device(kbdev->devfreq); + else + devfreq_suspend_device(kbdev->devfreq); + dev_dbg(kbdev->dev, "Devfreq transition occured: %s => %s\n", + kbase_devfreq_req_type_name(acted_type), + kbase_devfreq_req_type_name(type)); + kbdev->devfreq_queue.acted_type = type; + } + break; + default: + WARN_ON(1); + } +} + +#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */ + +void kbase_devfreq_enqueue_work(struct kbase_device *kbdev, + enum kbase_devfreq_work_type work_type) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) + unsigned long flags; + + WARN_ON(work_type == DEVFREQ_WORK_NONE); + 
spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->devfreq_queue.req_type = work_type; + queue_work(kbdev->devfreq_queue.workq, &kbdev->devfreq_queue.work); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + dev_dbg(kbdev->dev, "Enqueuing devfreq req: %s\n", + kbase_devfreq_req_type_name(work_type)); +#endif +} + +static int kbase_devfreq_work_init(struct kbase_device *kbdev) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) + kbdev->devfreq_queue.req_type = DEVFREQ_WORK_NONE; + kbdev->devfreq_queue.acted_type = DEVFREQ_WORK_RESUME; + + kbdev->devfreq_queue.workq = alloc_ordered_workqueue("devfreq_workq", 0); + if (!kbdev->devfreq_queue.workq) + return -ENOMEM; + + INIT_WORK(&kbdev->devfreq_queue.work, + kbase_devfreq_suspend_resume_worker); +#endif + return 0; +} + +static void kbase_devfreq_work_term(struct kbase_device *kbdev) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) + destroy_workqueue(kbdev->devfreq_queue.workq); +#endif +} + +int kbase_devfreq_init(struct kbase_device *kbdev) +{ + struct devfreq_dev_profile *dp; + int err; + unsigned int i; + + if (kbdev->nr_clocks == 0) { + dev_err(kbdev->dev, "Clock not available for devfreq\n"); + return -ENODEV; + } + + for (i = 0; i < kbdev->nr_clocks; i++) { + if (kbdev->clocks[i]) + kbdev->current_freqs[i] = + clk_get_rate(kbdev->clocks[i]); + else + kbdev->current_freqs[i] = 0; + } + kbdev->current_nominal_freq = kbdev->current_freqs[0]; + + dp = &kbdev->devfreq_profile; + + dp->initial_freq = kbdev->current_freqs[0]; + dp->polling_ms = 100; + dp->target = kbase_devfreq_target; + dp->get_dev_status = kbase_devfreq_status; + dp->get_cur_freq = kbase_devfreq_cur_freq; + dp->exit = kbase_devfreq_exit; + + if (kbase_devfreq_init_freq_table(kbdev, dp)) + return -EFAULT; + + if (dp->max_state > 0) { + /* Record the maximum frequency possible */ + kbdev->gpu_props.props.core_props.gpu_freq_khz_max = + dp->freq_table[0] / 1000; + }; + + err = kbase_devfreq_init_core_mask_table(kbdev); + if (err) 
{ + kbase_devfreq_term_freq_table(kbdev); + return err; + } + + /* Initialise devfreq suspend/resume workqueue */ + err = kbase_devfreq_work_init(kbdev); + if (err) { + kbase_devfreq_term_freq_table(kbdev); + dev_err(kbdev->dev, "Devfreq initialization failed"); + return err; + } + + kbdev->devfreq = devfreq_add_device(kbdev->dev, dp, + "simple_ondemand", NULL); + if (IS_ERR(kbdev->devfreq)) { + err = PTR_ERR(kbdev->devfreq); + kbase_devfreq_work_term(kbdev); + kbase_devfreq_term_freq_table(kbdev); + return err; + } + + /* devfreq_add_device only copies a few of kbdev->dev's fields, so + * set drvdata explicitly so IPA models can access kbdev. */ + dev_set_drvdata(&kbdev->devfreq->dev, kbdev); + + err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq); + if (err) { + dev_err(kbdev->dev, + "Failed to register OPP notifier (%d)\n", err); + goto opp_notifier_failed; + } + +#ifdef CONFIG_DEVFREQ_THERMAL + err = kbase_ipa_init(kbdev); + if (err) { + dev_err(kbdev->dev, "IPA initialization failed\n"); + goto cooling_failed; + } + + kbdev->devfreq_cooling = of_devfreq_cooling_register_power( + kbdev->dev->of_node, + kbdev->devfreq, + &kbase_ipa_power_model_ops); + if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) { + err = PTR_ERR(kbdev->devfreq_cooling); + dev_err(kbdev->dev, + "Failed to register cooling device (%d)\n", + err); + goto cooling_failed; + } +#endif + + return 0; + +#ifdef CONFIG_DEVFREQ_THERMAL +cooling_failed: + devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); +#endif /* CONFIG_DEVFREQ_THERMAL */ +opp_notifier_failed: + if (devfreq_remove_device(kbdev->devfreq)) + dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err); + else + kbdev->devfreq = NULL; + + kbase_devfreq_work_term(kbdev); + + return err; +} + +void kbase_devfreq_term(struct kbase_device *kbdev) +{ + int err; + + dev_dbg(kbdev->dev, "Term Mali devfreq\n"); + +#ifdef CONFIG_DEVFREQ_THERMAL + if (kbdev->devfreq_cooling) + 
devfreq_cooling_unregister(kbdev->devfreq_cooling); + + kbase_ipa_term(kbdev); +#endif + + devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); + + err = devfreq_remove_device(kbdev->devfreq); + if (err) + dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err); + else + kbdev->devfreq = NULL; + + kbase_devfreq_term_core_mask_table(kbdev); + + kbase_devfreq_work_term(kbdev); +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h new file mode 100644 index 0000000..8c976b2 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h @@ -0,0 +1,46 @@ +/* + * + * (C) COPYRIGHT 2014, 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _BASE_DEVFREQ_H_ +#define _BASE_DEVFREQ_H_ + +int kbase_devfreq_init(struct kbase_device *kbdev); + +void kbase_devfreq_term(struct kbase_device *kbdev); + +/** + * kbase_devfreq_force_freq - Set GPU frequency on L2 power on/off. 
+ * @kbdev: Device pointer + * @freq: GPU frequency in HZ to be set when + * MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE is enabled + */ +void kbase_devfreq_force_freq(struct kbase_device *kbdev, unsigned long freq); + +/** + * kbase_devfreq_enqueue_work - Enqueue a work item for suspend/resume devfreq. + * @kbdev: Device pointer + * @work_type: The type of the devfreq work item, i.e. suspend or resume + */ +void kbase_devfreq_enqueue_work(struct kbase_device *kbdev, + enum kbase_devfreq_work_type work_type); + +#endif /* _BASE_DEVFREQ_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c similarity index 100% rename from dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c rename to bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h new file mode 100644 index 0000000..2e1d011 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h @@ -0,0 +1,127 @@ +/* + * + * (C) COPYRIGHT 2014,2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * Backend-specific HW access device APIs + */ + +#ifndef _KBASE_DEVICE_INTERNAL_H_ +#define _KBASE_DEVICE_INTERNAL_H_ + +/** + * kbase_reg_write - write to GPU register + * @kbdev: Kbase device pointer + * @offset: Offset of register + * @value: Value to write + * + * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). + */ +void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value); + +/** + * kbase_reg_read - read from GPU register + * @kbdev: Kbase device pointer + * @offset: Offset of register + * + * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). + * + * Return: Value in desired register + */ +u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset); + +/** + * kbase_is_gpu_lost() - Has the GPU been lost. + * @kbdev: Kbase device pointer + * + * This function will return true if the GPU has been lost. + * When this happens register reads will be zero. A zero GPU_ID is + * invalid so this is used to detect GPU_LOST + * + * Return: True if GPU LOST + */ +bool kbase_is_gpu_lost(struct kbase_device *kbdev); + +/** + * kbase_gpu_start_cache_clean - Start a cache clean + * @kbdev: Kbase device + * + * Issue a cache clean and invalidate command to hardware. This function will + * take hwaccess_lock. + */ +void kbase_gpu_start_cache_clean(struct kbase_device *kbdev); + +/** + * kbase_gpu_start_cache_clean_nolock - Start a cache clean + * @kbdev: Kbase device + * + * Issue a cache clean and invalidate command to hardware. hwaccess_lock + * must be held by the caller. 
+ */ +void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev); + +/** + * kbase_gpu_wait_cache_clean - Wait for cache cleaning to finish + * @kbdev: Kbase device + * + * This function will take hwaccess_lock, and may sleep. + */ +void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev); + +/** + * kbase_gpu_wait_cache_clean_timeout - Wait for certain time for cache + * cleaning to finish + * @kbdev: Kbase device + * @wait_timeout_ms: Time, in milli seconds, to wait for cache clean to complete. + * + * This function will take hwaccess_lock, and may sleep. This is supposed to be + * called from paths (like GPU reset) where an indefinite wait for the completion + * of cache clean operation can cause deadlock, as the operation may never + * complete. + * + * Return: 0 if successful or a negative error code on failure. + */ +int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev, + unsigned int wait_timeout_ms); + +/** + * kbase_gpu_cache_clean_wait_complete - Called after the cache cleaning is + * finished. Would also be called after + * the GPU reset. + * @kbdev: Kbase device + * + * Caller must hold the hwaccess_lock. + */ +void kbase_gpu_cache_clean_wait_complete(struct kbase_device *kbdev); + +/** + * kbase_gpu_interrupt - GPU interrupt handler + * @kbdev: Kbase device pointer + * @val: The value of the GPU IRQ status register which triggered the call + * + * This function is called from the interrupt handler when a GPU irq is to be + * handled. 
+ */ +void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val); + +#endif /* _KBASE_DEVICE_INTERNAL_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c new file mode 100644 index 0000000..352afa1 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Base kernel property query backend APIs + */ + +#include +#include +#include +#include + +int kbase_backend_gpuprops_get(struct kbase_device *kbdev, + struct kbase_gpuprops_regdump *regdump) +{ + int i; + struct kbase_gpuprops_regdump registers; + + /* Fill regdump with the content of the relevant registers */ + registers.gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)); + + registers.l2_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_FEATURES)); + registers.core_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(CORE_FEATURES)); + registers.tiler_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_FEATURES)); + registers.mem_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(MEM_FEATURES)); + registers.mmu_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(MMU_FEATURES)); + registers.as_present = kbase_reg_read(kbdev, + GPU_CONTROL_REG(AS_PRESENT)); + registers.js_present = kbase_reg_read(kbdev, + GPU_CONTROL_REG(JS_PRESENT)); + + for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) + registers.js_features[i] = kbase_reg_read(kbdev, + GPU_CONTROL_REG(JS_FEATURES_REG(i))); + + for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) + registers.texture_features[i] = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i))); + + registers.thread_max_threads = kbase_reg_read(kbdev, + GPU_CONTROL_REG(THREAD_MAX_THREADS)); + registers.thread_max_workgroup_size = kbase_reg_read(kbdev, + GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE)); + registers.thread_max_barrier_size = kbase_reg_read(kbdev, + GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE)); + registers.thread_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(THREAD_FEATURES)); + registers.thread_tls_alloc = kbase_reg_read(kbdev, + GPU_CONTROL_REG(THREAD_TLS_ALLOC)); + + registers.shader_present_lo = kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_PRESENT_LO)); + registers.shader_present_hi = kbase_reg_read(kbdev, + 
GPU_CONTROL_REG(SHADER_PRESENT_HI)); + + registers.tiler_present_lo = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_PRESENT_LO)); + registers.tiler_present_hi = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_PRESENT_HI)); + + registers.l2_present_lo = kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_PRESENT_LO)); + registers.l2_present_hi = kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_PRESENT_HI)); + + registers.stack_present_lo = kbase_reg_read(kbdev, + GPU_CONTROL_REG(STACK_PRESENT_LO)); + registers.stack_present_hi = kbase_reg_read(kbdev, + GPU_CONTROL_REG(STACK_PRESENT_HI)); + + if (!kbase_is_gpu_lost(kbdev)) { + *regdump = registers; + return 0; + } else + return -EIO; +} + +int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, + struct kbase_gpuprops_regdump *regdump) +{ + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) { + u32 coherency_features; + + /* Ensure we can access the GPU registers */ + kbase_pm_register_access_enable(kbdev); + + coherency_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(COHERENCY_FEATURES)); + + if (kbase_is_gpu_lost(kbdev)) + return -EIO; + + regdump->coherency_features = coherency_features; + + /* We're done accessing the GPU registers for now. 
*/ + kbase_pm_register_access_disable(kbdev); + } else { + /* Pre COHERENCY_FEATURES we only supported ACE_LITE */ + regdump->coherency_features = + COHERENCY_FEATURE_BIT(COHERENCY_NONE) | + COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE); + } + + return 0; +} + +int kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev, + struct kbase_gpuprops_regdump *regdump) +{ + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) { + u32 l2_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_FEATURES)); + + if (kbase_is_gpu_lost(kbdev)) + return -EIO; + + regdump->l2_features = l2_features; + } + + return 0; +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c new file mode 100644 index 0000000..8b320c7 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c @@ -0,0 +1,411 @@ +/* + * + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * GPU backend instrumentation APIs. 
+ */ + +#include +#include +#include +#include +#include + + +int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, + struct kbase_context *kctx, + struct kbase_instr_hwcnt_enable *enable) +{ + unsigned long flags; + int err = -EINVAL; + u32 irq_mask; + u32 prfcnt_config; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* alignment failure */ + if ((enable->dump_buffer == 0ULL) || (enable->dump_buffer & (2048 - 1))) + goto out_err; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) { + /* Instrumentation is already enabled */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + goto out_err; + } + + /* Enable interrupt */ + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | + PRFCNT_SAMPLE_COMPLETED); + + /* In use, this context is the owner */ + kbdev->hwcnt.kctx = kctx; + /* Remember the dump address so we can reprogram it later */ + kbdev->hwcnt.addr = enable->dump_buffer; + kbdev->hwcnt.addr_bytes = enable->dump_buffer_bytes; + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + /* Configure */ + prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT; +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS + if (kbdev->hwcnt.backend.use_secondary_override) +#else + if (enable->use_secondary) +#endif + prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT; + + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), + prfcnt_config | PRFCNT_CONFIG_MODE_OFF); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), + enable->dump_buffer & 0xFFFFFFFF); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), + enable->dump_buffer >> 32); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), + enable->jm_bm); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), + enable->shader_bm); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), + enable->mmu_l2_bm); + + kbase_reg_write(kbdev, 
GPU_CONTROL_REG(PRFCNT_TILER_EN), + enable->tiler_bm); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), + prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + kbdev->hwcnt.backend.triggered = 1; + wake_up(&kbdev->hwcnt.backend.wait); + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + err = 0; + + dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx); + return err; + out_err: + return err; +} + +int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) +{ + unsigned long flags, pm_flags; + int err = -EINVAL; + u32 irq_mask; + struct kbase_device *kbdev = kctx->kbdev; + + while (1) { + spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) { + /* Instrumentation is not enabled */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + goto out; + } + + if (kbdev->hwcnt.kctx != kctx) { + /* Instrumentation has been setup for another context */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + goto out; + } + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) + break; + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + + /* Ongoing dump/setup - wait for its completion */ + wait_event(kbdev->hwcnt.backend.wait, + kbdev->hwcnt.backend.triggered != 0); + } + + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; + kbdev->hwcnt.backend.triggered = 0; + + /* Disable interrupt */ + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), + irq_mask & ~PRFCNT_SAMPLE_COMPLETED); + + /* Disable the counters */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0); 
+ + kbdev->hwcnt.kctx = NULL; + kbdev->hwcnt.addr = 0ULL; + kbdev->hwcnt.addr_bytes = 0ULL; + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + + dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", + kctx); + + err = 0; + + out: + return err; +} + +int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) +{ + unsigned long flags; + int err = -EINVAL; + struct kbase_device *kbdev = kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.kctx != kctx) { + /* The instrumentation has been setup for another context */ + goto unlock; + } + + if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) { + /* HW counters are disabled or another dump is ongoing, or we're + * resetting */ + goto unlock; + } + + kbdev->hwcnt.backend.triggered = 0; + + /* Mark that we're dumping - the PF handler can signal that we faulted + */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING; + + + /* Reconfigure the dump address */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), + kbdev->hwcnt.addr & 0xFFFFFFFF); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), + kbdev->hwcnt.addr >> 32); + + /* Start dumping */ + KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, + kbdev->hwcnt.addr); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_PRFCNT_SAMPLE); + + dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx); + + err = 0; + + unlock: + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + + return err; +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump); + +bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx, + bool * const success) +{ + unsigned long flags; + bool complete = false; + struct kbase_device *kbdev = kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) { + *success = true; + complete = true; + } else if 
(kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { + *success = false; + complete = true; + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + } + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + return complete; +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete); + +void kbasep_cache_clean_worker(struct work_struct *data) +{ + struct kbase_device *kbdev; + unsigned long flags, pm_flags; + + kbdev = container_of(data, struct kbase_device, + hwcnt.backend.cache_clean_work); + + spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + /* Clean and invalidate the caches so we're sure the mmu tables for the + * dump buffer is valid. + */ + KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_REQUEST_CLEAN); + kbase_gpu_start_cache_clean_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); + + kbase_gpu_wait_cache_clean(kbdev); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_REQUEST_CLEAN); + /* All finished and idle */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + kbdev->hwcnt.backend.triggered = 1; + wake_up(&kbdev->hwcnt.backend.wait); + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); +} + + +void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { + kbdev->hwcnt.backend.triggered = 1; + wake_up(&kbdev->hwcnt.backend.wait); + } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) { + if (kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE) { + /* All finished and idle */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + kbdev->hwcnt.backend.triggered = 1; + wake_up(&kbdev->hwcnt.backend.wait); + } else { + int ret; + /* Always clean and invalidate the 
cache after a successful dump + */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN; + ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq, + &kbdev->hwcnt.backend.cache_clean_work); + KBASE_DEBUG_ASSERT(ret); + } + } + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); +} + +int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx) +{ + struct kbase_device *kbdev = kctx->kbdev; + unsigned long flags; + int err; + + /* Wait for dump & cache clean to complete */ + wait_event(kbdev->hwcnt.backend.wait, + kbdev->hwcnt.backend.triggered != 0); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { + err = -EINVAL; + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + } else { + /* Dump done */ + KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_IDLE); + err = 0; + } + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + return err; +} + +int kbase_instr_hwcnt_clear(struct kbase_context *kctx) +{ + unsigned long flags; + int err = -EINVAL; + struct kbase_device *kbdev = kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + /* Check it's the context previously set up and we're not already + * dumping */ + if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state != + KBASE_INSTR_STATE_IDLE) + goto out; + + /* Clear the counters */ + KBASE_KTRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_PRFCNT_CLEAR); + + err = 0; + +out: + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + return err; +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear); + +int kbase_instr_backend_init(struct kbase_device *kbdev) +{ + int ret = 0; + + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; + + init_waitqueue_head(&kbdev->hwcnt.backend.wait); + INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work, + kbasep_cache_clean_worker); + + + kbdev->hwcnt.backend.triggered = 0; + +#ifdef 
CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS + kbdev->hwcnt.backend.use_secondary_override = false; +#endif + + kbdev->hwcnt.backend.cache_clean_wq = + alloc_workqueue("Mali cache cleaning workqueue", 0, 1); + if (NULL == kbdev->hwcnt.backend.cache_clean_wq) + ret = -EINVAL; + + return ret; +} + +void kbase_instr_backend_term(struct kbase_device *kbdev) +{ + destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq); +} + +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS +void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev) +{ + debugfs_create_bool("hwcnt_use_secondary", S_IRUGO | S_IWUSR, + kbdev->mali_debugfs_directory, + &kbdev->hwcnt.backend.use_secondary_override); +} +#endif diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h new file mode 100644 index 0000000..9930968 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h @@ -0,0 +1,60 @@ +/* + * + * (C) COPYRIGHT 2014, 2016, 2018, 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Backend-specific instrumentation definitions + */ + +#ifndef _KBASE_INSTR_DEFS_H_ +#define _KBASE_INSTR_DEFS_H_ + +/* + * Instrumentation State Machine States + */ +enum kbase_instr_state { + /* State where instrumentation is not active */ + KBASE_INSTR_STATE_DISABLED = 0, + /* State machine is active and ready for a command. */ + KBASE_INSTR_STATE_IDLE, + /* Hardware is currently dumping a frame. */ + KBASE_INSTR_STATE_DUMPING, + /* We've requested a clean to occur on a workqueue */ + KBASE_INSTR_STATE_REQUEST_CLEAN, + /* An error has occurred during DUMPING (page fault). */ + KBASE_INSTR_STATE_FAULT +}; + +/* Structure used for instrumentation and HW counters dumping */ +struct kbase_instr_backend { + wait_queue_head_t wait; + int triggered; +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS + bool use_secondary_override; +#endif + + enum kbase_instr_state state; + struct workqueue_struct *cache_clean_wq; + struct work_struct cache_clean_work; +}; + +#endif /* _KBASE_INSTR_DEFS_H_ */ + diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h new file mode 100644 index 0000000..2254b9f --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h @@ -0,0 +1,44 @@ +/* + * + * (C) COPYRIGHT 2014, 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * Backend-specific HW access instrumentation APIs + */ + +#ifndef _KBASE_INSTR_INTERNAL_H_ +#define _KBASE_INSTR_INTERNAL_H_ + +/** + * kbasep_cache_clean_worker() - Workqueue for handling cache cleaning + * @data: a &struct work_struct + */ +void kbasep_cache_clean_worker(struct work_struct *data); + +/** + * kbase_instr_hwcnt_sample_done() - Dump complete interrupt received + * @kbdev: Kbase device + */ +void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev); + +#endif /* _KBASE_INSTR_INTERNAL_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h new file mode 100644 index 0000000..ca3c048 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h @@ -0,0 +1,44 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Backend specific IRQ APIs + */ + +#ifndef _KBASE_IRQ_INTERNAL_H_ +#define _KBASE_IRQ_INTERNAL_H_ + +int kbase_install_interrupts(struct kbase_device *kbdev); + +void kbase_release_interrupts(struct kbase_device *kbdev); + +/** + * kbase_synchronize_irqs - Ensure that all IRQ handlers have completed + * execution + * @kbdev: The kbase device + */ +void kbase_synchronize_irqs(struct kbase_device *kbdev); + +int kbasep_common_test_interrupt_handlers( + struct kbase_device * const kbdev); + +#endif /* _KBASE_IRQ_INTERNAL_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c new file mode 100644 index 0000000..8696c6a --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c @@ -0,0 +1,499 @@ +/* + * + * (C) COPYRIGHT 2014-2016,2018-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include + +#include + +#if !defined(CONFIG_MALI_NO_MALI) + +/* GPU IRQ Tags */ +#define JOB_IRQ_TAG 0 +#define MMU_IRQ_TAG 1 +#define GPU_IRQ_TAG 2 + +static void *kbase_tag(void *ptr, u32 tag) +{ + return (void *)(((uintptr_t) ptr) | tag); +} + +static void *kbase_untag(void *ptr) +{ + return (void *)(((uintptr_t) ptr) & ~3); +} + +static irqreturn_t kbase_job_irq_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (!kbdev->pm.backend.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return IRQ_NONE; + } + + val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS)); + +#ifdef CONFIG_MALI_DEBUG + if (!kbdev->pm.backend.driver_ready_for_irqs) + dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", + __func__, irq, val); +#endif /* CONFIG_MALI_DEBUG */ + + if (!val) { + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return IRQ_NONE; + } + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbase_job_done(kbdev, val); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return IRQ_HANDLED; +} + +static irqreturn_t kbase_mmu_irq_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (!kbdev->pm.backend.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return IRQ_NONE; + } + + atomic_inc(&kbdev->faults_pending); + + val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS)); + +#ifdef CONFIG_MALI_DEBUG + if (!kbdev->pm.backend.driver_ready_for_irqs) + dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", + __func__, irq, val); +#endif /* 
CONFIG_MALI_DEBUG */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (!val) { + atomic_dec(&kbdev->faults_pending); + return IRQ_NONE; + } + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbase_mmu_interrupt(kbdev, val); + + atomic_dec(&kbdev->faults_pending); + + return IRQ_HANDLED; +} + +static irqreturn_t kbase_gpu_irq_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (!kbdev->pm.backend.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return IRQ_NONE; + } + + val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS)); + +#ifdef CONFIG_MALI_DEBUG + if (!kbdev->pm.backend.driver_ready_for_irqs) + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", + __func__, irq, val); +#endif /* CONFIG_MALI_DEBUG */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (!val) + return IRQ_NONE; + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbase_gpu_interrupt(kbdev, val); + + return IRQ_HANDLED; +} + +static irq_handler_t kbase_handler_table[] = { + [JOB_IRQ_TAG] = kbase_job_irq_handler, + [MMU_IRQ_TAG] = kbase_mmu_irq_handler, + [GPU_IRQ_TAG] = kbase_gpu_irq_handler, +}; + +#ifdef CONFIG_MALI_DEBUG +#define JOB_IRQ_HANDLER JOB_IRQ_TAG +#define MMU_IRQ_HANDLER MMU_IRQ_TAG +#define GPU_IRQ_HANDLER GPU_IRQ_TAG + +/** + * kbase_gpu_irq_test_handler - Variant (for test) of kbase_gpu_irq_handler() + * @irq: IRQ number + * @data: Data associated with this IRQ (i.e. kbdev) + * @val: Value of the GPU_CONTROL_REG(GPU_IRQ_STATUS) + * + * Handle the GPU device interrupt source requests reflected in the + * given source bit-pattern. The test code caller is responsible for + * undertaking the required device power maintenance.
+ * + * Return: IRQ_HANDLED if the requests are from the GPU device, + * IRQ_NONE otherwise + */ +irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val) +{ + struct kbase_device *kbdev = kbase_untag(data); + + if (!val) + return IRQ_NONE; + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbase_gpu_interrupt(kbdev, val); + + return IRQ_HANDLED; +} + +KBASE_EXPORT_TEST_API(kbase_gpu_irq_test_handler); + +/** + * kbase_set_custom_irq_handler - Set a custom IRQ handler + * @kbdev: Device for which the handler is to be registered + * @custom_handler: Handler to be registered + * @irq_type: Interrupt type + * + * Registers given interrupt handler for requested interrupt type + * In the case where irq handler is not specified, the default handler shall be + * registered + * + * Return: 0 case success, error code otherwise + */ +int kbase_set_custom_irq_handler(struct kbase_device *kbdev, + irq_handler_t custom_handler, + int irq_type) +{ + int result = 0; + irq_handler_t requested_irq_handler = NULL; + + KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) && + (GPU_IRQ_HANDLER >= irq_type)); + + /* Release previous handler */ + if (kbdev->irqs[irq_type].irq) + free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type)); + + requested_irq_handler = (NULL != custom_handler) ? 
custom_handler : + kbase_handler_table[irq_type]; + + if (0 != request_irq(kbdev->irqs[irq_type].irq, + requested_irq_handler, + kbdev->irqs[irq_type].flags | IRQF_SHARED, + dev_name(kbdev->dev), kbase_tag(kbdev, irq_type))) { + result = -EINVAL; + dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", + kbdev->irqs[irq_type].irq, irq_type); +#ifdef CONFIG_SPARSE_IRQ + dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); +#endif /* CONFIG_SPARSE_IRQ */ + } + + return result; +} + +KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler); + +/* test correct interrupt assignment and reception by cpu */ +struct kbasep_irq_test { + struct hrtimer timer; + wait_queue_head_t wait; + int triggered; + u32 timeout; +}; + +static struct kbasep_irq_test kbasep_irq_test_data; + +#define IRQ_TEST_TIMEOUT 500 + +static irqreturn_t kbase_job_irq_test_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (!kbdev->pm.backend.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return IRQ_NONE; + } + + val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS)); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (!val) + return IRQ_NONE; + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbasep_irq_test_data.triggered = 1; + wake_up(&kbasep_irq_test_data.wait); + + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val); + + return IRQ_HANDLED; +} + +static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (!kbdev->pm.backend.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ +
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return IRQ_NONE; + } + + val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS)); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (!val) + return IRQ_NONE; + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbasep_irq_test_data.triggered = 1; + wake_up(&kbasep_irq_test_data.wait); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val); + + return IRQ_HANDLED; +} + +static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer) +{ + struct kbasep_irq_test *test_data = container_of(timer, + struct kbasep_irq_test, timer); + + test_data->timeout = 1; + test_data->triggered = 1; + wake_up(&test_data->wait); + return HRTIMER_NORESTART; +} + +static int kbasep_common_test_interrupt( + struct kbase_device * const kbdev, u32 tag) +{ + int err = 0; + irq_handler_t test_handler; + + u32 old_mask_val; + u16 mask_offset; + u16 rawstat_offset; + + switch (tag) { + case JOB_IRQ_TAG: + test_handler = kbase_job_irq_test_handler; + rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT); + mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK); + break; + case MMU_IRQ_TAG: + test_handler = kbase_mmu_irq_test_handler; + rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT); + mask_offset = MMU_REG(MMU_IRQ_MASK); + break; + case GPU_IRQ_TAG: + /* already tested by pm_driver - bail out */ + default: + return 0; + } + + /* store old mask */ + old_mask_val = kbase_reg_read(kbdev, mask_offset); + /* mask interrupts */ + kbase_reg_write(kbdev, mask_offset, 0x0); + + if (kbdev->irqs[tag].irq) { + /* release original handler and install test handler */ + if (kbase_set_custom_irq_handler(kbdev, test_handler, tag) != 0) { + err = -EINVAL; + } else { + kbasep_irq_test_data.timeout = 0; + hrtimer_init(&kbasep_irq_test_data.timer, + CLOCK_MONOTONIC, HRTIMER_MODE_REL); + kbasep_irq_test_data.timer.function = + kbasep_test_interrupt_timeout; + + /* trigger interrupt */ + kbase_reg_write(kbdev, mask_offset, 
0x1); + kbase_reg_write(kbdev, rawstat_offset, 0x1); + + hrtimer_start(&kbasep_irq_test_data.timer, + HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT), + HRTIMER_MODE_REL); + + wait_event(kbasep_irq_test_data.wait, + kbasep_irq_test_data.triggered != 0); + + if (kbasep_irq_test_data.timeout != 0) { + dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n", + kbdev->irqs[tag].irq, tag); + err = -EINVAL; + } else { + dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n", + kbdev->irqs[tag].irq, tag); + } + + hrtimer_cancel(&kbasep_irq_test_data.timer); + kbasep_irq_test_data.triggered = 0; + + /* mask interrupts */ + kbase_reg_write(kbdev, mask_offset, 0x0); + + /* release test handler */ + free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag)); + } + + /* restore original interrupt */ + if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag], + kbdev->irqs[tag].flags | IRQF_SHARED, + dev_name(kbdev->dev), kbase_tag(kbdev, tag))) { + dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n", + kbdev->irqs[tag].irq, tag); + err = -EINVAL; + } + } + /* restore old mask */ + kbase_reg_write(kbdev, mask_offset, old_mask_val); + + return err; +} + +int kbasep_common_test_interrupt_handlers( + struct kbase_device * const kbdev) +{ + int err; + + init_waitqueue_head(&kbasep_irq_test_data.wait); + kbasep_irq_test_data.triggered = 0; + + /* A suspend won't happen during startup/insmod */ + kbase_pm_context_active(kbdev); + + err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG); + if (err) { + dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n"); + goto out; + } + + err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG); + if (err) { + dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. 
Check interrupt assignments.\n"); + goto out; + } + + dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n"); + + out: + kbase_pm_context_idle(kbdev); + + return err; +} +#endif /* CONFIG_MALI_DEBUG */ + +int kbase_install_interrupts(struct kbase_device *kbdev) +{ + u32 nr = ARRAY_SIZE(kbase_handler_table); + int err; + u32 i; + + for (i = 0; i < nr; i++) { + err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i], + kbdev->irqs[i].flags | IRQF_SHARED, + dev_name(kbdev->dev), + kbase_tag(kbdev, i)); + if (err) { + dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", + kbdev->irqs[i].irq, i); +#ifdef CONFIG_SPARSE_IRQ + dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); +#endif /* CONFIG_SPARSE_IRQ */ + goto release; + } + } + + return 0; + + release: + while (i-- > 0) + free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i)); + + return err; +} + +void kbase_release_interrupts(struct kbase_device *kbdev) +{ + u32 nr = ARRAY_SIZE(kbase_handler_table); + u32 i; + + for (i = 0; i < nr; i++) { + if (kbdev->irqs[i].irq) + free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i)); + } +} + +void kbase_synchronize_irqs(struct kbase_device *kbdev) +{ + u32 nr = ARRAY_SIZE(kbase_handler_table); + u32 i; + + for (i = 0; i < nr; i++) { + if (kbdev->irqs[i].irq) + synchronize_irq(kbdev->irqs[i].irq); + } +} + +KBASE_EXPORT_TEST_API(kbase_synchronize_irqs); + +#endif /* !defined(CONFIG_MALI_NO_MALI) */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c new file mode 100644 index 0000000..bb4f548 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c @@ -0,0 +1,243 @@ +/* + * + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/* + * Register backend context / address space management + */ + +#include +#include +#include + +/** + * assign_and_activate_kctx_addr_space - Assign an AS to a context + * @kbdev: Kbase device + * @kctx: Kbase context + * @current_as: Address Space to assign + * + * Assign an Address Space (AS) to a context, and add the context to the Policy. + * + * This includes + * setting up the global runpool_irq structure and the context on the AS, + * Activating the MMU on the AS, + * Allowing jobs to be submitted on the AS. 
+ * + * Context: + * kbasep_js_kctx_info.jsctx_mutex held, + * kbasep_js_device_data.runpool_mutex held, + * AS transaction mutex held, + * Runpool IRQ lock held + */ +static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev, + struct kbase_context *kctx, + struct kbase_as *current_as) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + + lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); + lockdep_assert_held(&js_devdata->runpool_mutex); + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* Attribute handling */ + kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx); + + /* Allow it to run jobs */ + kbasep_js_set_submit_allowed(js_devdata, kctx); + + kbase_js_runpool_inc_context_count(kbdev, kctx); +} + +bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js) +{ + int i; + + if (kbdev->hwaccess.active_kctx[js] == kctx) { + /* Context is already active */ + return true; + } + + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { + if (kbdev->as_to_kctx[i] == kctx) { + /* Context already has ASID - mark as active */ + return true; + } + } + + /* Context does not have address space assigned */ + return false; +} + +void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + int as_nr = kctx->as_nr; + + if (as_nr == KBASEP_AS_NR_INVALID) { + WARN(1, "Attempting to release context without ASID\n"); + return; + } + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (atomic_read(&kctx->refcount) != 1) { + WARN(1, "Attempting to release active ASID\n"); + return; + } + + kbasep_js_clear_submit_allowed(&kbdev->js_data, kctx); + + kbase_ctx_sched_release_ctx(kctx); + kbase_js_runpool_dec_context_count(kbdev, kctx); +} + +void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ +} + +int kbase_backend_find_and_release_free_address_space( + struct kbase_device *kbdev, struct kbase_context *kctx) +{ + struct 
kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + unsigned long flags; + int i; + + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { + struct kbasep_js_kctx_info *as_js_kctx_info; + struct kbase_context *as_kctx; + + as_kctx = kbdev->as_to_kctx[i]; + as_js_kctx_info = &as_kctx->jctx.sched_info; + + /* Don't release privileged or active contexts, or contexts with + * jobs running. + * Note that a context will have at least 1 reference (which + * was previously taken by kbasep_js_schedule_ctx()) until + * descheduled. + */ + if (as_kctx && !kbase_ctx_flag(as_kctx, KCTX_PRIVILEGED) && + atomic_read(&as_kctx->refcount) == 1) { + if (!kbase_ctx_sched_inc_refcount_nolock(as_kctx)) { + WARN(1, "Failed to retain active context\n"); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, + flags); + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + return KBASEP_AS_NR_INVALID; + } + + kbasep_js_clear_submit_allowed(js_devdata, as_kctx); + + /* Drop and retake locks to take the jsctx_mutex on the + * context we're about to release without violating lock + * ordering + */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + + /* Release context from address space */ + mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + + kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx); + + if (!kbase_ctx_flag(as_kctx, KCTX_SCHEDULED)) { + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, + as_kctx, + true); + + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex); + + return i; + } + + /* Context was retained while locks were dropped, 
+ * continue looking for free AS */ + + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex); + + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + } + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + return KBASEP_AS_NR_INVALID; +} + +bool kbase_backend_use_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx, + int as_nr) +{ + struct kbasep_js_device_data *js_devdata; + struct kbase_as *new_address_space = NULL; + int js; + + js_devdata = &kbdev->js_data; + + for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { + if (kbdev->hwaccess.active_kctx[js] == kctx) { + WARN(1, "Context is already scheduled in\n"); + return false; + } + } + + new_address_space = &kbdev->as[as_nr]; + + lockdep_assert_held(&js_devdata->runpool_mutex); + lockdep_assert_held(&kbdev->mmu_hw_mutex); + lockdep_assert_held(&kbdev->hwaccess_lock); + + assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space); + + if (kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) { + /* We need to retain it to keep the corresponding address space + */ + kbase_ctx_sched_retain_ctx_refcount(kctx); + } + + return true; +} + diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h new file mode 100644 index 0000000..7cda61a --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h @@ -0,0 +1,111 @@ +/* + * + * (C) COPYRIGHT 2014-2016, 2018-2019 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/* + * Register-based HW access backend specific definitions + */ + +#ifndef _KBASE_HWACCESS_GPU_DEFS_H_ +#define _KBASE_HWACCESS_GPU_DEFS_H_ + +/* SLOT_RB_SIZE must be < 256 (presumably because read_idx/write_idx in struct slot_rb are u8) */ +#define SLOT_RB_SIZE 2 +#define SLOT_RB_MASK (SLOT_RB_SIZE - 1) + +/** + * struct rb_entry - Ringbuffer entry + * @katom: Atom associated with this entry + */ +struct rb_entry { + struct kbase_jd_atom *katom; +}; + +/** + * struct slot_rb - Slot ringbuffer + * @entries: Ringbuffer entries + * @last_context: The last context to submit a job on this slot + * @read_idx: Current read index of buffer + * @write_idx: Current write index of buffer + * @job_chain_flag: Flag used to implement jobchain disambiguation + */ +struct slot_rb { + struct rb_entry entries[SLOT_RB_SIZE]; + + struct kbase_context *last_context; + + u8 read_idx; + u8 write_idx; + + u8 job_chain_flag; +}; + +/** + * struct kbase_backend_data - GPU backend specific data for HW access layer + * @slot_rb: Slot ringbuffers + * @scheduling_timer: The timer tick used for rescheduling jobs + * @timer_running: Is the timer running? The runpool_mutex must be + * held whilst modifying this. + * @suspend_timer: Is the timer suspended? Set when a suspend + * occurs and cleared on resume.
The runpool_mutex + * must be held whilst modifying this. + * @reset_gpu: Set to a KBASE_RESET_xxx value (see the KBASE_RESET_GPU_* defines below) + * @reset_workq: Work queue for performing the reset + * @reset_work: Work item for performing the reset + * @reset_wait: Wait event signalled when the reset is complete + * @reset_timer: Timeout for soft-stops before the reset + * @timeouts_updated: Have timeout values just been updated? + * + * The hwaccess_lock (a spinlock) must be held when accessing this structure + */ +struct kbase_backend_data { + struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS]; + + struct hrtimer scheduling_timer; + + bool timer_running; + bool suspend_timer; + + atomic_t reset_gpu; + +/* The GPU reset isn't pending */ +#define KBASE_RESET_GPU_NOT_PENDING 0 +/* kbase_prepare_to_reset_gpu has been called */ +#define KBASE_RESET_GPU_PREPARED 1 +/* kbase_reset_gpu has been called - the reset will now definitely happen + * within the timeout period */ +#define KBASE_RESET_GPU_COMMITTED 2 +/* The GPU reset process is currently occurring (timeout has expired or + * kbasep_try_reset_gpu_early was called) */ +#define KBASE_RESET_GPU_HAPPENING 3 +/* Reset the GPU silently, used when resetting the GPU as part of normal + * behavior (e.g. when exiting protected mode). */ +#define KBASE_RESET_GPU_SILENT 4 + struct workqueue_struct *reset_workq; + struct work_struct reset_work; + wait_queue_head_t reset_wait; + struct hrtimer reset_timer; + + bool timeouts_updated; +}; + +#endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c new file mode 100644 index 0000000..fa6bc83 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c @@ -0,0 +1,1421 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Base kernel job manager APIs + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev); + +static u64 kbase_job_write_affinity(struct kbase_device *kbdev, + base_jd_core_req core_req, + int js) +{ /* Choose the shader-core affinity mask for an atom on slot js from core_req, write it to JS_AFFINITY_NEXT_LO/HI and return the mask written. */ + u64 affinity; + + if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) == + BASE_JD_REQ_T) { + /* Tiler-only atom */ + /* If the hardware supports XAFFINITY then we'll only enable + * the tiler (which is the default so this is a no-op), + * otherwise enable shader core 0. + */ + if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) + affinity = 1; + else + affinity = 0; + } else if ((core_req & (BASE_JD_REQ_COHERENT_GROUP | + BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) { + unsigned int num_core_groups = kbdev->gpu_props.num_core_groups; + struct mali_base_gpu_coherent_group_info *coherency_info = + &kbdev->gpu_props.props.coherency_info; + + affinity = kbdev->pm.backend.shaders_avail & + kbdev->pm.debug_core_mask[js]; + + /* JS2 on a dual core group system targets core group 1. All + * other cases target core group 0.
+ */ + if (js == 2 && num_core_groups > 1) + affinity &= coherency_info->group[1].core_mask; + else + affinity &= coherency_info->group[0].core_mask; + } else { + /* Use all available cores permitted by the debug core mask for this slot */ + affinity = kbdev->pm.backend.shaders_avail & + kbdev->pm.debug_core_mask[js]; + } + + if (unlikely(!affinity)) { /* Empty mask (this also covers the tiler-only XAFFINITY case above, which sets 0): fall back to every available shader core without applying debug_core_mask */ +#ifdef CONFIG_MALI_DEBUG + u64 shaders_ready = + kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); + + WARN_ON(!(shaders_ready & kbdev->pm.backend.shaders_avail)); +#endif + + affinity = kbdev->pm.backend.shaders_avail; + } + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), + affinity & 0xFFFFFFFF); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI), + affinity >> 32); + + return affinity; +} + +/** + * select_job_chain() - Select which job chain to submit to the GPU + * @katom: Pointer to the atom about to be submitted to the GPU + * + * Selects one of the fragment job chains attached to the special atom at the + * end of a renderpass, or returns the address of the single job chain attached + * to any other type of atom. + * + * Which job chain is selected depends upon whether the tiling phase of the + * renderpass completed normally or was soft-stopped because it used too + * much memory. It also depends upon whether one of the fragment job chains + * has already been run as part of the same renderpass.
+ * + * Return: GPU virtual address of the selected job chain + */ +static u64 select_job_chain(struct kbase_jd_atom *katom) +{ + struct kbase_context *const kctx = katom->kctx; + u64 jc = katom->jc; + struct kbase_jd_renderpass *rp; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + if (!(katom->core_req & BASE_JD_REQ_END_RENDERPASS)) + return jc; + + compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); /* every value renderpass_id can hold is a valid index, so no runtime bounds check is done below */ + + rp = &kctx->jctx.renderpasses[katom->renderpass_id]; + /* We can read a subset of renderpass state without holding + * higher-level locks (but not end_katom, for example). + * If the end-of-renderpass atom is running with as-yet indeterminate + * OOM state then assume that the start atom was not soft-stopped. + */ + switch (rp->state) { + case KBASE_JD_RP_OOM: + /* Tiling ran out of memory. + * Start of incremental rendering, used once. + */ + jc = katom->jc_fragment.norm_read_forced_write; + break; + case KBASE_JD_RP_START: + case KBASE_JD_RP_PEND_OOM: + /* Tiling completed successfully first time. + * Single-iteration rendering, used once. + */ + jc = katom->jc_fragment.norm_read_norm_write; + break; + case KBASE_JD_RP_RETRY_OOM: + /* Tiling ran out of memory again. + * Continuation of incremental rendering, used as + * many times as required. + */ + jc = katom->jc_fragment.forced_read_forced_write; + break; + case KBASE_JD_RP_RETRY: + case KBASE_JD_RP_RETRY_PEND_OOM: + /* Tiling completed successfully this time. + * End of incremental rendering, used once.
+ */ + jc = katom->jc_fragment.forced_read_norm_write; + break; + default: + WARN_ON(1); + break; + } + + dev_dbg(kctx->kbdev->dev, + "Selected job chain 0x%llx for end atom %p in state %d\n", + jc, (void *)katom, (int)rp->state); + + katom->jc = jc; /* the selected chain is also stored back on the atom */ + return jc; +} + +void kbase_job_hw_submit(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + int js) +{ /* Program the JS_*_NEXT registers of slot js for katom (head, affinity, config, flush id) and then write JS_COMMAND_START to JS_COMMAND_NEXT. */ + struct kbase_context *kctx; + u32 cfg; + u64 const jc_head = select_job_chain(katom); /* NOTE(review): select_job_chain() dereferences katom, so this runs before the KBASE_DEBUG_ASSERT(katom) below can catch a NULL atom */ + u64 affinity; + + KBASE_DEBUG_ASSERT(kbdev); + KBASE_DEBUG_ASSERT(katom); + + kctx = katom->kctx; + + /* Command register must be available */ + KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx)); + + dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %p\n", + jc_head, (void *)katom); + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), + jc_head & 0xFFFFFFFF); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), + jc_head >> 32); + + affinity = kbase_job_write_affinity(kbdev, katom->core_req, js); + + /* start MMU, medium priority, cache clean/flush on end, clean/flush on + * start */ + cfg = kctx->as_nr; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION) && + !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)) + cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION; + + if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START)) + cfg |= JS_CONFIG_START_FLUSH_NO_ACTION; + else + cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE; + + if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) && + !(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET)) + cfg |= JS_CONFIG_END_FLUSH_NO_ACTION; + else if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CLEAN_ONLY_SAFE)) + cfg |= JS_CONFIG_END_FLUSH_CLEAN; + else + cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE; + + cfg |= JS_CONFIG_THREAD_PRI(8); + + if ((katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED) || + (katom->core_req & BASE_JD_REQ_END_RENDERPASS)) + cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK; + + if (kbase_hw_has_feature(kbdev, +
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { + if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) { /* the per-slot flag is toggled on every submission and mirrored in the atom's KBASE_KATOM_FLAGS_JOBCHAIN bit */ + cfg |= JS_CONFIG_JOB_CHAIN_FLAG; + katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN; + kbdev->hwaccess.backend.slot_rb[js].job_chain_flag = + true; + } else { + katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN; + kbdev->hwaccess.backend.slot_rb[js].job_chain_flag = + false; + } + } + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg); + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT), + katom->flush_id); + + /* Write an approximate start timestamp. + * It's approximate because there might be a job in the HEAD register. + */ + katom->start_timestamp = ktime_get(); + + /* GO ! (trace the submission first; the JS_COMMAND_NEXT write is the last statement of this function) */ + dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx", + katom, kctx, js, jc_head); + + KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js, + (u32)affinity); + + KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, kctx, + js, kbase_jd_atom_id(kctx, katom), TL_JS_EVENT_START); + + KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(kbdev, katom, jc_head, + affinity, cfg); + KBASE_TLSTREAM_TL_RET_CTX_LPU( + kbdev, + kctx, + &kbdev->gpu_props.props.raw_props.js_features[ + katom->slot_nr]); /* NOTE(review): indexed by katom->slot_nr while the neighbouring tracepoints use js - presumably the same value, confirm */ + KBASE_TLSTREAM_TL_RET_ATOM_AS(kbdev, katom, &kbdev->as[kctx->as_nr]); + KBASE_TLSTREAM_TL_RET_ATOM_LPU( + kbdev, + katom, + &kbdev->gpu_props.props.raw_props.js_features[js], + "ctx_nr,atom_nr"); +#ifdef CONFIG_GPU_TRACEPOINTS + if (!kbase_backend_nr_atoms_submitted(kbdev, js)) { + /* If this is the only job on the slot, trace it as starting */ + char js_string[16]; + + trace_gpu_sched_switch( + kbasep_make_job_slot_string(js, js_string, + sizeof(js_string)), + ktime_to_ns(katom->start_timestamp), + (u32)katom->kctx->id, 0, katom->work_id); + kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx; + } +#endif + + trace_sysgraph_gpu(SGR_SUBMIT, kctx->id, +
kbase_jd_atom_id(kctx, katom), js); + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), + JS_COMMAND_START); +} + +/** + * kbasep_job_slot_update_head_start_timestamp - Update the start timestamp of the HEAD job + * @kbdev: kbase device + * @js: job slot + * @end_timestamp: time at which the previous job(s) on the slot were seen to complete + * + * Update the start_timestamp of the job currently in the HEAD, based on the + * fact that we got an IRQ for the previous set of completed jobs. + * + * The estimate also takes into account the time the job was submitted, to + * work out the best estimate (which might still result in an over-estimate to + * the calculated time spent) + */ +static void kbasep_job_slot_update_head_start_timestamp( + struct kbase_device *kbdev, + int js, + ktime_t end_timestamp) +{ + ktime_t timestamp_diff; + struct kbase_jd_atom *katom; + + /* Checking the HEAD position for the job slot */ + katom = kbase_gpu_inspect(kbdev, js, 0); + if (katom != NULL) { + timestamp_diff = ktime_sub(end_timestamp, + katom->start_timestamp); + if (ktime_to_ns(timestamp_diff) >= 0) { + /* Only update the timestamp if it's a better estimate + * than what's currently stored. This is because our + * estimate that accounts for the throttle time may be + * too much of an overestimate */ + katom->start_timestamp = end_timestamp; + } + } +} + +/** + * kbasep_trace_tl_event_lpu_softstop - Call event_lpu_softstop timeline + * tracepoint + * @kbdev: kbase device + * @js: job slot + * + * Make a tracepoint call to the instrumentation module informing that + * softstop happened on given lpu (job slot).
+ */ +static void kbasep_trace_tl_event_lpu_softstop(struct kbase_device *kbdev, + int js) +{ + KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP( + kbdev, + &kbdev->gpu_props.props.raw_props.js_features[js]); +} + +void kbase_job_done(struct kbase_device *kbdev, u32 done) +{ /* Handle the job IRQ bits in done: the low 16 bits flag finished slots and the high 16 bits flag failed slots (see the failed/finished masks below). Must be called with hwaccess_lock held. */ + int i; + u32 count = 0; + ktime_t end_timestamp; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + KBASE_DEBUG_ASSERT(kbdev); + + KBASE_KTRACE_ADD_JM(kbdev, JM_IRQ, NULL, NULL, 0, done); + + end_timestamp = ktime_get(); + + while (done) { + u32 failed = done >> 16; + + /* treat failed slots as finished slots */ + u32 finished = (done & 0xFFFF) | failed; + + /* Note: This is inherently unfair, as we always check + * for lower numbered interrupts before the higher + * numbered ones.*/ + i = ffs(finished) - 1; + KBASE_DEBUG_ASSERT(i >= 0); + + do { + int nr_done; + u32 active; + u32 completion_code = BASE_JD_EVENT_DONE;/* assume OK */ + u64 job_tail = 0; + + if (failed & (1u << i)) { + /* read out the job slot status code if the job + * slot reported failure */ + completion_code = kbase_reg_read(kbdev, + JOB_SLOT_REG(i, JS_STATUS)); + + if (completion_code == BASE_JD_EVENT_STOPPED) { + KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT( + kbdev, NULL, + i, 0, TL_JS_EVENT_SOFT_STOP); + + kbasep_trace_tl_event_lpu_softstop( + kbdev, i); + + /* Soft-stopped job - read the value of + * JS_TAIL so that the job chain can + * be resumed */ + job_tail = (u64)kbase_reg_read(kbdev, + JOB_SLOT_REG(i, JS_TAIL_LO)) | + ((u64)kbase_reg_read(kbdev, + JOB_SLOT_REG(i, JS_TAIL_HI)) + << 32); + } else if (completion_code == + BASE_JD_EVENT_NOT_STARTED) { + /* PRLAM-10673 can cause a TERMINATED + * job to come back as NOT_STARTED, but + * the error interrupt helps us detect + * it */ + completion_code = + BASE_JD_EVENT_TERMINATED; + } + + kbase_gpu_irq_evict(kbdev, i, completion_code); + + /* Some jobs that encounter a BUS FAULT may result in corrupted + * state causing future jobs to hang.
Reset GPU before + * allowing any other jobs on the slot to continue. */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3076)) { + if (completion_code == BASE_JD_EVENT_JOB_BUS_FAULT) { + if (kbase_prepare_to_reset_gpu_locked(kbdev)) + kbase_reset_gpu_locked(kbdev); + } + } + } + + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), + done & ((1 << i) | (1 << (i + 16)))); + active = kbase_reg_read(kbdev, + JOB_CONTROL_REG(JOB_IRQ_JS_STATE)); + + if (((active >> i) & 1) == 0 && + (((done >> (i + 16)) & 1) == 0)) { + /* There is a potential race we must work + * around: + * + * 1. A job slot has a job in both current and + * next registers + * 2. The job in current completes + * successfully, the IRQ handler reads + * RAWSTAT and calls this function with the + * relevant bit set in "done" + * 3. The job in the next registers becomes the + * current job on the GPU + * 4. Sometime before the JOB_IRQ_CLEAR line + * above the job on the GPU _fails_ + * 5. The IRQ_CLEAR clears the done bit but not + * the failed bit. This atomically sets + * JOB_IRQ_JS_STATE. However since both jobs + * have now completed the relevant bits for + * the slot are set to 0. + * + * If we now did nothing then we'd incorrectly + * assume that _both_ jobs had completed + * successfully (since we haven't yet observed + * the fail bit being set in RAWSTAT). + * + * So at this point if there are no active jobs + * left we check to see if RAWSTAT has a failure + * bit set for the job slot. If it does we know + * that there has been a new failure that we + * didn't previously know about, so we make sure + * that we record this in active (but we wait + * for the next loop to deal with it). + * + * If we were handling a job failure (i.e. done + * has the relevant high bit set) then we know + * that the value read back from + * JOB_IRQ_JS_STATE is the correct number of + * remaining jobs because the failed job will + * have prevented any further jobs from starting + * execution.
+ */ + u32 rawstat = kbase_reg_read(kbdev, + JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)); + + if ((rawstat >> (i + 16)) & 1) { + /* There is a failed job that we've + * missed - add it back to active */ + active |= (1u << i); + } + } + + dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n", + completion_code); + + nr_done = kbase_backend_nr_atoms_submitted(kbdev, i); + nr_done -= (active >> i) & 1; + nr_done -= (active >> (i + 16)) & 1; /* nr_done is now the atoms submitted on the slot minus the slot's bits still set in active */ + + if (nr_done <= 0) { + dev_warn(kbdev->dev, "Spurious interrupt on slot %d", + i); + + goto spurious; + } + + count += nr_done; + + while (nr_done) { + if (nr_done == 1) { + kbase_gpu_complete_hw(kbdev, i, + completion_code, + job_tail, + &end_timestamp); + kbase_jm_try_kick_all(kbdev); + } else { + /* More than one job has completed. + * Since this is not the last job being + * reported this time it must have + * passed. This is because the hardware + * will not allow further jobs in a job + * slot to complete until the failed job + * is cleared from the IRQ status.
+ */ + kbase_gpu_complete_hw(kbdev, i, + BASE_JD_EVENT_DONE, + 0, + &end_timestamp); + } + nr_done--; + } + spurious: + done = kbase_reg_read(kbdev, + JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)); + + failed = done >> 16; + finished = (done & 0xFFFF) | failed; + if (done) + end_timestamp = ktime_get(); + } while (finished & (1 << i)); + + kbasep_job_slot_update_head_start_timestamp(kbdev, i, + end_timestamp); + } + + if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == + KBASE_RESET_GPU_COMMITTED) { + /* If we're trying to reset the GPU then we might be able to do + * it early (without waiting for a timeout) because some jobs + * have completed + */ + kbasep_try_reset_gpu_early_locked(kbdev); + } + KBASE_KTRACE_ADD_JM(kbdev, JM_IRQ_END, NULL, NULL, 0, count); +} + +void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, + int js, + u32 action, + base_jd_core_req core_reqs, + struct kbase_jd_atom *target_katom) +{ /* Write a soft-stop or hard-stop command for target_katom to JS_COMMAND of slot js, marking the atom as soft/hard stopped; a protected atom is never soft-stopped (early return). */ +#if KBASE_KTRACE_ENABLE + u32 status_reg_before; + u64 job_in_head_before; + u32 status_reg_after; + + KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK))); + + /* Check the head pointer */ + job_in_head_before = ((u64) kbase_reg_read(kbdev, + JOB_SLOT_REG(js, JS_HEAD_LO))) + | (((u64) kbase_reg_read(kbdev, + JOB_SLOT_REG(js, JS_HEAD_HI))) + << 32); + status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS)); +#endif + + if (action == JS_COMMAND_SOFT_STOP) { + if (kbase_jd_katom_is_protected(target_katom)) { +#ifdef CONFIG_MALI_DEBUG + dev_dbg(kbdev->dev, + "Attempt made to soft-stop a job that cannot be soft-stopped.
core_reqs = 0x%x", + (unsigned int)core_reqs); +#endif /* CONFIG_MALI_DEBUG */ + return; + } + + /* We are about to issue a soft stop, so mark the atom as having + * been soft stopped */ + target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPED; + + /* Mark the point where we issue the soft-stop command */ + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(kbdev, target_katom); + + if (kbase_hw_has_feature( + kbdev, + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { + action = (target_katom->atom_flags & + KBASE_KATOM_FLAGS_JOBCHAIN) ? + JS_COMMAND_SOFT_STOP_1 : + JS_COMMAND_SOFT_STOP_0; + } + } else if (action == JS_COMMAND_HARD_STOP) { + target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED; + + if (kbase_hw_has_feature( + kbdev, + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { + action = (target_katom->atom_flags & + KBASE_KATOM_FLAGS_JOBCHAIN) ? + JS_COMMAND_HARD_STOP_1 : + JS_COMMAND_HARD_STOP_0; + } + } + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action); + +#if KBASE_KTRACE_ENABLE + status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS)); + if (status_reg_after == BASE_JD_EVENT_ACTIVE) { + struct kbase_jd_atom *head; + struct kbase_context *head_kctx; + + head = kbase_gpu_inspect(kbdev, js, 0); + head_kctx = head->kctx; /* NOTE(review): head is dereferenced without a NULL check, unlike the other kbase_gpu_inspect() callers in this file - confirm it cannot be NULL here */ + + if (status_reg_before == BASE_JD_EVENT_ACTIVE) + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, head_kctx, head, job_in_head_before, js); + else + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, 0, js); + + switch (action) { + case JS_COMMAND_SOFT_STOP: + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP, head_kctx, head, head->jc, js); + break; + case JS_COMMAND_SOFT_STOP_0: + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx, head, head->jc, js); + break; + case JS_COMMAND_SOFT_STOP_1: + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx, head, head->jc, js); + break; + case JS_COMMAND_HARD_STOP: + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP, head_kctx, head, head->jc, js); + break; + case
JS_COMMAND_HARD_STOP_0: + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_0, head_kctx, head, head->jc, js); + break; + case JS_COMMAND_HARD_STOP_1: + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, head_kctx, head, head->jc, js); + break; + default: + BUG(); + break; + } + } else { + if (status_reg_before == BASE_JD_EVENT_ACTIVE) + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, job_in_head_before, js); + else + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, 0, js); + + switch (action) { + case JS_COMMAND_SOFT_STOP: + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0, js); + break; + case JS_COMMAND_SOFT_STOP_0: + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL, 0, js); + break; + case JS_COMMAND_SOFT_STOP_1: + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL, 0, js); + break; + case JS_COMMAND_HARD_STOP: + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0, js); + break; + case JS_COMMAND_HARD_STOP_0: + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL, 0, js); + break; + case JS_COMMAND_HARD_STOP_1: + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL, 0, js); + break; + default: + BUG(); + break; + } + } +#endif +} + +void kbase_backend_jm_kill_running_jobs_from_kctx(struct kbase_context *kctx) +{ /* Issue a hard-stop for kctx on every job slot of the device (a NULL target atom is passed for each slot). Caller holds hwaccess_lock. */ + struct kbase_device *kbdev = kctx->kbdev; + int i; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) + kbase_job_slot_hardstop(kctx, i, NULL); +} + +void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, + struct kbase_jd_atom *target_katom) +{ /* Soft-stop every atom on target_katom's slot whose sched_priority value is greater than target_katom's; in process-local priority mode only atoms belonging to kctx are considered. */ + struct kbase_device *kbdev; + int js = target_katom->slot_nr; + int priority = target_katom->sched_priority; + int i; + bool stop_sent = false; + + KBASE_DEBUG_ASSERT(kctx != NULL); + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev != NULL); + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) { + struct kbase_jd_atom
*katom; + + katom = kbase_gpu_inspect(kbdev, js, i); + if (!katom) + continue; + + if ((kbdev->js_ctx_scheduling_mode == + KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE) && + (katom->kctx != kctx)) + continue; + + if (katom->sched_priority > priority) { + if (!stop_sent) + KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED( + kbdev, + target_katom); + + kbase_job_slot_softstop(kbdev, js, katom); + stop_sent = true; + } + } +} + +static int softstop_start_rp_nolock( + struct kbase_context *kctx, struct kbase_va_region *reg) +{ /* With hwaccess_lock held: if the atom at position 0 of job slot 1 is the start-of-renderpass atom of a renderpass in START or RETRY state, move reg onto that renderpass's OOM region list, advance the state to the matching PEND_OOM value and soft-stop the atom. Returns 0, or -ESRCH / -EPERM / -EINVAL when a check fails. */ + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_jd_atom *katom; + struct kbase_jd_renderpass *rp; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + katom = kbase_gpu_inspect(kbdev, 1, 0); + + if (!katom) { + dev_dbg(kctx->kbdev->dev, "No atom on job slot\n"); + return -ESRCH; + } + + if (!(katom->core_req & BASE_JD_REQ_START_RENDERPASS)) { + dev_dbg(kctx->kbdev->dev, + "Atom %p on job slot is not start RP\n", (void *)katom); + return -EPERM; + } + + compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[katom->renderpass_id]; + if (WARN_ON(rp->state != KBASE_JD_RP_START && + rp->state != KBASE_JD_RP_RETRY)) + return -EINVAL; + + dev_dbg(kctx->kbdev->dev, "OOM in state %d with region %p\n", + (int)rp->state, (void *)reg); + + if (WARN_ON(katom != rp->start_katom)) + return -EINVAL; + + dev_dbg(kctx->kbdev->dev, "Adding region %p to list %p\n", + (void *)reg, (void *)&rp->oom_reg_list); + list_move_tail(®->link, &rp->oom_reg_list); /* NOTE(review): the first argument looks like a mis-encoded "&reg" (address of reg->link) - confirm against the upstream source */ + dev_dbg(kctx->kbdev->dev, "Added region to list\n"); + + rp->state = (rp->state == KBASE_JD_RP_START ?
+ KBASE_JD_RP_PEND_OOM : KBASE_JD_RP_RETRY_PEND_OOM); + + kbase_job_slot_softstop(kbdev, 1, katom); + + return 0; +} + +int kbase_job_slot_softstop_start_rp(struct kbase_context *const kctx, + struct kbase_va_region *const reg) +{ /* Locked wrapper: runs softstop_start_rp_nolock() under hwaccess_lock (IRQs saved/restored) and returns its result. */ + struct kbase_device *const kbdev = kctx->kbdev; + int err; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + err = softstop_start_rp_nolock(kctx, reg); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return err; +} + +void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) +{ /* Wait, within a single ZAP_TIMEOUT budget, for kctx to have no jobs and then to be descheduled; if either wait times out, reset the GPU and wait for the reset to finish. */ + struct kbase_device *kbdev = kctx->kbdev; + unsigned long timeout = msecs_to_jiffies(ZAP_TIMEOUT); + + timeout = wait_event_timeout(kctx->jctx.zero_jobs_wait, + kctx->jctx.job_nr == 0, timeout); + + if (timeout != 0) + timeout = wait_event_timeout( + kctx->jctx.sched_info.ctx.is_scheduled_wait, + !kbase_ctx_flag(kctx, KCTX_SCHEDULED), + timeout); + + /* Neither wait timed out; all done! */ + if (timeout != 0) + goto exit; + + if (kbase_prepare_to_reset_gpu(kbdev)) { + dev_err(kbdev->dev, + "Issueing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g.
process exit)\n", + ZAP_TIMEOUT); + kbase_reset_gpu(kbdev); + } + + /* Wait for the reset to complete */ + kbase_reset_gpu_wait(kbdev); +exit: + dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx); + + /* Ensure that the signallers of the waitqs have finished (presumably they signal while holding these mutexes, so taking and releasing them waits for them to leave) */ + mutex_lock(&kctx->jctx.lock); + mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); + mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); + mutex_unlock(&kctx->jctx.lock); +} + +u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev) +{ /* Return the LATEST_FLUSH register value, or 0 when flush reduction is not supported or the GPU is not powered. */ + u32 flush_id = 0; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) { + mutex_lock(&kbdev->pm.lock); + if (kbdev->pm.backend.gpu_powered) + flush_id = kbase_reg_read(kbdev, + GPU_CONTROL_REG(LATEST_FLUSH)); + mutex_unlock(&kbdev->pm.lock); + } + + return flush_id; +} + +int kbase_job_slot_init(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); + return 0; +} +KBASE_EXPORT_TEST_API(kbase_job_slot_init); + +void kbase_job_slot_halt(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +void kbase_job_slot_term(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} +KBASE_EXPORT_TEST_API(kbase_job_slot_term); + + +/** + * kbase_job_slot_softstop_swflags - Soft-stop a job with flags + * @kbdev: The kbase device + * @js: The job slot to soft-stop + * @target_katom: The job that should be soft-stopped (or NULL for any job) + * @sw_flags: Flags to pass in about the soft-stop + * + * Context: + * The job slot lock must be held when calling this function. + * The job slot must not already be in the process of being soft-stopped. + * + * Soft-stop the specified job slot, with extra information about the stop + * + * Where possible any job in the next register is evicted before the soft-stop.
+ */ +void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, + struct kbase_jd_atom *target_katom, u32 sw_flags) +{ + dev_dbg(kbdev->dev, "Soft-stop atom %p with flags 0x%x (s:%d)\n", + target_katom, sw_flags, js); + + KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK)); /* sw_flags must not overlap the hardware command bits it is OR-ed with below */ + kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom, + JS_COMMAND_SOFT_STOP | sw_flags); +} + +/** + * kbase_job_slot_softstop - Soft-stop the specified job slot (no software flags) + * @kbdev: The kbase device + * @js: The job slot to soft-stop + * @target_katom: The job that should be soft-stopped (or NULL for any job) + * Context: + * The job slot lock must be held when calling this function. + * The job slot must not already be in the process of being soft-stopped. + * + * Where possible any job in the next register is evicted before the soft-stop. + */ +void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, + struct kbase_jd_atom *target_katom) +{ + kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u); +} + +/** + * kbase_job_slot_hardstop - Hard-stop the specified job slot + * @kctx: The kbase context that contains the job(s) that should + * be hard-stopped + * @js: The job slot to hard-stop + * @target_katom: The job that should be hard-stopped (or NULL for all + * jobs from the context) + * Context: + * The job slot lock must be held when calling this function. + */ +void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, + struct kbase_jd_atom *target_katom) +{ + struct kbase_device *kbdev = kctx->kbdev; + bool stopped; /* NOTE(review): assigned below but never read in this function */ + + stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js, + target_katom, + JS_COMMAND_HARD_STOP); +} + +/** + * kbase_job_check_enter_disjoint - potentially enter disjoint mode + * @kbdev: kbase device + * @action: the event which has occurred + * @core_reqs: core requirements of the atom + * @target_katom: the atom which is being affected + * + * For a certain soft-stop action, work out whether to enter disjoint + * state.
+ * + * This does not register multiple disjoint events if the atom has already + * started a disjoint period + * + * @core_reqs can be supplied as 0 if the atom had not started on the hardware + * (and so a 'real' soft/hard-stop was not required, but it still interrupted + * flow, perhaps on another context) + * + * kbase_job_check_leave_disjoint() should be used to end the disjoint + * state when the soft/hard-stop action is complete + */ +void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, + base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom) +{ + u32 hw_action = action & JS_COMMAND_MASK; /* NOTE(review): core_reqs is not read anywhere in this function body */ + + /* For soft-stop, don't enter if the atom is protected (soft-stop not + * allowed), or the action isn't flagged as causing disjoint. + */ + if (hw_action == JS_COMMAND_SOFT_STOP && + (kbase_jd_katom_is_protected(target_katom) || + (0 == (action & JS_COMMAND_SW_CAUSES_DISJOINT)))) + return; + + /* Nothing to do if already logged disjoint state on this atom */ + if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) + return; + + target_katom->atom_flags |= KBASE_KATOM_FLAG_IN_DISJOINT; + kbase_disjoint_state_up(kbdev); +} + +/** + * kbase_job_check_leave_disjoint - potentially leave disjoint state + * @kbdev: kbase device + * @target_katom: atom which is finishing + * + * Work out whether to leave disjoint state when finishing an atom that was + * originated by kbase_job_check_enter_disjoint().
+ */ +void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, + struct kbase_jd_atom *target_katom) +{ + if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) { + target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT; + kbase_disjoint_state_down(kbdev); + } +} + +static void kbase_debug_dump_registers(struct kbase_device *kbdev) +{ /* Dump the register I/O history, then log a fixed set of GPU control, job control, job slot and MMU registers at error level. */ + int i; + + kbase_io_history_dump(kbdev); + + dev_err(kbdev->dev, "Register state:"); + dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)), + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS))); + dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x", + kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)), + kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE))); + for (i = 0; i < 3; i++) { /* NOTE(review): the slot count is hard-coded to 3 here, whereas kbase_backend_jm_kill_running_jobs_from_kctx() iterates kbdev->gpu_props.num_job_slots - confirm this is intended */ + dev_err(kbdev->dev, " JS%d_STATUS=0x%08x JS%d_HEAD_LO=0x%08x", + i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS)), + i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO))); + } + dev_err(kbdev->dev, " MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", + kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)), + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS))); + dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)), + kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)), + kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK))); + dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)), + kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1))); + dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG)), + kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG))); + dev_err(kbdev->dev, " TILER_CONFIG=0x%08x JM_CONFIG=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG)), + kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG))); +} +
+/**
+ * kbasep_reset_timeout_worker - Work item that performs the GPU reset
+ * @data: the reset_work member embedded in struct kbase_device
+ *
+ * Runs on the reset workqueue. It disables GPU hardware counters, cancels the
+ * reset timer, quiesces and flushes interrupt handling, completes any jobs
+ * still on the GPU, re-initialises the hardware with kbase_pm_init_hw(),
+ * restores the address spaces, re-enables interrupts and finally marks the
+ * reset as KBASE_RESET_GPU_NOT_PENDING and wakes waiters on reset_wait.
+ *
+ * When the reset was requested as KBASE_RESET_GPU_SILENT the dev_err()
+ * messages and the register dump are skipped.
+ */
+static void kbasep_reset_timeout_worker(struct work_struct *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	ktime_t end_timestamp = ktime_get();
+	struct kbasep_js_device_data *js_devdata;
+	bool silent = false;
+	u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+
+	KBASE_DEBUG_ASSERT(data);
+
+	kbdev = container_of(data, struct kbase_device,
+			hwaccess.backend.reset_work);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+	js_devdata = &kbdev->js_data;
+
+	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+			KBASE_RESET_GPU_SILENT)
+		silent = true;
+
+	KBASE_KTRACE_ADD_JM(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0);
+
+	/* Disable GPU hardware counters.
+	 * This call will block until counters are disabled.
+	 */
+	kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
+
+	/* Make sure the timer has completed - this cannot be done from
+	 * interrupt context, so this cannot be done within
+	 * kbasep_try_reset_gpu_early. */
+	hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer);
+
+	if (kbase_pm_context_active_handle_suspend(kbdev,
+				KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+		/* This would re-activate the GPU. Since it's already idle,
+		 * there's no need to reset it */
+		atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+				KBASE_RESET_GPU_NOT_PENDING);
+		kbase_disjoint_state_down(kbdev);
+		wake_up(&kbdev->hwaccess.backend.reset_wait);
+		/* Undo the counter disable performed above before bailing */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	spin_lock(&kbdev->mmu_mask_change);
+	kbase_pm_reset_start_locked(kbdev);
+
+	/* We're about to flush out the IRQs and their bottom halves */
+	kbdev->irq_reset_flush = true;
+
+	/* Disable IRQ to avoid IRQ handlers to kick in after releasing the
+	 * spinlock; this also clears any outstanding interrupts */
+	kbase_pm_disable_interrupts_nolock(kbdev);
+
+	spin_unlock(&kbdev->mmu_mask_change);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* Ensure that any IRQ handlers have finished
+	 * Must be done without any locks IRQ handlers will take */
+	kbase_synchronize_irqs(kbdev);
+
+	/* Flush out any in-flight work items */
+	kbase_flush_mmu_wqs(kbdev);
+
+	/* The flush has completed so reset the active indicator */
+	kbdev->irq_reset_flush = false;
+
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) {
+		/* Ensure that L2 is not transitioning when we send the reset
+		 * command */
+		while (--max_loops && kbase_pm_get_trans_cores(kbdev,
+				KBASE_PM_CORE_L2))
+			;
+
+		WARN(!max_loops, "L2 power transition timed out while trying to reset\n");
+	}
+
+	mutex_lock(&kbdev->pm.lock);
+	/* We hold the pm lock, so there ought to be a current policy */
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy);
+
+	/* All slots have been soft-stopped and we've waited
+	 * SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point we
+	 * assume that anything that is still left on the GPU is stuck there and
+	 * we'll kill it when we reset the GPU */
+
+	if (!silent)
+		dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)",
+				RESET_TIMEOUT);
+
+	/* Output the state of some interesting registers to help in the
+	 * debugging of GPU resets */
+	if (!silent)
+		kbase_debug_dump_registers(kbdev);
+
+	/* Complete any jobs that were still on the GPU */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->protected_mode = false;
+	if (!kbdev->pm.backend.protected_entry_transition_override)
+		kbase_backend_reset(kbdev, &end_timestamp);
+	kbase_pm_metrics_update(kbdev, NULL);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* Reset the GPU */
+	kbase_pm_init_hw(kbdev, 0);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+
+	mutex_lock(&kbdev->mmu_hw_mutex);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_ctx_sched_restore_all_as(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+	kbase_pm_enable_interrupts(kbdev);
+
+	kbase_disjoint_state_down(kbdev);
+
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	kbase_pm_reset_complete(kbdev);
+
+	/* Find out what cores are required now */
+	kbase_pm_update_cores_state(kbdev);
+
+	/* Synchronously request and wait for those cores, because if
+	 * instrumentation is enabled it would need them immediately. */
+	kbase_pm_wait_for_desired_state(kbdev);
+
+	mutex_unlock(&kbdev->pm.lock);
+
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_NOT_PENDING);
+
+	wake_up(&kbdev->hwaccess.backend.reset_wait);
+	if (!silent)
+		dev_err(kbdev->dev, "Reset complete");
+
+	/* Try submitting some jobs to restart processing */
+	KBASE_KTRACE_ADD_JM(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u, 0);
+	kbase_js_sched_all(kbdev);
+
+	/* Process any pending slot updates */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_backend_slot_update(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	kbase_pm_context_idle(kbdev);
+
+	/* Re-enable GPU hardware counters */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	KBASE_KTRACE_ADD_JM(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0);
+}
+
+/**
+ * kbasep_reset_timer_callback - Reset timer expiry handler
+ * @timer: the reset_timer member embedded in struct kbase_device
+ *
+ * If the reset is still in the KBASE_RESET_GPU_COMMITTED state it is moved to
+ * KBASE_RESET_GPU_HAPPENING and the reset work is queued. Otherwise another
+ * path has already started the reset and nothing is done.
+ *
+ * Return: always HRTIMER_NORESTART
+ */
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer)
+{
+	struct kbase_device *kbdev = container_of(timer, struct kbase_device,
+			hwaccess.backend.reset_timer);
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Reset still pending? */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) ==
+			KBASE_RESET_GPU_COMMITTED)
+		queue_work(kbdev->hwaccess.backend.reset_workq,
+				&kbdev->hwaccess.backend.reset_work);
+
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * If all jobs are evicted from the GPU then we can reset the GPU
+ * immediately instead of waiting for the timeout to elapse
+ */
+
+static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev)
+{
+	int i;
+	int pending_jobs = 0;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Count the number of jobs */
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i);
+
+	if (pending_jobs > 0) {
+		/* There are still jobs on the GPU - wait */
+		return;
+	}
+
+	/* To prevent getting incorrect registers when dumping failed job,
+	 * skip early reset.
+	 */
+	if (atomic_read(&kbdev->job_fault_debug) > 0)
+		return;
+
+	/* Check that the reset has been committed to (i.e. kbase_reset_gpu has
+	 * been called), and that no other thread beat this thread to starting
+	 * the reset */
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+			KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) !=
+			KBASE_RESET_GPU_COMMITTED) {
+		/* Reset has already occurred */
+		return;
+	}
+
+	queue_work(kbdev->hwaccess.backend.reset_workq,
+			&kbdev->hwaccess.backend.reset_work);
+}
+
+/* Variant of kbasep_try_reset_gpu_early_locked() that takes hwaccess_lock */
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbasep_try_reset_gpu_early_locked(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU
+ * @kbdev: kbase device
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return:
+ *   The function returns a boolean which should be interpreted as follows:
+ *   true - Prepared for reset, kbase_reset_gpu_locked should be called.
+ *   false - Another thread is performing a reset, kbase_reset_gpu should
+ *   not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	int i;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING,
+						KBASE_RESET_GPU_PREPARED) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* Some other thread is already resetting the GPU */
+		return false;
+	}
+
+	kbase_disjoint_state_up(kbdev);
+
+	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+		kbase_job_slot_softstop(kbdev, i, NULL);
+
+	return true;
+}
+
+/* Variant of kbase_prepare_to_reset_gpu_locked() that takes hwaccess_lock */
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	bool ret;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	ret = kbase_prepare_to_reset_gpu_locked(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return ret;
+}
+KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu);
+
+/**
+ * kbasep_reset_gpu_commit - Commit a prepared reset and arm the reset timer
+ * @kbdev: kbase device
+ *
+ * Common part of kbase_reset_gpu() and kbase_reset_gpu_locked(): moves the
+ * reset state to KBASE_RESET_GPU_COMMITTED, logs the soft-stop grace period
+ * and starts the reset timer for kbdev->reset_timeout_ms milliseconds.
+ */
+static void kbasep_reset_gpu_commit(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* Note this is an assert/atomic_set because it is a software issue for
+	 * a race to be occurring here */
+	KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+						KBASE_RESET_GPU_PREPARED);
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_COMMITTED);
+
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+			kbdev->reset_timeout_ms);
+
+	hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+			HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+			HRTIMER_MODE_REL);
+}
+
+/*
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for
+ * kbdev->hwaccess.backend.reset_wait to be signalled to know when the reset
+ * has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev)
+{
+	kbasep_reset_gpu_commit(kbdev);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_reset_gpu);
+
+/*
+ * Same as kbase_reset_gpu(), but attempts the early reset through
+ * kbasep_try_reset_gpu_early_locked(), i.e. without taking hwaccess_lock
+ * itself.
+ */
+void kbase_reset_gpu_locked(struct kbase_device *kbdev)
+{
+	kbasep_reset_gpu_commit(kbdev);
+
+	/* Try resetting early */
+	kbasep_try_reset_gpu_early_locked(kbdev);
+}
+
+/**
+ * kbase_reset_gpu_silent - Queue a GPU reset without the reset log messages
+ * @kbdev: kbase device
+ *
+ * Moves the reset state from KBASE_RESET_GPU_NOT_PENDING straight to
+ * KBASE_RESET_GPU_SILENT and queues the reset work immediately; no soft-stop
+ * and no reset timer are involved.
+ *
+ * Return: 0 if the reset work was queued, -EAGAIN if a reset was already
+ * pending.
+ */
+int kbase_reset_gpu_silent(struct kbase_device *kbdev)
+{
+	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING,
+						KBASE_RESET_GPU_SILENT) !=
+						KBASE_RESET_GPU_NOT_PENDING) {
+		/* Some other thread is already resetting the GPU */
+		return -EAGAIN;
+	}
+
+	kbase_disjoint_state_up(kbdev);
+
+	queue_work(kbdev->hwaccess.backend.reset_workq,
+			&kbdev->hwaccess.backend.reset_work);
+
+	return 0;
+}
+
+/**
+ * kbase_reset_gpu_is_active - Report whether a reset is pending or running
+ * @kbdev: kbase device
+ *
+ * Return: true for any reset state other than KBASE_RESET_GPU_NOT_PENDING.
+ */
+bool kbase_reset_gpu_is_active(struct kbase_device *kbdev)
+{
+	return atomic_read(&kbdev->hwaccess.backend.reset_gpu) !=
+			KBASE_RESET_GPU_NOT_PENDING;
+}
+
+/**
+ * kbase_reset_gpu_wait - Block until no reset is pending
+ * @kbdev: kbase device
+ *
+ * Sleeps on reset_wait until the reset state reads
+ * KBASE_RESET_GPU_NOT_PENDING.
+ *
+ * Return: always 0
+ */
+int kbase_reset_gpu_wait(struct kbase_device *kbdev)
+{
+	wait_event(kbdev->hwaccess.backend.reset_wait,
+			atomic_read(&kbdev->hwaccess.backend.reset_gpu)
+			== KBASE_RESET_GPU_NOT_PENDING);
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_reset_gpu_wait);
+
+/**
+ * kbase_reset_gpu_init - Set up the reset workqueue, work item and timer
+ * @kbdev: kbase device
+ *
+ * Return: 0 on success, -ENOMEM if the workqueue could not be allocated.
+ */
+int kbase_reset_gpu_init(struct kbase_device *kbdev)
+{
+	kbdev->hwaccess.backend.reset_workq = alloc_workqueue(
+						"Mali reset workqueue", 0, 1);
+	if (kbdev->hwaccess.backend.reset_workq == NULL)
+		return -ENOMEM;
+
+	INIT_WORK(&kbdev->hwaccess.backend.reset_work,
+						kbasep_reset_timeout_worker);
+
+	hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	kbdev->hwaccess.backend.reset_timer.function =
+						kbasep_reset_timer_callback;
+
+	return 0;
+}
+
+/* Destroy the reset workqueue created by kbase_reset_gpu_init() */
+void kbase_reset_gpu_term(struct kbase_device *kbdev)
+{
+	destroy_workqueue(kbdev->hwaccess.backend.reset_workq);
+}
diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
new file mode 100644
index 0000000..1419b59
--- /dev/null
+++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
@@ -0,0 +1,177 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2016, 2018-2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+/*
+ * Job Manager backend-specific low-level APIs.
+ */
+
+#ifndef _KBASE_JM_HWACCESS_H_
+#define _KBASE_JM_HWACCESS_H_
+
+#include
+#include
+#include
+
+#include
+#include
+
+/**
+ * kbase_job_submit_nolock() - Submit a job to a certain job-slot
+ * @kbdev:	Device pointer
+ * @katom:	Atom to submit
+ * @js:		Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbase_job_submit_nolock(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom, int js);
+
+/**
+ * kbase_job_done_slot() - Complete the head job on a particular job-slot
+ * @kbdev:		Device pointer
+ * @s:			Job slot
+ * @completion_code:	Completion code of job reported by GPU
+ * @job_tail:		Job tail address reported by GPU
+ * @end_timestamp:	Timestamp of job completion
+ */
+void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code,
+					u64 job_tail, ktime_t *end_timestamp);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+/**
+ * kbasep_make_job_slot_string() - Format the name of a job slot
+ * @js:		Job slot number
+ * @js_string:	Buffer that receives the string "job_slot_<js>"
+ * @js_size:	Size of @js_string in bytes, passed to snprintf() as the bound
+ *
+ * Only available when CONFIG_GPU_TRACEPOINTS is defined.
+ *
+ * Return: @js_string
+ */
+static inline char *kbasep_make_job_slot_string(int js, char *js_string,
+						size_t js_size)
+{
+	snprintf(js_string, js_size, "job_slot_%i", js);
+	return js_string;
+}
+#endif
+
+/**
+ * kbasep_jm_is_js_free() - Test whether a slot's JS_COMMAND_NEXT is clear
+ * @kbdev:	Device pointer
+ * @js:		Job slot to query
+ * @kctx:	Context pointer; not referenced by this implementation
+ *
+ * Return: non-zero if the JS_COMMAND_NEXT register of slot @js reads as zero,
+ *         0 otherwise
+ */
+static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
+						struct kbase_context *kctx)
+{
+	return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT));
+}
+
+
+/**
+ * kbase_job_hw_submit() - Submit a job to the GPU
+ * @kbdev:	Device pointer
+ * @katom:	Atom to submit
+ * @js:		Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+				struct kbase_jd_atom *katom,
+				int js);
+
+/**
+ * kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop
+ *						   on the specified atom
+ * @kbdev:		Device pointer
+ * @js:			Job slot to stop on
+ * @action:		The action to perform, either JSn_COMMAND_HARD_STOP or
+ *			JSn_COMMAND_SOFT_STOP
+ * @core_reqs:		Core requirements of atom to stop
+ * @target_katom:	Atom to stop
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the hwaccess_lock
+ */
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+					int js,
+					u32 action,
+					base_jd_core_req core_reqs,
+					struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job
+ *					 slot belonging to a given context.
+ * @kbdev:	Device pointer
+ * @kctx:	Context pointer. May be NULL
+ * @js:		Job slot to soft or hard stop
+ * @katom:	Specific atom to stop. May be NULL
+ * @action:	The action to perform, either JSn_COMMAND_HARD_STOP or
+ *		JSn_COMMAND_SOFT_STOP
+ *
+ * If no context is provided then all jobs on the slot will be soft or hard
+ * stopped.
+ *
+ * If a katom is provided then only that specific atom will be stopped. In this
+ * case the kctx parameter is ignored.
+ *
+ * Jobs that are on the slot but are not yet on the GPU will be unpulled and
+ * returned to the job scheduler.
+ *
+ * Return: true if an atom was stopped, false otherwise
+ */
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+					struct kbase_context *kctx,
+					int js,
+					struct kbase_jd_atom *katom,
+					u32 action);
+
+/**
+ * kbase_job_slot_init - Initialise job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_job_slot_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_halt - Halt the job slot framework
+ * @kbdev: Device pointer
+ *
+ * Should prevent any further job slot processing
+ */
+void kbase_job_slot_halt(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_term - Terminate job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver termination
+ */
+void kbase_job_slot_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpu_cache_clean - Cause a GPU cache clean & flush
+ * @kbdev: Device pointer
+ *
+ * Caller must not be in IRQ context
+ */
+void kbase_gpu_cache_clean(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JM_HWACCESS_H_ */
diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
new file mode 100755
index 0000000..4e4ed05
--- /dev/null
+++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
@@ -0,0 +1,1656 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/* + * Register-based HW access backend specific APIs + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Return whether the specified ringbuffer is empty. HW access lock must be + * held */ +#define SLOT_RB_EMPTY(rb) (rb->write_idx == rb->read_idx) +/* Return number of atoms currently in the specified ringbuffer. HW access lock + * must be held */ +#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx) + +static void kbase_gpu_release_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + ktime_t *end_timestamp); + +/** + * kbase_gpu_enqueue_atom - Enqueue an atom in the HW access ringbuffer + * @kbdev: Device pointer + * @katom: Atom to enqueue + * + * Context: Caller must hold the HW access lock + */ +static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[katom->slot_nr]; + + WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE); + + lockdep_assert_held(&kbdev->hwaccess_lock); + + rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom; + rb->write_idx++; + + katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED; +} + +/** + * kbase_gpu_dequeue_atom - Remove an atom from the HW access ringbuffer, once + * it has been completed + * @kbdev: Device pointer + * @js: Job slot to remove atom from + * @end_timestamp: Pointer to timestamp of atom completion. May be NULL, in + * which case current time will be used. 
+ * + * Context: Caller must hold the HW access lock + * + * Return: Atom removed from ringbuffer + */ +static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, + int js, + ktime_t *end_timestamp) +{ + struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; + struct kbase_jd_atom *katom; + + if (SLOT_RB_EMPTY(rb)) { + WARN(1, "GPU ringbuffer unexpectedly empty\n"); + return NULL; + } + + lockdep_assert_held(&kbdev->hwaccess_lock); + + katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom; + + kbase_gpu_release_atom(kbdev, katom, end_timestamp); + + rb->read_idx++; + + katom->gpu_rb_state = KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB; + + return katom; +} + +struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, + int idx) +{ + struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if ((SLOT_RB_ENTRIES(rb) - 1) < idx) + return NULL; /* idx out of range */ + + return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom; +} + +struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, + int js) +{ + struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; + + if (SLOT_RB_EMPTY(rb)) + return NULL; + + return rb->entries[(rb->write_idx - 1) & SLOT_RB_MASK].katom; +} + +bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev) +{ + int js; + int i; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + for (i = 0; i < SLOT_RB_SIZE; i++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); + + if (katom && katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) + return true; + } + } + return false; +} + +int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js) +{ + int nr = 0; + int i; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (i = 0; i < SLOT_RB_SIZE; i++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); + + if (katom && 
(katom->gpu_rb_state == + KBASE_ATOM_GPU_RB_SUBMITTED)) + nr++; + } + + return nr; +} + +int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js) +{ + int nr = 0; + int i; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (i = 0; i < SLOT_RB_SIZE; i++) { + if (kbase_gpu_inspect(kbdev, js, i)) + nr++; + } + + return nr; +} + +static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js, + enum kbase_atom_gpu_rb_state min_rb_state) +{ + int nr = 0; + int i; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (i = 0; i < SLOT_RB_SIZE; i++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); + + if (katom && (katom->gpu_rb_state >= min_rb_state)) + nr++; + } + + return nr; +} + +/** + * check_secure_atom - Check if the given atom is in the given secure state and + * has a ringbuffer state of at least + * KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION + * @katom: Atom pointer + * @secure: Desired secure state + * + * Return: true if atom is in the given state, false otherwise + */ +static bool check_secure_atom(struct kbase_jd_atom *katom, bool secure) +{ + if (katom->gpu_rb_state >= + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION && + ((kbase_jd_katom_is_protected(katom) && secure) || + (!kbase_jd_katom_is_protected(katom) && !secure))) + return true; + + return false; +} + +/** + * kbase_gpu_check_secure_atoms - Check if there are any atoms in the given + * secure state in the ringbuffers of at least + * state + * KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE + * @kbdev: Device pointer + * @secure: Desired secure state + * + * Return: true if any atoms are in the given state, false otherwise + */ +static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, + bool secure) +{ + int js, i; + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + for (i = 0; i < SLOT_RB_SIZE; i++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, + js, i); + + if (katom) { + if 
(check_secure_atom(katom, secure)) + return true; + } + } + } + + return false; +} + +int kbase_backend_slot_free(struct kbase_device *kbdev, int js) +{ + if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) != + KBASE_RESET_GPU_NOT_PENDING) { + /* The GPU is being reset - so prevent submission */ + return 0; + } + + return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js); +} + + +static void kbase_gpu_release_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + ktime_t *end_timestamp) +{ + struct kbase_context *kctx = katom->kctx; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + switch (katom->gpu_rb_state) { + case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: + /* Should be impossible */ + WARN(1, "Attempting to release atom not in ringbuffer\n"); + break; + + case KBASE_ATOM_GPU_RB_SUBMITTED: + /* Inform power management at start/finish of atom so it can + * update its GPU utilisation metrics. Mark atom as not + * submitted beforehand. */ + katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; + kbase_pm_metrics_update(kbdev, end_timestamp); + + if (katom->core_req & BASE_JD_REQ_PERMON) + kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + KBASE_TLSTREAM_TL_NRET_ATOM_LPU(kbdev, katom, + &kbdev->gpu_props.props.raw_props.js_features + [katom->slot_nr]); + KBASE_TLSTREAM_TL_NRET_ATOM_AS(kbdev, katom, &kbdev->as[kctx->as_nr]); + KBASE_TLSTREAM_TL_NRET_CTX_LPU(kbdev, kctx, + &kbdev->gpu_props.props.raw_props.js_features + [katom->slot_nr]); + + case KBASE_ATOM_GPU_RB_READY: + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: + break; + + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: + if (kbase_jd_katom_is_protected(katom) && + (katom->protected_state.enter != + KBASE_ATOM_ENTER_PROTECTED_CHECK) && + (katom->protected_state.enter != + KBASE_ATOM_ENTER_PROTECTED_HWCNT)) { + kbase_pm_protected_override_disable(kbdev); + 
kbase_pm_update_cores_state_nolock(kbdev); + } + if (kbase_jd_katom_is_protected(katom) && + (katom->protected_state.enter == + KBASE_ATOM_ENTER_PROTECTED_IDLE_L2)) + kbase_pm_protected_entry_override_disable(kbdev); + if (!kbase_jd_katom_is_protected(katom) && + (katom->protected_state.exit != + KBASE_ATOM_EXIT_PROTECTED_CHECK) && + (katom->protected_state.exit != + KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT)) { + kbase_pm_protected_override_disable(kbdev); + kbase_pm_update_cores_state_nolock(kbdev); + } + + if (katom->protected_state.enter != + KBASE_ATOM_ENTER_PROTECTED_CHECK || + katom->protected_state.exit != + KBASE_ATOM_EXIT_PROTECTED_CHECK) + kbdev->protected_mode_transition = false; + /* If the atom has suspended hwcnt but has not yet entered + * protected mode, then resume hwcnt now. If the GPU is now in + * protected mode then hwcnt will be resumed by GPU reset so + * don't resume it here. + */ + if (kbase_jd_katom_is_protected(katom) && + ((katom->protected_state.enter == + KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) || + (katom->protected_state.enter == + KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY))) { + WARN_ON(!kbdev->protected_mode_hwcnt_disabled); + kbdev->protected_mode_hwcnt_desired = true; + if (kbdev->protected_mode_hwcnt_disabled) { + kbase_hwcnt_context_enable( + kbdev->hwcnt_gpu_ctx); + kbdev->protected_mode_hwcnt_disabled = false; + } + } + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { + if (katom->atom_flags & + KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) { + kbase_pm_protected_l2_override(kbdev, false); + katom->atom_flags &= + ~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; + } + } + + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + case KBASE_ATOM_GPU_RB_RETURN_TO_JS: + break; + } + + katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED; 
+ katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; +} + +static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbase_gpu_release_atom(kbdev, katom, NULL); + katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS; +} + +/** + * other_slots_busy - Determine if any job slots other than @js are currently + * running atoms + * @kbdev: Device pointer + * @js: Job slot + * + * Return: true if any slots other than @js are busy, false otherwise + */ +static inline bool other_slots_busy(struct kbase_device *kbdev, int js) +{ + int slot; + + for (slot = 0; slot < kbdev->gpu_props.num_job_slots; slot++) { + if (slot == js) + continue; + + if (kbase_gpu_nr_atoms_on_slot_min(kbdev, slot, + KBASE_ATOM_GPU_RB_SUBMITTED)) + return true; + } + + return false; +} + +static inline bool kbase_gpu_in_protected_mode(struct kbase_device *kbdev) +{ + return kbdev->protected_mode; +} + +static void kbase_gpu_disable_coherent(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* + * When entering into protected mode, we must ensure that the + * GPU is not operating in coherent mode as well. This is to + * ensure that no protected memory can be leaked. 
+ */ + if (kbdev->system_coherency == COHERENCY_ACE) + kbase_cache_set_coherency_mode(kbdev, COHERENCY_ACE_LITE); +} + +static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev) +{ + int err = -EINVAL; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + WARN_ONCE(!kbdev->protected_ops, + "Cannot enter protected mode: protected callbacks not specified.\n"); + + if (kbdev->protected_ops) { + /* Switch GPU to protected mode */ + err = kbdev->protected_ops->protected_mode_enable( + kbdev->protected_dev); + + if (err) { + dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n", + err); + } else { + kbdev->protected_mode = true; + kbase_ipa_protection_mode_switch_event(kbdev); + } + } + + return err; +} + +static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + WARN_ONCE(!kbdev->protected_ops, + "Cannot exit protected mode: protected callbacks not specified.\n"); + + if (!kbdev->protected_ops) + return -EINVAL; + + /* The protected mode disable callback will be called as part of reset + */ + return kbase_reset_gpu_silent(kbdev); +} + +static int kbase_jm_protected_entry(struct kbase_device *kbdev, + struct kbase_jd_atom **katom, int idx, int js) +{ + int err = 0; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + err = kbase_gpu_protected_mode_enter(kbdev); + + /* + * Regardless of result before this call, we are no longer + * transitioning the GPU. + */ + + kbdev->protected_mode_transition = false; + kbase_pm_protected_override_disable(kbdev); + kbase_pm_update_cores_state_nolock(kbdev); + + KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev); + if (err) { + /* + * Failed to switch into protected mode, resume + * GPU hwcnt and fail atom. 
+ */ + WARN_ON(!kbdev->protected_mode_hwcnt_disabled); + kbdev->protected_mode_hwcnt_desired = true; + if (kbdev->protected_mode_hwcnt_disabled) { + kbase_hwcnt_context_enable( + kbdev->hwcnt_gpu_ctx); + kbdev->protected_mode_hwcnt_disabled = false; + } + + katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; + kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); + /* + * Only return if head atom or previous atom + * already removed - as atoms must be returned + * in order. + */ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); + kbase_jm_return_atom_to_js(kbdev, katom[idx]); + } + + return -EINVAL; + } + + /* + * Protected mode sanity checks. + */ + KBASE_DEBUG_ASSERT_MSG( + kbase_jd_katom_is_protected(katom[idx]) == + kbase_gpu_in_protected_mode(kbdev), + "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", + kbase_jd_katom_is_protected(katom[idx]), + kbase_gpu_in_protected_mode(kbdev)); + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_READY; + + return err; +} + +static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, + struct kbase_jd_atom **katom, int idx, int js) +{ + int err = 0; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + switch (katom[idx]->protected_state.enter) { + case KBASE_ATOM_ENTER_PROTECTED_CHECK: + KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev, kbdev); + /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV + * should ensure that we are not already transitiong, and that + * there are no atoms currently on the GPU. */ + WARN_ON(kbdev->protected_mode_transition); + WARN_ON(kbase_gpu_atoms_submitted_any(kbdev)); + /* If hwcnt is disabled, it means we didn't clean up correctly + * during last exit from protected mode. 
+ */ + WARN_ON(kbdev->protected_mode_hwcnt_disabled); + + katom[idx]->protected_state.enter = + KBASE_ATOM_ENTER_PROTECTED_HWCNT; + + kbdev->protected_mode_transition = true; + + /* ***TRANSITION TO HIGHER STATE*** */ + /* fallthrough */ + case KBASE_ATOM_ENTER_PROTECTED_HWCNT: + /* See if we can get away with disabling hwcnt atomically */ + kbdev->protected_mode_hwcnt_desired = false; + if (!kbdev->protected_mode_hwcnt_disabled) { + if (kbase_hwcnt_context_disable_atomic( + kbdev->hwcnt_gpu_ctx)) + kbdev->protected_mode_hwcnt_disabled = true; + } + + /* We couldn't disable atomically, so kick off a worker */ + if (!kbdev->protected_mode_hwcnt_disabled) { +#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE + queue_work(system_wq, + &kbdev->protected_mode_hwcnt_disable_work); +#else + queue_work(system_highpri_wq, + &kbdev->protected_mode_hwcnt_disable_work); +#endif + return -EAGAIN; + } + + /* Once reaching this point GPU must be + * switched to protected mode or hwcnt + * re-enabled. */ + + if (kbase_pm_protected_entry_override_enable(kbdev)) + return -EAGAIN; + + /* + * Not in correct mode, begin protected mode switch. + * Entering protected mode requires us to power down the L2, + * and drop out of fully coherent mode. + */ + katom[idx]->protected_state.enter = + KBASE_ATOM_ENTER_PROTECTED_IDLE_L2; + + kbase_pm_protected_override_enable(kbdev); + /* + * Only if the GPU reset hasn't been initiated, there is a need + * to invoke the state machine to explicitly power down the + * shader cores and L2. + */ + if (!kbdev->pm.backend.protected_entry_transition_override) + kbase_pm_update_cores_state_nolock(kbdev); + + /* ***TRANSITION TO HIGHER STATE*** */ + /* fallthrough */ + case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: + /* Avoid unnecessary waiting on non-ACE platforms. */ + if (kbdev->system_coherency == COHERENCY_ACE) { + if (kbdev->pm.backend.l2_always_on) { + /* + * If the GPU reset hasn't completed, then L2 + * could still be powered up. 
+ */ + if (kbase_reset_gpu_is_active(kbdev)) + return -EAGAIN; + } + + if (kbase_pm_get_ready_cores(kbdev, + KBASE_PM_CORE_L2) || + kbase_pm_get_trans_cores(kbdev, + KBASE_PM_CORE_L2) || + kbase_is_gpu_lost(kbdev)) { + /* + * The L2 is still powered, wait for all + * the users to finish with it before doing + * the actual reset. + */ + return -EAGAIN; + } + } + + katom[idx]->protected_state.enter = + KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY; + + /* ***TRANSITION TO HIGHER STATE*** */ + /* fallthrough */ + case KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY: + /* + * When entering into protected mode, we must ensure that the + * GPU is not operating in coherent mode as well. This is to + * ensure that no protected memory can be leaked. + */ + kbase_gpu_disable_coherent(kbdev); + + kbase_pm_protected_entry_override_disable(kbdev); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { + /* + * Power on L2 caches; this will also result in the + * correct value written to coherency enable register. + */ + kbase_pm_protected_l2_override(kbdev, true); + + /* + * Set the flag on the atom that additional + * L2 references are taken. + */ + katom[idx]->atom_flags |= + KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; + } + + katom[idx]->protected_state.enter = + KBASE_ATOM_ENTER_PROTECTED_FINISHED; + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) + return -EAGAIN; + + /* ***TRANSITION TO HIGHER STATE*** */ + /* fallthrough */ + case KBASE_ATOM_ENTER_PROTECTED_FINISHED: + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) { + /* + * Check that L2 caches are powered and, if so, + * enter protected mode. + */ + if (kbdev->pm.backend.l2_state == KBASE_L2_ON) { + /* + * Remove additional L2 reference and reset + * the atom flag which denotes it. 
+ */ + if (katom[idx]->atom_flags & + KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) { + kbase_pm_protected_l2_override(kbdev, + false); + katom[idx]->atom_flags &= + ~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT; + } + + err = kbase_jm_protected_entry(kbdev, katom, idx, js); + + if (err) + return err; + } else { + /* + * still waiting for L2 caches to power up + */ + return -EAGAIN; + } + } else { + err = kbase_jm_protected_entry(kbdev, katom, idx, js); + + if (err) + return err; + } + } + + return 0; +} + +static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, + struct kbase_jd_atom **katom, int idx, int js) +{ + int err = 0; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + switch (katom[idx]->protected_state.exit) { + case KBASE_ATOM_EXIT_PROTECTED_CHECK: + KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(kbdev, kbdev); + /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV + * should ensure that we are not already transitiong, and that + * there are no atoms currently on the GPU. */ + WARN_ON(kbdev->protected_mode_transition); + WARN_ON(kbase_gpu_atoms_submitted_any(kbdev)); + + /* + * Exiting protected mode requires a reset, but first the L2 + * needs to be powered down to ensure it's not active when the + * reset is issued. + */ + katom[idx]->protected_state.exit = + KBASE_ATOM_EXIT_PROTECTED_IDLE_L2; + + kbdev->protected_mode_transition = true; + kbase_pm_protected_override_enable(kbdev); + kbase_pm_update_cores_state_nolock(kbdev); + + /* ***TRANSITION TO HIGHER STATE*** */ + /* fallthrough */ + case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: + if (kbdev->pm.backend.l2_state != KBASE_L2_OFF) { + /* + * The L2 is still powered, wait for all the users to + * finish with it before doing the actual reset. 
+ */ + return -EAGAIN; + } + katom[idx]->protected_state.exit = + KBASE_ATOM_EXIT_PROTECTED_RESET; + + /* ***TRANSITION TO HIGHER STATE*** */ + /* fallthrough */ + case KBASE_ATOM_EXIT_PROTECTED_RESET: + /* Issue the reset to the GPU */ + err = kbase_gpu_protected_mode_reset(kbdev); + + if (err == -EAGAIN) + return -EAGAIN; + + if (err) { + kbdev->protected_mode_transition = false; + kbase_pm_protected_override_disable(kbdev); + + /* Failed to exit protected mode, fail atom */ + katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; + kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); + /* Only return if head atom or previous atom + * already removed - as atoms must be returned + * in order */ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); + kbase_jm_return_atom_to_js(kbdev, katom[idx]); + } + + /* If we're exiting from protected mode, hwcnt must have + * been disabled during entry. + */ + WARN_ON(!kbdev->protected_mode_hwcnt_disabled); + kbdev->protected_mode_hwcnt_desired = true; + if (kbdev->protected_mode_hwcnt_disabled) { + kbase_hwcnt_context_enable( + kbdev->hwcnt_gpu_ctx); + kbdev->protected_mode_hwcnt_disabled = false; + } + + return -EINVAL; + } + + katom[idx]->protected_state.exit = + KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT; + + /* ***TRANSITION TO HIGHER STATE*** */ + /* fallthrough */ + case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: + /* A GPU reset is issued when exiting protected mode. Once the + * reset is done all atoms' state will also be reset. For this + * reason, if the atom is still in this state we can safely + * say that the reset has not completed i.e., we have not + * finished exiting protected mode yet. 
+ */ + return -EAGAIN; + } + + return 0; +} + +void kbase_backend_slot_update(struct kbase_device *kbdev) +{ + int js; + + lockdep_assert_held(&kbdev->hwaccess_lock); + +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbase_reset_gpu_is_active(kbdev) || kbase_is_gpu_lost(kbdev)) +#else + if (kbase_reset_gpu_is_active(kbdev)) +#endif + return; + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + struct kbase_jd_atom *katom[2]; + int idx; + + katom[0] = kbase_gpu_inspect(kbdev, js, 0); + katom[1] = kbase_gpu_inspect(kbdev, js, 1); + WARN_ON(katom[1] && !katom[0]); + + for (idx = 0; idx < SLOT_RB_SIZE; idx++) { + bool cores_ready; + int ret; + + if (!katom[idx]) + continue; + + switch (katom[idx]->gpu_rb_state) { + case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: + /* Should be impossible */ + WARN(1, "Attempting to update atom not in ringbuffer\n"); + break; + + case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: + if (kbase_js_atom_blocked_on_x_dep(katom[idx])) + break; + + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV; + + /* ***TRANSITION TO HIGHER STATE*** */ + /* fallthrough */ + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: + if (kbase_gpu_check_secure_atoms(kbdev, + !kbase_jd_katom_is_protected( + katom[idx]))) + break; + + if ((idx == 1) && (kbase_jd_katom_is_protected( + katom[0]) != + kbase_jd_katom_is_protected( + katom[1]))) + break; + + if (kbdev->protected_mode_transition) + break; + + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION; + + /* ***TRANSITION TO HIGHER STATE*** */ + /* fallthrough */ + case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: + + /* + * Exiting protected mode must be done before + * the references on the cores are taken as + * a power down the L2 is required which + * can't happen after the references for this + * atom are taken. + */ + + if (!kbase_gpu_in_protected_mode(kbdev) && + kbase_jd_katom_is_protected(katom[idx])) { + /* Atom needs to transition into protected mode. 
*/ + ret = kbase_jm_enter_protected_mode(kbdev, + katom, idx, js); + if (ret) + break; + } else if (kbase_gpu_in_protected_mode(kbdev) && + !kbase_jd_katom_is_protected(katom[idx])) { + /* Atom needs to transition out of protected mode. */ + ret = kbase_jm_exit_protected_mode(kbdev, + katom, idx, js); + if (ret) + break; + } + katom[idx]->protected_state.exit = + KBASE_ATOM_EXIT_PROTECTED_CHECK; + + /* Atom needs no protected mode transition. */ + + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE; + + /* ***TRANSITION TO HIGHER STATE*** */ + /* fallthrough */ + case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: + if (katom[idx]->will_fail_event_code) { + kbase_gpu_mark_atom_for_return(kbdev, + katom[idx]); + /* Set EVENT_DONE so this atom will be + completed, not unpulled. */ + katom[idx]->event_code = + BASE_JD_EVENT_DONE; + /* Only return if head atom or previous + * atom already removed - as atoms must + * be returned in order. */ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); + kbase_jm_return_atom_to_js(kbdev, katom[idx]); + } + break; + } + + cores_ready = kbase_pm_cores_requested(kbdev, + true); + + if (katom[idx]->event_code == + BASE_JD_EVENT_PM_EVENT) { + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_RETURN_TO_JS; + break; + } + + if (!cores_ready) + break; + + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_READY; + + /* ***TRANSITION TO HIGHER STATE*** */ + /* fallthrough */ + case KBASE_ATOM_GPU_RB_READY: + + if (idx == 1) { + /* Only submit if head atom or previous + * atom already submitted */ + if ((katom[0]->gpu_rb_state != + KBASE_ATOM_GPU_RB_SUBMITTED && + katom[0]->gpu_rb_state != + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) + break; + + /* If intra-slot serialization in use + * then don't submit atom to NEXT slot + */ + if (kbdev->serialize_jobs & + KBASE_SERIALIZE_INTRA_SLOT) + break; + } + + /* If inter-slot serialization in use then don't + * 
submit atom if any other slots are in use */ + if ((kbdev->serialize_jobs & + KBASE_SERIALIZE_INTER_SLOT) && + other_slots_busy(kbdev, js)) + break; + +#ifdef CONFIG_MALI_GEM5_BUILD + if (!kbasep_jm_is_js_free(kbdev, js, + katom[idx]->kctx)) + break; +#endif + /* Check if this job needs the cycle counter + * enabled before submission */ + if (katom[idx]->core_req & BASE_JD_REQ_PERMON) + kbase_pm_request_gpu_cycle_counter_l2_is_on( + kbdev); + + kbase_job_hw_submit(kbdev, katom[idx], js); + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_SUBMITTED; + + /* Inform power management at start/finish of + * atom so it can update its GPU utilisation + * metrics. */ + kbase_pm_metrics_update(kbdev, + &katom[idx]->start_timestamp); + + /* ***TRANSITION TO HIGHER STATE*** */ + /* fallthrough */ + case KBASE_ATOM_GPU_RB_SUBMITTED: + /* Atom submitted to HW, nothing else to do */ + break; + + case KBASE_ATOM_GPU_RB_RETURN_TO_JS: + /* Only return if head atom or previous atom + * already removed - as atoms must be returned + * in order */ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); + kbase_jm_return_atom_to_js(kbdev, + katom[idx]); + } + break; + } + } + } +} + + +void kbase_backend_run_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + dev_dbg(kbdev->dev, "Backend running atom %p\n", (void *)katom); + + kbase_gpu_enqueue_atom(kbdev, katom); + kbase_backend_slot_update(kbdev); +} + +#define HAS_DEP(katom) (katom->pre_dep || katom->atom_flags & \ + (KBASE_KATOM_FLAG_X_DEP_BLOCKED | KBASE_KATOM_FLAG_FAIL_BLOCKER)) + +bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, + u32 completion_code) +{ + struct kbase_jd_atom *katom; + struct kbase_jd_atom *next_katom; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + katom = kbase_gpu_inspect(kbdev, js, 0); + next_katom = kbase_gpu_inspect(kbdev, js, 1); + + if (next_katom && katom->kctx 
== next_katom->kctx && + next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED && + (HAS_DEP(next_katom) || next_katom->sched_priority == + katom->sched_priority) && + (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO)) + != 0 || + kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI)) + != 0)) { + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), + JS_COMMAND_NOP); + next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; + + if (completion_code == BASE_JD_EVENT_STOPPED) { + KBASE_TLSTREAM_TL_NRET_ATOM_LPU(kbdev, next_katom, + &kbdev->gpu_props.props.raw_props.js_features + [next_katom->slot_nr]); + KBASE_TLSTREAM_TL_NRET_ATOM_AS(kbdev, next_katom, &kbdev->as + [next_katom->kctx->as_nr]); + KBASE_TLSTREAM_TL_NRET_CTX_LPU(kbdev, next_katom->kctx, + &kbdev->gpu_props.props.raw_props.js_features + [next_katom->slot_nr]); + } + + if (next_katom->core_req & BASE_JD_REQ_PERMON) + kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + + return true; + } + + return false; +} + +void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, + u32 completion_code, + u64 job_tail, + ktime_t *end_timestamp) +{ + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); + struct kbase_context *kctx = katom->kctx; + + dev_dbg(kbdev->dev, + "Atom %p completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n", + (void *)katom, completion_code, job_tail, js); + + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* + * When a hard-stop is followed close after a soft-stop, the completion + * code may be set to STOPPED, even though the job is terminated + */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8438)) { + if (completion_code == BASE_JD_EVENT_STOPPED && + (katom->atom_flags & + KBASE_KATOM_FLAG_BEEN_HARD_STOPPED)) { + completion_code = BASE_JD_EVENT_TERMINATED; + } + } + + if ((katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) && + completion_code != BASE_JD_EVENT_DONE && + !(completion_code & BASE_JD_SW_EVENT)) { + /* When a job chain fails, on a T60x or 
when + * BASE_JD_REQ_SKIP_CACHE_END is set, the GPU cache is not + * flushed. To prevent future evictions causing possible memory + * corruption we need to flush the cache manually before any + * affected memory gets reused. */ + katom->need_cache_flush_cores_retained = true; + } + + katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); + + if (completion_code == BASE_JD_EVENT_STOPPED) { + struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, + 0); + + /* + * Dequeue next atom from ringbuffers on same slot if required. + * This atom will already have been removed from the NEXT + * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that + * the atoms on this slot are returned in the correct order. + */ + if (next_katom && katom->kctx == next_katom->kctx && + next_katom->sched_priority == + katom->sched_priority) { + WARN_ON(next_katom->gpu_rb_state == + KBASE_ATOM_GPU_RB_SUBMITTED); + kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); + kbase_jm_return_atom_to_js(kbdev, next_katom); + } + } else if (completion_code != BASE_JD_EVENT_DONE) { + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + int i; + + if (!kbase_ctx_flag(katom->kctx, KCTX_DYING)) { + meson_gpu_data_invalid_count ++; + dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)", + js, completion_code, + kbase_gpu_exception_name( + completion_code)); + } + +#if KBASE_KTRACE_DUMP_ON_JOB_SLOT_ERROR != 0 + KBASE_KTRACE_DUMP(kbdev); +#endif + kbasep_js_clear_submit_allowed(js_devdata, katom->kctx); + + /* + * Remove all atoms on the same context from ringbuffers. This + * will not remove atoms that are already on the GPU, as these + * are guaranteed not to have fail dependencies on the failed + * atom. 
+ */ + for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) { + struct kbase_jd_atom *katom_idx0 = + kbase_gpu_inspect(kbdev, i, 0); + struct kbase_jd_atom *katom_idx1 = + kbase_gpu_inspect(kbdev, i, 1); + + if (katom_idx0 && katom_idx0->kctx == katom->kctx && + HAS_DEP(katom_idx0) && + katom_idx0->gpu_rb_state != + KBASE_ATOM_GPU_RB_SUBMITTED) { + /* Dequeue katom_idx0 from ringbuffer */ + kbase_gpu_dequeue_atom(kbdev, i, end_timestamp); + + if (katom_idx1 && + katom_idx1->kctx == katom->kctx + && HAS_DEP(katom_idx1) && + katom_idx0->gpu_rb_state != + KBASE_ATOM_GPU_RB_SUBMITTED) { + /* Dequeue katom_idx1 from ringbuffer */ + kbase_gpu_dequeue_atom(kbdev, i, + end_timestamp); + + katom_idx1->event_code = + BASE_JD_EVENT_STOPPED; + kbase_jm_return_atom_to_js(kbdev, + katom_idx1); + } + katom_idx0->event_code = BASE_JD_EVENT_STOPPED; + kbase_jm_return_atom_to_js(kbdev, katom_idx0); + + } else if (katom_idx1 && + katom_idx1->kctx == katom->kctx && + HAS_DEP(katom_idx1) && + katom_idx1->gpu_rb_state != + KBASE_ATOM_GPU_RB_SUBMITTED) { + /* Can not dequeue this atom yet - will be + * dequeued when atom at idx0 completes */ + katom_idx1->event_code = BASE_JD_EVENT_STOPPED; + kbase_gpu_mark_atom_for_return(kbdev, + katom_idx1); + } + } + } + + KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc, js, completion_code); + + if (job_tail != 0 && job_tail != katom->jc) { + /* Some of the job has been executed */ + dev_dbg(kbdev->dev, + "Update job chain address of atom %p to resume from 0x%llx\n", + (void *)katom, job_tail); + + katom->jc = job_tail; + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx, + katom, job_tail, js); + } + + /* Only update the event code for jobs that weren't cancelled */ + if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED) + katom->event_code = (enum base_jd_event_code)completion_code; + + /* Complete the job, and start new ones + * + * Also defer remaining work onto the workqueue: + * - Re-queue Soft-stopped jobs + 
* - For any other jobs, queue the job back into the dependency system + * - Schedule out the parent context if necessary, and schedule a new + * one in. + */ +#ifdef CONFIG_GPU_TRACEPOINTS + { + /* The atom in the HEAD */ + struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, + 0); + + if (next_katom && next_katom->gpu_rb_state == + KBASE_ATOM_GPU_RB_SUBMITTED) { + char js_string[16]; + + trace_gpu_sched_switch(kbasep_make_job_slot_string(js, + js_string, + sizeof(js_string)), + ktime_to_ns(*end_timestamp), + (u32)next_katom->kctx->id, 0, + next_katom->work_id); + kbdev->hwaccess.backend.slot_rb[js].last_context = + next_katom->kctx; + } else { + char js_string[16]; + + trace_gpu_sched_switch(kbasep_make_job_slot_string(js, + js_string, + sizeof(js_string)), + ktime_to_ns(ktime_get()), 0, 0, + 0); + kbdev->hwaccess.backend.slot_rb[js].last_context = 0; + } + } +#endif + + if (kbdev->serialize_jobs & KBASE_SERIALIZE_RESET) + kbase_reset_gpu_silent(kbdev); + + if (completion_code == BASE_JD_EVENT_STOPPED) + katom = kbase_jm_return_atom_to_js(kbdev, katom); + else + katom = kbase_jm_complete(kbdev, katom, end_timestamp); + + if (katom) { + dev_dbg(kbdev->dev, + "Cross-slot dependency %p has become runnable.\n", + (void *)katom); + + /* Check if there are lower priority jobs to soft stop */ + kbase_job_slot_ctx_priority_check_locked(kctx, katom); + + kbase_jm_try_kick(kbdev, 1 << katom->slot_nr); + } + + /* For partial shader core off L2 cache flush */ + kbase_pm_update_state(kbdev); + + /* Job completion may have unblocked other atoms. 
Try to update all job + * slots */ + kbase_backend_slot_update(kbdev); +} + +void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) +{ + int js; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* Reset should always take the GPU out of protected mode */ + WARN_ON(kbase_gpu_in_protected_mode(kbdev)); + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + int atom_idx = 0; + int idx; + + for (idx = 0; idx < SLOT_RB_SIZE; idx++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, + js, atom_idx); + bool keep_in_jm_rb = false; + + if (!katom) + break; + if (katom->protected_state.exit == + KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) { + /* protected mode sanity checks */ + KBASE_DEBUG_ASSERT_MSG( + kbase_jd_katom_is_protected(katom) == kbase_gpu_in_protected_mode(kbdev), + "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", + kbase_jd_katom_is_protected(katom), kbase_gpu_in_protected_mode(kbdev)); + KBASE_DEBUG_ASSERT_MSG( + (kbase_jd_katom_is_protected(katom) && js == 0) || + !kbase_jd_katom_is_protected(katom), + "Protected atom on JS%d not supported", js); + } + if ((katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) && + !kbase_ctx_flag(katom->kctx, KCTX_DYING)) + keep_in_jm_rb = true; + + kbase_gpu_release_atom(kbdev, katom, NULL); + + /* + * If the atom wasn't on HW when the reset was issued + * then leave it in the RB and next time we're kicked + * it will be processed again from the starting state. + */ + if (keep_in_jm_rb) { + katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; + /* As the atom was not removed, increment the + * index so that we read the correct atom in the + * next iteration. */ + atom_idx++; + continue; + } + + /* + * The atom was on the HW when the reset was issued + * all we can do is fail the atom. 
+ */ + kbase_gpu_dequeue_atom(kbdev, js, NULL); + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + kbase_jm_complete(kbdev, katom, end_timestamp); + } + } + + /* Re-enable GPU hardware counters if we're resetting from protected + * mode. + */ + kbdev->protected_mode_hwcnt_desired = true; + if (kbdev->protected_mode_hwcnt_disabled) { + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + kbdev->protected_mode_hwcnt_disabled = false; + + KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev, kbdev); + } + + kbdev->protected_mode_transition = false; + kbase_pm_protected_override_disable(kbdev); +} + +static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, + int js, + struct kbase_jd_atom *katom, + u32 action) +{ + u32 hw_action = action & JS_COMMAND_MASK; + + kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom); + kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action, + katom->core_req, katom); + katom->kctx->blocked_js[js][katom->sched_priority] = true; +} + +static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + u32 action, + bool disjoint) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; + kbase_gpu_mark_atom_for_return(kbdev, katom); + katom->kctx->blocked_js[katom->slot_nr][katom->sched_priority] = true; + + if (disjoint) + kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, + katom); +} + +static int should_stop_x_dep_slot(struct kbase_jd_atom *katom) +{ + if (katom->x_post_dep) { + struct kbase_jd_atom *dep_atom = katom->x_post_dep; + + if (dep_atom->gpu_rb_state != + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB && + dep_atom->gpu_rb_state != + KBASE_ATOM_GPU_RB_RETURN_TO_JS) + return dep_atom->slot_nr; + } + return -1; +} + +bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js, + struct kbase_jd_atom *katom, + u32 action) +{ + struct kbase_jd_atom *katom_idx0; + struct 
kbase_jd_atom *katom_idx1; + + bool katom_idx0_valid, katom_idx1_valid; + + bool ret = false; + + int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1; + int prio_idx0 = 0, prio_idx1 = 0; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + katom_idx0 = kbase_gpu_inspect(kbdev, js, 0); + katom_idx1 = kbase_gpu_inspect(kbdev, js, 1); + + if (katom_idx0) + prio_idx0 = katom_idx0->sched_priority; + if (katom_idx1) + prio_idx1 = katom_idx1->sched_priority; + + if (katom) { + katom_idx0_valid = (katom_idx0 == katom); + /* If idx0 is to be removed and idx1 is on the same context, + * then idx1 must also be removed otherwise the atoms might be + * returned out of order */ + if (katom_idx1) + katom_idx1_valid = (katom_idx1 == katom) || + (katom_idx0_valid && + (katom_idx0->kctx == + katom_idx1->kctx)); + else + katom_idx1_valid = false; + } else { + katom_idx0_valid = (katom_idx0 && + (!kctx || katom_idx0->kctx == kctx)); + katom_idx1_valid = (katom_idx1 && + (!kctx || katom_idx1->kctx == kctx) && + prio_idx0 == prio_idx1); + } + + if (katom_idx0_valid) + stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0); + if (katom_idx1_valid) + stop_x_dep_idx1 = should_stop_x_dep_slot(katom_idx1); + + if (katom_idx0_valid) { + if (katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { + /* Simple case - just dequeue and return */ + kbase_gpu_dequeue_atom(kbdev, js, NULL); + if (katom_idx1_valid) { + kbase_gpu_dequeue_atom(kbdev, js, NULL); + katom_idx1->event_code = + BASE_JD_EVENT_REMOVED_FROM_NEXT; + kbase_jm_return_atom_to_js(kbdev, katom_idx1); + katom_idx1->kctx->blocked_js[js][prio_idx1] = + true; + } + + katom_idx0->event_code = + BASE_JD_EVENT_REMOVED_FROM_NEXT; + kbase_jm_return_atom_to_js(kbdev, katom_idx0); + katom_idx0->kctx->blocked_js[js][prio_idx0] = true; + } else { + /* katom_idx0 is on GPU */ + if (katom_idx1_valid && katom_idx1->gpu_rb_state == + KBASE_ATOM_GPU_RB_SUBMITTED) { + /* katom_idx0 and katom_idx1 are on GPU */ + + if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, 
+ JS_COMMAND_NEXT)) == 0) { + /* idx0 has already completed - stop + * idx1 if needed*/ + if (katom_idx1_valid) { + kbase_gpu_stop_atom(kbdev, js, + katom_idx1, + action); + ret = true; + } + } else { + /* idx1 is in NEXT registers - attempt + * to remove */ + kbase_reg_write(kbdev, + JOB_SLOT_REG(js, + JS_COMMAND_NEXT), + JS_COMMAND_NOP); + + if (kbase_reg_read(kbdev, + JOB_SLOT_REG(js, + JS_HEAD_NEXT_LO)) + != 0 || + kbase_reg_read(kbdev, + JOB_SLOT_REG(js, + JS_HEAD_NEXT_HI)) + != 0) { + /* idx1 removed successfully, + * will be handled in IRQ */ + kbase_gpu_remove_atom(kbdev, + katom_idx1, + action, true); + stop_x_dep_idx1 = + should_stop_x_dep_slot(katom_idx1); + + /* stop idx0 if still on GPU */ + kbase_gpu_stop_atom(kbdev, js, + katom_idx0, + action); + ret = true; + } else if (katom_idx1_valid) { + /* idx0 has already completed, + * stop idx1 if needed */ + kbase_gpu_stop_atom(kbdev, js, + katom_idx1, + action); + ret = true; + } + } + } else if (katom_idx1_valid) { + /* idx1 not on GPU but must be dequeued*/ + + /* idx1 will be handled in IRQ */ + kbase_gpu_remove_atom(kbdev, katom_idx1, action, + false); + /* stop idx0 */ + /* This will be repeated for anything removed + * from the next registers, since their normal + * flow was also interrupted, and this function + * might not enter disjoint state e.g. 
if we + * don't actually do a hard stop on the head + * atom */ + kbase_gpu_stop_atom(kbdev, js, katom_idx0, + action); + ret = true; + } else { + /* no atom in idx1 */ + /* just stop idx0 */ + kbase_gpu_stop_atom(kbdev, js, katom_idx0, + action); + ret = true; + } + } + } else if (katom_idx1_valid) { + if (katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { + /* Mark for return */ + /* idx1 will be returned once idx0 completes */ + kbase_gpu_remove_atom(kbdev, katom_idx1, action, + false); + } else { + /* idx1 is on GPU */ + if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, + JS_COMMAND_NEXT)) == 0) { + /* idx0 has already completed - stop idx1 */ + kbase_gpu_stop_atom(kbdev, js, katom_idx1, + action); + ret = true; + } else { + /* idx1 is in NEXT registers - attempt to + * remove */ + kbase_reg_write(kbdev, JOB_SLOT_REG(js, + JS_COMMAND_NEXT), + JS_COMMAND_NOP); + + if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, + JS_HEAD_NEXT_LO)) != 0 || + kbase_reg_read(kbdev, JOB_SLOT_REG(js, + JS_HEAD_NEXT_HI)) != 0) { + /* idx1 removed successfully, will be + * handled in IRQ once idx0 completes */ + kbase_gpu_remove_atom(kbdev, katom_idx1, + action, + false); + } else { + /* idx0 has already completed - stop + * idx1 */ + kbase_gpu_stop_atom(kbdev, js, + katom_idx1, + action); + ret = true; + } + } + } + } + + + if (stop_x_dep_idx0 != -1) + kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx0, + NULL, action); + + if (stop_x_dep_idx1 != -1) + kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx1, + NULL, action); + + return ret; +} + +void kbase_backend_cache_clean(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + if (katom->need_cache_flush_cores_retained) { + kbase_gpu_start_cache_clean(kbdev); + kbase_gpu_wait_cache_clean(kbdev); + + katom->need_cache_flush_cores_retained = false; + } +} + +void kbase_backend_complete_wq(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + /* + * If cache flush required due to HW workaround then 
perform the flush + * now + */ + kbase_backend_cache_clean(kbdev, katom); +} + +void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, + base_jd_core_req core_req) +{ + if (!kbdev->pm.active_count) { + mutex_lock(&kbdev->js_data.runpool_mutex); + mutex_lock(&kbdev->pm.lock); + kbase_pm_update_active(kbdev); + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&kbdev->js_data.runpool_mutex); + } +} + +void kbase_gpu_dump_slots(struct kbase_device *kbdev) +{ + unsigned long flags; + int js; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n"); + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + int idx; + + for (idx = 0; idx < SLOT_RB_SIZE; idx++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, + js, + idx); + + if (katom) + dev_info(kbdev->dev, + " js%d idx%d : katom=%p gpu_rb_state=%d\n", + js, idx, katom, katom->gpu_rb_state); + else + dev_info(kbdev->dev, " js%d idx%d : empty\n", + js, idx); + } + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h new file mode 100644 index 0000000..c3b9f2d --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h @@ -0,0 +1,83 @@ +/* + * + * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/* + * Register-based HW access backend specific APIs + */ + +#ifndef _KBASE_HWACCESS_GPU_H_ +#define _KBASE_HWACCESS_GPU_H_ + +#include + +/** + * kbase_gpu_irq_evict - Evict an atom from a NEXT slot + * + * @kbdev: Device pointer + * @js: Job slot to evict from + * @completion_code: Event code from job that was run. + * + * Evict the atom in the NEXT slot for the specified job slot. This function is + * called from the job complete IRQ handler when the previous job has failed. + * + * Return: true if job evicted from NEXT registers, false otherwise + */ +bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js, + u32 completion_code); + +/** + * kbase_gpu_complete_hw - Complete an atom on job slot js + * + * @kbdev: Device pointer + * @js: Job slot that has completed + * @completion_code: Event code from job that has completed + * @job_tail: The tail address from the hardware if the job has partially + * completed + * @end_timestamp: Time of completion + */ +void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, + u32 completion_code, + u64 job_tail, + ktime_t *end_timestamp); + +/** + * kbase_gpu_inspect - Inspect the contents of the HW access ringbuffer + * + * @kbdev: Device pointer + * @js: Job slot to inspect + * @idx: Index into ringbuffer. 0 is the job currently running on + * the slot, 1 is the job waiting, all other values are invalid. 
+ * Return: The atom at that position in the ringbuffer + * or NULL if no atom present + */ +struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, + int idx); + +/** + * kbase_gpu_dump_slots - Print the contents of the slot ringbuffers + * + * @kbdev: Device pointer + */ +void kbase_gpu_dump_slots(struct kbase_device *kbdev); + +#endif /* _KBASE_HWACCESS_GPU_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c new file mode 100644 index 0000000..fcc0437 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c @@ -0,0 +1,351 @@ +/* + * + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/* + * Register-based HW access backend specific job scheduler APIs + */ + +#include +#include +#include +#include +#include + +/* + * Hold the runpool_mutex for this + */ +static inline bool timer_callback_should_run(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + s8 nr_running_ctxs; + + lockdep_assert_held(&kbdev->js_data.runpool_mutex); + + /* Timer must stop if we are suspending */ + if (backend->suspend_timer) + return false; + + /* nr_contexts_runnable is updated with the runpool_mutex. However, the + * locking in the caller gives us a barrier that ensures + * nr_contexts_runnable is up-to-date for reading */ + nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable); + +#ifdef CONFIG_MALI_DEBUG + if (kbdev->js_data.softstop_always) { + /* Debug support for allowing soft-stop on a single context */ + return true; + } +#endif /* CONFIG_MALI_DEBUG */ + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) { + /* Timeouts would have to be 4x longer (due to micro- + * architectural design) to support OpenCL conformance tests, so + * only run the timer when there's: + * - 2 or more CL contexts + * - 1 or more GLES contexts + * + * NOTE: A context that has both Compute and Non- + * Compute jobs will be treated as an OpenCL context (hence, we + * don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE).
+ */ + { + s8 nr_compute_ctxs = + kbasep_js_ctx_attr_count_on_runpool(kbdev, + KBASEP_JS_CTX_ATTR_COMPUTE); + s8 nr_noncompute_ctxs = nr_running_ctxs - + nr_compute_ctxs; + + return (bool) (nr_compute_ctxs >= 2 || + nr_noncompute_ctxs > 0); + } + } else { + /* Run the timer callback whenever you have at least 1 context + */ + return (bool) (nr_running_ctxs > 0); + } +} + +static enum hrtimer_restart timer_callback(struct hrtimer *timer) +{ + unsigned long flags; + struct kbase_device *kbdev; + struct kbasep_js_device_data *js_devdata; + struct kbase_backend_data *backend; + int s; + bool reset_needed = false; + + KBASE_DEBUG_ASSERT(timer != NULL); + + backend = container_of(timer, struct kbase_backend_data, + scheduling_timer); + kbdev = container_of(backend, struct kbase_device, hwaccess.backend); + js_devdata = &kbdev->js_data; + + /* Loop through the slots */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) { + struct kbase_jd_atom *atom = NULL; + + if (kbase_backend_nr_atoms_on_slot(kbdev, s) > 0) { + atom = kbase_gpu_inspect(kbdev, s, 0); + KBASE_DEBUG_ASSERT(atom != NULL); + } + + if (atom != NULL) { + /* The current version of the model doesn't support + * Soft-Stop */ + if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) { + u32 ticks = atom->ticks++; + +#if !defined(CONFIG_MALI_JOB_DUMP) && !defined(CONFIG_MALI_VECTOR_DUMP) + u32 soft_stop_ticks, hard_stop_ticks, + gpu_reset_ticks; + if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { + soft_stop_ticks = + js_devdata->soft_stop_ticks_cl; + hard_stop_ticks = + js_devdata->hard_stop_ticks_cl; + gpu_reset_ticks = + js_devdata->gpu_reset_ticks_cl; + } else { + soft_stop_ticks = + js_devdata->soft_stop_ticks; + hard_stop_ticks = + js_devdata->hard_stop_ticks_ss; + gpu_reset_ticks = + js_devdata->gpu_reset_ticks_ss; + } + + /* If timeouts have been changed then ensure + * that atom tick count is not greater than the + * new soft_stop timeout. 
This ensures that + * atoms do not miss any of the timeouts due to + * races between this worker and the thread + * changing the timeouts. */ + if (backend->timeouts_updated && + ticks > soft_stop_ticks) + ticks = atom->ticks = soft_stop_ticks; + + /* Job is Soft-Stoppable */ + if (ticks == soft_stop_ticks) { + /* Job has been scheduled for at least + * js_devdata->soft_stop_ticks ticks. + * Soft stop the slot so we can run + * other jobs. + */ +#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS + int disjoint_threshold = + KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD; + u32 softstop_flags = 0u; + + dev_dbg(kbdev->dev, "Soft-stop"); + /* nr_user_contexts_running is updated + * with the runpool_mutex, but we can't + * take that here. + * + * However, if it's about to be + * increased then the new context can't + * run any jobs until they take the + * hwaccess_lock, so it's OK to observe + * the older value. + * + * Similarly, if it's about to be + * decreased, the last job from another + * context has already finished, so it's + * not too bad that we observe the older + * value and register a disjoint event + * when we try soft-stopping */ + if (js_devdata->nr_user_contexts_running + >= disjoint_threshold) + softstop_flags |= + JS_COMMAND_SW_CAUSES_DISJOINT; + + kbase_job_slot_softstop_swflags(kbdev, + s, atom, softstop_flags); +#endif + } else if (ticks == hard_stop_ticks) { + /* Job has been scheduled for at least + * js_devdata->hard_stop_ticks_ss ticks. + * It should have been soft-stopped by + * now. Hard stop the slot. + */ +#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS + int ms = + js_devdata->scheduling_period_ns + / 1000000u; + dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)", + (unsigned long)ticks, + (unsigned long)ms); + kbase_job_slot_hardstop(atom->kctx, s, + atom); +#endif + } else if (ticks == gpu_reset_ticks) { + /* Job has been scheduled for at least + * js_devdata->gpu_reset_ticks_ss ticks. 
+ * It should have left the GPU by now. + * Signal that the GPU needs to be + * reset. + */ + reset_needed = true; + } +#else /* !CONFIG_MALI_JOB_DUMP */ + /* NOTE: During CONFIG_MALI_JOB_DUMP, we use + * the alternate timeouts, which makes the hard- + * stop and GPU reset timeout much longer. We + * also ensure that we don't soft-stop at all. + */ + if (ticks == js_devdata->soft_stop_ticks) { + /* Job has been scheduled for at least + * js_devdata->soft_stop_ticks. We do + * not soft-stop during + * CONFIG_MALI_JOB_DUMP, however. + */ + dev_dbg(kbdev->dev, "Soft-stop"); + } else if (ticks == + js_devdata->hard_stop_ticks_dumping) { + /* Job has been scheduled for at least + * js_devdata->hard_stop_ticks_dumping + * ticks. Hard stop the slot. + */ +#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS + int ms = + js_devdata->scheduling_period_ns + / 1000000u; + dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)", + (unsigned long)ticks, + (unsigned long)ms); + kbase_job_slot_hardstop(atom->kctx, s, + atom); +#endif + } else if (ticks == + js_devdata->gpu_reset_ticks_dumping) { + /* Job has been scheduled for at least + * js_devdata->gpu_reset_ticks_dumping + * ticks. It should have left the GPU by + * now. Signal that the GPU needs to be + * reset. + */ + reset_needed = true; + } +#endif /* !CONFIG_MALI_JOB_DUMP */ + } + } + } + if (reset_needed) { + dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). 
Issueing GPU soft-reset to resolve."); + + if (kbase_prepare_to_reset_gpu_locked(kbdev)) + kbase_reset_gpu_locked(kbdev); + } + /* the timer is re-issued if there are contexts in the run-pool */ + + if (backend->timer_running) + hrtimer_start(&backend->scheduling_timer, + HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), + HRTIMER_MODE_REL); + + backend->timeouts_updated = false; + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return HRTIMER_NORESTART; +} + +void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + unsigned long flags; + + lockdep_assert_held(&js_devdata->runpool_mutex); + + if (!timer_callback_should_run(kbdev)) { + /* Take spinlock to force synchronisation with timer */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + backend->timer_running = false; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + /* From now on, return value of timer_callback_should_run() will + * also cause the timer to not requeue itself.
Its return value + * cannot change, because it depends on variables updated with + * the runpool_mutex held, which the caller of this must also + * hold */ + hrtimer_cancel(&backend->scheduling_timer); + } + + if (timer_callback_should_run(kbdev) && !backend->timer_running) { + /* Take spinlock to force synchronisation with timer */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + backend->timer_running = true; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + hrtimer_start(&backend->scheduling_timer, + HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), + HRTIMER_MODE_REL); + + KBASE_KTRACE_ADD_JM(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u, 0u); + } +} + +int kbase_backend_timer_init(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + backend->scheduling_timer.function = timer_callback; + backend->timer_running = false; + + return 0; +} + +void kbase_backend_timer_term(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + hrtimer_cancel(&backend->scheduling_timer); +} + +void kbase_backend_timer_suspend(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + backend->suspend_timer = true; + + kbase_backend_ctx_count_changed(kbdev); +} + +void kbase_backend_timer_resume(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + backend->suspend_timer = false; + + kbase_backend_ctx_count_changed(kbdev); +} + +void kbase_backend_timeouts_changed(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + backend->timeouts_updated = true; +} + diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h new file mode 100644 index 
0000000..6576e55 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h @@ -0,0 +1,74 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/* + * Register-based HW access backend specific job scheduler APIs + */ + +#ifndef _KBASE_JS_BACKEND_H_ +#define _KBASE_JS_BACKEND_H_ + +/** + * kbase_backend_timer_init() - Initialise the JS scheduling timer + * @kbdev: Device pointer + * + * This function should be called at driver initialisation + * + * Return: 0 on success + */ +int kbase_backend_timer_init(struct kbase_device *kbdev); + +/** + * kbase_backend_timer_term() - Terminate the JS scheduling timer + * @kbdev: Device pointer + * + * This function should be called at driver termination + */ +void kbase_backend_timer_term(struct kbase_device *kbdev); + +/** + * kbase_backend_timer_suspend - Suspend is happening, stop the JS scheduling + * timer + * @kbdev: Device pointer + * + * This function should be called on suspend, after the active count has reached + * zero. This is required as the timer may have been started on job submission + * to the job scheduler, but before jobs are submitted to the GPU. + * + * Caller must hold runpool_mutex. 
+ */ +void kbase_backend_timer_suspend(struct kbase_device *kbdev); + +/** + * kbase_backend_timer_resume - Resume is happening, re-evaluate the JS + * scheduling timer + * @kbdev: Device pointer + * + * This function should be called on resume. Note that it is not guaranteed to + * re-start the timer, only evaluate whether it should be re-started. + * + * Caller must hold runpool_mutex. + */ +void kbase_backend_timer_resume(struct kbase_device *kbdev); + +#endif /* _KBASE_JS_BACKEND_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.c new file mode 100644 index 0000000..e67d12b --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html.
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include +#include +#include "mali_kbase_l2_mmu_config.h" + +/** + * struct l2_mmu_config_limit_region + * + * @value: The default value to load into the L2_MMU_CONFIG register + * @mask: The shifted mask of the field in the L2_MMU_CONFIG register + * @shift: The shift of where the field starts in the L2_MMU_CONFIG register + * This should be the same value as the smaller of the two mask + * values + */ +struct l2_mmu_config_limit_region { + u32 value, mask, shift; +}; + +/** + * struct l2_mmu_config_limit + * + * @product_model: The GPU for which this entry applies + * @read: Values for the read limit field + * @write: Values for the write limit field + */ +struct l2_mmu_config_limit { + u32 product_model; + struct l2_mmu_config_limit_region read; + struct l2_mmu_config_limit_region write; +}; + +/* + * Zero represents no limit + * + * For LBEX TBEX TTRX and TNAX: + * The value represents the number of outstanding reads (6 bits) or writes (5 bits) + * + * For all other GPUS it is a fraction see: mali_kbase_config_defaults.h + */ +static const struct l2_mmu_config_limit limits[] = { + /* GPU read write */ + {GPU_ID2_PRODUCT_LBEX, {0, GENMASK(10, 5), 5}, {0, GENMASK(16, 12), 12} }, + {GPU_ID2_PRODUCT_TBEX, {0, GENMASK(10, 5), 5}, {0, GENMASK(16, 12), 12} }, + {GPU_ID2_PRODUCT_TTRX, {0, GENMASK(12, 7), 7}, {0, GENMASK(17, 13), 13} }, + {GPU_ID2_PRODUCT_TNAX, {0, GENMASK(12, 7), 7}, {0, GENMASK(17, 13), 13} }, + {GPU_ID2_PRODUCT_TGOX, + {KBASE_3BIT_AID_32, GENMASK(14, 12), 12}, + {KBASE_3BIT_AID_32, GENMASK(17, 15), 15} }, + {GPU_ID2_PRODUCT_TNOX, + {KBASE_3BIT_AID_32, GENMASK(14, 12), 12}, + {KBASE_3BIT_AID_32, GENMASK(17, 15), 15} }, +}; + +int kbase_set_mmu_quirks(struct kbase_device *kbdev) +{ + /* All older GPUs had 2 bits for both fields, this is a default */ + struct l2_mmu_config_limit limit = { + 0, /* Any GPU not in the limits array defined above */ + {KBASE_AID_32, GENMASK(25, 24), 
24}, + {KBASE_AID_32, GENMASK(27, 26), 26} + }; + u32 product_model, gpu_id; + u32 mmu_config; + int i; + + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + product_model = gpu_id & GPU_ID2_PRODUCT_MODEL; + + /* Limit the GPU bus bandwidth if the platform needs this. */ + for (i = 0; i < ARRAY_SIZE(limits); i++) { + if (product_model == limits[i].product_model) { + limit = limits[i]; + break; + } + } + + mmu_config = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG)); + + if (kbase_is_gpu_lost(kbdev)) + return -EIO; + + mmu_config &= ~(limit.read.mask | limit.write.mask); + /* Can't use FIELD_PREP() macro here as the mask isn't constant */ + mmu_config |= (limit.read.value << limit.read.shift) | + (limit.write.value << limit.write.shift); + + kbdev->hw_quirks_mmu = mmu_config; + + if (kbdev->system_coherency == COHERENCY_ACE) { + /* Allow memory configuration disparity to be ignored, + * we optimize the use of shared memory and thus we + * expect some disparity in the memory configuration. + */ + kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY; + } + + return 0; +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.h new file mode 100644 index 0000000..0c779ac --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.h @@ -0,0 +1,55 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + *//* SPDX-License-Identifier: GPL-2.0 */ +/* + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_L2_MMU_CONFIG_H_ +#define _KBASE_L2_MMU_CONFIG_H_ +/** + * kbase_set_mmu_quirks - Set the hw_quirks_mmu field of kbdev + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Use this function to initialise the hw_quirks_mmu field, for instance to set + * the MAX_READS and MAX_WRITES to sane defaults for each GPU. 
+ * + * Return: Zero for success or a Linux error code + */ +int kbase_set_mmu_quirks(struct kbase_device *kbdev); + +#endif /* _KBASE_L2_MMU_CONFIG_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c new file mode 100644 index 0000000..e33fe0b --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c @@ -0,0 +1,67 @@ +/* + * + * (C) COPYRIGHT 2010-2015, 2018-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * "Always on" power management policy + */ + +#include +#include + +static bool always_on_shaders_needed(struct kbase_device *kbdev) +{ + return true; +} + +static bool always_on_get_core_active(struct kbase_device *kbdev) +{ + return true; +} + +static void always_on_init(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +static void always_on_term(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +/* + * The struct kbase_pm_policy structure for the always_on power policy. + * + * This is the static structure that defines the always_on policy's callbacks + * and name.
+ */ +const struct kbase_pm_policy kbase_pm_always_on_policy_ops = { + "always_on", /* name */ + always_on_init, /* init */ + always_on_term, /* term */ + always_on_shaders_needed, /* shaders_needed */ + always_on_get_core_active, /* get_core_active */ + KBASE_PM_POLICY_ID_ALWAYS_ON, /* id */ +}; + +KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops); diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h new file mode 100644 index 0000000..e7927cf --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h @@ -0,0 +1,81 @@ +/* + * + * (C) COPYRIGHT 2011-2015,2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * "Always on" power management policy + */ + +#ifndef MALI_KBASE_PM_ALWAYS_ON_H +#define MALI_KBASE_PM_ALWAYS_ON_H + +/** + * DOC: + * The "Always on" power management policy has the following + * characteristics: + * + * - When KBase indicates that the GPU will be powered up, but we don't yet + * know which Job Chains are to be run: + * Shader Cores are powered up, regardless of whether or not they will be + * needed later. 
+ * + * - When KBase indicates that Shader Cores are needed to submit the currently + * queued Job Chains: + * Shader Cores are kept powered, regardless of whether or not they will be + * needed + * + * - When KBase indicates that the GPU need not be powered: + * The Shader Cores are kept powered, regardless of whether or not they will + * be needed. The GPU itself is also kept powered, even though it is not + * needed. + * + * This policy is automatically overridden during system suspend: the desired + * core state is ignored, and the cores are forced off regardless of what the + * policy requests. After resuming from suspend, new changes to the desired + * core state made by the policy are honored. + * + * Note: + * + * - KBase indicates the GPU will be powered up when it has a User Process that + * has just started to submit Job Chains. + * + * - KBase indicates the GPU need not be powered when all the Job Chains from + * User Processes have finished, and it is waiting for a User Process to + * submit some more Job Chains. + */ + +/** + * struct kbasep_pm_policy_always_on - Private struct for policy instance data + * @dummy: unused dummy variable + * + * This contains data that is private to the particular power policy that is + * active. + */ +struct kbasep_pm_policy_always_on { + int dummy; +}; + +extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops; + +#endif /* MALI_KBASE_PM_ALWAYS_ON_H */ + diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c new file mode 100644 index 0000000..576c9f2 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c @@ -0,0 +1,747 @@ + /* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/* + * GPU backend implementation of base kernel power management APIs + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data); +static void kbase_pm_hwcnt_disable_worker(struct work_struct *data); +static void kbase_pm_gpu_clock_control_worker(struct work_struct *data); + +int kbase_pm_runtime_init(struct kbase_device *kbdev) +{ + struct kbase_pm_callback_conf *callbacks; + + callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; + if (callbacks) { + kbdev->pm.backend.callback_power_on = + callbacks->power_on_callback; + kbdev->pm.backend.callback_power_off = + callbacks->power_off_callback; + kbdev->pm.backend.callback_power_suspend = + callbacks->power_suspend_callback; + kbdev->pm.backend.callback_power_resume = + callbacks->power_resume_callback; + kbdev->pm.callback_power_runtime_init = + callbacks->power_runtime_init_callback; + kbdev->pm.callback_power_runtime_term = + callbacks->power_runtime_term_callback; + kbdev->pm.backend.callback_power_runtime_on = + callbacks->power_runtime_on_callback; + kbdev->pm.backend.callback_power_runtime_off = + 
callbacks->power_runtime_off_callback; + kbdev->pm.backend.callback_power_runtime_idle = + callbacks->power_runtime_idle_callback; + kbdev->pm.backend.callback_soft_reset = + callbacks->soft_reset_callback; + + if (callbacks->power_runtime_init_callback) + return callbacks->power_runtime_init_callback(kbdev); + else + return 0; + } + + kbdev->pm.backend.callback_power_on = NULL; + kbdev->pm.backend.callback_power_off = NULL; + kbdev->pm.backend.callback_power_suspend = NULL; + kbdev->pm.backend.callback_power_resume = NULL; + kbdev->pm.callback_power_runtime_init = NULL; + kbdev->pm.callback_power_runtime_term = NULL; + kbdev->pm.backend.callback_power_runtime_on = NULL; + kbdev->pm.backend.callback_power_runtime_off = NULL; + kbdev->pm.backend.callback_power_runtime_idle = NULL; + kbdev->pm.backend.callback_soft_reset = NULL; + + return 0; +} + +void kbase_pm_runtime_term(struct kbase_device *kbdev) +{ + if (kbdev->pm.callback_power_runtime_term) { + kbdev->pm.callback_power_runtime_term(kbdev); + } +} + +void kbase_pm_register_access_enable(struct kbase_device *kbdev) +{ + struct kbase_pm_callback_conf *callbacks; + + callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; + + if (callbacks) + callbacks->power_on_callback(kbdev); + + kbdev->pm.backend.gpu_powered = true; +} + +void kbase_pm_register_access_disable(struct kbase_device *kbdev) +{ + struct kbase_pm_callback_conf *callbacks; + + callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; + + if (callbacks) + callbacks->power_off_callback(kbdev); + + kbdev->pm.backend.gpu_powered = false; +} + +int kbase_hwaccess_pm_init(struct kbase_device *kbdev) +{ + int ret = 0; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + mutex_init(&kbdev->pm.lock); + + kbdev->pm.backend.gpu_poweroff_wait_wq = alloc_workqueue("kbase_pm_poweroff_wait", + WQ_HIGHPRI | WQ_UNBOUND, 1); + if (!kbdev->pm.backend.gpu_poweroff_wait_wq) + return -ENOMEM; + + INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work, 
+ kbase_pm_gpu_poweroff_wait_wq); + + kbdev->pm.backend.ca_cores_enabled = ~0ull; + kbdev->pm.backend.gpu_powered = false; + kbdev->pm.suspending = false; +#ifdef CONFIG_MALI_ARBITER_SUPPORT + kbdev->pm.gpu_lost = false; +#endif +#ifdef CONFIG_MALI_DEBUG + kbdev->pm.backend.driver_ready_for_irqs = false; +#endif /* CONFIG_MALI_DEBUG */ + init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait); + + /* Initialise the metrics subsystem */ + ret = kbasep_pm_metrics_init(kbdev); + if (ret) + return ret; + + init_waitqueue_head(&kbdev->pm.backend.reset_done_wait); + kbdev->pm.backend.reset_done = false; + + init_waitqueue_head(&kbdev->pm.zero_active_count_wait); + kbdev->pm.active_count = 0; + + spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock); + + init_waitqueue_head(&kbdev->pm.backend.poweroff_wait); + + if (kbase_pm_ca_init(kbdev) != 0) + goto workq_fail; + + kbase_pm_policy_init(kbdev); + + if (kbase_pm_state_machine_init(kbdev) != 0) + goto pm_state_machine_fail; + + kbdev->pm.backend.hwcnt_desired = false; + kbdev->pm.backend.hwcnt_disabled = true; + INIT_WORK(&kbdev->pm.backend.hwcnt_disable_work, + kbase_pm_hwcnt_disable_worker); + kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); + + if (IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED)) { + kbdev->pm.backend.l2_always_on = false; + kbdev->pm.backend.gpu_clock_slow_down_wa = false; + + return 0; + } + + /* WA1: L2 always_on for GPUs being affected by GPU2017-1336 */ + if (!IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE)) { + kbdev->pm.backend.gpu_clock_slow_down_wa = false; + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2017_1336)) + kbdev->pm.backend.l2_always_on = true; + else + kbdev->pm.backend.l2_always_on = false; + + return 0; + } + + /* WA3: Clock slow down for GPUs being affected by GPU2017-1336 */ + kbdev->pm.backend.l2_always_on = false; + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2017_1336)) { + kbdev->pm.backend.gpu_clock_slow_down_wa = true; 
+ kbdev->pm.backend.gpu_clock_suspend_freq = 0; + kbdev->pm.backend.gpu_clock_slow_down_desired = true; + kbdev->pm.backend.gpu_clock_slowed_down = false; + INIT_WORK(&kbdev->pm.backend.gpu_clock_control_work, + kbase_pm_gpu_clock_control_worker); + } else + kbdev->pm.backend.gpu_clock_slow_down_wa = false; + + return 0; + +pm_state_machine_fail: + kbase_pm_policy_term(kbdev); + kbase_pm_ca_term(kbdev); +workq_fail: + kbasep_pm_metrics_term(kbdev); + return -EINVAL; +} +/** + * kbase_pm_do_poweron - Turn the GPU clocks and interrupts on and update core state + * @kbdev: The kbase device; the caller must hold kbdev->pm.lock + * @is_resume: Forwarded to kbase_pm_clock_on(). When false, the PM state + * machine is additionally re-run under hwaccess_lock. + * + * Does not wait for the desired power state to be reached. + */ +void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume) +{ + lockdep_assert_held(&kbdev->pm.lock); + + /* Turn clocks and interrupts on - no-op if we haven't done a previous + * kbase_pm_clock_off() */ + kbase_pm_clock_on(kbdev, is_resume); + + if (!is_resume) { + unsigned long flags; + + /* Force update of L2 state - if we have abandoned a power off + * then this may be required to power the L2 back on. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + + /* Update core status as required by the policy */ + kbase_pm_update_cores_state(kbdev); + + /* NOTE: We don't wait to reach the desired state, since running atoms + * will wait for that state to be reached anyway */ +} +/** + * kbase_pm_gpu_poweroff_wait_wq - Work item that finishes a GPU power-off + * @data: The gpu_poweroff_wait_work item embedded in kbdev->pm.backend + * + * Waits for the PM state machines to settle, turns the clock off unless a + * power-on was requested in the meantime, then clears + * poweroff_wait_in_progress and wakes the waiters on poweroff_wait. + * + * NOTE(review): the !poweron_required branch declares a second 'flags' + * variable that shadows the function-scope one declared here. + */ +static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) +{ + struct kbase_device *kbdev = container_of(data, struct kbase_device, + pm.backend.gpu_poweroff_wait_work); + struct kbase_pm_device_data *pm = &kbdev->pm; + struct kbase_pm_backend_data *backend = &pm->backend; + unsigned long flags; + + /* Wait for power transitions to complete. We do this with no locks held + * so that we don't deadlock with any pending workqueues.
+ */ + kbase_pm_wait_for_desired_state(kbdev); + + kbase_pm_lock(kbdev); + + if (!backend->poweron_required) { + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + WARN_ON(backend->shaders_state != + KBASE_SHADERS_OFF_CORESTACK_OFF || + backend->l2_state != KBASE_L2_OFF); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* Disable interrupts and turn the clock off */ + if (!kbase_pm_clock_off(kbdev)) { + /* + * Page/bus faults are pending, must drop locks to + * process. Interrupts are disabled so no more faults + * should be generated at this point. + */ + kbase_pm_unlock(kbdev); + kbase_flush_mmu_wqs(kbdev); + kbase_pm_lock(kbdev); + + /* Turn off clock now that faults have been handled. We + * dropped locks so poweron_required may have changed - + * power back on if this is the case (effectively only + * re-enabling of the interrupts would be done in this + * case, as the clocks to GPU were not withdrawn yet). + */ + if (backend->poweron_required) + kbase_pm_clock_on(kbdev, false); + else + WARN_ON(!kbase_pm_clock_off(kbdev)); + } + } + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + backend->poweroff_wait_in_progress = false; + if (backend->poweron_required) { + backend->poweron_required = false; + kbdev->pm.backend.l2_desired = true; + kbase_pm_update_state(kbdev); + kbase_pm_update_cores_state_nolock(kbdev); + kbase_backend_slot_update(kbdev); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + kbase_pm_unlock(kbdev); + + wake_up(&kbdev->pm.backend.poweroff_wait); +} +/** + * kbase_pm_l2_clock_slow - Drop the GPU clock to the configured suspend frequency + * @kbdev: The kbase device + * + * Returns early when the gpu_clock_slow_down_wa workaround is not enabled, or + * (with a one-time warning) when gpu_clock_suspend_freq is zero. In the + * devfreq and DVFS builds the current frequency is saved in + * kbdev->previous_frequency before the clock is changed. + */ +static void kbase_pm_l2_clock_slow(struct kbase_device *kbdev) +{ +#if defined(CONFIG_MALI_MIDGARD_DVFS) + struct clk *clk = kbdev->clocks[0]; +#endif + + if (!kbdev->pm.backend.gpu_clock_slow_down_wa) + return; + + /* No suspend clock is specified */ + if (WARN_ON_ONCE(!kbdev->pm.backend.gpu_clock_suspend_freq)) + return; + +#if defined(CONFIG_MALI_DEVFREQ) + + /* Suspend devfreq */ + devfreq_suspend_device(kbdev->devfreq); + + /* Keep the
current freq to restore it upon resume */ + kbdev->previous_frequency = kbdev->current_nominal_freq; + + /* Slow down GPU clock to the suspend clock */ + kbase_devfreq_force_freq(kbdev, + kbdev->pm.backend.gpu_clock_suspend_freq); + +#elif defined(CONFIG_MALI_MIDGARD_DVFS) /* CONFIG_MALI_DEVFREQ */ + + if (WARN_ON_ONCE(!clk)) + return; + + /* Stop the metrics gathering framework */ + if (kbase_pm_metrics_is_active(kbdev)) + kbase_pm_metrics_stop(kbdev); + + /* Keep the current freq to restore it upon resume */ + kbdev->previous_frequency = clk_get_rate(clk); + + /* Slow down GPU clock to the suspend clock */ + if (WARN_ON_ONCE(clk_set_rate(clk, + kbdev->pm.backend.gpu_clock_suspend_freq))) + dev_err(kbdev->dev, "Failed to set suspend freq\n"); + +#endif /* CONFIG_MALI_MIDGARD_DVFS */ +} +/** + * kbase_pm_l2_clock_normalize - Restore the GPU clock saved by kbase_pm_l2_clock_slow() + * @kbdev: The kbase device + * + * Returns early when the gpu_clock_slow_down_wa workaround is not enabled. + * Otherwise the clock is set back to kbdev->previous_frequency, and devfreq + * (devfreq builds) or metrics gathering (DVFS builds) is restarted. + */ +static void kbase_pm_l2_clock_normalize(struct kbase_device *kbdev) +{ +#if defined(CONFIG_MALI_MIDGARD_DVFS) + struct clk *clk = kbdev->clocks[0]; +#endif + + if (!kbdev->pm.backend.gpu_clock_slow_down_wa) + return; + +#if defined(CONFIG_MALI_DEVFREQ) + + /* Restore GPU clock to the previous one */ + kbase_devfreq_force_freq(kbdev, kbdev->previous_frequency); + + /* Resume devfreq */ + devfreq_resume_device(kbdev->devfreq); + +#elif defined(CONFIG_MALI_MIDGARD_DVFS) /* CONFIG_MALI_DEVFREQ */ + + if (WARN_ON_ONCE(!clk)) + return; + + /* Restore GPU clock */ + if (WARN_ON_ONCE(clk_set_rate(clk, kbdev->previous_frequency))) + dev_err(kbdev->dev, "Failed to restore freq (%lu)\n", + kbdev->previous_frequency); + + /* Restart the metrics gathering framework. + * NOTE(review): this restart is unconditional, whereas the slow-down + * path only stops metrics when kbase_pm_metrics_is_active() - confirm + * that starting metrics which were never stopped is harmless. + */ + kbase_pm_metrics_start(kbdev); + +#endif /* CONFIG_MALI_MIDGARD_DVFS */ +} +/** + * kbase_pm_gpu_clock_control_worker - Work item that slows down or restores the GPU clock + * @data: The gpu_clock_control_work item embedded in kbdev->pm.backend + * + * Compares gpu_clock_slowed_down with gpu_clock_slow_down_desired under + * hwaccess_lock, applies the required clock change with the lock released, + * then re-runs the PM state machine. + */ +static void kbase_pm_gpu_clock_control_worker(struct work_struct *data) +{ + struct kbase_device *kbdev = container_of(data, struct kbase_device, + pm.backend.gpu_clock_control_work); + struct kbase_pm_device_data *pm = &kbdev->pm; + struct kbase_pm_backend_data *backend = &pm->backend; + unsigned long flags; + bool slow_down = false, normalize =
false; + + /* Determine if GPU clock control is required. gpu_clock_slowed_down + * is updated here, under hwaccess_lock, before the clock itself is + * changed below with the lock released. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + if (!backend->gpu_clock_slowed_down && + backend->gpu_clock_slow_down_desired) { + slow_down = true; + backend->gpu_clock_slowed_down = true; + } else if (backend->gpu_clock_slowed_down && + !backend->gpu_clock_slow_down_desired) { + normalize = true; + backend->gpu_clock_slowed_down = false; + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* Control GPU clock according to the request of L2 state machine. + * The GPU clock needs to be lowered for safe L2 power down + * and restored to previous speed at L2 power up. + */ + if (slow_down) + kbase_pm_l2_clock_slow(kbdev); + else if (normalize) + kbase_pm_l2_clock_normalize(kbdev); + + /* Tell the L2 state machine to transition to the next state */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} +/** + * kbase_pm_hwcnt_disable_worker - Work item that disables GPU hardware counters + * @data: The hwcnt_disable_work item embedded in kbdev->pm.backend + * + * kbase_hwcnt_context_disable() is called with hwaccess_lock released, so the + * hwcnt_desired/hwcnt_disabled condition is evaluated again afterwards and + * the disable is undone if the condition no longer holds. + */ +static void kbase_pm_hwcnt_disable_worker(struct work_struct *data) +{ + struct kbase_device *kbdev = container_of(data, struct kbase_device, + pm.backend.hwcnt_disable_work); + struct kbase_pm_device_data *pm = &kbdev->pm; + struct kbase_pm_backend_data *backend = &pm->backend; + unsigned long flags; + + bool do_disable; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + do_disable = !backend->hwcnt_desired && !backend->hwcnt_disabled; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (!do_disable) + return; + + kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + do_disable = !backend->hwcnt_desired && !backend->hwcnt_disabled; + + if (do_disable) { + /* PM state did not change while we were doing the disable, + * so commit the work we just performed and continue the state + * machine.
+ */ + backend->hwcnt_disabled = true; + kbase_pm_update_state(kbdev); + kbase_backend_slot_update(kbdev); + } else { + /* PM state was updated while we were doing the disable + * (hwcnt_desired or hwcnt_disabled changed), so we need to + * undo the disable we just performed. + */ + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} +/** + * kbase_pm_do_poweroff - Request power-off of the shader cores and the L2 + * @kbdev: The kbase device; the caller must hold kbdev->pm.lock + * + * Does nothing if the GPU is not powered or if a power-off wait is already in + * progress. Otherwise the desired state is set to off and the PM state + * machine is run; the power-off itself completes asynchronously. + */ +void kbase_pm_do_poweroff(struct kbase_device *kbdev) +{ + unsigned long flags; + + lockdep_assert_held(&kbdev->pm.lock); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (!kbdev->pm.backend.gpu_powered) + goto unlock_hwaccess; + + if (kbdev->pm.backend.poweroff_wait_in_progress) + goto unlock_hwaccess; + + /* Force all cores off */ + kbdev->pm.backend.shaders_desired = false; + kbdev->pm.backend.l2_desired = false; + + kbdev->pm.backend.poweroff_wait_in_progress = true; + kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off = true; + + /* l2_desired being false should cause the state machine to + * start powering off the L2. When it actually is powered off, + * the interrupt handler will call kbase_pm_l2_update_state() + * again, which will trigger the kbase_pm_gpu_poweroff_wait_wq. + * Callers of this function will need to wait on poweroff_wait.
+ */ + kbase_pm_update_state(kbdev); + +unlock_hwaccess: + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} +/** + * is_poweroff_in_progress - Wait condition for kbase_pm_wait_for_poweroff_complete() + * @kbdev: The kbase device + * + * Samples poweroff_wait_in_progress under hwaccess_lock. + * + * Return: true when poweroff_wait_in_progress is false, i.e. when no + * power-off wait is pending. Note that this is the opposite of what the + * function name suggests. + */ +static bool is_poweroff_in_progress(struct kbase_device *kbdev) +{ + bool ret; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + ret = (kbdev->pm.backend.poweroff_wait_in_progress == false); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return ret; +} +/** + * kbase_pm_wait_for_poweroff_complete - Block until poweroff_wait_in_progress is cleared + * @kbdev: The kbase device + * + * NOTE(review): the result of wait_event_killable() is discarded, so a caller + * cannot tell a completed power-off from a wait that was cut short by a fatal + * signal - confirm that every caller tolerates an early return. + */ +void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev) +{ + wait_event_killable(kbdev->pm.backend.poweroff_wait, + is_poweroff_in_progress(kbdev)); +} +KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_complete); +/** + * kbase_hwaccess_pm_powerup - Initialise the GPU hardware and power it on + * @kbdev: The kbase device + * @flags: Forwarded to kbase_pm_init_hw() + * + * Takes the PM lock for the whole sequence. All debug core masks are set to + * shader_present and pm.active_count is set to 1 before interrupts are + * enabled and kbase_pm_do_poweron() is called. + * + * Return: 0 on success, otherwise the error returned by kbase_pm_init_hw(). + */ +int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, + unsigned int flags) +{ + unsigned long irq_flags; + int ret; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + kbase_pm_lock(kbdev); + + /* A suspend won't happen during startup/insmod */ + KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); + + /* Power up the GPU, don't enable IRQs as we are not ready to receive + * them. */ + ret = kbase_pm_init_hw(kbdev, flags); + if (ret) { + kbase_pm_unlock(kbdev); + return ret; + } + + kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] = + kbdev->pm.debug_core_mask[1] = + kbdev->pm.debug_core_mask[2] = + kbdev->gpu_props.props.raw_props.shader_present; + + /* Pretend the GPU is active to prevent a power policy turning the GPU + * cores off */ + kbdev->pm.active_count = 1; + + spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, + irq_flags); + /* Ensure cycle counter is off */ + kbdev->pm.backend.gpu_cycle_counter_requests = 0; + spin_unlock_irqrestore( + &kbdev->pm.backend.gpu_cycle_counter_requests_lock, + irq_flags); + + /* We are ready to receive IRQ's now as power policy is set up, so + * enable them now.
*/ +#ifdef CONFIG_MALI_DEBUG + kbdev->pm.backend.driver_ready_for_irqs = true; +#endif + kbase_pm_enable_interrupts(kbdev); + + /* Turn on the GPU and any cores needed by the policy */ + kbase_pm_do_poweron(kbdev, false); + kbase_pm_unlock(kbdev); + + return 0; +} +/** + * kbase_hwaccess_pm_halt - Request a GPU power-off + * @kbdev: The kbase device + * + * Calls kbase_pm_do_poweroff() with pm.lock held. It does not wait for the + * power-off to complete. + */ +void kbase_hwaccess_pm_halt(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + mutex_lock(&kbdev->pm.lock); + kbase_pm_do_poweroff(kbdev); + mutex_unlock(&kbdev->pm.lock); +} + +KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt); +/** + * kbase_hwaccess_pm_term - Tear down the power management backend + * @kbdev: The kbase device + * + * Expects pm.active_count and gpu_cycle_counter_requests to be zero. Cancels + * hwcnt_disable_work, re-enables the hwcnt context if it was left disabled, + * terminates the state machine, policy, core availability and metrics + * subsystems, and finally destroys gpu_poweroff_wait_wq. + */ +void kbase_hwaccess_pm_term(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0); + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0); + + cancel_work_sync(&kbdev->pm.backend.hwcnt_disable_work); + + if (kbdev->pm.backend.hwcnt_disabled) { + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + + /* Free any resources the policy allocated */ + kbase_pm_state_machine_term(kbdev); + kbase_pm_policy_term(kbdev); + kbase_pm_ca_term(kbdev); + + /* Shut down the metrics subsystem */ + kbasep_pm_metrics_term(kbdev); + + destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq); +} +/** + * kbase_pm_power_changed - Re-evaluate PM state and job slots + * @kbdev: The kbase device + * + * Runs kbase_pm_update_state() and kbase_backend_slot_update() under + * hwaccess_lock. + */ +void kbase_pm_power_changed(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + + kbase_backend_slot_update(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} +/** + * kbase_pm_set_debug_core_mask - Set the per-slot debug core masks + * @kbdev: The kbase device; the caller must hold both + * hwaccess_lock and pm.lock + * @new_core_mask_js0: New mask for debug_core_mask[0] + * @new_core_mask_js1: New mask for debug_core_mask[1] + * @new_core_mask_js2: New mask for debug_core_mask[2] + * + * When the dummy job workaround is enabled the slot 0 mask is left at its + * current value and a warning is logged. debug_core_mask_all becomes the OR + * of the three masks that were actually applied. + */ +void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, + u64 new_core_mask_js0, u64 new_core_mask_js1, + u64 new_core_mask_js2) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->pm.lock); + + if (kbase_dummy_job_wa_enabled(kbdev)) { + dev_warn(kbdev->dev, "Change of core mask not supported for slot 0 as dummy job WA is enabled"); + new_core_mask_js0 =
kbdev->pm.debug_core_mask[0]; + } + + kbdev->pm.debug_core_mask[0] = new_core_mask_js0; + kbdev->pm.debug_core_mask[1] = new_core_mask_js1; + kbdev->pm.debug_core_mask[2] = new_core_mask_js2; + kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 | + new_core_mask_js2; + + kbase_pm_update_dynamic_cores_onoff(kbdev); +} +/** + * kbase_hwaccess_pm_gpu_active - Notification that the GPU has become active + * @kbdev: The kbase device + * + * Simply calls kbase_pm_update_active(). + */ +void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev) +{ + kbase_pm_update_active(kbdev); +} +/** + * kbase_hwaccess_pm_gpu_idle - Notification that the GPU has become idle + * @kbdev: The kbase device + * + * Simply calls kbase_pm_update_active(), exactly like + * kbase_hwaccess_pm_gpu_active(). + */ +void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev) +{ + kbase_pm_update_active(kbdev); +} +/** + * kbase_hwaccess_pm_suspend - Power the GPU off for a system suspend + * @kbdev: The kbase device + * + * Requests the power-off and suspends the backend timer under the PM lock, + * waits for the power-off to finish with the lock released, then invokes + * callback_power_suspend if one is set. + */ +void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) +{ + /* Force power off the GPU and all cores (regardless of policy), only + * after the PM active count reaches zero (otherwise, we risk turning it + * off prematurely) */ + kbase_pm_lock(kbdev); + + kbase_pm_do_poweroff(kbdev); + + kbase_backend_timer_suspend(kbdev); + + kbase_pm_unlock(kbdev); + + kbase_pm_wait_for_poweroff_complete(kbdev); + + if (kbdev->pm.backend.callback_power_suspend) + kbdev->pm.backend.callback_power_suspend(kbdev); +} +/** + * kbase_hwaccess_pm_resume - Power the GPU back on after a system suspend + * @kbdev: The kbase device + * + * Under the PM lock: clears pm.suspending (and pm.gpu_lost when arbiter + * support is built in), calls kbase_pm_do_poweron() with is_resume set, and + * resumes the backend timer. + */ +void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) +{ + kbase_pm_lock(kbdev); + + kbdev->pm.suspending = false; +#ifdef CONFIG_MALI_ARBITER_SUPPORT + kbdev->pm.gpu_lost = false; +#endif + kbase_pm_do_poweron(kbdev, true); + + kbase_backend_timer_resume(kbdev); + + kbase_pm_unlock(kbdev); +} +/** + * kbase_pm_handle_gpu_lost - Bring the PM state in line with a lost GPU + * @kbdev: The kbase device + * + * Only built with CONFIG_MALI_ARBITER_SUPPORT. Cancels any pending GPU reset, + * flushes IRQ handlers and MMU work, resets the job backend, idles the hwcnt + * backend, waits for pm.active_count to reach zero, and finally marks the L2, + * shaders and GPU as off and wakes the PM waiters. + */ +#ifdef CONFIG_MALI_ARBITER_SUPPORT +void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev) +{ + unsigned long flags; + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + ktime_t end_timestamp = ktime_get(); + + /* Full GPU reset will have been done by hypervisor, so cancel */ + atomic_set(&kbdev->hwaccess.backend.reset_gpu, + KBASE_RESET_GPU_NOT_PENDING); + hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer); + + /* GPU is no longer mapped to VM.
So no interrupts will be received + * and Mali registers have been replaced by dummy RAM. + * NOTE(review): irq_reset_flush is set under hwaccess_lock and + * mmu_mask_change below, but is cleared again without either lock - + * confirm that this is intended. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock(&kbdev->mmu_mask_change); + kbdev->irq_reset_flush = true; + spin_unlock(&kbdev->mmu_mask_change); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + kbase_synchronize_irqs(kbdev); + kbase_flush_mmu_wqs(kbdev); + kbdev->irq_reset_flush = false; + + /* Clear all jobs running on the GPU. The backend reset is skipped + * while protected_entry_transition_override is set. + */ + mutex_lock(&kbdev->pm.lock); + kbdev->pm.gpu_lost = true; + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->protected_mode = false; + if (!kbdev->pm.backend.protected_entry_transition_override) + kbase_backend_reset(kbdev, &end_timestamp); + kbase_pm_metrics_update(kbdev, NULL); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* Cancel any pending HWC dumps */ + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + kbdev->hwcnt.backend.triggered = 1; + wake_up(&kbdev->hwcnt.backend.wait); + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + /* Wait for all threads keeping GPU active to complete. pm.lock is + * dropped for the duration of the wait and retaken afterwards. + */ + mutex_unlock(&kbdev->pm.lock); + wait_event(kbdev->pm.zero_active_count_wait, + kbdev->pm.active_count == 0); + mutex_lock(&kbdev->pm.lock); + + /* Update state to GPU off. + * NOTE(review): poweroff_wait_in_progress is cleared twice inside this + * critical section (before the trace point and again after + * kbase_gpu_cache_clean_wait_complete()); the second store looks + * redundant - confirm before removing. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.shaders_desired = false; + kbdev->pm.backend.l2_desired = false; + backend->l2_state = KBASE_L2_OFF; + backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF; + kbdev->pm.backend.gpu_powered = false; + backend->poweroff_wait_in_progress = false; + KBASE_KTRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, 0); + wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait); + kbase_gpu_cache_clean_wait_complete(kbdev); + backend->poweroff_wait_in_progress = false; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + wake_up(&kbdev->pm.backend.poweroff_wait); + mutex_unlock(&kbdev->pm.lock); +} +#endif /*
CONFIG_MALI_ARBITER_SUPPORT */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c new file mode 100644 index 0000000..e7eef26 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c @@ -0,0 +1,112 @@ +/* + * + * (C) COPYRIGHT 2013-2018, 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Base kernel core availability APIs + */ + +#include +#include +#include +#ifdef CONFIG_MALI_NO_MALI +#include +#endif +#include +/** + * kbase_pm_ca_init - Initialise the core availability state + * @kbdev: The kbase device + * + * With CONFIG_MALI_DEVFREQ, ca_cores_enabled is seeded from + * kbdev->current_core_mask when that is non-zero, otherwise from the + * shader_present mask. Without it this function does nothing. + * + * Return: always 0. + */ +int kbase_pm_ca_init(struct kbase_device *kbdev) +{ +#ifdef CONFIG_MALI_DEVFREQ + struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend; + + if (kbdev->current_core_mask) + pm_backend->ca_cores_enabled = kbdev->current_core_mask; + else + pm_backend->ca_cores_enabled = + kbdev->gpu_props.props.raw_props.shader_present; +#endif + + return 0; +} +/** + * kbase_pm_ca_term - Counterpart of kbase_pm_ca_init() + * @kbdev: The kbase device + * + * Currently has nothing to do. + */ +void kbase_pm_ca_term(struct kbase_device *kbdev) +{ +} +/** + * kbase_devfreq_set_core_mask - Apply a new devfreq core mask + * @kbdev: The kbase device + * @core_mask: Requested mask of enabled shader cores + * + * Only built with CONFIG_MALI_DEVFREQ. The request is rejected with an error + * message, leaving ca_cores_enabled unchanged, when @core_mask does not + * intersect debug_core_mask_all or when the dummy job workaround is enabled. + * Otherwise the mask is stored and the PM state machine is run, all under + * hwaccess_lock. + */ +#ifdef CONFIG_MALI_DEVFREQ +void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) +{ + struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (!(core_mask & kbdev->pm.debug_core_mask_all)) { + dev_err(kbdev->dev, "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n", + core_mask, kbdev->pm.debug_core_mask_all); + goto unlock; + } + + if (kbase_dummy_job_wa_enabled(kbdev)) { + dev_err(kbdev->dev, "Dynamic core scaling not supported as dummy job WA is enabled"); + goto unlock; + } + + pm_backend->ca_cores_enabled = core_mask; + + kbase_pm_update_state(kbdev); +/* NOTE(review): the dev_dbg() below also runs for rejected requests, where the + * mask it prints is not new, and it reads ca_cores_enabled after hwaccess_lock + * has been released - confirm that this is acceptable for a debug message. + */ +unlock: + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n", + pm_backend->ca_cores_enabled); +} +#endif +/** + * kbase_pm_ca_get_core_mask - Get the mask of currently available shader cores + * @kbdev: The kbase device; the caller must hold hwaccess_lock + * + * Return: ca_cores_enabled (CONFIG_MALI_DEVFREQ builds) or the shader_present + * mask (other builds), ANDed with debug_core_mask_all. + */ +u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev) +{ +#ifdef CONFIG_MALI_DEVFREQ + struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend; +#endif + + lockdep_assert_held(&kbdev->hwaccess_lock); + +#ifdef CONFIG_MALI_DEVFREQ + return pm_backend->ca_cores_enabled & kbdev->pm.debug_core_mask_all; +#else + return kbdev->gpu_props.props.raw_props.shader_present & + kbdev->pm.debug_core_mask_all; +#endif +} + +KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask); +/** + * kbase_pm_ca_get_instr_core_mask - Get the shader core mask used for hwcnt dumps + * @kbdev: The kbase device; the caller must hold hwaccess_lock + * + * Return: pm_shaders_core_mask, or with CONFIG_MALI_NO_MALI a mask with the + * low KBASE_DUMMY_MODEL_MAX_SHADER_CORES bits set. + */ +u64 kbase_pm_ca_get_instr_core_mask(struct
kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + +#ifdef CONFIG_MALI_NO_MALI + return (((1ull) << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1); +#else + return kbdev->pm.backend.pm_shaders_core_mask; +#endif +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h new file mode 100644 index 0000000..5423e96 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h @@ -0,0 +1,89 @@ +/* + * + * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Base kernel core availability APIs + */ + +#ifndef _KBASE_PM_CA_H_ +#define _KBASE_PM_CA_H_ + +/** + * kbase_pm_ca_init - Initialize core availability framework + * + * Must be called before calling any other core availability function + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Return: 0 if the core availability framework was successfully initialized, + * -errno otherwise + */ +int kbase_pm_ca_init(struct kbase_device *kbdev); + +/** + * kbase_pm_ca_term - Terminate core availability framework + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_ca_term(struct kbase_device *kbdev); + +/** + * kbase_pm_ca_get_core_mask - Get currently available shaders core mask + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Returns a mask of the currently available shader cores. + * Calls into the core availability policy + * + * Return: The bit mask of available cores + */ +u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev); + +/** + * kbase_pm_ca_update_core_status - Update core status + * + * @kbdev: The kbase device structure for the device (must be + * a valid pointer) + * @cores_ready: The bit mask of cores ready for job submission + * @cores_transitioning: The bit mask of cores that are transitioning power + * state + * + * Update core availability policy with current core power status + * + * Calls into the core availability policy + */ +void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready, + u64 cores_transitioning); + +/** + * kbase_pm_ca_get_instr_core_mask - Get the PM state sync-ed shaders core mask + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Returns a mask of the PM state synchronised shader cores for arranging + * HW performance counter dumps + * + * Return: The bit mask of PM state 
synchronised cores + */ +u64 kbase_pm_ca_get_instr_core_mask(struct kbase_device *kbdev); + +#endif /* _KBASE_PM_CA_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h new file mode 100644 index 0000000..f67ec65 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h @@ -0,0 +1,60 @@ +/* + * + * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * A core availability policy for use with devfreq, where core masks are + * associated with OPPs. + */ + +#ifndef MALI_KBASE_PM_CA_DEVFREQ_H +#define MALI_KBASE_PM_CA_DEVFREQ_H + +/** + * struct kbasep_pm_ca_policy_devfreq - Private structure for devfreq ca policy + * + * This contains data that is private to the devfreq core availability + * policy. 
+ * + * @cores_desired: Cores that the policy wants to be available + * @cores_enabled: Cores that the policy is currently returning as available + * @cores_used: Cores currently powered or transitioning + */ +struct kbasep_pm_ca_policy_devfreq { + u64 cores_desired; + u64 cores_enabled; + u64 cores_used; +}; + +extern const struct kbase_pm_ca_policy kbase_pm_ca_devfreq_policy_ops; + +/** + * kbase_devfreq_set_core_mask - Set core mask for policy to use + * @kbdev: Device pointer + * @core_mask: New core mask + * + * The new core mask will have immediate effect if the GPU is powered, or will + * take effect when it is next powered on. + */ +void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask); + +#endif /* MALI_KBASE_PM_CA_DEVFREQ_H */ + diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c new file mode 100644 index 0000000..9eef44a --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c @@ -0,0 +1,66 @@ +/* + * + * (C) COPYRIGHT 2012-2016, 2018-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * "Coarse Demand" power management policy + */ + +#include +#include +/* shaders_needed callback: shaders are wanted exactly when + * kbase_pm_is_active() reports true. + */ +static bool coarse_demand_shaders_needed(struct kbase_device *kbdev) +{ + return kbase_pm_is_active(kbdev); +} +/* get_core_active callback: returns the same kbase_pm_is_active() result as + * coarse_demand_shaders_needed(). + */ +static bool coarse_demand_get_core_active(struct kbase_device *kbdev) +{ + return kbase_pm_is_active(kbdev); +} +/* init callback: the policy keeps no state, so there is nothing to set up. */ +static void coarse_demand_init(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} +/* term callback: the policy keeps no state, so there is nothing to release. */ +static void coarse_demand_term(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +/* The struct kbase_pm_policy structure for the coarse demand power policy. + * + * This is the static structure that defines the coarse demand power policy's + * callbacks and name. The initialiser is positional, so the entries must stay + * in the order in which struct kbase_pm_policy declares its members. + */ +const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = { + "coarse_demand", /* name */ + coarse_demand_init, /* init */ + coarse_demand_term, /* term */ + coarse_demand_shaders_needed, /* shaders_needed */ + coarse_demand_get_core_active, /* get_core_active */ + KBASE_PM_POLICY_ID_COARSE_DEMAND, /* id */ +}; + +KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops); diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h new file mode 100644 index 0000000..304e5d7 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h @@ -0,0 +1,69 @@ +/* + * + * (C) COPYRIGHT 2012-2015,2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * "Coarse Demand" power management policy + */ + +#ifndef MALI_KBASE_PM_COARSE_DEMAND_H +#define MALI_KBASE_PM_COARSE_DEMAND_H + +/** + * DOC: + * The "Coarse" demand power management policy has the following + * characteristics: + * - When KBase indicates that the GPU will be powered up, but we don't yet + * know which Job Chains are to be run: + * - Shader Cores are powered up, regardless of whether or not they will be + * needed later. + * - When KBase indicates that Shader Cores are needed to submit the currently + * queued Job Chains: + * - Shader Cores are kept powered, regardless of whether or not they will + * be needed + * - When KBase indicates that the GPU need not be powered: + * - The Shader Cores are powered off, and the GPU itself is powered off too. + * + * @note: + * - KBase indicates the GPU will be powered up when it has a User Process that + * has just started to submit Job Chains. + * - KBase indicates the GPU need not be powered when all the Job Chains from + * User Processes have finished, and it is waiting for a User Process to + * submit some more Job Chains. + */ + +/** + * struct kbasep_pm_policy_coarse_demand - Private structure for coarse demand + * policy + * + * This contains data that is private to the coarse demand power policy. 
+ * + * @dummy: Dummy member - no state needed + */ +struct kbasep_pm_policy_coarse_demand { + int dummy; +}; + +extern const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops; + +#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h new file mode 100644 index 0000000..f4bcf3e --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h @@ -0,0 +1,517 @@ +/* + * + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Backend-specific Power Manager definitions + */ + +#ifndef _KBASE_PM_HWACCESS_DEFS_H_ +#define _KBASE_PM_HWACCESS_DEFS_H_ + +#include "mali_kbase_pm_always_on.h" +#include "mali_kbase_pm_coarse_demand.h" +#if !MALI_CUSTOMER_RELEASE +#include "mali_kbase_pm_always_on_demand.h" +#endif + +/* Forward definition - see mali_kbase.h */ +struct kbase_device; +struct kbase_jd_atom; + +/** + * enum kbase_pm_core_type - The types of core in a GPU. 
+ * + * These enumerated values are used in calls to + * - kbase_pm_get_present_cores() + * - kbase_pm_get_active_cores() + * - kbase_pm_get_trans_cores() + * - kbase_pm_get_ready_cores(). + * + * They specify which type of core should be acted on. These values are set in + * a manner that allows core_type_to_reg() function to be simpler and more + * efficient. + * + * @KBASE_PM_CORE_L2: The L2 cache + * @KBASE_PM_CORE_SHADER: Shader cores + * @KBASE_PM_CORE_TILER: Tiler cores + * @KBASE_PM_CORE_STACK: Core stacks + */ +enum kbase_pm_core_type { + KBASE_PM_CORE_L2 = L2_PRESENT_LO, + KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO, + KBASE_PM_CORE_TILER = TILER_PRESENT_LO, + KBASE_PM_CORE_STACK = STACK_PRESENT_LO +}; + +/** + * enum kbase_l2_core_state - The states used for the L2 cache & tiler power + * state machine. + * + * @KBASE_L2_OFF: The L2 cache and tiler are off + * @KBASE_L2_PEND_ON: The L2 cache and tiler are powering on + * @KBASE_L2_RESTORE_CLOCKS: The GPU clock is restored. Conditionally used. + * @KBASE_L2_ON_HWCNT_ENABLE: The L2 cache and tiler are on, and hwcnt is being + * enabled + * @KBASE_L2_ON: The L2 cache and tiler are on, and hwcnt is enabled + * @KBASE_L2_ON_HWCNT_DISABLE: The L2 cache and tiler are on, and hwcnt is being + * disabled + * @KBASE_L2_SLOW_DOWN_CLOCKS: The GPU clock is set to appropriate or lowest + * clock. Conditionally used. + * @KBASE_L2_POWER_DOWN: The L2 cache and tiler are about to be powered off + * @KBASE_L2_PEND_OFF: The L2 cache and tiler are powering off + * @KBASE_L2_RESET_WAIT: The GPU is resetting, L2 cache and tiler power state + * are unknown + */ +enum kbase_l2_core_state { +#define KBASEP_L2_STATE(n) KBASE_L2_ ## n, +#include "mali_kbase_pm_l2_states.h" +#undef KBASEP_L2_STATE +}; + +/** + * enum kbase_shader_core_state - The states used for the shaders' state machine. 
+ * + * @KBASE_SHADERS_OFF_CORESTACK_OFF: The shaders and core stacks are off + * @KBASE_SHADERS_OFF_CORESTACK_PEND_ON: The shaders are off, core stacks have + * been requested to power on and hwcnt + * is being disabled + * @KBASE_SHADERS_PEND_ON_CORESTACK_ON: Core stacks are on, shaders have been + * requested to power on. Or after doing + * partial shader on/off, checking whether + * it's the desired state. + * @KBASE_SHADERS_ON_CORESTACK_ON: The shaders and core stacks are on, and hwcnt + * is already enabled. + * @KBASE_SHADERS_ON_CORESTACK_ON_RECHECK: The shaders and core stacks + * are on, hwcnt is disabled, and it is + * being checked whether to power down + * or re-enable hwcnt. + * @KBASE_SHADERS_WAIT_OFF_CORESTACK_ON: The shaders have been requested to + * power off, but they remain on for the + * duration of the hysteresis timer + * @KBASE_SHADERS_WAIT_GPU_IDLE: The shaders partial poweroff needs to reach + * a state where jobs on the GPU are finished + * including jobs currently running and in the + * GPU queue because of GPU2017-861 + * @KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON: The hysteresis timer has expired + * @KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON: The core stacks are on and the + * level 2 cache is being flushed. + * @KBASE_SHADERS_READY_OFF_CORESTACK_ON: The core stacks are on and the shaders + * are ready to be powered off. + * @KBASE_SHADERS_PEND_OFF_CORESTACK_ON: The core stacks are on, and the shaders + * have been requested to power off + * @KBASE_SHADERS_OFF_CORESTACK_PEND_OFF: The shaders are off, and the core stacks + * have been requested to power off + * @KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF: Shaders and corestacks are + * off, but the tick timer + * cancellation is still + * pending.
+ * @KBASE_SHADERS_RESET_WAIT: The GPU is resetting, shader and core stack power + * states are unknown + */ +enum kbase_shader_core_state { +#define KBASEP_SHADER_STATE(n) KBASE_SHADERS_ ## n, +#include "mali_kbase_pm_shader_states.h" +#undef KBASEP_SHADER_STATE +}; + +/** + * struct kbasep_pm_metrics - Metrics data collected for use by the power + * management framework. + * + * @time_busy: number of ns the GPU was busy executing jobs since the + * @time_period_start timestamp. + * @time_idle: number of ns the GPU was not executing jobs since the + * @time_period_start timestamp. + * @busy_cl: number of ns the GPU was busy executing CL jobs. Note that + * if two CL jobs were active for 400ns, this value would be updated + * with 800. + * @busy_gl: number of ns the GPU was busy executing GL jobs. Note that + * if two GL jobs were active for 400ns, this value would be updated + * with 800. + */ +struct kbasep_pm_metrics { + u32 time_busy; + u32 time_idle; + u32 busy_cl[2]; + u32 busy_gl; +}; + +/** + * struct kbasep_pm_metrics_state - State required to collect the metrics in + * struct kbasep_pm_metrics + * @time_period_start: time at which busy/idle measurements started + * @gpu_active: true when the GPU is executing jobs. false when + * not. Updated when the job scheduler informs us a job is submitted + * or removed from a GPU slot. + * @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device. + * @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. + * @lock: spinlock protecting the kbasep_pm_metrics_data structure + * @platform_data: pointer to data controlled by platform specific code + * @kbdev: pointer to kbase device for which metrics are collected + * @values: The current values of the power management metrics. The + * kbase_pm_get_dvfs_metrics() function is used to compare these + * current values with the saved values from a previous invocation.
+ * @timer: timer to regularly make DVFS decisions based on the power + * management metrics. + * @timer_active: boolean indicating @timer is running + * @dvfs_last: values of the PM metrics from the last DVFS tick + * @dvfs_diff: difference between the current and previous PM metrics. + */ +struct kbasep_pm_metrics_state { + ktime_t time_period_start; + bool gpu_active; + u32 active_cl_ctx[2]; + u32 active_gl_ctx[3]; + spinlock_t lock; + + void *platform_data; + struct kbase_device *kbdev; + + struct kbasep_pm_metrics values; + +#ifdef CONFIG_MALI_MIDGARD_DVFS + struct hrtimer timer; + bool timer_active; + struct kbasep_pm_metrics dvfs_last; + struct kbasep_pm_metrics dvfs_diff; +#endif +}; + +/** + * struct kbasep_pm_tick_timer_state - State for the shader hysteresis timer + * @wq: Work queue to wait for the timer to be stopped + * @work: Work item which cancels the timer + * @timer: Timer for powering off the shader cores + * @configured_interval: Period of GPU poweroff timer + * @configured_ticks: User-configured number of ticks to wait after the shader + * power down request is received before turning off the cores + * @remaining_ticks: Number of remaining timer ticks until shaders are powered off + * @cancel_queued: True if the cancellation work item has been queued. This is + * required to ensure that it is not queued twice, e.g. after + * a reset, which could cause the timer to be incorrectly + * cancelled later by a delayed workitem.
+ * @needed: Whether the timer should restart itself + */ +struct kbasep_pm_tick_timer_state { + struct workqueue_struct *wq; + struct work_struct work; + struct hrtimer timer; + + ktime_t configured_interval; + unsigned int configured_ticks; + unsigned int remaining_ticks; + + bool cancel_queued; + bool needed; +}; + +union kbase_pm_policy_data { + struct kbasep_pm_policy_always_on always_on; + struct kbasep_pm_policy_coarse_demand coarse_demand; +#if !MALI_CUSTOMER_RELEASE + struct kbasep_pm_policy_always_on_demand always_on_demand; +#endif +}; + +/** + * struct kbase_pm_backend_data - Data stored per device for power management. + * + * This structure contains data for the power management framework. There is one + * instance of this structure per device in the system. + * + * @pm_current_policy: The policy that is currently actively controlling the + * power state. + * @pm_policy_data: Private data for current PM policy + * @reset_done: Flag when a reset is complete + * @reset_done_wait: Wait queue to wait for changes to @reset_done + * @gpu_cycle_counter_requests: The reference count of active gpu cycle counter + * users + * @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests + * @gpu_in_desired_state_wait: Wait queue set when the GPU is in the desired + * state according to the L2 and shader power state + * machines + * @gpu_powered: Set to true when the GPU is powered and register + * accesses are possible, false otherwise. Access to this + * variable should be protected by: both the hwaccess_lock + * spinlock and the pm.lock mutex for writes; or at least + * one of either lock for reads. + * @pm_shaders_core_mask: Shader PM state synchronised shaders core mask. It + * holds the cores enabled in a hardware counters dump, + * and may differ from @shaders_avail when under different + * states and transitions. 
+ * @cg1_disabled: Set if the policy wants to keep the second core group + * powered off + * @driver_ready_for_irqs: Debug state indicating whether sufficient + * initialization of the driver has occurred to handle + * IRQs + * @metrics: Structure to hold metrics for the GPU + * @shader_tick_timer: Structure to hold the shader poweroff tick timer state + * @poweroff_wait_in_progress: true if a wait for GPU power off is in progress. + * hwaccess_lock must be held when accessing + * @invoke_poweroff_wait_wq_when_l2_off: flag indicating that the L2 power state + * machine should invoke the poweroff + * worker after the L2 has turned off. + * @poweron_required: true if a GPU power on is required. Should only be set + * when poweroff_wait_in_progress is true, and therefore the + * GPU can not immediately be powered on. pm.lock must be + * held when accessing + * @gpu_poweroff_wait_wq: workqueue for waiting for GPU to power off + * @gpu_poweroff_wait_work: work item for use with @gpu_poweroff_wait_wq + * @poweroff_wait: waitqueue for waiting for @gpu_poweroff_wait_work to complete + * @callback_power_on: Callback when the GPU needs to be turned on. See + * &struct kbase_pm_callback_conf + * @callback_power_off: Callback when the GPU may be turned off. See + * &struct kbase_pm_callback_conf + * @callback_power_suspend: Callback when a suspend occurs and the GPU needs to + * be turned off. See &struct kbase_pm_callback_conf + * @callback_power_resume: Callback when a resume occurs and the GPU needs to + * be turned on. See &struct kbase_pm_callback_conf + * @callback_power_runtime_on: Callback when the GPU needs to be turned on. See + * &struct kbase_pm_callback_conf + * @callback_power_runtime_off: Callback when the GPU may be turned off. See + * &struct kbase_pm_callback_conf + * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See + * &struct kbase_pm_callback_conf + * @callback_soft_reset: Optional callback to software reset the GPU. 
See + * &struct kbase_pm_callback_conf + * @ca_cores_enabled: Cores that are currently available + * @l2_state: The current state of the L2 cache state machine. See + * &enum kbase_l2_core_state + * @l2_desired: True if the L2 cache should be powered on by the L2 cache state + * machine + * @l2_always_on: If true, disable powering down of l2 cache. + * @shaders_state: The current state of the shader state machine. + * @shaders_avail: This is updated by the state machine when it is in a state + * where it can write to the SHADER_PWRON or PWROFF registers + * to have the same set of available cores as specified by + * @shaders_desired_mask. So it would eventually have the same + * value as @shaders_desired_mask and would precisely indicate + * the cores that are currently available. This is internal to + * shader state machine and should *not* be modified elsewhere. + * @shaders_desired_mask: This is updated by the state machine when it is in + * a state where it can handle changes to the core + * availability (either by DVFS or sysfs). This is + * internal to the shader state machine and should + * *not* be modified elsewhere. + * @shaders_desired: True if the PM active count or power policy requires the + * shader cores to be on. This is used as an input to the + * shader power state machine. The current state of the + * cores may be different, but there should be transitions in + * progress that will eventually achieve this state (assuming + * that the policy doesn't change its mind in the mean time). 
+ * @in_reset: True if a GPU is resetting and normal power manager operation is + * suspended + * @partial_shaderoff: True if we want to partially power off shader cores. + * This indicates a partial shader core off case, + * which needs special handling such as flushing the + * L2 cache because of GPU2017-861 + * @protected_entry_transition_override : True if GPU reset is being used + * before entering the protected mode and so + * the reset handling behaviour is being + * overridden. + * @protected_transition_override : True if a protected mode transition is in + * progress and is overriding power manager + * behaviour. + * @protected_l2_override : Non-zero if the L2 cache is required during a + * protected mode transition. Has no effect if not + * transitioning. + * @hwcnt_desired: True if we want GPU hardware counters to be enabled. + * @hwcnt_disabled: True if GPU hardware counters are not enabled. + * @hwcnt_disable_work: Work item to disable GPU hardware counters, used if + * atomic disable is not possible. + * @gpu_clock_suspend_freq: 'opp-mali-errata-1485982' clock in opp table + * for safe L2 power cycle. + * If no opp-mali-errata-1485982 specified, + * the slowest clock will be taken. + * @gpu_clock_slow_down_wa: If true, slow down GPU clock during L2 power cycle. + * @gpu_clock_slow_down_desired: True if we want a lower GPU clock + * for safe L2 power cycle. False if we want the GPU clock + * to go back to the normalized one. This is updated only + * in L2 state machine, kbase_pm_l2_update_state. + * @gpu_clock_slowed_down: During L2 power cycle, + * True if gpu clock is set at lower frequency + * for safe L2 power down, False if gpu clock gets + * restored to previous speed. This is updated only in + * work function, kbase_pm_gpu_clock_control_worker.
+ * @gpu_clock_control_work: work item to set GPU clock during L2 power cycle + * using gpu_clock_control + * + * Note: + * During an IRQ, @pm_current_policy can be NULL when the policy is being + * changed with kbase_pm_set_policy(). The change is protected under + * kbase_device.pm.power_change_lock. Direct access to this from IRQ context + * must therefore check for NULL. If NULL, then kbase_pm_set_policy() will + * re-issue the policy functions that would have been done under IRQ. + */ +struct kbase_pm_backend_data { + const struct kbase_pm_policy *pm_current_policy; + union kbase_pm_policy_data pm_policy_data; + bool reset_done; + wait_queue_head_t reset_done_wait; + int gpu_cycle_counter_requests; + spinlock_t gpu_cycle_counter_requests_lock; + + wait_queue_head_t gpu_in_desired_state_wait; + + bool gpu_powered; + + u64 pm_shaders_core_mask; + + bool cg1_disabled; + +#ifdef CONFIG_MALI_DEBUG + bool driver_ready_for_irqs; +#endif /* CONFIG_MALI_DEBUG */ + + struct kbasep_pm_metrics_state metrics; + + struct kbasep_pm_tick_timer_state shader_tick_timer; + + bool poweroff_wait_in_progress; + bool invoke_poweroff_wait_wq_when_l2_off; + bool poweron_required; + + struct workqueue_struct *gpu_poweroff_wait_wq; + struct work_struct gpu_poweroff_wait_work; + + wait_queue_head_t poweroff_wait; + + int (*callback_power_on)(struct kbase_device *kbdev); + void (*callback_power_off)(struct kbase_device *kbdev); + void (*callback_power_suspend)(struct kbase_device *kbdev); + void (*callback_power_resume)(struct kbase_device *kbdev); + int (*callback_power_runtime_on)(struct kbase_device *kbdev); + void (*callback_power_runtime_off)(struct kbase_device *kbdev); + int (*callback_power_runtime_idle)(struct kbase_device *kbdev); + int (*callback_soft_reset)(struct kbase_device *kbdev); + + u64 ca_cores_enabled; + + enum kbase_l2_core_state l2_state; + enum kbase_shader_core_state shaders_state; + u64 shaders_avail; + u64 shaders_desired_mask; + bool l2_desired; + bool
l2_always_on; + bool shaders_desired; + + bool in_reset; + + bool partial_shaderoff; + + bool protected_entry_transition_override; + bool protected_transition_override; + int protected_l2_override; + + bool hwcnt_desired; + bool hwcnt_disabled; + struct work_struct hwcnt_disable_work; + + u64 gpu_clock_suspend_freq; + bool gpu_clock_slow_down_wa; + bool gpu_clock_slow_down_desired; + bool gpu_clock_slowed_down; + struct work_struct gpu_clock_control_work; +}; + + +/* List of policy IDs */ +enum kbase_pm_policy_id { + KBASE_PM_POLICY_ID_COARSE_DEMAND, +#if !MALI_CUSTOMER_RELEASE + KBASE_PM_POLICY_ID_ALWAYS_ON_DEMAND, +#endif + KBASE_PM_POLICY_ID_ALWAYS_ON +}; + +/** + * struct kbase_pm_policy - Power policy structure. + * + * Each power policy exposes a (static) instance of this structure which + * contains function pointers to the policy's methods. + * + * @name: The name of this policy + * @init: Function called when the policy is selected + * @term: Function called when the policy is unselected + * @shaders_needed: Function called to find out if shader cores are needed + * @get_core_active: Function called to get the current overall GPU power + * state + * @id: Field indicating an ID for this policy. This is not + * necessarily the same as its index in the list returned + * by kbase_pm_list_policies(). + * It is used purely for debugging. + */ +struct kbase_pm_policy { + char *name; + + /** + * Function called when the policy is selected + * + * This should initialize the kbdev->pm.pm_policy_data structure. It + * should not attempt to make any changes to hardware state. + * + * It is undefined what state the cores are in when the function is + * called. + * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + */ + void (*init)(struct kbase_device *kbdev); + + /** + * Function called when the policy is unselected. 
+ * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + */ + void (*term)(struct kbase_device *kbdev); + + /** + * Function called to find out if shader cores are needed + * + * This needs to at least satisfy kbdev->pm.backend.shaders_desired, + * and so must never return false when shaders_desired is true. + * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + * + * Return: true if shader cores are needed, false otherwise + */ + bool (*shaders_needed)(struct kbase_device *kbdev); + + /** + * Function called to get the current overall GPU power state + * + * This function must meet or exceed the requirements for power + * indicated by kbase_pm_is_active(). + * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + * + * Return: true if the GPU should be powered, false otherwise + */ + bool (*get_core_active)(struct kbase_device *kbdev); + + enum kbase_pm_policy_id id; +}; + +#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c new file mode 100644 index 0000000..6b821f7 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c @@ -0,0 +1,2226 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Base kernel Power Management hardware control + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_MALI_ARBITER_SUPPORT +#include +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + +#include + +#ifdef CONFIG_MALI_CORESTACK +bool corestack_driver_control = true; +#else +bool corestack_driver_control; /* Default value of 0/false */ +#endif +module_param(corestack_driver_control, bool, 0444); +MODULE_PARM_DESC(corestack_driver_control, + "Let the driver power on/off the GPU core stack independently " + "without involving the Power Domain Controller. This should " + "only be enabled on platforms for which integration of the PDC " + "to the Mali GPU is known to be problematic."); +KBASE_EXPORT_TEST_API(corestack_driver_control); + +/** + * enum kbasep_pm_action - Actions that can be performed on a core. + * + * This enumeration is private to the file. Its values are set to allow + * core_type_to_reg() function, which decodes this enumeration, to be simpler + * and more efficient. 
+ * + * @ACTION_PRESENT: The cores that are present + * @ACTION_READY: The cores that are ready + * @ACTION_PWRON: Power on the cores specified + * @ACTION_PWROFF: Power off the cores specified + * @ACTION_PWRTRANS: The cores that are transitioning + * @ACTION_PWRACTIVE: The cores that are active + */ +enum kbasep_pm_action { + ACTION_PRESENT = 0, + ACTION_READY = (SHADER_READY_LO - SHADER_PRESENT_LO), + ACTION_PWRON = (SHADER_PWRON_LO - SHADER_PRESENT_LO), + ACTION_PWROFF = (SHADER_PWROFF_LO - SHADER_PRESENT_LO), + ACTION_PWRTRANS = (SHADER_PWRTRANS_LO - SHADER_PRESENT_LO), + ACTION_PWRACTIVE = (SHADER_PWRACTIVE_LO - SHADER_PRESENT_LO) +}; + +static u64 kbase_pm_get_state( + struct kbase_device *kbdev, + enum kbase_pm_core_type core_type, + enum kbasep_pm_action action); + +bool kbase_pm_is_l2_desired(struct kbase_device *kbdev) +{ + if (kbdev->pm.backend.protected_entry_transition_override) + return false; + + if (kbdev->pm.backend.protected_transition_override && + kbdev->pm.backend.protected_l2_override) + return true; + + if (kbdev->pm.backend.protected_transition_override && + !kbdev->pm.backend.shaders_desired) + return false; + + return kbdev->pm.backend.l2_desired; +} + +void kbase_pm_protected_override_enable(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbdev->pm.backend.protected_transition_override = true; +} +void kbase_pm_protected_override_disable(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbdev->pm.backend.protected_transition_override = false; +} + +int kbase_pm_protected_entry_override_enable(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + WARN_ON(!kbdev->protected_mode_transition); + + if (kbdev->pm.backend.l2_always_on && + (kbdev->system_coherency == COHERENCY_ACE)) { + WARN_ON(kbdev->pm.backend.protected_entry_transition_override); + + /* + * If there is already a GPU reset pending then wait for it to + * complete before initiating a 
special reset for protected + * mode entry. + */ + if (kbase_reset_gpu_silent(kbdev)) + return -EAGAIN; + + kbdev->pm.backend.protected_entry_transition_override = true; + } + + return 0; +} + +void kbase_pm_protected_entry_override_disable(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + WARN_ON(!kbdev->protected_mode_transition); + + if (kbdev->pm.backend.l2_always_on && + (kbdev->system_coherency == COHERENCY_ACE)) { + WARN_ON(!kbdev->pm.backend.protected_entry_transition_override); + + kbdev->pm.backend.protected_entry_transition_override = false; + } +} + +void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (override) { + kbdev->pm.backend.protected_l2_override++; + WARN_ON(kbdev->pm.backend.protected_l2_override <= 0); + } else { + kbdev->pm.backend.protected_l2_override--; + WARN_ON(kbdev->pm.backend.protected_l2_override < 0); + } + + kbase_pm_update_state(kbdev); +} + +/** + * core_type_to_reg - Decode a core type and action to a register. + * + * Given a core type (defined by kbase_pm_core_type) and an action (defined + * by kbasep_pm_action) this function will return the register offset that + * will perform the action on the core type. The register returned is the _LO + * register and an offset must be applied to use the _HI register. + * + * @core_type: The type of core + * @action: The type of action + * + * Return: The register offset of the _LO register that performs an action of + * type @action on a core of type @core_type. 
+ */ +static u32 core_type_to_reg(enum kbase_pm_core_type core_type, + enum kbasep_pm_action action) +{ + if (corestack_driver_control) { + if (core_type == KBASE_PM_CORE_STACK) { + switch (action) { + case ACTION_PRESENT: + return STACK_PRESENT_LO; + case ACTION_READY: + return STACK_READY_LO; + case ACTION_PWRON: + return STACK_PWRON_LO; + case ACTION_PWROFF: + return STACK_PWROFF_LO; + case ACTION_PWRTRANS: + return STACK_PWRTRANS_LO; + default: + WARN(1, "Invalid action for core type\n"); + } + } + } + + return (u32)core_type + (u32)action; +} + +#ifdef CONFIG_ARM64 +static void mali_cci_flush_l2(struct kbase_device *kbdev) +{ + const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED; + u32 loops = KBASE_CLEAN_CACHE_MAX_LOOPS; + u32 raw; + + /* + * Note that we don't take the cache flush mutex here since + * we expect to be the last user of the L2, all other L2 users + * would have dropped their references, to initiate L2 power + * down, L2 power down being the only valid place for this + * to be called from. + */ + + kbase_reg_write(kbdev, + GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CLEAN_INV_CACHES); + + raw = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)); + + /* Wait for cache flush to complete before continuing, exit on + * gpu resets or loop expiry. */ + while (((raw & mask) == 0) && --loops) { + raw = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)); + } +} +#endif + +/** + * kbase_pm_invoke - Invokes an action on a core set + * + * This function performs the action given by @action on a set of cores of a + * type given by @core_type. 
It is a static function used by + * kbase_pm_transition_core_type() + * + * @kbdev: The kbase device structure of the device + * @core_type: The type of core that the action should be performed on + * @cores: A 64-bit mask of cores to perform the action on + * @action: The action to perform on the cores + */ +static void kbase_pm_invoke(struct kbase_device *kbdev, + enum kbase_pm_core_type core_type, + u64 cores, + enum kbasep_pm_action action) +{ + u32 reg; + u32 lo = cores & 0xFFFFFFFF; + u32 hi = (cores >> 32) & 0xFFFFFFFF; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + reg = core_type_to_reg(core_type, action); + + KBASE_DEBUG_ASSERT(reg); + + if (cores) { + u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY); + + if (action == ACTION_PWRON) + state |= cores; + else if (action == ACTION_PWROFF) + state &= ~cores; + KBASE_TLSTREAM_AUX_PM_STATE(kbdev, core_type, state); + } + + /* Tracing */ + if (cores) { + if (action == ACTION_PWRON) + switch (core_type) { + case KBASE_PM_CORE_SHADER: + KBASE_KTRACE_ADD(kbdev, PM_PWRON, NULL, cores); + break; + case KBASE_PM_CORE_TILER: + KBASE_KTRACE_ADD(kbdev, PM_PWRON_TILER, NULL, cores); + break; + case KBASE_PM_CORE_L2: + KBASE_KTRACE_ADD(kbdev, PM_PWRON_L2, NULL, cores); + break; + default: + break; + } + else if (action == ACTION_PWROFF) + switch (core_type) { + case KBASE_PM_CORE_SHADER: + KBASE_KTRACE_ADD(kbdev, PM_PWROFF, NULL, cores); + break; + case KBASE_PM_CORE_TILER: + KBASE_KTRACE_ADD(kbdev, PM_PWROFF_TILER, NULL, cores); + break; + case KBASE_PM_CORE_L2: + KBASE_KTRACE_ADD(kbdev, PM_PWROFF_L2, NULL, cores); + /* disable snoops before L2 is turned off */ + kbase_pm_cache_snoop_disable(kbdev); + break; + default: + break; + } + } + + if (kbase_dummy_job_wa_enabled(kbdev) && + action == ACTION_PWRON && + core_type == KBASE_PM_CORE_SHADER && + !(kbdev->dummy_job_wa.flags & + KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER)) { + kbase_dummy_job_wa_execute(kbdev, cores); + } else { + if (lo !=
0) + kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo); + if (hi != 0) + kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi); + } +} + +/** + * kbase_pm_get_state - Get information about a core set + * + * This function gets information (chosen by @action) about a set of cores of + * a type given by @core_type. It is a static function used by + * kbase_pm_get_active_cores(), kbase_pm_get_trans_cores() and + * kbase_pm_get_ready_cores(). + * + * @kbdev: The kbase device structure of the device + * @core_type: The type of core that should be queried + * @action: The property of the cores to query + * + * Return: A bit mask specifying the state of the cores + */ +static u64 kbase_pm_get_state(struct kbase_device *kbdev, + enum kbase_pm_core_type core_type, + enum kbasep_pm_action action) +{ + u32 reg; + u32 lo, hi; + + reg = core_type_to_reg(core_type, action); + + KBASE_DEBUG_ASSERT(reg); + + lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg)); + hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4)); + + return (((u64) hi) << 32) | ((u64) lo); +} + +/** + * kbase_pm_get_present_cores - Get the cores that are present + * + * @kbdev: Kbase device + * @type: The type of cores to query + * + * Return: Bitmask of the cores that are present + */ +u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, + enum kbase_pm_core_type type) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + switch (type) { + case KBASE_PM_CORE_L2: + return kbdev->gpu_props.props.raw_props.l2_present; + case KBASE_PM_CORE_SHADER: + return kbdev->gpu_props.props.raw_props.shader_present; + case KBASE_PM_CORE_TILER: + return kbdev->gpu_props.props.raw_props.tiler_present; + case KBASE_PM_CORE_STACK: + return kbdev->gpu_props.props.raw_props.stack_present; + default: + break; + } + KBASE_DEBUG_ASSERT(0); + + return 0; +} + +KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores); + +/** + * kbase_pm_get_active_cores - Get the cores that are "active" + * (busy processing work) + * + * @kbdev: Kbase device + *
@type: The type of cores to query + * + * Return: Bitmask of cores that are active + */ +u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, + enum kbase_pm_core_type type) +{ + return kbase_pm_get_state(kbdev, type, ACTION_PWRACTIVE); +} + +KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores); + +/** + * kbase_pm_get_trans_cores - Get the cores that are transitioning between + * power states + * + * @kbdev: Kbase device + * @type: The type of cores to query + * + * Return: Bitmask of cores that are transitioning + */ +u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, + enum kbase_pm_core_type type) +{ + return kbase_pm_get_state(kbdev, type, ACTION_PWRTRANS); +} + +KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores); + +/** + * kbase_pm_get_ready_cores - Get the cores that are powered on + * + * @kbdev: Kbase device + * @type: The type of cores to query + * + * Return: Bitmask of cores that are ready (powered on) + */ +u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, + enum kbase_pm_core_type type) +{ + u64 result; + + result = kbase_pm_get_state(kbdev, type, ACTION_READY); + + switch (type) { + case KBASE_PM_CORE_SHADER: + KBASE_KTRACE_ADD(kbdev, PM_CORES_POWERED, NULL, result); + break; + case KBASE_PM_CORE_TILER: + KBASE_KTRACE_ADD(kbdev, PM_CORES_POWERED_TILER, NULL, result); + break; + case KBASE_PM_CORE_L2: + KBASE_KTRACE_ADD(kbdev, PM_CORES_POWERED_L2, NULL, result); + break; + default: + break; + } + + return result; +} + +KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores); + +static void kbase_pm_trigger_hwcnt_disable(struct kbase_device *kbdev) +{ + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* See if we can get away with disabling hwcnt + * atomically, otherwise kick off a worker. 
+ */ + if (kbase_hwcnt_context_disable_atomic(kbdev->hwcnt_gpu_ctx)) { + backend->hwcnt_disabled = true; + } else { +#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE + queue_work(system_wq, + &backend->hwcnt_disable_work); +#else + queue_work(system_highpri_wq, + &backend->hwcnt_disable_work); +#endif + } +} + +static void kbase_pm_l2_config_override(struct kbase_device *kbdev) +{ + u32 val; + + /* + * Skip if it is not supported + */ + if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) + return; + + /* + * Skip if size and hash are not given explicitly, + * which means default values are used. + */ + if ((kbdev->l2_size_override == 0) && (kbdev->l2_hash_override == 0)) + return; + + val = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG)); + + if (kbdev->l2_size_override) { + val &= ~L2_CONFIG_SIZE_MASK; + val |= (kbdev->l2_size_override << L2_CONFIG_SIZE_SHIFT); + } + + if (kbdev->l2_hash_override) { + val &= ~L2_CONFIG_HASH_MASK; + val |= (kbdev->l2_hash_override << L2_CONFIG_HASH_SHIFT); + } + + dev_dbg(kbdev->dev, "Program 0x%x to L2_CONFIG\n", val); + + /* Write L2_CONFIG to override */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_CONFIG), val); +} + +static void kbase_pm_control_gpu_clock(struct kbase_device *kbdev) +{ + struct kbase_pm_backend_data *const backend = &kbdev->pm.backend; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + queue_work(system_wq, &backend->gpu_clock_control_work); +} + +static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state) +{ + const char *const strings[] = { +#define KBASEP_L2_STATE(n) #n, +#include "mali_kbase_pm_l2_states.h" +#undef KBASEP_L2_STATE + }; + if (WARN_ON((size_t)state >= ARRAY_SIZE(strings))) + return "Bad level 2 cache state"; + else + return strings[state]; +} + +static int kbase_pm_l2_update_state(struct kbase_device *kbdev) +{ + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + u64 l2_present = kbdev->gpu_props.props.raw_props.l2_present; + u64 tiler_present = 
kbdev->gpu_props.props.raw_props.tiler_present; + enum kbase_l2_core_state prev_state; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + do { + /* Get current state */ + u64 l2_trans = kbase_pm_get_trans_cores(kbdev, + KBASE_PM_CORE_L2); + u64 l2_ready = kbase_pm_get_ready_cores(kbdev, + KBASE_PM_CORE_L2); + u64 tiler_trans = kbase_pm_get_trans_cores(kbdev, + KBASE_PM_CORE_TILER); + u64 tiler_ready = kbase_pm_get_ready_cores(kbdev, + KBASE_PM_CORE_TILER); + + /* + * kbase_pm_get_ready_cores and kbase_pm_get_trans_cores + * are vulnerable to corruption if gpu is lost + */ + if (kbase_is_gpu_lost(kbdev)) + return -EIO; + + /* mask off ready from trans in case transitions finished + * between the register reads + */ + l2_trans &= ~l2_ready; + tiler_trans &= ~tiler_ready; + + prev_state = backend->l2_state; + + switch (backend->l2_state) { + case KBASE_L2_OFF: + if (kbase_pm_is_l2_desired(kbdev)) { + /* + * Set the desired config for L2 before powering + * it on + */ + kbase_pm_l2_config_override(kbdev); + + /* L2 is required, power on. Powering on the + * tiler will also power the first L2 cache. + */ + kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER, + tiler_present, ACTION_PWRON); + + /* If we have more than one L2 cache then we + * must power them on explicitly. + */ + if (l2_present != 1) + kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, + l2_present & ~1, + ACTION_PWRON); + backend->l2_state = KBASE_L2_PEND_ON; + } + break; + + case KBASE_L2_PEND_ON: + if (!l2_trans && l2_ready == l2_present && !tiler_trans + && tiler_ready == tiler_present) { + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, tiler_ready); + /* + * Ensure snoops are enabled after L2 is powered + * up. Note that kbase keeps track of the snoop + * state, so safe to repeatedly call. + */ + kbase_pm_cache_snoop_enable(kbdev); + + /* With the L2 enabled, we can now enable + * hardware counters. 
+ */ + if (kbdev->pm.backend.gpu_clock_slow_down_wa) + backend->l2_state = + KBASE_L2_RESTORE_CLOCKS; + else + backend->l2_state = + KBASE_L2_ON_HWCNT_ENABLE; + + /* Now that the L2 is on, the shaders can start + * powering on if they're required. The obvious + * way to do this would be to call + * kbase_pm_shaders_update_state() here. + * However, that would make the two state + * machines mutually recursive, as the opposite + * would be needed for powering down. Instead, + * callers of this function should use the + * kbase_pm_update_state() wrapper, which will + * call the shader state machine immediately + * after the L2 (for power up), or + * automatically re-invoke the L2 state machine + * when the shaders power down. + */ + } + break; + + case KBASE_L2_RESTORE_CLOCKS: + /* We always assume only GPUs being affected by + * BASE_HW_ISSUE_GPU2017_1336 fall into this state + */ + WARN_ON_ONCE(!kbdev->pm.backend.gpu_clock_slow_down_wa); + + /* If L2 not needed, we need to make sure cancellation + * of any previously issued work to restore GPU clock. + * For it, move to KBASE_L2_SLOW_DOWN_CLOCKS state. + */ + if (!kbase_pm_is_l2_desired(kbdev)) { + backend->l2_state = KBASE_L2_SLOW_DOWN_CLOCKS; + break; + } + + backend->gpu_clock_slow_down_desired = false; + if (backend->gpu_clock_slowed_down) + kbase_pm_control_gpu_clock(kbdev); + else + backend->l2_state = KBASE_L2_ON_HWCNT_ENABLE; + break; + + case KBASE_L2_ON_HWCNT_ENABLE: + backend->hwcnt_desired = true; + if (backend->hwcnt_disabled) { + kbase_hwcnt_context_enable( + kbdev->hwcnt_gpu_ctx); + backend->hwcnt_disabled = false; + } + backend->l2_state = KBASE_L2_ON; + break; + + case KBASE_L2_ON: + if (!kbase_pm_is_l2_desired(kbdev)) { + /* Do not power off L2 until the shaders and + * core stacks are off. + */ + if (backend->shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) + break; + + /* We need to make sure hardware counters are + * disabled before powering down the L2, to + * prevent loss of data. 
+ * + * We waited until after the cores were powered + * down to prevent ping-ponging between hwcnt + * enabled and disabled, which would have + * happened if userspace submitted more work + * while we were trying to power down. + */ + backend->l2_state = KBASE_L2_ON_HWCNT_DISABLE; + } + break; + + case KBASE_L2_ON_HWCNT_DISABLE: + /* If the L2 became desired while we were waiting on the + * worker to do the actual hwcnt disable (which might + * happen if some work was submitted immediately after + * the shaders powered off), then we need to early-out + * of this state and re-enable hwcnt. + * + * If we get lucky, the hwcnt disable might not have + * actually started yet, and the logic in the hwcnt + * enable state will prevent the worker from + * performing the disable entirely, preventing loss of + * any hardware counter data. + * + * If the hwcnt disable has started, then we'll lose + * a tiny amount of hardware counter data between the + * disable and the re-enable occurring. + * + * This loss of data is preferable to the alternative, + * which is to block the shader cores from doing any + * work until we're sure hwcnt has been re-enabled. + */ + if (kbase_pm_is_l2_desired(kbdev)) { + backend->l2_state = KBASE_L2_ON_HWCNT_ENABLE; + break; + } + + backend->hwcnt_desired = false; + if (!backend->hwcnt_disabled) { + kbase_pm_trigger_hwcnt_disable(kbdev); + } + + if (backend->hwcnt_disabled) { + if (kbdev->pm.backend.gpu_clock_slow_down_wa) + backend->l2_state = + KBASE_L2_SLOW_DOWN_CLOCKS; + else + backend->l2_state = KBASE_L2_POWER_DOWN; + } + break; + + case KBASE_L2_SLOW_DOWN_CLOCKS: + /* We always assume only GPUs being affected by + * BASE_HW_ISSUE_GPU2017_1336 fall into this state + */ + WARN_ON_ONCE(!kbdev->pm.backend.gpu_clock_slow_down_wa); + + /* L2 needs to be powered up. And we need to make sure + * cancellation of any previously issued work to slow + * down GPU clock. For it, we move to the state, + * KBASE_L2_RESTORE_CLOCKS. 
+ */ + if (kbase_pm_is_l2_desired(kbdev)) { + backend->l2_state = KBASE_L2_RESTORE_CLOCKS; + break; + } + + backend->gpu_clock_slow_down_desired = true; + if (!backend->gpu_clock_slowed_down) + kbase_pm_control_gpu_clock(kbdev); + else + backend->l2_state = KBASE_L2_POWER_DOWN; + + break; + + case KBASE_L2_POWER_DOWN: + if (!backend->l2_always_on) + /* Powering off the L2 will also power off the + * tiler. + */ + kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, + l2_present, + ACTION_PWROFF); + else + /* If L2 cache is powered then we must flush it + * before we power off the GPU. Normally this + * would have been handled when the L2 was + * powered off. + */ + kbase_gpu_start_cache_clean_nolock( + kbdev); + + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, 0u); + + backend->l2_state = KBASE_L2_PEND_OFF; + break; + + case KBASE_L2_PEND_OFF: + if (!backend->l2_always_on) { + /* We only need to check the L2 here - if the L2 + * is off then the tiler is definitely also off. + */ + if (!l2_trans && !l2_ready) + /* L2 is now powered off */ + backend->l2_state = KBASE_L2_OFF; + } else { + if (!kbdev->cache_clean_in_progress) + backend->l2_state = KBASE_L2_OFF; + } + break; + + case KBASE_L2_RESET_WAIT: + /* Reset complete */ + if (!backend->in_reset) + backend->l2_state = KBASE_L2_OFF; + break; + + default: + WARN(1, "Invalid state in l2_state: %d", + backend->l2_state); + } + + if (backend->l2_state != prev_state) + dev_dbg(kbdev->dev, "L2 state transition: %s to %s\n", + kbase_l2_core_state_to_string(prev_state), + kbase_l2_core_state_to_string( + backend->l2_state)); + + } while (backend->l2_state != prev_state); + + if (kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off && + backend->l2_state == KBASE_L2_OFF) { + kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off = false; + queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq, + &kbdev->pm.backend.gpu_poweroff_wait_work); + } + + return 0; +} + +static void shader_poweroff_timer_stop_callback(struct 
work_struct *data) +{ + unsigned long flags; + struct kbasep_pm_tick_timer_state *stt = container_of(data, + struct kbasep_pm_tick_timer_state, work); + struct kbase_device *kbdev = container_of(stt, struct kbase_device, + pm.backend.shader_tick_timer); + + hrtimer_cancel(&stt->timer); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + stt->cancel_queued = false; + if (kbdev->pm.backend.gpu_powered) + kbase_pm_update_state(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +/** + * shader_poweroff_timer_queue_cancel - cancel the shader poweroff tick timer + * @kbdev: pointer to kbase device + * + * Synchronization between the shader state machine and the timer thread is + * difficult. This is because situations may arise where the state machine + * wants to start the timer, but the callback is already running, and has + * already passed the point at which it checks whether it is required, and so + * cancels itself, even though the state machine may have just tried to call + * hrtimer_start. + * + * This cannot be stopped by holding hwaccess_lock in the timer thread, + * because there are still infinitesimally small sections at the start and end + * of the callback where the lock is not held. + * + * Instead, a new state is added to the shader state machine, + * KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF. This is used to guarantee + * that when the shaders are switched off, the timer has definitely been + * cancelled. As a result, when KBASE_SHADERS_ON_CORESTACK_ON is left and the + * timer is started, it is guaranteed that either the timer is already running + * (from an availability change or cancelled timer), or hrtimer_start will + * succeed. It is critical to avoid ending up in + * KBASE_SHADERS_WAIT_OFF_CORESTACK_ON without the timer running, or it could + * hang there forever. 
+ */ +static void shader_poweroff_timer_queue_cancel(struct kbase_device *kbdev) +{ + struct kbasep_pm_tick_timer_state *stt = + &kbdev->pm.backend.shader_tick_timer; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + stt->needed = false; + + if (hrtimer_active(&stt->timer) && !stt->cancel_queued) { + stt->cancel_queued = true; + queue_work(stt->wq, &stt->work); + } +} + +static const char *kbase_shader_core_state_to_string( + enum kbase_shader_core_state state) +{ + const char *const strings[] = { +#define KBASEP_SHADER_STATE(n) #n, +#include "mali_kbase_pm_shader_states.h" +#undef KBASEP_SHADER_STATE + }; + if (WARN_ON((size_t)state >= ARRAY_SIZE(strings))) + return "Bad shader core state"; + else + return strings[state]; +} + +static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) +{ + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + struct kbasep_pm_tick_timer_state *stt = + &kbdev->pm.backend.shader_tick_timer; + enum kbase_shader_core_state prev_state; + u64 stacks_avail = 0; + int err = 0; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (corestack_driver_control) + /* Always power on all the corestacks. Disabling certain + * corestacks when their respective shaders are not in the + * available bitmap is not currently supported. 
+ */ + stacks_avail = kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_STACK); + + do { + u64 shaders_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_SHADER); + u64 shaders_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); + u64 stacks_trans = 0; + u64 stacks_ready = 0; + + if (corestack_driver_control) { + stacks_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_STACK); + stacks_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK); + } + + /* + * kbase_pm_get_ready_cores and kbase_pm_get_trans_cores + * are vulnerable to corruption if gpu is lost + */ + if (kbase_is_gpu_lost(kbdev)) { + err = -EIO; + break; + } + + /* mask off ready from trans in case transitions finished + * between the register reads + */ + shaders_trans &= ~shaders_ready; + stacks_trans &= ~stacks_ready; + + prev_state = backend->shaders_state; + + switch (backend->shaders_state) { + case KBASE_SHADERS_OFF_CORESTACK_OFF: + /* Ignore changes to the shader core availability + * except at certain points where we can handle it, + * i.e. off and SHADERS_ON_CORESTACK_ON. 
+ */ + backend->shaders_desired_mask = + kbase_pm_ca_get_core_mask(kbdev); + backend->pm_shaders_core_mask = 0; + + if (backend->shaders_desired && + backend->l2_state == KBASE_L2_ON) { + if (backend->hwcnt_desired && + !backend->hwcnt_disabled) { + /* Trigger a hwcounter dump */ + backend->hwcnt_desired = false; + kbase_pm_trigger_hwcnt_disable(kbdev); + } + + if (backend->hwcnt_disabled) { + if (corestack_driver_control) { + kbase_pm_invoke(kbdev, + KBASE_PM_CORE_STACK, + stacks_avail, + ACTION_PWRON); + } + backend->shaders_state = + KBASE_SHADERS_OFF_CORESTACK_PEND_ON; + } + } + break; + + case KBASE_SHADERS_OFF_CORESTACK_PEND_ON: + if (!stacks_trans && stacks_ready == stacks_avail) { + backend->shaders_avail = + backend->shaders_desired_mask; + kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, + backend->shaders_avail, ACTION_PWRON); + + backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; + } + break; + + case KBASE_SHADERS_PEND_ON_CORESTACK_ON: + if (!shaders_trans && shaders_ready == backend->shaders_avail) { + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, shaders_ready); + backend->pm_shaders_core_mask = shaders_ready; + backend->hwcnt_desired = true; + if (backend->hwcnt_disabled) { + kbase_hwcnt_context_enable( + kbdev->hwcnt_gpu_ctx); + backend->hwcnt_disabled = false; + } + + backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON; + } + break; + + case KBASE_SHADERS_ON_CORESTACK_ON: + backend->shaders_desired_mask = + kbase_pm_ca_get_core_mask(kbdev); + + /* If shaders to change state, trigger a counter dump */ + if (!backend->shaders_desired || + (backend->shaders_desired_mask != shaders_ready)) { + backend->hwcnt_desired = false; + if (!backend->hwcnt_disabled) + kbase_pm_trigger_hwcnt_disable(kbdev); + backend->shaders_state = + KBASE_SHADERS_ON_CORESTACK_ON_RECHECK; + } + break; + + case KBASE_SHADERS_ON_CORESTACK_ON_RECHECK: + backend->shaders_desired_mask = + kbase_pm_ca_get_core_mask(kbdev); + + if (!backend->hwcnt_disabled) { + /* 
Wait for being disabled */ + ; + } else if (!backend->shaders_desired) { + if (kbdev->pm.backend.protected_transition_override || +#ifdef CONFIG_MALI_ARBITER_SUPPORT + kbase_pm_is_suspending(kbdev) || + kbase_pm_is_gpu_lost(kbdev) || +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + !stt->configured_ticks || + WARN_ON(stt->cancel_queued)) { + backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; + } else { + stt->remaining_ticks = stt->configured_ticks; + stt->needed = true; + + /* The shader hysteresis timer is not + * done the obvious way, which would be + * to start an hrtimer when the shader + * power off is requested. Instead, + * use a 'tick' timer, and set the + * remaining number of ticks on a power + * off request. This avoids the + * latency of starting, then + * immediately cancelling an hrtimer + * when the shaders are re-requested + * before the timeout expires. + */ + if (!hrtimer_active(&stt->timer)) + hrtimer_start(&stt->timer, + stt->configured_interval, + HRTIMER_MODE_REL); + + backend->shaders_state = KBASE_SHADERS_WAIT_OFF_CORESTACK_ON; + } + } else if (backend->shaders_desired_mask & ~shaders_ready) { + /* set cores ready but not available to + * meet KBASE_SHADERS_PEND_ON_CORESTACK_ON + * check pass + */ + backend->shaders_avail = + (backend->shaders_desired_mask | shaders_ready); + + kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, + backend->shaders_avail & ~shaders_ready, + ACTION_PWRON); + backend->shaders_state = + KBASE_SHADERS_PEND_ON_CORESTACK_ON; + } else if (shaders_ready & ~backend->shaders_desired_mask) { + backend->shaders_state = + KBASE_SHADERS_WAIT_GPU_IDLE; + } else { + backend->shaders_state = + KBASE_SHADERS_PEND_ON_CORESTACK_ON; + } + break; + + case KBASE_SHADERS_WAIT_OFF_CORESTACK_ON: + if (WARN_ON(!hrtimer_active(&stt->timer))) { + stt->remaining_ticks = 0; + backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; + } + + if (backend->shaders_desired) { + stt->remaining_ticks = 0; + backend->shaders_state = 
KBASE_SHADERS_ON_CORESTACK_ON_RECHECK; + } else if (stt->remaining_ticks == 0) { + backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; +#ifdef CONFIG_MALI_ARBITER_SUPPORT + } else if (kbase_pm_is_suspending(kbdev) || + kbase_pm_is_gpu_lost(kbdev)) { + backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + } + break; + + case KBASE_SHADERS_WAIT_GPU_IDLE: + /* If partial shader core off need to wait the job in + * running and next register finished then flush L2 + * or it might hit GPU2017-861 + */ + if (!kbase_gpu_atoms_submitted_any(kbdev)) { + backend->partial_shaderoff = true; + backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; + } + break; + + case KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON: + shader_poweroff_timer_queue_cancel(kbdev); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) { + kbase_gpu_start_cache_clean_nolock(kbdev); + backend->shaders_state = + KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON; + } else { + backend->shaders_state = + KBASE_SHADERS_READY_OFF_CORESTACK_ON; + } + break; + + case KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON: + if (!kbdev->cache_clean_in_progress) + backend->shaders_state = + KBASE_SHADERS_READY_OFF_CORESTACK_ON; + + break; + + case KBASE_SHADERS_READY_OFF_CORESTACK_ON: + if (backend->partial_shaderoff) { + backend->partial_shaderoff = false; + /* remove cores available but not ready to + * meet KBASE_SHADERS_PEND_ON_CORESTACK_ON + * check pass + */ + + /* shaders_desired_mask shall be a subset of + * shaders_ready + */ + WARN_ON(backend->shaders_desired_mask & ~shaders_ready); + WARN_ON(!(backend->shaders_desired_mask & shaders_ready)); + + backend->shaders_avail = + backend->shaders_desired_mask; + kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, + shaders_ready & ~backend->shaders_avail, ACTION_PWROFF); + backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, (shaders_ready & 
~backend->shaders_avail)); + } else { + kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, + shaders_ready, ACTION_PWROFF); + + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, 0u); + + backend->shaders_state = KBASE_SHADERS_PEND_OFF_CORESTACK_ON; + } + break; + + case KBASE_SHADERS_PEND_OFF_CORESTACK_ON: + if (!shaders_trans && !shaders_ready) { + if (corestack_driver_control) + kbase_pm_invoke(kbdev, KBASE_PM_CORE_STACK, + stacks_avail, ACTION_PWROFF); + + backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_PEND_OFF; + } + break; + + case KBASE_SHADERS_OFF_CORESTACK_PEND_OFF: + if (!stacks_trans && !stacks_ready) { + /* On powered off, re-enable the hwcnt */ + backend->pm_shaders_core_mask = 0; + backend->hwcnt_desired = true; + if (backend->hwcnt_disabled) { + kbase_hwcnt_context_enable( + kbdev->hwcnt_gpu_ctx); + backend->hwcnt_disabled = false; + } + backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF; + } + break; + + case KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF: + if (!hrtimer_active(&stt->timer) && !stt->cancel_queued) + backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF; + break; + + case KBASE_SHADERS_RESET_WAIT: + /* Reset complete */ + if (!backend->in_reset) + backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF; + break; + } + + if (backend->shaders_state != prev_state) + dev_dbg(kbdev->dev, "Shader state transition: %s to %s\n", + kbase_shader_core_state_to_string(prev_state), + kbase_shader_core_state_to_string( + backend->shaders_state)); + + } while (backend->shaders_state != prev_state); + + return err; +} + +static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev) +{ + bool in_desired_state = true; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (kbase_pm_is_l2_desired(kbdev) && + kbdev->pm.backend.l2_state != KBASE_L2_ON) + in_desired_state = false; + else if (!kbase_pm_is_l2_desired(kbdev) && + kbdev->pm.backend.l2_state != KBASE_L2_OFF) + in_desired_state = 
false; + + if (kbdev->pm.backend.shaders_desired && + kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON) + in_desired_state = false; + else if (!kbdev->pm.backend.shaders_desired && + kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) + in_desired_state = false; + + return in_desired_state; +} + +static bool kbase_pm_is_in_desired_state(struct kbase_device *kbdev) +{ + bool in_desired_state; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + in_desired_state = kbase_pm_is_in_desired_state_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return in_desired_state; +} + +static bool kbase_pm_is_in_desired_state_with_l2_powered( + struct kbase_device *kbdev) +{ + bool in_desired_state = false; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + if (kbase_pm_is_in_desired_state_nolock(kbdev) && + (kbdev->pm.backend.l2_state == KBASE_L2_ON)) + in_desired_state = true; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return in_desired_state; +} + +static void kbase_pm_trace_power_state(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + KBASE_TLSTREAM_AUX_PM_STATE( + kbdev, + KBASE_PM_CORE_L2, + kbase_pm_get_ready_cores( + kbdev, KBASE_PM_CORE_L2)); + KBASE_TLSTREAM_AUX_PM_STATE( + kbdev, + KBASE_PM_CORE_SHADER, + kbase_pm_get_ready_cores( + kbdev, KBASE_PM_CORE_SHADER)); + KBASE_TLSTREAM_AUX_PM_STATE( + kbdev, + KBASE_PM_CORE_TILER, + kbase_pm_get_ready_cores( + kbdev, + KBASE_PM_CORE_TILER)); + + if (corestack_driver_control) + KBASE_TLSTREAM_AUX_PM_STATE( + kbdev, + KBASE_PM_CORE_STACK, + kbase_pm_get_ready_cores( + kbdev, + KBASE_PM_CORE_STACK)); +} + +void kbase_pm_update_state(struct kbase_device *kbdev) +{ + enum kbase_shader_core_state prev_shaders_state = + kbdev->pm.backend.shaders_state; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (!kbdev->pm.backend.gpu_powered) + return; /* Do nothing if the GPU is 
off */ + + if (kbase_pm_l2_update_state(kbdev)) + return; + + if (kbase_pm_shaders_update_state(kbdev)) + return; + + /* If the shaders just turned off, re-invoke the L2 state machine, in + * case it was waiting for the shaders to turn off before powering down + * the L2. + */ + if (prev_shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF && + kbdev->pm.backend.shaders_state == + KBASE_SHADERS_OFF_CORESTACK_OFF) { + if (kbase_pm_l2_update_state(kbdev)) + return; + } + + if (kbase_pm_is_in_desired_state_nolock(kbdev)) { + KBASE_KTRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, kbdev->pm.backend.shaders_avail); + + kbase_pm_trace_power_state(kbdev); + + KBASE_KTRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, 0); + wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait); + } +} + +static enum hrtimer_restart +shader_tick_timer_callback(struct hrtimer *timer) +{ + struct kbasep_pm_tick_timer_state *stt = container_of(timer, + struct kbasep_pm_tick_timer_state, timer); + struct kbase_device *kbdev = container_of(stt, struct kbase_device, + pm.backend.shader_tick_timer); + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + unsigned long flags; + enum hrtimer_restart restart = HRTIMER_NORESTART; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (stt->remaining_ticks && + backend->shaders_state == KBASE_SHADERS_WAIT_OFF_CORESTACK_ON) { + stt->remaining_ticks--; + + /* If the remaining ticks just changed from 1 to 0, invoke the + * PM state machine to power off the shader cores. 
+ */ + if (!stt->remaining_ticks && !backend->shaders_desired) + kbase_pm_update_state(kbdev); + } + + if (stt->needed) { + hrtimer_forward_now(timer, stt->configured_interval); + restart = HRTIMER_RESTART; + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return restart; +} + +int kbase_pm_state_machine_init(struct kbase_device *kbdev) +{ + struct kbasep_pm_tick_timer_state *stt = &kbdev->pm.backend.shader_tick_timer; + + stt->wq = alloc_workqueue("kbase_pm_shader_poweroff", WQ_HIGHPRI | WQ_UNBOUND, 1); + if (!stt->wq) + return -ENOMEM; + + INIT_WORK(&stt->work, shader_poweroff_timer_stop_callback); + + stt->needed = false; + hrtimer_init(&stt->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + stt->timer.function = shader_tick_timer_callback; + stt->configured_interval = HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS); + stt->configured_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER; + + return 0; +} + +void kbase_pm_state_machine_term(struct kbase_device *kbdev) +{ + hrtimer_cancel(&kbdev->pm.backend.shader_tick_timer.timer); + destroy_workqueue(kbdev->pm.backend.shader_tick_timer.wq); +} + +void kbase_pm_reset_start_locked(struct kbase_device *kbdev) +{ + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + backend->in_reset = true; + backend->l2_state = KBASE_L2_RESET_WAIT; + backend->shaders_state = KBASE_SHADERS_RESET_WAIT; + + /* We're in a reset, so hwcnt will have been synchronously disabled by + * this function's caller as part of the reset process. We therefore + * know that any call to kbase_hwcnt_context_disable_atomic, if + * required to sync the hwcnt refcount with our internal state, is + * guaranteed to succeed. 
+ */ + backend->hwcnt_desired = false; + if (!backend->hwcnt_disabled) { + WARN_ON(!kbase_hwcnt_context_disable_atomic( + kbdev->hwcnt_gpu_ctx)); + backend->hwcnt_disabled = true; + } + + shader_poweroff_timer_queue_cancel(kbdev); +} + +void kbase_pm_reset_complete(struct kbase_device *kbdev) +{ + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + unsigned long flags; + + WARN_ON(!kbase_reset_gpu_is_active(kbdev)); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + /* As GPU has just been reset, that results in implicit flush of L2 + * cache, can safely mark the pending cache flush operation (if there + * was any) as complete and unblock the waiter. + * No work can be submitted whilst GPU reset is ongoing. + */ + kbase_gpu_cache_clean_wait_complete(kbdev); + backend->in_reset = false; + kbase_pm_update_state(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +/* Timeout for kbase_pm_wait_for_desired_state when wait_event_killable has + * aborted due to a fatal signal. If the time spent waiting has exceeded this + * threshold then there is most likely a hardware issue. */ +#define PM_TIMEOUT (5*HZ) /* 5s */ + +static void kbase_pm_timed_out(struct kbase_device *kbdev) +{ + dev_err(kbdev->dev, "Power transition timed out unexpectedly\n"); + dev_err(kbdev->dev, "Desired state :\n"); + dev_err(kbdev->dev, "\tShader=%016llx\n", + kbdev->pm.backend.shaders_desired ? 
kbdev->pm.backend.shaders_avail : 0); + dev_err(kbdev->dev, "Current state :\n"); + dev_err(kbdev->dev, "\tShader=%08x%08x\n", + kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_READY_HI)), + kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_READY_LO))); + dev_err(kbdev->dev, "\tTiler =%08x%08x\n", + kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_READY_HI)), + kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_READY_LO))); + dev_err(kbdev->dev, "\tL2 =%08x%08x\n", + kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_READY_HI)), + kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_READY_LO))); + dev_err(kbdev->dev, "Cores transitioning :\n"); + dev_err(kbdev->dev, "\tShader=%08x%08x\n", + kbase_reg_read(kbdev, GPU_CONTROL_REG( + SHADER_PWRTRANS_HI)), + kbase_reg_read(kbdev, GPU_CONTROL_REG( + SHADER_PWRTRANS_LO))); + dev_err(kbdev->dev, "\tTiler =%08x%08x\n", + kbase_reg_read(kbdev, GPU_CONTROL_REG( + TILER_PWRTRANS_HI)), + kbase_reg_read(kbdev, GPU_CONTROL_REG( + TILER_PWRTRANS_LO))); + dev_err(kbdev->dev, "\tL2 =%08x%08x\n", + kbase_reg_read(kbdev, GPU_CONTROL_REG( + L2_PWRTRANS_HI)), + kbase_reg_read(kbdev, GPU_CONTROL_REG( + L2_PWRTRANS_LO))); + + dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); + if (kbase_prepare_to_reset_gpu(kbdev)) + kbase_reset_gpu(kbdev); +} + +void kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev) +{ + unsigned long flags; + unsigned long timeout; + int err; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + timeout = jiffies + PM_TIMEOUT; + + /* Wait for cores */ + err = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait, + kbase_pm_is_in_desired_state_with_l2_powered(kbdev)); + + if (err < 0 && time_after(jiffies, timeout)) + kbase_pm_timed_out(kbdev); +} + +void kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) +{ + unsigned long flags; + unsigned long timeout; + int err; + + /* Let the state machine latch the 
most recent desired state. */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + timeout = jiffies + PM_TIMEOUT; + + /* Wait for cores */ + err = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait, + kbase_pm_is_in_desired_state(kbdev)); + + if (err < 0 && time_after(jiffies, timeout)) + kbase_pm_timed_out(kbdev); +} +KBASE_EXPORT_TEST_API(kbase_pm_wait_for_desired_state); + +void kbase_pm_enable_interrupts(struct kbase_device *kbdev) +{ + unsigned long flags; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + /* + * Clear all interrupts, + * and unmask them all. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF); +} + +KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts); + +void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(NULL != kbdev); + /* + * Mask all interrupts, + * and clear them all. 
+ */ + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); +} + +void kbase_pm_disable_interrupts(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_disable_interrupts_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts); + +/* + * pmu layout: + * 0x0000: PMU TAG (RO) (0xCAFECAFE) + * 0x0004: PMU VERSION ID (RO) (0x00000000) + * 0x0008: CLOCK ENABLE (RW) (31:1 SBZ, 0 CLOCK STATE) + */ +void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) +{ + bool reset_required = is_resume; + unsigned long flags; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + lockdep_assert_held(&kbdev->js_data.runpool_mutex); + lockdep_assert_held(&kbdev->pm.lock); + + if (kbdev->pm.backend.gpu_powered) { + /* Already turned on */ + if (kbdev->poweroff_pending) + kbase_pm_enable_interrupts(kbdev); + kbdev->poweroff_pending = false; + KBASE_DEBUG_ASSERT(!is_resume); + return; + } + + kbdev->poweroff_pending = false; + + KBASE_KTRACE_ADD(kbdev, PM_GPU_ON, NULL, 0u); + + if (is_resume && kbdev->pm.backend.callback_power_resume) { + kbdev->pm.backend.callback_power_resume(kbdev); + return; + } else if (kbdev->pm.backend.callback_power_on) { + reset_required = kbdev->pm.backend.callback_power_on(kbdev); + } + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.gpu_powered = true; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (reset_required) { + /* GPU state was lost, reset GPU to ensure it is in a + * consistent state */ + kbase_pm_init_hw(kbdev, 
PM_ENABLE_IRQS); + } + + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_ctx_sched_restore_all_as(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); + + if (kbdev->dummy_job_wa.flags & + KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) { + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_dummy_job_wa_execute(kbdev, + kbase_pm_get_present_cores(kbdev, + KBASE_PM_CORE_SHADER)); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + + /* Enable the interrupts */ + kbase_pm_enable_interrupts(kbdev); + + /* Turn on the L2 caches */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.l2_desired = true; + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +KBASE_EXPORT_TEST_API(kbase_pm_clock_on); + +bool kbase_pm_clock_off(struct kbase_device *kbdev) +{ + unsigned long flags; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + lockdep_assert_held(&kbdev->pm.lock); + + /* ASSERT that the cores should now be unavailable. No lock needed. */ + WARN_ON(kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF); + + kbdev->poweroff_pending = true; + + if (!kbdev->pm.backend.gpu_powered) { + /* Already turned off */ + return true; + } + + KBASE_KTRACE_ADD(kbdev, PM_GPU_OFF, NULL, 0u); + + /* Disable interrupts. This also clears any outstanding interrupts */ + kbase_pm_disable_interrupts(kbdev); + /* Ensure that any IRQ handlers have finished */ + kbase_synchronize_irqs(kbdev); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (atomic_read(&kbdev->faults_pending)) { + /* Page/bus faults are still being processed. 
The GPU can not + * be powered off until they have completed */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return false; + } + + kbase_pm_cache_snoop_disable(kbdev); + + /* The GPU power may be turned off from this point */ + kbdev->pm.backend.gpu_powered = false; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#ifdef CONFIG_MALI_ARBITER_SUPPORT + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_IDLE_EVENT); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + + if (kbdev->pm.backend.callback_power_off) + kbdev->pm.backend.callback_power_off(kbdev); + return true; +} + +KBASE_EXPORT_TEST_API(kbase_pm_clock_off); + +struct kbasep_reset_timeout_data { + struct hrtimer timer; + bool timed_out; + struct kbase_device *kbdev; +}; + +void kbase_pm_reset_done(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + kbdev->pm.backend.reset_done = true; + wake_up(&kbdev->pm.backend.reset_done_wait); +} + +/** + * kbase_pm_wait_for_reset - Wait for a reset to happen + * + * Wait for the %RESET_COMPLETED IRQ to occur, then reset the waiting state. 
+ * + * @kbdev: Kbase device + */ +static void kbase_pm_wait_for_reset(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->pm.lock); + + wait_event(kbdev->pm.backend.reset_done_wait, + (kbdev->pm.backend.reset_done)); + kbdev->pm.backend.reset_done = false; +} + +KBASE_EXPORT_TEST_API(kbase_pm_reset_done); + +static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer) +{ + struct kbasep_reset_timeout_data *rtdata = + container_of(timer, struct kbasep_reset_timeout_data, timer); + + rtdata->timed_out = 1; + + /* Set the wait queue to wake up kbase_pm_init_hw even though the reset + * hasn't completed */ + kbase_pm_reset_done(rtdata->kbdev); + + return HRTIMER_NORESTART; +} + +static int kbase_set_jm_quirks(struct kbase_device *kbdev, const u32 prod_id) +{ + u32 hw_quirks_jm = kbase_reg_read(kbdev, + GPU_CONTROL_REG(JM_CONFIG)); + + if (GPU_ID2_MODEL_MATCH_VALUE(prod_id) == GPU_ID2_PRODUCT_TMIX) { + /* Only for tMIx */ + u32 coherency_features; + + coherency_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(COHERENCY_FEATURES)); + + /* (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly + * documented for tMIx so force correct value here. + */ + if (coherency_features == + COHERENCY_FEATURE_BIT(COHERENCY_ACE)) { + hw_quirks_jm |= (COHERENCY_ACE_LITE | + COHERENCY_ACE) << + JM_FORCE_COHERENCY_FEATURES_SHIFT; + } + } + + if (kbase_is_gpu_lost(kbdev)) + return -EIO; + + kbdev->hw_quirks_jm = hw_quirks_jm; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_IDVS_GROUP_SIZE)) { + int default_idvs_group_size = 0xF; + u32 tmp; + + if (of_property_read_u32(kbdev->dev->of_node, + "idvs-group-size", &tmp)) + tmp = default_idvs_group_size; + + if (tmp > IDVS_GROUP_MAX_SIZE) { + dev_err(kbdev->dev, + "idvs-group-size of %d is too large. 
Maximum value is %d", + tmp, IDVS_GROUP_MAX_SIZE); + tmp = default_idvs_group_size; + } + + kbdev->hw_quirks_jm |= tmp << IDVS_GROUP_SIZE_SHIFT; + } + +#define MANUAL_POWER_CONTROL ((u32)(1 << 8)) + if (corestack_driver_control) + kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL; + + return 0; +} + +static int kbase_set_sc_quirks(struct kbase_device *kbdev, const u32 prod_id) +{ + u32 hw_quirks_sc = kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_CONFIG)); + + if (kbase_is_gpu_lost(kbdev)) + return -EIO; + + if (prod_id < 0x750 || prod_id == 0x6956) /* T60x, T62x, T72x */ + hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE; + else if (prod_id >= 0x750 && prod_id <= 0x880) /* T76x, T8xx */ + hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES; + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_2968_TTRX_3162)) + hw_quirks_sc |= SC_VAR_ALGORITHM; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_TLS_HASHING)) + hw_quirks_sc |= SC_TLS_HASH_ENABLE; + + kbdev->hw_quirks_sc = hw_quirks_sc; + + return 0; +} + +static int kbase_set_tiler_quirks(struct kbase_device *kbdev) +{ + u32 hw_quirks_tiler = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_CONFIG)); + + if (kbase_is_gpu_lost(kbdev)) + return -EIO; + + /* Set tiler clock gate override if required */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953)) + hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE; + + kbdev->hw_quirks_tiler = hw_quirks_tiler; + + return 0; +} + +static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) +{ + struct device_node *np = kbdev->dev->of_node; + const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> + GPU_ID_VERSION_PRODUCT_ID_SHIFT; + int error = 0; + + kbdev->hw_quirks_jm = 0; + kbdev->hw_quirks_sc = 0; + kbdev->hw_quirks_tiler = 0; + kbdev->hw_quirks_mmu = 0; + + if (!of_property_read_u32(np, "quirks_jm", + &kbdev->hw_quirks_jm)) { + dev_info(kbdev->dev, + "Found quirks_jm = [0x%x] in Devicetree\n", + kbdev->hw_quirks_jm); + } else { + 
error = kbase_set_jm_quirks(kbdev, prod_id); + if (error) + return error; + } + + if (!of_property_read_u32(np, "quirks_sc", + &kbdev->hw_quirks_sc)) { + dev_info(kbdev->dev, + "Found quirks_sc = [0x%x] in Devicetree\n", + kbdev->hw_quirks_sc); + } else { + error = kbase_set_sc_quirks(kbdev, prod_id); + if (error) + return error; + } + + if (!of_property_read_u32(np, "quirks_tiler", + &kbdev->hw_quirks_tiler)) { + dev_info(kbdev->dev, + "Found quirks_tiler = [0x%x] in Devicetree\n", + kbdev->hw_quirks_tiler); + } else { + error = kbase_set_tiler_quirks(kbdev); + if (error) + return error; + } + + if (!of_property_read_u32(np, "quirks_mmu", + &kbdev->hw_quirks_mmu)) { + dev_info(kbdev->dev, + "Found quirks_mmu = [0x%x] in Devicetree\n", + kbdev->hw_quirks_mmu); + } else { + error = kbase_set_mmu_quirks(kbdev); + } + + return error; +} + +static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) +{ + kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), + kbdev->hw_quirks_sc); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG), + kbdev->hw_quirks_tiler); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), + kbdev->hw_quirks_mmu); + kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG), + kbdev->hw_quirks_jm); +} + +void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) +{ + if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) && + !kbdev->cci_snoop_enabled) { +#ifdef CONFIG_ARM64 + if (kbdev->snoop_enable_smc != 0) + kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0); +#endif /* CONFIG_ARM64 */ + dev_dbg(kbdev->dev, "MALI - CCI Snoops - Enabled\n"); + kbdev->cci_snoop_enabled = true; + } +} + +void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev) +{ + if (kbdev->cci_snoop_enabled) { +#ifdef CONFIG_ARM64 + if (kbdev->snoop_disable_smc != 0) { + mali_cci_flush_l2(kbdev); + kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0); + } +#endif /* CONFIG_ARM64 */ + dev_dbg(kbdev->dev, "MALI - CCI Snoops Disabled\n"); + 
kbdev->cci_snoop_enabled = false; + } +} + +static void reenable_protected_mode_hwcnt(struct kbase_device *kbdev) +{ + unsigned long irq_flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); + kbdev->protected_mode_hwcnt_desired = true; + if (kbdev->protected_mode_hwcnt_disabled) { + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + kbdev->protected_mode_hwcnt_disabled = false; + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); +} + +static int kbase_pm_do_reset(struct kbase_device *kbdev) +{ + struct kbasep_reset_timeout_data rtdata; + int ret; + + KBASE_KTRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, 0); + + KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev, kbdev); + + if (kbdev->pm.backend.callback_soft_reset) { + ret = kbdev->pm.backend.callback_soft_reset(kbdev); + if (ret < 0) + return ret; + else if (ret > 0) + return 0; + } else { + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_SOFT_RESET); + } + + /* Unmask the reset complete interrupt only */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED); + + /* Initialize a structure for tracking the status of the reset */ + rtdata.kbdev = kbdev; + rtdata.timed_out = 0; + + /* Create a timer to use as a timeout on the reset */ + hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + rtdata.timer.function = kbasep_reset_timeout; + + hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), + HRTIMER_MODE_REL); + + /* Wait for the RESET_COMPLETED interrupt to be raised */ + kbase_pm_wait_for_reset(kbdev); + + if (rtdata.timed_out == 0) { + /* GPU has been reset */ + hrtimer_cancel(&rtdata.timer); + destroy_hrtimer_on_stack(&rtdata.timer); + return 0; + } + + /* No interrupt has been received - check if the RAWSTAT register says + * the reset has completed */ + if ((kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & + RESET_COMPLETED) + || kbase_is_gpu_lost(kbdev)) { + /* The interrupt is set in the RAWSTAT; this suggests that the 
+ * interrupts are not getting to the CPU */ + dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n"); + /* If interrupts aren't working we can't continue. */ + destroy_hrtimer_on_stack(&rtdata.timer); + return -EINVAL; + } + + /* The GPU doesn't seem to be responding to the reset so try a hard + * reset */ + dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n", + RESET_TIMEOUT); + KBASE_KTRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_HARD_RESET); + + /* Restart the timer to wait for the hard reset to complete */ + rtdata.timed_out = 0; + + hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), + HRTIMER_MODE_REL); + + /* Wait for the RESET_COMPLETED interrupt to be raised */ + kbase_pm_wait_for_reset(kbdev); + + if (rtdata.timed_out == 0) { + /* GPU has been reset */ + hrtimer_cancel(&rtdata.timer); + destroy_hrtimer_on_stack(&rtdata.timer); + return 0; + } + + destroy_hrtimer_on_stack(&rtdata.timer); + + dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n", + RESET_TIMEOUT); + + return -EINVAL; +} + +int kbase_pm_protected_mode_enable(struct kbase_device *const kbdev) +{ + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_SET_PROTECTED_MODE); + return 0; +} + +int kbase_pm_protected_mode_disable(struct kbase_device *const kbdev) +{ + lockdep_assert_held(&kbdev->pm.lock); + + return kbase_pm_do_reset(kbdev); +} + +int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) +{ + unsigned long irq_flags; + int err; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + lockdep_assert_held(&kbdev->pm.lock); + + /* Ensure the clock is on before attempting to access the hardware */ + if (!kbdev->pm.backend.gpu_powered) { + if (kbdev->pm.backend.callback_power_on) + kbdev->pm.backend.callback_power_on(kbdev); + + kbdev->pm.backend.gpu_powered = true; + } + + /* Ensure 
interrupts are off to begin with, this also clears any + * outstanding interrupts */ + kbase_pm_disable_interrupts(kbdev); + /* Ensure cache snoops are disabled before reset. */ + kbase_pm_cache_snoop_disable(kbdev); + /* Prepare for the soft-reset */ + kbdev->pm.backend.reset_done = false; + + /* The cores should be made unavailable due to the reset */ + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); + if (kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, 0u); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); + + /* Soft reset the GPU */ + err = kbdev->protected_ops->protected_mode_disable( + kbdev->protected_dev); + + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); + kbdev->protected_mode = false; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); + + if (err) + goto exit; + + if (flags & PM_HW_ISSUES_DETECT) { + err = kbase_pm_hw_issues_detect(kbdev); + if (err) + goto exit; + } + + kbase_pm_hw_issues_apply(kbdev); + kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency); + + /* Sanity check protected mode was left after reset */ + WARN_ON(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & + GPU_STATUS_PROTECTED_MODE_ACTIVE); + + /* If cycle counter was in use re-enable it, enable_irqs will only be + * false when called from kbase_pm_powerup */ + if (kbdev->pm.backend.gpu_cycle_counter_requests && + (flags & PM_ENABLE_IRQS)) { + kbase_pm_enable_interrupts(kbdev); + + /* Re-enable the counters if we need to */ + spin_lock_irqsave( + &kbdev->pm.backend.gpu_cycle_counter_requests_lock, + irq_flags); + if (kbdev->pm.backend.gpu_cycle_counter_requests) + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CYCLE_COUNT_START); + spin_unlock_irqrestore( + &kbdev->pm.backend.gpu_cycle_counter_requests_lock, + irq_flags); + + kbase_pm_disable_interrupts(kbdev); + } + + if (flags & PM_ENABLE_IRQS) + kbase_pm_enable_interrupts(kbdev); + +exit: 
+ if (!kbdev->pm.backend.protected_entry_transition_override) { + /* Re-enable GPU hardware counters if we're resetting from + * protected mode. + */ + reenable_protected_mode_hwcnt(kbdev); + } + + return err; +} + +/** + * kbase_pm_request_gpu_cycle_counter_do_request - Request cycle counters + * + * Increase the count of cycle counter users and turn the cycle counters on if + * they were previously off + * + * This function is designed to be called by + * kbase_pm_request_gpu_cycle_counter() or + * kbase_pm_request_gpu_cycle_counter_l2_is_on() only + * + * When this function is called the l2 cache must be on - i.e., the GPU must be + * on. + * + * @kbdev: The kbase device structure of the device + */ +static void +kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, + flags); + + ++kbdev->pm.backend.gpu_cycle_counter_requests; + + if (1 == kbdev->pm.backend.gpu_cycle_counter_requests) + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CYCLE_COUNT_START); + + spin_unlock_irqrestore( + &kbdev->pm.backend.gpu_cycle_counter_requests_lock, + flags); +} + +void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); + + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < + INT_MAX); + + kbase_pm_request_gpu_cycle_counter_do_request(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter); + +void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); + + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < + INT_MAX); + + kbase_pm_request_gpu_cycle_counter_do_request(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on); + +void 
kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev) +{ + unsigned long flags; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + lockdep_assert_held(&kbdev->hwaccess_lock); + + spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, + flags); + + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests > 0); + + --kbdev->pm.backend.gpu_cycle_counter_requests; + + if (0 == kbdev->pm.backend.gpu_cycle_counter_requests) + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CYCLE_COUNT_STOP); + + spin_unlock_irqrestore( + &kbdev->pm.backend.gpu_cycle_counter_requests_lock, + flags); +} + +void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter); diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h new file mode 100644 index 0000000..95f10e0 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h @@ -0,0 +1,710 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * Power management API definitions used internally by GPU backend + */ + +#ifndef _KBASE_BACKEND_PM_INTERNAL_H_ +#define _KBASE_BACKEND_PM_INTERNAL_H_ + +#include + +#include "mali_kbase_pm_ca.h" +#include "mali_kbase_pm_policy.h" + + +/** + * kbase_pm_dev_idle - The GPU is idle. + * + * The OS may choose to turn off idle devices + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_dev_idle(struct kbase_device *kbdev); + +/** + * kbase_pm_dev_activate - The GPU is active. + * + * The OS should avoid opportunistically turning off the GPU while it is active + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_dev_activate(struct kbase_device *kbdev); + +/** + * kbase_pm_get_present_cores - Get details of the cores that are present in + * the device. + * + * This function can be called by the active power policy to return a bitmask of + * the cores (of a specified type) present in the GPU device and also a count of + * the number of cores. + * + * @kbdev: The kbase device structure for the device (must be a valid + * pointer) + * @type: The type of core (see the enum kbase_pm_core_type enumeration) + * + * Return: The bit mask of cores present + */ +u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, + enum kbase_pm_core_type type); + +/** + * kbase_pm_get_active_cores - Get details of the cores that are currently + * active in the device. + * + * This function can be called by the active power policy to return a bitmask of + * the cores (of a specified type) that are actively processing work (i.e. + * turned on *and* busy). 
+ * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @type: The type of core (see the enum kbase_pm_core_type enumeration) + * + * Return: The bit mask of active cores + */ +u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, + enum kbase_pm_core_type type); + +/** + * kbase_pm_get_trans_cores - Get details of the cores that are currently + * transitioning between power states. + * + * This function can be called by the active power policy to return a bitmask of + * the cores (of a specified type) that are currently transitioning between + * power states. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @type: The type of core (see the enum kbase_pm_core_type enumeration) + * + * Return: The bit mask of transitioning cores + */ +u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, + enum kbase_pm_core_type type); + +/** + * kbase_pm_get_ready_cores - Get details of the cores that are currently + * powered and ready for jobs. + * + * This function can be called by the active power policy to return a bitmask of + * the cores (of a specified type) that are powered and ready for jobs (they may + * or may not be currently executing jobs). + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @type: The type of core (see the enum kbase_pm_core_type enumeration) + * + * Return: The bit mask of ready cores + */ +u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, + enum kbase_pm_core_type type); + +/** + * kbase_pm_clock_on - Turn the clock for the device on, and enable device + * interrupts. + * + * This function can be used by a power policy to turn the clock for the GPU on. + * It should be modified during integration to perform the necessary actions to + * ensure that the GPU is fully powered and clocked. 
+ * + * @kbdev: The kbase device structure for the device (must be a valid + * pointer) + * @is_resume: true if clock on due to resume after suspend, false otherwise + */ +void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume); + +/** + * kbase_pm_clock_off - Disable device interrupts, and turn the clock for the + * device off. + * + * This function can be used by a power policy to turn the clock for the GPU + * off. It should be modified during integration to perform the necessary + * actions to turn the clock off (if this is possible in the integration). + * + * @kbdev: The kbase device structure for the device (must be a valid + * pointer) + * + * Return: true if clock was turned off, or + * false if clock can not be turned off due to pending page/bus fault + * workers. Caller must flush MMU workqueues and retry + */ +bool kbase_pm_clock_off(struct kbase_device *kbdev); + +/** + * kbase_pm_enable_interrupts - Enable interrupts on the device. + * + * Interrupts are also enabled after a call to kbase_pm_clock_on(). + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_enable_interrupts(struct kbase_device *kbdev); + +/** + * kbase_pm_disable_interrupts - Disable interrupts on the device. + * + * This prevents delivery of Power Management interrupts to the CPU so that + * kbase_pm_update_state() will not be called from the IRQ handler + * until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called. + * + * Interrupts are also disabled after a call to kbase_pm_clock_off(). + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_disable_interrupts(struct kbase_device *kbdev); + +/** + * kbase_pm_disable_interrupts_nolock - Version of kbase_pm_disable_interrupts() + * that does not take the hwaccess_lock + * + * Caller must hold the hwaccess_lock. 
+ * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev); + +/** + * kbase_pm_init_hw - Initialize the hardware. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @flags: Flags specifying the type of PM init + * + * This function checks the GPU ID register to ensure that the GPU is supported + * by the driver and performs a reset on the device so that it is in a known + * state before the device is used. + * + * Return: 0 if the device is supported and successfully reset. + */ +int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags); + +/** + * kbase_pm_reset_done - The GPU has been reset successfully. + * + * This function must be called by the GPU interrupt handler when the + * RESET_COMPLETED bit is set. It signals to the power management initialization + * code that the GPU has been successfully reset. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_reset_done(struct kbase_device *kbdev); + +/** + * kbase_pm_wait_for_desired_state - Wait for the desired power state to be + * reached + * + * Wait for the L2 and shader power state machines to reach the states + * corresponding to the values of 'l2_desired' and 'shaders_desired'. + * + * The usual use-case for this is to ensure cores are 'READY' after performing + * a GPU Reset. + * + * Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock, + * because this function will take that lock itself. + * + * NOTE: This may not wait until the correct state is reached if there is a + * power off in progress. 
To correctly wait for the desired state the caller + * must ensure that this is not the case by, for example, calling + * kbase_pm_wait_for_poweroff_complete() + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); + +/** + * kbase_pm_wait_for_l2_powered - Wait for the L2 cache to be powered on + * + * Wait for the L2 to be powered on, and for the L2 and shader state machines to + * stabilise by reaching the states corresponding to the values of 'l2_desired' + * and 'shaders_desired'. + * + * kbdev->pm.active_count must be non-zero when calling this function. + * + * Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock, + * because this function will take that lock itself. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev); + +/** + * kbase_pm_update_dynamic_cores_onoff - Update the L2 and shader power state + * machines after changing shader core + * availability + * + * It can be called in any state, so this function must check the L2 and shader + * core power status itself, otherwise it will break the shader/L2 state machines. + * + * Caller must hold hwaccess_lock + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_update_dynamic_cores_onoff(struct kbase_device *kbdev); + +/** + * kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state() + * where the caller must hold + * kbase_device.hwaccess_lock + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev); + +/** + * kbase_pm_update_state - Update the L2 and shader power state machines + * @kbdev: Device pointer + */ +void kbase_pm_update_state(struct kbase_device *kbdev); + +/** + * kbase_pm_state_machine_init - Initialize the state
machines, primarily the + * shader poweroff timer + * @kbdev: Device pointer + */ +int kbase_pm_state_machine_init(struct kbase_device *kbdev); + +/** + * kbase_pm_state_machine_term - Clean up the PM state machines' data + * @kbdev: Device pointer + */ +void kbase_pm_state_machine_term(struct kbase_device *kbdev); + +/** + * kbase_pm_update_cores_state - Update the desired state of shader cores from + * the Power Policy, and begin any power + * transitions. + * + * This function will update the desired_xx_state members of + * struct kbase_pm_device_data by calling into the current Power Policy. It will + * then begin power transitions to make the hardware achieve the desired shader + * core state. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_update_cores_state(struct kbase_device *kbdev); + +/** + * kbasep_pm_metrics_init - Initialize the metrics gathering framework. + * + * This must be called before other metric gathering APIs are called. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Return: 0 on success, error code on error + */ +int kbasep_pm_metrics_init(struct kbase_device *kbdev); + +/** + * kbasep_pm_metrics_term - Terminate the metrics gathering framework. + * + * This must be called when metric gathering is no longer required. It is an + * error to call any metrics gathering function (other than + * kbasep_pm_metrics_init()) after calling this function. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbasep_pm_metrics_term(struct kbase_device *kbdev); + +/** + * kbase_pm_report_vsync - Function to be called by the frame buffer driver to + * update the vsync metric. + * + * This function should be called by the frame buffer driver to update whether + * the system is hitting the vsync target or not.
buffer_updated should be true + * if the vsync corresponded with a new frame being displayed, otherwise it + * should be false. This function does not need to be called every vsync, but + * only when the value of @buffer_updated differs from a previous call. + * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + * @buffer_updated: True if the buffer has been updated on this VSync, + * false otherwise + */ +void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated); + +/** + * kbase_pm_get_dvfs_action - Determine whether the DVFS system should change + * the clock speed of the GPU. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This function should be called regularly by the DVFS system to check whether + * the clock speed of the GPU needs updating. + */ +void kbase_pm_get_dvfs_action(struct kbase_device *kbdev); + +/** + * kbase_pm_request_gpu_cycle_counter - Mark that the GPU cycle counter is + * needed + * + * If the caller is the first caller then the GPU cycle counters will be enabled + * along with the l2 cache + * + * The GPU must be powered when calling this function (i.e. + * kbase_pm_context_active() must have been called). + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev); + +/** + * kbase_pm_request_gpu_cycle_counter_l2_is_on - Mark GPU cycle counter is + * needed (l2 cache already on) + * + * This is a version of the above function + * (kbase_pm_request_gpu_cycle_counter()) suitable for being called when the + * l2 cache is known to be on and assured to be on until the subsequent call of + * kbase_pm_release_gpu_cycle_counter() such as when a job is submitted. It does + * not sleep and can be called from atomic functions. + * + * The GPU must be powered when calling this function (i.e. 
+ * kbase_pm_context_active() must have been called) and the l2 cache must be + * powered on. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev); + +/** + * kbase_pm_release_gpu_cycle_counter - Mark that the GPU cycle counter is no + * longer in use + * + * If the caller is the last caller then the GPU cycle counters will be + * disabled. A request must have been made before a call to this. + * + * Caller must not hold the hwaccess_lock, as it will be taken in this function. + * If the caller is already holding this lock then + * kbase_pm_release_gpu_cycle_counter_nolock() must be used instead. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev); + +/** + * kbase_pm_release_gpu_cycle_counter_nolock - Version of kbase_pm_release_gpu_cycle_counter() + * that does not take hwaccess_lock + * + * Caller must hold the hwaccess_lock. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev); + +/** + * kbase_pm_wait_for_poweroff_complete - Wait for the poweroff workqueue to + * complete + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_wait_for_poweroff_complete(struct kbase_device *kbdev); + +/** + * kbase_pm_runtime_init - Initialize runtime-pm for Mali GPU platform device + * + * Setup the power management callbacks and initialize/enable the runtime-pm + * for the Mali GPU platform device, using the callback function. This must be + * called before the kbase_pm_register_access_enable() function. 
+ * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +int kbase_pm_runtime_init(struct kbase_device *kbdev); + +/** + * kbase_pm_runtime_term - Disable runtime-pm for Mali GPU platform device + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_runtime_term(struct kbase_device *kbdev); + +/** + * kbase_pm_register_access_enable - Enable access to GPU registers + * + * Enables access to the GPU registers before power management has powered up + * the GPU with kbase_pm_powerup(). + * + * This results in the power management callbacks provided in the driver + * configuration to get called to turn on power and/or clocks to the GPU. See + * kbase_pm_callback_conf. + * + * This should only be used before power management is powered up with + * kbase_pm_powerup() + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_register_access_enable(struct kbase_device *kbdev); + +/** + * kbase_pm_register_access_disable - Disable early register access + * + * Disables access to the GPU registers enabled earlier by a call to + * kbase_pm_register_access_enable(). + * + * This results in the power management callbacks provided in the driver + * configuration to get called to turn off power and/or clocks to the GPU. See + * kbase_pm_callback_conf + * + * This should only be used before power management is powered up with + * kbase_pm_powerup() + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_register_access_disable(struct kbase_device *kbdev); + +/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline + * function */ + +/** + * kbase_pm_metrics_is_active - Check if the power management metrics + * collection is active. 
+ * + * Note that this returns if the power management metrics collection was + * active at the time of calling, it is possible that after the call the metrics + * collection enable may have changed state. + * + * The caller must handle the consequence that the state may have changed. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * Return: true if metrics collection was active else false. + */ +bool kbase_pm_metrics_is_active(struct kbase_device *kbdev); + +/** + * kbase_pm_do_poweron - Power on the GPU, and any cores that are requested. + * + * @kbdev: The kbase device structure for the device (must be a valid + * pointer) + * @is_resume: true if power on due to resume after suspend, + * false otherwise + */ +void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume); + +/** + * kbase_pm_do_poweroff - Power off the GPU, and any cores that have been + * requested. + * + * @kbdev: The kbase device structure for the device (must be a valid + * pointer) + */ +void kbase_pm_do_poweroff(struct kbase_device *kbdev); + +#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) +void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, + struct kbasep_pm_metrics *last, + struct kbasep_pm_metrics *diff); +#endif /* defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) */ + +#ifdef CONFIG_MALI_MIDGARD_DVFS + +/** + * kbase_platform_dvfs_event - Report utilisation to DVFS code + * + * Function provided by platform specific code when DVFS is enabled to allow + * the power management metrics system to report utilisation. + * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + * @utilisation: The current calculated utilisation by the metrics system. + * @util_gl_share: The current calculated gl share of utilisation. + * @util_cl_share: The current calculated cl share of utilisation per core + * group. + * Return: Returns 0 on failure and non zero on success. 
+ */ + +int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, + u32 util_gl_share, u32 util_cl_share[2]); +#endif + +void kbase_pm_power_changed(struct kbase_device *kbdev); + +/** + * kbase_pm_metrics_update - Inform the metrics system that an atom is either + * about to be run or has just completed. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @now: Pointer to the timestamp of the change, or NULL to use current time + * + * Caller must hold hwaccess_lock + */ +void kbase_pm_metrics_update(struct kbase_device *kbdev, + ktime_t *now); + +/** + * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU + * If the GPU does not have coherency this is a no-op + * @kbdev: Device pointer + * + * This function should be called after L2 power up. + */ + +void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev); + +/** + * kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU + * If the GPU does not have coherency this is a no-op + * @kbdev: Device pointer + * + * This function should be called before L2 power off. + */ +void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev); + +#ifdef CONFIG_MALI_DEVFREQ +/** + * kbase_devfreq_set_core_mask - Set devfreq core mask + * @kbdev: Device pointer + * @core_mask: New core mask + * + * This function is used by devfreq to change the available core mask as + * required by Dynamic Core Scaling. + */ +void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask); +#endif + +/** + * kbase_pm_reset_start_locked - Signal that GPU reset has started + * @kbdev: Device pointer + * + * Normal power management operation will be suspended until the reset has + * completed. + * + * Caller must hold hwaccess_lock. + */ +void kbase_pm_reset_start_locked(struct kbase_device *kbdev); + +/** + * kbase_pm_reset_complete - Signal that GPU reset has completed + * @kbdev: Device pointer + * + * Normal power management operation will be resumed. 
The power manager will + * re-evaluate what cores are needed and power on or off as required. + */ +void kbase_pm_reset_complete(struct kbase_device *kbdev); + +/** + * kbase_pm_protected_override_enable - Enable the protected mode override + * @kbdev: Device pointer + * + * When the protected mode override is enabled, all shader cores are requested + * to power down, and the L2 power state can be controlled by + * kbase_pm_protected_l2_override(). + * + * Caller must hold hwaccess_lock. + */ +void kbase_pm_protected_override_enable(struct kbase_device *kbdev); + +/** + * kbase_pm_protected_override_disable - Disable the protected mode override + * @kbdev: Device pointer + * + * Caller must hold hwaccess_lock. + */ +void kbase_pm_protected_override_disable(struct kbase_device *kbdev); + +/** + * kbase_pm_protected_l2_override - Control the protected mode L2 override + * @kbdev: Device pointer + * @override: true to enable the override, false to disable + * + * When the driver is transitioning in or out of protected mode, the L2 cache is + * forced to power off. This can be overridden to force the L2 cache to power + * on. This is required to change coherency settings on some GPUs. + */ +void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override); + +/** + * kbase_pm_protected_entry_override_enable - Enable the protected mode entry + * override + * @kbdev: Device pointer + * + * Initiate a GPU reset and enable the protected mode entry override flag if + * l2_always_on WA is enabled and platform is fully coherent. If the GPU + * reset is already ongoing then protected mode entry override flag will not + * be enabled and function will have to be called again. + * + * When protected mode entry override flag is enabled to power down L2 via GPU + * reset, the GPU reset handling behavior gets changed. For example call to + * kbase_backend_reset() is skipped, Hw counters are not re-enabled and L2 + * isn't powered up again post reset. 
+ * This is needed only as a workaround for a Hw issue where explicit power down + * of L2 causes a glitch. For entering protected mode on fully coherent + * platforms L2 needs to be powered down to switch to IO coherency mode, so to + * avoid the glitch GPU reset is used to power down L2. Hence, this function + * does nothing on systems where the glitch issue isn't present. + * + * Caller must hold hwaccess_lock. Should be only called during the transition + * to enter protected mode. + * + * Return: -EAGAIN if a GPU reset was required for the glitch workaround but + * was already ongoing, otherwise 0. + */ +int kbase_pm_protected_entry_override_enable(struct kbase_device *kbdev); + +/** + * kbase_pm_protected_entry_override_disable - Disable the protected mode entry + * override + * @kbdev: Device pointer + * + * This shall be called once L2 has powered down and switch to IO coherency + * mode has been made. As with kbase_pm_protected_entry_override_enable(), + * this function does nothing on systems where the glitch issue isn't present. + * + * Caller must hold hwaccess_lock. Should be only called during the transition + * to enter protected mode. + */ +void kbase_pm_protected_entry_override_disable(struct kbase_device *kbdev); + +/* If true, the driver should explicitly control corestack power management, + * instead of relying on the Power Domain Controller. + */ +extern bool corestack_driver_control; + +/** + * kbase_pm_is_l2_desired - Check whether l2 is desired + * + * @kbdev: Device pointer + * + * This shall be called to check whether the L2 needs to be powered on + * + * Return: true if the L2 needs to be powered on + */ +bool kbase_pm_is_l2_desired(struct kbase_device *kbdev); + +/** + * kbase_pm_lock - Lock all necessary mutexes to perform PM actions + * + * @kbdev: Device pointer + * + * This function locks correct mutexes independent of GPU architecture.
+ */ +static inline void kbase_pm_lock(struct kbase_device *kbdev) +{ + mutex_lock(&kbdev->js_data.runpool_mutex); + mutex_lock(&kbdev->pm.lock); +} + +/** + * kbase_pm_unlock - Unlock mutexes locked by kbase_pm_lock + * + * @kbdev: Device pointer + */ +static inline void kbase_pm_unlock(struct kbase_device *kbdev) +{ + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&kbdev->js_data.runpool_mutex); +} + +#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_l2_states.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_l2_states.h new file mode 100644 index 0000000..12cb051 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_l2_states.h @@ -0,0 +1,38 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Backend-specific Power Manager level 2 cache state definitions. + * The function-like macro KBASEP_L2_STATE() must be defined before including + * this header file. This header file can be included multiple times in the + * same compilation unit with different definitions of KBASEP_L2_STATE(). 
+ */ +KBASEP_L2_STATE(OFF) +KBASEP_L2_STATE(PEND_ON) +KBASEP_L2_STATE(RESTORE_CLOCKS) +KBASEP_L2_STATE(ON_HWCNT_ENABLE) +KBASEP_L2_STATE(ON) +KBASEP_L2_STATE(ON_HWCNT_DISABLE) +KBASEP_L2_STATE(SLOW_DOWN_CLOCKS) +KBASEP_L2_STATE(POWER_DOWN) +KBASEP_L2_STATE(PEND_OFF) +KBASEP_L2_STATE(RESET_WAIT) diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c new file mode 100644 index 0000000..de3babe --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c @@ -0,0 +1,318 @@ +/* + * + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * Metrics for power management + */ + +#include +#include +#include +#include +#include +#include + +/* When VSync is being hit aim for utilisation between 70-90% */ +#define KBASE_PM_VSYNC_MIN_UTILISATION 70 +#define KBASE_PM_VSYNC_MAX_UTILISATION 90 +/* Otherwise aim for 10-40% */ +#define KBASE_PM_NO_VSYNC_MIN_UTILISATION 10 +#define KBASE_PM_NO_VSYNC_MAX_UTILISATION 40 + +/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns + * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly + * under 11s. Exceeding this will cause overflow */ +#define KBASE_PM_TIME_SHIFT 8 + +#ifdef CONFIG_MALI_MIDGARD_DVFS +static enum hrtimer_restart dvfs_callback(struct hrtimer *timer) +{ + unsigned long flags; + struct kbasep_pm_metrics_state *metrics; + + KBASE_DEBUG_ASSERT(timer != NULL); + + metrics = container_of(timer, struct kbasep_pm_metrics_state, timer); + kbase_pm_get_dvfs_action(metrics->kbdev); + + spin_lock_irqsave(&metrics->lock, flags); + + if (metrics->timer_active) + hrtimer_start(timer, + HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period), + HRTIMER_MODE_REL); + + spin_unlock_irqrestore(&metrics->lock, flags); + + return HRTIMER_NORESTART; +} +#endif /* CONFIG_MALI_MIDGARD_DVFS */ + +int kbasep_pm_metrics_init(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + kbdev->pm.backend.metrics.kbdev = kbdev; + + kbdev->pm.backend.metrics.time_period_start = ktime_get(); + kbdev->pm.backend.metrics.gpu_active = false; + kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; + kbdev->pm.backend.metrics.active_cl_ctx[1] = 0; + kbdev->pm.backend.metrics.active_gl_ctx[0] = 0; + kbdev->pm.backend.metrics.active_gl_ctx[1] = 0; + kbdev->pm.backend.metrics.active_gl_ctx[2] = 0; + + kbdev->pm.backend.metrics.values.time_busy = 0; + kbdev->pm.backend.metrics.values.time_idle = 0; + kbdev->pm.backend.metrics.values.busy_cl[0] = 0; + 
kbdev->pm.backend.metrics.values.busy_cl[1] = 0; + kbdev->pm.backend.metrics.values.busy_gl = 0; + + spin_lock_init(&kbdev->pm.backend.metrics.lock); + +#ifdef CONFIG_MALI_MIDGARD_DVFS + hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + kbdev->pm.backend.metrics.timer.function = dvfs_callback; + + kbase_pm_metrics_start(kbdev); +#endif /* CONFIG_MALI_MIDGARD_DVFS */ + + return 0; +} +KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init); + +void kbasep_pm_metrics_term(struct kbase_device *kbdev) +{ +#ifdef CONFIG_MALI_MIDGARD_DVFS + unsigned long flags; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + kbdev->pm.backend.metrics.timer_active = false; + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); + + hrtimer_cancel(&kbdev->pm.backend.metrics.timer); +#endif /* CONFIG_MALI_MIDGARD_DVFS */ +} + +KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term); + +/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this + * function + */ +static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev, + ktime_t now) +{ + ktime_t diff; + + lockdep_assert_held(&kbdev->pm.backend.metrics.lock); + + diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start); + if (ktime_to_ns(diff) < 0) + return; + + if (kbdev->pm.backend.metrics.gpu_active) { + u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); + + kbdev->pm.backend.metrics.values.time_busy += ns_time; + if (kbdev->pm.backend.metrics.active_cl_ctx[0]) + kbdev->pm.backend.metrics.values.busy_cl[0] += ns_time; + if (kbdev->pm.backend.metrics.active_cl_ctx[1]) + kbdev->pm.backend.metrics.values.busy_cl[1] += ns_time; + if (kbdev->pm.backend.metrics.active_gl_ctx[0]) + kbdev->pm.backend.metrics.values.busy_gl += ns_time; + if (kbdev->pm.backend.metrics.active_gl_ctx[1]) + kbdev->pm.backend.metrics.values.busy_gl += ns_time; + if (kbdev->pm.backend.metrics.active_gl_ctx[2]) + 
kbdev->pm.backend.metrics.values.busy_gl += ns_time; + } else { + kbdev->pm.backend.metrics.values.time_idle += (u32) (ktime_to_ns(diff) + >> KBASE_PM_TIME_SHIFT); + } + + kbdev->pm.backend.metrics.time_period_start = now; +} + +#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) +void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, + struct kbasep_pm_metrics *last, + struct kbasep_pm_metrics *diff) +{ + struct kbasep_pm_metrics *cur = &kbdev->pm.backend.metrics.values; + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + kbase_pm_get_dvfs_utilisation_calc(kbdev, ktime_get()); + + memset(diff, 0, sizeof(*diff)); + diff->time_busy = cur->time_busy - last->time_busy; + diff->time_idle = cur->time_idle - last->time_idle; + diff->busy_cl[0] = cur->busy_cl[0] - last->busy_cl[0]; + diff->busy_cl[1] = cur->busy_cl[1] - last->busy_cl[1]; + diff->busy_gl = cur->busy_gl - last->busy_gl; + + *last = *cur; + + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); +} +KBASE_EXPORT_TEST_API(kbase_pm_get_dvfs_metrics); +#endif + +#ifdef CONFIG_MALI_MIDGARD_DVFS +void kbase_pm_get_dvfs_action(struct kbase_device *kbdev) +{ + int utilisation, util_gl_share; + int util_cl_share[2]; + int busy; + struct kbasep_pm_metrics *diff; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + diff = &kbdev->pm.backend.metrics.dvfs_diff; + + kbase_pm_get_dvfs_metrics(kbdev, &kbdev->pm.backend.metrics.dvfs_last, diff); + + utilisation = (100 * diff->time_busy) / + max(diff->time_busy + diff->time_idle, 1u); + + busy = max(diff->busy_gl + diff->busy_cl[0] + diff->busy_cl[1], 1u); + util_gl_share = (100 * diff->busy_gl) / busy; + util_cl_share[0] = (100 * diff->busy_cl[0]) / busy; + util_cl_share[1] = (100 * diff->busy_cl[1]) / busy; + + kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, util_cl_share); +} + +bool kbase_pm_metrics_is_active(struct kbase_device *kbdev) +{ + bool isactive; + unsigned long flags; + + 
KBASE_DEBUG_ASSERT(kbdev != NULL); + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + isactive = kbdev->pm.backend.metrics.timer_active; + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); + + return isactive; +} +KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active); + +void kbase_pm_metrics_start(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + kbdev->pm.backend.metrics.timer_active = true; + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); + hrtimer_start(&kbdev->pm.backend.metrics.timer, + HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period), + HRTIMER_MODE_REL); +} + +void kbase_pm_metrics_stop(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + kbdev->pm.backend.metrics.timer_active = false; + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); + hrtimer_cancel(&kbdev->pm.backend.metrics.timer); +} + + +#endif /* CONFIG_MALI_MIDGARD_DVFS */ + +/** + * kbase_pm_metrics_active_calc - Update PM active counts based on currently + * running atoms + * @kbdev: Device pointer + * + * The caller must hold kbdev->pm.backend.metrics.lock + */ +static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev) +{ + int js; + + lockdep_assert_held(&kbdev->pm.backend.metrics.lock); + + kbdev->pm.backend.metrics.active_gl_ctx[0] = 0; + kbdev->pm.backend.metrics.active_gl_ctx[1] = 0; + kbdev->pm.backend.metrics.active_gl_ctx[2] = 0; + kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; + kbdev->pm.backend.metrics.active_cl_ctx[1] = 0; + kbdev->pm.backend.metrics.gpu_active = false; + + for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); + + /* Head atom may have just completed, so if it isn't running + * then try the next atom */ + if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) + katom = kbase_gpu_inspect(kbdev, js, 1); + + 
if (katom && katom->gpu_rb_state == + KBASE_ATOM_GPU_RB_SUBMITTED) { + if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { + int device_nr = (katom->core_req & + BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) + ? katom->device_nr : 0; + if (!WARN_ON(device_nr >= 2)) + kbdev->pm.backend.metrics. + active_cl_ctx[device_nr] = 1; + } else { + kbdev->pm.backend.metrics.active_gl_ctx[js] = 1; + trace_sysgraph(SGR_ACTIVE, 0, js); + } + kbdev->pm.backend.metrics.gpu_active = true; + } else { + trace_sysgraph(SGR_INACTIVE, 0, js); + } + } +} + +/* called when job is submitted to or removed from a GPU slot */ +void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp) +{ + unsigned long flags; + ktime_t now; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + + if (!timestamp) { + now = ktime_get(); + timestamp = &now; + } + + /* Track how long CL and/or GL jobs have been busy for */ + kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp); + + kbase_pm_metrics_active_calc(kbdev); + + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c new file mode 100644 index 0000000..17ed21e --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c @@ -0,0 +1,249 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Power policy API implementations + */ + +#include +#include +#include +#include + +static const struct kbase_pm_policy *const all_policy_list[] = { +#ifdef CONFIG_MALI_NO_MALI + &kbase_pm_always_on_policy_ops, + &kbase_pm_coarse_demand_policy_ops, +#if !MALI_CUSTOMER_RELEASE + &kbase_pm_always_on_demand_policy_ops, +#endif +#else /* CONFIG_MALI_NO_MALI */ + &kbase_pm_coarse_demand_policy_ops, +#if !MALI_CUSTOMER_RELEASE + &kbase_pm_always_on_demand_policy_ops, +#endif + &kbase_pm_always_on_policy_ops +#endif /* CONFIG_MALI_NO_MALI */ +}; + +void kbase_pm_policy_init(struct kbase_device *kbdev) +{ + kbdev->pm.backend.pm_current_policy = all_policy_list[0]; + kbdev->pm.backend.pm_current_policy->init(kbdev); +} + +void kbase_pm_policy_term(struct kbase_device *kbdev) +{ + kbdev->pm.backend.pm_current_policy->term(kbdev); +} + +void kbase_pm_update_active(struct kbase_device *kbdev) +{ + struct kbase_pm_device_data *pm = &kbdev->pm; + struct kbase_pm_backend_data *backend = &pm->backend; + unsigned long flags; + bool active; + + lockdep_assert_held(&pm->lock); + + /* pm_current_policy will never be NULL while pm.lock is held */ + KBASE_DEBUG_ASSERT(backend->pm_current_policy); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + active = backend->pm_current_policy->get_core_active(kbdev); + WARN((kbase_pm_is_active(kbdev) && !active), + "GPU is active but policy '%s' is indicating that it can be powered off", + kbdev->pm.backend.pm_current_policy->name); + + if (active) { + /* Power on the GPU and any cores requested by the policy */ + if (!pm->backend.invoke_poweroff_wait_wq_when_l2_off && + pm->backend.poweroff_wait_in_progress) { + 
KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); + pm->backend.poweron_required = true; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } else { + /* Cancel the invocation of + * kbase_pm_gpu_poweroff_wait_wq() from the L2 state + * machine. This is safe - if + * invoke_poweroff_wait_wq_when_l2_off is true, then + * the poweroff work hasn't even been queued yet, + * meaning we can go straight to powering on. + */ + pm->backend.invoke_poweroff_wait_wq_when_l2_off = false; + pm->backend.poweroff_wait_in_progress = false; + pm->backend.l2_desired = true; + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + kbase_pm_do_poweron(kbdev, false); + } + } else { + /* It is an error for the power policy to power off the GPU + * when there are contexts active */ + KBASE_DEBUG_ASSERT(pm->active_count == 0); + + pm->backend.poweron_required = false; + + /* Request power off */ + if (pm->backend.gpu_powered) { + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* Power off the GPU immediately */ + kbase_pm_do_poweroff(kbdev); + } else { + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + } +} + +void kbase_pm_update_dynamic_cores_onoff(struct kbase_device *kbdev) +{ + bool shaders_desired; + + lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->pm.lock); + + if (kbdev->pm.backend.pm_current_policy == NULL) + return; + if (kbdev->pm.backend.poweroff_wait_in_progress) + return; + /* In protected transition, don't allow outside shader core request + * affect transition, return directly + */ + if (kbdev->pm.backend.protected_transition_override) + return; + + shaders_desired = kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev); + + if (shaders_desired && kbase_pm_is_l2_desired(kbdev)) { + kbase_pm_update_state(kbdev); + } +} + +void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) +{ + bool shaders_desired; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (kbdev->pm.backend.pm_current_policy ==
NULL) + return; + if (kbdev->pm.backend.poweroff_wait_in_progress) + return; + + if (kbdev->pm.backend.protected_transition_override) + /* We are trying to change in/out of protected mode - force all + * cores off so that the L2 powers down */ + shaders_desired = false; + else + shaders_desired = kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev); + + if (kbdev->pm.backend.shaders_desired != shaders_desired) { + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, kbdev->pm.backend.shaders_desired); + + kbdev->pm.backend.shaders_desired = shaders_desired; + kbase_pm_update_state(kbdev); + } +} + +void kbase_pm_update_cores_state(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + kbase_pm_update_cores_state_nolock(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +int kbase_pm_list_policies(struct kbase_device *kbdev, + const struct kbase_pm_policy * const **list) +{ + if (list) + *list = all_policy_list; + + return ARRAY_SIZE(all_policy_list); +} + +KBASE_EXPORT_TEST_API(kbase_pm_list_policies); + +const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + return kbdev->pm.backend.pm_current_policy; +} + +KBASE_EXPORT_TEST_API(kbase_pm_get_policy); + +void kbase_pm_set_policy(struct kbase_device *kbdev, + const struct kbase_pm_policy *new_policy) +{ + const struct kbase_pm_policy *old_policy; + unsigned long flags; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(new_policy != NULL); + + KBASE_KTRACE_ADD(kbdev, PM_SET_POLICY, NULL, new_policy->id); + + /* During a policy change we pretend the GPU is active */ + /* A suspend won't happen here, because we're in a syscall from a + * userspace thread */ + kbase_pm_context_active(kbdev); + + kbase_pm_lock(kbdev); + + /* Remove the policy to prevent IRQ handlers from working on it */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + old_policy = 
kbdev->pm.backend.pm_current_policy; + kbdev->pm.backend.pm_current_policy = NULL; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + KBASE_KTRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, old_policy->id); + if (old_policy->term) + old_policy->term(kbdev); + + KBASE_KTRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, new_policy->id); + if (new_policy->init) + new_policy->init(kbdev); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.pm_current_policy = new_policy; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* If any core power state changes were previously attempted, but + * couldn't be made because the policy was changing (current_policy was + * NULL), then re-try them here. */ + kbase_pm_update_active(kbdev); + kbase_pm_update_cores_state(kbdev); + + kbase_pm_unlock(kbdev); + + /* Now the policy change is finished, we release our fake context active + * reference */ + kbase_pm_context_idle(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_set_policy); diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h new file mode 100644 index 0000000..f103ef0 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h @@ -0,0 +1,106 @@ +/* + * + * (C) COPYRIGHT 2010-2015, 2018-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Power policy API definitions + */ + +#ifndef _KBASE_PM_POLICY_H_ +#define _KBASE_PM_POLICY_H_ + +/** + * kbase_pm_policy_init - Initialize power policy framework + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Must be called before calling any other policy function + */ +void kbase_pm_policy_init(struct kbase_device *kbdev); + +/** + * kbase_pm_policy_term - Terminate power policy framework + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_policy_term(struct kbase_device *kbdev); + +/** + * kbase_pm_update_active - Update the active power state of the GPU + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Calls into the current power policy + */ +void kbase_pm_update_active(struct kbase_device *kbdev); + +/** + * kbase_pm_update_cores - Update the desired core state of the GPU + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Calls into the current power policy + */ +void kbase_pm_update_cores(struct kbase_device *kbdev); + +/** + * kbase_pm_cores_requested - Check that a power request has been locked into + * the HW. + * @kbdev: Kbase device + * @shader_required: true if shaders are required + * + * Called by the scheduler to check if a power on request has been locked into + * the HW. + * + * Note that there is no guarantee that the cores are actually ready, however + * when the request has been locked into the HW, then it is safe to submit work + * since the HW will wait for the transition to ready. + * + * A reference must first be taken prior to making this call. + * + * Caller must hold the hwaccess_lock. 
+ * + * Return: true if the request to the HW was successfully made else false if the + * request is still pending. + */ +static inline bool kbase_pm_cores_requested(struct kbase_device *kbdev, + bool shader_required) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* If the L2 & tiler are not on or pending, then the tiler is not yet + * available, and shaders are definitely not powered. + */ + if (kbdev->pm.backend.l2_state != KBASE_L2_PEND_ON && + kbdev->pm.backend.l2_state != KBASE_L2_ON && + kbdev->pm.backend.l2_state != KBASE_L2_ON_HWCNT_ENABLE) + return false; + + if (shader_required && + kbdev->pm.backend.shaders_state != KBASE_SHADERS_PEND_ON_CORESTACK_ON && + kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON && + kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON_RECHECK) + return false; + + return true; +} + +#endif /* _KBASE_PM_POLICY_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_shader_states.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_shader_states.h new file mode 100644 index 0000000..2bd9e47 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_shader_states.h @@ -0,0 +1,43 @@ +/* + * + * (C) COPYRIGHT 2018-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Backend-specific Power Manager shader core state definitions. + * The function-like macro KBASEP_SHADER_STATE() must be defined before + * including this header file. This header file can be included multiple + * times in the same compilation unit with different definitions of + * KBASEP_SHADER_STATE(). + */ +KBASEP_SHADER_STATE(OFF_CORESTACK_OFF) +KBASEP_SHADER_STATE(OFF_CORESTACK_PEND_ON) +KBASEP_SHADER_STATE(PEND_ON_CORESTACK_ON) +KBASEP_SHADER_STATE(ON_CORESTACK_ON) +KBASEP_SHADER_STATE(ON_CORESTACK_ON_RECHECK) +KBASEP_SHADER_STATE(WAIT_OFF_CORESTACK_ON) +KBASEP_SHADER_STATE(WAIT_GPU_IDLE) +KBASEP_SHADER_STATE(WAIT_FINISHED_CORESTACK_ON) +KBASEP_SHADER_STATE(L2_FLUSHING_CORESTACK_ON) +KBASEP_SHADER_STATE(READY_OFF_CORESTACK_ON) +KBASEP_SHADER_STATE(PEND_OFF_CORESTACK_ON) +KBASEP_SHADER_STATE(OFF_CORESTACK_PEND_OFF) +KBASEP_SHADER_STATE(OFF_CORESTACK_OFF_TIMER_PEND_OFF) +KBASEP_SHADER_STATE(RESET_WAIT) diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c new file mode 100644 index 0000000..cb10518 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c @@ -0,0 +1,70 @@ +/* + * + * (C) COPYRIGHT 2014-2016,2018-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include +#include + +void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, + u64 *system_time, struct timespec64 *ts) +{ + u32 hi1, hi2; + + kbase_pm_request_gpu_cycle_counter(kbdev); + + if (cycle_counter) { + /* Read hi, lo, hi to ensure a coherent u64 */ + do { + hi1 = kbase_reg_read(kbdev, + GPU_CONTROL_REG(CYCLE_COUNT_HI)); + *cycle_counter = kbase_reg_read(kbdev, + GPU_CONTROL_REG(CYCLE_COUNT_LO)); + hi2 = kbase_reg_read(kbdev, + GPU_CONTROL_REG(CYCLE_COUNT_HI)); + } while (hi1 != hi2); + *cycle_counter |= (((u64) hi1) << 32); + } + + if (system_time) { + /* Read hi, lo, hi to ensure a coherent u64 */ + do { + hi1 = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TIMESTAMP_HI)); + *system_time = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TIMESTAMP_LO)); + hi2 = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TIMESTAMP_HI)); + } while (hi1 != hi2); + *system_time |= (((u64) hi1) << 32); + } + + /* Record the CPU's idea of current time */ + if (ts != NULL) +#if (KERNEL_VERSION(4, 17, 0) > LINUX_VERSION_CODE) + *ts = ktime_to_timespec64(ktime_get_raw()); +#else + ktime_get_raw_ts64(ts); +#endif + + kbase_pm_release_gpu_cycle_counter(kbdev); +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/build.bp b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/build.bp new file mode 100644 index 0000000..51aeecd --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/build.bp @@ -0,0 +1,183 @@ +/* + * + * (C) COPYRIGHT 2017-2020 ARM Limited. 
All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +/* Kernel-side tests may include mali_kbase's headers. Therefore any config + * options which affect the sizes of any structs (e.g. adding extra members) + * must be included in these defaults, so that the structs are consistent in + * both mali_kbase and the test modules. */ +bob_defaults { + name: "mali_kbase_shared_config_defaults", + no_mali: { + kbuild_options: ["CONFIG_MALI_NO_MALI=y"], + }, + mali_real_hw: { + kbuild_options: ["CONFIG_MALI_REAL_HW=y"], + }, + mali_devfreq: { + kbuild_options: ["CONFIG_MALI_DEVFREQ=y"], + }, + mali_midgard_dvfs: { + kbuild_options: ["CONFIG_MALI_MIDGARD_DVFS=y"], + }, + mali_debug: { + kbuild_options: ["CONFIG_MALI_DEBUG=y"], + }, + buslog: { + kbuild_options: ["CONFIG_MALI_BUSLOG=y"], + }, + cinstr_vector_dump: { + kbuild_options: ["CONFIG_MALI_VECTOR_DUMP=y"], + }, + cinstr_gwt: { + kbuild_options: ["CONFIG_MALI_CINSTR_GWT=y"], + }, + mali_gator_support: { + kbuild_options: ["CONFIG_MALI_GATOR_SUPPORT=y"], + }, + mali_midgard_enable_trace: { + kbuild_options: ["CONFIG_MALI_MIDGARD_ENABLE_TRACE=y"], + }, + mali_system_trace: { + kbuild_options: ["CONFIG_MALI_SYSTEM_TRACE=y"], + }, + mali_pwrsoft_765: { + kbuild_options: ["CONFIG_MALI_PWRSOFT_765=y"], + }, + mali_memory_fully_backed: { + kbuild_options: ["CONFIG_MALI_MEMORY_FULLY_BACKED=y"], + }, + mali_dma_buf_map_on_demand: { + kbuild_options: ["CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND=y"], + }, + mali_dma_buf_legacy_compat: { + kbuild_options: ["CONFIG_MALI_DMA_BUF_LEGACY_COMPAT=y"], + }, + 
mali_arbiter_support: { + kbuild_options: ["CONFIG_MALI_ARBITER_SUPPORT=y"], + }, + mali_gem5_build: { + kbuild_options: ["CONFIG_MALI_GEM5_BUILD=y"], + }, + kbuild_options: [ + "MALI_UNIT_TEST={{.unit_test_code}}", + "MALI_CUSTOMER_RELEASE={{.release}}", + "MALI_USE_CSF={{.gpu_has_csf}}", + "MALI_KERNEL_TEST_API={{.debug}}", + ], + defaults: ["kernel_defaults"], +} + +bob_kernel_module { + name: "mali_kbase", + srcs: [ + "*.c", + "*.h", + "Kbuild", + "backend/gpu/*.c", + "backend/gpu/*.h", + "backend/gpu/Kbuild", + "context/*.c", + "context/*.h", + "ipa/*.c", + "ipa/*.h", + "ipa/Kbuild", + "platform/*.h", + "platform/*/*.c", + "platform/*/*.h", + "platform/*/Kbuild", + "thirdparty/*.c", + "debug/*.c", + "debug/*.h", + "device/*.c", + "device/*.h", + "gpu/*.c", + "gpu/*.h", + "tl/*.c", + "tl/*.h", + "mmu/*.c", + "mmu/*.h", + ], + kbuild_options: [ + "CONFIG_MALI_KUTF=n", + "CONFIG_MALI_MIDGARD=m", + "CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}", + "CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}", + ], + buslog: { + extra_symbols: [ + "bus_logger", + ], + }, + mali_corestack: { + kbuild_options: ["CONFIG_MALI_CORESTACK=y"], + }, + mali_error_inject: { + kbuild_options: ["CONFIG_MALI_ERROR_INJECT=y"], + }, + mali_error_inject_random: { + kbuild_options: ["CONFIG_MALI_ERROR_INJECT_RANDOM=y"], + }, + cinstr_secondary_hwc: { + kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SECONDARY=y"], + }, + cinstr_secondary_hwc_via_debug_fs: { + kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS=y"], + }, + mali_2mb_alloc: { + kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"], + }, + mali_hw_errata_1485982_not_affected: { + kbuild_options: ["CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y"], + }, + mali_hw_errata_1485982_use_clock_alternative: { + kbuild_options: ["CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE=y"], + }, + gpu_has_job_manager: { + srcs: [ + "context/backend/*_jm.c", + "debug/backend/*_jm.c", + "debug/backend/*_jm.h", + "device/backend/*_jm.c", + 
"gpu/backend/*_jm.c", + "gpu/backend/*_jm.h", + "jm/*.h", + "tl/backend/*_jm.c", + "mmu/backend/*_jm.c", + ], + }, + gpu_has_csf: { + srcs: [ + "context/backend/*_csf.c", + "csf/*.c", + "csf/*.h", + "csf/Kbuild", + "debug/backend/*_csf.c", + "debug/backend/*_csf.h", + "device/backend/*_csf.c", + "gpu/backend/*_csf.c", + "gpu/backend/*_csf.h", + "tl/backend/*_csf.c", + "mmu/backend/*_csf.c", + ], + }, + mali_arbiter_support: { + srcs: [ + "arbiter/*.c", + "arbiter/*.h", + "arbiter/Kbuild", + ], + }, + defaults: ["mali_kbase_shared_config_defaults"], +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/context/backend/mali_kbase_context_jm.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/context/backend/mali_kbase_context_jm.c new file mode 100644 index 0000000..2cd2551 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/context/backend/mali_kbase_context_jm.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Base kernel context APIs for Job Manager GPUs + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_DEBUG_FS +#include +#include + +void kbase_context_debugfs_init(struct kbase_context *const kctx) +{ + kbase_debug_mem_view_init(kctx); + kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx); + kbase_jit_debugfs_init(kctx); + kbasep_jd_debugfs_ctx_init(kctx); + kbase_debug_job_fault_context_init(kctx); +} +KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init); + +void kbase_context_debugfs_term(struct kbase_context *const kctx) +{ + debugfs_remove_recursive(kctx->kctx_dentry); + kbase_debug_job_fault_context_term(kctx); +} +KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term); +#else +void kbase_context_debugfs_init(struct kbase_context *const kctx) +{ + CSTD_UNUSED(kctx); +} +KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init); + +void kbase_context_debugfs_term(struct kbase_context *const kctx) +{ + CSTD_UNUSED(kctx); +} +KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term); +#endif /* CONFIG_DEBUG_FS */ + +static int kbase_context_kbase_timer_setup(struct kbase_context *kctx) +{ + kbase_timer_setup(&kctx->soft_job_timeout, + kbasep_soft_job_timeout_worker); + + return 0; +} + +static int kbase_context_submit_check(struct kbase_context *kctx) +{ + struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info; + unsigned long irq_flags = 0; + + base_context_create_flags const flags = kctx->create_flags; + + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); + + /* Translate the flags */ + if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0) + kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED); + + spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + return 0; +} + +static const struct kbase_context_init context_init[] = { + 
{kbase_context_common_init, kbase_context_common_term, NULL}, + {kbase_context_mem_pool_group_init, kbase_context_mem_pool_group_term, + "Memory pool group initialization failed"}, + {kbase_mem_evictable_init, kbase_mem_evictable_deinit, + "Memory evictable initialization failed"}, + {kbasep_js_kctx_init, kbasep_js_kctx_term, + "JS kctx initialization failed"}, + {kbase_jd_init, kbase_jd_exit, + "JD initialization failed"}, + {kbase_event_init, kbase_event_cleanup, + "Event initialization failed"}, + {kbase_dma_fence_init, kbase_dma_fence_term, + "DMA fence initialization failed"}, + {kbase_context_mmu_init, kbase_context_mmu_term, + "MMU initialization failed"}, + {kbase_context_mem_alloc_page, kbase_context_mem_pool_free, + "Memory alloc page failed"}, + {kbase_region_tracker_init, kbase_region_tracker_term, + "Region tracker initialization failed"}, + {kbase_sticky_resource_init, kbase_context_sticky_resource_term, + "Sticky resource initialization failed"}, + {kbase_jit_init, kbase_jit_term, + "JIT initialization failed"}, + {kbase_context_kbase_timer_setup, NULL, NULL}, + {kbase_context_submit_check, NULL, NULL}, +}; + +static void kbase_context_term_partial( + struct kbase_context *kctx, + unsigned int i) +{ + while (i-- > 0) { + if (context_init[i].term) + context_init[i].term(kctx); + } +} + +struct kbase_context *kbase_create_context(struct kbase_device *kbdev, + bool is_compat, + base_context_create_flags const flags, + unsigned long const api_version, + struct file *const filp) +{ + struct kbase_context *kctx; + unsigned int i = 0; + + if (WARN_ON(!kbdev)) + return NULL; + + /* Validate flags */ + if (WARN_ON(flags != (flags & BASEP_CONTEXT_CREATE_KERNEL_FLAGS))) + return NULL; + + /* zero-inited, as a lot of code assumes it is zeroed out on create */ + kctx = vzalloc(sizeof(*kctx)); + if (WARN_ON(!kctx)) + return NULL; + + kctx->kbdev = kbdev; + kctx->api_version = api_version; + kctx->filp = filp; + kctx->create_flags = flags; + + if (is_compat) + 
kbase_ctx_flag_set(kctx, KCTX_COMPAT); +#if defined(CONFIG_64BIT) + else + kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA); +#endif /* defined(CONFIG_64BIT) */ + + for (i = 0; i < ARRAY_SIZE(context_init); i++) { + int err = context_init[i].init(kctx); + + if (err) { + dev_err(kbdev->dev, "%s error = %d\n", + context_init[i].err_mes, err); + kbase_context_term_partial(kctx, i); + return NULL; + } + } + + return kctx; +} +KBASE_EXPORT_SYMBOL(kbase_create_context); + +void kbase_destroy_context(struct kbase_context *kctx) +{ + struct kbase_device *kbdev; + + if (WARN_ON(!kctx)) + return; + + kbdev = kctx->kbdev; + if (WARN_ON(!kbdev)) + return; + + /* Ensure the core is powered up for the destroy process. + * A suspend won't happen here, because we're in a syscall + * from a userspace thread. + */ + kbase_pm_context_active(kbdev); + + kbase_mem_pool_group_mark_dying(&kctx->mem_pools); + + kbase_jd_zap_context(kctx); + flush_workqueue(kctx->jctx.job_done_wq); + + kbase_context_term_partial(kctx, ARRAY_SIZE(context_init)); + + kbase_pm_context_idle(kbdev); +} +KBASE_EXPORT_SYMBOL(kbase_destroy_context); diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context.c new file mode 100644 index 0000000..93fe431 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context.c @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Base kernel context APIs + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int kbase_context_common_init(struct kbase_context *kctx) +{ + const unsigned long cookies_mask = KBASE_COOKIE_MASK; + + /* creating a context is considered a disjoint event */ + kbase_disjoint_event(kctx->kbdev); + + kctx->as_nr = KBASEP_AS_NR_INVALID; + + atomic_set(&kctx->refcount, 0); + + spin_lock_init(&kctx->mm_update_lock); + kctx->process_mm = NULL; + atomic_set(&kctx->nonmapped_pages, 0); + atomic_set(&kctx->permanent_mapped_pages, 0); + kctx->tgid = current->tgid; + kctx->pid = current->pid; + + atomic_set(&kctx->used_pages, 0); + + mutex_init(&kctx->reg_lock); + + spin_lock_init(&kctx->mem_partials_lock); + INIT_LIST_HEAD(&kctx->mem_partials); + + spin_lock_init(&kctx->waiting_soft_jobs_lock); + INIT_LIST_HEAD(&kctx->waiting_soft_jobs); + + init_waitqueue_head(&kctx->event_queue); + atomic_set(&kctx->event_count, 0); + atomic_set(&kctx->event_closed, false); + + bitmap_copy(kctx->cookies, &cookies_mask, BITS_PER_LONG); + +#ifdef CONFIG_GPU_TRACEPOINTS + atomic_set(&kctx->jctx.work_id, 0); +#endif + + kctx->id = atomic_add_return(1, &(kctx->kbdev->ctx_num)) - 1; + + mutex_init(&kctx->legacy_hwcnt_lock); + + mutex_lock(&kctx->kbdev->kctx_list_lock); + list_add(&kctx->kctx_list_link, &kctx->kbdev->kctx_list); + + KBASE_TLSTREAM_TL_KBASE_NEW_CTX(kctx->kbdev, kctx->id, + kctx->kbdev->gpu_props.props.raw_props.gpu_id); + KBASE_TLSTREAM_TL_NEW_CTX(kctx->kbdev, kctx, kctx->id, + (u32)(kctx->tgid)); + mutex_unlock(&kctx->kbdev->kctx_list_lock); + + return 0; +} + +void kbase_context_common_term(struct kbase_context *kctx) +{ + 
unsigned long flags; + int pages; + + mutex_lock(&kctx->kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); + kbase_ctx_sched_remove_ctx(kctx); + spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); + mutex_unlock(&kctx->kbdev->mmu_hw_mutex); + + pages = atomic_read(&kctx->used_pages); + if (pages != 0) + dev_warn(kctx->kbdev->dev, + "%s: %d pages in use!\n", __func__, pages); + + WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0); + + mutex_lock(&kctx->kbdev->kctx_list_lock); + + KBASE_TLSTREAM_TL_KBASE_DEL_CTX(kctx->kbdev, kctx->id); + + KBASE_TLSTREAM_TL_DEL_CTX(kctx->kbdev, kctx); + list_del(&kctx->kctx_list_link); + mutex_unlock(&kctx->kbdev->kctx_list_lock); + + KBASE_KTRACE_ADD(kctx->kbdev, CORE_CTX_DESTROY, kctx, 0u); + + /* Flush the timeline stream, so the user can see the termination + * tracepoints being fired. + * The "if" statement below is for optimization. It is safe to call + * kbase_timeline_streams_flush when timeline is disabled. + */ + if (atomic_read(&kctx->kbdev->timeline_flags) != 0) + kbase_timeline_streams_flush(kctx->kbdev->timeline); + + vfree(kctx); +} + +int kbase_context_mem_pool_group_init(struct kbase_context *kctx) +{ + return kbase_mem_pool_group_init(&kctx->mem_pools, + kctx->kbdev, + &kctx->kbdev->mem_pool_defaults, + &kctx->kbdev->mem_pools); +} + +void kbase_context_mem_pool_group_term(struct kbase_context *kctx) +{ + kbase_mem_pool_group_term(&kctx->mem_pools); +} + +int kbase_context_mmu_init(struct kbase_context *kctx) +{ + kbase_mmu_init(kctx->kbdev, + &kctx->mmu, kctx, + base_context_mmu_group_id_get(kctx->create_flags)); + + return 0; +} + +void kbase_context_mmu_term(struct kbase_context *kctx) +{ + kbase_mmu_term(kctx->kbdev, &kctx->mmu); +} + +int kbase_context_mem_alloc_page(struct kbase_context *kctx) +{ + struct page *p; + + p = kbase_mem_alloc_page(&kctx->mem_pools.small[KBASE_MEM_GROUP_SINK]); + if (!p) + return -ENOMEM; + + kctx->aliasing_sink_page = as_tagged(page_to_phys(p)); + 
+ return 0; +} + +void kbase_context_mem_pool_free(struct kbase_context *kctx) +{ + /* drop the aliasing sink page now that it can't be mapped anymore */ + kbase_mem_pool_free( + &kctx->mem_pools.small[KBASE_MEM_GROUP_SINK], + as_page(kctx->aliasing_sink_page), + false); +} + +void kbase_context_sticky_resource_term(struct kbase_context *kctx) +{ + unsigned long pending_regions_to_clean; + + kbase_gpu_vm_lock(kctx); + kbase_sticky_resource_term(kctx); + + /* free pending region setups */ + pending_regions_to_clean = KBASE_COOKIE_MASK; + bitmap_andnot(&pending_regions_to_clean, &pending_regions_to_clean, + kctx->cookies, BITS_PER_LONG); + while (pending_regions_to_clean) { + unsigned int cookie = find_first_bit(&pending_regions_to_clean, + BITS_PER_LONG); + + if (!WARN_ON(!kctx->pending_regions[cookie])) { + dev_dbg(kctx->kbdev->dev, "Freeing pending unmapped region\n"); + kbase_mem_phy_alloc_put( + kctx->pending_regions[cookie]->cpu_alloc); + kbase_mem_phy_alloc_put( + kctx->pending_regions[cookie]->gpu_alloc); + kfree(kctx->pending_regions[cookie]); + + kctx->pending_regions[cookie] = NULL; + } + + bitmap_clear(&pending_regions_to_clean, cookie, 1); + } + kbase_gpu_vm_unlock(kctx); +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context.h new file mode 100644 index 0000000..e4ed894 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context.h @@ -0,0 +1,157 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + *//* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * (C) COPYRIGHT 2011-2017, 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + */ + +#ifndef _KBASE_CONTEXT_H_ +#define _KBASE_CONTEXT_H_ + +#include + +/** + * kbase_context_debugfs_init - Initialize the kctx platform + * specific debugfs + * + * @kctx: kbase context + * + * This initializes some debugfs interfaces specific to the platform the source + * is compiled for. + */ +void kbase_context_debugfs_init(struct kbase_context *const kctx); + +/** + * kbase_context_debugfs_term - Terminate the kctx platform + * specific debugfs + * + * @kctx: kbase context + * + * This terminates some debugfs interfaces specific to the platform the source + * is compiled for. + */ +void kbase_context_debugfs_term(struct kbase_context *const kctx); + +/** + * kbase_create_context() - Create a kernel base context. + * + * @kbdev: Object representing an instance of GPU platform device, + * allocated from the probe method of the Mali driver. + * @is_compat: Force creation of a 32-bit context + * @flags: Flags to set, which shall be any combination of + * BASEP_CONTEXT_CREATE_KERNEL_FLAGS. + * @api_version: Application program interface version, as encoded in + * a single integer by the KBASE_API_VERSION macro. 
+ * @filp: Pointer to the struct file corresponding to device file + * /dev/malixx instance, passed to the file's open method. + * + * Up to one context can be created for each client that opens the device file + * /dev/malixx. Context creation is deferred until a special ioctl() system call + * is made on the device file. Each context has its own GPU address space. + * + * Return: new kbase context or NULL on failure + */ +struct kbase_context * +kbase_create_context(struct kbase_device *kbdev, bool is_compat, + base_context_create_flags const flags, + unsigned long api_version, + struct file *filp); + +/** + * kbase_destroy_context - Destroy a kernel base context. + * @kctx: Context to destroy + * + * Will release all outstanding regions. + */ +void kbase_destroy_context(struct kbase_context *kctx); + +/** + * kbase_ctx_flag - Check if @flag is set on @kctx + * @kctx: Pointer to kbase context to check + * @flag: Flag to check + * + * Return: true if @flag is set on @kctx, false if not. + */ +static inline bool kbase_ctx_flag(struct kbase_context *kctx, + enum kbase_context_flags flag) +{ + return atomic_read(&kctx->flags) & flag; +} + +/** + * kbase_ctx_flag_clear - Clear @flag on @kctx + * @kctx: Pointer to kbase context + * @flag: Flag to clear + * + * Clear the @flag on @kctx. This is done atomically, so other flags being + * cleared or set at the same time will be safe. + * + * Some flags have locking requirements, check the documentation for the + * respective flags. + */ +static inline void kbase_ctx_flag_clear(struct kbase_context *kctx, + enum kbase_context_flags flag) +{ +#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE + /* + * Earlier kernel versions don't have atomic_andnot() or + * atomic_and(). atomic_clear_mask() was only available on some + * architectures and was removed on arm and arm64 in v3.13. + * + * Use a compare-exchange loop to clear the flag on pre-4.3 kernels, + * i.e. before atomic_andnot() became available.
+ */ + int old, new; + + do { + old = atomic_read(&kctx->flags); + new = old & ~flag; + + } while (atomic_cmpxchg(&kctx->flags, old, new) != old); +#else + atomic_andnot(flag, &kctx->flags); +#endif +} + +/** + * kbase_ctx_flag_set - Set @flag on @kctx + * @kctx: Pointer to kbase context + * @flag: Flag to set + * + * Set the @flag on @kctx. This is done atomically, so other flags being + * cleared or set at the same time will be safe. + * + * Some flags have locking requirements, check the documentation for the + * respective flags. + */ +static inline void kbase_ctx_flag_set(struct kbase_context *kctx, + enum kbase_context_flags flag) +{ + atomic_or(flag, &kctx->flags); +} +#endif /* _KBASE_CONTEXT_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context_internal.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context_internal.h new file mode 100644 index 0000000..818cdbe --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context_internal.h @@ -0,0 +1,60 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + *//* SPDX-License-Identifier: GPL-2.0 */ +/* + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + */ + +#include <mali_kbase.h> + +typedef int kbase_context_init_method(struct kbase_context *kctx); +typedef void kbase_context_term_method(struct kbase_context *kctx); + +/** + * struct kbase_context_init - Context init/term methods. + * @init: Function pointer to an initialise method. + * @term: Function pointer to a terminate method. + * @err_mes: Error message to be printed when init method fails. + */ +struct kbase_context_init { + kbase_context_init_method *init; + kbase_context_term_method *term; + char *err_mes; +}; + +int kbase_context_common_init(struct kbase_context *kctx); +void kbase_context_common_term(struct kbase_context *kctx); + +int kbase_context_mem_pool_group_init(struct kbase_context *kctx); +void kbase_context_mem_pool_group_term(struct kbase_context *kctx); + +int kbase_context_mmu_init(struct kbase_context *kctx); +void kbase_context_mmu_term(struct kbase_context *kctx); + +int kbase_context_mem_alloc_page(struct kbase_context *kctx); +void kbase_context_mem_pool_free(struct kbase_context *kctx); + +void kbase_context_sticky_resource_term(struct kbase_context *kctx); diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_codes_jm.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_codes_jm.h new file mode 100644 index 0000000..d534f30 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_codes_jm.h @@ -0,0 +1,170 @@ +/* + * + * (C) COPYRIGHT 2011-2015,2018-2020 ARM Limited. All rights reserved.
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE ***** + * ***** DO NOT INCLUDE DIRECTLY ***** + * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** + */ + +/* + * The purpose of this header file is just to contain a list of trace code + * identifiers + * + * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THAT + * DESCRIBED IN mali_kbase_debug_ktrace_codes.h + */ + +#if 0 /* Dummy section to avoid breaking formatting */ +int dummy_array[] = { +#endif + + /* + * Job Slot management events + */ + /* info_val==irq rawstat at start */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_IRQ), + /* info_val==jobs processed */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_IRQ_END), + /* In the following: + * + * - ctx is set if a corresponding job found (NULL otherwise, e.g. 
some + * soft-stop cases) + * - uatom==kernel-side mapped uatom address (for correlation with + * user-side) + */ + /* info_val==exit code; gpu_addr==chain gpuaddr */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_JOB_DONE), + /* gpu_addr==JS_HEAD_NEXT written, info_val==lower 32 bits of + * affinity + */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_SUBMIT), + /* gpu_addr is as follows: + * - If JS_STATUS active after soft-stop, val==gpu addr written to + * JS_HEAD on submit + * - otherwise gpu_addr==0 + */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_SOFTSTOP), + KBASE_KTRACE_CODE_MAKE_CODE(JM_SOFTSTOP_0), + KBASE_KTRACE_CODE_MAKE_CODE(JM_SOFTSTOP_1), + /* gpu_addr==JS_HEAD read */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_HARDSTOP), + /* gpu_addr==JS_HEAD read */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_HARDSTOP_0), + /* gpu_addr==JS_HEAD read */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_HARDSTOP_1), + /* gpu_addr==JS_TAIL read */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_UPDATE_HEAD), + /* gpu_addr is as follows: + * - If JS_STATUS active before soft-stop, val==JS_HEAD + * - otherwise gpu_addr==0 + */ + /* gpu_addr==JS_HEAD read */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_CHECK_HEAD), + KBASE_KTRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS), + KBASE_KTRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS_DONE), + /* info_val == is_scheduled */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_ZAP_NON_SCHEDULED), + /* info_val == is_scheduled */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_ZAP_SCHEDULED), + KBASE_KTRACE_CODE_MAKE_CODE(JM_ZAP_DONE), + /* info_val == nr jobs submitted */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_SLOT_SOFT_OR_HARD_STOP), + /* gpu_addr==JS_HEAD_NEXT last written */ + KBASE_KTRACE_CODE_MAKE_CODE(JM_SLOT_EVICT), + KBASE_KTRACE_CODE_MAKE_CODE(JM_SUBMIT_AFTER_RESET), + KBASE_KTRACE_CODE_MAKE_CODE(JM_BEGIN_RESET_WORKER), + KBASE_KTRACE_CODE_MAKE_CODE(JM_END_RESET_WORKER), + /* + * Job dispatch events + */ + /* gpu_addr==value to write into JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JD_DONE), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JD_DONE_WORKER), + 
/* gpu_addr==value to write into JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JD_DONE_WORKER_END), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JD_DONE_TRY_RUN_NEXT_JOB), + /* gpu_addr==0, info_val==0, uatom==0 */ + KBASE_KTRACE_CODE_MAKE_CODE(JD_ZAP_CONTEXT), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JD_CANCEL), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JD_CANCEL_WORKER), + /* + * Scheduler Core events + */ + /* gpu_addr==value to write into JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_ADD_JOB), + /* gpu_addr==last value written/would be written to JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_REMOVE_JOB), + KBASE_KTRACE_CODE_MAKE_CODE(JS_TRY_SCHEDULE_HEAD_CTX), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED), + KBASE_KTRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED), + /* info_val == lower 32 bits of affinity */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT), + /* info_val == lower 32 bits of affinity */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_CORES_FAILED), + /* info_val == lower 32 bits of affinity */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_INUSE_FAILED), + /* info_val == lower 32 bits of rechecked affinity */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED), + /* info_val == lower 32 bits of rechecked affinity */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED), + /* info_val == lower 32 bits of affinity */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_CORE_REF_AFFINITY_WOULD_VIOLATE), + /* info_val == the ctx attribute now on ctx */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_CTX), + /* info_val == the ctx attribute now on runpool */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_RUNPOOL), + /* info_val == the ctx attribute now off ctx */ + 
KBASE_KTRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_CTX), + /* info_val == the ctx attribute now off runpool */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_RUNPOOL), + /* + * Scheduler Policy events + */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_INIT_CTX), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_TERM_CTX), + /* info_val == whether it was evicted */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_TRY_EVICT_CTX), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_FOREACH_CTX_JOBS), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_CTX), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_HEAD_CTX), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_ADD_CTX), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_REMOVE_CTX), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB_IRQ), + /* gpu_addr==JS_HEAD to write if the job were run */ + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_JOB), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_START), + KBASE_KTRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_END), + +#if 0 /* Dummy section to avoid breaking formatting */ +}; +#endif + +/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_defs_jm.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_defs_jm.h new file mode 100644 index 0000000..55b66ad --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_defs_jm.h @@ -0,0 +1,75 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_DEBUG_KTRACE_DEFS_JM_H_ +#define _KBASE_DEBUG_KTRACE_DEFS_JM_H_ + +/** + * DOC: KTrace version history, JM variant + * 1.0: + * - Original version (implicit, header did not carry version information) + * 2.0: + * - Introduced version information into the header + * - some changes of parameter names in header + * - trace now uses all 64-bits of info_val + * - Non-JM specific parts moved to using info_val instead of refcount/gpu_addr + */ +#define KBASE_KTRACE_VERSION_MAJOR 2 +#define KBASE_KTRACE_VERSION_MINOR 0 + +/* indicates if the trace message has a valid refcount member */ +#define KBASE_KTRACE_FLAG_JM_REFCOUNT (((kbase_ktrace_flag_t)1) << 0) +/* indicates if the trace message has a valid jobslot member */ +#define KBASE_KTRACE_FLAG_JM_JOBSLOT (((kbase_ktrace_flag_t)1) << 1) +/* indicates if the trace message has valid atom related info. */ +#define KBASE_KTRACE_FLAG_JM_ATOM (((kbase_ktrace_flag_t)1) << 2) + + +/** + * struct kbase_ktrace_backend - backend specific part of a trace message + * + * @atom_udata: Copy of the user data sent for the atom in base_jd_submit. + * Only valid if KBASE_KTRACE_FLAG_JM_ATOM is set in @flags + * @gpu_addr: GPU address, usually of the job-chain represented by an atom. + * @atom_number: id of the atom for which trace message was added. Only valid + * if KBASE_KTRACE_FLAG_JM_ATOM is set in @flags + * @code: Identifies the event, refer to enum kbase_ktrace_code. + * @flags: indicates information about the trace message itself. 
Used + * during dumping of the message. + * @jobslot: job-slot for which trace message was added, valid only for + * job-slot management events. + * @refcount: reference count for the context, valid for certain events + * related to scheduler core and policy. + */ +struct kbase_ktrace_backend { + /* Place 64 and 32-bit members together */ + u64 atom_udata[2]; /* Only valid for KBASE_KTRACE_FLAG_JM_ATOM */ + u64 gpu_addr; + int atom_number; /* Only valid for KBASE_KTRACE_FLAG_JM_ATOM */ + /* Pack smaller members together */ + kbase_ktrace_code_t code; + kbase_ktrace_flag_t flags; + u8 jobslot; + u8 refcount; +}; + +#endif /* _KBASE_DEBUG_KTRACE_DEFS_JM_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_jm.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_jm.c new file mode 100644 index 0000000..e651a09 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_jm.c @@ -0,0 +1,113 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ +#include <mali_kbase.h> +#include "debug/mali_kbase_debug_ktrace_internal.h" +#include "debug/backend/mali_kbase_debug_ktrace_jm.h" + +#if KBASE_KTRACE_TARGET_RBUF + +void kbasep_ktrace_backend_format_header(char *buffer, int sz, s32 *written) +{ + *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), + "katom,gpu_addr,jobslot,refcount"), 0); +} + +void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, + char *buffer, int sz, s32 *written) +{ + /* katom */ + if (trace_msg->backend.flags & KBASE_KTRACE_FLAG_JM_ATOM) + *written += MAX(snprintf(buffer + *written, + MAX(sz - *written, 0), + "atom %d (ud: 0x%llx 0x%llx)", + trace_msg->backend.atom_number, + trace_msg->backend.atom_udata[0], + trace_msg->backend.atom_udata[1]), 0); + + /* gpu_addr */ + if (trace_msg->backend.flags & KBASE_KTRACE_FLAG_BACKEND) + *written += MAX(snprintf(buffer + *written, + MAX(sz - *written, 0), + ",%.8llx,", trace_msg->backend.gpu_addr), 0); + else + *written += MAX(snprintf(buffer + *written, + MAX(sz - *written, 0), + ",,"), 0); + + /* jobslot */ + if (trace_msg->backend.flags & KBASE_KTRACE_FLAG_JM_JOBSLOT) + *written += MAX(snprintf(buffer + *written, + MAX(sz - *written, 0), + "%d", trace_msg->backend.jobslot), 0); + + *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), + ","), 0); + + /* refcount */ + if (trace_msg->backend.flags & KBASE_KTRACE_FLAG_JM_REFCOUNT) + *written += MAX(snprintf(buffer + *written, + MAX(sz - *written, 0), + "%d", trace_msg->backend.refcount), 0); +} + +void kbasep_ktrace_add_jm(struct kbase_device *kbdev, + enum kbase_ktrace_code code, struct kbase_context *kctx, + struct kbase_jd_atom *katom, u64 gpu_addr, + kbase_ktrace_flag_t flags, int refcount, int jobslot, + u64 info_val) +{ + unsigned long irqflags; + struct kbase_ktrace_msg *trace_msg; + + spin_lock_irqsave(&kbdev->ktrace.lock, irqflags); + + /* Reserve and update indices */ + trace_msg =
kbasep_ktrace_reserve(&kbdev->ktrace); + + /* Fill the common part of the message (including backend.flags) */ + kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, flags, + info_val); + + /* Indicate to the common code that backend-specific parts will be + * valid + */ + trace_msg->backend.flags |= KBASE_KTRACE_FLAG_BACKEND; + + /* Fill the JM-specific parts of the message */ + if (katom) { + trace_msg->backend.flags |= KBASE_KTRACE_FLAG_JM_ATOM; + + trace_msg->backend.atom_number = kbase_jd_atom_id(katom->kctx, katom); + trace_msg->backend.atom_udata[0] = katom->udata.blob[0]; + trace_msg->backend.atom_udata[1] = katom->udata.blob[1]; + } + + trace_msg->backend.gpu_addr = gpu_addr; + trace_msg->backend.jobslot = jobslot; + /* Clamp refcount */ + trace_msg->backend.refcount = MIN((unsigned int)refcount, 0xFF); + + /* Done */ + spin_unlock_irqrestore(&kbdev->ktrace.lock, irqflags); +} + +#endif /* KBASE_KTRACE_TARGET_RBUF */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_jm.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_jm.h new file mode 100644 index 0000000..c1bacf9 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_jm.h @@ -0,0 +1,362 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_DEBUG_KTRACE_JM_H_ +#define _KBASE_DEBUG_KTRACE_JM_H_ + +/* + * KTrace target for internal ringbuffer + */ +#if KBASE_KTRACE_TARGET_RBUF +/** + * kbasep_ktrace_add_jm - internal function to add trace about Job Management + * @kbdev: kbase device + * @code: trace code + * @kctx: kbase context, or NULL if no context + * @katom: kbase atom, or NULL if no atom + * @gpu_addr: GPU address, usually related to @katom + * @flags: flags about the message + * @refcount: reference count information to add to the trace + * @jobslot: jobslot information to add to the trace + * @info_val: generic information about @code to add to the trace + * + * PRIVATE: do not use directly. Use KBASE_KTRACE_ADD_JM() instead. + */ +void kbasep_ktrace_add_jm(struct kbase_device *kbdev, + enum kbase_ktrace_code code, struct kbase_context *kctx, + struct kbase_jd_atom *katom, u64 gpu_addr, + kbase_ktrace_flag_t flags, int refcount, int jobslot, + u64 info_val); + +#define KBASE_KTRACE_RBUF_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ + jobslot) \ + kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \ + gpu_addr, KBASE_KTRACE_FLAG_JM_JOBSLOT, 0, jobslot, 0) + +#define KBASE_KTRACE_RBUF_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, gpu_addr, \ + jobslot, info_val) \ + kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \ + gpu_addr, KBASE_KTRACE_FLAG_JM_JOBSLOT, 0, jobslot, \ + info_val) + +#define KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, gpu_addr, \ + refcount) \ + kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \ + gpu_addr, KBASE_KTRACE_FLAG_JM_REFCOUNT, refcount, 0, 0) +#define KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \ + gpu_addr, refcount, info_val) \ + 
kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \ + gpu_addr, KBASE_KTRACE_FLAG_JM_REFCOUNT, refcount, 0, \ + info_val) + +#define KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, gpu_addr, info_val) \ + kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \ + gpu_addr, 0, 0, 0, info_val) + +#else /* KBASE_KTRACE_TARGET_RBUF */ +#define KBASE_KTRACE_RBUF_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ + jobslot) \ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(kctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(jobslot);\ + CSTD_NOP(0);\ + } while (0) + +#define KBASE_KTRACE_RBUF_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, gpu_addr, \ + jobslot, info_val) \ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(kctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(jobslot);\ + CSTD_UNUSED(info_val);\ + CSTD_NOP(0);\ + } while (0) + +#define KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, gpu_addr, \ + refcount) \ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(kctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(refcount);\ + CSTD_NOP(0);\ + } while (0) + +#define KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \ + gpu_addr, refcount, info_val) \ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(kctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(info_val);\ + CSTD_NOP(0);\ + } while (0) + +#define KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, gpu_addr, \ + info_val)\ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(kctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(info_val);\ + CSTD_NOP(0);\ + } while (0) +#endif /* KBASE_KTRACE_TARGET_RBUF */ + +/* + * KTrace target for Linux's ftrace + */ +#if KBASE_KTRACE_TARGET_FTRACE +#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ + jobslot) \ + 
trace_mali_##code(jobslot, 0) + +#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, \ + gpu_addr, jobslot, info_val) \ + trace_mali_##code(jobslot, info_val) + +#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, \ + gpu_addr, refcount) \ + trace_mali_##code(refcount, 0) + +#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \ + gpu_addr, refcount, info_val) \ + trace_mali_##code(refcount, info_val) + +#define KBASE_KTRACE_FTRACE_ADD_JM(kbdev, code, kctx, katom, gpu_addr, \ + info_val) \ + trace_mali_##code(gpu_addr, info_val) +#else /* KBASE_KTRACE_TARGET_FTRACE */ +#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ + jobslot) \ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(kctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(jobslot);\ + CSTD_NOP(0);\ + } while (0) + +#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, \ + gpu_addr, jobslot, info_val) \ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(kctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(jobslot);\ + CSTD_UNUSED(info_val);\ + CSTD_NOP(0);\ + } while (0) + +#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, \ + gpu_addr, refcount) \ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(kctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(refcount);\ + CSTD_NOP(0);\ + } while (0) + +#define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \ + gpu_addr, refcount, info_val) \ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(kctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(info_val);\ + CSTD_NOP(0);\ + } while (0) + +#define KBASE_KTRACE_FTRACE_ADD_JM(kbdev, code, kctx, katom, gpu_addr, \ + info_val)\ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(kctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + 
CSTD_UNUSED(info_val);\ + CSTD_NOP(0);\ + } while (0) +#endif /* KBASE_KTRACE_TARGET_FTRACE */ + +/* + * Master set of macros to route KTrace to any of the targets + */ + +/** + * KBASE_KTRACE_ADD_JM_SLOT - Add trace values about a job-slot + * @kbdev: kbase device + * @code: trace code + * @kctx: kbase context, or NULL if no context + * @katom: kbase atom, or NULL if no atom + * @gpu_addr: GPU address, usually related to @katom + * @jobslot: jobslot information to add to the trace + * + * Note: Any functions called through this macro will still be evaluated in + * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when + * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied + * to this macro must: + * a) be static or static inline, and + * b) just return 0 and have no other statements present in the body. + */ +#define KBASE_KTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ + jobslot) \ + do { \ + /* capture values that could come from non-pure function calls */ \ + u64 __gpu_addr = gpu_addr; \ + int __jobslot = jobslot; \ + KBASE_KTRACE_RBUF_ADD_JM_SLOT(kbdev, code, kctx, katom, __gpu_addr, __jobslot); \ + KBASE_KTRACE_FTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, __gpu_addr, __jobslot); \ + } while (0) + +/** + * KBASE_KTRACE_ADD_JM_SLOT_INFO - Add trace values about a job-slot, with info + * @kbdev: kbase device + * @code: trace code + * @kctx: kbase context, or NULL if no context + * @katom: kbase atom, or NULL if no atom + * @gpu_addr: GPU address, usually related to @katom + * @jobslot: jobslot information to add to the trace + * @info_val: generic information about @code to add to the trace + * + * Note: Any functions called through this macro will still be evaluated in + * Release builds (CONFIG_MALI_DEBUG not defined). 
Therefore, when + * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied + * to this macro must: + * a) be static or static inline, and + * b) just return 0 and have no other statements present in the body. + */ +#define KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, gpu_addr, \ + jobslot, info_val) \ + do { \ + /* capture values that could come from non-pure function calls */ \ + u64 __gpu_addr = gpu_addr; \ + int __jobslot = jobslot; \ + u64 __info_val = info_val; \ + KBASE_KTRACE_RBUF_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, __gpu_addr, __jobslot, __info_val); \ + KBASE_KTRACE_FTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, __gpu_addr, __jobslot, __info_val); \ + } while (0) + +/** + * KBASE_KTRACE_ADD_JM_REFCOUNT - Add trace values about a kctx refcount + * @kbdev: kbase device + * @code: trace code + * @kctx: kbase context, or NULL if no context + * @katom: kbase atom, or NULL if no atom + * @gpu_addr: GPU address, usually related to @katom + * @refcount: reference count information to add to the trace + * + * Note: Any functions called through this macro will still be evaluated in + * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when + * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied + * to this macro must: + * a) be static or static inline, and + * b) just return 0 and have no other statements present in the body. 
+ */ +#define KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, gpu_addr, \ + refcount) \ + do { \ + /* capture values that could come from non-pure function calls */ \ + u64 __gpu_addr = gpu_addr; \ + int __refcount = refcount; \ + KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, __gpu_addr, __refcount); \ + KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, __gpu_addr, __refcount); \ + } while (0) + +/** + * KBASE_KTRACE_ADD_JM_REFCOUNT_INFO - Add trace values about a kctx refcount, + * and info + * @kbdev: kbase device + * @code: trace code + * @kctx: kbase context, or NULL if no context + * @katom: kbase atom, or NULL if no atom + * @gpu_addr: GPU address, usually related to @katom + * @refcount: reference count information to add to the trace + * @info_val: generic information about @code to add to the trace + * + * Note: Any functions called through this macro will still be evaluated in + * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when + * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied + * to this macro must: + * a) be static or static inline, and + * b) just return 0 and have no other statements present in the body. 
+ */ +#define KBASE_KTRACE_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \ + gpu_addr, refcount, info_val) \ + do { \ + /* capture values that could come from non-pure function calls */ \ + u64 __gpu_addr = gpu_addr; \ + int __refcount = refcount; \ + u64 __info_val = info_val; \ + KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, __gpu_addr, __refcount, __info_val); \ + KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, __gpu_addr, __refcount, __info_val); \ + } while (0) + +/** + * KBASE_KTRACE_ADD_JM - Add trace values (no slot or refcount) + * @kbdev: kbase device + * @code: trace code + * @kctx: kbase context, or NULL if no context + * @katom: kbase atom, or NULL if no atom + * @gpu_addr: GPU address, usually related to @katom + * @info_val: generic information about @code to add to the trace + * + * Note: Any functions called through this macro will still be evaluated in + * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when + * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied + * to this macro must: + * a) be static or static inline, and + * b) just return 0 and have no other statements present in the body.
+ */ +#define KBASE_KTRACE_ADD_JM(kbdev, code, kctx, katom, gpu_addr, info_val) \ + do { \ + /* capture values that could come from non-pure function calls */ \ + u64 __gpu_addr = gpu_addr; \ + u64 __info_val = info_val; \ + KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, __info_val); \ + KBASE_KTRACE_FTRACE_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, __info_val); \ + } while (0) + +#endif /* _KBASE_DEBUG_KTRACE_JM_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_linux_ktrace_jm.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_linux_ktrace_jm.h new file mode 100644 index 0000000..d964e5a --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_linux_ktrace_jm.h @@ -0,0 +1,150 @@ +/* + * + * (C) COPYRIGHT 2014,2018,2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * NOTE: This must **only** be included through mali_linux_trace.h, + * otherwise it will fail to setup tracepoints correctly + */ + +#if !defined(_KBASE_DEBUG_LINUX_KTRACE_JM_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _KBASE_DEBUG_LINUX_KTRACE_JM_H_ + +DECLARE_EVENT_CLASS(mali_jm_slot_template, + TP_PROTO(int jobslot, u64 info_val), + TP_ARGS(jobslot, info_val), + TP_STRUCT__entry( + __field(unsigned int, jobslot) + __field(u64, info_val) + ), + TP_fast_assign( + __entry->jobslot = jobslot; + __entry->info_val = info_val; + ), + TP_printk("jobslot=%u info=0x%llx", __entry->jobslot, __entry->info_val) +); + +#define DEFINE_MALI_JM_SLOT_EVENT(name) \ +DEFINE_EVENT(mali_jm_slot_template, mali_##name, \ + TP_PROTO(int jobslot, u64 info_val), \ + TP_ARGS(jobslot, info_val)) +DEFINE_MALI_JM_SLOT_EVENT(JM_SUBMIT); +DEFINE_MALI_JM_SLOT_EVENT(JM_JOB_DONE); +DEFINE_MALI_JM_SLOT_EVENT(JM_UPDATE_HEAD); +DEFINE_MALI_JM_SLOT_EVENT(JM_CHECK_HEAD); +DEFINE_MALI_JM_SLOT_EVENT(JM_SOFTSTOP); +DEFINE_MALI_JM_SLOT_EVENT(JM_SOFTSTOP_0); +DEFINE_MALI_JM_SLOT_EVENT(JM_SOFTSTOP_1); +DEFINE_MALI_JM_SLOT_EVENT(JM_HARDSTOP); +DEFINE_MALI_JM_SLOT_EVENT(JM_HARDSTOP_0); +DEFINE_MALI_JM_SLOT_EVENT(JM_HARDSTOP_1); +DEFINE_MALI_JM_SLOT_EVENT(JM_SLOT_SOFT_OR_HARD_STOP); +DEFINE_MALI_JM_SLOT_EVENT(JM_SLOT_EVICT); +DEFINE_MALI_JM_SLOT_EVENT(JM_BEGIN_RESET_WORKER); +DEFINE_MALI_JM_SLOT_EVENT(JM_END_RESET_WORKER); +DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED); +DEFINE_MALI_JM_SLOT_EVENT(JS_AFFINITY_SUBMIT_TO_BLOCKED); +DEFINE_MALI_JM_SLOT_EVENT(JS_AFFINITY_CURRENT); +DEFINE_MALI_JM_SLOT_EVENT(JD_DONE_TRY_RUN_NEXT_JOB); +DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_REQUEST_CORES_FAILED); +DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_REGISTER_INUSE_FAILED); +DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED); +DEFINE_MALI_JM_SLOT_EVENT(JS_CORE_REF_AFFINITY_WOULD_VIOLATE); 
+DEFINE_MALI_JM_SLOT_EVENT(JS_JOB_DONE_TRY_RUN_NEXT_JOB); +DEFINE_MALI_JM_SLOT_EVENT(JS_JOB_DONE_RETRY_NEEDED); +DEFINE_MALI_JM_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB); +DEFINE_MALI_JM_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB_IRQ); +#undef DEFINE_MALI_JM_SLOT_EVENT + +DECLARE_EVENT_CLASS(mali_jm_refcount_template, + TP_PROTO(int refcount, u64 info_val), + TP_ARGS(refcount, info_val), + TP_STRUCT__entry( + __field(unsigned int, refcount) + __field(u64, info_val) + ), + TP_fast_assign( + __entry->refcount = refcount; + __entry->info_val = info_val; + ), + TP_printk("refcount=%u info=0x%llx", __entry->refcount, __entry->info_val) +); + +#define DEFINE_MALI_JM_REFCOUNT_EVENT(name) \ +DEFINE_EVENT(mali_jm_refcount_template, mali_##name, \ + TP_PROTO(int refcount, u64 info_val), \ + TP_ARGS(refcount, info_val)) +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_ADD_JOB); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_REMOVE_JOB); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_TRY_SCHEDULE_HEAD_CTX); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_INIT_CTX); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_TERM_CTX); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_ENQUEUE_CTX); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_DEQUEUE_HEAD_CTX); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_TRY_EVICT_CTX); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_ADD_CTX); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_REMOVE_CTX); +DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_FOREACH_CTX_JOBS); +#undef DEFINE_MALI_JM_REFCOUNT_EVENT + +DECLARE_EVENT_CLASS(mali_jm_add_template, + TP_PROTO(u64 gpu_addr, u64 info_val), + TP_ARGS(gpu_addr, info_val), + TP_STRUCT__entry( + __field(u64, gpu_addr) + __field(u64, info_val) + ), + TP_fast_assign( + __entry->gpu_addr = gpu_addr; + __entry->info_val = info_val; + ), + TP_printk("gpu_addr=0x%llx info=0x%llx", __entry->gpu_addr, __entry->info_val) +); + +#define DEFINE_MALI_JM_ADD_EVENT(name) \ +DEFINE_EVENT(mali_jm_add_template, mali_##name, \ + TP_PROTO(u64 gpu_addr, u64 info_val), \ + TP_ARGS(gpu_addr, info_val)) 
+DEFINE_MALI_JM_ADD_EVENT(JD_DONE_WORKER); +DEFINE_MALI_JM_ADD_EVENT(JD_DONE_WORKER_END); +DEFINE_MALI_JM_ADD_EVENT(JD_CANCEL_WORKER); +DEFINE_MALI_JM_ADD_EVENT(JD_DONE); +DEFINE_MALI_JM_ADD_EVENT(JD_CANCEL); +DEFINE_MALI_JM_ADD_EVENT(JD_ZAP_CONTEXT); +DEFINE_MALI_JM_ADD_EVENT(JM_IRQ); +DEFINE_MALI_JM_ADD_EVENT(JM_IRQ_END); +DEFINE_MALI_JM_ADD_EVENT(JM_FLUSH_WORKQS); +DEFINE_MALI_JM_ADD_EVENT(JM_FLUSH_WORKQS_DONE); +DEFINE_MALI_JM_ADD_EVENT(JM_ZAP_NON_SCHEDULED); +DEFINE_MALI_JM_ADD_EVENT(JM_ZAP_SCHEDULED); +DEFINE_MALI_JM_ADD_EVENT(JM_ZAP_DONE); +DEFINE_MALI_JM_ADD_EVENT(JM_SUBMIT_AFTER_RESET); +DEFINE_MALI_JM_ADD_EVENT(JM_JOB_COMPLETE); +DEFINE_MALI_JM_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL); +DEFINE_MALI_JM_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL); +DEFINE_MALI_JM_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX); +DEFINE_MALI_JM_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_CTX); +DEFINE_MALI_JM_ADD_EVENT(JS_POLICY_TIMER_END); +DEFINE_MALI_JM_ADD_EVENT(JS_POLICY_TIMER_START); +DEFINE_MALI_JM_ADD_EVENT(JS_POLICY_ENQUEUE_JOB); +#undef DEFINE_MALI_JM_ADD_EVENT + +#endif /* !defined(_KBASE_DEBUG_LINUX_KTRACE_JM_H_) || defined(TRACE_HEADER_MULTI_READ)*/ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace.c new file mode 100644 index 0000000..6322abb --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace.c @@ -0,0 +1,342 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ +#include <mali_kbase.h> +#include "debug/mali_kbase_debug_ktrace_internal.h" + +int kbase_ktrace_init(struct kbase_device *kbdev) +{ +#if KBASE_KTRACE_TARGET_RBUF + struct kbase_ktrace_msg *rbuf; + + /* See also documentation of enum kbase_ktrace_code */ + compiletime_assert(sizeof(kbase_ktrace_code_t) == sizeof(unsigned long long) || + KBASE_KTRACE_CODE_COUNT <= (1ull << (sizeof(kbase_ktrace_code_t) * BITS_PER_BYTE)), + "kbase_ktrace_code_t not wide enough for KBASE_KTRACE_CODE_COUNT"); + + rbuf = kmalloc_array(KBASE_KTRACE_SIZE, sizeof(*rbuf), GFP_KERNEL); + + if (!rbuf) + return -ENOMEM; + + kbdev->ktrace.rbuf = rbuf; + spin_lock_init(&kbdev->ktrace.lock); +#endif /* KBASE_KTRACE_TARGET_RBUF */ + return 0; +} + +void kbase_ktrace_term(struct kbase_device *kbdev) +{ +#if KBASE_KTRACE_TARGET_RBUF + kfree(kbdev->ktrace.rbuf); +#endif /* KBASE_KTRACE_TARGET_RBUF */ +} + +void kbase_ktrace_hook_wrapper(void *param) +{ + struct kbase_device *kbdev = (struct kbase_device *)param; + + KBASE_KTRACE_DUMP(kbdev); +} + +#if KBASE_KTRACE_TARGET_RBUF + +static const char * const kbasep_ktrace_code_string[] = { + /* + * IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE + * THIS MUST BE USED AT THE START OF THE ARRAY + */ +#define KBASE_KTRACE_CODE_MAKE_CODE(X) # X +#include "debug/mali_kbase_debug_ktrace_codes.h" +#undef KBASE_KTRACE_CODE_MAKE_CODE +}; + +static void kbasep_ktrace_format_header(char *buffer, int sz, s32 written) +{ + written += MAX(snprintf(buffer + written, MAX(sz - written, 0), + "secs,thread_id,cpu,code,kctx,"), 0); + + kbasep_ktrace_backend_format_header(buffer, sz, &written); + + written += MAX(snprintf(buffer + written, MAX(sz - written, 0), +
",info_val,ktrace_version=%u.%u", + KBASE_KTRACE_VERSION_MAJOR, + KBASE_KTRACE_VERSION_MINOR), 0); + + buffer[sz - 1] = 0; +} + +static void kbasep_ktrace_format_msg(struct kbase_ktrace_msg *trace_msg, + char *buffer, int sz) +{ + s32 written = 0; + + /* Initial part of message: + * + * secs,thread_id,cpu,code,kctx, + */ + written += MAX(snprintf(buffer + written, MAX(sz - written, 0), + "%d.%.6d,%d,%d,%s,%p,", + (int)trace_msg->timestamp.tv_sec, + (int)(trace_msg->timestamp.tv_nsec / 1000), + trace_msg->thread_id, trace_msg->cpu, + kbasep_ktrace_code_string[trace_msg->backend.code], + trace_msg->kctx), 0); + + /* Backend parts */ + kbasep_ktrace_backend_format_msg(trace_msg, buffer, sz, + &written); + + /* Rest of message: + * + * ,info_val + * + * Note that the last column is empty, it's simply to hold the ktrace + * version in the header + */ + written += MAX(snprintf(buffer + written, MAX(sz - written, 0), + ",0x%.16llx", + (unsigned long long)trace_msg->info_val), 0); + buffer[sz - 1] = 0; +} + +static void kbasep_ktrace_dump_msg(struct kbase_device *kbdev, + struct kbase_ktrace_msg *trace_msg) +{ + char buffer[KTRACE_DUMP_MESSAGE_SIZE]; + + lockdep_assert_held(&kbdev->ktrace.lock); + + kbasep_ktrace_format_msg(trace_msg, buffer, sizeof(buffer)); + dev_dbg(kbdev->dev, "%s", buffer); +} + +struct kbase_ktrace_msg *kbasep_ktrace_reserve(struct kbase_ktrace *ktrace) +{ + struct kbase_ktrace_msg *trace_msg; + + lockdep_assert_held(&ktrace->lock); + + trace_msg = &ktrace->rbuf[ktrace->next_in]; + + /* Update the ringbuffer indices */ + ktrace->next_in = (ktrace->next_in + 1) & KBASE_KTRACE_MASK; + if (ktrace->next_in == ktrace->first_out) + ktrace->first_out = (ktrace->first_out + 1) & KBASE_KTRACE_MASK; + + return trace_msg; +} +void kbasep_ktrace_msg_init(struct kbase_ktrace *ktrace, + struct kbase_ktrace_msg *trace_msg, enum kbase_ktrace_code code, + struct kbase_context *kctx, kbase_ktrace_flag_t flags, + u64 info_val) +{ + lockdep_assert_held(&ktrace->lock); + 
+ trace_msg->thread_id = task_pid_nr(current); + trace_msg->cpu = task_cpu(current); + + ktime_get_real_ts64(&trace_msg->timestamp); + + trace_msg->kctx = kctx; + + trace_msg->info_val = info_val; + trace_msg->backend.code = code; + trace_msg->backend.flags = flags; +} + +void kbasep_ktrace_add(struct kbase_device *kbdev, enum kbase_ktrace_code code, + struct kbase_context *kctx, kbase_ktrace_flag_t flags, + u64 info_val) +{ + unsigned long irqflags; + struct kbase_ktrace_msg *trace_msg; + + spin_lock_irqsave(&kbdev->ktrace.lock, irqflags); + + /* Reserve and update indices */ + trace_msg = kbasep_ktrace_reserve(&kbdev->ktrace); + + /* Fill the common part of the message (including backend.flags) */ + kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, flags, + info_val); + + /* Done */ + spin_unlock_irqrestore(&kbdev->ktrace.lock, irqflags); +} + +static void kbasep_ktrace_clear_locked(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->ktrace.lock); + kbdev->ktrace.first_out = kbdev->ktrace.next_in; +} +void kbasep_ktrace_clear(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->ktrace.lock, flags); + kbasep_ktrace_clear_locked(kbdev); + spin_unlock_irqrestore(&kbdev->ktrace.lock, flags); +} + +void kbasep_ktrace_dump(struct kbase_device *kbdev) +{ + unsigned long flags; + u32 start; + u32 end; + char buffer[KTRACE_DUMP_MESSAGE_SIZE] = "Dumping trace:\n"; + + kbasep_ktrace_format_header(buffer, sizeof(buffer), strlen(buffer)); + dev_dbg(kbdev->dev, "%s", buffer); + + spin_lock_irqsave(&kbdev->ktrace.lock, flags); + start = kbdev->ktrace.first_out; + end = kbdev->ktrace.next_in; + + while (start != end) { + struct kbase_ktrace_msg *trace_msg = &kbdev->ktrace.rbuf[start]; + + kbasep_ktrace_dump_msg(kbdev, trace_msg); + + start = (start + 1) & KBASE_KTRACE_MASK; + } + dev_dbg(kbdev->dev, "TRACE_END"); + + kbasep_ktrace_clear_locked(kbdev); + + spin_unlock_irqrestore(&kbdev->ktrace.lock, flags); +} + +#ifdef 
CONFIG_DEBUG_FS +struct trace_seq_state { + struct kbase_ktrace_msg trace_buf[KBASE_KTRACE_SIZE]; + u32 start; + u32 end; +}; + +static void *kbasep_ktrace_seq_start(struct seq_file *s, loff_t *pos) +{ + struct trace_seq_state *state = s->private; + int i; + + if (*pos == 0) + /* See Documentation/filesystems/seq_file.txt */ + return SEQ_START_TOKEN; + + if (*pos > KBASE_KTRACE_SIZE) + return NULL; + i = state->start + *pos; + if ((state->end >= state->start && i >= state->end) || + i >= state->end + KBASE_KTRACE_SIZE) + return NULL; + + i &= KBASE_KTRACE_MASK; + + return &state->trace_buf[i]; +} + +static void kbasep_ktrace_seq_stop(struct seq_file *s, void *data) +{ +} + +static void *kbasep_ktrace_seq_next(struct seq_file *s, void *data, loff_t *pos) +{ + struct trace_seq_state *state = s->private; + int i; + + if (data != SEQ_START_TOKEN) + (*pos)++; + + i = (state->start + *pos) & KBASE_KTRACE_MASK; + if (i == state->end) + return NULL; + + return &state->trace_buf[i]; +} + +static int kbasep_ktrace_seq_show(struct seq_file *s, void *data) +{ + struct kbase_ktrace_msg *trace_msg = data; + char buffer[KTRACE_DUMP_MESSAGE_SIZE]; + + /* If this is the start, print a header */ + if (data == SEQ_START_TOKEN) + kbasep_ktrace_format_header(buffer, sizeof(buffer), 0); + else + kbasep_ktrace_format_msg(trace_msg, buffer, sizeof(buffer)); + + seq_printf(s, "%s\n", buffer); + return 0; +} + +static const struct seq_operations kbasep_ktrace_seq_ops = { + .start = kbasep_ktrace_seq_start, + .next = kbasep_ktrace_seq_next, + .stop = kbasep_ktrace_seq_stop, + .show = kbasep_ktrace_seq_show, +}; + +static int kbasep_ktrace_debugfs_open(struct inode *inode, struct file *file) +{ + struct kbase_device *kbdev = inode->i_private; + unsigned long flags; + + struct trace_seq_state *state; + + state = __seq_open_private(file, &kbasep_ktrace_seq_ops, + sizeof(*state)); + if (!state) + return -ENOMEM; + + spin_lock_irqsave(&kbdev->ktrace.lock, flags); + state->start = 
kbdev->ktrace.first_out; + state->end = kbdev->ktrace.next_in; + memcpy(state->trace_buf, kbdev->ktrace.rbuf, sizeof(state->trace_buf)); + spin_unlock_irqrestore(&kbdev->ktrace.lock, flags); + + return 0; +} + +static const struct file_operations kbasep_ktrace_debugfs_fops = { + .owner = THIS_MODULE, + .open = kbasep_ktrace_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +void kbase_ktrace_debugfs_init(struct kbase_device *kbdev) +{ + debugfs_create_file("mali_trace", 0444, + kbdev->mali_debugfs_directory, kbdev, + &kbasep_ktrace_debugfs_fops); +} +#endif /* CONFIG_DEBUG_FS */ + +#else /* KBASE_KTRACE_TARGET_RBUF */ + +#ifdef CONFIG_DEBUG_FS +void kbase_ktrace_debugfs_init(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} +#endif /* CONFIG_DEBUG_FS */ +#endif /* KBASE_KTRACE_TARGET_RBUF */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace.h new file mode 100644 index 0000000..0dd8b7a --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace.h @@ -0,0 +1,219 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * DOC: Kbase's own trace, 'KTrace' + * + * Low overhead trace specific to kbase, aimed at: + * - common use-cases for tracing kbase specific functionality to do with + * running work on the GPU + * - easy 1-line addition of new types of trace + * + * KTrace can be recorded in one or more of the following targets: + * - KBASE_KTRACE_TARGET_RBUF: low overhead ringbuffer protected by an + * irq-spinlock, output available via dev_dbg() and debugfs file + * - KBASE_KTRACE_TARGET_FTRACE: ftrace based tracepoints under 'mali' events + */ + +#ifndef _KBASE_DEBUG_KTRACE_H_ +#define _KBASE_DEBUG_KTRACE_H_ + +#include "debug/backend/mali_kbase_debug_ktrace_jm.h" + +/** + * kbase_ktrace_init - initialize kbase ktrace. + * @kbdev: kbase device + */ +int kbase_ktrace_init(struct kbase_device *kbdev); + +/** + * kbase_ktrace_term - terminate kbase ktrace. + * @kbdev: kbase device + */ +void kbase_ktrace_term(struct kbase_device *kbdev); + +/** + * kbase_ktrace_hook_wrapper - wrapper so that dumping ktrace can be done via a + * callback. + * @param: kbase device, cast to void pointer + */ +void kbase_ktrace_hook_wrapper(void *param); + +#ifdef CONFIG_DEBUG_FS +/** + * kbase_ktrace_debugfs_init - initialize kbase ktrace for debugfs usage, if + * the selected targets support it. + * @kbdev: kbase device + * + * There is no matching 'term' call, debugfs_remove_recursive() is sufficient. + */ +void kbase_ktrace_debugfs_init(struct kbase_device *kbdev); +#endif /* CONFIG_DEBUG_FS */ + +/* + * KTrace target for internal ringbuffer + */ +#if KBASE_KTRACE_TARGET_RBUF +/** + * kbasep_ktrace_add - internal function to add trace to the ringbuffer. + * @kbdev: kbase device + * @code: ktrace code + * @kctx: kbase context, or NULL if no context + * @flags: flags about the message + * @info_val: generic information about @code to add to the trace + * + * PRIVATE: do not use directly. Use KBASE_KTRACE_ADD() instead. 
+ */ +void kbasep_ktrace_add(struct kbase_device *kbdev, enum kbase_ktrace_code code, + struct kbase_context *kctx, kbase_ktrace_flag_t flags, + u64 info_val); + +/** + * kbasep_ktrace_clear - clear the trace ringbuffer + * @kbdev: kbase device + * + * PRIVATE: do not use directly. Use KBASE_KTRACE_CLEAR() instead. + */ +void kbasep_ktrace_clear(struct kbase_device *kbdev); + +/** + * kbasep_ktrace_dump - dump ktrace ringbuffer to dev_dbg(), then clear it + * @kbdev: kbase device + * + * PRIVATE: do not use directly. Use KBASE_KTRACE_DUMP() instead. + */ +void kbasep_ktrace_dump(struct kbase_device *kbdev); + +#define KBASE_KTRACE_RBUF_ADD(kbdev, code, kctx, info_val) \ + kbasep_ktrace_add(kbdev, KBASE_KTRACE_CODE(code), kctx, 0, \ + info_val) \ + +#define KBASE_KTRACE_RBUF_CLEAR(kbdev) \ + kbasep_ktrace_clear(kbdev) + +#define KBASE_KTRACE_RBUF_DUMP(kbdev) \ + kbasep_ktrace_dump(kbdev) + +#else /* KBASE_KTRACE_TARGET_RBUF */ + +#define KBASE_KTRACE_RBUF_ADD(kbdev, code, kctx, info_val) \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(code); \ + CSTD_UNUSED(kctx); \ + CSTD_UNUSED(info_val); \ + CSTD_NOP(0); \ + } while (0) + +#define KBASE_KTRACE_RBUF_CLEAR(kbdev) \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(0); \ + } while (0) +#define KBASE_KTRACE_RBUF_DUMP(kbdev) \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(0); \ + } while (0) +#endif /* KBASE_KTRACE_TARGET_RBUF */ + +/* + * KTrace target for Linux's ftrace + */ +#if KBASE_KTRACE_TARGET_FTRACE +#include "mali_linux_trace.h" + +#define KBASE_KTRACE_FTRACE_ADD(kbdev, code, kctx, info_val) \ + trace_mali_##code(info_val) + +#else /* KBASE_KTRACE_TARGET_FTRACE */ +#define KBASE_KTRACE_FTRACE_ADD(kbdev, code, kctx, info_val) \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(code); \ + CSTD_UNUSED(kctx); \ + CSTD_UNUSED(info_val); \ + CSTD_NOP(0); \ + } while (0) +#endif /* KBASE_KTRACE_TARGET_FTRACE */ + +/* No 'clear' implementation for ftrace yet */ +#define KBASE_KTRACE_FTRACE_CLEAR(kbdev) \ + do { \ + 
CSTD_UNUSED(kbdev); \ + CSTD_NOP(0); \ + } while (0) + +/* No 'dump' implementation for ftrace yet */ +#define KBASE_KTRACE_FTRACE_DUMP(kbdev) \ + do { \ + CSTD_UNUSED(kbdev); \ + CSTD_NOP(0); \ + } while (0) + +/* + * Master set of macros to route KTrace to any of the targets + */ + +/** + * KBASE_KTRACE_ADD - Add trace values + * @kbdev: kbase device + * @code: trace code + * @kctx: kbase context, or NULL if no context + * @info_val: generic information about @code to add to the trace + * + * Note: Any functions called through this macro will still be evaluated in + * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when + * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied + * to this macro must: + * a) be static or static inline, and + * b) just return 0 and have no other statements present in the body. + */ +#define KBASE_KTRACE_ADD(kbdev, code, kctx, info_val) \ + do { \ + /* capture values that could come from non-pure function calls */ \ + u64 __info_val = info_val; \ + KBASE_KTRACE_RBUF_ADD(kbdev, code, kctx, __info_val); \ + KBASE_KTRACE_FTRACE_ADD(kbdev, code, kctx, __info_val); \ + } while (0) + +/** + * KBASE_KTRACE_CLEAR - Clear the trace, if applicable to the target(s) + * @kbdev: kbase device + */ +#define KBASE_KTRACE_CLEAR(kbdev) \ + do { \ + KBASE_KTRACE_RBUF_CLEAR(kbdev); \ + KBASE_KTRACE_FTRACE_CLEAR(kbdev); \ + } while (0) + +/** + * KBASE_KTRACE_DUMP - Dump the trace, if applicable to the target(s) + * @kbdev: kbase device + */ +#define KBASE_KTRACE_DUMP(kbdev) \ + do { \ + KBASE_KTRACE_RBUF_DUMP(kbdev); \ + KBASE_KTRACE_FTRACE_DUMP(kbdev); \ + } while (0) + +#endif /* _KBASE_DEBUG_KTRACE_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_codes.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_codes.h new file mode 100644 index 0000000..364ed60 --- /dev/null +++ 
b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_codes.h @@ -0,0 +1,158 @@ +/* + * + * (C) COPYRIGHT 2011-2015,2018-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE ***** + * ***** DO NOT INCLUDE DIRECTLY ***** + * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** + */ + +/* + * The purpose of this header file is just to contain a list of trace code + * identifiers + * + * Each identifier is wrapped in a macro, so that its string form and enum form + * can be created + * + * Each macro is separated with a comma, to allow insertion into an array + * initializer or enum definition block. + * + * This allows automatic creation of an enum and a corresponding array of + * strings + * + * Before #including, the includer MUST #define KBASE_KTRACE_CODE_MAKE_CODE. + * After #including, the includer MUST #undef KBASE_KTRACE_CODE_MAKE_CODE.
+ * + * e.g.: + * #define KBASE_KTRACE_CODE( X ) KBASE_KTRACE_CODE_ ## X + * typedef enum + * { + * #define KBASE_KTRACE_CODE_MAKE_CODE( X ) KBASE_KTRACE_CODE( X ) + * #include "mali_kbase_debug_ktrace_codes.h" + * #undef KBASE_KTRACE_CODE_MAKE_CODE + * } kbase_ktrace_code; + * + * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THE ABOVE + * + * + * The use of the macro here is: + * - KBASE_KTRACE_CODE_MAKE_CODE( X ) + * + * Which produces: + * - For an enum, KBASE_KTRACE_CODE_X + * - For a string, "X" + * + * + * For example: + * - KBASE_KTRACE_CODE_MAKE_CODE( JM_JOB_COMPLETE ) expands to: + * - KBASE_KTRACE_CODE_JM_JOB_COMPLETE for the enum + * - "JM_JOB_COMPLETE" for the string + * - To use it to trace an event, do: + * - KBASE_KTRACE_ADD( kbdev, JM_JOB_COMPLETE, subcode, kctx, uatom, val ); + */ + +#if 0 /* Dummy section to avoid breaking formatting */ +int dummy_array[] = { +#endif + + /* + * Core events + */ + /* no info_val */ + KBASE_KTRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY), + /* no info_val */ + KBASE_KTRACE_CODE_MAKE_CODE(CORE_CTX_HWINSTR_TERM), + /* info_val == GPU_IRQ_STATUS register */ + KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_IRQ), + /* info_val == bits cleared */ + KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_CLEAR), + /* info_val == GPU_IRQ_STATUS register */ + KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_DONE), + KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_SOFT_RESET), + KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_HARD_RESET), + KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_CLEAR), + /* info_val == dump address */ + KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_SAMPLE), + KBASE_KTRACE_CODE_MAKE_CODE(CORE_GPU_CLEAN_INV_CACHES), + + /* + * Power Management Events + */ + KBASE_KTRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERING_UP), + KBASE_KTRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERED_UP), + KBASE_KTRACE_CODE_MAKE_CODE(PM_PWRON), + KBASE_KTRACE_CODE_MAKE_CODE(PM_PWRON_TILER), + KBASE_KTRACE_CODE_MAKE_CODE(PM_PWRON_L2), + 
KBASE_KTRACE_CODE_MAKE_CODE(PM_PWROFF), + KBASE_KTRACE_CODE_MAKE_CODE(PM_PWROFF_TILER), + KBASE_KTRACE_CODE_MAKE_CODE(PM_PWROFF_L2), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_POWERED), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_POWERED_TILER), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_POWERED_L2), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER), + KBASE_KTRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED), + KBASE_KTRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER), + KBASE_KTRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_NEEDED), + KBASE_KTRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_NEEDED), + KBASE_KTRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED), + KBASE_KTRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED), + KBASE_KTRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS), + /* info_val == kbdev->pm.active_count*/ + KBASE_KTRACE_CODE_MAKE_CODE(PM_CONTEXT_ACTIVE), + /* info_val == kbdev->pm.active_count*/ + KBASE_KTRACE_CODE_MAKE_CODE(PM_CONTEXT_IDLE), + KBASE_KTRACE_CODE_MAKE_CODE(PM_GPU_ON), + KBASE_KTRACE_CODE_MAKE_CODE(PM_GPU_OFF), + /* info_val == policy number, or -1 for "Already changing" */ + KBASE_KTRACE_CODE_MAKE_CODE(PM_SET_POLICY), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CA_SET_POLICY), + /* info_val == policy number */ + KBASE_KTRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_INIT), + /* info_val == policy number */ + KBASE_KTRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_TERM), + + /* + * Context Scheduler events + */ + /* info_val == kctx->refcount */ + KBASE_KTRACE_CODE_MAKE_CODE(SCHED_RETAIN_CTX_NOLOCK), + /* info_val == kctx->refcount */ + KBASE_KTRACE_CODE_MAKE_CODE(SCHED_RELEASE_CTX), + + +#include "debug/backend/mali_kbase_debug_ktrace_codes_jm.h" + /* + * Unused code just to make it easier to not have a comma at 
the end. + * All other codes MUST come before this + */ + KBASE_KTRACE_CODE_MAKE_CODE(DUMMY) + +#if 0 /* Dummy section to avoid breaking formatting */ +}; +#endif + +/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_defs.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_defs.h new file mode 100644 index 0000000..d6baaf1 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_defs.h @@ -0,0 +1,152 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_DEBUG_KTRACE_DEFS_H_ +#define _KBASE_DEBUG_KTRACE_DEFS_H_ + +/* Enable SW tracing when set */ +#if defined(CONFIG_MALI_MIDGARD_ENABLE_TRACE) || defined(CONFIG_MALI_SYSTEM_TRACE) +#define KBASE_KTRACE_ENABLE 1 +#endif + +#ifndef KBASE_KTRACE_ENABLE +#ifdef CONFIG_MALI_DEBUG +#define KBASE_KTRACE_ENABLE 1 +#else /* CONFIG_MALI_DEBUG */ +#define KBASE_KTRACE_ENABLE 0 +#endif /* CONFIG_MALI_DEBUG */ +#endif /* KBASE_KTRACE_ENABLE */ + +/* Select targets for recording of trace: + * + */ +#if KBASE_KTRACE_ENABLE + +#ifdef CONFIG_MALI_SYSTEM_TRACE +#define KBASE_KTRACE_TARGET_FTRACE 1 +#else /* CONFIG_MALI_SYSTEM_TRACE */ +#define KBASE_KTRACE_TARGET_FTRACE 0 +#endif /* CONFIG_MALI_SYSTEM_TRACE */ + +#ifdef CONFIG_MALI_MIDGARD_ENABLE_TRACE +#define KBASE_KTRACE_TARGET_RBUF 1 +#else /* CONFIG_MALI_MIDGARD_ENABLE_TRACE*/ +#define KBASE_KTRACE_TARGET_RBUF 0 +#endif /* CONFIG_MALI_MIDGARD_ENABLE_TRACE */ + +#else /* KBASE_KTRACE_ENABLE */ +#define KBASE_KTRACE_TARGET_FTRACE 0 +#define KBASE_KTRACE_TARGET_RBUF 0 +#endif /* KBASE_KTRACE_ENABLE */ + +/* + * NOTE: KBASE_KTRACE_VERSION_MAJOR, KBASE_KTRACE_VERSION_MINOR are kept in + * the backend, since updates can be made to one backend in a way that doesn't + * affect the other. + * + * However, modifying the common part could require both backend versions to be + * updated. + */ + +#if KBASE_KTRACE_TARGET_RBUF +typedef u8 kbase_ktrace_flag_t; +typedef u8 kbase_ktrace_code_t; + +/* + * struct kbase_ktrace_backend - backend specific part of a trace message + * + * At the very least, this must contain a kbase_ktrace_code_t 'code' member and + * a kbase_ktrace_flag_t 'flags' member + */ +struct kbase_ktrace_backend; + +#include "debug/backend/mali_kbase_debug_ktrace_defs_jm.h" + +/* Indicates if the trace message has backend related info. 
+ * + * If not set, consider the &kbase_ktrace_backend part of a &kbase_ktrace_msg + * as uninitialized, apart from the mandatory parts: + * - code + * - flags + */ +#define KBASE_KTRACE_FLAG_BACKEND (((kbase_ktrace_flag_t)1) << 7) + +#define KBASE_KTRACE_SHIFT 8 /* 256 entries */ +#define KBASE_KTRACE_SIZE (1 << KBASE_KTRACE_SHIFT) +#define KBASE_KTRACE_MASK ((1 << KBASE_KTRACE_SHIFT)-1) + +#define KBASE_KTRACE_CODE(X) KBASE_KTRACE_CODE_ ## X + +/* Note: compiletime_assert() about this against kbase_ktrace_code_t is in + * kbase_ktrace_init() + */ +enum kbase_ktrace_code { + /* + * IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE + * THIS MUST BE USED AT THE START OF THE ENUM + */ +#define KBASE_KTRACE_CODE_MAKE_CODE(X) KBASE_KTRACE_CODE(X) +#include <debug/mali_kbase_debug_ktrace_codes.h> +#undef KBASE_KTRACE_CODE_MAKE_CODE + /* Comma on its own, to extend the list */ + , + /* Must be the last in the enum */ + KBASE_KTRACE_CODE_COUNT +}; + +/** + * struct kbase_ktrace_msg - object representing a trace message added to trace + * buffer ktrace.rbuf in &kbase_device + * @timestamp: CPU timestamp at which the trace message was added. + * @thread_id: id of the thread in the context of which trace message was + * added. + * @cpu: indicates which CPU the @thread_id was scheduled on when the + * trace message was added. + * @kctx: Pointer to the kbase context for which the trace message was + * added. Will be NULL for certain trace messages associated with + * the &kbase_device itself, such as power management events. + * Will point to the appropriate context corresponding to + * backend-specific events. + * @info_val: value specific to the type of event being traced. Refer to the + * specific code in enum kbase_ktrace_code + * @backend: backend-specific trace information.
All backends must implement + * a minimum common set of members + */ +struct kbase_ktrace_msg { + struct timespec64 timestamp; + u32 thread_id; + u32 cpu; + void *kctx; + u64 info_val; + + struct kbase_ktrace_backend backend; +}; + +struct kbase_ktrace { + spinlock_t lock; + u16 first_out; + u16 next_in; + struct kbase_ktrace_msg *rbuf; +}; + +#endif /* KBASE_KTRACE_TARGET_RBUF */ +#endif /* _KBASE_DEBUG_KTRACE_DEFS_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_internal.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_internal.h new file mode 100644 index 0000000..e450760 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_internal.h @@ -0,0 +1,89 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_DEBUG_KTRACE_INTERNAL_H_ +#define _KBASE_DEBUG_KTRACE_INTERNAL_H_ + +#if KBASE_KTRACE_TARGET_RBUF + +#define KTRACE_DUMP_MESSAGE_SIZE 256 + +/** + * kbasep_ktrace_backend_format_header - format the backend part of the header + * @buffer: buffer to write to + * @sz: size of @buffer in bytes + * @written: pointer to storage for updating bytes written so far to @buffer + * + * The backend must format only the non-common backend specific parts of the + * header. It must format them as though they were standalone. The caller will + * handle adding any delimiters around this. + */ +void kbasep_ktrace_backend_format_header(char *buffer, int sz, s32 *written); + +/** + * kbasep_ktrace_backend_format_msg - format the backend part of the message + * @trace_msg: ktrace message + * @buffer: buffer to write to + * @sz: size of @buffer in bytes + * @written: pointer to storage for updating bytes written so far to @buffer + * + * The backend must format only the non-common backend specific parts of the + * message. It must format them as though they were standalone. The caller will + * handle adding any delimiters around this. + * + * A caller may have the flags member of @trace_msg with + * %KBASE_KTRACE_FLAG_BACKEND clear. The backend must handle that setting + * appropriately. + */ +void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, + char *buffer, int sz, s32 *written); + + +/** + * kbasep_ktrace_reserve - internal function to reserve space for a ktrace + * message + * @ktrace: kbase device's ktrace + * + * This may also empty the oldest entry in the ringbuffer to make space. 
+ */ +struct kbase_ktrace_msg *kbasep_ktrace_reserve(struct kbase_ktrace *ktrace); + +/** + * kbasep_ktrace_msg_init - internal function to initialize just the common + * part of a ktrace message + * @ktrace: kbase device's ktrace + * @trace_msg: ktrace message to initialize + * @code: ktrace code + * @kctx: kbase context, or NULL if no context + * @flags: flags about the message + * @info_val: generic information about @code to add to the trace + * + * The common part includes the mandatory parts of the backend part + */ +void kbasep_ktrace_msg_init(struct kbase_ktrace *ktrace, + struct kbase_ktrace_msg *trace_msg, enum kbase_ktrace_code code, + struct kbase_context *kctx, kbase_ktrace_flag_t flags, + u64 info_val); + +#endif /* KBASE_KTRACE_TARGET_RBUF */ + +#endif /* _KBASE_DEBUG_KTRACE_INTERNAL_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_linux_ktrace.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_linux_ktrace.h new file mode 100644 index 0000000..18e4f7c --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_linux_ktrace.h @@ -0,0 +1,99 @@ +/* + * + * (C) COPYRIGHT 2014,2018,2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * NOTE: This must **only** be included through mali_linux_trace.h, + * otherwise it will fail to setup tracepoints correctly + */ + +#if !defined(_KBASE_DEBUG_LINUX_KTRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _KBASE_DEBUG_LINUX_KTRACE_H_ + +#if KBASE_KTRACE_TARGET_FTRACE + +DECLARE_EVENT_CLASS(mali_add_template, + TP_PROTO(u64 info_val), + TP_ARGS(info_val), + TP_STRUCT__entry( + __field(u64, info_val) + ), + TP_fast_assign( + __entry->info_val = info_val; + ), + TP_printk("info=0x%llx", __entry->info_val) +); + +#define DEFINE_MALI_ADD_EVENT(name) \ +DEFINE_EVENT(mali_add_template, mali_##name, \ + TP_PROTO(u64 info_val), \ + TP_ARGS(info_val)) +DEFINE_MALI_ADD_EVENT(CORE_CTX_DESTROY); +DEFINE_MALI_ADD_EVENT(CORE_CTX_HWINSTR_TERM); +DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ); +DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_CLEAR); +DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_DONE); +DEFINE_MALI_ADD_EVENT(CORE_GPU_SOFT_RESET); +DEFINE_MALI_ADD_EVENT(CORE_GPU_HARD_RESET); +DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_SAMPLE); +DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_CLEAR); +DEFINE_MALI_ADD_EVENT(CORE_GPU_CLEAN_INV_CACHES); +DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_DESIRED); +DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERING_UP); +DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERED_UP); +DEFINE_MALI_ADD_EVENT(PM_PWRON); +DEFINE_MALI_ADD_EVENT(PM_PWRON_TILER); +DEFINE_MALI_ADD_EVENT(PM_PWRON_L2); +DEFINE_MALI_ADD_EVENT(PM_PWROFF); +DEFINE_MALI_ADD_EVENT(PM_PWROFF_TILER); +DEFINE_MALI_ADD_EVENT(PM_PWROFF_L2); +DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED); +DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_TILER); +DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_L2); +DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED); +DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED_TILER); +DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_SHADER_NEEDED); +DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_TILER_NEEDED); +DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_NEEDED); 
+DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_TILER_NEEDED); +DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE); +DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER); +DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE); +DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER); +DEFINE_MALI_ADD_EVENT(PM_GPU_ON); +DEFINE_MALI_ADD_EVENT(PM_GPU_OFF); +DEFINE_MALI_ADD_EVENT(PM_SET_POLICY); +DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_INIT); +DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_TERM); +DEFINE_MALI_ADD_EVENT(PM_CA_SET_POLICY); +DEFINE_MALI_ADD_EVENT(PM_CONTEXT_ACTIVE); +DEFINE_MALI_ADD_EVENT(PM_CONTEXT_IDLE); +DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS); +DEFINE_MALI_ADD_EVENT(SCHED_RETAIN_CTX_NOLOCK); +DEFINE_MALI_ADD_EVENT(SCHED_RELEASE_CTX); + +#undef DEFINE_MALI_ADD_EVENT + +#include "mali_kbase_debug_linux_ktrace_jm.h" + +#endif /* KBASE_KTRACE_TARGET_FTRACE */ + +#endif /* !defined(_KBASE_DEBUG_LINUX_KTRACE_H_) || defined(TRACE_HEADER_MULTI_READ) */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_jm.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_jm.c new file mode 100644 index 0000000..fbba2e7 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_jm.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "../mali_kbase_device_internal.h" +#include "../mali_kbase_device.h" + +#include +#include +#include +#include + +#ifdef CONFIG_MALI_NO_MALI +#include +#endif + +#ifdef CONFIG_MALI_ARBITER_SUPPORT +#include +#endif + +#include +#include +#include +#include +#include +#include + +/** + * kbase_backend_late_init - Perform any backend-specific initialization. + * @kbdev: Device pointer + * + * Return: 0 on success, or an error code on failure. + */ +static int kbase_backend_late_init(struct kbase_device *kbdev) +{ + int err; + + err = kbase_hwaccess_pm_init(kbdev); + if (err) + return err; + + err = kbase_reset_gpu_init(kbdev); + if (err) + goto fail_reset_gpu_init; + + err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT); + if (err) + goto fail_pm_powerup; + + err = kbase_backend_timer_init(kbdev); + if (err) + goto fail_timer; + +#ifdef CONFIG_MALI_DEBUG +#ifndef CONFIG_MALI_NO_MALI + if (kbasep_common_test_interrupt_handlers(kbdev) != 0) { + dev_err(kbdev->dev, "Interrupt assignment check failed.\n"); + err = -EINVAL; + goto fail_interrupt_test; + } +#endif /* !CONFIG_MALI_NO_MALI */ +#endif /* CONFIG_MALI_DEBUG */ + + err = kbase_job_slot_init(kbdev); + if (err) + goto fail_job_slot; + + /* Do the initialisation of devfreq. + * Devfreq needs backend_timer_init() for completion of its + * initialisation and it also needs to catch the first callback + * occurrence of the runtime_suspend event for maintaining state + * coherence with the backend power management, hence needs to be + * placed before the kbase_pm_context_idle(). 
+ */ + err = kbase_backend_devfreq_init(kbdev); + if (err) + goto fail_devfreq_init; + + /* Idle the GPU and/or cores, if the policy wants it to */ + kbase_pm_context_idle(kbdev); + + /* Update gpuprops with L2_FEATURES if applicable */ + err = kbase_gpuprops_update_l2_features(kbdev); + if (err) + goto fail_update_l2_features; + + init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait); + + return 0; + +fail_update_l2_features: +fail_devfreq_init: + kbase_job_slot_term(kbdev); +fail_job_slot: + +#ifdef CONFIG_MALI_DEBUG +#ifndef CONFIG_MALI_NO_MALI +fail_interrupt_test: +#endif /* !CONFIG_MALI_NO_MALI */ +#endif /* CONFIG_MALI_DEBUG */ + + kbase_backend_timer_term(kbdev); +fail_timer: + kbase_hwaccess_pm_halt(kbdev); +fail_pm_powerup: + kbase_reset_gpu_term(kbdev); +fail_reset_gpu_init: + kbase_hwaccess_pm_term(kbdev); + + return err; +} + +/** + * kbase_backend_late_term - Perform any backend-specific termination. + * @kbdev: Device pointer + */ +static void kbase_backend_late_term(struct kbase_device *kbdev) +{ + kbase_backend_devfreq_term(kbdev); + kbase_job_slot_halt(kbdev); + kbase_job_slot_term(kbdev); + kbase_backend_timer_term(kbdev); + kbase_hwaccess_pm_halt(kbdev); + kbase_reset_gpu_term(kbdev); + kbase_hwaccess_pm_term(kbdev); +} + +static const struct kbase_device_init dev_init[] = { +#ifdef CONFIG_MALI_NO_MALI + {kbase_gpu_device_create, kbase_gpu_device_destroy, + "Dummy model initialization failed"}, +#else + {assign_irqs, NULL, + "IRQ search failed"}, + {registers_map, registers_unmap, + "Register map failed"}, +#endif + {kbase_device_io_history_init, kbase_device_io_history_term, + "Register access history initialization failed"}, + {kbase_device_pm_init, kbase_device_pm_term, + "Power management initialization failed"}, + {kbase_device_early_init, kbase_device_early_term, + "Early device initialization failed"}, + {kbase_device_populate_max_freq, NULL, + "Populating max frequency failed"}, + {kbase_device_misc_init, kbase_device_misc_term, + 
"Miscellaneous device initialization failed"}, + {kbase_ctx_sched_init, kbase_ctx_sched_term, + "Context scheduler initialization failed"}, + {kbase_mem_init, kbase_mem_term, + "Memory subsystem initialization failed"}, + {kbase_device_coherency_init, NULL, + "Device coherency init failed"}, + {kbase_protected_mode_init, kbase_protected_mode_term, + "Protected mode subsystem initialization failed"}, + {kbase_device_list_init, kbase_device_list_term, + "Device list setup failed"}, + {kbasep_js_devdata_init, kbasep_js_devdata_term, + "Job JS devdata initialization failed"}, + {kbase_device_timeline_init, kbase_device_timeline_term, + "Timeline stream initialization failed"}, + {kbase_device_hwcnt_backend_gpu_init, + kbase_device_hwcnt_backend_gpu_term, + "GPU hwcnt backend creation failed"}, + {kbase_device_hwcnt_context_init, kbase_device_hwcnt_context_term, + "GPU hwcnt context initialization failed"}, + {kbase_device_hwcnt_virtualizer_init, + kbase_device_hwcnt_virtualizer_term, + "GPU hwcnt virtualizer initialization failed"}, + {kbase_device_vinstr_init, kbase_device_vinstr_term, + "Virtual instrumentation initialization failed"}, + {kbase_backend_late_init, kbase_backend_late_term, + "Late backend initialization failed"}, +#ifdef MALI_KBASE_BUILD + {kbase_debug_job_fault_dev_init, kbase_debug_job_fault_dev_term, + "Job fault debug initialization failed"}, + {kbase_device_debugfs_init, kbase_device_debugfs_term, + "DebugFS initialization failed"}, + /* Sysfs init needs to happen before registering the device with + * misc_register(), otherwise it causes a race condition between + * registering the device and a uevent event being generated for + * userspace, causing udev rules to run which might expect certain + * sysfs attributes present. As a result of the race condition + * we avoid, some Mali sysfs entries may have appeared to udev + * to not exist. 
+ * For more information, see + * https://www.kernel.org/doc/Documentation/driver-model/device.txt, the + * paragraph that starts with "Word of warning", currently the + * second-last paragraph. + */ + {kbase_sysfs_init, kbase_sysfs_term, "SysFS group creation failed"}, + {kbase_device_misc_register, kbase_device_misc_deregister, + "Misc device registration failed"}, +#ifdef CONFIG_MALI_BUSLOG + {buslog_init, buslog_term, "Bus log client registration failed"}, +#endif + {kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer, + "GPU property population failed"}, +#endif + {kbase_dummy_job_wa_load, kbase_dummy_job_wa_cleanup, + "Dummy job workaround load failed"}, +}; + +static void kbase_device_term_partial(struct kbase_device *kbdev, + unsigned int i) +{ + while (i-- > 0) { + if (dev_init[i].term) + dev_init[i].term(kbdev); + } +} + +void kbase_device_term(struct kbase_device *kbdev) +{ + kbase_device_term_partial(kbdev, ARRAY_SIZE(dev_init)); + kbasep_js_devdata_halt(kbdev); + kbase_mem_halt(kbdev); +} + +int kbase_device_init(struct kbase_device *kbdev) +{ + int err = 0; + unsigned int i = 0; + + dev_info(kbdev->dev, "Kernel DDK version %s", MALI_RELEASE_NAME); + + kbase_device_id_init(kbdev); + kbase_disjoint_init(kbdev); + + for (i = 0; i < ARRAY_SIZE(dev_init); i++) { + err = dev_init[i].init(kbdev); + if (err) { + dev_err(kbdev->dev, "%s error = %d\n", + dev_init[i].err_mes, err); + kbase_device_term_partial(kbdev, i); + break; + } + } + + return err; +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device.c new file mode 100644 index 0000000..76f14e5 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device.c @@ -0,0 +1,429 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * Base kernel device APIs + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include "mali_kbase_vinstr.h" +#include "mali_kbase_hwcnt_context.h" +#include "mali_kbase_hwcnt_virtualizer.h" + +#include "mali_kbase_device.h" +#include "mali_kbase_device_internal.h" +#include "backend/gpu/mali_kbase_pm_internal.h" +#include "backend/gpu/mali_kbase_irq_internal.h" + +#ifdef CONFIG_MALI_ARBITER_SUPPORT +#include "arbiter/mali_kbase_arbiter_pm.h" +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + +/* NOTE: Magic - 0x45435254 (TRCE in ASCII). + * Supports tracing feature provided in the base module. + * Please keep it in sync with the value of base module. + */ +#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254 + +/* Number of register accesses for the buffer that we allocate during + * initialization time. The buffer size can be changed later via debugfs. 
+ */ +#define KBASEP_DEFAULT_REGISTER_HISTORY_SIZE ((u16)512) + +static DEFINE_MUTEX(kbase_dev_list_lock); +static LIST_HEAD(kbase_dev_list); +static int kbase_dev_nr; + +struct kbase_device *kbase_device_alloc(void) +{ + return kzalloc(sizeof(struct kbase_device), GFP_KERNEL); +} + +static int kbase_device_as_init(struct kbase_device *kbdev, int i) +{ + kbdev->as[i].number = i; + kbdev->as[i].bf_data.addr = 0ULL; + kbdev->as[i].pf_data.addr = 0ULL; + + kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", 0, 1, i); + if (!kbdev->as[i].pf_wq) + return -EINVAL; + + INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker); + INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker); + + return 0; +} + +static void kbase_device_as_term(struct kbase_device *kbdev, int i) +{ + destroy_workqueue(kbdev->as[i].pf_wq); +} + +static int kbase_device_all_as_init(struct kbase_device *kbdev) +{ + int i, err; + + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { + err = kbase_device_as_init(kbdev, i); + if (err) + goto free_workqs; + } + + return 0; + +free_workqs: + while (i-- > 0) /* as[i] failed and owns no workqueue: unwind as[i-1]..as[0] only */ + kbase_device_as_term(kbdev, i); + + return err; +} + +static void kbase_device_all_as_term(struct kbase_device *kbdev) +{ + int i; + + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) + kbase_device_as_term(kbdev, i); +} + +int kbase_device_misc_init(struct kbase_device * const kbdev) +{ + int err; +#ifdef CONFIG_ARM64 + struct device_node *np = NULL; +#endif /* CONFIG_ARM64 */ + + spin_lock_init(&kbdev->mmu_mask_change); + mutex_init(&kbdev->mmu_hw_mutex); +#ifdef CONFIG_ARM64 + kbdev->cci_snoop_enabled = false; + np = kbdev->dev->of_node; + if (np != NULL) { + if (of_property_read_u32(np, "snoop_enable_smc", + &kbdev->snoop_enable_smc)) + kbdev->snoop_enable_smc = 0; + if (of_property_read_u32(np, "snoop_disable_smc", + &kbdev->snoop_disable_smc)) + kbdev->snoop_disable_smc = 0; + /* Either both or none of the calls should be provided.
*/ + if (!((kbdev->snoop_disable_smc == 0 + && kbdev->snoop_enable_smc == 0) + || (kbdev->snoop_disable_smc != 0 + && kbdev->snoop_enable_smc != 0))) { + WARN_ON(1); + err = -EINVAL; + goto fail; + } + } +#endif /* CONFIG_ARM64 */ + /* Get the list of workarounds for issues on the current HW + * (identified by the GPU_ID register) + */ + err = kbase_hw_set_issues_mask(kbdev); + if (err) + goto fail; + + /* Set the list of features available on the current HW + * (identified by the GPU_ID register) + */ + kbase_hw_set_features_mask(kbdev); + + err = kbase_gpuprops_set_features(kbdev); + if (err) + goto fail; + + /* On Linux 4.0+, dma coherency is determined from device tree */ +#if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) + set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops); +#endif + + /* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our + * device structure was created by device-tree + */ + if (!kbdev->dev->dma_mask) + kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask; + + err = dma_set_mask(kbdev->dev, + DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits)); + if (err) + goto dma_set_mask_failed; + + err = dma_set_coherent_mask(kbdev->dev, + DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits)); + if (err) + goto dma_set_mask_failed; + + kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces; + + err = kbase_device_all_as_init(kbdev); + if (err) + goto as_init_failed; + + spin_lock_init(&kbdev->hwcnt.lock); + + err = kbase_ktrace_init(kbdev); + if (err) + goto term_as; + + init_waitqueue_head(&kbdev->cache_clean_wait); + + kbase_debug_assert_register_hook(&kbase_ktrace_hook_wrapper, kbdev); + + atomic_set(&kbdev->ctx_num, 0); + + err = kbase_instr_backend_init(kbdev); + if (err) + goto term_trace; + + kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD; + + kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) + kbdev->mmu_mode = kbase_mmu_mode_get_aarch64(); + 
else + kbdev->mmu_mode = kbase_mmu_mode_get_lpae(); + + mutex_init(&kbdev->kctx_list_lock); + INIT_LIST_HEAD(&kbdev->kctx_list); + + spin_lock_init(&kbdev->hwaccess_lock); + + return 0; +term_trace: + kbase_ktrace_term(kbdev); +term_as: + kbase_device_all_as_term(kbdev); +as_init_failed: +dma_set_mask_failed: +fail: + return err; +} + +void kbase_device_misc_term(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev); + + WARN_ON(!list_empty(&kbdev->kctx_list)); + +#if KBASE_KTRACE_ENABLE + kbase_debug_assert_register_hook(NULL, NULL); +#endif + + kbase_instr_backend_term(kbdev); + + kbase_ktrace_term(kbdev); + + kbase_device_all_as_term(kbdev); +} + +void kbase_device_free(struct kbase_device *kbdev) +{ + kfree(kbdev); +} + +void kbase_device_id_init(struct kbase_device *kbdev) +{ + scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name, + kbase_dev_nr); + kbdev->id = kbase_dev_nr; +} + +void kbase_increment_device_id(void) +{ + kbase_dev_nr++; +} + +int kbase_device_hwcnt_backend_gpu_init(struct kbase_device *kbdev) +{ + return kbase_hwcnt_backend_gpu_create(kbdev, &kbdev->hwcnt_gpu_iface); +} + +void kbase_device_hwcnt_backend_gpu_term(struct kbase_device *kbdev) +{ + kbase_hwcnt_backend_gpu_destroy(&kbdev->hwcnt_gpu_iface); +} + +int kbase_device_hwcnt_context_init(struct kbase_device *kbdev) +{ + return kbase_hwcnt_context_init(&kbdev->hwcnt_gpu_iface, + &kbdev->hwcnt_gpu_ctx); +} + +void kbase_device_hwcnt_context_term(struct kbase_device *kbdev) +{ + kbase_hwcnt_context_term(kbdev->hwcnt_gpu_ctx); +} + +int kbase_device_hwcnt_virtualizer_init(struct kbase_device *kbdev) +{ + return kbase_hwcnt_virtualizer_init(kbdev->hwcnt_gpu_ctx, + KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS, + &kbdev->hwcnt_gpu_virt); +} + +void kbase_device_hwcnt_virtualizer_term(struct kbase_device *kbdev) +{ + kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt); +} + +int kbase_device_timeline_init(struct kbase_device *kbdev) +{ + atomic_set(&kbdev->timeline_flags, 0); + 
return kbase_timeline_init(&kbdev->timeline, &kbdev->timeline_flags); +} + +void kbase_device_timeline_term(struct kbase_device *kbdev) +{ + kbase_timeline_term(kbdev->timeline); +} + +int kbase_device_vinstr_init(struct kbase_device *kbdev) +{ + return kbase_vinstr_init(kbdev->hwcnt_gpu_virt, &kbdev->vinstr_ctx); +} + +void kbase_device_vinstr_term(struct kbase_device *kbdev) +{ + kbase_vinstr_term(kbdev->vinstr_ctx); +} + +int kbase_device_io_history_init(struct kbase_device *kbdev) +{ + return kbase_io_history_init(&kbdev->io_history, + KBASEP_DEFAULT_REGISTER_HISTORY_SIZE); +} + +void kbase_device_io_history_term(struct kbase_device *kbdev) +{ + kbase_io_history_term(&kbdev->io_history); +} + +int kbase_device_misc_register(struct kbase_device *kbdev) +{ + return misc_register(&kbdev->mdev); +} + +void kbase_device_misc_deregister(struct kbase_device *kbdev) +{ + misc_deregister(&kbdev->mdev); +} + +int kbase_device_list_init(struct kbase_device *kbdev) +{ + const struct list_head *dev_list; + + dev_list = kbase_device_get_list(); + list_add(&kbdev->entry, &kbase_dev_list); + kbase_device_put_list(dev_list); + + return 0; +} + +void kbase_device_list_term(struct kbase_device *kbdev) +{ + const struct list_head *dev_list; + + dev_list = kbase_device_get_list(); + list_del(&kbdev->entry); + kbase_device_put_list(dev_list); +} + +const struct list_head *kbase_device_get_list(void) +{ + mutex_lock(&kbase_dev_list_lock); + return &kbase_dev_list; +} +KBASE_EXPORT_TEST_API(kbase_device_get_list); + +void kbase_device_put_list(const struct list_head *dev_list) +{ + mutex_unlock(&kbase_dev_list_lock); +} +KBASE_EXPORT_TEST_API(kbase_device_put_list); + +int kbase_device_early_init(struct kbase_device *kbdev) +{ + int err; + + err = kbasep_platform_device_init(kbdev); + if (err) + return err; + + err = kbase_pm_runtime_init(kbdev); + if (err) + goto fail_runtime_pm; + + /* Ensure we can access the GPU registers */ + kbase_pm_register_access_enable(kbdev); + + /* Find 
out GPU properties based on the GPU feature registers */ + kbase_gpuprops_set(kbdev); + + /* We're done accessing the GPU registers for now. */ + kbase_pm_register_access_disable(kbdev); + + err = kbase_install_interrupts(kbdev); + if (err) + goto fail_interrupts; + + return 0; + +fail_interrupts: + kbase_pm_runtime_term(kbdev); +fail_runtime_pm: + kbasep_platform_device_term(kbdev); + + return err; +} + +void kbase_device_early_term(struct kbase_device *kbdev) +{ +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbdev->arb.arb_if) + kbase_arbiter_pm_release_interrupts(kbdev); + else + kbase_release_interrupts(kbdev); +#else + kbase_release_interrupts(kbdev); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + kbase_pm_runtime_term(kbdev); + kbasep_platform_device_term(kbdev); +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device.h new file mode 100644 index 0000000..16f1d70 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device.h @@ -0,0 +1,71 @@ +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include + +/** + * kbase_device_get_list - get device list. 
+ * + * Get access to device list. + * + * Return: Pointer to the linked list head. + */ +const struct list_head *kbase_device_get_list(void); + +/** + * kbase_device_put_list - put device list. + * + * @dev_list: head of linked list containing device list. + * + * Put access to the device list. + */ +void kbase_device_put_list(const struct list_head *dev_list); + +/** + * kbase_increment_device_id - increment device id. + * + * Used to increment device id on successful initialization of the device. + */ +void kbase_increment_device_id(void); + +/** + * kbase_device_init - Device initialisation. + * + * This is called from device probe to initialise various other + * components needed. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Return: 0 on success and non-zero value on failure. + */ +int kbase_device_init(struct kbase_device *kbdev); + +/** + * kbase_device_term - Device termination. + * + * This is called from device remove to terminate various components that + * were initialised during kbase_device_init. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + */ +void kbase_device_term(struct kbase_device *kbdev); diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device_internal.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device_internal.h new file mode 100644 index 0000000..9f96db0 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device_internal.h @@ -0,0 +1,78 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include + +typedef int kbase_device_init_method(struct kbase_device *kbdev); +typedef void kbase_device_term_method(struct kbase_device *kbdev); + +/** + * struct kbase_device_init - Device init/term methods. + * @init: Function pointer to a initialise method. + * @term: Function pointer to a terminate method. + * @err_mes: Error message to be printed when init method fails. + */ +struct kbase_device_init { + kbase_device_init_method *init; + kbase_device_term_method *term; + char *err_mes; +}; + +int kbase_device_vinstr_init(struct kbase_device *kbdev); +void kbase_device_vinstr_term(struct kbase_device *kbdev); + +int kbase_device_timeline_init(struct kbase_device *kbdev); +void kbase_device_timeline_term(struct kbase_device *kbdev); + +int kbase_device_hwcnt_backend_gpu_init(struct kbase_device *kbdev); +void kbase_device_hwcnt_backend_gpu_term(struct kbase_device *kbdev); + +int kbase_device_hwcnt_context_init(struct kbase_device *kbdev); +void kbase_device_hwcnt_context_term(struct kbase_device *kbdev); + +int kbase_device_hwcnt_virtualizer_init(struct kbase_device *kbdev); +void kbase_device_hwcnt_virtualizer_term(struct kbase_device *kbdev); + +int kbase_device_list_init(struct kbase_device *kbdev); +void kbase_device_list_term(struct kbase_device *kbdev); + +int kbase_device_io_history_init(struct kbase_device *kbdev); +void kbase_device_io_history_term(struct kbase_device *kbdev); + +int kbase_device_misc_register(struct kbase_device *kbdev); +void 
kbase_device_misc_deregister(struct kbase_device *kbdev); + +void kbase_device_id_init(struct kbase_device *kbdev); + +/** + * kbase_device_early_init - Perform any device-specific initialization. + * @kbdev: Device pointer + * + * Return: 0 on success, or an error code on failure. + */ +int kbase_device_early_init(struct kbase_device *kbdev); + +/** + * kbase_device_early_term - Perform any device-specific termination. + * @kbdev: Device pointer + */ +void kbase_device_early_term(struct kbase_device *kbdev); diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_fault_jm.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_fault_jm.c new file mode 100644 index 0000000..63132dc --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_fault_jm.c @@ -0,0 +1,181 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include + +#include "../mali_kbase_gpu_fault.h" + +const char *kbase_gpu_exception_name(u32 const exception_code) +{ + const char *e; + + switch (exception_code) { + /* Non-Fault Status code */ + case 0x00: + e = "NOT_STARTED/IDLE/OK"; + break; + case 0x01: + e = "DONE"; + break; + case 0x02: + e = "INTERRUPTED"; + break; + case 0x03: + e = "STOPPED"; + break; + case 0x04: + e = "TERMINATED"; + break; + case 0x08: + e = "ACTIVE"; + break; + /* Job exceptions */ + case 0x40: + e = "JOB_CONFIG_FAULT"; + break; + case 0x41: + e = "JOB_POWER_FAULT"; + break; + case 0x42: + e = "JOB_READ_FAULT"; + break; + case 0x43: + e = "JOB_WRITE_FAULT"; + break; + case 0x44: + e = "JOB_AFFINITY_FAULT"; + break; + case 0x48: + e = "JOB_BUS_FAULT"; + break; + case 0x50: + e = "INSTR_INVALID_PC"; + break; + case 0x51: + e = "INSTR_INVALID_ENC"; + break; + case 0x52: + e = "INSTR_TYPE_MISMATCH"; + break; + case 0x53: + e = "INSTR_OPERAND_FAULT"; + break; + case 0x54: + e = "INSTR_TLS_FAULT"; + break; + case 0x55: + e = "INSTR_BARRIER_FAULT"; + break; + case 0x56: + e = "INSTR_ALIGN_FAULT"; + break; + case 0x58: + e = "DATA_INVALID_FAULT"; + break; + case 0x59: + e = "TILE_RANGE_FAULT"; + break; + case 0x5A: + e = "ADDR_RANGE_FAULT"; + break; + case 0x60: + e = "OUT_OF_MEMORY"; + break; + /* GPU exceptions */ + case 0x80: + e = "DELAYED_BUS_FAULT"; + break; + case 0x88: + e = "SHAREABILITY_FAULT"; + break; + /* MMU exceptions */ + case 0xC0: + case 0xC1: + case 0xC2: + case 0xC3: + case 0xC4: + case 0xC5: + case 0xC6: + case 0xC7: + e = "TRANSLATION_FAULT"; + break; + case 0xC8: + e = "PERMISSION_FAULT"; + break; + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: + e = "PERMISSION_FAULT"; + break; + case 0xD0: + case 0xD1: + case 0xD2: + case 0xD3: + case 0xD4: + case 0xD5: + case 0xD6: + case 0xD7: + e = "TRANSTAB_BUS_FAULT"; + break; + case 0xD8: + e = "ACCESS_FLAG"; + break; + case 0xD9: + 
case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: + e = "ACCESS_FLAG"; + break; + case 0xE0: + case 0xE1: + case 0xE2: + case 0xE3: + case 0xE4: + case 0xE5: + case 0xE6: + case 0xE7: + e = "ADDRESS_SIZE_FAULT"; + break; + case 0xE8: + case 0xE9: + case 0xEA: + case 0xEB: + case 0xEC: + case 0xED: + case 0xEE: + case 0xEF: + e = "MEMORY_ATTRIBUTES_FAULT"; + break; + default: + e = "UNKNOWN"; + break; + }; + + return e; +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h similarity index 100% rename from dvalin/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h rename to bifrost/r25p0/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu.c new file mode 100644 index 0000000..3128db4 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu.c @@ -0,0 +1,41 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include + +const char *kbase_gpu_access_type_name(u32 fault_status) +{ + switch (AS_FAULTSTATUS_ACCESS_TYPE_GET(fault_status)) { + case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: + return "ATOMIC"; + case AS_FAULTSTATUS_ACCESS_TYPE_READ: + return "READ"; + case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: + return "WRITE"; + case AS_FAULTSTATUS_ACCESS_TYPE_EX: + return "EXECUTE"; + default: + WARN_ON(1); + return NULL; + } +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu.h similarity index 100% rename from dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu.h rename to bifrost/r25p0/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu.h diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h similarity index 100% rename from dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h rename to bifrost/r25p0/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_fault.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_fault.h new file mode 100644 index 0000000..b59b9d1 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_fault.h @@ -0,0 +1,59 @@ +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_GPU_FAULT_H_ +#define _KBASE_GPU_FAULT_H_ + +/** Returns the name associated with a Mali exception code + * + * @exception_code: exception code + * + * This function is called from the interrupt handler when a GPU fault occurs. + * + * Return: name associated with the exception code + */ +const char *kbase_gpu_exception_name(u32 exception_code); + +/** Returns the name associated with a Mali fatal exception code + * + * @fatal_exception_code: fatal exception code + * + * This function is called from the interrupt handler when a GPU fatal + * exception occurs. + * + * Return: name associated with the fatal exception code + */ +const char *kbase_gpu_fatal_exception_name(u32 const fatal_exception_code); + +/** + * kbase_gpu_access_type_name - Convert MMU_AS_CONTROL.FAULTSTATUS.ACCESS_TYPE + * into string. + * @fault_status: value of FAULTSTATUS register. + * + * After MMU fault, this function can be used to get readable information about + * access_type of the MMU fault. + * + * Return: String of the access type. 
+ */ +const char *kbase_gpu_access_type_name(u32 fault_status); + +#endif /* _KBASE_GPU_FAULT_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h similarity index 100% rename from dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h rename to bifrost/r25p0/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h new file mode 100644 index 0000000..759f30d --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h @@ -0,0 +1,437 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_GPU_REGMAP_H_ +#define _KBASE_GPU_REGMAP_H_ + +#include "mali_kbase_gpu_coherency.h" +#include "mali_kbase_gpu_id.h" +#include "backend/mali_kbase_gpu_regmap_jm.h" + +/* Begin Register Offsets */ +/* GPU control registers */ + +#define GPU_CONTROL_BASE 0x0000 +#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r)) +#define GPU_ID 0x000 /* (RO) GPU and revision identifier */ +#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */ +#define TILER_FEATURES 0x00C /* (RO) Tiler Features */ +#define MEM_FEATURES 0x010 /* (RO) Memory system features */ +#define MMU_FEATURES 0x014 /* (RO) MMU features */ +#define AS_PRESENT 0x018 /* (RO) Address space slots present */ +#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */ +#define GPU_IRQ_CLEAR 0x024 /* (WO) */ +#define GPU_IRQ_MASK 0x028 /* (RW) */ +#define GPU_IRQ_STATUS 0x02C /* (RO) */ + +#define GPU_COMMAND 0x030 /* (WO) */ +#define GPU_STATUS 0x034 /* (RO) */ + +#define GPU_DBGEN (1 << 8) /* DBGEN wire status */ + +#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */ +#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */ +#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */ + +#define L2_CONFIG 0x048 /* (RW) Level 2 cache configuration */ + +#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ +#define SUPER_L2_COHERENT (1 << 1) /* Shader cores within a core + * supergroup are l2 coherent + */ + +#define PWR_KEY 0x050 /* (WO) Power manager key register */ +#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */ +#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */ + +#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */ +#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */ +#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */ +#define TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, 
high word */ + +#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */ +#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */ +#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */ +#define THREAD_FEATURES 0x0AC /* (RO) Thread features */ +#define THREAD_TLS_ALLOC 0x310 /* (RO) Number of threads per core that TLS must be allocated for */ + +#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */ +#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */ +#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */ +#define TEXTURE_FEATURES_3 0x0BC /* (RO) Support flags for texture order */ + +#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2)) + +#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ +#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */ + +#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */ +#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */ + +#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ +#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ + +#define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */ +#define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */ + +#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */ +#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */ + +#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */ +#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */ + +#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */ +#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */ + +#define STACK_READY_LO 
0xE10 /* (RO) Core stack ready bitmap, low word */ +#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */ + +#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */ +#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */ + +#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */ +#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */ + +#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */ +#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */ + +#define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */ +#define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */ + +#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */ +#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */ + +#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */ +#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */ + +#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */ +#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */ + +#define STACK_PWROFF_LO 0xE30 /* (RO) Core stack power off bitmap, low word */ +#define STACK_PWROFF_HI 0xE34 /* (RO) Core stack power off bitmap, high word */ + +#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */ +#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */ + +#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */ +#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */ + +#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */ +#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */ + +#define STACK_PWRTRANS_LO 
0xE40 /* (RO) Core stack power transition bitmap, low word */ +#define STACK_PWRTRANS_HI 0xE44 /* (RO) Core stack power transition bitmap, high word */ + +#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */ +#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */ + +#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */ +#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */ + +#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */ +#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */ + +#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ +#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */ + +#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration (implementation-specific) */ +#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration (implementation-specific) */ +#define L2_MMU_CONFIG 0xF0C /* (RW) L2 cache and MMU configuration (implementation-specific) */ + +/* Job control registers */ + +#define JOB_CONTROL_BASE 0x1000 + +#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r)) + +#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */ +#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */ +#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */ +#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */ + +/* MMU control registers */ + +#define MEMORY_MANAGEMENT_BASE 0x2000 +#define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r)) + +#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */ +#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */ +#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */ +#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */ + +#define MMU_AS0 0x400 /* Configuration registers for address space 0 */ +#define MMU_AS1 0x440 /* Configuration registers for address space 1 */ +#define MMU_AS2 0x480 /* 
Configuration registers for address space 2 */ +#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */ +#define MMU_AS4 0x500 /* Configuration registers for address space 4 */ +#define MMU_AS5 0x540 /* Configuration registers for address space 5 */ +#define MMU_AS6 0x580 /* Configuration registers for address space 6 */ +#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */ +#define MMU_AS8 0x600 /* Configuration registers for address space 8 */ +#define MMU_AS9 0x640 /* Configuration registers for address space 9 */ +#define MMU_AS10 0x680 /* Configuration registers for address space 10 */ +#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */ +#define MMU_AS12 0x700 /* Configuration registers for address space 12 */ +#define MMU_AS13 0x740 /* Configuration registers for address space 13 */ +#define MMU_AS14 0x780 /* Configuration registers for address space 14 */ +#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */ + +/* MMU address space control registers */ + +#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r)) + +#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */ +#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */ +#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */ +#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. 
*/ +#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */ +#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */ +#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */ +#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */ +#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */ +#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */ +#define AS_STATUS 0x28 /* (RO) Status flags for address space n */ + +/* (RW) Translation table configuration for address space n, low word */ +#define AS_TRANSCFG_LO 0x30 +/* (RW) Translation table configuration for address space n, high word */ +#define AS_TRANSCFG_HI 0x34 +/* (RO) Secondary fault address for address space n, low word */ +#define AS_FAULTEXTRA_LO 0x38 +/* (RO) Secondary fault address for address space n, high word */ +#define AS_FAULTEXTRA_HI 0x3C + +/* End Register Offsets */ + +/* IRQ flags */ +#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ +#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */ +#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */ +#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */ +#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */ + +#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */ +#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ + +/* Include POWER_CHANGED_SINGLE in debug builds for use in irq latency test. 
+ */ +#define GPU_IRQ_REG_COMMON (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \ + | POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED) + +#ifdef CONFIG_MALI_DEBUG +#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE) +#else /* CONFIG_MALI_DEBUG */ +#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON) +#endif /* CONFIG_MALI_DEBUG */ + +/* + * MMU_IRQ_RAWSTAT register values. Values are valid also for + * MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers. + */ + +#define MMU_PAGE_FAULT_FLAGS 16 + +/* Macros returning a bitmask to retrieve page fault or bus error flags from + * MMU registers */ +#define MMU_PAGE_FAULT(n) (1UL << (n)) +#define MMU_BUS_ERROR(n) (1UL << ((n) + MMU_PAGE_FAULT_FLAGS)) + +/* + * Begin LPAE MMU TRANSTAB register values + */ +#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK 0xfffff000 +#define AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED (0u << 0) +#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY (1u << 1) +#define AS_TRANSTAB_LPAE_ADRMODE_TABLE (3u << 0) +#define AS_TRANSTAB_LPAE_READ_INNER (1u << 2) +#define AS_TRANSTAB_LPAE_SHARE_OUTER (1u << 4) + +#define AS_TRANSTAB_LPAE_ADRMODE_MASK 0x00000003 + +/* + * Begin AARCH64 MMU TRANSTAB register values + */ +#define MMU_HW_OUTA_BITS 40 +#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4)) + +/* + * Begin MMU STATUS register values + */ +#define AS_STATUS_AS_ACTIVE 0x01 + +#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK (0x7<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT (0x0<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT (0x1<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3) + +#define AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFF << AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) +#define 
AS_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \ + (((reg_val)&AS_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) + +#define AS_FAULTSTATUS_ACCESS_TYPE_SHIFT 8 +#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) +#define AS_FAULTSTATUS_ACCESS_TYPE_GET(reg_val) \ + (((reg_val)&AS_FAULTSTATUS_ACCESS_TYPE_MASK) >> AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) + +#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0) +#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1) +#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2) +#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3) + +#define AS_FAULTSTATUS_SOURCE_ID_SHIFT 16 +#define AS_FAULTSTATUS_SOURCE_ID_MASK (0xFFFF << AS_FAULTSTATUS_SOURCE_ID_SHIFT) +#define AS_FAULTSTATUS_SOURCE_ID_GET(reg_val) \ + (((reg_val)&AS_FAULTSTATUS_SOURCE_ID_MASK) >> AS_FAULTSTATUS_SOURCE_ID_SHIFT) + +/* + * Begin MMU TRANSCFG register values + */ +#define AS_TRANSCFG_ADRMODE_LEGACY 0 +#define AS_TRANSCFG_ADRMODE_UNMAPPED 1 +#define AS_TRANSCFG_ADRMODE_IDENTITY 2 +#define AS_TRANSCFG_ADRMODE_AARCH64_4K 6 +#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8 + +#define AS_TRANSCFG_ADRMODE_MASK 0xF + +/* + * Begin TRANSCFG register values + */ +#define AS_TRANSCFG_PTW_MEMATTR_MASK (3ull << 24) +#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1ull << 24) +#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2ull << 24) + +#define AS_TRANSCFG_PTW_SH_MASK ((3ull << 28)) +#define AS_TRANSCFG_PTW_SH_OS (2ull << 28) +#define AS_TRANSCFG_PTW_SH_IS (3ull << 28) +#define AS_TRANSCFG_R_ALLOCATE (1ull << 30) + +/* + * Begin Command Values + */ + +/* AS_COMMAND register commands */ +#define AS_COMMAND_NOP 0x00 /* NOP Operation */ +#define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */ +#define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */ +#define AS_COMMAND_UNLOCK 0x03 /* Issue a flush region command to all MMUs */ +#define AS_COMMAND_FLUSH 0x04 /* Flush all L2 caches then issue a flush region 
command to all MMUs + (deprecated - only for use with T60x) */ +#define AS_COMMAND_FLUSH_PT 0x04 /* Flush all L2 caches then issue a flush region command to all MMUs */ +#define AS_COMMAND_FLUSH_MEM 0x05 /* Wait for memory accesses to complete, flush all the L1s cache then + flush all L2 caches then issue a flush region command to all MMUs */ + +/* GPU_STATUS values */ +#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ +#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */ + +/* PRFCNT_CONFIG register values */ +#define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */ +#define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. */ +#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */ + +/* The performance counters are disabled. */ +#define PRFCNT_CONFIG_MODE_OFF 0 +/* The performance counters are enabled, but are only written out when a + * PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. + */ +#define PRFCNT_CONFIG_MODE_MANUAL 1 +/* The performance counters are enabled, and are written out each time a tile + * finishes rendering. + */ +#define PRFCNT_CONFIG_MODE_TILE 2 + +/* AS_MEMATTR values from MMU_MEMATTR_STAGE1: */ +/* Use GPU implementation-defined caching policy. */ +#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull +/* The attribute set to force all resources to be cached. */ +#define AS_MEMATTR_FORCE_TO_CACHE_ALL 0x8Full +/* Inner write-alloc cache setup, no outer caching */ +#define AS_MEMATTR_WRITE_ALLOC 0x8Dull + +/* Use GPU implementation-defined caching policy. */ +#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull +/* The attribute set to force all resources to be cached. 
*/ +#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL 0x4Full +/* Inner write-alloc cache setup, no outer caching */ +#define AS_MEMATTR_LPAE_WRITE_ALLOC 0x4Dull +/* Set to implementation defined, outer caching */ +#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF 0x88ull +/* Set to write back memory, outer caching */ +#define AS_MEMATTR_LPAE_OUTER_WA 0x8Dull +/* There is no LPAE support for non-cacheable, since the memory type is always + * write-back. + * Marking this setting as reserved for LPAE + */ +#define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED + +/* L2_MMU_CONFIG register */ +#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT (23) +#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT) + +/* End L2_MMU_CONFIG register */ + +/* THREAD_* registers */ + +/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */ +#define IMPLEMENTATION_UNSPECIFIED 0 +#define IMPLEMENTATION_SILICON 1 +#define IMPLEMENTATION_FPGA 2 +#define IMPLEMENTATION_MODEL 3 + +/* Default values when registers are not supported by the implemented hardware */ +#define THREAD_MT_DEFAULT 256 +#define THREAD_MWS_DEFAULT 256 +#define THREAD_MBS_DEFAULT 256 +#define THREAD_MR_DEFAULT 1024 +#define THREAD_MTQ_DEFAULT 4 +#define THREAD_MTGS_DEFAULT 10 + +/* End THREAD_* registers */ + +/* SHADER_CONFIG register */ +#define SC_LS_ALLOW_ATTR_TYPES (1ul << 16) +#define SC_TLS_HASH_ENABLE (1ul << 17) +#define SC_LS_ATTR_CHECK_DISABLE (1ul << 18) +#define SC_VAR_ALGORITHM (1ul << 29) +/* End SHADER_CONFIG register */ + +/* TILER_CONFIG register */ +#define TC_CLOCK_GATE_OVERRIDE (1ul << 0) +/* End TILER_CONFIG register */ + +/* L2_CONFIG register */ +#define L2_CONFIG_SIZE_SHIFT 16 +#define L2_CONFIG_SIZE_MASK (0xFFul << L2_CONFIG_SIZE_SHIFT) +#define L2_CONFIG_HASH_SHIFT 24 +#define L2_CONFIG_HASH_MASK (0xFFul << L2_CONFIG_HASH_SHIFT) +/* End L2_CONFIG register */ + +/* IDVS_GROUP register */ +#define IDVS_GROUP_SIZE_SHIFT (16) +#define IDVS_GROUP_MAX_SIZE (0x3F) + +#endif /* 
_KBASE_GPU_REGMAP_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/Kbuild b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/Kbuild new file mode 100644 index 0000000..3d9cf80 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/Kbuild @@ -0,0 +1,28 @@ +# +# (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +mali_kbase-y += \ + ipa/mali_kbase_ipa_simple.o \ + ipa/mali_kbase_ipa.o \ + ipa/mali_kbase_ipa_vinstr_g7x.o \ + ipa/mali_kbase_ipa_vinstr_common.o + +mali_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o \ No newline at end of file diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c new file mode 100644 index 0000000..d663ccb --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c @@ -0,0 +1,675 @@ +/* + * + * (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ +#include +#include +#include +#include "mali_kbase.h" +#include "mali_kbase_ipa.h" +#include "mali_kbase_ipa_debugfs.h" +#include "mali_kbase_ipa_simple.h" +#include "backend/gpu/mali_kbase_pm_internal.h" + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) +#include +#else +#include +#define dev_pm_opp_find_freq_exact opp_find_freq_exact +#define dev_pm_opp_get_voltage opp_get_voltage +#define dev_pm_opp opp +#endif + +#define KBASE_IPA_FALLBACK_MODEL_NAME "mali-simple-power-model" + +static const struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = { + &kbase_simple_ipa_model_ops, + &kbase_g71_ipa_model_ops, + &kbase_g72_ipa_model_ops, + &kbase_g76_ipa_model_ops, + &kbase_g52_ipa_model_ops, + &kbase_g52_r1_ipa_model_ops, + &kbase_g51_ipa_model_ops, + &kbase_g77_ipa_model_ops, + &kbase_tnax_ipa_model_ops, + &kbase_tbex_ipa_model_ops +}; + +int kbase_ipa_model_recalculate(struct kbase_ipa_model *model) +{ + int err = 0; + + lockdep_assert_held(&model->kbdev->ipa.lock); + + if (model->ops->recalculate) { + err = model->ops->recalculate(model); + if (err) { + dev_err(model->kbdev->dev, + "recalculation of power model %s returned error %d\n", + model->ops->name, err); + } + } + + return err; +} + +const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev, + const char *name) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(kbase_ipa_all_model_ops); ++i) { + const struct kbase_ipa_model_ops *ops = kbase_ipa_all_model_ops[i]; + + if (!strcmp(ops->name, name)) + 
return ops; + } + + dev_err(kbdev->dev, "power model \'%s\' not found\n", name); + + return NULL; +} +KBASE_EXPORT_TEST_API(kbase_ipa_model_ops_find); + +const char *kbase_ipa_model_name_from_id(u32 gpu_id) +{ + const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> + GPU_ID_VERSION_PRODUCT_ID_SHIFT; + + switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) { + case GPU_ID2_PRODUCT_TMIX: + return "mali-g71-power-model"; + case GPU_ID2_PRODUCT_THEX: + return "mali-g72-power-model"; + case GPU_ID2_PRODUCT_TNOX: + return "mali-g76-power-model"; + case GPU_ID2_PRODUCT_TSIX: + return "mali-g51-power-model"; + case GPU_ID2_PRODUCT_TGOX: + if ((gpu_id & GPU_ID2_VERSION_MAJOR) == + (0 << GPU_ID2_VERSION_MAJOR_SHIFT)) + /* g52 aliased to g76 power-model's ops */ + return "mali-g52-power-model"; + else + return "mali-g52_r1-power-model"; + case GPU_ID2_PRODUCT_TNAX: + return "mali-tnax-power-model"; + case GPU_ID2_PRODUCT_TTRX: + return "mali-g77-power-model"; + case GPU_ID2_PRODUCT_TBEX: + return "mali-tbex-power-model"; + default: + return KBASE_IPA_FALLBACK_MODEL_NAME; + } +} +KBASE_EXPORT_TEST_API(kbase_ipa_model_name_from_id); + +static struct device_node *get_model_dt_node(struct kbase_ipa_model *model, + bool dt_required) +{ + struct device_node *model_dt_node; + char compat_string[64]; + + snprintf(compat_string, sizeof(compat_string), "arm,%s", + model->ops->name); + + /* of_find_compatible_node() will call of_node_put() on the root node, + * so take a reference on it first. 
+ */ + of_node_get(model->kbdev->dev->of_node); + model_dt_node = of_find_compatible_node(model->kbdev->dev->of_node, + NULL, compat_string); + if (!model_dt_node && !model->missing_dt_node_warning) { + if (dt_required) + dev_warn(model->kbdev->dev, + "Couldn't find power_model DT node matching \'%s\'\n", + compat_string); + model->missing_dt_node_warning = true; + } + + return model_dt_node; +} + +int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model, + const char *name, s32 *addr, + size_t num_elems, bool dt_required) +{ + int err, i; + struct device_node *model_dt_node = get_model_dt_node(model, + dt_required); + char *origin; + + err = of_property_read_u32_array(model_dt_node, name, addr, num_elems); + /* We're done with model_dt_node now, so drop the reference taken in + * get_model_dt_node()/of_find_compatible_node(). + */ + of_node_put(model_dt_node); + + if (err && dt_required) { + memset(addr, 0, sizeof(s32) * num_elems); + dev_warn(model->kbdev->dev, + "Error %d, no DT entry: %s.%s = %zu*[0]\n", + err, model->ops->name, name, num_elems); + origin = "zero"; + } else if (err && !dt_required) { + origin = "default"; + } else /* !err */ { + origin = "DT"; + } + + /* Create a unique debugfs entry for each element */ + for (i = 0; i < num_elems; ++i) { + char elem_name[32]; + + if (num_elems == 1) + snprintf(elem_name, sizeof(elem_name), "%s", name); + else + snprintf(elem_name, sizeof(elem_name), "%s.%d", + name, i); + + dev_dbg(model->kbdev->dev, "%s.%s = %d (%s)\n", + model->ops->name, elem_name, addr[i], origin); + + err = kbase_ipa_model_param_add(model, elem_name, + &addr[i], sizeof(s32), + PARAM_TYPE_S32); + if (err) + goto exit; + } +exit: + return err; +} + +int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model, + const char *name, char *addr, + size_t size, bool dt_required) +{ + int err; + struct device_node *model_dt_node = get_model_dt_node(model, + dt_required); + const char *string_prop_value; + char *origin; + + err = 
of_property_read_string(model_dt_node, name, + &string_prop_value); + + /* We're done with model_dt_node now, so drop the reference taken in + * get_model_dt_node()/of_find_compatible_node(). + */ + of_node_put(model_dt_node); + + if (err && dt_required) { + strncpy(addr, "", size - 1); + dev_warn(model->kbdev->dev, + "Error %d, no DT entry: %s.%s = \'%s\'\n", + err, model->ops->name, name, addr); + err = 0; + origin = "zero"; + } else if (err && !dt_required) { + origin = "default"; + } else /* !err */ { + strncpy(addr, string_prop_value, size - 1); + origin = "DT"; + } + + addr[size - 1] = '\0'; + + dev_dbg(model->kbdev->dev, "%s.%s = \'%s\' (%s)\n", + model->ops->name, name, string_prop_value, origin); + + err = kbase_ipa_model_param_add(model, name, addr, size, + PARAM_TYPE_STRING); + return err; +} + +void kbase_ipa_term_model(struct kbase_ipa_model *model) +{ + if (!model) + return; + + lockdep_assert_held(&model->kbdev->ipa.lock); + + if (model->ops->term) + model->ops->term(model); + + kbase_ipa_model_param_free_all(model); + + kfree(model); +} +KBASE_EXPORT_TEST_API(kbase_ipa_term_model); + +struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev, + const struct kbase_ipa_model_ops *ops) +{ + struct kbase_ipa_model *model; + int err; + + lockdep_assert_held(&kbdev->ipa.lock); + + if (!ops || !ops->name) + return NULL; + + model = kzalloc(sizeof(struct kbase_ipa_model), GFP_KERNEL); + if (!model) + return NULL; + + model->kbdev = kbdev; + model->ops = ops; + INIT_LIST_HEAD(&model->params); + + err = model->ops->init(model); + if (err) { + dev_err(kbdev->dev, + "init of power model \'%s\' returned error %d\n", + ops->name, err); + kfree(model); + return NULL; + } + + err = kbase_ipa_model_recalculate(model); + if (err) { + kbase_ipa_term_model(model); + return NULL; + } + + return model; +} +KBASE_EXPORT_TEST_API(kbase_ipa_init_model); + +static void kbase_ipa_term_locked(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->ipa.lock); 
+ + /* Clean up the models */ + if (kbdev->ipa.configured_model != kbdev->ipa.fallback_model) + kbase_ipa_term_model(kbdev->ipa.configured_model); + kbase_ipa_term_model(kbdev->ipa.fallback_model); + + kbdev->ipa.configured_model = NULL; + kbdev->ipa.fallback_model = NULL; +} + +int kbase_ipa_init(struct kbase_device *kbdev) +{ + + const char *model_name; + const struct kbase_ipa_model_ops *ops; + struct kbase_ipa_model *default_model = NULL; + int err; + + mutex_init(&kbdev->ipa.lock); + /* + * Lock during init to avoid warnings from lockdep_assert_held (there + * shouldn't be any concurrent access yet). + */ + mutex_lock(&kbdev->ipa.lock); + + /* The simple IPA model must *always* be present.*/ + ops = kbase_ipa_model_ops_find(kbdev, KBASE_IPA_FALLBACK_MODEL_NAME); + + default_model = kbase_ipa_init_model(kbdev, ops); + if (!default_model) { + err = -EINVAL; + goto end; + } + + kbdev->ipa.fallback_model = default_model; + err = of_property_read_string(kbdev->dev->of_node, + "ipa-model", + &model_name); + if (err) { + /* Attempt to load a match from GPU-ID */ + u32 gpu_id; + + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + model_name = kbase_ipa_model_name_from_id(gpu_id); + dev_dbg(kbdev->dev, + "Inferring model from GPU ID 0x%x: \'%s\'\n", + gpu_id, model_name); + err = 0; + } else { + dev_dbg(kbdev->dev, + "Using ipa-model parameter from DT: \'%s\'\n", + model_name); + } + + if (strcmp(KBASE_IPA_FALLBACK_MODEL_NAME, model_name) != 0) { + ops = kbase_ipa_model_ops_find(kbdev, model_name); + kbdev->ipa.configured_model = kbase_ipa_init_model(kbdev, ops); + if (!kbdev->ipa.configured_model) { + dev_warn(kbdev->dev, + "Failed to initialize ipa-model: \'%s\'\n" + "Falling back on default model\n", + model_name); + kbdev->ipa.configured_model = default_model; + } + } else { + kbdev->ipa.configured_model = default_model; + } + +end: + if (err) + kbase_ipa_term_locked(kbdev); + else + dev_info(kbdev->dev, + "Using configured power model %s, and fallback %s\n", + 
kbdev->ipa.configured_model->ops->name, + kbdev->ipa.fallback_model->ops->name); + + mutex_unlock(&kbdev->ipa.lock); + return err; +} +KBASE_EXPORT_TEST_API(kbase_ipa_init); + +void kbase_ipa_term(struct kbase_device *kbdev) +{ + mutex_lock(&kbdev->ipa.lock); + kbase_ipa_term_locked(kbdev); + mutex_unlock(&kbdev->ipa.lock); + + mutex_destroy(&kbdev->ipa.lock); +} +KBASE_EXPORT_TEST_API(kbase_ipa_term); + +/** + * kbase_scale_dynamic_power() - Scale a dynamic power coefficient to an OPP + * @c: Dynamic model coefficient, in pW/(Hz V^2). Should be in range + * 0 < c < 2^26 to prevent overflow. + * @freq: Frequency, in Hz. Range: 2^23 < freq < 2^30 (~8MHz to ~1GHz) + * @voltage: Voltage, in mV. Range: 2^9 < voltage < 2^13 (~0.5V to ~8V) + * + * Keep a record of the approximate range of each value at every stage of the + * calculation, to ensure we don't overflow. This makes heavy use of the + * approximations 1000 = 2^10 and 1000000 = 2^20, but does the actual + * calculations in decimal for increased accuracy. + * + * Return: Power consumption, in mW. Range: 0 < p < 2^13 (0W to ~8W) + */ +static u32 kbase_scale_dynamic_power(const u32 c, const u32 freq, + const u32 voltage) +{ + /* Range: 2^8 < v2 < 2^16 m(V^2) */ + const u32 v2 = (voltage * voltage) / 1000; + + /* Range: 2^3 < f_MHz < 2^10 MHz */ + const u32 f_MHz = freq / 1000000; + + /* Range: 2^11 < v2f_big < 2^26 kHz V^2 */ + const u32 v2f_big = v2 * f_MHz; + + /* Range: 2^1 < v2f < 2^16 MHz V^2 */ + const u32 v2f = v2f_big / 1000; + + /* Range (working backwards from next line): 0 < v2fc < 2^23 uW. + * Must be < 2^42 to avoid overflowing the return value. */ + const u64 v2fc = (u64) c * (u64) v2f; + + /* Range: 0 < v2fc / 1000 < 2^13 mW */ + return div_u64(v2fc, 1000); +} + +/** + * kbase_scale_static_power() - Scale a static power coefficient to an OPP + * @c: Static model coefficient, in uW/V^3. Should be in range + * 0 < c < 2^32 to prevent overflow. + * @voltage: Voltage, in mV. 
Range: 2^9 < voltage < 2^13 (~0.5V to ~8V) + * + * Return: Power consumption, in mW. Range: 0 < p < 2^13 (0W to ~8W) + */ +u32 kbase_scale_static_power(const u32 c, const u32 voltage) +{ + /* Range: 2^8 < v2 < 2^16 m(V^2) */ + const u32 v2 = (voltage * voltage) / 1000; + + /* Range: 2^17 < v3_big < 2^29 m(V^2) mV */ + const u32 v3_big = v2 * voltage; + + /* Range: 2^7 < v3 < 2^19 m(V^3) */ + const u32 v3 = v3_big / 1000; + + /* + * Range (working backwards from next line): 0 < v3c_big < 2^33 nW. + * The result should be < 2^52 to avoid overflowing the return value. + */ + const u64 v3c_big = (u64) c * (u64) v3; + + /* Range: 0 < v3c_big / 1000000 < 2^13 mW */ + return div_u64(v3c_big, 1000000); +} + +void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* Record the event of GPU entering protected mode. */ + kbdev->ipa_protection_mode_switched = true; +} + +static struct kbase_ipa_model *get_current_model(struct kbase_device *kbdev) +{ + struct kbase_ipa_model *model; + unsigned long flags; + + lockdep_assert_held(&kbdev->ipa.lock); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (kbdev->ipa_protection_mode_switched || + kbdev->ipa.force_fallback_model) + model = kbdev->ipa.fallback_model; + else + model = kbdev->ipa.configured_model; + + /* + * Having taken cognizance of the fact that whether GPU earlier + * protected mode or not, the event can be now reset (if GPU is not + * currently in protected mode) so that configured model is used + * for the next sample. 
+ */ + if (!kbdev->protected_mode) + kbdev->ipa_protection_mode_switched = false; + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return model; +} + +static u32 get_static_power_locked(struct kbase_device *kbdev, + struct kbase_ipa_model *model, + unsigned long voltage) +{ + u32 power = 0; + int err; + u32 power_coeff; + + lockdep_assert_held(&model->kbdev->ipa.lock); + + if (!model->ops->get_static_coeff) + model = kbdev->ipa.fallback_model; + + if (model->ops->get_static_coeff) { + err = model->ops->get_static_coeff(model, &power_coeff); + if (!err) + power = kbase_scale_static_power(power_coeff, + (u32) voltage); + } + + return power; +} + +#if defined(CONFIG_MALI_PWRSOFT_765) || \ + LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) +static unsigned long kbase_get_static_power(struct devfreq *df, + unsigned long voltage) +#else +static unsigned long kbase_get_static_power(unsigned long voltage) +#endif +{ + struct kbase_ipa_model *model; + u32 power = 0; +#if defined(CONFIG_MALI_PWRSOFT_765) || \ + LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) + struct kbase_device *kbdev = dev_get_drvdata(&df->dev); +#else + struct kbase_device *kbdev = kbase_find_device(-1); +#endif + + if (!kbdev) + return 0ul; + + mutex_lock(&kbdev->ipa.lock); + + model = get_current_model(kbdev); + power = get_static_power_locked(kbdev, model, voltage); + + mutex_unlock(&kbdev->ipa.lock); + +#if !(defined(CONFIG_MALI_PWRSOFT_765) || \ + LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)) + kbase_release_device(kbdev); +#endif + + return power; +} + +#if defined(CONFIG_MALI_PWRSOFT_765) || \ + LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) +static unsigned long kbase_get_dynamic_power(struct devfreq *df, + unsigned long freq, + unsigned long voltage) +#else +static unsigned long kbase_get_dynamic_power(unsigned long freq, + unsigned long voltage) +#endif +{ + struct kbase_ipa_model *model; + u32 power_coeff = 0, power = 0; + int err = 0; +#if defined(CONFIG_MALI_PWRSOFT_765) || \ + 
LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) + struct kbase_device *kbdev = dev_get_drvdata(&df->dev); +#else + struct kbase_device *kbdev = kbase_find_device(-1); +#endif + + if (!kbdev) + return 0ul; + + mutex_lock(&kbdev->ipa.lock); + + model = kbdev->ipa.fallback_model; + + err = model->ops->get_dynamic_coeff(model, &power_coeff); + + if (!err) + power = kbase_scale_dynamic_power(power_coeff, freq, voltage); + else + dev_err_ratelimited(kbdev->dev, + "Model %s returned error code %d\n", + model->ops->name, err); + + mutex_unlock(&kbdev->ipa.lock); + +#if !(defined(CONFIG_MALI_PWRSOFT_765) || \ + LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)) + kbase_release_device(kbdev); +#endif + + return power; +} + +int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power, + unsigned long freq, + unsigned long voltage) +{ + struct kbase_ipa_model *model; + u32 power_coeff = 0; + int err = 0; + struct kbasep_pm_metrics diff; + u64 total_time; + + lockdep_assert_held(&kbdev->ipa.lock); + + kbase_pm_get_dvfs_metrics(kbdev, &kbdev->ipa.last_metrics, &diff); + + model = get_current_model(kbdev); + + err = model->ops->get_dynamic_coeff(model, &power_coeff); + + /* If the counter model returns an error (e.g. switching back to + * protected mode and failing to read counters, or a counter sample + * with too few cycles), revert to the fallback model. + */ + if (err && model != kbdev->ipa.fallback_model) { + model = kbdev->ipa.fallback_model; + err = model->ops->get_dynamic_coeff(model, &power_coeff); + } + + if (err) + return err; + + *power = kbase_scale_dynamic_power(power_coeff, freq, voltage); + + /* time_busy / total_time cannot be >1, so assigning the 64-bit + * result of div_u64 to *power cannot overflow. 
+ */ + total_time = diff.time_busy + (u64) diff.time_idle; + *power = div_u64(*power * (u64) diff.time_busy, + max(total_time, 1ull)); + + *power += get_static_power_locked(kbdev, model, voltage); + + return err; +} +KBASE_EXPORT_TEST_API(kbase_get_real_power_locked); + +int kbase_get_real_power(struct devfreq *df, u32 *power, + unsigned long freq, + unsigned long voltage) +{ + int ret; + struct kbase_device *kbdev = dev_get_drvdata(&df->dev); + + if (!kbdev) + return -ENODEV; + + mutex_lock(&kbdev->ipa.lock); + ret = kbase_get_real_power_locked(kbdev, power, freq, voltage); + mutex_unlock(&kbdev->ipa.lock); + + return ret; +} +KBASE_EXPORT_TEST_API(kbase_get_real_power); + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) +struct devfreq_cooling_ops kbase_ipa_power_model_ops = { +#else +struct devfreq_cooling_power kbase_ipa_power_model_ops = { +#endif +#ifdef CONFIG_MALI_DEVFREQ + .get_static_power = &kbase_get_static_power, + .get_dynamic_power = &kbase_get_dynamic_power, +#endif +#if defined(CONFIG_MALI_PWRSOFT_765) || \ + LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) + .get_real_power = &kbase_get_real_power, +#endif +}; +KBASE_EXPORT_TEST_API(kbase_ipa_power_model_ops); diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h new file mode 100644 index 0000000..92aace9 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h @@ -0,0 +1,253 @@ +/* + * + * (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_IPA_H_ +#define _KBASE_IPA_H_ + +#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) + +struct devfreq; + +/** + * struct kbase_ipa_model - Object describing a particular IPA model. + * @kbdev: pointer to kbase device + * @model_data: opaque pointer to model specific data, accessed + * only by model specific methods. + * @ops: pointer to object containing model specific methods. + * @params: head of the list of debugfs params added for model + * @missing_dt_node_warning: flag to limit the matching power model DT not found + * warning to once. + */ +struct kbase_ipa_model { + struct kbase_device *kbdev; + void *model_data; + const struct kbase_ipa_model_ops *ops; + struct list_head params; + bool missing_dt_node_warning; +}; + +/** + * kbase_ipa_model_add_param_s32 - Add an integer model parameter + * @model: pointer to IPA model + * @name: name of corresponding debugfs entry + * @addr: address where the value is stored + * @num_elems: number of elements (1 if not an array) + * @dt_required: if false, a corresponding devicetree entry is not required, + * and the current value will be used. 
If true, a warning is + * output and the data is zeroed + * + * Return: 0 on success, or an error code + */ +int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model, + const char *name, s32 *addr, + size_t num_elems, bool dt_required); + +/** + * kbase_ipa_model_add_param_string - Add a string model parameter + * @model: pointer to IPA model + * @name: name of corresponding debugfs entry + * @addr: address where the value is stored + * @size: size, in bytes, of the value storage (so the maximum string + * length is size - 1) + * @dt_required: if false, a corresponding devicetree entry is not required, + * and the current value will be used. If true, a warning is + * output and the data is zeroed + * + * Return: 0 on success, or an error code + */ +int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model, + const char *name, char *addr, + size_t size, bool dt_required); + +struct kbase_ipa_model_ops { + char *name; + /* The init, recalculate and term ops on the default model are always + * called. However, all the other models are only invoked if the model + * is selected in the device tree. Otherwise they are never + * initialized. Additional resources can be acquired by models in + * init(), however they must be terminated in the term(). + */ + int (*init)(struct kbase_ipa_model *model); + /* Called immediately after init(), or when a parameter is changed, so + * that any coefficients derived from model parameters can be + * recalculated. */ + int (*recalculate)(struct kbase_ipa_model *model); + void (*term)(struct kbase_ipa_model *model); + /* + * get_dynamic_coeff() - calculate dynamic power coefficient + * @model: pointer to model + * @coeffp: pointer to return value location + * + * Calculate a dynamic power coefficient, with units pW/(Hz V^2), which + * is then scaled by the IPA framework according to the current OPP's + * frequency and voltage. + * + * Return: 0 on success, or an error code. 
+	 */
+	int (*get_dynamic_coeff)(struct kbase_ipa_model *model, u32 *coeffp);
+	/*
+	 * get_static_coeff() - calculate static power coefficient
+	 * @model:		pointer to model
+	 * @coeffp:		pointer to return value location
+	 *
+	 * Calculate a static power coefficient, with units uW/(V^3), which is
+	 * scaled by the IPA framework according to the current OPP's voltage.
+	 *
+	 * A model may leave this op NULL, in which case the static coefficient
+	 * of the fallback model is used instead.
+	 *
+	 * Return: 0 on success, or an error code.
+	 */
+	int (*get_static_coeff)(struct kbase_ipa_model *model, u32 *coeffp);
+};
+
+/**
+ * kbase_ipa_init - Initialize the IPA feature
+ * @kbdev:      pointer to kbase device
+ *
+ * The simple IPA power model is initialized first, as the fallback model; if
+ * that initialization fails then IPA is not used and an error is returned.
+ * The device tree is then read for the name of the IPA model to be used, via
+ * the property string "ipa-model". If the property is absent, the model name
+ * is inferred from the GPU ID instead. If the selected model cannot be
+ * initialized, the simple power model is used in its place.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_ipa_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_term - Terminate the IPA feature
+ * @kbdev:      pointer to kbase device
+ *
+ * Both the fallback (simple) IPA power model and the configured model are
+ * terminated, and the IPA lock is destroyed.
+ */
+void kbase_ipa_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_model_recalculate - Recalculate the model coefficients
+ * @model:      pointer to the IPA model object, already initialized
+ *
+ * It shall be called immediately after the model has been initialized
+ * or when a model parameter has changed, so that any coefficients
+ * derived from parameters can be recalculated.
+ * It is a wrapper for the model-specific recalculate() method.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_ipa_model_recalculate(struct kbase_ipa_model *model);
+
+/**
+ * kbase_ipa_model_ops_find - Look up an IPA model using its name
+ * @kbdev:      pointer to kbase device
+ * @name:       name of model to look up
+ *
+ * Return: Pointer to model's 'ops' structure, or NULL if the lookup failed.
+ */
+const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev,
+		const char *name);
+
+/**
+ * kbase_ipa_model_name_from_id - Find the best model for a given GPU ID
+ * @gpu_id:     GPU ID of GPU the model will be used for
+ *
+ * Return: The name of the appropriate counter-based model, or the name of the
+ *         fallback model if no counter model exists.
+ */
+const char *kbase_ipa_model_name_from_id(u32 gpu_id);
+
+/**
+ * kbase_ipa_init_model - Initialize the particular IPA model
+ * @kbdev:      pointer to kbase device
+ * @ops:        pointer to object containing model specific methods.
+ *
+ * Initialize the model corresponding to the @ops pointer passed.
+ * The init() method specified in @ops is called, followed by
+ * kbase_ipa_model_recalculate(). A NULL @ops, or an @ops without a name, is
+ * rejected. Must be called with kbdev->ipa.lock held.
+ *
+ * Return: pointer to kbase_ipa_model on success, NULL on error
+ */
+struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev,
+		const struct kbase_ipa_model_ops *ops);
+/**
+ * kbase_ipa_term_model - Terminate the particular IPA model
+ * @model:      pointer to the IPA model object, already initialized;
+ *              a NULL pointer is ignored
+ *
+ * Terminate the model, using the term() method if the model provides one.
+ * The model's parameters and the model object itself are then freed.
+ * Must be called with kbdev->ipa.lock held.
+ */
+void kbase_ipa_term_model(struct kbase_ipa_model *model);
+
+/**
+ * kbase_ipa_protection_mode_switch_event - Inform IPA of the GPU's entry into
+ *                                          protected mode
+ * @kbdev:      pointer to kbase device
+ *
+ * Makes IPA aware of the GPU switching to protected mode.
+ */ +void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev); + +extern const struct kbase_ipa_model_ops kbase_g71_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_g72_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_g76_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_g52_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_g52_r1_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_g51_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_g77_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_tnax_ipa_model_ops; +extern const struct kbase_ipa_model_ops kbase_tbex_ipa_model_ops; + +/** + * kbase_get_real_power() - get the real power consumption of the GPU + * @df: dynamic voltage and frequency scaling information for the GPU. + * @power: where to store the power consumption, in mW. + * @freq: a frequency, in HZ. + * @voltage: a voltage, in mV. + * + * The returned value incorporates both static and dynamic power consumption. + * + * Return: 0 on success, or an error code. + */ +int kbase_get_real_power(struct devfreq *df, u32 *power, + unsigned long freq, + unsigned long voltage); + +#if MALI_UNIT_TEST +/* Called by kbase_get_real_power() to invoke the power models. + * Must be called with kbdev->ipa.lock held. + * This function is only exposed for use by unit tests. 
+ */ +int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power, + unsigned long freq, + unsigned long voltage); +#endif /* MALI_UNIT_TEST */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) +extern struct devfreq_cooling_ops kbase_ipa_power_model_ops; +#else +extern struct devfreq_cooling_power kbase_ipa_power_model_ops; +#endif + +#else /* !(defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */ + +static inline void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev) +{ } + +#endif /* (defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */ + +#endif diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c new file mode 100644 index 0000000..30a3b7d --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c @@ -0,0 +1,322 @@ +/* + * + * (C) COPYRIGHT 2017-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/debugfs.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+
+#include "mali_kbase.h"
+#include "mali_kbase_ipa.h"
+#include "mali_kbase_ipa_debugfs.h"
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0))
+#define DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SIMPLE_ATTRIBUTE
+#endif
+
+/**
+ * struct kbase_ipa_model_param - A model parameter exposed through debugfs
+ * @name:  heap-allocated copy of the parameter name, used as the file name
+ * @addr:  location of the parameter value, viewed according to @type
+ * @size:  size, in bytes, of the storage at @addr
+ * @type:  whether the value is an s32 or a string
+ * @model: model that owns the parameter
+ * @link:  entry in the owning model's list of parameters
+ */
+struct kbase_ipa_model_param {
+	char *name;
+	union {
+		void *voidp;
+		s32 *s32p;
+		char *str;
+	} addr;
+	size_t size;
+	enum kbase_ipa_model_param_type type;
+	struct kbase_ipa_model *model;
+	struct list_head link;
+};
+
+/* debugfs getter for an s32 parameter; the value is read under ipa.lock and
+ * sign-extended into *val so that negative values print correctly.
+ */
+static int param_int_get(void *data, u64 *val)
+{
+	struct kbase_ipa_model_param *param = data;
+
+	mutex_lock(&param->model->kbdev->ipa.lock);
+	*(s64 *) val = *param->addr.s32p;
+	mutex_unlock(&param->model->kbdev->ipa.lock);
+
+	return 0;
+}
+
+/* debugfs setter for an s32 parameter.
+ *
+ * Values outside the s32 range are rejected with -ERANGE. Otherwise the new
+ * value is stored and the model recalculated; if the recalculation fails the
+ * previous value is put back and the error is returned.
+ */
+static int param_int_set(void *data, u64 val)
+{
+	struct kbase_ipa_model_param *param = data;
+	struct kbase_ipa_model *model = param->model;
+	s64 sval = (s64) val;
+	s32 old_val;
+	int err = 0;
+
+	if (sval < S32_MIN || sval > S32_MAX)
+		return -ERANGE;
+
+	mutex_lock(&param->model->kbdev->ipa.lock);
+	old_val = *param->addr.s32p;
+	*param->addr.s32p = val;
+	err = kbase_ipa_model_recalculate(model);
+	if (err < 0)
+		*param->addr.s32p = old_val;
+	mutex_unlock(&param->model->kbdev->ipa.lock);
+
+	return err;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_s32, param_int_get, param_int_set, "%lld\n");
+
+/* debugfs read handler for a string parameter; copies the stored string,
+ * including its terminating NUL, to user space under ipa.lock.
+ */
+static ssize_t param_string_get(struct file *file, char __user *user_buf,
+				size_t count, loff_t *ppos)
+{
+	struct kbase_ipa_model_param *param = file->private_data;
+	ssize_t ret;
+	size_t len;
+
+	mutex_lock(&param->model->kbdev->ipa.lock);
+	len = strnlen(param->addr.str, param->size - 1) + 1;
+	ret = simple_read_from_buffer(user_buf, count, ppos,
+				      param->addr.str, len);
+	mutex_unlock(&param->model->kbdev->ipa.lock);
+
+	return ret;
+}
+
+/* debugfs write handler for a string parameter.
+ *
+ * Stores the string written by user space and recalculates the model; the
+ * previous string is put back if the recalculation fails. Returns @count on
+ * success, -EINVAL if more than the parameter's storage size is written, or
+ * -ENOMEM / -EFAULT / the recalculation error otherwise.
+ */
+static ssize_t param_string_set(struct file *file, const char __user *user_buf,
+				size_t count, loff_t *ppos)
+{
+	struct kbase_ipa_model_param *param = file->private_data;
+	struct kbase_ipa_model
*model = param->model;
+	char *old_str = NULL;
+	ssize_t ret = count;
+	size_t buf_size;
+	int err;
+
+	mutex_lock(&model->kbdev->ipa.lock);
+
+	if (count > param->size) {
+		ret = -EINVAL;
+		goto end;
+	}
+
+	/* Keep a copy of the current value so that it can be put back if the
+	 * new one cannot be read or is rejected by the model.
+	 */
+	old_str = kstrndup(param->addr.str, param->size, GFP_KERNEL);
+	if (!old_str) {
+		ret = -ENOMEM;
+		goto end;
+	}
+
+	buf_size = min(param->size - 1, count);
+	if (copy_from_user(param->addr.str, user_buf, buf_size)) {
+		/* A faulting copy may already have overwritten part of the
+		 * stored value, so restore it before bailing out.
+		 */
+		strlcpy(param->addr.str, old_str, param->size);
+		ret = -EFAULT;
+		goto end;
+	}
+
+	param->addr.str[buf_size] = '\0';
+
+	err = kbase_ipa_model_recalculate(model);
+	if (err < 0) {
+		ret = err;
+		strlcpy(param->addr.str, old_str, param->size);
+	}
+
+end:
+	kfree(old_str);
+	mutex_unlock(&model->kbdev->ipa.lock);
+
+	return ret;
+}
+
+static const struct file_operations fops_string = {
+	.owner = THIS_MODULE,
+	.read = param_string_get,
+	.write = param_string_set,
+	.open = simple_open,
+	.llseek = default_llseek,
+};
+
+/**
+ * kbase_ipa_model_param_add - Register a parameter with a model
+ * @model: model the parameter belongs to
+ * @name:  parameter name; copied, so the caller's buffer may be transient
+ * @addr:  address where the parameter value is stored
+ * @size:  size, in bytes, of the storage at @addr
+ * @type:  type of the value stored at @addr
+ *
+ * The parameter is linked into @model's parameter list; it is released by
+ * kbase_ipa_model_param_free_all().
+ *
+ * Return: 0 on success, -ENOMEM if an allocation failed
+ */
+int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name,
+			      void *addr, size_t size,
+			      enum kbase_ipa_model_param_type type)
+{
+	struct kbase_ipa_model_param *param;
+
+	param = kzalloc(sizeof(*param), GFP_KERNEL);
+
+	if (!param)
+		return -ENOMEM;
+
+	/* 'name' is stack-allocated for array elements, so copy it into
+	 * heap-allocated storage */
+	param->name = kstrdup(name, GFP_KERNEL);
+
+	if (!param->name) {
+		kfree(param);
+		return -ENOMEM;
+	}
+
+	param->addr.voidp = addr;
+	param->size = size;
+	param->type = type;
+	param->model = model;
+
+	list_add(&param->link, &model->params);
+
+	return 0;
+}
+
+/**
+ * kbase_ipa_model_param_free_all - Release every parameter of a model
+ * @model: model whose parameter list is to be emptied
+ *
+ * Unlinks each parameter and frees both its name and the parameter itself.
+ * The storage the parameter pointed at is not touched.
+ */
+void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_model_param *param_p, *param_n;
+
+	list_for_each_entry_safe(param_p, param_n, &model->params, link) {
+		list_del(&param_p->link);
+		kfree(param_p->name);
+		kfree(param_p);
+	}
+}
+
+static int force_fallback_model_get(void *data, u64 *val)
+{
+	struct kbase_device *kbdev = data;
+
+	mutex_lock(&kbdev->ipa.lock);
+	*val = kbdev->ipa.force_fallback_model;
+
mutex_unlock(&kbdev->ipa.lock);
+
+	return 0;
+}
+
+/* debugfs setter for ipa_force_fallback_model: any non-zero value forces the
+ * fallback model to be used, zero clears the override.
+ */
+static int force_fallback_model_set(void *data, u64 val)
+{
+	struct kbase_device *kbdev = data;
+
+	mutex_lock(&kbdev->ipa.lock);
+	kbdev->ipa.force_fallback_model = (val ? true : false);
+	mutex_unlock(&kbdev->ipa.lock);
+
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(force_fallback_model,
+		force_fallback_model_get,
+		force_fallback_model_set,
+		"%llu\n");
+
+/**
+ * current_power_get - debugfs getter for the current GPU power estimate
+ * @data: pointer to the kbase device
+ * @val:  where to store the power reported by kbase_get_real_power()
+ *
+ * The GPU is kept active for the duration of the query.
+ *
+ * Return: 0 on success, or the error returned by kbase_get_real_power()
+ */
+static int current_power_get(void *data, u64 *val)
+{
+	struct kbase_device *kbdev = data;
+	struct devfreq *df = kbdev->devfreq;
+	u32 power = 0;
+	int err;
+
+	kbase_pm_context_active(kbdev);
+	/* The current model assumes that there's no more than one voltage
+	 * regulator currently available in the system.
+	 */
+	err = kbase_get_real_power(df, &power,
+		kbdev->current_nominal_freq,
+		(kbdev->current_voltages[0] / 1000));
+	kbase_pm_context_idle(kbdev);
+
+	/* On failure 'power' was never written by the model, so report the
+	 * error instead of a meaningless value.
+	 */
+	if (err)
+		return err;
+
+	*val = power;
+
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(current_power, current_power_get, NULL, "%llu\n");
+
+/**
+ * kbase_ipa_model_debugfs_init - Create the debugfs entries of one model
+ * @model: model whose parameters are to be exposed
+ *
+ * Creates a directory named after the model under the Mali debugfs
+ * directory, holding one file per registered parameter. Must be called with
+ * kbdev->ipa.lock held.
+ */
+static void kbase_ipa_model_debugfs_init(struct kbase_ipa_model *model)
+{
+	struct list_head *it;
+	struct dentry *dir;
+
+	lockdep_assert_held(&model->kbdev->ipa.lock);
+
+	dir = debugfs_create_dir(model->ops->name,
+				 model->kbdev->mali_debugfs_directory);
+
+	/* debugfs_create_dir() reports failure with an ERR_PTR() on current
+	 * kernels and with NULL on older ones, so test for both.
+	 */
+	if (IS_ERR_OR_NULL(dir)) {
+		dev_err(model->kbdev->dev,
+			"Couldn't create mali debugfs %s directory",
+			model->ops->name);
+		return;
+	}
+
+	list_for_each(it, &model->params) {
+		struct kbase_ipa_model_param *param =
+				list_entry(it,
+					   struct kbase_ipa_model_param,
+					   link);
+		const struct file_operations *fops = NULL;
+
+		switch (param->type) {
+		case PARAM_TYPE_S32:
+			fops = &fops_s32;
+			break;
+		case PARAM_TYPE_STRING:
+			fops = &fops_string;
+			break;
+		}
+
+		if (unlikely(!fops)) {
+			dev_err(model->kbdev->dev,
+				"Type not set for %s parameter %s\n",
+				model->ops->name, param->name);
+		} else {
+			debugfs_create_file(param->name, S_IRUGO | S_IWUSR,
+					    dir, param, fops);
+		}
+	}
+}
+
+void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model,
+	const char *name, s32
val)
+{
+	struct kbase_ipa_model_param *param;
+
+	mutex_lock(&model->kbdev->ipa.lock);
+
+	/* Parameter names are matched exactly; only the first match is
+	 * considered, and it is left untouched if it is not an s32.
+	 */
+	list_for_each_entry(param, &model->params, link) {
+		if (!strcmp(param->name, name)) {
+			if (param->type == PARAM_TYPE_S32) {
+				*param->addr.s32p = val;
+			} else {
+				dev_err(model->kbdev->dev,
+					"Wrong type for %s parameter %s\n",
+					model->ops->name, param->name);
+			}
+			break;
+		}
+	}
+
+	mutex_unlock(&model->kbdev->ipa.lock);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_model_param_set_s32);
+
+/**
+ * kbase_ipa_debugfs_init - Create the IPA debugfs entries of a device
+ * @kbdev: pointer to kbase device
+ *
+ * Exposes the parameters of the configured model (when it is distinct from
+ * the fallback model) and of the fallback model, plus the
+ * "ipa_current_power" and "ipa_force_fallback_model" files, under the Mali
+ * debugfs directory.
+ */
+void kbase_ipa_debugfs_init(struct kbase_device *kbdev)
+{
+	mutex_lock(&kbdev->ipa.lock);
+
+	if (kbdev->ipa.configured_model != kbdev->ipa.fallback_model)
+		kbase_ipa_model_debugfs_init(kbdev->ipa.configured_model);
+	kbase_ipa_model_debugfs_init(kbdev->ipa.fallback_model);
+
+	debugfs_create_file("ipa_current_power", 0444,
+		kbdev->mali_debugfs_directory, kbdev, &current_power);
+	debugfs_create_file("ipa_force_fallback_model", 0644,
+		kbdev->mali_debugfs_directory, kbdev, &force_fallback_model);
+
+	mutex_unlock(&kbdev->ipa.lock);
+}
diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h
new file mode 100644
index 0000000..a983d9c
--- /dev/null
+++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h
@@ -0,0 +1,68 @@
+/*
+ *
+ * (C) COPYRIGHT 2017 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_IPA_DEBUGFS_H_ +#define _KBASE_IPA_DEBUGFS_H_ + +enum kbase_ipa_model_param_type { + PARAM_TYPE_S32 = 1, + PARAM_TYPE_STRING, +}; + +#ifdef CONFIG_DEBUG_FS + +void kbase_ipa_debugfs_init(struct kbase_device *kbdev); +int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name, + void *addr, size_t size, + enum kbase_ipa_model_param_type type); +void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model); + +/** + * kbase_ipa_model_param_set_s32 - Set an integer model parameter + * + * @model: pointer to IPA model + * @name: name of corresponding debugfs entry + * @val: new value of the parameter + * + * This function is only exposed for use by unit tests running in + * kernel space. Normally it is expected that parameter values will + * instead be set via debugfs. + */ +void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model, + const char *name, s32 val); + +#else /* CONFIG_DEBUG_FS */ + +static inline int kbase_ipa_model_param_add(struct kbase_ipa_model *model, + const char *name, void *addr, + size_t size, + enum kbase_ipa_model_param_type type) +{ + return 0; +} + +static inline void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model) +{ } + +#endif /* CONFIG_DEBUG_FS */ + +#endif /* _KBASE_IPA_DEBUGFS_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c new file mode 100644 index 0000000..852559e --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c @@ -0,0 +1,351 @@ +/* + * + * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#ifdef CONFIG_DEVFREQ_THERMAL +#include +#endif +#include +#include +#include + +#include "mali_kbase.h" +#include "mali_kbase_defs.h" +#include "mali_kbase_ipa_simple.h" +#include "mali_kbase_ipa_debugfs.h" + +#if MALI_UNIT_TEST + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) +static unsigned long dummy_temp; + +static int kbase_simple_power_model_get_dummy_temp( + struct thermal_zone_device *tz, + unsigned long *temp) +{ + *temp = READ_ONCE(dummy_temp); + return 0; +} + +#else +static int dummy_temp; + +static int kbase_simple_power_model_get_dummy_temp( + struct thermal_zone_device *tz, + int *temp) +{ + *temp = READ_ONCE(dummy_temp); + return 0; +} +#endif + +/* Intercept calls to the kernel function using a macro */ +#ifdef thermal_zone_get_temp +#undef thermal_zone_get_temp +#endif +#define thermal_zone_get_temp(tz, temp) \ + kbase_simple_power_model_get_dummy_temp(tz, temp) + +void kbase_simple_power_model_set_dummy_temp(int temp) +{ + WRITE_ONCE(dummy_temp, temp); +} +KBASE_EXPORT_TEST_API(kbase_simple_power_model_set_dummy_temp); + +#endif /* MALI_UNIT_TEST */ + +/* + * This model is primarily designed for the Juno platform. It may not be + * suitable for other platforms. 
The additional resources in this model + * should preferably be minimal, as this model is rarely used when a dynamic + * model is available. + */ + +/** + * struct kbase_ipa_model_simple_data - IPA context per device + * @dynamic_coefficient: dynamic coefficient of the model + * @static_coefficient: static coefficient of the model + * @ts: Thermal scaling coefficients of the model + * @tz_name: Thermal zone name + * @gpu_tz: thermal zone device + * @poll_temperature_thread: Handle for temperature polling thread + * @current_temperature: Most recent value of polled temperature + * @temperature_poll_interval_ms: How often temperature should be checked, in ms + */ + +struct kbase_ipa_model_simple_data { + u32 dynamic_coefficient; + u32 static_coefficient; + s32 ts[4]; + char tz_name[THERMAL_NAME_LENGTH]; + struct thermal_zone_device *gpu_tz; + struct task_struct *poll_temperature_thread; + int current_temperature; + int temperature_poll_interval_ms; +}; +#define FALLBACK_STATIC_TEMPERATURE 55000 + +/** + * calculate_temp_scaling_factor() - Calculate temperature scaling coefficient + * @ts: Signed coefficients, in order t^0 to t^3, with units Deg^-N + * @t: Temperature, in mDeg C. Range: -2^17 < t < 2^17 + * + * Scale the temperature according to a cubic polynomial whose coefficients are + * provided in the device tree. The result is used to scale the static power + * coefficient, where 1000000 means no change. + * + * Return: Temperature scaling factor. Range 0 <= ret <= 10,000,000. + */ +static u32 calculate_temp_scaling_factor(s32 ts[4], s64 t) +{ + /* Range: -2^24 < t2 < 2^24 m(Deg^2) */ + const s64 t2 = div_s64((t * t), 1000); + + /* Range: -2^31 < t3 < 2^31 m(Deg^3) */ + const s64 t3 = div_s64((t * t2), 1000); + + /* + * Sum the parts. t^[1-3] are in m(Deg^N), but the coefficients are in + * Deg^-N, so we need to multiply the last coefficient by 1000. 
+ * Range: -2^63 < res_big < 2^63 + */ + const s64 res_big = ts[3] * t3 /* +/- 2^62 */ + + ts[2] * t2 /* +/- 2^55 */ + + ts[1] * t /* +/- 2^48 */ + + ts[0] * (s64)1000; /* +/- 2^41 */ + + /* Range: -2^60 < res_unclamped < 2^60 */ + s64 res_unclamped = div_s64(res_big, 1000); + + /* Clamp to range of 0x to 10x the static power */ + return clamp(res_unclamped, (s64) 0, (s64) 10000000); +} + +/* We can't call thermal_zone_get_temp() directly in model_static_coeff(), + * because we don't know if tz->lock is held in the same thread. So poll it in + * a separate thread to get around this. */ +static int poll_temperature(void *data) +{ + struct kbase_ipa_model_simple_data *model_data = + (struct kbase_ipa_model_simple_data *) data; +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) + unsigned long temp; +#else + int temp; +#endif + + while (!kthread_should_stop()) { + struct thermal_zone_device *tz = READ_ONCE(model_data->gpu_tz); + + if (tz) { + int ret; + + ret = thermal_zone_get_temp(tz, &temp); + if (ret) { + pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n", + ret); + temp = FALLBACK_STATIC_TEMPERATURE; + } + } else { + temp = FALLBACK_STATIC_TEMPERATURE; + } + + WRITE_ONCE(model_data->current_temperature, temp); + + msleep_interruptible(READ_ONCE(model_data->temperature_poll_interval_ms)); + } + + return 0; +} + +static int model_static_coeff(struct kbase_ipa_model *model, u32 *coeffp) +{ + u32 temp_scaling_factor; + struct kbase_ipa_model_simple_data *model_data = + (struct kbase_ipa_model_simple_data *) model->model_data; + u64 coeff_big; + int temp; + + temp = READ_ONCE(model_data->current_temperature); + + /* Range: 0 <= temp_scaling_factor < 2^24 */ + temp_scaling_factor = calculate_temp_scaling_factor(model_data->ts, + temp); + + /* + * Range: 0 <= coeff_big < 2^52 to avoid overflowing *coeffp. This + * means static_coefficient must be in range + * 0 <= static_coefficient < 2^28. 
+ */ + coeff_big = (u64) model_data->static_coefficient * (u64) temp_scaling_factor; + *coeffp = div_u64(coeff_big, 1000000); + + return 0; +} + +static int model_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) +{ + struct kbase_ipa_model_simple_data *model_data = + (struct kbase_ipa_model_simple_data *) model->model_data; + + *coeffp = model_data->dynamic_coefficient; + + return 0; +} + +static int add_params(struct kbase_ipa_model *model) +{ + int err = 0; + struct kbase_ipa_model_simple_data *model_data = + (struct kbase_ipa_model_simple_data *)model->model_data; + + err = kbase_ipa_model_add_param_s32(model, "static-coefficient", + &model_data->static_coefficient, + 1, true); + if (err) + goto end; + + err = kbase_ipa_model_add_param_s32(model, "dynamic-coefficient", + &model_data->dynamic_coefficient, + 1, true); + if (err) + goto end; + + err = kbase_ipa_model_add_param_s32(model, "ts", + model_data->ts, 4, true); + if (err) + goto end; + + err = kbase_ipa_model_add_param_string(model, "thermal-zone", + model_data->tz_name, + sizeof(model_data->tz_name), true); + if (err) + goto end; + + model_data->temperature_poll_interval_ms = 200; + err = kbase_ipa_model_add_param_s32(model, "temp-poll-interval-ms", + &model_data->temperature_poll_interval_ms, + 1, false); + +end: + return err; +} + +static int kbase_simple_power_model_init(struct kbase_ipa_model *model) +{ + int err; + struct kbase_ipa_model_simple_data *model_data; + + model_data = kzalloc(sizeof(struct kbase_ipa_model_simple_data), + GFP_KERNEL); + if (!model_data) + return -ENOMEM; + + model->model_data = (void *) model_data; + + model_data->current_temperature = FALLBACK_STATIC_TEMPERATURE; + model_data->poll_temperature_thread = kthread_run(poll_temperature, + (void *) model_data, + "mali-simple-power-model-temp-poll"); + if (IS_ERR(model_data->poll_temperature_thread)) { + err = PTR_ERR(model_data->poll_temperature_thread); + kfree(model_data); + return err; + } + + err = 
add_params(model); + if (err) { + kbase_ipa_model_param_free_all(model); + kthread_stop(model_data->poll_temperature_thread); + kfree(model_data); + } + + return err; +} + +static int kbase_simple_power_model_recalculate(struct kbase_ipa_model *model) +{ + struct kbase_ipa_model_simple_data *model_data = + (struct kbase_ipa_model_simple_data *)model->model_data; + struct thermal_zone_device *tz; + + lockdep_assert_held(&model->kbdev->ipa.lock); + + if (!strnlen(model_data->tz_name, sizeof(model_data->tz_name))) { + model_data->gpu_tz = NULL; + } else { + char tz_name[THERMAL_NAME_LENGTH]; + + strlcpy(tz_name, model_data->tz_name, sizeof(tz_name)); + + /* Release ipa.lock so that thermal_list_lock is not acquired + * with ipa.lock held, thereby avoid lock ordering violation + * lockdep warning. The warning comes as a chain of locks + * ipa.lock --> thermal_list_lock --> tz->lock gets formed + * on registering devfreq cooling device when probe method + * of mali platform driver is invoked. + */ + mutex_unlock(&model->kbdev->ipa.lock); + tz = thermal_zone_get_zone_by_name(tz_name); + mutex_lock(&model->kbdev->ipa.lock); + + if (IS_ERR_OR_NULL(tz)) { + pr_warn_ratelimited("Error %ld getting thermal zone \'%s\', not yet ready?\n", + PTR_ERR(tz), tz_name); + return -EPROBE_DEFER; + } + + /* Check if another thread raced against us & updated the + * thermal zone name string. Update the gpu_tz pointer only if + * the name string did not change whilst we retrieved the new + * thermal_zone_device pointer, otherwise model_data->tz_name & + * model_data->gpu_tz would become inconsistent with each other. + * The below check will succeed only for the thread which last + * updated the name string. 
+ */ + if (strncmp(tz_name, model_data->tz_name, sizeof(tz_name)) == 0) + model_data->gpu_tz = tz; + } + + return 0; +} + +static void kbase_simple_power_model_term(struct kbase_ipa_model *model) +{ + struct kbase_ipa_model_simple_data *model_data = + (struct kbase_ipa_model_simple_data *)model->model_data; + + kthread_stop(model_data->poll_temperature_thread); + + kfree(model_data); +} + +struct kbase_ipa_model_ops kbase_simple_ipa_model_ops = { + .name = "mali-simple-power-model", + .init = &kbase_simple_power_model_init, + .recalculate = &kbase_simple_power_model_recalculate, + .term = &kbase_simple_power_model_term, + .get_dynamic_coeff = &model_dynamic_coeff, + .get_static_coeff = &model_static_coeff, +}; +KBASE_EXPORT_TEST_API(kbase_simple_ipa_model_ops); diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.h new file mode 100644 index 0000000..fed67d5 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.h @@ -0,0 +1,45 @@ +/* + * + * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_IPA_SIMPLE_H_ +#define _KBASE_IPA_SIMPLE_H_ + +#if defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL) + +extern struct kbase_ipa_model_ops kbase_simple_ipa_model_ops; + +#if MALI_UNIT_TEST +/** + * kbase_simple_power_model_set_dummy_temp() - set a dummy temperature value + * @temp: Temperature of the thermal zone, in millidegrees celsius. + * + * This is only intended for use in unit tests, to ensure that the temperature + * values used by the simple power model are predictable. Deterministic + * behavior is necessary to allow validation of the static power values + * computed by this model. + */ +void kbase_simple_power_model_set_dummy_temp(int temp); +#endif /* MALI_UNIT_TEST */ + +#endif /* (defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */ + +#endif /* _KBASE_IPA_SIMPLE_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c similarity index 100% rename from dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c rename to bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h similarity index 100% rename from dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h rename to bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c similarity index 100% rename from dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c rename to bifrost/r25p0/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c diff --git 
a/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_base_jm_kernel.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/jm/mali_base_jm_kernel.h similarity index 100% rename from dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_base_jm_kernel.h rename to bifrost/r25p0/kernel/drivers/gpu/arm/midgard/jm/mali_base_jm_kernel.h diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_defs.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_defs.h new file mode 100644 index 0000000..aac561b --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_defs.h @@ -0,0 +1,818 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * Definitions (types, defines, etc.) specific to Job Manager Kbase. + * They are placed here to allow the hierarchy of header files to work. + */ + +#ifndef _KBASE_JM_DEFS_H_ +#define _KBASE_JM_DEFS_H_ + +#include "mali_kbase_js_defs.h" + +/* Dump Job slot trace on error (only active if KBASE_KTRACE_ENABLE != 0) */ +#define KBASE_KTRACE_DUMP_ON_JOB_SLOT_ERROR 1 + +/* + * Number of milliseconds before resetting the GPU when a job cannot be "zapped" + * from the hardware. 
Note that the time is actually + * ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and + * the GPU actually being reset to give other contexts time for their jobs + * to be soft-stopped and removed from the hardware before resetting. + */ +#define ZAP_TIMEOUT 1000 + +/* + * Prevent soft-stops from occurring in scheduling situations + * + * This is not due to HW issues, but when scheduling is desired to be more + * predictable. + * + * Therefore, soft stop may still be disabled due to HW issues. + * + * Soft stop will still be used for non-scheduling purposes e.g. when + * terminating a context. + * + * if not in use, define this value to 0 instead of being undefined. + */ +#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0 + +/* + * Prevent hard-stops from occurring in scheduling situations + * + * This is not due to HW issues, but when scheduling is desired to be more + * predictable. + * + * Hard stop will still be used for non-scheduling purposes e.g. when + * terminating a context. + * + * if not in use, define this value to 0 instead of being undefined. + */ +#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0 + +/* Atom has been previously soft-stopped */ +#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPED (1<<1) +/* Atom has been previously retried to execute */ +#define KBASE_KATOM_FLAGS_RERUN (1<<2) +/* Atom submitted with JOB_CHAIN_FLAG bit set in JS_CONFIG_NEXT register, helps + * to disambiguate short-running job chains during soft/hard stopping of jobs + */ +#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3) +/* Atom has been previously hard-stopped. 
*/ +#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4) +/* Atom has caused us to enter disjoint state */ +#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5) +/* Atom blocked on cross-slot dependency */ +#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7) +/* Atom has fail dependency on cross-slot dependency */ +#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8) +/* Atom is currently in the list of atoms blocked on cross-slot dependencies */ +#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9) +/* Atom is currently holding a context reference */ +#define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10) +/* Atom requires GPU to be in protected mode */ +#define KBASE_KATOM_FLAG_PROTECTED (1<<11) +/* Atom has been stored in runnable_tree */ +#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12) +/* Atom is waiting for L2 caches to power up in order to enter protected mode */ +#define KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT (1<<13) + +/* SW related flags about types of JS_COMMAND action + * NOTE: These must be masked off by JS_COMMAND_MASK + */ + +/* This command causes a disjoint event */ +#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100 + +/* Bitmask of all SW related flags */ +#define JS_COMMAND_SW_BITS (JS_COMMAND_SW_CAUSES_DISJOINT) + +#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK) +#error "JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK." \ + "Must update JS_COMMAND_SW_<..> bitmasks" +#endif + +/* Soft-stop command that causes a Disjoint event. 
This of course isn't + * entirely masked off by JS_COMMAND_MASK + */ +#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \ + (JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP) + +#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT + +/* Serialize atoms within a slot (ie only one atom per job slot) */ +#define KBASE_SERIALIZE_INTRA_SLOT (1 << 0) +/* Serialize atoms between slots (ie only one job slot running at any time) */ +#define KBASE_SERIALIZE_INTER_SLOT (1 << 1) +/* Reset the GPU after each atom completion */ +#define KBASE_SERIALIZE_RESET (1 << 2) + +#ifdef CONFIG_DEBUG_FS +/** + * struct base_job_fault_event - keeps track of the atom which faulted or which + * completed after the faulty atom but before the + * debug data for faulty atom was dumped. + * + * @event_code: event code for the atom, should != BASE_JD_EVENT_DONE for + * the atom which faulted. + * @katom: pointer to the atom for which job fault occurred or which + * completed after the faulty atom. + * @job_fault_work: work item, queued only for the faulty atom, which waits for + * the dumping to get completed and then does the bottom half + * of job done for the atoms which followed the faulty atom. + * @head: List head used to store the atom in the global list of + * faulty atoms or context specific list of atoms which got + * completed during the dump. + * @reg_offset: offset of the register to be dumped next, only applicable + * for the faulty atom. + */ +struct base_job_fault_event { + + u32 event_code; + struct kbase_jd_atom *katom; + struct work_struct job_fault_work; + struct list_head head; + int reg_offset; +}; +#endif + +/** + * struct kbase_jd_atom_dependency - Contains the dependency info for an atom. + * @atom: pointer to the dependee atom. + * @dep_type: type of dependency on the dependee @atom, i.e. order or data + * dependency. BASE_JD_DEP_TYPE_INVALID indicates no dependency. 
+ */ +struct kbase_jd_atom_dependency { + struct kbase_jd_atom *atom; + u8 dep_type; +}; + +/** + * kbase_jd_katom_dep_atom - Retrieves a read-only reference to the + * dependee atom. + * @dep: pointer to the dependency info structure. + * + * Return: read-only reference to dependee atom. + */ +static inline const struct kbase_jd_atom * +kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep) +{ + LOCAL_ASSERT(dep != NULL); + + return (const struct kbase_jd_atom *)(dep->atom); +} + +/** + * kbase_jd_katom_dep_type - Retrieves the dependency type info + * + * @dep: pointer to the dependency info structure. + * + * Return: the type of dependency there is on the dependee atom. + */ +static inline u8 kbase_jd_katom_dep_type( + const struct kbase_jd_atom_dependency *dep) +{ + LOCAL_ASSERT(dep != NULL); + + return dep->dep_type; +} + +/** + * kbase_jd_katom_dep_set - sets up the dependency info structure + * as per the values passed. + * @const_dep: pointer to the dependency info structure to be set up. + * @a: pointer to the dependee atom. + * @type: type of dependency there is on the dependee atom. + */ +static inline void kbase_jd_katom_dep_set( + const struct kbase_jd_atom_dependency *const_dep, + struct kbase_jd_atom *a, u8 type) +{ + struct kbase_jd_atom_dependency *dep; + + LOCAL_ASSERT(const_dep != NULL); + + dep = (struct kbase_jd_atom_dependency *)const_dep; + + dep->atom = a; + dep->dep_type = type; +} + +/** + * kbase_jd_katom_dep_clear - resets the dependency info structure + * + * @const_dep: pointer to the dependency info structure to be reset. 
+ */ +static inline void kbase_jd_katom_dep_clear( + const struct kbase_jd_atom_dependency *const_dep) +{ + struct kbase_jd_atom_dependency *dep; + + LOCAL_ASSERT(const_dep != NULL); + + dep = (struct kbase_jd_atom_dependency *)const_dep; + + dep->atom = NULL; + dep->dep_type = BASE_JD_DEP_TYPE_INVALID; +} + +/** + * enum kbase_atom_gpu_rb_state - The state of an atom, pertinent after it + * becomes runnable, with respect to job slot + * ringbuffer/fifo. + * @KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: Atom not currently present in slot fifo, + * which implies that either atom has not become + * runnable due to dependency or has completed + * the execution on GPU. + * @KBASE_ATOM_GPU_RB_WAITING_BLOCKED: Atom has been added to slot fifo but is + * blocked due to cross slot dependency, + * can't be submitted to GPU. + * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV: Atom has been added to slot + * fifo but is waiting for the completion of + * previously added atoms in current & other + * slots, as their protected mode requirements + * do not match with the current atom. + * @KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION: Atom is in slot fifo + * and is waiting for completion of protected + * mode transition, needed before the atom is + * submitted to GPU. + * @KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: Atom is in slot fifo but is + * waiting for the cores, which are needed to + * execute the job chain represented by the atom, + * to become available + * @KBASE_ATOM_GPU_RB_READY: Atom is in slot fifo and can be submitted to + * GPU. + * @KBASE_ATOM_GPU_RB_SUBMITTED: Atom is in slot fifo and has been submitted + * to GPU. + * @KBASE_ATOM_GPU_RB_RETURN_TO_JS: Atom must be returned to JS due to some + * failure, but only after the previously added + * atoms in fifo have completed or have also + * been returned to JS. 
+ */ +enum kbase_atom_gpu_rb_state { + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB, + KBASE_ATOM_GPU_RB_WAITING_BLOCKED, + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV, + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION, + KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE, + KBASE_ATOM_GPU_RB_READY, + KBASE_ATOM_GPU_RB_SUBMITTED, + KBASE_ATOM_GPU_RB_RETURN_TO_JS = -1 +}; + +/** + * enum kbase_atom_enter_protected_state - The state of an atom with respect to + * the preparation for GPU's entry into protected mode, + * becomes pertinent only after atom's state with respect + * to slot ringbuffer is + * KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION + * @KBASE_ATOM_ENTER_PROTECTED_CHECK: Starting state. Check if there are any + * atoms currently submitted to GPU and protected mode + * transition is not already in progress. + * @KBASE_ATOM_ENTER_PROTECTED_HWCNT: Wait for hardware counter context to + * become disabled before entry into protected mode. + * @KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: Wait for the L2 to become idle in + * preparation for the coherency change. L2 shall be + * powered down and GPU shall come out of fully + * coherent mode before entering protected mode. + * @KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY: Prepare coherency change; + * for BASE_HW_ISSUE_TGOX_R1_1234 also request L2 power on + * so that coherency register contains correct value when + * GPU enters protected mode. + * @KBASE_ATOM_ENTER_PROTECTED_FINISHED: End state; for + * BASE_HW_ISSUE_TGOX_R1_1234 check + * that L2 is powered up and switch GPU to protected mode. + */ +enum kbase_atom_enter_protected_state { + /* + * NOTE: The integer value of this must match + * KBASE_ATOM_EXIT_PROTECTED_CHECK. 
+ */ + KBASE_ATOM_ENTER_PROTECTED_CHECK = 0, + KBASE_ATOM_ENTER_PROTECTED_HWCNT, + KBASE_ATOM_ENTER_PROTECTED_IDLE_L2, + KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY, + KBASE_ATOM_ENTER_PROTECTED_FINISHED, +}; + +/** + * enum kbase_atom_exit_protected_state - The state of an atom with respect to + * the preparation for GPU's exit from protected mode, + * becomes pertinent only after atom's state with respect + * to slot ringbuffer is + * KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION + * @KBASE_ATOM_EXIT_PROTECTED_CHECK: Starting state. Check if there are any + * atoms currently submitted to GPU and protected mode + * transition is not already in progress. + * @KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: Wait for the L2 to become idle in + * preparation for the reset, as exiting protected mode + * requires a reset. + * @KBASE_ATOM_EXIT_PROTECTED_RESET: Issue the reset to trigger exit from + * protected mode + * @KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: End state. Wait for the reset to + * complete + */ +enum kbase_atom_exit_protected_state { + /* + * NOTE: The integer value of this must match + * KBASE_ATOM_ENTER_PROTECTED_CHECK. + */ + KBASE_ATOM_EXIT_PROTECTED_CHECK = 0, + KBASE_ATOM_EXIT_PROTECTED_IDLE_L2, + KBASE_ATOM_EXIT_PROTECTED_RESET, + KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT, +}; + +/** + * struct kbase_ext_res - Contains the info for external resources referred + * by an atom, which have been mapped on GPU side. + * @gpu_address: Start address of the memory region allocated for + * the resource from GPU virtual address space. + * @alloc: pointer to physical pages tracking object, set on + * mapping the external resource on GPU side. + */ +struct kbase_ext_res { + u64 gpu_address; + struct kbase_mem_phy_alloc *alloc; +}; + +/** + * struct kbase_jd_atom - object representing the atom, containing the complete + * state and attributes of an atom. 
+ * @work: work item for the bottom half processing of the atom, + * by JD or JS, after it got executed on GPU or the + * input fence got signaled + * @start_timestamp: time at which the atom was submitted to the GPU, by + * updating the JS_HEAD_NEXTn register. + * @udata: copy of the user data sent for the atom in + * base_jd_submit. + * @kctx: Pointer to the base context with which the atom is + * associated. + * @dep_head: Array of 2 list heads, pointing to the two lists of + * atoms + * which are blocked due to dependency on this atom. + * @dep_item: Array of 2 list heads, used to store the atom in the + * list of other atoms depending on the same dependee + * atom. + * @dep: Array containing the dependency info for the 2 atoms + * on which the atom depends. + * @jd_item: List head used during job dispatch job_done + * processing - as dependencies may not be entirely + * resolved at this point, + * we need to use a separate list head. + * @in_jd_list: flag set to true if atom's @jd_item is currently on + * a list, prevents atom being processed twice. + * @jit_ids: Zero-terminated array of IDs of just-in-time memory + * allocations written to by the atom. When the atom + * completes, the value stored at the + * &struct_base_jit_alloc_info.heap_info_gpu_addr of + * each allocation is read in order to enforce an + * overall physical memory usage limit. + * @nr_extres: number of external resources referenced by the atom. + * @extres: pointer to the location containing info about + * @nr_extres external resources referenced by the atom. + * @device_nr: indicates the coregroup with which the atom is + * associated, when + * BASE_JD_REQ_SPECIFIC_COHERENT_GROUP is specified. + * @jc: GPU address of the job-chain. + * @softjob_data: Copy of data read from the user space buffer that @jc + * points to. 
+ * @fence: Stores either an input or output sync fence, + * depending on soft-job type + * @sync_waiter: The sync fence waiter structure passed to + * the callback function on signaling of the input + * fence. + * @dma_fence: object containing pointers to both input & output + * fences and other related members used for explicit + * sync through soft jobs and for the implicit + * synchronization required on access to external + * resources. + * @event_code: Event code for the job chain represented by the atom, + * both HW and low-level SW events are represented by + * event codes. + * @core_req: bitmask of BASE_JD_REQ_* flags specifying either + * Hw or Sw requirements for the job chain represented + * by the atom. + * @ticks: Number of scheduling ticks for which atom has been + * running on the GPU. + * @sched_priority: Priority of the atom for Job scheduling, as per the + * KBASE_JS_ATOM_SCHED_PRIO_*. + * @completed: Wait queue to wait upon for the completion of atom. + * @status: Indicates at a high level what stage the atom is in, + * as per KBASE_JD_ATOM_STATE_*, i.e. whether it is not + * in use or it is queued in JD or given to JS or + * submitted to Hw or it completed the execution on Hw. + * @work_id: used for GPU tracepoints, it is a snapshot of the + * 'work_id' counter in kbase_jd_context which is + * incremented on every call to base_jd_submit. + * @slot_nr: Job slot chosen for the atom. + * @atom_flags: bitmask of KBASE_KATOM_FLAG* flags capturing the + * exact low level state of the atom. + * @gpu_rb_state: one of the KBASE_ATOM_GPU_RB_* states, precisely + * tracking atom's state after it has entered + * Job scheduler on becoming runnable. Atom + * could be blocked due to cross slot dependency + * or waiting for the shader cores to become available + * or waiting for protected mode transitions to + * complete. 
+ * @need_cache_flush_cores_retained: flag indicating that manual flush of GPU + * cache is needed for the atom and the shader cores + * used for atom have been kept on. + * @blocked: flag indicating that atom's resubmission to GPU is + * blocked till the work item is scheduled to return the + * atom to JS. + * @pre_dep: Pointer to atom that this atom has same-slot + * dependency on + * @post_dep: Pointer to atom that has same-slot dependency on + * this atom + * @x_pre_dep: Pointer to atom that this atom has cross-slot + * dependency on + * @x_post_dep: Pointer to atom that has cross-slot dependency on + * this atom + * @flush_id: The GPU's flush count recorded at the time of + * submission, + * used for the cache flush optimization + * @fault_event: Info for dumping the debug data on Job fault. + * @queue: List head used for 4 different purposes : + * Adds atom to the list of dma-buf fence waiting atoms. + * Adds atom to the list of atoms blocked due to cross + * slot dependency. + * Adds atom to the list of softjob atoms for which JIT + * allocation has been deferred + * Adds atom to the list of softjob atoms waiting for + * the signaling of fence. + * @jit_node: Used to keep track of all JIT free/alloc jobs in + * submission order + * @jit_blocked: Flag indicating that JIT allocation requested through + * softjob atom will be reattempted after the impending + * free of other active JIT allocations. + * @will_fail_event_code: If non-zero, this indicates that the atom will fail + * with the set event_code when the atom is processed. + * Used for special handling of atoms, which have a data + * dependency on the failed atoms. + * @protected_state: State of the atom, as per + * KBASE_ATOM_(ENTER|EXIT)_PROTECTED_*, + * when transitioning into or out of protected mode. + * Atom will be either entering or exiting the + * protected mode. + * @runnable_tree_node: The node added to context's job slot specific rb tree + * when the atom becomes runnable. 
+ * @age: Age of atom relative to other atoms in the context, + * is a snapshot of the age_count counter in kbase + * context. + */ +struct kbase_jd_atom { + struct work_struct work; + ktime_t start_timestamp; + + struct base_jd_udata udata; + struct kbase_context *kctx; + + struct list_head dep_head[2]; + struct list_head dep_item[2]; + const struct kbase_jd_atom_dependency dep[2]; + struct list_head jd_item; + bool in_jd_list; + +#if MALI_JIT_PRESSURE_LIMIT + u8 jit_ids[2]; +#endif /* MALI_JIT_PRESSURE_LIMIT */ + + u16 nr_extres; + struct kbase_ext_res *extres; + + u32 device_nr; + u64 jc; + void *softjob_data; +#if defined(CONFIG_SYNC) + struct sync_fence *fence; + struct sync_fence_waiter sync_waiter; +#endif /* CONFIG_SYNC */ +#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE) + struct { + /* Use the functions/API defined in mali_kbase_fence.h + * when working with this sub struct + */ +#if defined(CONFIG_SYNC_FILE) + /* Input fence */ +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence_in; +#else + struct dma_fence *fence_in; +#endif +#endif + /* This points to the dma-buf output fence for this atom. If + * this is NULL then there is no fence for this atom and the + * following fields related to dma_fence may have invalid data. + * + * The context and seqno fields contain the details for this + * fence. + * + * This fence is signaled when the katom is completed, + * regardless of the event_code of the katom (signal also on + * failure). + */ +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence; +#else + struct dma_fence *fence; +#endif + /* The dma-buf fence context number for this atom. A unique + * context number is allocated to each katom in the context on + * context creation. + */ + unsigned int context; + /* The dma-buf fence sequence number for this atom. This is + * increased every time this katom uses dma-buf fence. 
+ */ + atomic_t seqno; + /* This contains a list of all callbacks set up to wait on + * other fences. This atom must be held back from JS until all + * these callbacks have been called and dep_count has reached + * 0. The initial value of dep_count must be equal to the + * number of callbacks on this list. + * + * This list is protected by jctx.lock. Callbacks are added to + * this list when the atom is built and the waits are set up. + * All the callbacks then stay on the list until all callbacks + * have been called and the atom is queued, or cancelled, and + * then all callbacks are taken off the list and freed. + */ + struct list_head callbacks; + /* Atomic counter of number of outstanding dma-buf fence + * dependencies for this atom. When dep_count reaches 0 the + * atom may be queued. + * + * The special value "-1" may only be set after the count + * reaches 0, while holding jctx.lock. This indicates that the + * atom has been handled, either queued in JS or cancelled. + * + * If anyone but the dma-fence worker sets this to -1 they must + * ensure that any potentially queued worker must have + * completed before allowing the atom to be marked as unused. + * This can be done by flushing the fence work queue: + * kctx->dma_fence.wq. 
+ */ + atomic_t dep_count; + } dma_fence; +#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE */ + + /* Note: refer to kbasep_js_atom_retained_state, which will take a copy + * of some of the following members + */ + enum base_jd_event_code event_code; + base_jd_core_req core_req; + u8 jobslot; + u8 renderpass_id; + struct base_jd_fragment jc_fragment; + + u32 ticks; + int sched_priority; + + wait_queue_head_t completed; + enum kbase_jd_atom_state status; +#ifdef CONFIG_GPU_TRACEPOINTS + int work_id; +#endif + int slot_nr; + + u32 atom_flags; + + int retry_count; + + enum kbase_atom_gpu_rb_state gpu_rb_state; + + bool need_cache_flush_cores_retained; + + atomic_t blocked; + + struct kbase_jd_atom *pre_dep; + struct kbase_jd_atom *post_dep; + + struct kbase_jd_atom *x_pre_dep; + struct kbase_jd_atom *x_post_dep; + + u32 flush_id; + +#ifdef CONFIG_DEBUG_FS + struct base_job_fault_event fault_event; +#endif + struct list_head queue; + + struct list_head jit_node; + bool jit_blocked; + + enum base_jd_event_code will_fail_event_code; + + union { + enum kbase_atom_enter_protected_state enter; + enum kbase_atom_exit_protected_state exit; + } protected_state; + + struct rb_node runnable_tree_node; + + u32 age; +}; + +static inline bool kbase_jd_katom_is_protected( + const struct kbase_jd_atom *katom) +{ + return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED); +} + +/* + * Theory of operations: + * + * Atom objects are statically allocated within the context structure. + * + * Each atom is the head of two lists, one for the "left" set of dependencies, + * one for the "right" set. + */ + +#define KBASE_JD_DEP_QUEUE_SIZE 256 + +/** + * enum kbase_jd_renderpass_state - State of a renderpass + * @KBASE_JD_RP_COMPLETE: Unused or completed renderpass. Can only transition to + * START. + * @KBASE_JD_RP_START: Renderpass making a first attempt at tiling. + * Can transition to PEND_OOM or COMPLETE. 
+ * @KBASE_JD_RP_PEND_OOM: Renderpass whose first attempt at tiling used too much + * memory and has a soft-stop pending. Can transition to + * OOM or COMPLETE. + * @KBASE_JD_RP_OOM: Renderpass whose first attempt at tiling used too much + * memory and therefore switched to incremental + * rendering. The fragment job chain is forced to run. + * Can only transition to RETRY. + * @KBASE_JD_RP_RETRY: Renderpass making a second or subsequent attempt at + * tiling. Can transition to RETRY_PEND_OOM or COMPLETE. + * @KBASE_JD_RP_RETRY_PEND_OOM: Renderpass whose second or subsequent attempt at + * tiling used too much memory again and has a + * soft-stop pending. Can transition to RETRY_OOM + * or COMPLETE. + * @KBASE_JD_RP_RETRY_OOM: Renderpass whose second or subsequent attempt at + * tiling used too much memory again. The fragment job + * chain is forced to run. Can only transition to RETRY. + * + * A state machine is used to control incremental rendering. + */ +enum kbase_jd_renderpass_state { + KBASE_JD_RP_COMPLETE, /* COMPLETE => START */ + KBASE_JD_RP_START, /* START => PEND_OOM or COMPLETE */ + KBASE_JD_RP_PEND_OOM, /* PEND_OOM => OOM or COMPLETE */ + KBASE_JD_RP_OOM, /* OOM => RETRY */ + KBASE_JD_RP_RETRY, /* RETRY => RETRY_PEND_OOM or + * COMPLETE + */ + KBASE_JD_RP_RETRY_PEND_OOM, /* RETRY_PEND_OOM => RETRY_OOM or + * COMPLETE + */ + KBASE_JD_RP_RETRY_OOM, /* RETRY_OOM => RETRY */ +}; + +/** + * struct kbase_jd_renderpass - Data for a renderpass + * @state: Current state of the renderpass. If KBASE_JD_RP_COMPLETE then + * all other members are invalid. + * Both the job dispatcher context and hwaccess_lock must be + * locked to modify this so that it can be read with either + * (or both) locked. + * @start_katom: Address of the atom that is the start of a renderpass. + * Both the job dispatcher context and hwaccess_lock must be + * locked to modify this so that it can be read with either + * (or both) locked. 
+ * @end_katom: Address of the atom that is the end of a renderpass, or NULL + * if that atom hasn't been added to the job scheduler yet. + * The job dispatcher context and hwaccess_lock must be + * locked to modify this so that it can be read with either + * (or both) locked. + * @oom_reg_list: A list of region structures which triggered out-of-memory. + * The hwaccess_lock must be locked to access this. + * + * Atoms tagged with BASE_JD_REQ_START_RENDERPASS or BASE_JD_REQ_END_RENDERPASS + * are associated with an object of this type, which is created and maintained + * by kbase to keep track of each renderpass. + */ +struct kbase_jd_renderpass { + enum kbase_jd_renderpass_state state; + struct kbase_jd_atom *start_katom; + struct kbase_jd_atom *end_katom; + struct list_head oom_reg_list; +}; + +/** + * struct kbase_jd_context - per context object encapsulating all the + * Job dispatcher related state. + * @lock: lock to serialize the updates made to the + * Job dispatcher state and kbase_jd_atom objects. + * @sched_info: Structure encapsulating all the Job scheduling + * info. + * @atoms: Array of the objects representing atoms, + * containing the complete state and attributes + * of an atom. + * @renderpasses: Array of renderpass state for incremental + * rendering, indexed by user-specified renderpass + * ID. + * @job_nr: Tracks the number of atoms being processed by the + * kbase. This includes atoms that are not tracked by + * scheduler: 'not ready to run' & 'dependency-only' + * jobs. + * @zero_jobs_wait: Waitq that reflects whether there are no jobs + * (including SW-only dependency jobs). This is set + * when no jobs are present on the ctx, and clear + * when there are jobs. + * This must be updated atomically with @job_nr. + * note: Job Dispatcher knows about more jobs than + * the Job Scheduler as it is unaware of jobs that + * are blocked on dependencies and SW-only dependency + * jobs. 
This waitq can be waited upon to find out + * when the context jobs are all done/cancelled + * (including those that might've been blocked + * on dependencies) - and so, whether it can be + * terminated. However, it should only be terminated + * once it is not present in the run-pool. + * Since the waitq is only set under @lock, + * the waiter should also briefly obtain and drop + * @lock to guarantee that the setter has completed + * its work on the kbase_context + * @job_done_wq: Workqueue to which the per atom work item is + * queued for bottom half processing when the + * atom completes + * execution on GPU or the input fence get signaled. + * @tb_lock: Lock to serialize the write access made to @tb to + * to store the register access trace messages. + * @tb: Pointer to the Userspace accessible buffer storing + * the trace messages for register read/write + * accesses made by the Kbase. The buffer is filled + * in circular fashion. + * @tb_wrap_offset: Offset to the end location in the trace buffer, + * the write pointer is moved to the beginning on + * reaching this offset. + * @work_id: atomic variable used for GPU tracepoints, + * incremented on every call to base_jd_submit. + * @jit_atoms_head: A list of the just-in-time memory soft-jobs, both + * allocate & free, in submission order, protected + * by kbase_jd_context.lock. + * @jit_pending_alloc: A list of just-in-time memory allocation + * soft-jobs which will be reattempted after the + * impending free of other active allocations. 
+ */ +struct kbase_jd_context { + struct mutex lock; + struct kbasep_js_kctx_info sched_info; + struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT]; + struct kbase_jd_renderpass renderpasses[BASE_JD_RP_COUNT]; + struct workqueue_struct *job_done_wq; + + wait_queue_head_t zero_jobs_wait; + spinlock_t tb_lock; + u32 *tb; + u32 job_nr; + size_t tb_wrap_offset; + +#ifdef CONFIG_GPU_TRACEPOINTS + atomic_t work_id; +#endif + + struct list_head jit_atoms_head; + struct list_head jit_pending_alloc; +}; + +/** + * struct jsctx_queue - JS context atom queue + * @runnable_tree: Root of RB-tree containing currently runnable atoms on this + * job slot. + * @x_dep_head: Head item of the linked list of atoms blocked on cross-slot + * dependencies. Atoms on this list will be moved to the + * runnable_tree when the blocking atom completes. + * + * hwaccess_lock must be held when accessing this structure. + */ +struct jsctx_queue { + struct rb_root runnable_tree; + struct list_head x_dep_head; +}; + +#endif /* _KBASE_JM_DEFS_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h similarity index 100% rename from dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h rename to bifrost/r25p0/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_js.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_js.h new file mode 100644 index 0000000..6c222ce --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_js.h @@ -0,0 +1,892 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Job Scheduler Interface. + * These interfaces are Internal to KBase. + */ + +#ifndef _KBASE_JM_JS_H_ +#define _KBASE_JM_JS_H_ + +#include "mali_kbase_js_ctx_attr.h" + +/** + * kbasep_js_devdata_init - Initialize the Job Scheduler + * + * The struct kbasep_js_device_data sub-structure of kbdev must be zero + * initialized before passing to the kbasep_js_devdata_init() function. This is + * to give efficient error path code. + */ +int kbasep_js_devdata_init(struct kbase_device * const kbdev); + +/** + * kbasep_js_devdata_halt - Halt the Job Scheduler. + * + * It is safe to call this on kbdev even if the kbasep_js_device_data + * sub-structure was never initialized/failed initialization, to give efficient + * error-path code. + * + * For this to work, the struct kbasep_js_device_data sub-structure of kbdev + * must be zero initialized before passing to the kbasep_js_devdata_init() + * function. This is to give efficient error path code. + * + * It is a programming error to call this whilst there are still kbase_context + * structures registered with this scheduler. + * + */ +void kbasep_js_devdata_halt(struct kbase_device *kbdev); + +/** + * kbasep_js_devdata_term - Terminate the Job Scheduler + * + * It is safe to call this on kbdev even if the kbasep_js_device_data + * sub-structure was never initialized/failed initialization, to give efficient + * error-path code. 
+ * + * For this to work, the struct kbasep_js_device_data sub-structure of kbdev + * must be zero initialized before passing to the kbasep_js_devdata_init() + * function. This is to give efficient error path code. + * + * It is a programming error to call this whilst there are still kbase_context + * structures registered with this scheduler. + */ +void kbasep_js_devdata_term(struct kbase_device *kbdev); + +/** + * kbasep_js_kctx_init - Initialize the Scheduling Component of a + * struct kbase_context on the Job Scheduler. + * + * This effectively registers a struct kbase_context with a Job Scheduler. + * + * It does not register any jobs owned by the struct kbase_context with + * the scheduler. Those must be separately registered by kbasep_js_add_job(). + * + * The struct kbase_context must be zero initialized before passing to the + * kbase_js_init() function. This is to give efficient error path code. + */ +int kbasep_js_kctx_init(struct kbase_context *const kctx); + +/** + * kbasep_js_kctx_term - Terminate the Scheduling Component of a + * struct kbase_context on the Job Scheduler + * + * This effectively de-registers a struct kbase_context from its Job Scheduler + * + * It is safe to call this on a struct kbase_context that has never had or + * failed initialization of its jctx.sched_info member, to give efficient + * error-path code. + * + * For this to work, the struct kbase_context must be zero initialized before + * passing to the kbase_js_init() function. + * + * It is a Programming Error to call this whilst there are still jobs + * registered with this context. + */ +void kbasep_js_kctx_term(struct kbase_context *kctx); + +/** + * kbasep_js_add_job - Add a job chain to the Job Scheduler, + * and take necessary actions to + * schedule the context/run the job. 
+ * + * This atomically does the following: + * * Update the numbers of jobs information + * * Add the job to the run pool if necessary (part of init_job) + * + * Once this is done, then an appropriate action is taken: + * * If the ctx is scheduled, it attempts to start the next job (which might be + * this added job) + * * Otherwise, and if this is the first job on the context, it enqueues it on + * the Policy Queue + * + * The Policy's Queue can be updated by this in the following ways: + * * In the above case that this is the first job on the context + * * If the context is high priority and the context is not scheduled, then it + * could cause the Policy to schedule out a low-priority context, allowing + * this context to be scheduled in. + * + * If the context is already scheduled on the RunPool, then adding a job to it + * is guaranteed not to update the Policy Queue. And so, the caller is + * guaranteed to not need to try scheduling a context from the Run Pool - it + * can safely assert that the result is false. + * + * It is a programming error to have more than U32_MAX jobs in flight at a time. + * + * The following locking conditions are made on the caller: + * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex. + * * it must not hold hwaccess_lock (as this will be obtained internally) + * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be + * obtained internally) + * * it must not hold kbasep_jd_device_data::queue_mutex (again, it's used + * internally). + * + * Return: true indicates that the Policy Queue was updated, and so the + * caller will need to try scheduling a context onto the Run Pool, + * false indicates that no updates were made to the Policy Queue, + * so no further action is required from the caller. This is always returned + * when the context is currently scheduled. 
+ */ +bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom); + +/** + * kbasep_js_remove_job - Remove a job chain from the Job Scheduler, + * except for its 'retained state'. + * + * Completely removing a job requires several calls: + * * kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of + * the atom + * * kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler + * * kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the + * remaining state held as part of the job having been run. + * + * In the common case of atoms completing normally, this set of actions is more + * optimal for spinlock purposes than having kbasep_js_remove_job() handle all + * of the actions. + * + * In the case of canceling atoms, it is easier to call + * kbasep_js_remove_cancelled_job(), which handles all the necessary actions. + * + * It is a programming error to call this when: + * * a atom is not a job belonging to kctx. + * * a atom has already been removed from the Job Scheduler. + * * a atom is still in the runpool + * + * Do not use this for removing jobs being killed by kbase_jd_cancel() - use + * kbasep_js_remove_cancelled_job() instead. + * + * The following locking conditions are made on the caller: + * * it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. + * + */ +void kbasep_js_remove_job(struct kbase_device *kbdev, + struct kbase_context *kctx, struct kbase_jd_atom *atom); + +/** + * kbasep_js_remove_cancelled_job - Completely remove a job chain from the + * Job Scheduler, in the case + * where the job chain was cancelled. + * + * This is a variant of kbasep_js_remove_job() that takes care of removing all + * of the retained state too. This is generally useful for cancelled atoms, + * which need not be handled in an optimal way. + * + * It is a programming error to call this when: + * * a atom is not a job belonging to kctx. + * * a atom has already been removed from the Job Scheduler. 
+ * * a atom is still in the runpool: + * * it is not being killed with kbasep_jd_cancel() + * + * The following locking conditions are made on the caller: + * * it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. + * * it must not hold the hwaccess_lock, (as this will be obtained + * internally) + * * it must not hold kbasep_js_device_data::runpool_mutex (as this could be + * obtained internally) + * + * Return: true indicates that ctx attributes have changed and the caller + * should call kbase_js_sched_all() to try to run more jobs and + * false otherwise. + */ +bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, + struct kbase_context *kctx, + struct kbase_jd_atom *katom); + +/** + * kbasep_js_runpool_requeue_or_kill_ctx - Handling the requeuing/killing of a + * context that was evicted from the + * policy queue or runpool. + * + * This should be used whenever handing off a context that has been evicted + * from the policy queue or the runpool: + * * If the context is not dying and has jobs, it gets re-added to the policy + * queue + * * Otherwise, it is not added + * + * In addition, if the context is dying the jobs are killed asynchronously. + * + * In all cases, the Power Manager active reference is released + * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true. + * has_pm_ref must be set to false whenever the context was not previously in + * the runpool and does not hold a Power Manager active refcount. Note that + * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an + * active refcount even though they weren't in the runpool. + * + * The following locking conditions are made on the caller: + * * it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. 
+ * * it must not hold kbasep_jd_device_data::queue_mutex (as this will be + * obtained internally) + */ +void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx, bool has_pm_ref); + +/** + * kbasep_js_runpool_release_ctx - Release a refcount of a context being busy, + * allowing it to be scheduled out. + * + * When the refcount reaches zero, the context might be scheduled out + * (depending on whether the Scheduling Policy has deemed it so, or if it has + * run out of jobs). + * + * If the context does get scheduled out, then the following actions will be + * taken as part of descheduling a context: + * For the context being descheduled: + * * If the context is in the process of dying (all the jobs are being + * removed from it), then descheduling also kills off any jobs remaining in the + * context. + * * If the context is not dying, and any jobs remain after descheduling the + * context then it is re-enqueued to the Policy's Queue. + * * Otherwise, the context is still known to the scheduler, but remains absent + * from the Policy Queue until a job is next added to it. + * * In all descheduling cases, the Power Manager active reference (obtained + * during kbasep_js_try_schedule_head_ctx()) is released + * (kbase_pm_context_idle()). + * + * Whilst the context is being descheduled, this also handles actions that + * cause more atoms to be run: + * * Attempt submitting atoms when the Context Attributes on the Runpool have + * changed. This is because the context being scheduled out could mean that + * there are more opportunities to run atoms. + * * Attempt submitting to a slot that was previously blocked due to affinity + * restrictions. This is usually only necessary when releasing a context + * happens as part of completing a previous job, but is harmless nonetheless. + * * Attempt scheduling in a new context (if one is available), and if + * necessary, running a job from that new context. 
+ * + * Unlike retaining a context in the runpool, this function cannot be called + * from IRQ context. + * + * It is a programming error to call this on a kctx that is not currently + * scheduled, or that already has a zero refcount. + * + * The following locking conditions are made on the caller: + * * it must not hold the hwaccess_lock, because it will be used internally. + * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex. + * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be + * obtained internally) + * * it must not hold the kbase_device::mmu_hw_mutex (as this will be + * obtained internally) + * * it must not hold kbasep_jd_device_data::queue_mutex (as this will be + * obtained internally) + * + */ +void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx); + +/** + * kbasep_js_runpool_release_ctx_and_katom_retained_state - Variant of + * kbasep_js_runpool_release_ctx() that handles additional + * actions from completing an atom. + * + * This is usually called as part of completing an atom and releasing the + * refcount on the context held by the atom. + * + * Therefore, the extra actions carried out are part of handling actions queued + * on a completed atom, namely: + * * Releasing the atom's context attributes + * * Retrying the submission on a particular slot, because we couldn't submit + * on that slot from an IRQ handler. + * + * The locking conditions of this function are the same as those for + * kbasep_js_runpool_release_ctx() + */ +void kbasep_js_runpool_release_ctx_and_katom_retained_state( + struct kbase_device *kbdev, + struct kbase_context *kctx, + struct kbasep_js_atom_retained_state *katom_retained_state); + +/** + * kbasep_js_runpool_release_ctx_nolock - Variant of + * kbasep_js_runpool_release_ctx() that assumes that + * kbasep_js_device_data::runpool_mutex and + * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not + * attempt to schedule new contexts. 
+ */ +void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev, + struct kbase_context *kctx); + +/** + * kbasep_js_schedule_privileged_ctx - Schedule in a privileged context + * + * This schedules a context in regardless of the context priority. + * If the runpool is full, a context will be forced out of the runpool and the + * function will wait for the new context to be scheduled in. + * The context will be kept scheduled in (and the corresponding address space + * reserved) until kbasep_js_release_privileged_ctx is called). + * + * The following locking conditions are made on the caller: + * * it must not hold the hwaccess_lock, because it will be used internally. + * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be + * obtained internally) + * * it must not hold the kbase_device::mmu_hw_mutex (as this will be + * obtained internally) + * * it must not hold kbasep_jd_device_data::queue_mutex (again, it's used + * internally). + * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will + * be used internally. + * + */ +void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx); + +/** + * kbasep_js_release_privileged_ctx - Release a privileged context, + * allowing it to be scheduled out. + * + * See kbasep_js_runpool_release_ctx for potential side effects. + * + * The following locking conditions are made on the caller: + * * it must not hold the hwaccess_lock, because it will be used internally. + * * it must not hold kbasep_js_kctx_info::ctx::jsctx_mutex. 
+ * * it must not hold kbasep_js_device_data::runpool_mutex (as this will be + * obtained internally) + * * it must not hold the kbase_device::mmu_hw_mutex (as this will be + * obtained internally) + * + */ +void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx); + +/** + * kbase_js_try_run_jobs - Try to submit the next job on each slot + * + * The following locks may be used: + * * kbasep_js_device_data::runpool_mutex + * * hwaccess_lock + */ +void kbase_js_try_run_jobs(struct kbase_device *kbdev); + +/** + * kbasep_js_suspend - Suspend the job scheduler during a Power Management + * Suspend event. + * + * Causes all contexts to be removed from the runpool, and prevents any + * contexts from (re)entering the runpool. + * + * This does not handle suspending the one privileged context: the caller must + * instead do this by suspending the GPU HW Counter Instrumentation. + * + * This will eventually cause all Power Management active references held by + * contexts on the runpool to be released, without running any more atoms. + * + * The caller must then wait for all Power Management active refcount to become + * zero before completing the suspend. + * + * The emptying mechanism may take some time to complete, since it can wait for + * jobs to complete naturally instead of forcing them to end quickly. However, + * this is bounded by the Job Scheduler's Job Timeouts. Hence, this + * function is guaranteed to complete in a finite time. + */ +void kbasep_js_suspend(struct kbase_device *kbdev); + +/** + * kbasep_js_resume - Resume the Job Scheduler after a Power Management + * Resume event. + * + * This restores the actions from kbasep_js_suspend(): + * * Schedules contexts back into the runpool + * * Resumes running atoms on the GPU + */ +void kbasep_js_resume(struct kbase_device *kbdev); + +/** + * kbase_js_dep_resolved_submit - Submit an atom to the job scheduler. 
+ * + * @kctx: Context pointer + * @katom: Pointer to the atom to submit + * + * The atom is enqueued on the context's ringbuffer. The caller must have + * ensured that all dependencies can be represented in the ringbuffer. + * + * Caller must hold jctx->lock + * + * Return: true if the context requires to be enqueued, otherwise false. + */ +bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, + struct kbase_jd_atom *katom); + +/** + * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer. + * @kctx: Context Pointer + * @prio: Priority (specifies the queue together with js). + * @js: Job slot (specifies the queue together with prio). + * + * Pushes all possible atoms from the linked list to the ringbuffer. + * The number of atoms is limited to free space in the ringbuffer and + * number of available atoms in the linked list. + * + */ +void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js); + +/** + * kbase_js_pull - Pull an atom from a context in the job scheduler for + * execution. + * + * @kctx: Context to pull from + * @js: Job slot to pull from + * + * The atom will not be removed from the ringbuffer at this stage. + * + * The HW access lock must be held when calling this function. + * + * Return: a pointer to an atom, or NULL if there are no atoms for this + * slot that can be currently run. + */ +struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js); + +/** + * kbase_js_unpull - Return an atom to the job scheduler ringbuffer. + * + * @kctx: Context pointer + * @katom: Pointer to the atom to unpull + * + * An atom is 'unpulled' if execution is stopped but intended to be returned to + * later. The most common reason for this is that the atom has been + * soft-stopped. Another reason is if an end-of-renderpass atom completed + * but will need to be run again as part of the same renderpass. 
+ * + * Note that if multiple atoms are to be 'unpulled', they must be returned in + * the reverse order to which they were originally pulled. It is a programming + * error to return atoms in any other order. + * + * The HW access lock must be held when calling this function. + * + */ +void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom); + +/** + * kbase_js_complete_atom_wq - Complete an atom from jd_done_worker(), + * removing it from the job + * scheduler ringbuffer. + * @kctx: Context pointer + * @katom: Pointer to the atom to complete + * + * If the atom failed then all dependee atoms marked for failure propagation + * will also fail. + * + * Return: true if the context is now idle (no jobs pulled), false otherwise. + */ +bool kbase_js_complete_atom_wq(struct kbase_context *kctx, + struct kbase_jd_atom *katom); + +/** + * kbase_js_complete_atom - Complete an atom. + * + * @katom: Pointer to the atom to complete + * @end_timestamp: The time that the atom completed (may be NULL) + * + * Most of the work required to complete an atom will be performed by + * jd_done_worker(). + * + * The HW access lock must be held when calling this function. + * + * Return: an atom that has now been unblocked and can now be run, or NULL + * if none + */ +struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, + ktime_t *end_timestamp); + +/** + * kbase_js_atom_blocked_on_x_dep - Decide whether to ignore a cross-slot + * dependency + * @katom: Pointer to an atom in the slot ringbuffer + * + * A cross-slot dependency is ignored if necessary to unblock incremental + * rendering. If the atom at the start of a renderpass used too much memory + * and was soft-stopped then the atom at the end of a renderpass is submitted + * to hardware regardless of its dependency on the start-of-renderpass atom. + * This can happen multiple times for the same pair of atoms. 
+ * + * Return: true to block the atom or false to allow it to be submitted to + * hardware. + */ +bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *katom); + +/** + * kbase_js_sched - Submit atoms from all available contexts. + * + * @kbdev: Device pointer + * @js_mask: Mask of job slots to submit to + * + * This will attempt to submit as many jobs as possible to the provided job + * slots. It will exit when either all job slots are full, or all contexts have + * been used. + * + */ +void kbase_js_sched(struct kbase_device *kbdev, int js_mask); + +/** + * kbase_js_zap_context - Attempt to deschedule a context that is being + * destroyed + * @kctx: Context pointer + * + * This will attempt to remove a context from any internal job scheduler queues + * and perform any other actions to ensure a context will not be submitted + * from. + * + * If the context is currently scheduled, then the caller must wait for all + * pending jobs to complete before taking any further action. + */ +void kbase_js_zap_context(struct kbase_context *kctx); + +/** + * kbase_js_is_atom_valid - Validate an atom + * + * @kbdev: Device pointer + * @katom: Atom to validate + * + * This will determine whether the atom can be scheduled onto the GPU. Atoms + * with invalid combinations of core requirements will be rejected. + * + * Return: true if atom is valid, false otherwise. + */ +bool kbase_js_is_atom_valid(struct kbase_device *kbdev, + struct kbase_jd_atom *katom); + +/** + * kbase_js_set_timeouts - update all JS timeouts with user specified data + * + * @kbdev: Device pointer + * + * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is + * set to a positive number then that becomes the new value used, if a timeout + * is negative then the default is set. 
+ */ +void kbase_js_set_timeouts(struct kbase_device *kbdev); + +/** + * kbase_js_set_ctx_priority - set the context priority + * + * @kctx: Context pointer + * @new_priority: New priority value for the Context + * + * The context priority is set to a new value and it is moved to the + * pullable/unpullable list as per the new priority. + */ +void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority); + + +/** + * kbase_js_update_ctx_priority - update the context priority + * + * @kctx: Context pointer + * + * The context priority gets updated as per the priority of atoms currently in + * use for that context, but only if system priority mode for context scheduling + * is being used. + */ +void kbase_js_update_ctx_priority(struct kbase_context *kctx); + +/* + * Helpers follow + */ + +/** + * kbasep_js_is_submit_allowed - Check that a context is allowed to submit + * jobs on this policy + * + * The purpose of this abstraction is to hide the underlying data size, + * and wrap up the long repeated line of code. + * + * As with any bool, never test the return value with true. + * + * The caller must hold hwaccess_lock. + */ +static inline bool kbasep_js_is_submit_allowed( + struct kbasep_js_device_data *js_devdata, + struct kbase_context *kctx) +{ + u16 test_bit; + bool is_allowed; + + /* Ensure context really is scheduled in */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + + test_bit = (u16) (1u << kctx->as_nr); + + is_allowed = (bool) (js_devdata->runpool_irq.submit_allowed & test_bit); + dev_dbg(kctx->kbdev->dev, "JS: submit %s allowed on %p (as=%d)", + is_allowed ? "is" : "isn't", (void *)kctx, kctx->as_nr); + return is_allowed; +} + +/** + * kbasep_js_set_submit_allowed - Allow a context to submit jobs on this policy + * + * The purpose of this abstraction is to hide the underlying data size, + * and wrap up the long repeated line of code. 
+ * + * The caller must hold hwaccess_lock. + */ +static inline void kbasep_js_set_submit_allowed( + struct kbasep_js_device_data *js_devdata, + struct kbase_context *kctx) +{ + u16 set_bit; + + /* Ensure context really is scheduled in */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + + set_bit = (u16) (1u << kctx->as_nr); + + dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", + kctx, kctx->as_nr); + + js_devdata->runpool_irq.submit_allowed |= set_bit; +} + +/** + * kbasep_js_clear_submit_allowed - Prevent a context from submitting more + * jobs on this policy + * + * The purpose of this abstraction is to hide the underlying data size, + * and wrap up the long repeated line of code. + * + * The caller must hold hwaccess_lock. + */ +static inline void kbasep_js_clear_submit_allowed( + struct kbasep_js_device_data *js_devdata, + struct kbase_context *kctx) +{ + u16 clear_bit; + u16 clear_mask; + + /* Ensure context really is scheduled in */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + + clear_bit = (u16) (1u << kctx->as_nr); + clear_mask = ~clear_bit; + + dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", + kctx, kctx->as_nr); + + js_devdata->runpool_irq.submit_allowed &= clear_mask; +} + +/** + * Create an initial 'invalid' atom retained state, that requires no + * atom-related work to be done on releasing with + * kbasep_js_runpool_release_ctx_and_katom_retained_state() + */ +static inline void kbasep_js_atom_retained_state_init_invalid( + struct kbasep_js_atom_retained_state *retained_state) +{ + retained_state->event_code = BASE_JD_EVENT_NOT_STARTED; + retained_state->core_req = + KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID; +} + +/** + * Copy atom state that can be made available after jd_done_nolock() is called + * on that atom. 
+ */ +static inline void kbasep_js_atom_retained_state_copy( + struct kbasep_js_atom_retained_state *retained_state, + const struct kbase_jd_atom *katom) +{ + retained_state->event_code = katom->event_code; + retained_state->core_req = katom->core_req; + retained_state->sched_priority = katom->sched_priority; + retained_state->device_nr = katom->device_nr; +} + +/** + * kbasep_js_has_atom_finished - Determine whether an atom has finished + * (given its retained state), + * and so should be given back to + * userspace/removed from the system. + * + * @katom_retained_state: the retained state of the atom to check + * + * Reasons for an atom not finishing include: + * * Being soft-stopped (and so, the atom should be resubmitted sometime later) + * * It is an end of renderpass atom that was run to consume the output of a + * start-of-renderpass atom that was soft-stopped because it used too much + * memory. In this case, it will have to be run again later. + * + * Return: false if the atom has not finished, true otherwise. + */ +static inline bool kbasep_js_has_atom_finished( + const struct kbasep_js_atom_retained_state *katom_retained_state) +{ + return (bool) (katom_retained_state->event_code != + BASE_JD_EVENT_STOPPED && + katom_retained_state->event_code != + BASE_JD_EVENT_REMOVED_FROM_NEXT && + katom_retained_state->event_code != + BASE_JD_EVENT_END_RP_DONE); +} + +/** + * kbasep_js_atom_retained_state_is_valid - Determine whether a struct + * kbasep_js_atom_retained_state + * is valid + * @katom_retained_state the atom's retained state to check + * + * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates + * that the code should just ignore it. + * + * Return: false if the retained state is invalid, true otherwise. 
+ */ +static inline bool kbasep_js_atom_retained_state_is_valid( + const struct kbasep_js_atom_retained_state *katom_retained_state) +{ + return (bool) (katom_retained_state->core_req != + KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID); +} + +/** + * kbase_js_runpool_inc_context_count - Increment number of running contexts. + * + * The following locking conditions are made on the caller: + * * The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. + * * The caller must hold the kbasep_js_device_data::runpool_mutex + */ +static inline void kbase_js_runpool_inc_context_count( + struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); + lockdep_assert_held(&js_devdata->runpool_mutex); + + /* Track total contexts */ + KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX); + ++(js_devdata->nr_all_contexts_running); + + if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { + /* Track contexts that can submit jobs */ + KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running < + S8_MAX); + ++(js_devdata->nr_user_contexts_running); + } +} + +/** + * kbase_js_runpool_dec_context_count - decrement number of running contexts. + * + * The following locking conditions are made on the caller: + * * The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. 
+ * * The caller must hold the kbasep_js_device_data::runpool_mutex + */ +static inline void kbase_js_runpool_dec_context_count( + struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); + lockdep_assert_held(&js_devdata->runpool_mutex); + + /* Track total contexts */ + --(js_devdata->nr_all_contexts_running); + KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0); + + if (!kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { + /* Track contexts that can submit jobs */ + --(js_devdata->nr_user_contexts_running); + KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0); + } +} + +/** + * kbase_js_sched_all - Submit atoms from all available contexts to all + * job slots. + * + * @kbdev: Device pointer + * + * This will attempt to submit as many jobs as possible. It will exit when + * either all job slots are full, or all contexts have been used. + */ +static inline void kbase_js_sched_all(struct kbase_device *kbdev) +{ + kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1); +} + +extern const int +kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS]; + +extern const base_jd_prio +kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + +/** + * kbasep_js_atom_prio_to_sched_prio(): - Convert atom priority (base_jd_prio) + * to relative ordering + * @atom_prio: Priority ID to translate. + * + * Atom priority values for @ref base_jd_prio cannot be compared directly to + * find out which are higher or lower. + * + * This function will convert base_jd_prio values for successively lower + * priorities into a monotonically increasing sequence. That is, the lower the + * base_jd_prio priority, the higher the value produced by this function. 
This + * is in accordance with how the rest of the kernel treats priority. + * + * The mapping is 1:1 and the size of the valid input range is the same as the + * size of the valid output range, i.e. + * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS + * + * Note This must be kept in sync with BASE_JD_PRIO_<...> definitions + * + * Return: On success: a value in the inclusive range + * 0..KBASE_JS_ATOM_SCHED_PRIO_COUNT-1. On failure: + * KBASE_JS_ATOM_SCHED_PRIO_INVALID + */ +static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio) +{ + if (atom_prio >= BASE_JD_NR_PRIO_LEVELS) + return KBASE_JS_ATOM_SCHED_PRIO_INVALID; + + return kbasep_js_atom_priority_to_relative[atom_prio]; +} + +static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio) +{ + unsigned int prio_idx; + + KBASE_DEBUG_ASSERT(sched_prio >= 0 && + sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT); + + prio_idx = (unsigned int)sched_prio; + + return kbasep_js_relative_priority_to_atom[prio_idx]; +} + +#endif /* _KBASE_JM_JS_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_js_defs.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_js_defs.h new file mode 100644 index 0000000..0b48615 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_js_defs.h @@ -0,0 +1,409 @@ +/* + * + * (C) COPYRIGHT 2011-2018, 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/** + * @file mali_kbase_js_defs.h + * Job Scheduler Type Definitions + */ + +#ifndef _KBASE_JS_DEFS_H_ +#define _KBASE_JS_DEFS_H_ + +/** + * @addtogroup base_api + * @{ + */ + +/** + * @addtogroup base_kbase_api + * @{ + */ + +/** + * @addtogroup kbase_js + * @{ + */ +/* Forward decls */ +struct kbase_device; +struct kbase_jd_atom; + + +typedef u32 kbase_context_flags; + +/** Callback function run on all of a context's jobs registered with the Job + * Scheduler */ +typedef void (*kbasep_js_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom); + +/** + * @brief Maximum number of jobs that can be submitted to a job slot whilst + * inside the IRQ handler. + * + * This is important because GPU NULL jobs can complete whilst the IRQ handler + * is running. Otherwise, it potentially allows an unlimited number of GPU NULL + * jobs to be submitted inside the IRQ handler, which increases IRQ latency. + */ +#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2 + +/** + * @brief Context attributes + * + * Each context attribute can be thought of as a boolean value that caches some + * state information about either the runpool, or the context: + * - In the case of the runpool, it is a cache of "Do any contexts owned by + * the runpool have attribute X?" + * - In the case of a context, it is a cache of "Do any atoms owned by the + * context have attribute X?" + * + * The boolean value of the context attributes often affect scheduling + * decisions, such as affinities to use and job slots to use. + * + * To accomodate changes of state in the context, each attribute is refcounted + * in the context, and in the runpool for all running contexts. 
Specifically: + * - The runpool holds a refcount of how many contexts in the runpool have this + * attribute. + * - The context holds a refcount of how many atoms have this attribute. + */ +enum kbasep_js_ctx_attr { + /** Attribute indicating a context that contains Compute jobs. That is, + * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE + * + * @note A context can be both 'Compute' and 'Non Compute' if it contains + * both types of jobs. + */ + KBASEP_JS_CTX_ATTR_COMPUTE, + + /** Attribute indicating a context that contains Non-Compute jobs. That is, + * the context has some jobs that are \b not of type @ref + * BASE_JD_REQ_ONLY_COMPUTE. + * + * @note A context can be both 'Compute' and 'Non Compute' if it contains + * both types of jobs. + */ + KBASEP_JS_CTX_ATTR_NON_COMPUTE, + + /** Attribute indicating that a context contains compute-job atoms that + * aren't restricted to a coherent group, and can run on all cores. + * + * Specifically, this is when the atom's \a core_req satisfy: + * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2 + * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups + * + * Such atoms could be blocked from running if one of the coherent groups + * is being used by another job slot, so tracking this context attribute + * allows us to prevent such situations. + * + * @note This doesn't take into account the 1-coregroup case, where all + * compute atoms would effectively be able to run on 'all cores', but + * contexts will still not always get marked with this attribute. Instead, + * it is the caller's responsibility to take into account the number of + * coregroups when interpreting this attribute. + * + * @note Whilst Tiler atoms are normally combined with + * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without + * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy + * enough to handle anyway. 
+ */ + KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, + + /** Must be the last in the enum */ + KBASEP_JS_CTX_ATTR_COUNT +}; + +enum { + /** Bit indicating that new atom should be started because this atom completed */ + KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0), + /** Bit indicating that the atom was evicted from the JS_NEXT registers */ + KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1) +}; + +/** Combination of KBASE_JS_ATOM_DONE_<...> bits */ +typedef u32 kbasep_js_atom_done_code; + +/* + * Context scheduling mode defines for kbase_device::js_ctx_scheduling_mode + */ +enum { + /* + * In this mode, higher priority atoms will be scheduled first, + * regardless of the context they belong to. Newly-runnable higher + * priority atoms can preempt lower priority atoms currently running on + * the GPU, even if they belong to a different context. + */ + KBASE_JS_SYSTEM_PRIORITY_MODE = 0, + + /* + * In this mode, the highest-priority atom will be chosen from each + * context in turn using a round-robin algorithm, so priority only has + * an effect within the context an atom belongs to. Newly-runnable + * higher priority atoms can preempt the lower priority atoms currently + * running on the GPU, but only if they belong to the same context. + */ + KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE, + + /* Must be the last in the enum */ + KBASE_JS_PRIORITY_MODE_COUNT, +}; + +/* + * Internal atom priority defines for kbase_jd_atom::sched_prio + */ +enum { + KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0, + KBASE_JS_ATOM_SCHED_PRIO_MED, + KBASE_JS_ATOM_SCHED_PRIO_LOW, + KBASE_JS_ATOM_SCHED_PRIO_COUNT, +}; + +/* Invalid priority for kbase_jd_atom::sched_prio */ +#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1 + +/* Default priority in the case of contexts with no atoms, or being lenient + * about invalid priorities from userspace. 
+ */ +#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED + +/** + * @brief KBase Device Data Job Scheduler sub-structure + * + * This encapsulates the current context of the Job Scheduler on a particular + * device. This context is global to the device, and is not tied to any + * particular struct kbase_context running on the device. + * + * nr_contexts_running and as_free are optimized for packing together (by making + * them smaller types than u32). The operations on them should rarely involve + * masking. The use of signed types for arithmetic indicates to the compiler that + * the value will not rollover (which would be undefined behavior), and so under + * the Total License model, it is free to make optimizations based on that (i.e. + * to remove masking). + */ +struct kbasep_js_device_data { + /* Sub-structure to collect together Job Scheduling data used in IRQ + * context. The hwaccess_lock must be held when accessing. */ + struct runpool_irq { + /** Bitvector indicating whether a currently scheduled context is allowed to submit jobs. + * When bit 'N' is set in this, it indicates whether the context bound to address space + * 'N' is allowed to submit jobs. + */ + u16 submit_allowed; + + /** Context Attributes: + * Each is large enough to hold a refcount of the number of contexts + * that can fit into the runpool. This is currently BASE_MAX_NR_AS + * + * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store + * the refcount. Hence, it's not worthwhile reducing this to + * bit-manipulation on u32s to save space (where in contrast, 4 bit + * sub-fields would be easy to do and would save space). 
+ * + * Whilst this must not become negative, the sign bit is used for: + * - error detection in debug builds + * - Optimization: it is undefined for a signed int to overflow, and so + * the compiler can optimize for that never happening (thus, no masking + * is required on updating the variable) */ + s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT]; + + /* + * Affinity management and tracking + */ + /** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates + * that slot 'n' is using core i (i.e. slot_affinity_refcount[n][i] > 0) */ + u64 slot_affinities[BASE_JM_MAX_NR_SLOTS]; + /** Refcount for each core owned by each slot. Used to generate the + * slot_affinities array of bitvectors + * + * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS, + * because it is refcounted only when a job is definitely about to be + * submitted to a slot, and is de-refcounted immediately after a job + * finishes */ + s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64]; + } runpool_irq; + + /** + * Scheduling semaphore. This must be held when calling + * kbase_jm_kick() + */ + struct semaphore schedule_sem; + + /** + * List of contexts that can currently be pulled from + */ + struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + /** + * List of contexts that can not currently be pulled from, but have + * jobs currently running. 
+ */ + struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + + /** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */ + s8 nr_user_contexts_running; + /** Number of currently scheduled contexts (including ones that are not submitting jobs) */ + s8 nr_all_contexts_running; + + /** Core Requirements to match up with base_js_atom's core_req member + * @note This is a write-once member, and so no locking is required to read */ + base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS]; + + u32 scheduling_period_ns; /**< Value for JS_SCHEDULING_PERIOD_NS */ + u32 soft_stop_ticks; /**< Value for JS_SOFT_STOP_TICKS */ + u32 soft_stop_ticks_cl; /**< Value for JS_SOFT_STOP_TICKS_CL */ + u32 hard_stop_ticks_ss; /**< Value for JS_HARD_STOP_TICKS_SS */ + u32 hard_stop_ticks_cl; /**< Value for JS_HARD_STOP_TICKS_CL */ + u32 hard_stop_ticks_dumping; /**< Value for JS_HARD_STOP_TICKS_DUMPING */ + u32 gpu_reset_ticks_ss; /**< Value for JS_RESET_TICKS_SS */ + u32 gpu_reset_ticks_cl; /**< Value for JS_RESET_TICKS_CL */ + u32 gpu_reset_ticks_dumping; /**< Value for JS_RESET_TICKS_DUMPING */ + u32 ctx_timeslice_ns; /**< Value for JS_CTX_TIMESLICE_NS */ + + /** List of suspended soft jobs */ + struct list_head suspended_soft_jobs_list; + +#ifdef CONFIG_MALI_DEBUG + /* Support soft-stop on a single context */ + bool softstop_always; +#endif /* CONFIG_MALI_DEBUG */ + + /** The initalized-flag is placed at the end, to avoid cache-pollution (we should + * only be using this during init/term paths). 
+ * @note This is a write-once member, and so no locking is required to read */ + int init_status; + + /* Number of contexts that can currently be pulled from */ + u32 nr_contexts_pullable; + + /* Number of contexts that can either be pulled from or are currently + * running */ + atomic_t nr_contexts_runnable; + + /** Value for JS_SOFT_JOB_TIMEOUT */ + atomic_t soft_job_timeout_ms; + + /** + * Queue Lock, used to access the Policy's queue of contexts + * independently of the Run Pool. + * + * Of course, you don't need the Run Pool lock to access this. + */ + struct mutex queue_mutex; + + /** + * Run Pool mutex, for managing contexts within the runpool. + * Unless otherwise specified, you must hold this lock whilst accessing + * any members that follow + * + * In addition, this is used to access: + * * the kbasep_js_kctx_info::runpool substructure + */ + struct mutex runpool_mutex; +}; + +/** + * @brief KBase Context Job Scheduling information structure + * + * This is a substructure in the struct kbase_context that encapsulates all the + * scheduling information. + */ +struct kbasep_js_kctx_info { + + /** + * Job Scheduler Context information sub-structure. These members are + * accessed regardless of whether the context is: + * - In the Policy's Run Pool + * - In the Policy's Queue + * - Not queued nor in the Run Pool. + * + * You must obtain the jsctx_mutex before accessing any other members of + * this substructure. + * + * You may not access any of these members from IRQ context. + */ + struct kbase_jsctx { + struct mutex jsctx_mutex; /**< Job Scheduler Context lock */ + + /** Number of jobs ready to run - does \em not include the jobs waiting in + * the dispatcher, and dependency-only jobs. See kbase_jd_context::job_nr + * for such jobs*/ + u32 nr_jobs; + + /** Context Attributes: + * Each is large enough to hold a refcount of the number of atoms on + * the context. 
**/ + u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT]; + + /** + * Wait queue to wait for KCTX_SCHEDULED flag state changes. + * */ + wait_queue_head_t is_scheduled_wait; + + /** Link implementing JS queues. Context can be present on one + * list per job slot + */ + struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS]; + } ctx; + + /* The initialized-flag is placed at the end, to avoid cache-pollution (we should + * only be using this during init/term paths) */ + int init_status; +}; + +/** Subset of atom state that can be available after jd_done_nolock() is called + * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(), + * because the original atom could disappear. */ +struct kbasep_js_atom_retained_state { + /** Event code - to determine whether the atom has finished */ + enum base_jd_event_code event_code; + /** core requirements */ + base_jd_core_req core_req; + /* priority */ + int sched_priority; + /* Core group atom was executed on */ + u32 device_nr; + +}; + +/** + * Value signifying 'no retry on a slot required' for: + * - kbase_js_atom_retained_state::retry_submit_on_slot + * - kbase_jd_atom::retry_submit_on_slot + */ +#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1) + +/** + * base_jd_core_req value signifying 'invalid' for a struct kbasep_js_atom_retained_state. + * + * @see kbasep_js_atom_retained_state_is_valid() + */ +#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP + +/** + * @brief The JS timer resolution, in microseconds + * + * Any non-zero difference in time will be at least this size. 
+ */ +#define KBASEP_JS_TICK_RESOLUTION_US 1 + + + /** @} *//* end group kbase_js */ + /** @} *//* end group base_kbase_api */ + /** @} *//* end group base_api */ + +#endif /* _KBASE_JS_DEFS_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h new file mode 100644 index 0000000..6885f8d --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h @@ -0,0 +1,486 @@ +/* + * + * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* AUTOMATICALLY GENERATED FILE. 
If you want to amend the issues/features, + * please update base/tools/hwconfig_generator/hwc_{issues,features}.py + * For more information see base/tools/hwconfig_generator/README + */ + +#ifndef _BASE_HWCONFIG_FEATURES_H_ +#define _BASE_HWCONFIG_FEATURES_H_ + +enum base_hw_feature { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_TLS_HASHING, + BASE_HW_FEATURE_THREAD_GROUP_SPLIT, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_L2_CONFIG, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_generic[] = { + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tMIx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + 
BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_THREAD_GROUP_SPLIT, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tHEx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_THREAD_GROUP_SPLIT, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tSIx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_THREAD_GROUP_SPLIT, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + 
BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tDVx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_THREAD_GROUP_SPLIT, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tNOx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_THREAD_GROUP_SPLIT, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_TLS_HASHING, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_END +}; + +static const enum 
base_hw_feature base_hw_features_tGOx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_THREAD_GROUP_SPLIT, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_TLS_HASHING, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tTRx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tNAx[] = { + 
BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tBEx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_L2_CONFIG, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tDUx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + 
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_L2_CONFIG, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tODx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_L2_CONFIG, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tGRx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + 
BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_L2_CONFIG, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tVAx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_L2_CONFIG, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tTUx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + 
BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_L2_CONFIG, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tE2x[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_XAFFINITY, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, + BASE_HW_FEATURE_COHERENCY_REG, + BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, + BASE_HW_FEATURE_L2_CONFIG, + BASE_HW_FEATURE_CLEAN_ONLY_SAFE, + BASE_HW_FEATURE_END +}; + +#endif /* _BASE_HWCONFIG_FEATURES_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h new file mode 100644 index 0000000..3966069 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h @@ -0,0 +1,635 @@ +/* + * + * (C) 
COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features, + * please update base/tools/hwconfig_generator/hwc_{issues,features}.py + * For more information see base/tools/hwconfig_generator/README + */ + +#ifndef _BASE_HWCONFIG_ISSUES_H_ +#define _BASE_HWCONFIG_ISSUES_H_ + +enum base_hw_issue { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_7940, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TMIX_8138, + BASE_HW_ISSUE_TMIX_8206, + BASE_HW_ISSUE_TMIX_8343, + BASE_HW_ISSUE_TMIX_8463, + BASE_HW_ISSUE_TMIX_8456, + BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TMIX_8438, + BASE_HW_ISSUE_TNOX_1194, + BASE_HW_ISSUE_TGOX_R1_1234, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TSIX_1792, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_3076, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue 
base_hw_issues_generic[] = { + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TMIX_8138, + BASE_HW_ISSUE_TMIX_8206, + BASE_HW_ISSUE_TMIX_8343, + BASE_HW_ISSUE_TMIX_8463, + BASE_HW_ISSUE_TMIX_8456, + BASE_HW_ISSUE_TMIX_8438, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tMIx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_7940, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TMIX_8138, + BASE_HW_ISSUE_TMIX_8206, + BASE_HW_ISSUE_TMIX_8343, + BASE_HW_ISSUE_TMIX_8463, + BASE_HW_ISSUE_TMIX_8456, + BASE_HW_ISSUE_TMIX_8438, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tMIx_r0p1[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_7940, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TMIX_8138, + BASE_HW_ISSUE_TMIX_8206, + BASE_HW_ISSUE_TMIX_8343, + BASE_HW_ISSUE_TMIX_8463, + BASE_HW_ISSUE_TMIX_8456, + BASE_HW_ISSUE_TMIX_8438, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tMIx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_7940, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TMIX_8138, + BASE_HW_ISSUE_TMIX_8206, + BASE_HW_ISSUE_TMIX_8343, + BASE_HW_ISSUE_TMIX_8456, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_END +}; + +static const enum 
base_hw_issue base_hw_issues_tHEx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tHEx_r0p1[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tHEx_r0p2[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tHEx_r0p3[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tHEx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_7891, + BASE_HW_ISSUE_TMIX_8042, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tSIx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TSIX_1792, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tSIx_r0p1[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, + 
BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TSIX_1792, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tSIx_r1p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_11054, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tSIx_r1p1[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tSIx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tDVx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tDVx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tNOx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TNOX_1194, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tNOx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + 
BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tGOx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TNOX_1194, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tGOx_r1p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TGOX_R1_1234, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tGOx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TMIX_8133, + BASE_HW_ISSUE_TSIX_1116, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_3076, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tTRx_r0p1[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_3076, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tTRx_r0p2[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_3076, + BASE_HW_ISSUE_TTRX_921, + 
BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tTRx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_3076, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tNAx_r0p1[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_3076, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_GPU2017_1336, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tNAx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tBEx_r0p1[] = { 
+ BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tBEx_r1p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tBEx_r1p1[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tBEx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_lBEx_r1p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_lBEx_r1p1[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue 
base_hw_issues_tDUx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tDUx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tODx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tGRx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tVAx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tTUx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tE2x_r0p0[] = { + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + 
BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tE2x[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_END +}; + +#endif /* _BASE_HWCONFIG_ISSUES_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h similarity index 100% rename from dvalin/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h rename to bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_base_kernel.h diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h similarity index 100% rename from dvalin/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h rename to bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_base_mem_priv.h diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h new file mode 100755 index 0000000..907142d --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase.h @@ -0,0 +1,628 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +#ifndef _KBASE_H_ +#define _KBASE_H_ + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)) +#include +#endif +#include +#include +#include +#include +#include + +#include "mali_base_kernel.h" +#include + +/* + * Include mali_kbase_defs.h first as this provides types needed by other local + * header files. + */ +#include "mali_kbase_defs.h" + +#include "debug/mali_kbase_debug_ktrace.h" +#include "context/mali_kbase_context.h" +#include "mali_kbase_strings.h" +#include "mali_kbase_mem_lowlevel.h" +#include "mali_kbase_utility.h" +#include "mali_kbase_mem.h" +#include "mmu/mali_kbase_mmu.h" +#include "mali_kbase_gpu_memory_debugfs.h" +#include "mali_kbase_mem_profile_debugfs.h" +#include "mali_kbase_gpuprops.h" +#include "mali_kbase_ioctl.h" +#include "mali_kbase_debug_job_fault.h" +#include "mali_kbase_jd_debugfs.h" +#include "mali_kbase_jm.h" +#include "mali_kbase_js.h" + +#include "ipa/mali_kbase_ipa.h" + +#ifdef CONFIG_GPU_TRACEPOINTS +#include +#endif + +#include "mali_linux_trace.h" + + +#ifndef u64_to_user_ptr +/* Introduced in Linux v4.6 */ +#define u64_to_user_ptr(x) ((void __user *)(uintptr_t)x) +#endif + + +/* Physical memory group ID for a special page which can alias several regions. 
+ */ +#define KBASE_MEM_GROUP_SINK BASE_MEM_GROUP_DEFAULT + +/* + * Kernel-side Base (KBase) APIs + */ + +struct kbase_device *kbase_device_alloc(void); +/* +* note: configuration attributes member of kbdev needs to have +* been setup before calling kbase_device_init +*/ + +int kbase_device_misc_init(struct kbase_device *kbdev); +void kbase_device_misc_term(struct kbase_device *kbdev); +void kbase_device_free(struct kbase_device *kbdev); +int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature); + +/* Needed for gator integration and for reporting vsync information */ +struct kbase_device *kbase_find_device(int minor); +void kbase_release_device(struct kbase_device *kbdev); + +/** + * kbase_context_get_unmapped_area() - get an address range which is currently + * unmapped. + * @kctx: A kernel base context (which has its own GPU address space). + * @addr: CPU mapped address (set to 0 since MAP_FIXED mapping is not allowed + * as Mali GPU driver decides about the mapping). + * @len: Length of the address range. + * @pgoff: Page offset within the GPU address space of the kbase context. + * @flags: Flags for the allocation. + * + * Finds the unmapped address range which satisfies requirements specific to + * GPU and those provided by the call parameters. + * + * 1) Requirement for allocations greater than 2MB: + * - alignment offset is set to 2MB and the alignment mask to 2MB decremented + * by 1. + * + * 2) Requirements imposed for the shader memory alignment: + * - alignment is decided by the number of GPU pc bits which can be read from + * GPU properties of the device associated with this kbase context; alignment + * offset is set to this value in bytes and the alignment mask to the offset + * decremented by 1. + * - allocations must not be at 4GB boundaries. Such cases are indicated + * by the flag KBASE_REG_GPU_NX not being set (check the flags of the kbase + * region). 4GB boundaries can be checked against @ref BASE_MEM_MASK_4GB. 
+ * + * 3) Requirements imposed for tiler memory alignment, cases indicated by + * the flag @ref KBASE_REG_TILER_ALIGN_TOP (check the flags of the kbase + * region): + * - alignment offset is set to the difference between the kbase region + * extent (converted from the original value in pages to bytes) and the kbase + * region initial_commit (also converted from the original value in pages to + * bytes); alignment mask is set to the kbase region extent in bytes and + * decremented by 1. + * + * Return: if successful, address of the unmapped area aligned as required; + * error code (negative) in case of failure; + */ +unsigned long kbase_context_get_unmapped_area(struct kbase_context *kctx, + const unsigned long addr, const unsigned long len, + const unsigned long pgoff, const unsigned long flags); + + +int assign_irqs(struct kbase_device *kbdev); + +int kbase_sysfs_init(struct kbase_device *kbdev); +void kbase_sysfs_term(struct kbase_device *kbdev); + + +int kbase_protected_mode_init(struct kbase_device *kbdev); +void kbase_protected_mode_term(struct kbase_device *kbdev); + +/** + * kbase_device_pm_init() - Performs power management initialization and + * Verifies device tree configurations. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Return: 0 if successful, otherwise a standard Linux error code + */ +int kbase_device_pm_init(struct kbase_device *kbdev); + +/** + * kbase_device_pm_term() - Performs power management deinitialization and + * Free resources. 
+ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Clean up all the resources + */ +void kbase_device_pm_term(struct kbase_device *kbdev); + + +int power_control_init(struct kbase_device *kbdev); +void power_control_term(struct kbase_device *kbdev); + +#ifdef CONFIG_DEBUG_FS +void kbase_device_debugfs_term(struct kbase_device *kbdev); +int kbase_device_debugfs_init(struct kbase_device *kbdev); +#else /* CONFIG_DEBUG_FS */ +static inline int kbase_device_debugfs_init(struct kbase_device *kbdev) +{ + return 0; +} + +static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { } +#endif /* CONFIG_DEBUG_FS */ + +int registers_map(struct kbase_device *kbdev); +void registers_unmap(struct kbase_device *kbdev); + +int kbase_device_coherency_init(struct kbase_device *kbdev); + +#ifdef CONFIG_MALI_BUSLOG +int buslog_init(struct kbase_device *kbdev); +void buslog_term(struct kbase_device *kbdev); +#endif + +int kbase_jd_init(struct kbase_context *kctx); +void kbase_jd_exit(struct kbase_context *kctx); + +/** + * kbase_jd_submit - Submit atoms to the job dispatcher + * + * @kctx: The kbase context to submit to + * @user_addr: The address in user space of the struct base_jd_atom_v2 array + * @nr_atoms: The number of atoms in the array + * @stride: sizeof(struct base_jd_atom_v2) + * @uk6_atom: true if the atoms are legacy atoms (struct base_jd_atom_v2_uk6) + * + * Return: 0 on success or error code + */ +int kbase_jd_submit(struct kbase_context *kctx, + void __user *user_addr, u32 nr_atoms, u32 stride, + bool uk6_atom); + +/** + * kbase_jd_done_worker - Handle a job completion + * @data: a &struct work_struct + * + * This function requeues the job from the runpool (if it was soft-stopped or + * removed from NEXT registers). + * + * Removes it from the system if it finished/failed/was cancelled. 
+ * + * Resolves dependencies to add dependent jobs to the context, potentially + * starting them if necessary (which may add more references to the context) + * + * Releases the reference to the context from the no-longer-running job. + * + * Handles retrying submission outside of IRQ context if it failed from within + * IRQ context. + */ +void kbase_jd_done_worker(struct work_struct *data); + +void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp, + kbasep_js_atom_done_code done_code); +void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom); +void kbase_jd_zap_context(struct kbase_context *kctx); +bool jd_done_nolock(struct kbase_jd_atom *katom, + struct list_head *completed_jobs_ctx); +void kbase_jd_free_external_resources(struct kbase_jd_atom *katom); +void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom); + +/** + * kbase_job_done - Process completed jobs from job interrupt + * @kbdev: Pointer to the kbase device. + * @done: Bitmask of done or failed jobs, from JOB_IRQ_STAT register + * + * This function processes the completed, or failed, jobs from the GPU job + * slots, for the bits set in the @done bitmask. + * + * The hwaccess_lock must be held when calling this function. + */ +void kbase_job_done(struct kbase_device *kbdev, u32 done); + +/** + * kbase_job_slot_ctx_priority_check_locked(): - Check for lower priority atoms + * and soft stop them + * @kctx: Pointer to context to check. + * @katom: Pointer to priority atom. + * + * Atoms from @kctx on the same job slot as @katom, which have lower priority + * than @katom will be soft stopped and put back in the queue, so that atoms + * with higher priority can run. + * + * The hwaccess_lock must be held when calling this function. + */ +void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, + struct kbase_jd_atom *katom); + +/** + * kbase_job_slot_softstop_start_rp() - Soft-stop the atom at the start + * of a renderpass. 
+ * @kctx: Pointer to a kernel base context. + * @reg: Reference of a growable GPU memory region in the same context. + * Takes ownership of the reference if successful. + * + * Used to switch to incremental rendering if we have nearly run out of + * virtual address space in a growable memory region and the atom currently + * executing on a job slot is the tiler job chain at the start of a renderpass. + * + * Return 0 if successful, otherwise a negative error code. + */ +int kbase_job_slot_softstop_start_rp(struct kbase_context *kctx, + struct kbase_va_region *reg); + +void kbase_job_slot_softstop(struct kbase_device *kbdev, int js, + struct kbase_jd_atom *target_katom); +void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, + struct kbase_jd_atom *target_katom, u32 sw_flags); +void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, + struct kbase_jd_atom *target_katom); +void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, + base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom); +void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, + struct kbase_jd_atom *target_katom); + + +void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event); +int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent); +int kbase_event_pending(struct kbase_context *ctx); +int kbase_event_init(struct kbase_context *kctx); +void kbase_event_close(struct kbase_context *kctx); +void kbase_event_cleanup(struct kbase_context *kctx); +void kbase_event_wakeup(struct kbase_context *kctx); + +/** + * kbasep_jit_alloc_validate() - Validate the JIT allocation info. + * + * @kctx: Pointer to the kbase context within which the JIT + * allocation is to be validated. + * @info: Pointer to struct @base_jit_alloc_info + * which is to be validated. 
+ * Return: 0 if jit allocation is valid; negative error code otherwise + */ +int kbasep_jit_alloc_validate(struct kbase_context *kctx, + struct base_jit_alloc_info *info); + +/** + * kbase_jit_retry_pending_alloc() - Retry blocked just-in-time memory + * allocations. + * + * @kctx: Pointer to the kbase context within which the just-in-time + * memory allocations are to be retried. + */ +void kbase_jit_retry_pending_alloc(struct kbase_context *kctx); + +/** + * kbase_free_user_buffer() - Release the external-resource pages held by a + * struct kbase_debug_copy_buffer. + * + * @buffer: Buffer whose extres_pages array (nr_extres_pages entries) is released: + * put_page() on every non-NULL entry, then kfree() of the array itself. + */ +static inline void kbase_free_user_buffer( + struct kbase_debug_copy_buffer *buffer) +{ + struct page **pages = buffer->extres_pages; + int nr_pages = buffer->nr_extres_pages; + + if (pages) { + int i; + + for (i = 0; i < nr_pages; i++) { + struct page *pg = pages[i]; + + if (pg) + put_page(pg); + } + kfree(pages); + } +} + +/** + * kbase_mem_copy_from_extres() - Copy from external resources. + * + * @kctx: kbase context within which the copying is to take place. + * @buf_data: Pointer to the information about external resources: + * pages pertaining to the external resource, number of + * pages to copy.
+ */ +int kbase_mem_copy_from_extres(struct kbase_context *kctx, + struct kbase_debug_copy_buffer *buf_data); +int kbase_process_soft_job(struct kbase_jd_atom *katom); +int kbase_prepare_soft_job(struct kbase_jd_atom *katom); +void kbase_finish_soft_job(struct kbase_jd_atom *katom); +void kbase_cancel_soft_job(struct kbase_jd_atom *katom); +void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev); +void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom); +#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) +void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom); +#endif +int kbase_soft_event_update(struct kbase_context *kctx, + u64 event, + unsigned char new_status); + +void kbasep_soft_job_timeout_worker(struct timer_list *timer); +void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt); + +void kbasep_as_do_poke(struct work_struct *work); + +/** + * Check whether a system suspend is in progress, or has already been suspended + * + * The caller should ensure that either kbdev->pm.active_count_lock is held, or + * a dmb was executed recently (to ensure the value is most + * up-to-date). However, without a lock the value could change afterwards. 
+ * + * @return false if a suspend is not in progress + * @return !=false otherwise + */ +static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev) +{ + return kbdev->pm.suspending; +} + +#ifdef CONFIG_MALI_ARBITER_SUPPORT +/* + * Check whether a gpu lost is in progress + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Indicates whether a gpu lost has been received and jobs are no longer + * being scheduled + * + * Return: false if kbdev->pm.gpu_lost is not set (no gpu lost in progress) + * Return: != false otherwise + */ +static inline bool kbase_pm_is_gpu_lost(struct kbase_device *kbdev) +{ + return kbdev->pm.gpu_lost; +} +#endif + +/** + * kbase_pm_is_active - Determine whether the GPU is active + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This takes into account whether there is an active context reference. + * + * Return: true if the GPU is active, false otherwise + */ +static inline bool kbase_pm_is_active(struct kbase_device *kbdev) +{ + return kbdev->pm.active_count > 0; +} + +/** + * kbase_pm_metrics_start - Start the utilization metrics timer + * @kbdev: Pointer to the kbase device for which to start the utilization + * metrics calculation thread. + * + * Start the timer that drives the metrics calculation, runs the custom DVFS. + */ +void kbase_pm_metrics_start(struct kbase_device *kbdev); + +/** + * kbase_pm_metrics_stop - Stop the utilization metrics timer + * @kbdev: Pointer to the kbase device for which to stop the utilization + * metrics calculation thread. + * + * Stop the timer that drives the metrics calculation, runs the custom DVFS.
+ */ +void kbase_pm_metrics_stop(struct kbase_device *kbdev); + +/** + * kbase_jd_atom_id - Return the atom's ID, i.e. its index in kctx->jctx.atoms, as + * supplied by userspace in base_jd_atom_v2::atom_number (asserted < BASE_JD_ATOM_COUNT) + */ +static inline int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ + int result; + + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(katom); + KBASE_DEBUG_ASSERT(katom->kctx == kctx); + + result = katom - &kctx->jctx.atoms[0]; + KBASE_DEBUG_ASSERT(result >= 0 && result < BASE_JD_ATOM_COUNT); + return result; +} + +/** + * kbase_jd_atom_from_id - Return the atom structure for the given atom ID + * @kctx: Context pointer + * @id: ID of atom to retrieve + * + * Return: Pointer to struct kbase_jd_atom associated with the supplied ID + */ +static inline struct kbase_jd_atom *kbase_jd_atom_from_id( + struct kbase_context *kctx, int id) +{ + return &kctx->jctx.atoms[id]; +} + +/** + * Initialize the disjoint state + * + * The disjoint event count and state are both set to zero. + * + * Disjoint functions usage: + * + * The disjoint event count should be incremented whenever a disjoint event occurs. + * + * There are several cases which are regarded as disjoint behavior. Rather than just increment + * the counter during disjoint events we also increment the counter when jobs may be affected + * by what the GPU is currently doing. To facilitate this we have the concept of disjoint state. + * + * Disjoint state is entered during GPU reset. Increasing the disjoint state also increases + * the count of disjoint events. + * + * The disjoint state is then used to increase the count of disjoint events during job submission + * and job completion. Any atom submitted or completed while the disjoint state is greater than + * zero is regarded as a disjoint event. + * + * The disjoint event counter is also incremented immediately whenever a job is soft stopped + * and during context creation.
+ * + * @param kbdev The kbase device + * + * This function does not return a value. + */ +void kbase_disjoint_init(struct kbase_device *kbdev); + +/** + * Increase the count of disjoint events + * called when a disjoint event has happened + * + * @param kbdev The kbase device + */ +void kbase_disjoint_event(struct kbase_device *kbdev); + +/** + * Increase the count of disjoint events only if the GPU is in a disjoint state + * + * This should be called when something happens which could be disjoint if the GPU + * is in a disjoint state. The state refcount keeps track of this. + * + * @param kbdev The kbase device + */ +void kbase_disjoint_event_potential(struct kbase_device *kbdev); + +/** + * Returns the count of disjoint events + * + * @param kbdev The kbase device + * @return the count of disjoint events + */ +u32 kbase_disjoint_event_get(struct kbase_device *kbdev); + +/** + * Increment the refcount state indicating that the GPU is in a disjoint state. + * + * Also increments the disjoint event count (calls @ref kbase_disjoint_event). + * Eventually, after the disjoint state has completed, @ref kbase_disjoint_state_down + * should be called. + * + * @param kbdev The kbase device + */ +void kbase_disjoint_state_up(struct kbase_device *kbdev); + +/** + * Decrement the refcount state + * + * Also increments the disjoint event count (calls @ref kbase_disjoint_event) + * + * Called after @ref kbase_disjoint_state_up once the disjoint state is over + * + * @param kbdev The kbase device + */ +void kbase_disjoint_state_down(struct kbase_device *kbdev); + +/** + * If a job is soft stopped and the number of contexts is >= this value + * it is reported as a disjoint event + */ +#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2 + +#if !defined(UINT64_MAX) + #define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL) +#endif + +#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) + +/* kbase_io_history_init - initialize data struct for register
access history + * + * @h The register history to initialize + * @n The number of register accesses that the buffer could hold + * + * @return 0 if successfully initialized, failure otherwise + */ +int kbase_io_history_init(struct kbase_io_history *h, u16 n); + +/* kbase_io_history_term - uninit all resources for the register access history + * + * @h The register history to terminate + */ +void kbase_io_history_term(struct kbase_io_history *h); + +/* kbase_io_history_dump - print the register history to the kernel ring buffer + * + * @kbdev Pointer to kbase_device containing the register history to dump + */ +void kbase_io_history_dump(struct kbase_device *kbdev); + +/** + * kbase_io_history_resize - resize the register access history buffer. + * + * @h: Pointer to a valid register history to resize + * @new_size: Number of accesses the buffer could hold + * + * A successful resize will clear all recent register accesses. + * If resizing fails for any reason (e.g., could not allocate memory, invalid + * buffer size) then the original buffer will be kept intact. + * + * @return 0 if the buffer was resized, failure otherwise + */ +int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size); + +#else /* !defined(CONFIG_DEBUG_FS) || defined(CONFIG_MALI_NO_MALI) */ + +#define kbase_io_history_init(...) ((int)0) + +#define kbase_io_history_term CSTD_NOP + +#define kbase_io_history_dump CSTD_NOP + +#define kbase_io_history_resize CSTD_NOP + +#endif /* defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) */ + +/*meson graphics start */ +extern int meson_gpu_data_invalid_count; +extern int meson_gpu_fault; +/*meson graphics end*/ +#endif diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c new file mode 100644 index 0000000..2e2e394 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c @@ -0,0 +1,113 @@ +/* + * + * (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved.
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include + +#include +#include +#include + +#ifdef CONFIG_DEBUG_FS +#ifdef CONFIG_MALI_DEBUG + +static int kbase_as_fault_read(struct seq_file *sfile, void *data) +{ + uintptr_t as_no = (uintptr_t) sfile->private; + + struct list_head *entry; + const struct list_head *kbdev_list; + struct kbase_device *kbdev = NULL; + + kbdev_list = kbase_device_get_list(); + + list_for_each(entry, kbdev_list) { + kbdev = list_entry(entry, struct kbase_device, entry); + + if (kbdev->debugfs_as_read_bitmap & (1ULL << as_no)) { + + /* don't show this one again until another fault occurs */ + kbdev->debugfs_as_read_bitmap &= ~(1ULL << as_no); + + /* output the last page fault addr */ + seq_printf(sfile, "%llu\n", + (u64) kbdev->as[as_no].pf_data.addr); + } + + } + + kbase_device_put_list(kbdev_list); + + return 0; +} + +static int kbase_as_fault_debugfs_open(struct inode *in, struct file *file) +{ + return single_open(file, kbase_as_fault_read, in->i_private); +} + +static const struct file_operations as_fault_fops = { + .owner = THIS_MODULE, + .open = kbase_as_fault_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +#endif /* CONFIG_MALI_DEBUG */ +#endif /* CONFIG_DEBUG_FS */ + +/* + * Initialize debugfs
entry for each address space (warns if the directory cannot be created) + */ +void kbase_as_fault_debugfs_init(struct kbase_device *kbdev) +{ +#ifdef CONFIG_DEBUG_FS +#ifdef CONFIG_MALI_DEBUG + uint i; + char as_name[64]; + struct dentry *debugfs_directory; + + kbdev->debugfs_as_read_bitmap = 0ULL; + + KBASE_DEBUG_ASSERT(kbdev->nr_hw_address_spaces); + KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].pf_data.addr) == sizeof(u64)); + + debugfs_directory = debugfs_create_dir("address_spaces", + kbdev->mali_debugfs_directory); + + if (!IS_ERR_OR_NULL(debugfs_directory)) { /* failure is NULL or ERR_PTR depending on kernel version */ + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { + snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i); + debugfs_create_file(as_name, S_IRUGO, + debugfs_directory, + (void *)(uintptr_t)i, + &as_fault_fops); + } + } else { + dev_warn(kbdev->dev, + "unable to create address_spaces debugfs directory"); + } + +#endif /* CONFIG_MALI_DEBUG */ +#endif /* CONFIG_DEBUG_FS */ + return; +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h new file mode 100644 index 0000000..496d8b1 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h @@ -0,0 +1,50 @@ +/* + * + * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html.
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_AS_FAULT_DEBUG_FS_H +#define _KBASE_AS_FAULT_DEBUG_FS_H + +/** + * kbase_as_fault_debugfs_init() - Add debugfs files for reporting page faults + * + * @kbdev: Pointer to kbase_device + */ +void kbase_as_fault_debugfs_init(struct kbase_device *kbdev); + +/** + * kbase_as_fault_debugfs_new() - make the last fault available on debugfs + * + * @kbdev: Pointer to kbase_device + * @as_no: The address space the fault occurred on + */ +static inline void +kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no) +{ +#ifdef CONFIG_DEBUG_FS +#ifdef CONFIG_MALI_DEBUG + kbdev->debugfs_as_read_bitmap |= (1ULL << as_no); +#endif /* CONFIG_MALI_DEBUG */ +#endif /* CONFIG_DEBUG_FS */ + return; +} + +#endif /*_KBASE_AS_FAULT_DEBUG_FS_H*/ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_bits.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_bits.h new file mode 100644 index 0000000..2c11093 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_bits.h @@ -0,0 +1,41 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + *//* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * (C) COPYRIGHT 2019 ARM Limited.
All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + */ + +#ifndef _KBASE_BITS_H_ +#define _KBASE_BITS_H_ + +#if (KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE) +#include +#else +#include +#endif + +#endif /* _KBASE_BITS_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c new file mode 100644 index 0000000..27a03cf --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c @@ -0,0 +1,67 @@ +/* + * + * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * Cache Policy API. + */ + +#include "mali_kbase_cache_policy.h" + +/* + * The output flags should be a combination of the following values: + * KBASE_REG_CPU_CACHED: CPU cache should be enabled + * KBASE_REG_GPU_CACHED: GPU cache should be enabled + * + * NOTE: Some components within the GPU might only be able to access memory + * that is KBASE_REG_GPU_CACHED. 
Refer to the specific GPU implementation for + * more details. + */ +u32 kbase_cache_enabled(u32 flags, u32 nr_pages) +{ + u32 cache_flags = 0; + + CSTD_UNUSED(nr_pages); + + if (!(flags & BASE_MEM_UNCACHED_GPU)) + cache_flags |= KBASE_REG_GPU_CACHED; + + if (flags & BASE_MEM_CACHED_CPU) + cache_flags |= KBASE_REG_CPU_CACHED; + + return cache_flags; +} + + +void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, + size_t size, enum dma_data_direction dir) +{ + dma_sync_single_for_device(kbdev->dev, handle, size, dir); +} + + +void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, + size_t size, enum dma_data_direction dir) +{ + dma_sync_single_for_cpu(kbdev->dev, handle, size, dir); +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h new file mode 100644 index 0000000..8a1e529 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h @@ -0,0 +1,50 @@ +/* + * + * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * Cache Policy API. 
+ */ + +#ifndef _KBASE_CACHE_POLICY_H_ +#define _KBASE_CACHE_POLICY_H_ + +#include "mali_kbase.h" +#include "mali_base_kernel.h" + +/** + * kbase_cache_enabled - Choose the cache policy for a specific region + * @flags: flags describing attributes of the region + * @nr_pages: total number of pages (backed or not) for the region + * + * Tells whether the CPU and GPU caches should be enabled or not for a specific + * region. + * This function can be modified to customize the cache policy depending on the + * flags and size of the region. + * + * Return: a combination of %KBASE_REG_CPU_CACHED and %KBASE_REG_GPU_CACHED + * depending on the cache policy + */ +u32 kbase_cache_enabled(u32 flags, u32 nr_pages); + +#endif /* _KBASE_CACHE_POLICY_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c new file mode 100644 index 0000000..ce7070d --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c @@ -0,0 +1,48 @@ +/* + * + * (C) COPYRIGHT 2011-2015,2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +#include +#include +#include + +int kbasep_platform_device_init(struct kbase_device *kbdev) +{ + struct kbase_platform_funcs_conf *platform_funcs_p; + + platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; + if (platform_funcs_p && platform_funcs_p->platform_init_func) + return platform_funcs_p->platform_init_func(kbdev); + + return 0; +} + +void kbasep_platform_device_term(struct kbase_device *kbdev) +{ + struct kbase_platform_funcs_conf *platform_funcs_p; + + platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; + if (platform_funcs_p && platform_funcs_p->platform_term_func) + platform_funcs_p->platform_term_func(kbdev); +} + diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h new file mode 100644 index 0000000..69723ea --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h @@ -0,0 +1,311 @@ +/* + * + * (C) COPYRIGHT 2010-2017, 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file mali_kbase_config.h + * Configuration API and Attributes for KBase + */ + +#ifndef _KBASE_CONFIG_H_ +#define _KBASE_CONFIG_H_ + +#include +#include +#include +#include + +/** + * @addtogroup base_api + * @{ + */ + +/** + * @addtogroup base_kbase_api + * @{ + */ + +/** + * @addtogroup kbase_config Configuration API and Attributes + * @{ + */ + +/* Forward declaration of struct kbase_device */ +struct kbase_device; + +/** + * kbase_platform_funcs_conf - Specifies platform init/term function pointers + * + * Specifies the function pointers for platform specific initialization and + * termination. By default no functions are required. No additional platform + * specific control is necessary. + */ +struct kbase_platform_funcs_conf { + /** + * platform_init_func - platform specific init function pointer + * @kbdev - kbase_device pointer + * + * Returns 0 on success, negative error code otherwise. + * + * Function pointer for platform specific initialization or NULL if no + * initialization function is required. At the point this is called the GPU is + * not active and its power and clocks are in an unknown (platform specific) + * state, as kbase doesn't yet have control of power and clocks. + * + * The platform specific private pointer kbase_device::platform_context + * can be accessed (and possibly initialized) in here. + */ + int (*platform_init_func)(struct kbase_device *kbdev); + /** + * platform_term_func - platform specific termination function pointer + * @kbdev - kbase_device pointer + * + * Function pointer for platform specific termination or NULL if no + * termination function is required. At the point this is called the GPU will be + * idle but still powered and clocked. + * + * The platform specific private pointer kbase_device::platform_context + * can be accessed (and possibly terminated) in here.
+ */ + void (*platform_term_func)(struct kbase_device *kbdev); +}; + +/* + * @brief Specifies the callbacks for power management + * + * By default no callbacks will be made and the GPU must not be powered off. + */ +struct kbase_pm_callback_conf { + /** Callback for when the GPU is idle and the power to it can be switched off. + * + * The system integrator can decide whether to either do nothing, just switch off + * the clocks to the GPU, or to completely power down the GPU. + * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the + * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). + */ + void (*power_off_callback)(struct kbase_device *kbdev); + + /** Callback for when the GPU is about to become active and power must be supplied. + * + * This function must not return until the GPU is powered and clocked sufficiently for register access to + * succeed. The return value specifies whether the GPU was powered down since the call to power_off_callback. + * If the GPU state has been lost then this function must return 1, otherwise it should return 0. + * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the + * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). + * + * The return value of the first call to this function is ignored. + * + * @return 1 if the GPU state may have been lost, 0 otherwise. + */ + int (*power_on_callback)(struct kbase_device *kbdev); + + /** Callback for when the system is requesting a suspend and GPU power + * must be switched off. + * + * Note that if this callback is present, then this may be called + * without a preceding call to power_off_callback. Therefore this + * callback must be able to take any action that might otherwise happen + * in power_off_callback. 
+ * + * The platform specific private pointer kbase_device::platform_context + * can be accessed and modified in here. It is the platform \em + * callbacks responsibility to initialize and terminate this pointer if + * used (see @ref kbase_platform_funcs_conf). + */ + void (*power_suspend_callback)(struct kbase_device *kbdev); + + /** Callback for when the system is resuming from a suspend and GPU + * power must be switched on. + * + * Note that if this callback is present, then this may be called + * without a following call to power_on_callback. Therefore this + * callback must be able to take any action that might otherwise happen + * in power_on_callback. + * + * The platform specific private pointer kbase_device::platform_context + * can be accessed and modified in here. It is the platform \em + * callbacks responsibility to initialize and terminate this pointer if + * used (see @ref kbase_platform_funcs_conf). + */ + void (*power_resume_callback)(struct kbase_device *kbdev); + + /** Callback for handling runtime power management initialization. + * + * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback + * will become active from calls made to the OS from within this function. + * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback. + * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. + * + * @return 0 on success, else int error code. + */ + int (*power_runtime_init_callback)(struct kbase_device *kbdev); + + /** Callback for handling runtime power management termination. + * + * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback + * should no longer be called by the OS on completion of this function. + * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. 
+ */ + void (*power_runtime_term_callback)(struct kbase_device *kbdev); + + /** Callback for runtime power-off power management callback + * + * For linux this callback will be called by the kernel runtime_suspend callback. + * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. + * + * This callback does not return a value. + */ + void (*power_runtime_off_callback)(struct kbase_device *kbdev); + + /** Callback for runtime power-on power management callback + * + * For linux this callback will be called by the kernel runtime_resume callback. + * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. + */ + int (*power_runtime_on_callback)(struct kbase_device *kbdev); + + /* + * Optional callback for checking if GPU can be suspended when idle + * + * This callback will be called by the runtime power management core + * when the reference count goes to 0 to provide notification that the + * GPU now seems idle. + * + * If this callback finds that the GPU can't be powered off, or handles + * suspend by powering off directly or queueing up a power off, a + * non-zero value must be returned to prevent the runtime PM core from + * also triggering a suspend. + * + * Returning 0 will cause the runtime PM core to conduct a regular + * autosuspend. + * + * This callback is optional and if not provided regular autosuspend + * will be triggered. + * + * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use + * this feature. + * + * Return 0 if GPU can be suspended, positive value if it can not be + * suspended by runtime PM, else OS error code + */ + int (*power_runtime_idle_callback)(struct kbase_device *kbdev); + + /* + * Optional callback for software reset + * + * This callback will be called by the power management core to trigger + * a GPU soft reset.
+ * + * Return 0 if the soft reset was successful and the RESET_COMPLETED + * interrupt will be raised, or a positive value if the interrupt won't + * be raised. On error, return the corresponding OS error code. + */ + int (*soft_reset_callback)(struct kbase_device *kbdev); +}; + +#ifdef CONFIG_OF +struct kbase_platform_config { +}; +#else + +/* + * @brief Specifies start and end of I/O memory region. + */ +struct kbase_io_memory_region { + u64 start; + u64 end; +}; + +/* + * @brief Specifies I/O related resources like IRQs and memory region for I/O operations. + */ +struct kbase_io_resources { + u32 job_irq_number; + u32 mmu_irq_number; + u32 gpu_irq_number; + struct kbase_io_memory_region io_memory_region; +}; + +struct kbase_platform_config { + const struct kbase_io_resources *io_resources; +}; + +#endif /* CONFIG_OF */ + +/** + * @brief Gets the pointer to platform config. + * + * @return Pointer to the platform config + */ +struct kbase_platform_config *kbase_get_platform_config(void); + +/** + * kbasep_platform_device_init: - Platform specific call to initialize hardware + * @kbdev: kbase device pointer + * + * Function calls a platform defined routine if specified in the configuration + * attributes. The routine can initialize any hardware and context state that + * is required for the GPU block to function. + * + * Return: 0 if no errors have been found in the config. + * Negative error code otherwise. + */ +int kbasep_platform_device_init(struct kbase_device *kbdev); + +/** + * kbasep_platform_device_term - Platform specific call to terminate hardware + * @kbdev: Kbase device pointer + * + * Function calls a platform defined routine if specified in the configuration + * attributes. The routine can destroy any platform specific context state and + * shut down any hardware functionality that are outside of the Power Management + * callbacks. 
+ * + */ +void kbasep_platform_device_term(struct kbase_device *kbdev); + +#ifndef CONFIG_OF +/** + * kbase_platform_register - Register a platform device for the GPU + * + * This can be used to register a platform device on systems where device tree + * is not enabled and the platform initialisation code in the kernel doesn't + * create the GPU device. Where possible device tree should be used instead. + * + * Return: 0 for success, any other fail causes module initialisation to fail + */ +int kbase_platform_register(void); + +/** + * kbase_platform_unregister - Unregister a fake platform device + * + * Unregister the platform device created with kbase_platform_register() + */ +void kbase_platform_unregister(void); +#endif + + /** @} *//* end group kbase_config */ + /** @} *//* end group base_kbase_api */ + /** @} *//* end group base_api */ + +#endif /* _KBASE_CONFIG_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h new file mode 100644 index 0000000..e079281 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h @@ -0,0 +1,213 @@ +/* + * + * (C) COPYRIGHT 2013-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * @file mali_kbase_config_defaults.h + * + * Default values for configuration settings + * + */ + +#ifndef _KBASE_CONFIG_DEFAULTS_H_ +#define _KBASE_CONFIG_DEFAULTS_H_ + +/* Include mandatory definitions per platform */ +#include + +enum { + /** + * Use unrestricted Address ID width on the AXI bus. + */ + KBASE_AID_32 = 0x0, + + /** + * Restrict GPU to a half of maximum Address ID count. + * This will reduce performance, but reduce bus load due to GPU. + */ + KBASE_AID_16 = 0x3, + + /** + * Restrict GPU to a quarter of maximum Address ID count. + * This will reduce performance, but reduce bus load due to GPU. + */ + KBASE_AID_8 = 0x2, + + /** + * Restrict GPU to an eighth of maximum Address ID count. + * This will reduce performance, but reduce bus load due to GPU. + */ + KBASE_AID_4 = 0x1 +}; + +enum { + /** + * Use unrestricted Address ID width on the AXI bus. + * Restricting ID width will reduce performance & bus load due to GPU. + */ + KBASE_3BIT_AID_32 = 0x0, + + /* Restrict GPU to 7/8 of maximum Address ID count. */ + KBASE_3BIT_AID_28 = 0x1, + + /* Restrict GPU to 3/4 of maximum Address ID count. */ + KBASE_3BIT_AID_24 = 0x2, + + /* Restrict GPU to 5/8 of maximum Address ID count. */ + KBASE_3BIT_AID_20 = 0x3, + + /* Restrict GPU to 1/2 of maximum Address ID count. */ + KBASE_3BIT_AID_16 = 0x4, + + /* Restrict GPU to 3/8 of maximum Address ID count. */ + KBASE_3BIT_AID_12 = 0x5, + + /* Restrict GPU to 1/4 of maximum Address ID count. */ + KBASE_3BIT_AID_8 = 0x6, + + /* Restrict GPU to 1/8 of maximum Address ID count. */ + KBASE_3BIT_AID_4 = 0x7 +}; + +/** + * Default period for DVFS sampling + */ +#define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */ + +/** + * Power Management poweroff tick granularity. This is in nanoseconds to + * allow HR timer support.
+ * + * On each scheduling tick, the power manager core may decide to: + * -# Power off one or more shader cores + * -# Power off the entire GPU + */ +#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */ + +/** + * Power Manager number of ticks before shader cores are powered off + */ +#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */ + +/** + * Default scheduling tick granularity + */ +#define DEFAULT_JS_SCHEDULING_PERIOD_NS (100000000u) /* 100ms */ + +/** + * Default minimum number of scheduling ticks before jobs are soft-stopped. + * + * This defines the time-slice for a job (which may be different from that of a + * context) + */ +#define DEFAULT_JS_SOFT_STOP_TICKS (1) /* 100ms-200ms */ + +/** + * Default minimum number of scheduling ticks before CL jobs are soft-stopped. + */ +#define DEFAULT_JS_SOFT_STOP_TICKS_CL (1) /* 100ms-200ms */ + +/** + * Default minimum number of scheduling ticks before jobs are hard-stopped + */ +#define DEFAULT_JS_HARD_STOP_TICKS_SS (50) /* 5s */ + +/** + * Default minimum number of scheduling ticks before CL jobs are hard-stopped. + */ +#define DEFAULT_JS_HARD_STOP_TICKS_CL (50) /* 5s */ + +/** + * Default minimum number of scheduling ticks before jobs are hard-stopped + * during dumping + */ +#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING (15000) /* 1500s */ + +/** + * Default timeout for some software jobs, after which the software event wait + * jobs will be cancelled. + */ +#define DEFAULT_JS_SOFT_JOB_TIMEOUT (3000) /* 3s */ + +/** + * Default minimum number of scheduling ticks before the GPU is reset to clear a + * "stuck" job + */ +#define DEFAULT_JS_RESET_TICKS_SS (55) /* 5.5s */ + +/** + * Default minimum number of scheduling ticks before the GPU is reset to clear a + * "stuck" CL job. + */ +#define DEFAULT_JS_RESET_TICKS_CL (55) /* 5.5s */ + +/** + * Default minimum number of scheduling ticks before the GPU is reset to clear a + * "stuck" job during dumping.
+ */ +#define DEFAULT_JS_RESET_TICKS_DUMPING (15020) /* 1502s */ + +/** + * Default number of milliseconds given for other jobs on the GPU to be + * soft-stopped when the GPU needs to be reset. + */ +#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */ + +/** + * Default timeslice that a context is scheduled in for, in nanoseconds. + * + * When a context has used up this amount of time across its jobs, it is + * scheduled out to let another run. + * + * @note the resolution is nanoseconds (ns) here, because that's the format + * often used by the OS. + */ +#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */ + +/** + * Maximum frequency (in kHz) that the GPU can be clocked. For some platforms + * this isn't available, so we simply define a dummy value here. If devfreq + * is enabled the value will be read from there, otherwise this should be + * overridden by defining GPU_FREQ_KHZ_MAX in the platform file. + */ +#define DEFAULT_GPU_FREQ_KHZ_MAX (5000) + +/** + * Default timeout for task execution on an endpoint + * + * Number of GPU clock cycles before the driver terminates a task that is + * making no forward progress on an endpoint (e.g. shader core). + * Value chosen is equivalent to the time after which a job is hard stopped + * which is 5 seconds (assuming the GPU is usually clocked at ~500 MHZ). + */ +#define DEFAULT_PROGRESS_TIMEOUT ((u64)5 * 500 * 1024 * 1024) + +/** + * Default threshold at which to switch to incremental rendering + * + * Fraction of the maximum size of an allocation that grows on GPU page fault + * that can be used up before the driver switches to incremental rendering, + * in 256ths. 0 means disable incremental rendering. 
+ */ +#define DEFAULT_IR_THRESHOLD (192) + +#endif /* _KBASE_CONFIG_DEFAULTS_H_ */ + diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c new file mode 100644 index 0000000..fd5b8f0 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c @@ -0,0 +1,4548 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include +#include +#include +#ifdef CONFIG_MALI_DEVFREQ +#include +#include +#ifdef CONFIG_DEVFREQ_THERMAL +#include +#endif /* CONFIG_DEVFREQ_THERMAL */ +#endif /* CONFIG_MALI_DEVFREQ */ +#ifdef CONFIG_MALI_NO_MALI +#include "mali_kbase_model_linux.h" +#include +#endif /* CONFIG_MALI_NO_MALI */ +#include "mali_kbase_mem_profile_debugfs_buf_size.h" +#include "mali_kbase_debug_mem_view.h" +#include "mali_kbase_mem.h" +#include "mali_kbase_mem_pool_debugfs.h" +#include "mali_kbase_debugfs_helper.h" +#if !MALI_CUSTOMER_RELEASE +#include "mali_kbase_regs_dump_debugfs.h" +#endif /* !MALI_CUSTOMER_RELEASE */ +#include "mali_kbase_regs_history_debugfs.h" +#include +#include +#include +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS +#include +#endif +#include +#include +#include +#include "mali_kbase_ioctl.h" +#include "mali_kbase_hwcnt_context.h" +#include "mali_kbase_hwcnt_virtualizer.h" +#include "mali_kbase_hwcnt_legacy.h" +#include "mali_kbase_vinstr.h" +#ifdef CONFIG_MALI_ARBITER_SUPPORT +#include "arbiter/mali_kbase_arbiter_pm.h" +#endif + +#include "mali_kbase_cs_experimental.h" + +#ifdef CONFIG_MALI_CINSTR_GWT +#include "mali_kbase_gwt.h" +#endif +#include "mali_kbase_pm_internal.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* is_compat_task/in_compat_syscall */ +#include +#include +#include +#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) +#include +#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ +#include +#include +#include +#include + +#include + + +#if (KERNEL_VERSION(3, 13, 0) <= LINUX_VERSION_CODE) +#include +#else +#include +#endif + +#include + +#include + +#include +#include +#include + +/* GPU IRQ Tags */ +#define JOB_IRQ_TAG 0 +#define MMU_IRQ_TAG 1 +#define GPU_IRQ_TAG 2 + +#define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)" + +/** + 
* kbase_file_new - Create an object representing a device file + * + * @kbdev: An instance of the GPU platform device, allocated from the probe + * method of the driver. + * @filp: Pointer to the struct file corresponding to device file + * /dev/malixx instance, passed to the file's open method. + * + * In its initial state, the device file has no context (i.e. no GPU + * address space) and no API version number. Both must be assigned before + * kbase_file_get_kctx_if_setup_complete() can be used successfully. + * + * @return Address of an object representing a simulated device file, or NULL + * on failure. + */ +static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev, + struct file *const filp) +{ + struct kbase_file *const kfile = kmalloc(sizeof(*kfile), GFP_KERNEL); + + if (kfile) { + kfile->kbdev = kbdev; + kfile->filp = filp; + kfile->kctx = NULL; + kfile->api_version = 0; + atomic_set(&kfile->setup_state, KBASE_FILE_NEED_VSN); + } + return kfile; +} + +/** + * kbase_file_set_api_version - Set the application programmer interface version + * + * @kfile: A device file created by kbase_file_new() + * @major: Major version number (must not exceed 12 bits) + * @minor: Minor version number (must not exceed 12 bits) + * + * An application programmer interface (API) version must be specified + * before calling kbase_file_create_kctx(), otherwise an error is returned. + * + * If a version number was already set for the given @kfile (or is in the + * process of being set by another thread) then an error is returned. + * + * Return: 0 if successful, otherwise a negative error code.
+ */ +static int kbase_file_set_api_version(struct kbase_file *const kfile, + u16 const major, u16 const minor) +{ + if (WARN_ON(!kfile)) + return -EINVAL; + + /* setup pending, try to signal that we'll do the setup, + * if setup was already in progress, err this call + */ + if (atomic_cmpxchg(&kfile->setup_state, KBASE_FILE_NEED_VSN, + KBASE_FILE_VSN_IN_PROGRESS) != KBASE_FILE_NEED_VSN) + return -EPERM; + + /* save the proposed version number for later use */ + kfile->api_version = KBASE_API_VERSION(major, minor); + + atomic_set(&kfile->setup_state, KBASE_FILE_NEED_CTX); + return 0; +} + +/** + * kbase_file_get_api_version - Get the application programmer interface version + * + * @kfile: A device file created by kbase_file_new() + * + * Return: The version number (encoded with KBASE_API_VERSION) or 0 if none has + * been set. + */ +static unsigned long kbase_file_get_api_version(struct kbase_file *const kfile) +{ + if (WARN_ON(!kfile)) + return 0; + + if (atomic_read(&kfile->setup_state) < KBASE_FILE_NEED_CTX) + return 0; + + return kfile->api_version; +} + +/** + * kbase_file_create_kctx - Create a kernel base context + * + * @kfile: A device file created by kbase_file_new() + * @flags: Flags to set, which can be any combination of + * BASEP_CONTEXT_CREATE_KERNEL_FLAGS. + * + * This creates a new context for the GPU platform device instance that was + * specified when kbase_file_new() was called. Each context has its own GPU + * address space. If a context was already created for the given @kfile (or is + * in the process of being created for it by another thread) then an error is + * returned. + * + * An API version number must have been set by kbase_file_set_api_version() + * before calling this function, otherwise an error is returned. + * + * Return: 0 if a new context was created, otherwise a negative error code. 
+ */ +static int kbase_file_create_kctx(struct kbase_file *kfile, + base_context_create_flags flags); + +/** + * kbase_file_get_kctx_if_setup_complete - Get a kernel base context + * pointer from a device file + * + * @kfile: A device file created by kbase_file_new() + * + * This function returns NULL (not an ERR_PTR-encoded error) if no context + * has been created for the given @kfile. This makes it safe to use in + * circumstances where the order of initialization cannot be enforced, but + * only if the caller checks the return value. + * + * Return: Address of the kernel base context associated with the @kfile, or + * NULL if no context exists. + */ +static struct kbase_context *kbase_file_get_kctx_if_setup_complete( + struct kbase_file *const kfile) +{ + if (WARN_ON(!kfile) || + atomic_read(&kfile->setup_state) != KBASE_FILE_COMPLETE || + WARN_ON(!kfile->kctx)) + return NULL; + + return kfile->kctx; +} + +/** + * kbase_file_delete - Destroy an object representing a device file + * + * @kfile: A device file created by kbase_file_new() + * + * If any context was created for the @kfile then it is destroyed.
+ */ +static void kbase_file_delete(struct kbase_file *const kfile) +{ + struct kbase_device *kbdev = NULL; + + if (WARN_ON(!kfile)) + return; + + kfile->filp->private_data = NULL; + kbdev = kfile->kbdev; + + if (atomic_read(&kfile->setup_state) == KBASE_FILE_COMPLETE) { + struct kbase_context *kctx = kfile->kctx; + +#ifdef CONFIG_DEBUG_FS + kbasep_mem_profile_debugfs_remove(kctx); +#endif + + mutex_lock(&kctx->legacy_hwcnt_lock); + /* If this client was performing hardware counter dumping and + * did not explicitly detach itself, destroy it now + */ + kbase_hwcnt_legacy_client_destroy(kctx->legacy_hwcnt_cli); + kctx->legacy_hwcnt_cli = NULL; + mutex_unlock(&kctx->legacy_hwcnt_lock); + + kbase_context_debugfs_term(kctx); + + kbase_destroy_context(kctx); + + dev_dbg(kbdev->dev, "deleted base context\n"); + } + + kbase_release_device(kbdev); + + kfree(kfile); +} + +static int kbase_api_handshake(struct kbase_file *kfile, + struct kbase_ioctl_version_check *version) +{ + int err = 0; + + switch (version->major) { + case BASE_UK_VERSION_MAJOR: + /* set minor to be the lowest common */ + version->minor = min_t(int, BASE_UK_VERSION_MINOR, + (int)version->minor); + break; + default: + /* We return our actual version regardless if it + * matches the version returned by userspace - + * userspace can bail if it can't handle this + * version + */ + version->major = BASE_UK_VERSION_MAJOR; + version->minor = BASE_UK_VERSION_MINOR; + break; + } + + /* save the proposed version number for later use */ + err = kbase_file_set_api_version(kfile, version->major, version->minor); + if (unlikely(err)) + return err; + + /* For backward compatibility, we may need to create the context before + * the flags have been set. Originally it was created on file open + * (with job submission disabled) but we don't support that usage. 
+ */ + if (kbase_file_get_api_version(kfile) < KBASE_API_VERSION(11, 15)) + err = kbase_file_create_kctx(kfile, + BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED); + + return err; +} + +/** + * enum mali_error - Mali error codes shared with userspace + * + * This is subset of those common Mali errors that can be returned to userspace. + * Values of matching user and kernel space enumerators MUST be the same. + * MALI_ERROR_NONE is guaranteed to be 0. + * + * @MALI_ERROR_NONE: Success + * @MALI_ERROR_OUT_OF_GPU_MEMORY: Not used in the kernel driver + * @MALI_ERROR_OUT_OF_MEMORY: Memory allocation failure + * @MALI_ERROR_FUNCTION_FAILED: Generic error code + */ +enum mali_error { + MALI_ERROR_NONE = 0, + MALI_ERROR_OUT_OF_GPU_MEMORY, + MALI_ERROR_OUT_OF_MEMORY, + MALI_ERROR_FUNCTION_FAILED, +}; + +static struct kbase_device *to_kbase_device(struct device *dev) +{ + return dev_get_drvdata(dev); +} + +int assign_irqs(struct kbase_device *kbdev) +{ + struct platform_device *pdev; + int i; + + if (!kbdev) + return -ENODEV; + + pdev = to_platform_device(kbdev->dev); + /* 3 IRQ resources */ + for (i = 0; i < 3; i++) { + struct resource *irq_res; + int irqtag; + + irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i); + if (!irq_res) { + dev_err(kbdev->dev, "No IRQ resource at index %d\n", i); + return -ENOENT; + } + +#ifdef CONFIG_OF + if (!strncasecmp(irq_res->name, "JOB", 4)) { + irqtag = JOB_IRQ_TAG; + } else if (!strncasecmp(irq_res->name, "MMU", 4)) { + irqtag = MMU_IRQ_TAG; + } else if (!strncasecmp(irq_res->name, "GPU", 4)) { + irqtag = GPU_IRQ_TAG; + } else { + dev_err(&pdev->dev, "Invalid irq res name: '%s'\n", + irq_res->name); + return -EINVAL; + } +#else + irqtag = i; +#endif /* CONFIG_OF */ + kbdev->irqs[irqtag].irq = irq_res->start; + kbdev->irqs[irqtag].flags = irq_res->flags & IRQF_TRIGGER_MASK; + } + + return 0; +} + +/* Find a particular kbase device (as specified by minor number), or find the "first" device if -1 is specified */ +struct kbase_device 
*kbase_find_device(int minor) +{ + struct kbase_device *kbdev = NULL; + struct list_head *entry; + const struct list_head *dev_list = kbase_device_get_list(); + + list_for_each(entry, dev_list) { + struct kbase_device *tmp; + + tmp = list_entry(entry, struct kbase_device, entry); + if (tmp->mdev.minor == minor || minor == -1) { + kbdev = tmp; + get_device(kbdev->dev); + break; + } + } + kbase_device_put_list(dev_list); + + return kbdev; +} +EXPORT_SYMBOL(kbase_find_device); + +void kbase_release_device(struct kbase_device *kbdev) +{ + put_device(kbdev->dev); +} +EXPORT_SYMBOL(kbase_release_device); + +#ifdef CONFIG_DEBUG_FS +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && \ + !(LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 28) && \ + LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0)) +/* + * Older versions, before v4.6, of the kernel doesn't have + * kstrtobool_from_user(), except longterm 4.4.y which had it added in 4.4.28 + */ +static int kstrtobool_from_user(const char __user *s, size_t count, bool *res) +{ + char buf[4]; + + count = min(count, sizeof(buf) - 1); + + if (copy_from_user(buf, s, count)) + return -EFAULT; + buf[count] = '\0'; + + return strtobool(buf, res); +} +#endif + +static ssize_t write_ctx_infinite_cache(struct file *f, const char __user *ubuf, size_t size, loff_t *off) +{ + struct kbase_context *kctx = f->private_data; + int err; + bool value; + + err = kstrtobool_from_user(ubuf, size, &value); + if (err) + return err; + + if (value) + kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE); + else + kbase_ctx_flag_clear(kctx, KCTX_INFINITE_CACHE); + + return size; +} + +static ssize_t read_ctx_infinite_cache(struct file *f, char __user *ubuf, size_t size, loff_t *off) +{ + struct kbase_context *kctx = f->private_data; + char buf[32]; + int count; + bool value; + + value = kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE); + + count = scnprintf(buf, sizeof(buf), "%s\n", value ? 
"Y" : "N"); + + return simple_read_from_buffer(ubuf, size, off, buf, count); +} + +static const struct file_operations kbase_infinite_cache_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = write_ctx_infinite_cache, + .read = read_ctx_infinite_cache, +}; + +static ssize_t write_ctx_force_same_va(struct file *f, const char __user *ubuf, + size_t size, loff_t *off) +{ + struct kbase_context *kctx = f->private_data; + int err; + bool value; + + err = kstrtobool_from_user(ubuf, size, &value); + if (err) + return err; + + if (value) { +#if defined(CONFIG_64BIT) + /* 32-bit clients cannot force SAME_VA */ + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) + return -EINVAL; + kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA); +#else /* defined(CONFIG_64BIT) */ + /* 32-bit clients cannot force SAME_VA */ + return -EINVAL; +#endif /* defined(CONFIG_64BIT) */ + } else { + kbase_ctx_flag_clear(kctx, KCTX_FORCE_SAME_VA); + } + + return size; +} + +static ssize_t read_ctx_force_same_va(struct file *f, char __user *ubuf, + size_t size, loff_t *off) +{ + struct kbase_context *kctx = f->private_data; + char buf[32]; + int count; + bool value; + + value = kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA); + + count = scnprintf(buf, sizeof(buf), "%s\n", value ? 
"Y" : "N"); + + return simple_read_from_buffer(ubuf, size, off, buf, count); +} + +static const struct file_operations kbase_force_same_va_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = write_ctx_force_same_va, + .read = read_ctx_force_same_va, +}; +#endif /* CONFIG_DEBUG_FS */ + +static int kbase_file_create_kctx(struct kbase_file *const kfile, + base_context_create_flags const flags) +{ + struct kbase_device *kbdev = NULL; + struct kbase_context *kctx = NULL; +#ifdef CONFIG_DEBUG_FS + char kctx_name[64]; +#endif + + if (WARN_ON(!kfile)) + return -EINVAL; + + /* setup pending, try to signal that we'll do the setup, + * if setup was already in progress, err this call + */ + if (atomic_cmpxchg(&kfile->setup_state, KBASE_FILE_NEED_CTX, + KBASE_FILE_CTX_IN_PROGRESS) != KBASE_FILE_NEED_CTX) + return -EPERM; + + kbdev = kfile->kbdev; + +#if (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE) + kctx = kbase_create_context(kbdev, in_compat_syscall(), + flags, kfile->api_version, kfile->filp); +#else + kctx = kbase_create_context(kbdev, is_compat_task(), + flags, kfile->api_version, kfile->filp); +#endif /* (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE) */ + + /* if bad flags, will stay stuck in setup mode */ + if (!kctx) + return -ENOMEM; + + if (kbdev->infinite_cache_active_default) + kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE); + +#ifdef CONFIG_DEBUG_FS + snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id); + kctx->kctx_dentry = debugfs_create_dir(kctx_name, + kbdev->debugfs_ctx_directory); + + if (IS_ERR_OR_NULL(kctx->kctx_dentry)) { + /* we don't treat this as a fail - just warn about it */ + dev_warn(kbdev->dev, "couldn't create debugfs dir for kctx\n"); + } else { +#if (KERNEL_VERSION(4, 7, 0) > LINUX_VERSION_CODE) + /* prevent unprivileged use of debug file system + * in old kernel version + */ + debugfs_create_file("infinite_cache", 0600, kctx->kctx_dentry, + kctx, &kbase_infinite_cache_fops); +#else + debugfs_create_file("infinite_cache", 
0644, kctx->kctx_dentry, + kctx, &kbase_infinite_cache_fops); +#endif + debugfs_create_file("force_same_va", 0600, kctx->kctx_dentry, + kctx, &kbase_force_same_va_fops); + + mutex_init(&kctx->mem_profile_lock); + + kbase_context_debugfs_init(kctx); + } +#endif /* CONFIG_DEBUG_FS */ + + dev_dbg(kbdev->dev, "created base context\n"); + + kfile->kctx = kctx; + atomic_set(&kfile->setup_state, KBASE_FILE_COMPLETE); + + return 0; +} +static int kbase_open(struct inode *inode, struct file *filp) +{ + struct kbase_device *kbdev = NULL; + struct kbase_file *kfile; + int ret = 0; + + kbdev = kbase_find_device(iminor(inode)); + + if (!kbdev) + return -ENODEV; + + kfile = kbase_file_new(kbdev, filp); + if (!kfile) { + ret = -ENOMEM; + goto out; + } + + filp->private_data = kfile; + filp->f_mode |= FMODE_UNSIGNED_OFFSET; + + return 0; + + out: + kbase_release_device(kbdev); + return ret; +} + +static int kbase_release(struct inode *inode, struct file *filp) +{ + struct kbase_file *const kfile = filp->private_data; + + kbase_file_delete(kfile); + return 0; +} + +static int kbase_api_set_flags(struct kbase_file *kfile, + struct kbase_ioctl_set_flags *flags) +{ + int err = 0; + unsigned long const api_version = kbase_file_get_api_version(kfile); + struct kbase_context *kctx = NULL; + + /* Validate flags */ + if (flags->create_flags != + (flags->create_flags & BASEP_CONTEXT_CREATE_KERNEL_FLAGS)) + return -EINVAL; + + /* For backward compatibility, the context may have been created before + * the flags were set. + */ + if (api_version >= KBASE_API_VERSION(11, 15)) { + err = kbase_file_create_kctx(kfile, flags->create_flags); + } else { + struct kbasep_js_kctx_info *js_kctx_info = NULL; + unsigned long irq_flags = 0; + + /* If setup is incomplete (e.g. because the API version + * wasn't set) then we have to give up. 
+ */ + kctx = kbase_file_get_kctx_if_setup_complete(kfile); + if (unlikely(!kctx)) + return -EPERM; + + js_kctx_info = &kctx->jctx.sched_info; + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); + /* Translate the flags */ + if ((flags->create_flags & + BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0) + kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED); + + + spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + } + + return err; +} + +static int kbase_api_job_submit(struct kbase_context *kctx, + struct kbase_ioctl_job_submit *submit) +{ + return kbase_jd_submit(kctx, u64_to_user_ptr(submit->addr), + submit->nr_atoms, + submit->stride, false); +} + +static int kbase_api_get_gpuprops(struct kbase_context *kctx, + struct kbase_ioctl_get_gpuprops *get_props) +{ + struct kbase_gpu_props *kprops = &kctx->kbdev->gpu_props; + int err; + + if (get_props->flags != 0) { + dev_err(kctx->kbdev->dev, "Unsupported flags to get_gpuprops"); + return -EINVAL; + } + + if (get_props->size == 0) + return kprops->prop_buffer_size; + if (get_props->size < kprops->prop_buffer_size) + return -EINVAL; + + err = copy_to_user(u64_to_user_ptr(get_props->buffer), + kprops->prop_buffer, + kprops->prop_buffer_size); + if (err) + return -EFAULT; + return kprops->prop_buffer_size; +} + +static int kbase_api_post_term(struct kbase_context *kctx) +{ + kbase_event_close(kctx); + return 0; +} + +static int kbase_api_mem_alloc(struct kbase_context *kctx, + union kbase_ioctl_mem_alloc *alloc) +{ + struct kbase_va_region *reg; + u64 flags = alloc->in.flags; + u64 gpu_va; + + rcu_read_lock(); + /* Don't allow memory allocation until user space has set up the + * tracking page (which sets kctx->process_mm). Also catches when we've + * forked. 
+	 */
+	if (rcu_dereference(kctx->process_mm) != current->mm) {
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+	rcu_read_unlock();
+
+	if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY)
+		return -ENOMEM;
+
+	/* Force SAME_VA if a 64-bit client.
+	 * The only exception is GPU-executable memory if an EXEC_VA zone
+	 * has been initialized. In that case, GPU-executable memory may
+	 * or may not be SAME_VA.
+	 */
+	if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) &&
+			kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA)) {
+		if (!(flags & BASE_MEM_PROT_GPU_EX) || !kbase_has_exec_va_zone(kctx))
+			flags |= BASE_MEM_SAME_VA;
+	}
+
+	/* flags is in/out: kbase_mem_alloc() may adjust it before it is reported back */
+	reg = kbase_mem_alloc(kctx, alloc->in.va_pages,
+			alloc->in.commit_pages,
+			alloc->in.extent,
+			&flags, &gpu_va);
+
+	if (!reg)
+		return -ENOMEM;
+
+	alloc->out.flags = flags;
+	alloc->out.gpu_va = gpu_va;
+
+	return 0;
+}
+/**
+ * kbase_api_mem_query - Handler for KBASE_IOCTL_MEM_QUERY.
+ * @kctx:  Context owning the region.
+ * @query: In: GPU address and query id. Out: queried value.
+ *
+ * Return: Result of kbase_mem_query().
+ */
+static int kbase_api_mem_query(struct kbase_context *kctx,
+		union kbase_ioctl_mem_query *query)
+{
+	return kbase_mem_query(kctx, query->in.gpu_addr,
+			query->in.query, &query->out.value);
+}
+/**
+ * kbase_api_mem_free - Handler for KBASE_IOCTL_MEM_FREE.
+ * @kctx: Context owning the region.
+ * @free: GPU address to free.
+ *
+ * Return: Result of kbase_mem_free().
+ */
+static int kbase_api_mem_free(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_free *free)
+{
+	return kbase_mem_free(kctx, free->gpu_addr);
+}
+/**
+ * kbase_api_hwcnt_reader_setup - Handler for KBASE_IOCTL_HWCNT_READER_SETUP.
+ * @kctx:  Context; only its device's vinstr context is used.
+ * @setup: Reader setup parameters passed through unchanged.
+ *
+ * Return: Result of kbase_vinstr_hwcnt_reader_setup().
+ */
+static int kbase_api_hwcnt_reader_setup(struct kbase_context *kctx,
+		struct kbase_ioctl_hwcnt_reader_setup *setup)
+{
+	return kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, setup);
+}
+/**
+ * kbase_api_hwcnt_enable - Handler for KBASE_IOCTL_HWCNT_ENABLE.
+ * @kctx:   Context owning the legacy hardware counter client.
+ * @enable: A non-zero dump_buffer creates the client, zero destroys it.
+ *
+ * Runs under kctx->legacy_hwcnt_lock.
+ *
+ * Return: 0 or the result of kbase_hwcnt_legacy_client_create(); -EBUSY if a
+ * client already exists; -EINVAL if there is no client to destroy.
+ */
+static int kbase_api_hwcnt_enable(struct kbase_context *kctx,
+		struct kbase_ioctl_hwcnt_enable *enable)
+{
+	int ret;
+
+	mutex_lock(&kctx->legacy_hwcnt_lock);
+	if (enable->dump_buffer != 0) {
+		/* Non-zero dump buffer, so user wants to create the client */
+		if (kctx->legacy_hwcnt_cli == NULL) {
+			ret = kbase_hwcnt_legacy_client_create(
+				kctx->kbdev->hwcnt_gpu_virt,
+				enable,
+				&kctx->legacy_hwcnt_cli);
+		} else {
+			/* This context already has a client */
+			ret = -EBUSY;
+		}
+	} else {
+		/* Zero dump buffer, so user wants to destroy the client */
+		if (kctx->legacy_hwcnt_cli != NULL) {
+			
kbase_hwcnt_legacy_client_destroy(
+				kctx->legacy_hwcnt_cli);
+			kctx->legacy_hwcnt_cli = NULL;
+			ret = 0;
+		} else {
+			/* This context has no client to destroy */
+			ret = -EINVAL;
+		}
+	}
+	mutex_unlock(&kctx->legacy_hwcnt_lock);
+
+	return ret;
+}
+
+/**
+ * kbase_api_hwcnt_dump - Handler for KBASE_IOCTL_HWCNT_DUMP.
+ * @kctx: Context owning the legacy hardware counter client.
+ *
+ * Return: Result of kbase_hwcnt_legacy_client_dump(), called under
+ * kctx->legacy_hwcnt_lock.
+ */
+static int kbase_api_hwcnt_dump(struct kbase_context *kctx)
+{
+	int ret;
+
+	mutex_lock(&kctx->legacy_hwcnt_lock);
+	ret = kbase_hwcnt_legacy_client_dump(kctx->legacy_hwcnt_cli);
+	mutex_unlock(&kctx->legacy_hwcnt_lock);
+
+	return ret;
+}
+
+/**
+ * kbase_api_hwcnt_clear - Handler for KBASE_IOCTL_HWCNT_CLEAR.
+ * @kctx: Context owning the legacy hardware counter client.
+ *
+ * Return: Result of kbase_hwcnt_legacy_client_clear(), called under
+ * kctx->legacy_hwcnt_lock.
+ */
+static int kbase_api_hwcnt_clear(struct kbase_context *kctx)
+{
+	int ret;
+
+	mutex_lock(&kctx->legacy_hwcnt_lock);
+	ret = kbase_hwcnt_legacy_client_clear(kctx->legacy_hwcnt_cli);
+	mutex_unlock(&kctx->legacy_hwcnt_lock);
+
+	return ret;
+}
+
+/**
+ * kbase_api_get_cpu_gpu_timeinfo - Handler for KBASE_IOCTL_GET_CPU_GPU_TIMEINFO.
+ * @kctx:     Context whose device is sampled.
+ * @timeinfo: In: request_flags selecting the values wanted. Out: the GPU
+ *            timestamp, GPU cycle counter and/or monotonic time.
+ *
+ * Only the outputs selected by request_flags are written. The call is
+ * bracketed by kbase_pm_context_active() / kbase_pm_context_idle().
+ *
+ * Return: Always 0.
+ */
+static int kbase_api_get_cpu_gpu_timeinfo(struct kbase_context *kctx,
+		union kbase_ioctl_get_cpu_gpu_timeinfo *timeinfo)
+{
+	/* Latch the request first: in and out are members of the same union,
+	 * so writing any output may overwrite request_flags.
+	 */
+	u32 flags = timeinfo->in.request_flags;
+	struct timespec64 ts;
+	u64 timestamp;
+	u64 cycle_cnt;
+
+	kbase_pm_context_active(kctx->kbdev);
+
+	/* A NULL pointer tells the backend that value was not requested */
+	kbase_backend_get_gpu_time(kctx->kbdev,
+		(flags & BASE_TIMEINFO_CYCLE_COUNTER_FLAG) ? &cycle_cnt : NULL,
+		(flags & BASE_TIMEINFO_TIMESTAMP_FLAG) ? &timestamp : NULL,
+		(flags & BASE_TIMEINFO_MONOTONIC_FLAG) ?
&ts : NULL);
+	/* Copy back only what was requested; the locals for unrequested values were never written */
+	if (flags & BASE_TIMEINFO_TIMESTAMP_FLAG)
+		timeinfo->out.timestamp = timestamp;
+
+	if (flags & BASE_TIMEINFO_CYCLE_COUNTER_FLAG)
+		timeinfo->out.cycle_counter = cycle_cnt;
+
+	if (flags & BASE_TIMEINFO_MONOTONIC_FLAG) {
+		timeinfo->out.sec = ts.tv_sec;
+		timeinfo->out.nsec = ts.tv_nsec;
+	}
+
+	kbase_pm_context_idle(kctx->kbdev);
+
+	return 0;
+}
+/* KBASE_IOCTL_HWCNT_SET handler: only built for CONFIG_MALI_NO_MALI; always returns 0. */
+#ifdef CONFIG_MALI_NO_MALI
+static int kbase_api_hwcnt_set(struct kbase_context *kctx,
+		struct kbase_ioctl_hwcnt_values *values)
+{
+	gpu_model_set_dummy_prfcnt_sample(
+			(u32 __user *)(uintptr_t)values->data,
+			values->size);
+
+	return 0;
+}
+#endif
+/**
+ * kbase_api_disjoint_query - Handler for KBASE_IOCTL_DISJOINT_QUERY.
+ * @kctx:  Context; only its device is used.
+ * @query: Out: value of kbase_disjoint_event_get() for the device.
+ *
+ * Return: Always 0.
+ */
+static int kbase_api_disjoint_query(struct kbase_context *kctx,
+		struct kbase_ioctl_disjoint_query *query)
+{
+	query->counter = kbase_disjoint_event_get(kctx->kbdev);
+
+	return 0;
+}
+/**
+ * kbase_api_get_ddk_version - Handler for KBASE_IOCTL_GET_DDK_VERSION.
+ * @kctx:    Unused.
+ * @version: User buffer address and size.
+ *
+ * A zero version_buffer is a query: nothing is copied and the required size
+ * is returned.
+ *
+ * Return: sizeof(KERNEL_SIDE_DDK_VERSION_STRING) on success; -EOVERFLOW if
+ * the user buffer is smaller than that; -EFAULT if the copy fails.
+ */
+static int kbase_api_get_ddk_version(struct kbase_context *kctx,
+		struct kbase_ioctl_get_ddk_version *version)
+{
+	int ret;
+	int len = sizeof(KERNEL_SIDE_DDK_VERSION_STRING);
+
+	if (version->version_buffer == 0)
+		return len;
+
+	if (version->size < len)
+		return -EOVERFLOW;
+
+	ret = copy_to_user(u64_to_user_ptr(version->version_buffer),
+			KERNEL_SIDE_DDK_VERSION_STRING,
+			sizeof(KERNEL_SIDE_DDK_VERSION_STRING));
+
+	if (ret)
+		return -EFAULT;
+
+	return len;
+}
+
+/* Defaults for legacy just-in-time memory allocator initialization
+ * kernel calls
+ */
+#define DEFAULT_MAX_JIT_ALLOCATIONS 255
+#define JIT_LEGACY_TRIM_LEVEL (0) /* No trimming */
+/**
+ * kbase_api_mem_jit_init_10_2 - Handler for KBASE_IOCTL_MEM_JIT_INIT_10_2.
+ * @kctx:     Context to initialise JIT for; its jit_version is set to 1.
+ * @jit_init: Supplies only va_pages; the remaining parameters use the legacy
+ *            defaults defined above.
+ *
+ * Return: Result of kbase_region_tracker_init_jit().
+ */
+static int kbase_api_mem_jit_init_10_2(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_jit_init_10_2 *jit_init)
+{
+	kctx->jit_version = 1;
+
+	/* since no phys_pages parameter, use the maximum: va_pages */
+	return kbase_region_tracker_init_jit(kctx, jit_init->va_pages,
+			DEFAULT_MAX_JIT_ALLOCATIONS,
+			JIT_LEGACY_TRIM_LEVEL, BASE_MEM_GROUP_DEFAULT,
+			jit_init->va_pages);
+}
+/**
+ * kbase_api_mem_jit_init_11_5 - Handler for KBASE_IOCTL_MEM_JIT_INIT_11_5.
+ * @kctx:     Context to initialise JIT for; its jit_version is set to 2.
+ * @jit_init: va_pages, max_allocations, trim_level, group_id and padding.
+ *
+ * Return: Result of kbase_region_tracker_init_jit(); -EINVAL if any padding
+ * byte is non-zero.
+ */
+static int kbase_api_mem_jit_init_11_5(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_jit_init_11_5 *jit_init)
+{
+	int i;
+
+	
kctx->jit_version = 2;
+	/* NOTE: jit_version is already updated when the padding check below rejects the call */
+	for (i = 0; i < sizeof(jit_init->padding); i++) {
+		/* Ensure all padding bytes are 0 for potential future
+		 * extension
+		 */
+		if (jit_init->padding[i])
+			return -EINVAL;
+	}
+
+	/* since no phys_pages parameter, use the maximum: va_pages */
+	return kbase_region_tracker_init_jit(kctx, jit_init->va_pages,
+			jit_init->max_allocations, jit_init->trim_level,
+			jit_init->group_id, jit_init->va_pages);
+}
+/**
+ * kbase_api_mem_jit_init - Handler for KBASE_IOCTL_MEM_JIT_INIT.
+ * @kctx:     Context to initialise JIT for; its jit_version is set to 3.
+ * @jit_init: va_pages, max_allocations, trim_level, group_id, phys_pages and
+ *            padding.
+ *
+ * Return: Result of kbase_region_tracker_init_jit(); -EINVAL if any padding
+ * byte is non-zero.
+ */
+static int kbase_api_mem_jit_init(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_jit_init *jit_init)
+{
+	int i;
+
+	kctx->jit_version = 3;
+
+	for (i = 0; i < sizeof(jit_init->padding); i++) {
+		/* Ensure all padding bytes are 0 for potential future
+		 * extension
+		 */
+		if (jit_init->padding[i])
+			return -EINVAL;
+	}
+
+	return kbase_region_tracker_init_jit(kctx, jit_init->va_pages,
+			jit_init->max_allocations, jit_init->trim_level,
+			jit_init->group_id, jit_init->phys_pages);
+}
+/**
+ * kbase_api_mem_exec_init - Handler for KBASE_IOCTL_MEM_EXEC_INIT.
+ * @kctx:      Context to initialise.
+ * @exec_init: Supplies va_pages.
+ *
+ * Return: Result of kbase_region_tracker_init_exec().
+ */
+static int kbase_api_mem_exec_init(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_exec_init *exec_init)
+{
+	return kbase_region_tracker_init_exec(kctx, exec_init->va_pages);
+}
+/**
+ * kbase_api_mem_sync - Handler for KBASE_IOCTL_MEM_SYNC.
+ * @kctx: Context owning the memory.
+ * @sync: Handle, user address, size and sync type.
+ *
+ * Repackages the ioctl arguments as a struct basep_syncset.
+ *
+ * Return: Result of kbase_sync_now().
+ */
+static int kbase_api_mem_sync(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_sync *sync)
+{
+	struct basep_syncset sset = {
+		.mem_handle.basep.handle = sync->handle,
+		.user_addr = sync->user_addr,
+		.size = sync->size,
+		.type = sync->type
+	};
+
+	return kbase_sync_now(kctx, &sset);
+}
+/**
+ * kbase_api_mem_find_cpu_offset - Handler for KBASE_IOCTL_MEM_FIND_CPU_OFFSET.
+ * @kctx: Context owning the mapping.
+ * @find: In: CPU address and size. Out: offset.
+ *
+ * Return: Result of kbasep_find_enclosing_cpu_mapping_offset().
+ */
+static int kbase_api_mem_find_cpu_offset(struct kbase_context *kctx,
+		union kbase_ioctl_mem_find_cpu_offset *find)
+{
+	return kbasep_find_enclosing_cpu_mapping_offset(
+			kctx,
+			find->in.cpu_addr,
+			find->in.size,
+			&find->out.offset);
+}
+/**
+ * kbase_api_mem_find_gpu_start_and_offset - Handler for
+ * KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET.
+ * @kctx: Context owning the mapping.
+ * @find: In: GPU address and size. Out: start address and offset.
+ *
+ * Return: Result of kbasep_find_enclosing_gpu_mapping_start_and_offset().
+ */
+static int kbase_api_mem_find_gpu_start_and_offset(struct kbase_context *kctx,
+		union kbase_ioctl_mem_find_gpu_start_and_offset *find)
+{
+	return kbasep_find_enclosing_gpu_mapping_start_and_offset(
+			kctx,
+			find->in.gpu_addr,
+			find->in.size,
+			&find->out.start,
+			&find->out.offset);
+}
+/* Handler for KBASE_IOCTL_GET_CONTEXT_ID: reports kctx->id, always returns 0. */
+static int kbase_api_get_context_id(struct
kbase_context *kctx,
+		struct kbase_ioctl_get_context_id *info)
+{
+	info->id = kctx->id;
+
+	return 0;
+}
+/**
+ * kbase_api_tlstream_acquire - Handler for KBASE_IOCTL_TLSTREAM_ACQUIRE.
+ * @kctx:    Context; only its device is used.
+ * @acquire: Supplies the flags.
+ *
+ * Return: Result of kbase_timeline_io_acquire().
+ */
+static int kbase_api_tlstream_acquire(struct kbase_context *kctx,
+		struct kbase_ioctl_tlstream_acquire *acquire)
+{
+	return kbase_timeline_io_acquire(kctx->kbdev, acquire->flags);
+}
+/**
+ * kbase_api_tlstream_flush - Handler for KBASE_IOCTL_TLSTREAM_FLUSH.
+ * @kctx: Context; only its device's timeline is used.
+ *
+ * Return: Always 0.
+ */
+static int kbase_api_tlstream_flush(struct kbase_context *kctx)
+{
+	kbase_timeline_streams_flush(kctx->kbdev->timeline);
+
+	return 0;
+}
+/**
+ * kbase_api_mem_commit - Handler for KBASE_IOCTL_MEM_COMMIT.
+ * @kctx:   Context owning the region.
+ * @commit: GPU address and page count.
+ *
+ * Return: Result of kbase_mem_commit().
+ */
+static int kbase_api_mem_commit(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_commit *commit)
+{
+	return kbase_mem_commit(kctx, commit->gpu_addr, commit->pages);
+}
+/**
+ * kbase_api_mem_alias - Handler for KBASE_IOCTL_MEM_ALIAS.
+ * @kctx:  Context to create the alias in.
+ * @alias: In: flags, stride, nents and the user address of the aliasing
+ *         info array. Out: GPU VA, page count and final flags.
+ *
+ * The aliasing info array is copied into a temporary vmalloc() buffer that
+ * is freed on every exit path.
+ *
+ * Return: 0 on success; -EINVAL if nents is 0 or above 2048, stride exceeds
+ * U64_MAX / 2048, or kernel-only flags are set; -ENOMEM if the buffer cannot
+ * be allocated or kbase_mem_alias() yields a GPU VA of 0; -EFAULT if the
+ * copy from user space fails.
+ */
+static int kbase_api_mem_alias(struct kbase_context *kctx,
+		union kbase_ioctl_mem_alias *alias)
+{
+	struct base_mem_aliasing_info *ai;
+	u64 flags;
+	int err;
+
+	if (alias->in.nents == 0 || alias->in.nents > 2048)
+		return -EINVAL;
+
+	if (alias->in.stride > (U64_MAX / 2048))
+		return -EINVAL;
+	/* nents is capped at 2048 above, which bounds this multiplication */
+	ai = vmalloc(sizeof(*ai) * alias->in.nents);
+	if (!ai)
+		return -ENOMEM;
+
+	err = copy_from_user(ai,
+			u64_to_user_ptr(alias->in.aliasing_info),
+			sizeof(*ai) * alias->in.nents);
+	if (err) {
+		vfree(ai);
+		return -EFAULT;
+	}
+
+	flags = alias->in.flags;
+	if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY) {
+		vfree(ai);
+		return -EINVAL;
+	}
+
+	alias->out.gpu_va = kbase_mem_alias(kctx, &flags,
+			alias->in.stride, alias->in.nents,
+			ai, &alias->out.va_pages);
+
+	alias->out.flags = flags;
+
+	vfree(ai);
+
+	if (alias->out.gpu_va == 0)
+		return -ENOMEM;
+
+	return 0;
+}
+/**
+ * kbase_api_mem_import - Handler for KBASE_IOCTL_MEM_IMPORT.
+ * @kctx:   Context to import into.
+ * @import: In: type, handle pointer, padding and flags. Out: GPU VA, page
+ *          count and final flags.
+ *
+ * Return: Result of kbase_mem_import(); -ENOMEM if kernel-only flags are set.
+ */
+static int kbase_api_mem_import(struct kbase_context *kctx,
+		union kbase_ioctl_mem_import *import)
+{
+	int ret;
+	u64 flags = import->in.flags;
+
+	if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY)
+		return -ENOMEM;
+
+	ret = kbase_mem_import(kctx,
+			import->in.type,
+			u64_to_user_ptr(import->in.phandle),
+			import->in.padding,
+			&import->out.gpu_va,
+			&import->out.va_pages,
+			&flags);
+
+	import->out.flags = flags;
+
+	return ret;
+}
+/* Handler for KBASE_IOCTL_MEM_FLAGS_CHANGE: rejects kernel-only flags, else defers to kbase_mem_flags_change(). */
+static int kbase_api_mem_flags_change(struct kbase_context *kctx,
+		struct
kbase_ioctl_mem_flags_change *change)
+{
+	if (change->flags & BASEP_MEM_FLAGS_KERNEL_ONLY)
+		return -ENOMEM;
+
+	return kbase_mem_flags_change(kctx, change->gpu_va,
+			change->flags, change->mask);
+}
+/**
+ * kbase_api_stream_create - Handler for KBASE_IOCTL_STREAM_CREATE.
+ * @kctx:   Unused.
+ * @stream: Supplies the stream name.
+ *
+ * Return: The new file descriptor on success, or the error from
+ * kbase_sync_fence_stream_create(); -EINVAL if the last byte of the name is
+ * not NUL; -ENOENT when neither CONFIG_SYNC nor CONFIG_SYNC_FILE is defined.
+ */
+static int kbase_api_stream_create(struct kbase_context *kctx,
+		struct kbase_ioctl_stream_create *stream)
+{
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	int fd, ret;
+
+	/* Name must be NULL-terminated and padded with NULLs, so check last
+	 * character is NULL
+	 */
+	if (stream->name[sizeof(stream->name)-1] != 0)
+		return -EINVAL;
+
+	ret = kbase_sync_fence_stream_create(stream->name, &fd);
+
+	if (ret)
+		return ret;
+	return fd;
+#else
+	return -ENOENT;
+#endif
+}
+/**
+ * kbase_api_fence_validate - Handler for KBASE_IOCTL_FENCE_VALIDATE.
+ * @kctx:     Unused.
+ * @validate: Supplies the fence file descriptor.
+ *
+ * Return: Result of kbase_sync_fence_validate(); -ENOENT when neither
+ * CONFIG_SYNC nor CONFIG_SYNC_FILE is defined.
+ */
+static int kbase_api_fence_validate(struct kbase_context *kctx,
+		struct kbase_ioctl_fence_validate *validate)
+{
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	return kbase_sync_fence_validate(validate->fd);
+#else
+	return -ENOENT;
+#endif
+}
+/**
+ * kbase_api_mem_profile_add - Handler for KBASE_IOCTL_MEM_PROFILE_ADD.
+ * @kctx: Context the profile data belongs to.
+ * @data: User buffer address and length.
+ *
+ * Return: Result of kbasep_mem_profile_debugfs_insert(); -EINVAL if len
+ * exceeds KBASE_MEM_PROFILE_MAX_BUF_SIZE; -ENOMEM if kmalloc() returns NULL
+ * or the zero-size pointer (i.e. len is 0); -EFAULT if the copy fails.
+ */
+static int kbase_api_mem_profile_add(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_profile_add *data)
+{
+	char *buf;
+	int err;
+
+	if (data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) {
+		dev_err(kctx->kbdev->dev, "mem_profile_add: buffer too big\n");
+		return -EINVAL;
+	}
+
+	buf = kmalloc(data->len, GFP_KERNEL);
+	if (ZERO_OR_NULL_PTR(buf))
+		return -ENOMEM;
+
+	err = copy_from_user(buf, u64_to_user_ptr(data->buffer),
+			data->len);
+	if (err) {
+		kfree(buf);
+		return -EFAULT;
+	}
+	/* NOTE(review): buf is not freed here on any later path; presumably ownership passes to the callee - confirm */
+	return kbasep_mem_profile_debugfs_insert(kctx, buf, data->len);
+}
+/**
+ * kbase_api_soft_event_update - Handler for KBASE_IOCTL_SOFT_EVENT_UPDATE.
+ * @kctx:   Context owning the soft event.
+ * @update: Event, new status and flags.
+ *
+ * Return: Result of kbase_soft_event_update(); -EINVAL if flags is non-zero.
+ */
+static int kbase_api_soft_event_update(struct kbase_context *kctx,
+		struct kbase_ioctl_soft_event_update *update)
+{
+	if (update->flags != 0)
+		return -EINVAL;
+
+	return kbase_soft_event_update(kctx, update->event, update->new_status);
+}
+/**
+ * kbase_api_sticky_resource_map - Handler for KBASE_IOCTL_STICKY_RESOURCE_MAP.
+ * @kctx: Context owning the resources.
+ * @map:  User address of an array of GPU addresses, and its element count.
+ *
+ * Acquires every listed resource under the GPU VM lock. If one acquisition
+ * fails, the resources acquired so far are force-released again.
+ *
+ * Return: 0 on success; -EOVERFLOW if count is 0 or above
+ * BASE_EXT_RES_COUNT_MAX; -EFAULT if the copy fails; -EINVAL if a resource
+ * cannot be acquired.
+ */
+static int kbase_api_sticky_resource_map(struct kbase_context *kctx,
+		struct kbase_ioctl_sticky_resource_map *map)
+{
+	int ret;
+	u64 i;
+	u64 gpu_addr[BASE_EXT_RES_COUNT_MAX];
+
+	if (!map->count || map->count > BASE_EXT_RES_COUNT_MAX)
+		
return -EOVERFLOW;
+
+	ret = copy_from_user(gpu_addr, u64_to_user_ptr(map->address),
+			sizeof(u64) * map->count);
+
+	if (ret != 0)
+		return -EFAULT;
+
+	kbase_gpu_vm_lock(kctx);
+
+	for (i = 0; i < map->count; i++) {
+		if (!kbase_sticky_resource_acquire(kctx, gpu_addr[i])) {
+			/* Invalid resource */
+			ret = -EINVAL;
+			break;
+		}
+	}
+	/* Roll back: i is the index that failed, so release entries [0, i) in reverse */
+	if (ret != 0) {
+		while (i > 0) {
+			i--;
+			kbase_sticky_resource_release_force(kctx, NULL, gpu_addr[i]);
+		}
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return ret;
+}
+/**
+ * kbase_api_sticky_resource_unmap - Handler for
+ * KBASE_IOCTL_STICKY_RESOURCE_UNMAP.
+ * @kctx:  Context owning the resources.
+ * @unmap: User address of an array of GPU addresses, and its element count.
+ *
+ * Force-releases every listed resource under the GPU VM lock and keeps going
+ * after a failure.
+ *
+ * Return: 0 on success; -EOVERFLOW if count is 0 or above
+ * BASE_EXT_RES_COUNT_MAX; -EFAULT if the copy fails; -EINVAL if any release
+ * failed.
+ */
+static int kbase_api_sticky_resource_unmap(struct kbase_context *kctx,
+		struct kbase_ioctl_sticky_resource_unmap *unmap)
+{
+	int ret;
+	u64 i;
+	u64 gpu_addr[BASE_EXT_RES_COUNT_MAX];
+
+	if (!unmap->count || unmap->count > BASE_EXT_RES_COUNT_MAX)
+		return -EOVERFLOW;
+
+	ret = copy_from_user(gpu_addr, u64_to_user_ptr(unmap->address),
+			sizeof(u64) * unmap->count);
+
+	if (ret != 0)
+		return -EFAULT;
+
+	kbase_gpu_vm_lock(kctx);
+
+	for (i = 0; i < unmap->count; i++) {
+		if (!kbase_sticky_resource_release_force(kctx, NULL, gpu_addr[i])) {
+			/* Invalid resource, but we keep going anyway */
+			ret = -EINVAL;
+		}
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return ret;
+}
+/* Unit-test-only ioctl handlers; both forward to the timeline code and return 0. */
+#if MALI_UNIT_TEST
+static int kbase_api_tlstream_test(struct kbase_context *kctx,
+		struct kbase_ioctl_tlstream_test *test)
+{
+	kbase_timeline_test(
+			kctx->kbdev,
+			test->tpw_count,
+			test->msg_delay,
+			test->msg_count,
+			test->aux_msg);
+
+	return 0;
+}
+
+static int kbase_api_tlstream_stats(struct kbase_context *kctx,
+		struct kbase_ioctl_tlstream_stats *stats)
+{
+	kbase_timeline_stats(kctx->kbdev->timeline,
+			&stats->bytes_collected,
+			&stats->bytes_generated);
+
+	return 0;
+}
+#endif /* MALI_UNIT_TEST */
+
+/*
+ * KBASE_HANDLE_IOCTL - dispatch an ioctl that carries no argument struct.
+ * Build-time checks that cmd encodes no data direction, then returns
+ * function(arg) from the enclosing function.
+ */
+#define KBASE_HANDLE_IOCTL(cmd, function, arg) \
+	do { \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE); \
+		return function(arg); \
+	} while (0)
+/*
+ * KBASE_HANDLE_IOCTL_IN - dispatch an ioctl whose argument is only read from
+ * user space. Relies on a variable named uarg in the enclosing scope, and
+ * returns from the enclosing function.
+ */
+#define KBASE_HANDLE_IOCTL_IN(cmd, function, type, arg) \
+	do { \
+		type param; \
+		int err; \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_WRITE); \
+		
BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \
+		err = copy_from_user(&param, uarg, sizeof(param)); \
+		if (err) \
+			return -EFAULT; \
+		return function(arg, &param); \
+	} while (0)
+
+/*
+ * KBASE_HANDLE_IOCTL_OUT - dispatch an ioctl whose argument is only written
+ * back to user space. param is zeroed before the handler runs and is copied
+ * out whatever the handler returns. Relies on a variable named uarg in the
+ * enclosing scope, and returns from the enclosing function.
+ */
+#define KBASE_HANDLE_IOCTL_OUT(cmd, function, type, arg) \
+	do { \
+		type param; \
+		int ret, err; \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_READ); \
+		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \
+		memset(&param, 0, sizeof(param)); \
+		ret = function(arg, &param); \
+		err = copy_to_user(uarg, &param, sizeof(param)); \
+		if (err) \
+			return -EFAULT; \
+		return ret; \
+	} while (0)
+
+/*
+ * KBASE_HANDLE_IOCTL_INOUT - dispatch an ioctl whose argument is copied in
+ * from user space, processed, and copied back out whatever the handler
+ * returns. Relies on a variable named uarg in the enclosing scope, and
+ * returns from the enclosing function.
+ */
+#define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type, arg) \
+	do { \
+		type param; \
+		int ret, err; \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != (_IOC_WRITE|_IOC_READ)); \
+		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \
+		err = copy_from_user(&param, uarg, sizeof(param)); \
+		if (err) \
+			return -EFAULT; \
+		ret = function(arg, &param); \
+		err = copy_to_user(uarg, &param, sizeof(param)); \
+		if (err) \
+			return -EFAULT; \
+		return ret; \
+	} while (0)
+
+/**
+ * kbase_ioctl - ioctl entry point for the device file.
+ * @filp: File whose private_data holds the kbase_file.
+ * @cmd:  ioctl command number.
+ * @arg:  User-space pointer to the command's argument struct, if any.
+ *
+ * KBASE_IOCTL_VERSION_CHECK and KBASE_IOCTL_SET_FLAGS are served before the
+ * context exists. Every other command requires a fully set-up context.
+ * Each KBASE_HANDLE_IOCTL* macro returns from this function, so the break
+ * that follows it is never reached.
+ *
+ * Return: The handler's result; -EFAULT if an argument copy fails; -EPERM if
+ * setup is not complete; -ENOIOCTLCMD for an unknown command.
+ */
+static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	struct kbase_file *const kfile = filp->private_data;
+	struct kbase_context *kctx = NULL;
+	struct kbase_device *kbdev = kfile->kbdev;
+	void __user *uarg = (void __user *)arg;
+
+	/* Only these ioctls are available until setup is complete */
+	switch (cmd) {
+	case KBASE_IOCTL_VERSION_CHECK:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_VERSION_CHECK,
+				kbase_api_handshake,
+				struct kbase_ioctl_version_check,
+				kfile);
+		break;
+
+	case KBASE_IOCTL_SET_FLAGS:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_FLAGS,
+				kbase_api_set_flags,
+				struct kbase_ioctl_set_flags,
+				kfile);
+		break;
+	}
+
+	kctx = kbase_file_get_kctx_if_setup_complete(kfile);
+	if (unlikely(!kctx))
+		return -EPERM;
+
+	/* Normal ioctls */
+	switch (cmd) {
+	case KBASE_IOCTL_JOB_SUBMIT:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_JOB_SUBMIT,
+				kbase_api_job_submit,
+				struct kbase_ioctl_job_submit,
+				kctx);
+		break;
+	case KBASE_IOCTL_GET_GPUPROPS:
+		
KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_GPUPROPS,
+				kbase_api_get_gpuprops,
+				struct kbase_ioctl_get_gpuprops,
+				kctx);
+		break;
+	case KBASE_IOCTL_POST_TERM:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_POST_TERM,
+				kbase_api_post_term,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_ALLOC:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALLOC,
+				kbase_api_mem_alloc,
+				union kbase_ioctl_mem_alloc,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_QUERY:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_QUERY,
+				kbase_api_mem_query,
+				union kbase_ioctl_mem_query,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_FREE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FREE,
+				kbase_api_mem_free,
+				struct kbase_ioctl_mem_free,
+				kctx);
+		break;
+	case KBASE_IOCTL_DISJOINT_QUERY:
+		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_DISJOINT_QUERY,
+				kbase_api_disjoint_query,
+				struct kbase_ioctl_disjoint_query,
+				kctx);
+		break;
+	case KBASE_IOCTL_GET_DDK_VERSION:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_DDK_VERSION,
+				kbase_api_get_ddk_version,
+				struct kbase_ioctl_get_ddk_version,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_JIT_INIT_10_2:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT_10_2,
+				kbase_api_mem_jit_init_10_2,
+				struct kbase_ioctl_mem_jit_init_10_2,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_JIT_INIT_11_5:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT_11_5,
+				kbase_api_mem_jit_init_11_5,
+				struct kbase_ioctl_mem_jit_init_11_5,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_JIT_INIT:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT,
+				kbase_api_mem_jit_init,
+				struct kbase_ioctl_mem_jit_init,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_EXEC_INIT:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_EXEC_INIT,
+				kbase_api_mem_exec_init,
+				struct kbase_ioctl_mem_exec_init,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_SYNC:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_SYNC,
+				kbase_api_mem_sync,
+				struct kbase_ioctl_mem_sync,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_FIND_CPU_OFFSET:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_CPU_OFFSET,
+				kbase_api_mem_find_cpu_offset,
+				union kbase_ioctl_mem_find_cpu_offset,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET,
+				kbase_api_mem_find_gpu_start_and_offset,
+				union kbase_ioctl_mem_find_gpu_start_and_offset,
+				kctx);
+		break;
+	case KBASE_IOCTL_GET_CONTEXT_ID:
+		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_GET_CONTEXT_ID,
+				kbase_api_get_context_id,
+				struct kbase_ioctl_get_context_id,
+				kctx);
+		break;
+	case KBASE_IOCTL_TLSTREAM_ACQUIRE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_ACQUIRE,
+				kbase_api_tlstream_acquire,
+				struct kbase_ioctl_tlstream_acquire,
+				kctx);
+		break;
+	case KBASE_IOCTL_TLSTREAM_FLUSH:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_TLSTREAM_FLUSH,
+				kbase_api_tlstream_flush,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_COMMIT:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_COMMIT,
+				kbase_api_mem_commit,
+				struct kbase_ioctl_mem_commit,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_ALIAS:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALIAS,
+				kbase_api_mem_alias,
+				union kbase_ioctl_mem_alias,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_IMPORT:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_IMPORT,
+				kbase_api_mem_import,
+				union kbase_ioctl_mem_import,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_FLAGS_CHANGE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FLAGS_CHANGE,
+				kbase_api_mem_flags_change,
+				struct kbase_ioctl_mem_flags_change,
+				kctx);
+		break;
+	case KBASE_IOCTL_STREAM_CREATE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STREAM_CREATE,
+				kbase_api_stream_create,
+				struct kbase_ioctl_stream_create,
+				kctx);
+		break;
+	case KBASE_IOCTL_FENCE_VALIDATE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_FENCE_VALIDATE,
+				kbase_api_fence_validate,
+				struct kbase_ioctl_fence_validate,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_PROFILE_ADD:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_PROFILE_ADD,
+				kbase_api_mem_profile_add,
+				struct kbase_ioctl_mem_profile_add,
+				kctx);
+		break;
+
+	case KBASE_IOCTL_SOFT_EVENT_UPDATE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SOFT_EVENT_UPDATE,
+				kbase_api_soft_event_update,
+				struct kbase_ioctl_soft_event_update,
+				kctx);
+		break;
+
+	case KBASE_IOCTL_STICKY_RESOURCE_MAP:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_MAP,
+				kbase_api_sticky_resource_map,
+				struct kbase_ioctl_sticky_resource_map,
+				kctx);
+		break;
+	case KBASE_IOCTL_STICKY_RESOURCE_UNMAP:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_UNMAP,
+				kbase_api_sticky_resource_unmap,
+				struct kbase_ioctl_sticky_resource_unmap,
+				kctx);
+		break;
+
+	/* Instrumentation. */
+	case KBASE_IOCTL_HWCNT_READER_SETUP:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_READER_SETUP,
+				kbase_api_hwcnt_reader_setup,
+				struct kbase_ioctl_hwcnt_reader_setup,
+				kctx);
+		break;
+	case KBASE_IOCTL_HWCNT_ENABLE:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_ENABLE,
+				kbase_api_hwcnt_enable,
+				struct kbase_ioctl_hwcnt_enable,
+				kctx);
+		break;
+	case KBASE_IOCTL_HWCNT_DUMP:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_DUMP,
+				kbase_api_hwcnt_dump,
+				kctx);
+		break;
+	case KBASE_IOCTL_HWCNT_CLEAR:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_CLEAR,
+				kbase_api_hwcnt_clear,
+				kctx);
+		break;
+	case KBASE_IOCTL_GET_CPU_GPU_TIMEINFO:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_GET_CPU_GPU_TIMEINFO,
+				kbase_api_get_cpu_gpu_timeinfo,
+				union kbase_ioctl_get_cpu_gpu_timeinfo,
+				kctx);
+		break;
+#ifdef CONFIG_MALI_NO_MALI
+	case KBASE_IOCTL_HWCNT_SET:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_SET,
+				kbase_api_hwcnt_set,
+				struct kbase_ioctl_hwcnt_values,
+				kctx);
+		break;
+#endif
+#ifdef CONFIG_MALI_CINSTR_GWT
+	case KBASE_IOCTL_CINSTR_GWT_START:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_START,
+				kbase_gpu_gwt_start,
+				kctx);
+		break;
+	case KBASE_IOCTL_CINSTR_GWT_STOP:
+		KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_STOP,
+				kbase_gpu_gwt_stop,
+				kctx);
+		break;
+	case KBASE_IOCTL_CINSTR_GWT_DUMP:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CINSTR_GWT_DUMP,
+				kbase_gpu_gwt_dump,
+				union kbase_ioctl_cinstr_gwt_dump,
+				kctx);
+		break;
+#endif
+#if MALI_UNIT_TEST
+	case KBASE_IOCTL_TLSTREAM_TEST:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_TEST,
+				kbase_api_tlstream_test,
+				struct kbase_ioctl_tlstream_test,
+				kctx);
+		break;
+	case KBASE_IOCTL_TLSTREAM_STATS:
+		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_TLSTREAM_STATS,
+				kbase_api_tlstream_stats,
+				struct kbase_ioctl_tlstream_stats,
+				kctx);
+		break;
+#endif /* MALI_UNIT_TEST */
+	}
+	/* Only reached when cmd matched no case: every handler macro above returns */
+	dev_warn(kbdev->dev, "Unknown ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd));
+
+	return -ENOIOCTLCMD;
+}
+/**
+ * kbase_read - read() handler: delivers job events to user space.
+ * @filp:  File whose private_data holds the kbase_file.
+ * @buf:   User buffer receiving struct base_jd_event_v2 records.
+ * @count: Size of @buf in bytes.
+ * @f_pos: Unused.
+ *
+ * Copies as many whole events as fit. Blocks only while no event has been
+ * copied yet and the file is not O_NONBLOCK.
+ *
+ * Return: Number of bytes copied; -EPERM if context setup is incomplete;
+ * -ENOBUFS if @count cannot hold one event; -EAGAIN if non-blocking and no
+ * event is queued; -ERESTARTSYS if the wait is interrupted; -EPIPE if the
+ * first event dequeued is BASE_JD_EVENT_DRV_TERMINATED; -EFAULT if a copy
+ * fails.
+ */
+static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
+{
+	struct kbase_file *const kfile = filp->private_data;
+	struct kbase_context *const kctx =
+		kbase_file_get_kctx_if_setup_complete(kfile);
+	struct base_jd_event_v2 uevent;
+	int out_count = 0;
+
+	if (unlikely(!kctx))
+		return -EPERM;
+
+	if (count < sizeof(uevent))
+		return -ENOBUFS;
+
+	do {
+		while (kbase_event_dequeue(kctx, &uevent)) { /* non-zero: nothing was dequeued */
+			if (out_count > 0)
+				goto out;
+
+			if (filp->f_flags & O_NONBLOCK)
+				return -EAGAIN;
+
+			if (wait_event_interruptible(kctx->event_queue,
+					kbase_event_pending(kctx)) != 0)
+				return -ERESTARTSYS;
+		}
+		if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) {
+			if (out_count == 0)
+				return -EPIPE;
+			goto out;
+		}
+
+		if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0)
+			return -EFAULT;
+
+		buf += sizeof(uevent);
+		out_count++;
+		count -= sizeof(uevent);
+	} while (count >= sizeof(uevent));
+
+ out:
+	return out_count * sizeof(uevent);
+}
+/**
+ * kbase_poll - poll() handler for the device file.
+ * @filp: File whose private_data holds the kbase_file.
+ * @wait: Poll table to register the context's event queue with.
+ *
+ * Return: POLLIN | POLLRDNORM if kbase_event_pending() is true; POLLERR if
+ * context setup is incomplete; otherwise 0.
+ */
+static unsigned int kbase_poll(struct file *filp, poll_table *wait)
+{
+	struct kbase_file *const kfile = filp->private_data;
+	struct kbase_context *const kctx =
+		kbase_file_get_kctx_if_setup_complete(kfile);
+
+	if (unlikely(!kctx))
+		return POLLERR;
+
+	poll_wait(filp, &kctx->event_queue, wait);
+	if (kbase_event_pending(kctx))
+		return POLLIN | POLLRDNORM;
+
+	return 0;
+}
+/**
+ * kbase_event_wakeup - Wake up waiters on a context's event queue.
+ * @kctx: Context whose event_queue is woken; must not be NULL.
+ */
+void kbase_event_wakeup(struct kbase_context *kctx)
+{
+	KBASE_DEBUG_ASSERT(kctx);
+
+	wake_up_interruptible(&kctx->event_queue);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_wakeup);
+/**
+ * kbase_event_pending - Check whether a reader of the event queue should wake.
+ * @ctx: Context to check; must not be NULL.
+ *
+ * Return: Non-zero if the context's event_count or event_closed is non-zero.
+ */
+int kbase_event_pending(struct kbase_context *ctx)
+{
+	KBASE_DEBUG_ASSERT(ctx);
+
+	return (atomic_read(&ctx->event_count) != 0) ||
+		(atomic_read(&ctx->event_closed) != 0);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_pending);
+/**
+ * kbase_mmap - mmap() handler for the device file.
+ * @filp: File whose private_data holds the kbase_file.
+ * @vma:  VMA describing the requested mapping.
+ *
+ * Return: Result of kbase_context_mmap(); -EPERM if context setup is
+ * incomplete.
+ */
+static int kbase_mmap(struct file *const filp, struct vm_area_struct *const vma)
+{
+	struct kbase_file *const kfile = filp->private_data;
+	struct kbase_context *const kctx =
+		kbase_file_get_kctx_if_setup_complete(kfile);
+
+	if (unlikely(!kctx))
+		return -EPERM;
+
+	return kbase_context_mmap(kctx, vma);
+}
+/**
+ * kbase_check_flags - check_flags handler for the device file.
+ * @flags: File status flags to validate.
+ *
+ * Return: 0 if O_CLOEXEC is set in @flags, otherwise -EINVAL.
+ */
+static int kbase_check_flags(int flags)
+{
+	/* Enforce that the driver keeps the O_CLOEXEC flag so that execve() always
+	 * closes the file descriptor in a child process.
+	 */
+	if (0 == (flags & O_CLOEXEC))
+		return -EINVAL;
+
+	return 0;
+}
+/**
+ * kbase_get_unmapped_area - get_unmapped_area handler for the device file.
+ * @filp:  File whose private_data holds the kbase_file.
+ * @addr:  Requested address.
+ * @len:   Requested length.
+ * @pgoff: Page offset.
+ * @flags: Mapping flags.
+ *
+ * Return: Result of kbase_context_get_unmapped_area(); -EPERM (converted to
+ * unsigned long) if context setup is incomplete.
+ */
+static unsigned long kbase_get_unmapped_area(struct file *const filp,
+		const unsigned long addr, const unsigned long len,
+		const unsigned long pgoff, const unsigned long flags)
+{
+	struct kbase_file *const kfile = filp->private_data;
+	struct kbase_context *const kctx =
+		kbase_file_get_kctx_if_setup_complete(kfile);
+
+	if (unlikely(!kctx))
+		return -EPERM;
+
+	return kbase_context_get_unmapped_area(kctx, addr, len, pgoff, flags);
+}
+/* File operations for the device node; compat and native ioctls share kbase_ioctl(). */
+static const struct file_operations kbase_fops = {
+	.owner = THIS_MODULE,
+	.open = kbase_open,
+	.release = kbase_release,
+	.read = kbase_read,
+	.poll = kbase_poll,
+	.unlocked_ioctl = kbase_ioctl,
+	.compat_ioctl = kbase_ioctl,
+	.mmap = kbase_mmap,
+	.check_flags = kbase_check_flags,
+	.get_unmapped_area = kbase_get_unmapped_area,
+};
+
+/**
+ * show_policy - Show callback for the power_policy sysfs file.
+ *
+ * This function is called to get the contents of the power_policy sysfs
+ * file. This is a list of the available policies with the currently active one
+ * surrounded by square brackets.
+ *
+ * @dev: The device this sysfs file is for
+ * @attr: The attributes of the sysfs file
+ * @buf: The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_policy(struct device *dev, struct device_attribute *attr, char *const buf)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_policy *current_policy;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	current_policy = kbase_pm_get_policy(kbdev);
+
+	policy_count = kbase_pm_list_policies(kbdev, &policy_list);
+	/* Space-separated policy names; the active one is wrapped in [] */
+	for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
+		if (policy_list[i] == current_policy)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
+		else
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
+	}
+	/* Finish with a newline; if the page is full, overwrite its last two bytes instead */
+	if (ret < PAGE_SIZE - 1) {
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+	} else {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/**
+ * set_policy - Store callback for the power_policy sysfs file.
+ *
+ * This function is called when the power_policy sysfs file is written to.
+ * It matches the requested policy against the available policies and if a
+ * matching policy is found calls kbase_pm_set_policy() to change the
+ * policy.
+ *
+ * @dev: The device with sysfs file is for
+ * @attr: The attributes of the sysfs file
+ * @buf: The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	const struct kbase_pm_policy *new_policy = NULL;
+	const struct kbase_pm_policy *const *policy_list;
+	int policy_count;
+	int i;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	policy_count = kbase_pm_list_policies(kbdev, &policy_list);
+	/* First policy whose name matches wins; new_policy stays NULL if none does */
+	for (i = 0; i < policy_count; i++) {
+		if (sysfs_streq(policy_list[i]->name, buf)) {
+			new_policy = policy_list[i];
+			break;
+		}
+	}
+
+	if (!new_policy) {
+		dev_err(dev, "power_policy: policy not found\n");
+		return -EINVAL;
+	}
+
+	kbase_pm_set_policy(kbdev, new_policy);
+
+	return count;
+}
+
+/*
+ * The sysfs file power_policy.
+ *
+ * This is used for obtaining information about the available policies,
+ * determining which policy is currently active, and changing the active
+ * policy.
+ */
+static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy);
+
+/*
+ * show_core_mask - Show callback for the core_mask sysfs file.
+ *
+ * This function is called to get the contents of the core_mask sysfs file.
+ *
+ * @dev: The device this sysfs file is for
+ * @attr: The attributes of the sysfs file
+ * @buf: The output buffer for the sysfs file contents
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret = 0;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+	/* One line per debug core mask (labelled JS0..JS2), then the shader_present mask */
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS0) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[0]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS1) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[1]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Current core mask (JS2) : 0x%llX\n",
+			kbdev->pm.debug_core_mask[2]);
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"Available core mask : 0x%llX\n",
+			kbdev->gpu_props.props.raw_props.shader_present);
+
+	return ret;
+}
+
+/**
+ * set_core_mask - Store callback for the core_mask sysfs file.
+ *
+ * This function is called when the core_mask sysfs file is written to.
+ *
+ * @dev: The device with sysfs file is for
+ * @attr: The attributes of the sysfs file
+ * @buf: The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	u64 new_core_mask[3];
+	int items, i;
+	ssize_t err = count;
+	unsigned long flags;
+	u64 shader_present, group0_core_mask;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	/* Accept either one hex mask for all job slots or one mask per slot */
+	items = sscanf(buf, "%llx %llx %llx",
+			&new_core_mask[0], &new_core_mask[1],
+			&new_core_mask[2]);
+
+	if (items != 1 && items != 3) {
+		dev_err(kbdev->dev, "Couldn't process core mask write operation.\n"
+			"Use format <core_mask>\n"
+			"or <core_mask_js0> <core_mask_js1> <core_mask_js2>\n");
+		err = -EINVAL;
+		goto end;
+	}
+
+	if (items == 1)
+		new_core_mask[1] = new_core_mask[2] = new_core_mask[0];
+
+	/* Validation and update both run under pm.lock and hwaccess_lock */
+	mutex_lock(&kbdev->pm.lock);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	shader_present = kbdev->gpu_props.props.raw_props.shader_present;
+	group0_core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+
+	/* Each mask must be a subset of the present cores and must overlap
+	 * both the currently enabled cores and core group 0.
+	 */
+	for (i = 0; i < 3; ++i) {
+		if ((new_core_mask[i] & shader_present) != new_core_mask[i]) {
+			dev_err(dev, "Invalid core mask 0x%llX for JS %d: Includes non-existent cores (present = 0x%llX)\n",
+					new_core_mask[i], i, shader_present);
+			err = -EINVAL;
+			goto unlock;
+
+		} else if (!(new_core_mask[i] & shader_present & kbdev->pm.backend.ca_cores_enabled)) {
+			dev_err(dev, "Invalid core mask 0x%llX for JS %d: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX)\n",
+					new_core_mask[i], i,
+					kbdev->gpu_props.props.raw_props.shader_present,
+					kbdev->pm.backend.ca_cores_enabled);
+			err = -EINVAL;
+			goto unlock;
+
+		} else if (!(new_core_mask[i] & group0_core_mask)) {
+			dev_err(dev, "Invalid core mask 0x%llX for JS %d: No intersection with group 0 core mask 0x%llX\n",
+					new_core_mask[i], i, group0_core_mask);
+			err = -EINVAL;
+			goto unlock;
+		}
+	}
+
+	/* Skip the update when nothing actually changes */
+	if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] ||
+			kbdev->pm.debug_core_mask[1] !=
+			new_core_mask[1] ||
+			kbdev->pm.debug_core_mask[2] !=
+			new_core_mask[2]) {
+
+		
kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0],
+				new_core_mask[1], new_core_mask[2]);
+	}
+
+unlock:
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	mutex_unlock(&kbdev->pm.lock);
+end:
+	return err;
+}
+
+/*
+ * The sysfs file core_mask.
+ *
+ * This is used to restrict shader core availability for debugging purposes.
+ * Reading it will show the current core mask and the mask of cores available.
+ * Writing to it will set the current core mask.
+ */
+static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask);
+/**
+ * show_gpu_memory - Show callback for the gpu_memory sysfs file.
+ * @dev:  The device this sysfs file is for
+ * @attr: The attributes of the sysfs file
+ * @buf:  The output buffer for the sysfs file contents
+ *
+ * Walks every device returned by kbase_device_get_list(), not only @dev. For
+ * each one it prints the total used pages, then one row per context with
+ * the context pointer, its tgid and its used pages.
+ *
+ * Return: The number of bytes output to @buf, or -ENODEV if @dev has no
+ * kbase device.
+ */
+static ssize_t show_gpu_memory(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret = 0;
+	struct list_head *entry;
+	const struct list_head *kbdev_list;
+
+	kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kbdev_list = kbase_device_get_list();
+	list_for_each(entry, kbdev_list) {
+		struct kbase_device *kbdev = NULL; /* NOTE: shadows the function-scope kbdev */
+		struct kbase_context *kctx;
+
+		kbdev = list_entry(entry, struct kbase_device, entry);
+		/* output the total memory usage and cap for this device */
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+				"%-16s %-16s %10u\n",
+				kbdev->devname,
+				"total used_pages",
+				atomic_read(&(kbdev->memdev.used_pages)));
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+				"----------------------------------------------------\n");
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+				"%-16s %-16s %-16s\n",
+				"kctx", "pid", "used_pages");
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+				"----------------------------------------------------\n");
+		mutex_lock(&kbdev->kctx_list_lock);
+		list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) {
+			/* output the memory usage and cap for each kctx
+			 * opened on this device */
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+				"%p %10u %10u\n",
+				kctx,
+				kctx->tgid,
+				atomic_read(&(kctx->used_pages)));
+		}
+		mutex_unlock(&kbdev->kctx_list_lock);
+	}
+
+	kbase_device_put_list(kbdev_list);
+
+
+	return ret;
+} + +static ssize_t set_gpu_memory(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + ssize_t err = count; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + return err; +} + +static DEVICE_ATTR(gpu_memory, S_IRUGO | S_IWUSR, show_gpu_memory, set_gpu_memory); + +static ssize_t show_ctx_mem_pool_size(struct device *dev, struct device_attribute *attr, char * const buf) +{ + struct list_head *entry; + const struct list_head *kbdev_list; + ssize_t ret = 0; + int i = 0; + struct kbase_device *const kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + kbdev_list = kbase_device_get_list(); + list_for_each(entry, kbdev_list) { + struct kbase_device *kbdev = NULL; + struct kbase_context *kctx; + + kbdev = list_entry(entry, struct kbase_device, entry); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "%-16s %-16s %-16s\n", + "kctx", "pid", "cached_pages"); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "----------------------------------------------------\n"); + mutex_lock(&kbdev->kctx_list_lock); + list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { + /* output the memory cached and cap for each kctx + * opened on this device */ + unsigned long cached_mem = 0; + for (i = 0; i < MEMORY_GROUP_MANAGER_NR_GROUPS; i++) + //pr_info("[%d]:kctx->mem_pools.small[%d] = %d", kctx->tgid, i, kctx->mem_pools.small[i].cur_size); + cached_mem += kctx->mem_pools.small[i].cur_size; + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "%p %10u %10lu\n", + kctx, + kctx->tgid, + cached_mem); + } + mutex_unlock(&kbdev->kctx_list_lock); + } + + kbase_device_put_list(kbdev_list); + + return ret; +} + +static ssize_t set_ctx_mem_pool_size(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + ssize_t err = count; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + return err; +} + +static 
DEVICE_ATTR(ctx_mem_pool_size, S_IRUGO | S_IWUSR, show_ctx_mem_pool_size, set_ctx_mem_pool_size); + +/** + * set_soft_job_timeout - Store callback for the soft_job_timeout sysfs + * file. + * + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The value written to the sysfs file. + * @count: The number of bytes written to the sysfs file. + * + * This allows setting the timeout for software jobs. Waiting soft event wait + * jobs will be cancelled after this period expires, while soft fence wait jobs + * will print debug information if the fence debug feature is enabled. + * + * This is expressed in milliseconds. + * + * Return: count if the function succeeded. An error code on failure. + */ +static ssize_t set_soft_job_timeout(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kbase_device *kbdev; + int soft_job_timeout_ms; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + if ((kstrtoint(buf, 0, &soft_job_timeout_ms) != 0) || + (soft_job_timeout_ms <= 0)) + return -EINVAL; + + atomic_set(&kbdev->js_data.soft_job_timeout_ms, + soft_job_timeout_ms); + + return count; +} + +/** + * show_soft_job_timeout - Show callback for the soft_job_timeout sysfs + * file. + * + * This will return the timeout for the software jobs. + * + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer for the sysfs file contents. + * + * Return: The number of bytes output to buf. 
+ */ +static ssize_t show_soft_job_timeout(struct device *dev, + struct device_attribute *attr, + char * const buf) +{ + struct kbase_device *kbdev; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + return scnprintf(buf, PAGE_SIZE, "%i\n", + atomic_read(&kbdev->js_data.soft_job_timeout_ms)); +} + +static DEVICE_ATTR(soft_job_timeout, S_IRUGO | S_IWUSR, + show_soft_job_timeout, set_soft_job_timeout); + +static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms, + int default_ticks, u32 old_ticks) +{ + if (timeout_ms > 0) { + u64 ticks = timeout_ms * 1000000ULL; + do_div(ticks, kbdev->js_data.scheduling_period_ns); + if (!ticks) + return 1; + return ticks; + } else if (timeout_ms < 0) { + return default_ticks; + } else { + return old_ticks; + } +} + +/** + * set_js_timeouts - Store callback for the js_timeouts sysfs file. + * + * This function is called to get the contents of the js_timeouts sysfs + * file. This file contains five values separated by whitespace. The values + * are basically the same as %JS_SOFT_STOP_TICKS, %JS_HARD_STOP_TICKS_SS, + * %JS_HARD_STOP_TICKS_DUMPING, %JS_RESET_TICKS_SS, %JS_RESET_TICKS_DUMPING + * configuration values (in that order), with the difference that the js_timeout + * values are expressed in MILLISECONDS. + * + * The js_timeouts sysfile file allows the current values in + * use by the job scheduler to get override. Note that a value needs to + * be other than 0 for it to override the current job scheduler value. + * + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes written to the sysfs file + * + * Return: @count if the function succeeded. An error code on failure. 
+ */ +static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + int items; + long js_soft_stop_ms; + long js_soft_stop_ms_cl; + long js_hard_stop_ms_ss; + long js_hard_stop_ms_cl; + long js_hard_stop_ms_dumping; + long js_reset_ms_ss; + long js_reset_ms_cl; + long js_reset_ms_dumping; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + items = sscanf(buf, "%ld %ld %ld %ld %ld %ld %ld %ld", + &js_soft_stop_ms, &js_soft_stop_ms_cl, + &js_hard_stop_ms_ss, &js_hard_stop_ms_cl, + &js_hard_stop_ms_dumping, &js_reset_ms_ss, + &js_reset_ms_cl, &js_reset_ms_dumping); + + if (items == 8) { + struct kbasep_js_device_data *js_data = &kbdev->js_data; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + +#define UPDATE_TIMEOUT(ticks_name, ms_name, default) do {\ + js_data->ticks_name = timeout_ms_to_ticks(kbdev, ms_name, \ + default, js_data->ticks_name); \ + dev_dbg(kbdev->dev, "Overriding " #ticks_name \ + " with %lu ticks (%lu ms)\n", \ + (unsigned long)js_data->ticks_name, \ + ms_name); \ + } while (0) + + UPDATE_TIMEOUT(soft_stop_ticks, js_soft_stop_ms, + DEFAULT_JS_SOFT_STOP_TICKS); + UPDATE_TIMEOUT(soft_stop_ticks_cl, js_soft_stop_ms_cl, + DEFAULT_JS_SOFT_STOP_TICKS_CL); + UPDATE_TIMEOUT(hard_stop_ticks_ss, js_hard_stop_ms_ss, + DEFAULT_JS_HARD_STOP_TICKS_SS); + UPDATE_TIMEOUT(hard_stop_ticks_cl, js_hard_stop_ms_cl, + DEFAULT_JS_HARD_STOP_TICKS_CL); + UPDATE_TIMEOUT(hard_stop_ticks_dumping, + js_hard_stop_ms_dumping, + DEFAULT_JS_HARD_STOP_TICKS_DUMPING); + UPDATE_TIMEOUT(gpu_reset_ticks_ss, js_reset_ms_ss, + DEFAULT_JS_RESET_TICKS_SS); + UPDATE_TIMEOUT(gpu_reset_ticks_cl, js_reset_ms_cl, + DEFAULT_JS_RESET_TICKS_CL); + UPDATE_TIMEOUT(gpu_reset_ticks_dumping, js_reset_ms_dumping, + DEFAULT_JS_RESET_TICKS_DUMPING); + + kbase_js_set_timeouts(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return count; + } + + 
dev_err(kbdev->dev, "Couldn't process js_timeouts write operation.\n" + "Use format \n" + "Write 0 for no change, -1 to restore default timeout\n"); + return -EINVAL; +} + +static unsigned long get_js_timeout_in_ms( + u32 scheduling_period_ns, + u32 ticks) +{ + u64 ms = (u64)ticks * scheduling_period_ns; + + do_div(ms, 1000000UL); + return ms; +} + +/** + * show_js_timeouts - Show callback for the js_timeouts sysfs file. + * + * This function is called to get the contents of the js_timeouts sysfs + * file. It returns the last set values written to the js_timeouts sysfs file. + * If the file didn't get written yet, the values will be current setting in + * use. + * @dev: The device this sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The output buffer for the sysfs file contents + * + * Return: The number of bytes output to @buf. + */ +static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + ssize_t ret; + unsigned long js_soft_stop_ms; + unsigned long js_soft_stop_ms_cl; + unsigned long js_hard_stop_ms_ss; + unsigned long js_hard_stop_ms_cl; + unsigned long js_hard_stop_ms_dumping; + unsigned long js_reset_ms_ss; + unsigned long js_reset_ms_cl; + unsigned long js_reset_ms_dumping; + u32 scheduling_period_ns; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + scheduling_period_ns = kbdev->js_data.scheduling_period_ns; + +#define GET_TIMEOUT(name) get_js_timeout_in_ms(\ + scheduling_period_ns, \ + kbdev->js_data.name) + + js_soft_stop_ms = GET_TIMEOUT(soft_stop_ticks); + js_soft_stop_ms_cl = GET_TIMEOUT(soft_stop_ticks_cl); + js_hard_stop_ms_ss = GET_TIMEOUT(hard_stop_ticks_ss); + js_hard_stop_ms_cl = GET_TIMEOUT(hard_stop_ticks_cl); + js_hard_stop_ms_dumping = GET_TIMEOUT(hard_stop_ticks_dumping); + js_reset_ms_ss = GET_TIMEOUT(gpu_reset_ticks_ss); + js_reset_ms_cl = GET_TIMEOUT(gpu_reset_ticks_cl); + js_reset_ms_dumping = 
GET_TIMEOUT(gpu_reset_ticks_dumping); + +#undef GET_TIMEOUT + + ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n", + js_soft_stop_ms, js_soft_stop_ms_cl, + js_hard_stop_ms_ss, js_hard_stop_ms_cl, + js_hard_stop_ms_dumping, js_reset_ms_ss, + js_reset_ms_cl, js_reset_ms_dumping); + + if (ret >= PAGE_SIZE) { + buf[PAGE_SIZE - 2] = '\n'; + buf[PAGE_SIZE - 1] = '\0'; + ret = PAGE_SIZE - 1; + } + + return ret; +} + +/* + * The sysfs file js_timeouts. + * + * This is used to override the current job scheduler values for + * JS_STOP_STOP_TICKS_SS + * JS_STOP_STOP_TICKS_CL + * JS_HARD_STOP_TICKS_SS + * JS_HARD_STOP_TICKS_CL + * JS_HARD_STOP_TICKS_DUMPING + * JS_RESET_TICKS_SS + * JS_RESET_TICKS_CL + * JS_RESET_TICKS_DUMPING. + */ +static DEVICE_ATTR(js_timeouts, S_IRUGO | S_IWUSR, show_js_timeouts, set_js_timeouts); + +static u32 get_new_js_timeout( + u32 old_period, + u32 old_ticks, + u32 new_scheduling_period_ns) +{ + u64 ticks = (u64)old_period * (u64)old_ticks; + do_div(ticks, new_scheduling_period_ns); + return ticks?ticks:1; +} + +/** + * set_js_scheduling_period - Store callback for the js_scheduling_period sysfs + * file + * @dev: The device the sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes written to the sysfs file + * + * This function is called when the js_scheduling_period sysfs file is written + * to. It checks the data written, and if valid updates the js_scheduling_period + * value + * + * Return: @count if the function succeeded. An error code on failure. 
+ */ +static ssize_t set_js_scheduling_period(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + int ret; + unsigned int js_scheduling_period; + u32 new_scheduling_period_ns; + u32 old_period; + struct kbasep_js_device_data *js_data; + unsigned long flags; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + js_data = &kbdev->js_data; + + ret = kstrtouint(buf, 0, &js_scheduling_period); + if (ret || !js_scheduling_period) { + dev_err(kbdev->dev, "Couldn't process js_scheduling_period write operation.\n" + "Use format \n"); + return -EINVAL; + } + + new_scheduling_period_ns = js_scheduling_period * 1000000; + + /* Update scheduling timeouts */ + mutex_lock(&js_data->runpool_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + /* If no contexts have been scheduled since js_timeouts was last written + * to, the new timeouts might not have been latched yet. So check if an + * update is pending and use the new values if necessary. */ + + /* Use previous 'new' scheduling period as a base if present. 
*/ + old_period = js_data->scheduling_period_ns; + +#define SET_TIMEOUT(name) \ + (js_data->name = get_new_js_timeout(\ + old_period, \ + kbdev->js_data.name, \ + new_scheduling_period_ns)) + + SET_TIMEOUT(soft_stop_ticks); + SET_TIMEOUT(soft_stop_ticks_cl); + SET_TIMEOUT(hard_stop_ticks_ss); + SET_TIMEOUT(hard_stop_ticks_cl); + SET_TIMEOUT(hard_stop_ticks_dumping); + SET_TIMEOUT(gpu_reset_ticks_ss); + SET_TIMEOUT(gpu_reset_ticks_cl); + SET_TIMEOUT(gpu_reset_ticks_dumping); + +#undef SET_TIMEOUT + + js_data->scheduling_period_ns = new_scheduling_period_ns; + + kbase_js_set_timeouts(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&js_data->runpool_mutex); + + dev_dbg(kbdev->dev, "JS scheduling period: %dms\n", + js_scheduling_period); + + return count; +} + +/** + * show_js_scheduling_period - Show callback for the js_scheduling_period sysfs + * entry. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the GPU information. + * + * This function is called to get the current period used for the JS scheduling + * period. + * + * Return: The number of bytes output to @buf. 
+ */ +static ssize_t show_js_scheduling_period(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + u32 period; + ssize_t ret; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + period = kbdev->js_data.scheduling_period_ns; + + ret = scnprintf(buf, PAGE_SIZE, "%d\n", + period / 1000000); + + return ret; +} + +static DEVICE_ATTR(js_scheduling_period, S_IRUGO | S_IWUSR, + show_js_scheduling_period, set_js_scheduling_period); + + +#ifdef CONFIG_MALI_DEBUG +static ssize_t set_js_softstop_always(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + int ret; + int softstop_always; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ret = kstrtoint(buf, 0, &softstop_always); + if (ret || ((softstop_always != 0) && (softstop_always != 1))) { + dev_err(kbdev->dev, "Couldn't process js_softstop_always write operation.\n" + "Use format \n"); + return -EINVAL; + } + + kbdev->js_data.softstop_always = (bool) softstop_always; + dev_dbg(kbdev->dev, "Support for softstop on a single context: %s\n", + (kbdev->js_data.softstop_always) ? + "Enabled" : "Disabled"); + return count; +} + +static ssize_t show_js_softstop_always(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + ssize_t ret; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->js_data.softstop_always); + + if (ret >= PAGE_SIZE) { + buf[PAGE_SIZE - 2] = '\n'; + buf[PAGE_SIZE - 1] = '\0'; + ret = PAGE_SIZE - 1; + } + + return ret; +} + +/* + * By default, soft-stops are disabled when only a single context is present. + * The ability to enable soft-stop when only a single context is present can be + * used for debug and unit-testing purposes. + * (see CL t6xx_stress_1 unit-test as an example whereby this feature is used.) 
+ */ +static DEVICE_ATTR(js_softstop_always, S_IRUGO | S_IWUSR, show_js_softstop_always, set_js_softstop_always); +#endif /* CONFIG_MALI_DEBUG */ + +#ifdef CONFIG_MALI_DEBUG +typedef void (kbasep_debug_command_func) (struct kbase_device *); + +enum kbasep_debug_command_code { + KBASEP_DEBUG_COMMAND_DUMPTRACE, + + /* This must be the last enum */ + KBASEP_DEBUG_COMMAND_COUNT +}; + +struct kbasep_debug_command { + char *str; + kbasep_debug_command_func *func; +}; + +void kbasep_ktrace_dump_wrapper(struct kbase_device *kbdev) +{ + KBASE_KTRACE_DUMP(kbdev); +} + +/* Debug commands supported by the driver */ +static const struct kbasep_debug_command debug_commands[] = { + { + .str = "dumptrace", + .func = &kbasep_ktrace_dump_wrapper, + } +}; + +/** + * show_debug - Show callback for the debug_command sysfs file. + * + * This function is called to get the contents of the debug_command sysfs + * file. This is a list of the available debug commands, separated by newlines. + * + * @dev: The device this sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The output buffer for the sysfs file contents + * + * Return: The number of bytes output to @buf. + */ +static ssize_t show_debug(struct device *dev, struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + int i; + ssize_t ret = 0; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT && ret < PAGE_SIZE; i++) + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s\n", debug_commands[i].str); + + if (ret >= PAGE_SIZE) { + buf[PAGE_SIZE - 2] = '\n'; + buf[PAGE_SIZE - 1] = '\0'; + ret = PAGE_SIZE - 1; + } + + return ret; +} + +/** + * issue_debug - Store callback for the debug_command sysfs file. + * + * This function is called when the debug_command sysfs file is written to. 
+ * It matches the requested command against the available commands, and if + * a matching command is found calls the associated function from + * @debug_commands to issue the command. + * + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes written to the sysfs file + * + * Return: @count if the function succeeded. An error code on failure. + */ +static ssize_t issue_debug(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + int i; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT; i++) { + if (sysfs_streq(debug_commands[i].str, buf)) { + debug_commands[i].func(kbdev); + return count; + } + } + + /* Debug Command not found */ + dev_err(dev, "debug_command: command not known\n"); + return -EINVAL; +} + +/* The sysfs file debug_command. + * + * This is used to issue general debug commands to the device driver. + * Reading it will produce a list of debug commands, separated by newlines. + * Writing to it with one of those commands will issue said command. + */ +static DEVICE_ATTR(debug_command, S_IRUGO | S_IWUSR, show_debug, issue_debug); +#endif /* CONFIG_MALI_DEBUG */ + +/** + * kbase_show_gpuinfo - Show callback for the gpuinfo sysfs entry. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the GPU information. + * + * This function is called to get a description of the present Mali + * GPU via the gpuinfo sysfs entry. This includes the GPU family, the + * number of cores, the hardware version and the raw product id. For + * example + * + * Mali-T60x MP4 r0p0 0x6956 + * + * Return: The number of bytes output to @buf. 
+ */ +static ssize_t kbase_show_gpuinfo(struct device *dev, + struct device_attribute *attr, char *buf) +{ + static const struct gpu_product_id_name { + unsigned id; + char *name; + } gpu_product_id_names[] = { + { .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-G71" }, + { .id = GPU_ID2_PRODUCT_THEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-G72" }, + { .id = GPU_ID2_PRODUCT_TSIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-G51" }, + { .id = GPU_ID2_PRODUCT_TNOX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-G76" }, + { .id = GPU_ID2_PRODUCT_TDVX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-G31" }, + { .id = GPU_ID2_PRODUCT_TGOX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-G52" }, + { .id = GPU_ID2_PRODUCT_TTRX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-G77" }, + { .id = GPU_ID2_PRODUCT_TBEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-G78" }, + { .id = GPU_ID2_PRODUCT_LBEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-G68" }, + { .id = GPU_ID2_PRODUCT_TNAX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-G57" }, + { .id = GPU_ID2_PRODUCT_TODX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-TODX" }, + { .id = GPU_ID2_PRODUCT_TGRX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-TGRX" }, + { .id = GPU_ID2_PRODUCT_TVAX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-TVAX" }, + { .id = GPU_ID2_PRODUCT_LODX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-LODX" }, + { .id = GPU_ID2_PRODUCT_TTUX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-TTUX" }, + { .id = GPU_ID2_PRODUCT_LTUX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-LTUX" }, + { .id = GPU_ID2_PRODUCT_TE2X >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-TE2X" }, + }; + const char *product_name = "(Unknown Mali GPU)"; + struct kbase_device *kbdev; + u32 gpu_id; + unsigned product_id, product_id_mask; + unsigned i; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + gpu_id = 
kbdev->gpu_props.props.raw_props.gpu_id; + product_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; + product_id_mask = GPU_ID2_PRODUCT_MODEL >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; + + for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) { + const struct gpu_product_id_name *p = &gpu_product_id_names[i]; + + if ((p->id & product_id_mask) == + (product_id & product_id_mask)) { + product_name = p->name; + break; + } + } + + return scnprintf(buf, PAGE_SIZE, "%s %d cores r%dp%d 0x%04X\n", + product_name, kbdev->gpu_props.num_cores, + (gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT, + (gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT, + product_id); +} +static DEVICE_ATTR(gpuinfo, S_IRUGO, kbase_show_gpuinfo, NULL); + +/** + * set_dvfs_period - Store callback for the dvfs_period sysfs file. + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes written to the sysfs file + * + * This function is called when the dvfs_period sysfs file is written to. It + * checks the data written, and if valid updates the DVFS period variable, + * + * Return: @count if the function succeeded. An error code on failure. + */ +static ssize_t set_dvfs_period(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + int ret; + int dvfs_period; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ret = kstrtoint(buf, 0, &dvfs_period); + if (ret || dvfs_period <= 0) { + dev_err(kbdev->dev, "Couldn't process dvfs_period write operation.\n" + "Use format \n"); + return -EINVAL; + } + + kbdev->pm.dvfs_period = dvfs_period; + dev_dbg(kbdev->dev, "DVFS period: %dms\n", dvfs_period); + + return count; +} + +/** + * show_dvfs_period - Show callback for the dvfs_period sysfs entry. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. 
+ * @buf: The output buffer to receive the GPU information. + * + * This function is called to get the current period used for the DVFS sample + * timer. + * + * Return: The number of bytes output to @buf. + */ +static ssize_t show_dvfs_period(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + ssize_t ret; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->pm.dvfs_period); + + return ret; +} + +static DEVICE_ATTR(dvfs_period, S_IRUGO | S_IWUSR, show_dvfs_period, + set_dvfs_period); + +/** + * set_pm_poweroff - Store callback for the pm_poweroff sysfs file. + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes written to the sysfs file + * + * This function is called when the pm_poweroff sysfs file is written to. + * + * This file contains three values separated by whitespace. The values + * are gpu_poweroff_time (the period of the poweroff timer, in ns), + * poweroff_shader_ticks (the number of poweroff timer ticks before an idle + * shader is powered off), and poweroff_gpu_ticks (the number of poweroff timer + * ticks before the GPU is powered off), in that order. + * + * Return: @count if the function succeeded. An error code on failure. 
+ */ +static ssize_t set_pm_poweroff(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + struct kbasep_pm_tick_timer_state *stt; + int items; + u64 gpu_poweroff_time; + unsigned int poweroff_shader_ticks, poweroff_gpu_ticks; + unsigned long flags; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + items = sscanf(buf, "%llu %u %u", &gpu_poweroff_time, + &poweroff_shader_ticks, + &poweroff_gpu_ticks); + if (items != 3) { + dev_err(kbdev->dev, "Couldn't process pm_poweroff write operation.\n" + "Use format \n"); + return -EINVAL; + } + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + stt = &kbdev->pm.backend.shader_tick_timer; + stt->configured_interval = HR_TIMER_DELAY_NSEC(gpu_poweroff_time); + stt->configured_ticks = poweroff_shader_ticks; + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (poweroff_gpu_ticks != 0) + dev_warn(kbdev->dev, "Separate GPU poweroff delay no longer supported.\n"); + + return count; +} + +/** + * show_pm_poweroff - Show callback for the pm_poweroff sysfs entry. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the GPU information. + * + * This function is called to get the current period used for the DVFS sample + * timer. + * + * Return: The number of bytes output to @buf. 
+ */ +static ssize_t show_pm_poweroff(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + struct kbasep_pm_tick_timer_state *stt; + ssize_t ret; + unsigned long flags; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + stt = &kbdev->pm.backend.shader_tick_timer; + ret = scnprintf(buf, PAGE_SIZE, "%llu %u 0\n", + ktime_to_ns(stt->configured_interval), + stt->configured_ticks); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return ret; +} + +static DEVICE_ATTR(pm_poweroff, S_IRUGO | S_IWUSR, show_pm_poweroff, + set_pm_poweroff); + +/** + * set_reset_timeout - Store callback for the reset_timeout sysfs file. + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes written to the sysfs file + * + * This function is called when the reset_timeout sysfs file is written to. It + * checks the data written, and if valid updates the reset timeout. + * + * Return: @count if the function succeeded. An error code on failure. + */ +static ssize_t set_reset_timeout(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + int ret; + int reset_timeout; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ret = kstrtoint(buf, 0, &reset_timeout); + if (ret || reset_timeout <= 0) { + dev_err(kbdev->dev, "Couldn't process reset_timeout write operation.\n" + "Use format \n"); + return -EINVAL; + } + + kbdev->reset_timeout_ms = reset_timeout; + dev_dbg(kbdev->dev, "Reset timeout: %dms\n", reset_timeout); + + return count; +} + +/** + * show_reset_timeout - Show callback for the reset_timeout sysfs entry. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the GPU information. 
+ * + * This function is called to get the current reset timeout. + * + * Return: The number of bytes output to @buf. + */ +static ssize_t show_reset_timeout(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + ssize_t ret; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->reset_timeout_ms); + + return ret; +} + +static DEVICE_ATTR(reset_timeout, S_IRUGO | S_IWUSR, show_reset_timeout, + set_reset_timeout); + + +static ssize_t show_mem_pool_size(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *const kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, + kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_size); +} + +static ssize_t set_mem_pool_size(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *const kbdev = to_kbase_device(dev); + int err; + + if (!kbdev) + return -ENODEV; + + err = kbase_debugfs_helper_set_attr_from_string(buf, + kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_trim); + + return err ? 
err : count; +} + +static DEVICE_ATTR(mem_pool_size, S_IRUGO | S_IWUSR, show_mem_pool_size, + set_mem_pool_size); + +static ssize_t show_mem_pool_max_size(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *const kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, + kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_max_size); +} + +static ssize_t set_mem_pool_max_size(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *const kbdev = to_kbase_device(dev); + int err; + + if (!kbdev) + return -ENODEV; + + err = kbase_debugfs_helper_set_attr_from_string(buf, + kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_set_max_size); + + return err ? err : count; +} + +static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size, + set_mem_pool_max_size); + +/** + * show_lp_mem_pool_size - Show size of the large memory pages pool. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the pool size. + * + * This function is called to get the number of large memory pages which currently populate the kbdev pool. + * + * Return: The number of bytes output to @buf. + */ +static ssize_t show_lp_mem_pool_size(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *const kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, + kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_size); +} + +/** + * set_lp_mem_pool_size - Set size of the large memory pages pool. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The value written to the sysfs file. 
+ * @count: The number of bytes written to the sysfs file. + * + * This function is called to set the number of large memory pages which should populate the kbdev pool. + * This may cause existing pages to be removed from the pool, or new pages to be created and then added to the pool. + * + * Return: @count if the function succeeded. An error code on failure. + */ +static ssize_t set_lp_mem_pool_size(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *const kbdev = to_kbase_device(dev); + int err; + + if (!kbdev) + return -ENODEV; + + err = kbase_debugfs_helper_set_attr_from_string(buf, + kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_trim); + + return err ? err : count; +} + +static DEVICE_ATTR(lp_mem_pool_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_size, + set_lp_mem_pool_size); + +/** + * show_lp_mem_pool_max_size - Show maximum size of the large memory pages pool. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the pool size. + * + * This function is called to get the maximum number of large memory pages that the kbdev pool can possibly contain. + * + * Return: The number of bytes output to @buf. + */ +static ssize_t show_lp_mem_pool_max_size(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *const kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, + kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_max_size); +} + +/** + * set_lp_mem_pool_max_size - Set maximum size of the large memory pages pool. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The value written to the sysfs file. + * @count: The number of bytes written to the sysfs file. 
+ * + * This function is called to set the maximum number of large memory pages that the kbdev pool can possibly contain. + * + * Return: @count if the function succeeded. An error code on failure. + */ +static ssize_t set_lp_mem_pool_max_size(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *const kbdev = to_kbase_device(dev); + int err; + + if (!kbdev) + return -ENODEV; + + err = kbase_debugfs_helper_set_attr_from_string(buf, + kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_set_max_size); + + return err ? err : count; +} + +static DEVICE_ATTR(lp_mem_pool_max_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_max_size, + set_lp_mem_pool_max_size); + +/** + * show_js_ctx_scheduling_mode - Show callback for js_ctx_scheduling_mode sysfs + * entry. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the context scheduling mode information. + * + * This function is called to get the context scheduling mode being used by JS. + * + * Return: The number of bytes output to @buf. + */ +static ssize_t show_js_ctx_scheduling_mode(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + return scnprintf(buf, PAGE_SIZE, "%u\n", kbdev->js_ctx_scheduling_mode); +} + +/** + * set_js_ctx_scheduling_mode - Set callback for js_ctx_scheduling_mode sysfs + * entry. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The value written to the sysfs file. + * @count: The number of bytes written to the sysfs file. + * + * This function is called when the js_ctx_scheduling_mode sysfs file is written + * to. It checks the data written, and if valid updates the ctx scheduling mode + * being used by JS. + * + * Return: @count if the function succeeded. An error code on failure.
+ */ +static ssize_t set_js_ctx_scheduling_mode(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_context *kctx; + u32 new_js_ctx_scheduling_mode; + struct kbase_device *kbdev; + unsigned long flags; + int ret; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ret = kstrtouint(buf, 0, &new_js_ctx_scheduling_mode); + if (ret || new_js_ctx_scheduling_mode >= KBASE_JS_PRIORITY_MODE_COUNT) { + dev_err(kbdev->dev, "Couldn't process js_ctx_scheduling_mode" + " write operation.\n" + "Use format \n"); + return -EINVAL; + } + + if (new_js_ctx_scheduling_mode == kbdev->js_ctx_scheduling_mode) + return count; + + mutex_lock(&kbdev->kctx_list_lock); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + /* Update the context priority mode */ + kbdev->js_ctx_scheduling_mode = new_js_ctx_scheduling_mode; + + /* Adjust priority of all the contexts as per the new mode */ + list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) + kbase_js_update_ctx_priority(kctx); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->kctx_list_lock); + + dev_dbg(kbdev->dev, "JS ctx scheduling mode: %u\n", new_js_ctx_scheduling_mode); + + return count; +} + +static DEVICE_ATTR(js_ctx_scheduling_mode, S_IRUGO | S_IWUSR, + show_js_ctx_scheduling_mode, + set_js_ctx_scheduling_mode); + +#ifdef MALI_KBASE_BUILD +#ifdef CONFIG_DEBUG_FS + +/* Number of entries in serialize_jobs_settings[] */ +#define NR_SERIALIZE_JOBS_SETTINGS 5 +/* Maximum string length in serialize_jobs_settings[].name */ +#define MAX_SERIALIZE_JOBS_NAME_LEN 16 + +static struct +{ + char *name; + u8 setting; +} serialize_jobs_settings[NR_SERIALIZE_JOBS_SETTINGS] = { + {"none", 0}, + {"intra-slot", KBASE_SERIALIZE_INTRA_SLOT}, + {"inter-slot", KBASE_SERIALIZE_INTER_SLOT}, + {"full", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT}, + {"full-reset", KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT | + 
KBASE_SERIALIZE_RESET} +}; + +/** + * kbasep_serialize_jobs_seq_show - Show callback for the serialize_jobs debugfs + * file + * @sfile: seq_file pointer + * @data: Private callback data + * + * This function is called to get the contents of the serialize_jobs debugfs + * file. This is a list of the available settings with the currently active one + * surrounded by square brackets. + * + * Return: 0 on success, or an error code on error + */ +static int kbasep_serialize_jobs_seq_show(struct seq_file *sfile, void *data) +{ + struct kbase_device *kbdev = sfile->private; + int i; + + CSTD_UNUSED(data); + + for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { + if (kbdev->serialize_jobs == serialize_jobs_settings[i].setting) + seq_printf(sfile, "[%s] ", + serialize_jobs_settings[i].name); + else + seq_printf(sfile, "%s ", + serialize_jobs_settings[i].name); + } + + seq_puts(sfile, "\n"); + + return 0; +} + +/** + * kbasep_serialize_jobs_debugfs_write - Store callback for the serialize_jobs + * debugfs file. + * @file: File pointer + * @ubuf: User buffer containing data to store + * @count: Number of bytes in user buffer + * @ppos: File position + * + * This function is called when the serialize_jobs debugfs file is written to. + * It matches the requested setting against the available settings and if a + * matching setting is found updates kbdev->serialize_jobs. + * + * Return: @count if the function succeeded. An error code on failure. 
+ */ +static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file, + const char __user *ubuf, size_t count, loff_t *ppos) +{ + struct seq_file *s = file->private_data; + struct kbase_device *kbdev = s->private; + char buf[MAX_SERIALIZE_JOBS_NAME_LEN]; + int i; + bool valid = false; + + CSTD_UNUSED(ppos); + + count = min_t(size_t, sizeof(buf) - 1, count); + if (copy_from_user(buf, ubuf, count)) + return -EFAULT; + + buf[count] = 0; + + for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { + if (sysfs_streq(serialize_jobs_settings[i].name, buf)) { + kbdev->serialize_jobs = + serialize_jobs_settings[i].setting; + valid = true; + break; + } + } + + if (!valid) { + dev_err(kbdev->dev, "serialize_jobs: invalid setting\n"); + return -EINVAL; + } + + return count; +} + +/** + * kbasep_serialize_jobs_debugfs_open - Open callback for the serialize_jobs + * debugfs file + * @in: inode pointer + * @file: file pointer + * + * Return: Zero on success, error code on failure + */ +static int kbasep_serialize_jobs_debugfs_open(struct inode *in, + struct file *file) +{ + return single_open(file, kbasep_serialize_jobs_seq_show, in->i_private); +} + +static const struct file_operations kbasep_serialize_jobs_debugfs_fops = { + .owner = THIS_MODULE, + .open = kbasep_serialize_jobs_debugfs_open, + .read = seq_read, + .write = kbasep_serialize_jobs_debugfs_write, + .llseek = seq_lseek, + .release = single_release, +}; + +#endif /* CONFIG_DEBUG_FS */ +#endif /* MALI_KBASE_BUILD */ + +static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data) +{ + struct kbase_device *kbdev = container_of(data, struct kbase_device, + protected_mode_hwcnt_disable_work); + unsigned long flags; + + bool do_disable; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + do_disable = !kbdev->protected_mode_hwcnt_desired && + !kbdev->protected_mode_hwcnt_disabled; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (!do_disable) + return; + + 
kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + do_disable = !kbdev->protected_mode_hwcnt_desired && + !kbdev->protected_mode_hwcnt_disabled; + + if (do_disable) { + /* Protected mode state did not change while we were doing the + * disable, so commit the work we just performed and continue + * the state machine. + */ + kbdev->protected_mode_hwcnt_disabled = true; + kbase_backend_slot_update(kbdev); + } else { + /* Protected mode state was updated while we were doing the + * disable, so we need to undo the disable we just performed. + */ + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +static int kbasep_protected_mode_enable(struct protected_mode_device *pdev) +{ + struct kbase_device *kbdev = pdev->data; + + return kbase_pm_protected_mode_enable(kbdev); +} + +static int kbasep_protected_mode_disable(struct protected_mode_device *pdev) +{ + struct kbase_device *kbdev = pdev->data; + + return kbase_pm_protected_mode_disable(kbdev); +} + +static const struct protected_mode_ops kbasep_native_protected_ops = { + .protected_mode_enable = kbasep_protected_mode_enable, + .protected_mode_disable = kbasep_protected_mode_disable +}; + +int kbase_protected_mode_init(struct kbase_device *kbdev) +{ + /* Use native protected ops */ + kbdev->protected_dev = kzalloc(sizeof(*kbdev->protected_dev), + GFP_KERNEL); + if (!kbdev->protected_dev) + return -ENOMEM; + kbdev->protected_dev->data = kbdev; + kbdev->protected_ops = &kbasep_native_protected_ops; + INIT_WORK(&kbdev->protected_mode_hwcnt_disable_work, + kbasep_protected_mode_hwcnt_disable_worker); + kbdev->protected_mode_hwcnt_desired = true; + kbdev->protected_mode_hwcnt_disabled = false; + return 0; +} + +void kbase_protected_mode_term(struct kbase_device *kbdev) +{ + cancel_work_sync(&kbdev->protected_mode_hwcnt_disable_work); + kfree(kbdev->protected_dev); +} + +#ifdef CONFIG_MALI_NO_MALI +static 
int kbase_common_reg_map(struct kbase_device *kbdev) +{ + return 0; +} +static void kbase_common_reg_unmap(struct kbase_device * const kbdev) +{ +} +#else /* CONFIG_MALI_NO_MALI */ +static int kbase_common_reg_map(struct kbase_device *kbdev) +{ + int err = 0; + + if (!request_mem_region(kbdev->reg_start, kbdev->reg_size, dev_name(kbdev->dev))) { + dev_err(kbdev->dev, "Register window unavailable\n"); + err = -EIO; + goto out_region; + } + + kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size); + if (!kbdev->reg) { + dev_err(kbdev->dev, "Can't remap register window\n"); + err = -EINVAL; + goto out_ioremap; + } + + return err; + +out_ioremap: + release_mem_region(kbdev->reg_start, kbdev->reg_size); +out_region: + return err; +} + +static void kbase_common_reg_unmap(struct kbase_device * const kbdev) +{ + if (kbdev->reg) { + iounmap(kbdev->reg); + release_mem_region(kbdev->reg_start, kbdev->reg_size); + kbdev->reg = NULL; + kbdev->reg_start = 0; + kbdev->reg_size = 0; + } +} +#endif /* CONFIG_MALI_NO_MALI */ + +int registers_map(struct kbase_device * const kbdev) +{ + /* the first memory resource is the physical address of the GPU + * registers. 
+ */ + struct platform_device *pdev = to_platform_device(kbdev->dev); + struct resource *reg_res; + int err; + + reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!reg_res) { + dev_err(kbdev->dev, "Invalid register resource\n"); + return -ENOENT; + } + + kbdev->reg_start = reg_res->start; + kbdev->reg_size = resource_size(reg_res); + + + err = kbase_common_reg_map(kbdev); + if (err) { + dev_err(kbdev->dev, "Failed to map registers\n"); + return err; + } + + return 0; +} + +void registers_unmap(struct kbase_device *kbdev) +{ + kbase_common_reg_unmap(kbdev); +} + +#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) + +static bool kbase_is_pm_enabled(const struct device_node *gpu_node) +{ + struct device_node *power_model_node; + const void *cooling_cells_node; + const void *operating_point_node; + bool is_pm_enable = false; + + power_model_node = of_get_child_by_name(gpu_node, "power_model"); + if (power_model_node) + is_pm_enable = true; + of_node_put(power_model_node); /* balance of_get_child_by_name() */ + + cooling_cells_node = of_get_property(gpu_node, + "#cooling-cells", NULL); + if (cooling_cells_node) + is_pm_enable = true; + + operating_point_node = of_get_property(gpu_node, + "operating-points", NULL); + if (operating_point_node) + is_pm_enable = true; + + return is_pm_enable; +} + +static bool kbase_is_pv_enabled(const struct device_node *gpu_node) +{ + const void *arbiter_if_node; + + arbiter_if_node = of_get_property(gpu_node, + "arbiter_if", NULL); + + return arbiter_if_node ?
true : false; +} + +static bool kbase_is_full_coherency_enabled(const struct device_node *gpu_node) +{ + const void *coherency_dts; + u32 coherency; + + coherency_dts = of_get_property(gpu_node, + "system-coherency", + NULL); + if (coherency_dts) { + coherency = be32_to_cpup(coherency_dts); + if (coherency == COHERENCY_ACE) + return true; + } + return false; +} + +#endif /* CONFIG_MALI_ARBITER_SUPPORT && CONFIG_OF */ + +int kbase_device_pm_init(struct kbase_device *kbdev) +{ + int err = 0; + +#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) + + u32 gpu_id; + u32 product_id; + u32 gpu_model_id; + + if (kbase_is_pv_enabled(kbdev->dev->of_node)) { + if (kbase_is_pm_enabled(kbdev->dev->of_node)) { + /* Arbitration AND power management invalid */ + dev_err(kbdev->dev, "Invalid combination of arbitration AND power management\n"); + return -EPERM; + } + if (kbase_is_full_coherency_enabled(kbdev->dev->of_node)) { + /* Arbitration AND full coherency invalid */ + dev_err(kbdev->dev, "Invalid combination of arbitration AND full coherency\n"); + return -EPERM; + } + err = kbase_arbiter_pm_early_init(kbdev); + if (err == 0) { + /* Check if Arbitration is running on + * supported GPU platform + */ + kbase_pm_register_access_enable(kbdev); + gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)); + kbase_pm_register_access_disable(kbdev); + product_id = KBASE_UBFX32(gpu_id, + GPU_ID_VERSION_PRODUCT_ID_SHIFT, 16); + gpu_model_id = GPU_ID2_MODEL_MATCH_VALUE(product_id); + + if (gpu_model_id != GPU_ID2_PRODUCT_TGOX + && gpu_model_id != GPU_ID2_PRODUCT_TNOX) { + kbase_arbiter_pm_early_term(kbdev); + dev_err(kbdev->dev, "GPU platform not suitable for arbitration\n"); + return -EPERM; + } + } + } else { + err = power_control_init(kbdev); + } +#else + err = power_control_init(kbdev); +#endif /* CONFIG_MALI_ARBITER_SUPPORT && CONFIG_OF */ + return err; +} + +void kbase_device_pm_term(struct kbase_device *kbdev) +{ +#ifdef CONFIG_MALI_ARBITER_SUPPORT +#ifdef CONFIG_OF + if 
(kbase_is_pv_enabled(kbdev->dev->of_node)) + kbase_arbiter_pm_early_term(kbdev); + else + power_control_term(kbdev); +#endif /* CONFIG_OF */ +#else + power_control_term(kbdev); +#endif +} + +int power_control_init(struct kbase_device *kbdev) +{ +#if KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE || !defined(CONFIG_OF) + /* Power control initialization requires at least the capability to get + * regulators and clocks from the device tree, as well as parsing + * arrays of unsigned integer values. + * + * The whole initialization process shall simply be skipped if the + * minimum capability is not available. + */ + return 0; +#else + struct platform_device *pdev; + int err = 0; + unsigned int i; +#if defined(CONFIG_REGULATOR) + static const char *regulator_names[] = { + "mali", "shadercores" + }; + BUILD_BUG_ON(ARRAY_SIZE(regulator_names) < BASE_MAX_NR_CLOCKS_REGULATORS); +#endif /* CONFIG_REGULATOR */ + + if (!kbdev) + return -ENODEV; + + pdev = to_platform_device(kbdev->dev); + +#if defined(CONFIG_REGULATOR) + /* Since the error code EPROBE_DEFER causes the entire probing + * procedure to be restarted from scratch at a later time, + * all regulators will be released before returning. + * + * Any other error is ignored and the driver will continue + * operating with a partial initialization of regulators. + */ + for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { + kbdev->regulators[i] = regulator_get_optional(kbdev->dev, + regulator_names[i]); + if (IS_ERR_OR_NULL(kbdev->regulators[i])) { + err = PTR_ERR(kbdev->regulators[i]); + kbdev->regulators[i] = NULL; + break; + } + } + if (err == -EPROBE_DEFER) { + while ((i > 0) && (i < BASE_MAX_NR_CLOCKS_REGULATORS)) + regulator_put(kbdev->regulators[--i]); + return err; + } + + kbdev->nr_regulators = i; + dev_dbg(&pdev->dev, "Regulators probed: %u\n", kbdev->nr_regulators); +#endif + + /* Having more clocks than regulators is acceptable, while the + * opposite shall not happen. 
+ * + * Since the error code EPROBE_DEFER causes the entire probing + * procedure to be restarted from scratch at a later time, + * all clocks and regulators will be released before returning. + * + * Any other error is ignored and the driver will continue + * operating with a partial initialization of clocks. + */ + for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { + kbdev->clocks[i] = of_clk_get(kbdev->dev->of_node, i); + if (IS_ERR_OR_NULL(kbdev->clocks[i])) { + err = PTR_ERR(kbdev->clocks[i]); + kbdev->clocks[i] = NULL; + break; + } + + err = clk_prepare_enable(kbdev->clocks[i]); + if (err) { + dev_err(kbdev->dev, + "Failed to prepare and enable clock (%d)\n", + err); + clk_put(kbdev->clocks[i]); + break; + } + } + if (err == -EPROBE_DEFER) { + while ((i > 0) && (i < BASE_MAX_NR_CLOCKS_REGULATORS)) { + clk_disable_unprepare(kbdev->clocks[--i]); + clk_put(kbdev->clocks[i]); + } + goto clocks_probe_defer; + } + + kbdev->nr_clocks = i; + dev_dbg(&pdev->dev, "Clocks probed: %u\n", kbdev->nr_clocks); + + /* Any error in parsing the OPP table from the device file + * shall be ignored. The fact that the table may be absent or wrong + * on the device tree of the platform shouldn't prevent the driver + * from completing its initialization. 
+ */ +#if (KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE && \ + !defined(LSK_OPPV2_BACKPORT)) + err = of_init_opp_table(kbdev->dev); + CSTD_UNUSED(err); +#else + +#if defined(CONFIG_PM_OPP) +#if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \ + defined(CONFIG_REGULATOR)) + if (kbdev->nr_regulators > 0) { + kbdev->opp_table = dev_pm_opp_set_regulators(kbdev->dev, + regulator_names, BASE_MAX_NR_CLOCKS_REGULATORS); + } +#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */ + err = dev_pm_opp_of_add_table(kbdev->dev); + CSTD_UNUSED(err); +#endif /* CONFIG_PM_OPP */ + +#endif /* KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE */ + return 0; + +clocks_probe_defer: +#if defined(CONFIG_REGULATOR) + for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) + regulator_put(kbdev->regulators[i]); +#endif + return err; +#endif /* KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE */ +} + +void power_control_term(struct kbase_device *kbdev) +{ + unsigned int i; + +#if (KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE && \ + !defined(LSK_OPPV2_BACKPORT)) +#if KERNEL_VERSION(3, 19, 0) <= LINUX_VERSION_CODE + of_free_opp_table(kbdev->dev); +#endif +#else + +#if defined(CONFIG_PM_OPP) + dev_pm_opp_of_remove_table(kbdev->dev); +#if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \ + defined(CONFIG_REGULATOR)) + if (!IS_ERR_OR_NULL(kbdev->opp_table)) + dev_pm_opp_put_regulators(kbdev->opp_table); +#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */ +#endif /* CONFIG_PM_OPP */ + +#endif /* KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE */ + + for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { + if (kbdev->clocks[i]) { + if (__clk_is_enabled(kbdev->clocks[i])) + clk_disable_unprepare(kbdev->clocks[i]); + clk_put(kbdev->clocks[i]); + kbdev->clocks[i] = NULL; + } else + break; + } + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ + && defined(CONFIG_REGULATOR) + for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { + if (kbdev->regulators[i]) { + 
regulator_put(kbdev->regulators[i]); + kbdev->regulators[i] = NULL; + } + } +#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ +} + +#ifdef MALI_KBASE_BUILD +#ifdef CONFIG_DEBUG_FS + +static void trigger_reset(struct kbase_device *kbdev) +{ + kbase_pm_context_active(kbdev); + if (kbase_prepare_to_reset_gpu(kbdev)) + kbase_reset_gpu(kbdev); + kbase_pm_context_idle(kbdev); +} + +#define MAKE_QUIRK_ACCESSORS(type) \ +static int type##_quirks_set(void *data, u64 val) \ +{ \ + struct kbase_device *kbdev; \ + kbdev = (struct kbase_device *)data; \ + kbdev->hw_quirks_##type = (u32)val; \ + trigger_reset(kbdev); \ + return 0;\ +} \ +\ +static int type##_quirks_get(void *data, u64 *val) \ +{ \ + struct kbase_device *kbdev;\ + kbdev = (struct kbase_device *)data;\ + *val = kbdev->hw_quirks_##type;\ + return 0;\ +} \ +DEFINE_SIMPLE_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get,\ + type##_quirks_set, "%llu\n") + +MAKE_QUIRK_ACCESSORS(sc); +MAKE_QUIRK_ACCESSORS(tiler); +MAKE_QUIRK_ACCESSORS(mmu); +MAKE_QUIRK_ACCESSORS(jm); + +static ssize_t kbase_device_debugfs_reset_write(struct file *file, + const char __user *ubuf, size_t count, loff_t *ppos) +{ + struct kbase_device *kbdev = file->private_data; + CSTD_UNUSED(ubuf); + CSTD_UNUSED(count); + CSTD_UNUSED(ppos); + + trigger_reset(kbdev); + + return count; +} + +static const struct file_operations fops_trigger_reset = { + .owner = THIS_MODULE, + .open = simple_open, + .write = kbase_device_debugfs_reset_write, + .llseek = default_llseek, +}; + +/** + * debugfs_protected_debug_mode_read - "protected_debug_mode" debugfs read + * @file: File object to read is for + * @buf: User buffer to populate with data + * @len: Length of user buffer + * @ppos: Offset within file object + * + * Retrieves the current status of protected debug mode + * (0 = disabled, 1 = enabled) + * + * Return: Number of bytes added to user buffer + */ +static ssize_t debugfs_protected_debug_mode_read(struct file *file, + char __user *buf, size_t len, loff_t *ppos) 
+{ + struct kbase_device *kbdev = (struct kbase_device *)file->private_data; + u32 gpu_status; + ssize_t ret_val; + + kbase_pm_context_active(kbdev); + gpu_status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)); + kbase_pm_context_idle(kbdev); + + if (gpu_status & GPU_DBGEN) + ret_val = simple_read_from_buffer(buf, len, ppos, "1\n", 2); + else + ret_val = simple_read_from_buffer(buf, len, ppos, "0\n", 2); + + return ret_val; +} + +/* + * struct fops_protected_debug_mode - "protected_debug_mode" debugfs fops + * + * Contains the file operations for the "protected_debug_mode" debugfs file + */ +static const struct file_operations fops_protected_debug_mode = { + .owner = THIS_MODULE, + .open = simple_open, + .read = debugfs_protected_debug_mode_read, + .llseek = default_llseek, +}; + +static int kbase_device_debugfs_mem_pool_max_size_show(struct seq_file *sfile, + void *data) +{ + CSTD_UNUSED(data); + return kbase_debugfs_helper_seq_read(sfile, + MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_config_debugfs_max_size); +} + +static ssize_t kbase_device_debugfs_mem_pool_max_size_write(struct file *file, + const char __user *ubuf, size_t count, loff_t *ppos) +{ + int err = 0; + + CSTD_UNUSED(ppos); + err = kbase_debugfs_helper_seq_write(file, ubuf, count, + MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_config_debugfs_set_max_size); + + return err ? 
err : count; +} + +static int kbase_device_debugfs_mem_pool_max_size_open(struct inode *in, + struct file *file) +{ + return single_open(file, kbase_device_debugfs_mem_pool_max_size_show, + in->i_private); +} + +static const struct file_operations + kbase_device_debugfs_mem_pool_max_size_fops = { + .owner = THIS_MODULE, + .open = kbase_device_debugfs_mem_pool_max_size_open, + .read = seq_read, + .write = kbase_device_debugfs_mem_pool_max_size_write, + .llseek = seq_lseek, + .release = single_release, +}; + +int kbase_device_debugfs_init(struct kbase_device *kbdev) +{ + struct dentry *debugfs_ctx_defaults_directory; + int err; + /* prevent unprivileged use of debug file system + * in old kernel version + */ +#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) + /* only for newer kernel version debug file system is safe */ + const mode_t mode = 0644; +#else + const mode_t mode = 0600; +#endif + + kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname, + NULL); + if (IS_ERR_OR_NULL(kbdev->mali_debugfs_directory)) { /* NULL or ERR_PTR */ + dev_err(kbdev->dev, "Couldn't create mali debugfs directory\n"); + err = -ENOMEM; + goto out; + } + + kbdev->debugfs_ctx_directory = debugfs_create_dir("ctx", + kbdev->mali_debugfs_directory); + if (IS_ERR_OR_NULL(kbdev->debugfs_ctx_directory)) { + dev_err(kbdev->dev, "Couldn't create mali debugfs ctx directory\n"); + err = -ENOMEM; + goto out; + } + + debugfs_ctx_defaults_directory = debugfs_create_dir("defaults", + kbdev->debugfs_ctx_directory); + if (IS_ERR_OR_NULL(debugfs_ctx_defaults_directory)) { + dev_err(kbdev->dev, "Couldn't create mali debugfs ctx defaults directory\n"); + err = -ENOMEM; + goto out; + } + +#if !MALI_CUSTOMER_RELEASE + kbasep_regs_dump_debugfs_init(kbdev); +#endif /* !MALI_CUSTOMER_RELEASE */ + kbasep_regs_history_debugfs_init(kbdev); + + kbase_debug_job_fault_debugfs_init(kbdev); + + kbasep_gpu_memory_debugfs_init(kbdev); + kbase_as_fault_debugfs_init(kbdev); +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS + kbase_instr_backend_debugfs_init(kbdev); +#endif
+ /* fops_* variables created by invocations of macro + * MAKE_QUIRK_ACCESSORS() above. */ + debugfs_create_file("quirks_sc", 0644, + kbdev->mali_debugfs_directory, kbdev, + &fops_sc_quirks); + debugfs_create_file("quirks_tiler", 0644, + kbdev->mali_debugfs_directory, kbdev, + &fops_tiler_quirks); + debugfs_create_file("quirks_mmu", 0644, + kbdev->mali_debugfs_directory, kbdev, + &fops_mmu_quirks); + debugfs_create_file("quirks_jm", 0644, + kbdev->mali_debugfs_directory, kbdev, + &fops_jm_quirks); + + debugfs_create_bool("infinite_cache", mode, + debugfs_ctx_defaults_directory, + &kbdev->infinite_cache_active_default); + + debugfs_create_file("mem_pool_max_size", mode, + debugfs_ctx_defaults_directory, + &kbdev->mem_pool_defaults.small, + &kbase_device_debugfs_mem_pool_max_size_fops); + + debugfs_create_file("lp_mem_pool_max_size", mode, + debugfs_ctx_defaults_directory, + &kbdev->mem_pool_defaults.large, + &kbase_device_debugfs_mem_pool_max_size_fops); + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { + debugfs_create_file("protected_debug_mode", S_IRUGO, + kbdev->mali_debugfs_directory, kbdev, + &fops_protected_debug_mode); + } + + debugfs_create_file("reset", 0644, + kbdev->mali_debugfs_directory, kbdev, + &fops_trigger_reset); + + kbase_ktrace_debugfs_init(kbdev); + +#ifdef CONFIG_MALI_DEVFREQ +#ifdef CONFIG_DEVFREQ_THERMAL + if (kbdev->devfreq) + kbase_ipa_debugfs_init(kbdev); +#endif /* CONFIG_DEVFREQ_THERMAL */ +#endif /* CONFIG_MALI_DEVFREQ */ + + debugfs_create_file("serialize_jobs", S_IRUGO | S_IWUSR, + kbdev->mali_debugfs_directory, kbdev, + &kbasep_serialize_jobs_debugfs_fops); + + return 0; + +out: + debugfs_remove_recursive(kbdev->mali_debugfs_directory); + return err; +} + +void kbase_device_debugfs_term(struct kbase_device *kbdev) +{ + debugfs_remove_recursive(kbdev->mali_debugfs_directory); +} +#endif /* CONFIG_DEBUG_FS */ +#endif /* MALI_KBASE_BUILD */ + +int kbase_device_coherency_init(struct kbase_device *kbdev) +{ 
+#ifdef CONFIG_OF + u32 supported_coherency_bitmap = + kbdev->gpu_props.props.raw_props.coherency_mode; + const void *coherency_override_dts; + u32 override_coherency, gpu_id; + unsigned int prod_id; + + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + gpu_id &= GPU_ID_VERSION_PRODUCT_ID; + prod_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; + + /* Only for tMIx : + * (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly + * documented for tMIx so force correct value here. + */ + if (GPU_ID2_MODEL_MATCH_VALUE(prod_id) == + GPU_ID2_PRODUCT_TMIX) + if (supported_coherency_bitmap == + COHERENCY_FEATURE_BIT(COHERENCY_ACE)) + supported_coherency_bitmap |= + COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE); + +#endif /* CONFIG_OF */ + + kbdev->system_coherency = COHERENCY_NONE; + + /* device tree may override the coherency */ +#ifdef CONFIG_OF + coherency_override_dts = of_get_property(kbdev->dev->of_node, + "system-coherency", + NULL); + if (coherency_override_dts) { + + override_coherency = be32_to_cpup(coherency_override_dts); + + if ((override_coherency <= COHERENCY_NONE) && + (supported_coherency_bitmap & + COHERENCY_FEATURE_BIT(override_coherency))) { + + kbdev->system_coherency = override_coherency; + + dev_info(kbdev->dev, + "Using coherency mode %u set from dtb", + override_coherency); + } else + dev_warn(kbdev->dev, + "Ignoring unsupported coherency mode %u set from dtb", + override_coherency); + } + +#endif /* CONFIG_OF */ + + kbdev->gpu_props.props.raw_props.coherency_mode = + kbdev->system_coherency; + + return 0; +} + +#ifdef CONFIG_MALI_BUSLOG + +/* Callback used by the kbase bus logger client, to initiate a GPU reset + * when the bus log is restarted. GPU reset is used as reference point + * in HW bus log analyses. 
+ */ +static void kbase_logging_started_cb(void *data) +{ + struct kbase_device *kbdev = (struct kbase_device *)data; + + if (kbase_prepare_to_reset_gpu(kbdev)) + kbase_reset_gpu(kbdev); + dev_info(kbdev->dev, "KBASE - Bus logger restarted\n"); +} + +int buslog_init(struct kbase_device *kbdev) +{ + int err = 0; + + err = bl_core_client_register(kbdev->devname, + kbase_logging_started_cb, + kbdev, &kbdev->buslogger, + THIS_MODULE, NULL); + if (err == 0) + bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024); + + return err; +} + +void buslog_term(struct kbase_device *kbdev) +{ + bl_core_client_unregister(kbdev->buslogger); +} +#endif + +static struct attribute *kbase_attrs[] = { +#ifdef CONFIG_MALI_DEBUG + &dev_attr_debug_command.attr, + &dev_attr_js_softstop_always.attr, +#endif + &dev_attr_js_timeouts.attr, + &dev_attr_soft_job_timeout.attr, + &dev_attr_gpuinfo.attr, + &dev_attr_dvfs_period.attr, + &dev_attr_pm_poweroff.attr, + &dev_attr_reset_timeout.attr, + &dev_attr_js_scheduling_period.attr, + &dev_attr_power_policy.attr, + &dev_attr_core_mask.attr, + &dev_attr_gpu_memory.attr, + &dev_attr_mem_pool_size.attr, + &dev_attr_mem_pool_max_size.attr, + &dev_attr_lp_mem_pool_size.attr, + &dev_attr_lp_mem_pool_max_size.attr, + &dev_attr_js_ctx_scheduling_mode.attr, + NULL +}; + +static const struct attribute_group kbase_attr_group = { + .attrs = kbase_attrs, +}; + +static struct attribute *ctx_attrs[] = { + &dev_attr_ctx_mem_pool_size.attr, + NULL +}; + +static const struct attribute_group kbase_ctx_attr_group = { + .attrs = ctx_attrs, +}; + +int kbase_sysfs_init(struct kbase_device *kbdev) +{ + int err = 0; + + kbdev->mdev.minor = MISC_DYNAMIC_MINOR; + kbdev->mdev.name = kbdev->devname; + kbdev->mdev.fops = &kbase_fops; + kbdev->mdev.parent = get_device(kbdev->dev); + kbdev->mdev.mode = 0666; + /* Stop at the first failure so that one real errno is returned */ + err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group); + if (!err) + err = sysfs_create_group(&kbdev->dev->kobj, &kbase_ctx_attr_group); + return err; +} + +void
kbase_sysfs_term(struct kbase_device *kbdev) +{ + sysfs_remove_group(&kbdev->dev->kobj, &kbase_ctx_attr_group); + sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); + put_device(kbdev->dev); +} + +static int kbase_platform_device_remove(struct platform_device *pdev) +{ + struct kbase_device *kbdev = to_kbase_device(&pdev->dev); + + if (!kbdev) + return -ENODEV; + + kbase_device_term(kbdev); + dev_set_drvdata(kbdev->dev, NULL); + kbase_device_free(kbdev); + + return 0; +} + +void kbase_backend_devfreq_term(struct kbase_device *kbdev) +{ +#ifdef CONFIG_MALI_DEVFREQ + if (kbdev->devfreq) + kbase_devfreq_term(kbdev); +#endif +} + +int kbase_backend_devfreq_init(struct kbase_device *kbdev) +{ +#ifdef CONFIG_MALI_DEVFREQ + /* Devfreq uses hardware counters, so must be initialized after it. */ + int err = kbase_devfreq_init(kbdev); + + if (err) + dev_err(kbdev->dev, "Continuing without devfreq\n"); +#endif /* CONFIG_MALI_DEVFREQ */ + return 0; +} + +static int kbase_platform_device_probe(struct platform_device *pdev) +{ + struct kbase_device *kbdev; + int err = 0; + + mali_kbase_print_cs_experimental(); + + kbdev = kbase_device_alloc(); + if (!kbdev) { + dev_err(&pdev->dev, "Allocate device failed\n"); + return -ENOMEM; + } + + kbdev->dev = &pdev->dev; + dev_set_drvdata(kbdev->dev, kbdev); + + err = kbase_device_init(kbdev); + + if (err) { + if (err == -EPROBE_DEFER) + dev_err(kbdev->dev, "Device initialization Deferred\n"); + else + dev_err(kbdev->dev, "Device initialization failed\n"); + + dev_set_drvdata(kbdev->dev, NULL); + kbase_device_free(kbdev); + } else { +#ifdef MALI_KBASE_BUILD + dev_info(kbdev->dev, + "Probed as %s\n", dev_name(kbdev->mdev.this_device)); +#endif /* MALI_KBASE_BUILD */ + kbase_increment_device_id(); +#ifdef CONFIG_MALI_ARBITER_SUPPORT + mutex_lock(&kbdev->pm.lock); + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_INITIALIZED_EVT); + mutex_unlock(&kbdev->pm.lock); +#endif + } + + return err; +} + +#undef 
KBASEP_DEFAULT_REGISTER_HISTORY_SIZE + +/** + * kbase_device_suspend - Suspend callback from the OS. + * + * This is called by Linux when the device should suspend. + * + * @dev: The device to suspend + * + * Return: A standard Linux error code + */ +static int kbase_device_suspend(struct device *dev) +{ + struct kbase_device *kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + kbase_pm_suspend(kbdev); + +#if defined(CONFIG_MALI_DEVFREQ) && \ + (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) + dev_dbg(dev, "Callback %s\n", __func__); + if (kbdev->devfreq) { + kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_SUSPEND); + flush_workqueue(kbdev->devfreq_queue.workq); + } +#endif + return 0; +} + +/** + * kbase_device_resume - Resume callback from the OS. + * + * This is called by Linux when the device should resume from suspension. + * + * @dev: The device to resume + * + * Return: A standard Linux error code + */ +static int kbase_device_resume(struct device *dev) +{ + struct kbase_device *kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + kbase_pm_resume(kbdev); + +#if defined(CONFIG_MALI_DEVFREQ) && \ + (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) + dev_dbg(dev, "Callback %s\n", __func__); + if (kbdev->devfreq) { + mutex_lock(&kbdev->pm.lock); + if (kbdev->pm.active_count > 0) + kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_RESUME); + mutex_unlock(&kbdev->pm.lock); + flush_workqueue(kbdev->devfreq_queue.workq); + } +#endif + return 0; +} + +/** + * kbase_device_runtime_suspend - Runtime suspend callback from the OS. + * + * This is called by Linux when the device should prepare for a condition in + * which it will not be able to communicate with the CPU(s) and RAM due to + * power management. 
+ * + * @dev: The device to suspend + * + * Return: A standard Linux error code + */ +#ifdef KBASE_PM_RUNTIME +static int kbase_device_runtime_suspend(struct device *dev) +{ + struct kbase_device *kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + dev_dbg(dev, "Callback %s\n", __func__); +#if defined(CONFIG_MALI_DEVFREQ) && \ + (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) + if (kbdev->devfreq) + kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_SUSPEND); +#endif + + if (kbdev->pm.backend.callback_power_runtime_off) { + kbdev->pm.backend.callback_power_runtime_off(kbdev); + dev_dbg(dev, "runtime suspend\n"); + } + return 0; +} +#endif /* KBASE_PM_RUNTIME */ + +/** + * kbase_device_runtime_resume - Runtime resume callback from the OS. + * + * This is called by Linux when the device should go into a fully active state. + * + * @dev: The device to resume + * + * Return: A standard Linux error code + */ + +#ifdef KBASE_PM_RUNTIME +static int kbase_device_runtime_resume(struct device *dev) +{ + int ret = 0; + struct kbase_device *kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + dev_dbg(dev, "Callback %s\n", __func__); + if (kbdev->pm.backend.callback_power_runtime_on) { + ret = kbdev->pm.backend.callback_power_runtime_on(kbdev); + dev_dbg(dev, "runtime resume\n"); + } + +#if defined(CONFIG_MALI_DEVFREQ) && \ + (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) + if (kbdev->devfreq) + kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_RESUME); +#endif + + return ret; +} +#endif /* KBASE_PM_RUNTIME */ + + +#ifdef KBASE_PM_RUNTIME +/** + * kbase_device_runtime_idle - Runtime idle callback from the OS. + * @dev: The device that appears to be idle + * + * This is called by Linux when the device appears to be inactive and it might + * be placed into a low power state.
+ * + * Return: 0 if device can be suspended, non-zero to avoid runtime autosuspend, + * otherwise a standard Linux error code + */ +static int kbase_device_runtime_idle(struct device *dev) +{ + struct kbase_device *kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + dev_dbg(dev, "Callback %s\n", __func__); + /* Use platform specific implementation if it exists. */ + if (kbdev->pm.backend.callback_power_runtime_idle) + return kbdev->pm.backend.callback_power_runtime_idle(kbdev); + + /* Just need to update the device's last busy mark. Kernel will respect + * the autosuspend delay and so won't suspend the device immediately. + */ + pm_runtime_mark_last_busy(kbdev->dev); + return 0; +} +#endif /* KBASE_PM_RUNTIME */ + +/* The power management operations for the platform driver. + */ +static const struct dev_pm_ops kbase_pm_ops = { + .suspend = kbase_device_suspend, + .resume = kbase_device_resume, +#ifdef KBASE_PM_RUNTIME + .runtime_suspend = kbase_device_runtime_suspend, + .runtime_resume = kbase_device_runtime_resume, + .runtime_idle = kbase_device_runtime_idle, +#endif /* KBASE_PM_RUNTIME */ +}; + +#ifdef CONFIG_OF +static const struct of_device_id kbase_dt_ids[] = { + { .compatible = "arm,malit6xx" }, + { .compatible = "arm,mali-midgard" }, + { .compatible = "arm,mali-bifrost" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, kbase_dt_ids); +#endif + +static struct platform_driver kbase_platform_driver = { + .probe = kbase_platform_device_probe, + .remove = kbase_platform_device_remove, + .driver = { + .name = kbase_drv_name, + .owner = THIS_MODULE, + .pm = &kbase_pm_ops, + .of_match_table = of_match_ptr(kbase_dt_ids), + }, +}; + +/* + * The driver will not provide a shortcut to create the Mali platform device + * anymore when using Device Tree. 
+ */ +#ifdef CONFIG_OF +module_platform_driver(kbase_platform_driver); +#else + +static int __init kbase_driver_init(void) +{ + int ret; + + ret = kbase_platform_register(); + if (ret) + return ret; + + ret = platform_driver_register(&kbase_platform_driver); + + if (ret) + kbase_platform_unregister(); + + return ret; +} + +static void __exit kbase_driver_exit(void) +{ + platform_driver_unregister(&kbase_platform_driver); + kbase_platform_unregister(); +} + +module_init(kbase_driver_init); +module_exit(kbase_driver_exit); + +#endif /* CONFIG_OF */ + +MODULE_LICENSE("GPL"); +MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \ + __stringify(BASE_UK_VERSION_MAJOR) "." \ + __stringify(BASE_UK_VERSION_MINOR) ")"); + +#define CREATE_TRACE_POINTS +/* Create the trace points (otherwise we just get code to call a tracepoint) */ +#include "mali_linux_trace.h" + +#ifdef CONFIG_MALI_GATOR_SUPPORT +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_job_slots_event); +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_status); +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages); +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_total_alloc_pages_change); + +void kbase_trace_mali_pm_status(u32 dev_id, u32 event, u64 value) +{ + trace_mali_pm_status(dev_id, event, value); +} + +void kbase_trace_mali_job_slots_event(u32 dev_id, u32 event, const struct kbase_context *kctx, u8 atom_id) +{ + trace_mali_job_slots_event(dev_id, event, + (kctx != NULL ? kctx->tgid : 0), + (kctx != NULL ? 
kctx->pid : 0), + atom_id); +} + +void kbase_trace_mali_page_fault_insert_pages(u32 dev_id, int event, u32 value) +{ + trace_mali_page_fault_insert_pages(dev_id, event, value); +} + +void kbase_trace_mali_total_alloc_pages_change(u32 dev_id, long long int event) +{ + trace_mali_total_alloc_pages_change(dev_id, event); +} +#endif /* CONFIG_MALI_GATOR_SUPPORT */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cs_experimental.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cs_experimental.h new file mode 100644 index 0000000..e1fffc3 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_cs_experimental.h @@ -0,0 +1,54 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + *//* SPDX-License-Identifier: GPL-2.0 */ + +/* + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + */ + +#ifndef _KBASE_CS_EXPERIMENTAL_H_ +#define _KBASE_CS_EXPERIMENTAL_H_ + +#include + +/** + * mali_kbase_print_cs_experimental() - Print a string for every Core Services + * experimental feature that is enabled + * + * Each message is newline-terminated so that it is emitted as a complete log + * line, like the other log messages in this driver. + */ +static inline void mali_kbase_print_cs_experimental(void) +{ +#if MALI_JIT_PRESSURE_LIMIT + pr_info("mali_kbase: JIT_PRESSURE_LIMIT (experimental) enabled\n"); +#endif /* MALI_JIT_PRESSURE_LIMIT */ +#if MALI_INCREMENTAL_RENDERING + pr_info("mali_kbase: INCREMENTAL_RENDERING (experimental) enabled\n"); +#endif /* MALI_INCREMENTAL_RENDERING */ +} + +#endif /* _KBASE_CS_EXPERIMENTAL_H_ */ + + diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c new file mode 100644 index 0000000..cea91bc --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c @@ -0,0 +1,344 @@ +/* + * + * (C) COPYRIGHT 2017-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html.
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include + +#include +#include "mali_kbase_ctx_sched.h" +#include "tl/mali_kbase_tracepoints.h" + +/* Helper for ktrace */ +#if KBASE_KTRACE_ENABLE +static int kbase_ktrace_get_ctx_refcnt(struct kbase_context *kctx) +{ + return atomic_read(&kctx->refcount); +} +#else /* KBASE_KTRACE_ENABLE */ +static int kbase_ktrace_get_ctx_refcnt(struct kbase_context *kctx) +{ + CSTD_UNUSED(kctx); + return 0; +} +#endif /* KBASE_KTRACE_ENABLE */ + +int kbase_ctx_sched_init(struct kbase_device *kbdev) +{ + int as_present = (1U << kbdev->nr_hw_address_spaces) - 1; + + /* These two must be recalculated if nr_hw_address_spaces changes + * (e.g. for HW workarounds) */ + kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces; + kbdev->as_free = as_present; /* All ASs initially free */ + + memset(kbdev->as_to_kctx, 0, sizeof(kbdev->as_to_kctx)); + + return 0; +} + +void kbase_ctx_sched_term(struct kbase_device *kbdev) +{ + s8 i; + + /* Sanity checks */ + for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) { + WARN_ON(kbdev->as_to_kctx[i] != NULL); + WARN_ON(!(kbdev->as_free & (1u << i))); + } +} + +/* kbasep_ctx_sched_find_as_for_ctx - Find a free address space + * + * @kctx: The context for which to find a free address space + * + * Return: A valid AS if successful, otherwise KBASEP_AS_NR_INVALID + * + * This function returns an address space available for use. It would prefer + * returning an AS that has been previously assigned to the context to + * avoid having to reprogram the MMU.
+ */ +static int kbasep_ctx_sched_find_as_for_ctx(struct kbase_context *kctx) +{ + struct kbase_device *const kbdev = kctx->kbdev; + int free_as; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* First check if the previously assigned AS is available */ + if ((kctx->as_nr != KBASEP_AS_NR_INVALID) && + (kbdev->as_free & (1u << kctx->as_nr))) + return kctx->as_nr; + + /* The previously assigned AS was taken, we'll be returning any free + * AS at this point. + */ + free_as = ffs(kbdev->as_free) - 1; + if (free_as >= 0 && free_as < kbdev->nr_hw_address_spaces) + return free_as; + + return KBASEP_AS_NR_INVALID; +} + +int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx) +{ + struct kbase_device *const kbdev = kctx->kbdev; + + lockdep_assert_held(&kbdev->mmu_hw_mutex); + lockdep_assert_held(&kbdev->hwaccess_lock); + + WARN_ON(!kbdev->pm.backend.gpu_powered); + + if (atomic_inc_return(&kctx->refcount) == 1) { + int const free_as = kbasep_ctx_sched_find_as_for_ctx(kctx); + + if (free_as != KBASEP_AS_NR_INVALID) { + kbdev->as_free &= ~(1u << free_as); + /* Only program the MMU if the context has not been + * assigned the same address space before. + */ + if (free_as != kctx->as_nr) { + struct kbase_context *const prev_kctx = + kbdev->as_to_kctx[free_as]; + + if (prev_kctx) { + WARN_ON(atomic_read(&prev_kctx->refcount) != 0); + kbase_mmu_disable(prev_kctx); + KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( + kbdev, prev_kctx->id); + prev_kctx->as_nr = KBASEP_AS_NR_INVALID; + } + + kctx->as_nr = free_as; + kbdev->as_to_kctx[free_as] = kctx; + KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS( + kbdev, kctx->id, free_as); + kbase_mmu_update(kbdev, &kctx->mmu, + kctx->as_nr); + } + } else { + atomic_dec(&kctx->refcount); + + /* Failed to find an available address space, we must + * be returning an error at this point. 
+ */ + WARN_ON(kctx->as_nr != KBASEP_AS_NR_INVALID); + } + } + + return kctx->as_nr; +} + +void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx) +{ + struct kbase_device *const kbdev = kctx->kbdev; + + lockdep_assert_held(&kbdev->hwaccess_lock); + WARN_ON(atomic_read(&kctx->refcount) == 0); + WARN_ON(kctx->as_nr == KBASEP_AS_NR_INVALID); + WARN_ON(kbdev->as_to_kctx[kctx->as_nr] != kctx); + + atomic_inc(&kctx->refcount); +} + +void kbase_ctx_sched_release_ctx(struct kbase_context *kctx) +{ + struct kbase_device *const kbdev = kctx->kbdev; + int new_ref_count; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + new_ref_count = atomic_dec_return(&kctx->refcount); + if (new_ref_count == 0) { + kbdev->as_free |= (1u << kctx->as_nr); + if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) { + KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( + kbdev, kctx->id); + kbdev->as_to_kctx[kctx->as_nr] = NULL; + kctx->as_nr = KBASEP_AS_NR_INVALID; + kbase_ctx_flag_clear(kctx, KCTX_AS_DISABLED_ON_FAULT); + } + } + + KBASE_KTRACE_ADD(kbdev, SCHED_RELEASE_CTX, kctx, new_ref_count); +} + +void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx) +{ + struct kbase_device *const kbdev = kctx->kbdev; + + lockdep_assert_held(&kbdev->mmu_hw_mutex); + lockdep_assert_held(&kbdev->hwaccess_lock); + + WARN_ON(atomic_read(&kctx->refcount) != 0); + + if (kctx->as_nr != KBASEP_AS_NR_INVALID) { + if (kbdev->pm.backend.gpu_powered) + kbase_mmu_disable(kctx); + + KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS(kbdev, kctx->id); + kbdev->as_to_kctx[kctx->as_nr] = NULL; + kctx->as_nr = KBASEP_AS_NR_INVALID; + } +} + +void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev) +{ + s8 i; + + lockdep_assert_held(&kbdev->mmu_hw_mutex); + lockdep_assert_held(&kbdev->hwaccess_lock); + + WARN_ON(!kbdev->pm.backend.gpu_powered); + + for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) { + struct kbase_context *kctx; + + kctx = kbdev->as_to_kctx[i]; + if (kctx) { + if (atomic_read(&kctx->refcount)) { + 
WARN_ON(kctx->as_nr != i); + + kbase_mmu_update(kbdev, &kctx->mmu, + kctx->as_nr); + kbase_ctx_flag_clear(kctx, + KCTX_AS_DISABLED_ON_FAULT); + } else { + /* This context might have been assigned an + * AS before, clear it. + */ + if (kctx->as_nr != KBASEP_AS_NR_INVALID) { + KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( + kbdev, kctx->id); + kbdev->as_to_kctx[kctx->as_nr] = NULL; + kctx->as_nr = KBASEP_AS_NR_INVALID; + } + } + } else { + kbase_mmu_disable_as(kbdev, i); + } + } +} + +struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount( + struct kbase_device *kbdev, size_t as_nr) +{ + unsigned long flags; + struct kbase_context *found_kctx = NULL; + + if (WARN_ON(kbdev == NULL)) + return NULL; + + if (WARN_ON(as_nr >= BASE_MAX_NR_AS)) + return NULL; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + found_kctx = kbdev->as_to_kctx[as_nr]; + + if (found_kctx != NULL) + kbase_ctx_sched_retain_ctx_refcount(found_kctx); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return found_kctx; +} + +struct kbase_context *kbase_ctx_sched_as_to_ctx(struct kbase_device *kbdev, + size_t as_nr) +{ + struct kbase_context *found_kctx; + + if (WARN_ON(kbdev == NULL)) + return NULL; + + if (WARN_ON(as_nr >= BASE_MAX_NR_AS)) + return NULL; + + found_kctx = kbdev->as_to_kctx[as_nr]; + + if (WARN_ON(!found_kctx)) + return NULL; + + if (WARN_ON(atomic_read(&found_kctx->refcount) <= 0)) + return NULL; + + return found_kctx; +} + +bool kbase_ctx_sched_inc_refcount_nolock(struct kbase_context *kctx) +{ + bool result = false; + int as_nr; + + if (WARN_ON(kctx == NULL)) + return result; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + as_nr = kctx->as_nr; + if (atomic_read(&kctx->refcount) > 0) { + KBASE_DEBUG_ASSERT(as_nr >= 0); + + kbase_ctx_sched_retain_ctx_refcount(kctx); + KBASE_KTRACE_ADD(kctx->kbdev, SCHED_RETAIN_CTX_NOLOCK, kctx, + kbase_ktrace_get_ctx_refcnt(kctx)); + result = true; + } + + return result; +} + +bool kbase_ctx_sched_inc_refcount(struct 
kbase_context *kctx) +{ + unsigned long flags; + bool result = false; + + if (WARN_ON(kctx == NULL)) + return result; + + if (WARN_ON(kctx->kbdev == NULL)) + return result; + + mutex_lock(&kctx->kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); + result = kbase_ctx_sched_inc_refcount_nolock(kctx); + spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); + mutex_unlock(&kctx->kbdev->mmu_hw_mutex); + + return result; +} + +void kbase_ctx_sched_release_ctx_lock(struct kbase_context *kctx) +{ + unsigned long flags; + + if (WARN_ON(!kctx)) + return; + + spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); + + if (!WARN_ON(kctx->as_nr == KBASEP_AS_NR_INVALID) && + !WARN_ON(atomic_read(&kctx->refcount) <= 0)) + kbase_ctx_sched_release_ctx(kctx); + + spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h new file mode 100644 index 0000000..1affa71 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h @@ -0,0 +1,209 @@ +/* + * + * (C) COPYRIGHT 2017-2018, 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_CTX_SCHED_H_ +#define _KBASE_CTX_SCHED_H_ + +#include + +/** + * The Context Scheduler manages address space assignment and reference + * counting to kbase_context. The interface has been designed to minimise + * interactions between the Job Scheduler and Power Management/MMU to support + * the existing Job Scheduler interface. + * + * The initial implementation of the Context Scheduler does not schedule + * contexts. Instead it relies on the Job Scheduler to make decisions of + * when to schedule/evict contexts if address spaces are starved. In the + * future, once an interface between the CS and JS has been devised to + * provide enough information about how each context is consuming GPU resources, + * those decisions can be made in the CS itself, thereby reducing duplicated + * code. + */ + +/** + * kbase_ctx_sched_init - Initialise the context scheduler + * @kbdev: The device for which the context scheduler needs to be initialised + * + * This must be called during device initialisation. The number of hardware + * address spaces must already be established before calling this function. + * + * Return: 0 for success, otherwise failure + */ +int kbase_ctx_sched_init(struct kbase_device *kbdev); + +/** + * kbase_ctx_sched_term - Terminate the context scheduler + * @kbdev: The device for which the context scheduler needs to be terminated + * + * This must be called during device termination after all contexts have been + * destroyed. + */ +void kbase_ctx_sched_term(struct kbase_device *kbdev); + +/** + * kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context + * @kctx: The context to which to retain a reference + * + * This function should be called whenever an address space should be assigned + * to a context and programmed onto the MMU. It should typically be called + * when jobs are ready to be submitted to the GPU. + * + * It can be called as many times as necessary. 
The address space will be + * assigned to the context for as long as there is a reference to said context. + * + * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be + * held whilst calling this function. + * + * Return: The address space that the context has been assigned to or + * KBASEP_AS_NR_INVALID if no address space was available. + */ +int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx); + +/** + * kbase_ctx_sched_retain_ctx_refcount + * @kctx: The context to which to retain a reference + * + * This function only retains a reference to the context. It must be called + * only when the context already has a reference. + * + * This is typically called inside an atomic session where we know the context + * is already scheduled in but want to take an extra reference to ensure that + * it doesn't get descheduled. + * + * The kbase_device::hwaccess_lock must be held whilst calling this function + */ +void kbase_ctx_sched_retain_ctx_refcount(struct kbase_context *kctx); + +/** + * kbase_ctx_sched_release_ctx - Release a reference to the @ref kbase_context + * @kctx: The context from which to release a reference + * + * This function should be called whenever an address space could be unassigned + * from a context. When there are no more references to said context, the + * address space previously assigned to this context shall be reassigned to + * other contexts as needed. + * + * The kbase_device::hwaccess_lock must be held whilst calling this function + */ +void kbase_ctx_sched_release_ctx(struct kbase_context *kctx); + +/** + * kbase_ctx_sched_remove_ctx - Unassign previously assigned address space + * @kctx: The context to be removed + * + * This function should be called when a context is being destroyed. The + * context must no longer have any reference. If it has been assigned an + * address space before then the AS will be unprogrammed. 
+ * + * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be + * held whilst calling this function. + */ +void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx); + +/** + * kbase_ctx_sched_restore_all_as - Reprogram all address spaces + * @kbdev: The device for which address spaces are to be reprogrammed + * + * This function shall reprogram all address spaces previously assigned to + * contexts. It can be used after the GPU is reset. + * + * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be + * held whilst calling this function. + */ +void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev); + +/** + * kbase_ctx_sched_as_to_ctx_refcount - Lookup a context based on its current + * address space and ensure that it stays scheduled in + * @kbdev: The device for which the returned context must belong + * @as_nr: address space assigned to the context of interest + * + * The context is refcounted as being busy to prevent it from scheduling + * out. It must be released with kbase_ctx_sched_release_ctx() when it is no + * longer required to stay scheduled in. + * + * This function can safely be called from IRQ context. + * + * The following locking conditions are made on the caller: + * * it must not hold the kbase_device::hwaccess_lock, because it will be used + * internally. + * + * Return: a valid struct kbase_context on success, which has been refcounted + * as being busy or return NULL on failure, indicating that no context was found + * in as_nr. + */ +struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount( + struct kbase_device *kbdev, size_t as_nr); + +/** + * kbase_ctx_sched_as_to_ctx - Lookup a context based on its current address + * space + * @kbdev: The device for which the returned context must belong + * @as_nr: address space assigned to the context of interest + * + * Return: a valid struct kbase_context on success or NULL on failure, + * indicating that no context was found in as_nr.
+ */ +struct kbase_context *kbase_ctx_sched_as_to_ctx(struct kbase_device *kbdev, + size_t as_nr); + +/** + * kbase_ctx_sched_inc_refcount_nolock - Refcount a context as being busy, + * preventing it from being scheduled out. + * @kctx: Context to be refcounted + * + * The following locks must be held by the caller: + * * kbase_device::mmu_hw_mutex + * * kbase_device::hwaccess_lock + * + * Return: true if refcount succeeded, and the context will not be scheduled + * out, false if the refcount failed (because the context is being/has been + * scheduled out). + */ +bool kbase_ctx_sched_inc_refcount_nolock(struct kbase_context *kctx); + +/** + * kbase_ctx_sched_inc_refcount - Refcount a context as being busy, preventing + * it from being scheduled out. + * @kctx: Context to be refcounted + * + * The following locking conditions are made on the caller: + * * it must not hold kbase_device::mmu_hw_mutex and + * kbase_device::hwaccess_lock, because they will be used internally. + * + * Return: true if refcount succeeded, and the context will not be scheduled + * out, false if the refcount failed (because the context is being/has been + * scheduled out). + */ +bool kbase_ctx_sched_inc_refcount(struct kbase_context *kctx); + +/** + * kbase_ctx_sched_release_ctx_lock - Release a reference count of a context + * @kctx: Context for which refcount should be decreased + * + * Effectively, this is a wrapper for kbase_ctx_sched_release_ctx, but + * kbase_device::hwaccess_lock is required NOT to be locked. + */ +void kbase_ctx_sched_release_ctx_lock(struct kbase_context *kctx); + +#endif /* _KBASE_CTX_SCHED_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c new file mode 100644 index 0000000..118f787 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c @@ -0,0 +1,44 @@ +/* + * + * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +#include + +static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = { + NULL, + NULL +}; + +void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param) +{ + kbasep_debug_assert_registered_cb.func = func; + kbasep_debug_assert_registered_cb.param = param; +} + +void kbasep_debug_assert_call_hook(void) +{ + if (kbasep_debug_assert_registered_cb.func != NULL) + kbasep_debug_assert_registered_cb.func(kbasep_debug_assert_registered_cb.param); +} +KBASE_EXPORT_SYMBOL(kbasep_debug_assert_call_hook); + diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h new file mode 100644 index 0000000..2fdb72d --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h @@ -0,0 +1,169 @@ +/* + * + * (C) COPYRIGHT 2012-2015, 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +#ifndef _KBASE_DEBUG_H +#define _KBASE_DEBUG_H + +#include + +/** @brief If equals to 0, a trace containing the file, line, and function will be displayed before each message. */ +#define KBASE_DEBUG_SKIP_TRACE 0 + +/** @brief If different from 0, the trace will only contain the file and line. */ +#define KBASE_DEBUG_SKIP_FUNCTION_NAME 0 + +/** @brief Disable the asserts tests if set to 1. Default is to disable the asserts in release. */ +#ifndef KBASE_DEBUG_DISABLE_ASSERTS +#ifdef CONFIG_MALI_DEBUG +#define KBASE_DEBUG_DISABLE_ASSERTS 0 +#else +#define KBASE_DEBUG_DISABLE_ASSERTS 1 +#endif +#endif /* KBASE_DEBUG_DISABLE_ASSERTS */ + +/** Function type that is called on an KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */ +typedef void (kbase_debug_assert_hook) (void *); + +struct kbasep_debug_assert_cb { + kbase_debug_assert_hook *func; + void *param; +}; + +/** + * @def KBASEP_DEBUG_PRINT_TRACE + * @brief Private macro containing the format of the trace to display before every message + * @sa KBASE_DEBUG_SKIP_TRACE, KBASE_DEBUG_SKIP_FUNCTION_NAME + */ +#if !KBASE_DEBUG_SKIP_TRACE +#define KBASEP_DEBUG_PRINT_TRACE \ + "In file: " __FILE__ " line: " CSTD_STR2(__LINE__) +#if !KBASE_DEBUG_SKIP_FUNCTION_NAME +#define KBASEP_DEBUG_PRINT_FUNCTION __func__ +#else +#define KBASEP_DEBUG_PRINT_FUNCTION "" +#endif +#else +#define KBASEP_DEBUG_PRINT_TRACE "" +#endif + +/** + * @def KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) 
+ * @brief (Private) system printing function associated to the @ref KBASE_DEBUG_ASSERT_MSG event. + * @param trace location in the code from where the message is printed + * @param function function from where the message is printed + * @param ... Format string followed by format arguments. + * @note function parameter cannot be concatenated with other strings + */ +/* Select the correct system output function*/ +#ifdef CONFIG_MALI_DEBUG +#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)\ + do { \ + pr_err("Mali: %s function:%s ", trace, function);\ + pr_err(__VA_ARGS__);\ + pr_err("\n");\ + } while (false) +#else +#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP() +#endif + +#ifdef CONFIG_MALI_DEBUG +#define KBASE_CALL_ASSERT_HOOK() kbasep_debug_assert_call_hook() +#else +#define KBASE_CALL_ASSERT_HOOK() CSTD_NOP() +#endif + +/** + * @def KBASE_DEBUG_ASSERT(expr) + * @brief Calls @ref KBASE_PRINT_ASSERT and prints the expression @a expr if @a expr is false + * + * @note This macro does nothing if the flag @ref KBASE_DEBUG_DISABLE_ASSERTS is set to 1 + * + * @param expr Boolean expression + */ +#define KBASE_DEBUG_ASSERT(expr) \ + KBASE_DEBUG_ASSERT_MSG(expr, #expr) + +#if KBASE_DEBUG_DISABLE_ASSERTS +#define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP() +#else + /** + * @def KBASE_DEBUG_ASSERT_MSG(expr, ...) + * @brief Calls @ref KBASEP_DEBUG_ASSERT_OUT and prints the given message if @a expr is false + * + * @note This macro does nothing if the flag @ref KBASE_DEBUG_DISABLE_ASSERTS is set to 1 + * + * @param expr Boolean expression + * @param ... Message to display when @a expr is false, as a format string followed by format arguments. + */ +#define KBASE_DEBUG_ASSERT_MSG(expr, ...) 
\ + do { \ + if (!(expr)) { \ + KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__);\ + KBASE_CALL_ASSERT_HOOK();\ + BUG();\ + } \ + } while (false) +#endif /* KBASE_DEBUG_DISABLE_ASSERTS */ + +/** + * @def KBASE_DEBUG_CODE( X ) + * @brief Executes the code inside the macro only in debug mode + * + * @param X Code to compile only in debug mode. + */ +#ifdef CONFIG_MALI_DEBUG +#define KBASE_DEBUG_CODE(X) X +#else +#define KBASE_DEBUG_CODE(X) CSTD_NOP() +#endif /* CONFIG_MALI_DEBUG */ + +/** @} */ + +/** + * @brief Register a function to call on ASSERT + * + * Such functions will \b only be called during Debug mode, and for debugging + * features \b only. Do not rely on them to be called in general use. + * + * To disable the hook, supply NULL to \a func. + * + * @note This function is not thread-safe, and should only be used to + * register/deregister once in the module's lifetime. + * + * @param[in] func the function to call when an assert is triggered. + * @param[in] param the parameter to pass to \a func when calling it + */ +void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param); + +/** + * @brief Call a debug assert hook previously registered with kbase_debug_assert_register_hook() + * + * @note This function is not thread-safe with respect to multiple threads + * registering functions and parameters with + * kbase_debug_assert_register_hook(). Otherwise, thread safety is the + * responsibility of the registered hook. + */ +void kbasep_debug_assert_call_hook(void); + +#endif /* _KBASE_DEBUG_H */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c new file mode 100644 index 0000000..dbc774d --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c @@ -0,0 +1,566 @@ +/* + * + * (C) COPYRIGHT 2012-2016, 2018-2019 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include + +#ifdef CONFIG_DEBUG_FS + +static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev) +{ + struct list_head *event_list = &kbdev->job_fault_event_list; + unsigned long flags; + bool ret; + + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); + ret = !list_empty(event_list); + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + + return ret; +} + +static void kbase_ctx_remove_pending_event(struct kbase_context *kctx) +{ + struct list_head *event_list = &kctx->kbdev->job_fault_event_list; + struct base_job_fault_event *event; + unsigned long flags; + + spin_lock_irqsave(&kctx->kbdev->job_fault_event_lock, flags); + list_for_each_entry(event, event_list, head) { + if (event->katom->kctx == kctx) { + list_del(&event->head); + spin_unlock_irqrestore(&kctx->kbdev->job_fault_event_lock, flags); + + wake_up(&kctx->kbdev->job_fault_resume_wq); + flush_work(&event->job_fault_work); + + /* job_fault_event_list can only have a single atom for + * each context. 
+ */ + return; + } + } + spin_unlock_irqrestore(&kctx->kbdev->job_fault_event_lock, flags); +} + +static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx) +{ + struct kbase_device *kbdev = kctx->kbdev; + struct list_head *event_list = &kctx->kbdev->job_fault_event_list; + struct base_job_fault_event *event; + unsigned long flags; + + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); + if (list_empty(event_list)) { + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + return true; + } + list_for_each_entry(event, event_list, head) { + if (event->katom->kctx == kctx) { + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, + flags); + return false; + } + } + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + return true; +} + +static int wait_for_job_fault(struct kbase_device *kbdev) +{ +#if KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE && \ + KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE + int ret = wait_event_interruptible_timeout(kbdev->job_fault_wq, + kbase_is_job_fault_event_pending(kbdev), + msecs_to_jiffies(2000)); + if (ret == 0) + return -EAGAIN; + else if (ret > 0) + return 0; + else + return ret; +#else + return wait_event_interruptible(kbdev->job_fault_wq, + kbase_is_job_fault_event_pending(kbdev)); +#endif +} + +/* wait until the fault happen and copy the event */ +static int kbase_job_fault_event_wait(struct kbase_device *kbdev, + struct base_job_fault_event *event) +{ + struct list_head *event_list = &kbdev->job_fault_event_list; + struct base_job_fault_event *event_in; + unsigned long flags; + + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); + while (list_empty(event_list)) { + int err; + + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + + err = wait_for_job_fault(kbdev); + if (err) + return err; + + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); + } + + event_in = list_entry(event_list->next, + struct base_job_fault_event, head); + event->event_code = event_in->event_code; + 
event->katom = event_in->katom; + + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + + return 0; + +} + +/* remove the event from the queue */ +static struct base_job_fault_event *kbase_job_fault_event_dequeue( + struct kbase_device *kbdev, struct list_head *event_list) +{ + struct base_job_fault_event *event; + + event = list_entry(event_list->next, + struct base_job_fault_event, head); + list_del(event_list->next); + + return event; + +} + +/* Remove all the following atoms after the failed atom in the same context + * Call the postponed bottom half of job done. + * Then, this context could be rescheduled. + */ +static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx) +{ + struct list_head *event_list = &kctx->job_fault_resume_event_list; + + while (!list_empty(event_list)) { + struct base_job_fault_event *event; + + event = kbase_job_fault_event_dequeue(kctx->kbdev, + &kctx->job_fault_resume_event_list); + kbase_jd_done_worker(&event->katom->work); + } + +} + +static void kbase_job_fault_resume_worker(struct work_struct *data) +{ + struct base_job_fault_event *event = container_of(data, + struct base_job_fault_event, job_fault_work); + struct kbase_context *kctx; + struct kbase_jd_atom *katom; + + katom = event->katom; + kctx = katom->kctx; + + dev_info(kctx->kbdev->dev, "Job dumping wait\n"); + + /* When it was waked up, it need to check if queue is empty or the + * failed atom belongs to different context. If yes, wake up. Both + * of them mean the failed job has been dumped. Please note, it + * should never happen that the job_fault_event_list has the two + * atoms belong to the same context. + */ + wait_event(kctx->kbdev->job_fault_resume_wq, + kbase_ctx_has_no_event_pending(kctx)); + + atomic_set(&kctx->job_fault_count, 0); + kbase_jd_done_worker(&katom->work); + + /* In case the following atoms were scheduled during failed job dump + * the job_done_worker was held. 
We need to rerun it after the dump + * was finished + */ + kbase_job_fault_resume_event_cleanup(kctx); + + dev_info(kctx->kbdev->dev, "Job dumping finish, resume scheduler\n"); +} + +static struct base_job_fault_event *kbase_job_fault_event_queue( + struct list_head *event_list, + struct kbase_jd_atom *atom, + u32 completion_code) +{ + struct base_job_fault_event *event; + + event = &atom->fault_event; + + event->katom = atom; + event->event_code = completion_code; + + list_add_tail(&event->head, event_list); + + return event; + +} + +static void kbase_job_fault_event_post(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, u32 completion_code) +{ + struct base_job_fault_event *event; + unsigned long flags; + + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); + event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list, + katom, completion_code); + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + + wake_up_interruptible(&kbdev->job_fault_wq); + + INIT_WORK(&event->job_fault_work, kbase_job_fault_resume_worker); + queue_work(kbdev->job_fault_resume_workq, &event->job_fault_work); + + dev_info(katom->kctx->kbdev->dev, "Job fault happen, start dump: %d_%d", + katom->kctx->tgid, katom->kctx->id); + +} + +/* + * This function will process the job fault + * Get the register copy + * Send the failed job dump event + * Create a Wait queue to wait until the job dump finish + */ + +bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom, + u32 completion_code) +{ + struct kbase_context *kctx = katom->kctx; + + /* Check if dumping is in the process + * only one atom of each context can be dumped at the same time + * If the atom belongs to different context, it can be dumped + */ + if (atomic_read(&kctx->job_fault_count) > 0) { + kbase_job_fault_event_queue( + &kctx->job_fault_resume_event_list, + katom, completion_code); + dev_info(kctx->kbdev->dev, "queue:%d\n", + kbase_jd_atom_id(kctx, katom)); + return true; + } + + if 
(kbase_ctx_flag(kctx, KCTX_DYING)) + return false; + + if (atomic_read(&kctx->kbdev->job_fault_debug) > 0) { + + if (completion_code != BASE_JD_EVENT_DONE) { + + if (kbase_job_fault_get_reg_snapshot(kctx) == false) { + dev_warn(kctx->kbdev->dev, "get reg dump failed\n"); + return false; + } + + kbase_job_fault_event_post(kctx->kbdev, katom, + completion_code); + atomic_inc(&kctx->job_fault_count); + dev_info(kctx->kbdev->dev, "post:%d\n", + kbase_jd_atom_id(kctx, katom)); + return true; + + } + } + return false; + +} + +static int debug_job_fault_show(struct seq_file *m, void *v) +{ + struct kbase_device *kbdev = m->private; + struct base_job_fault_event *event = (struct base_job_fault_event *)v; + struct kbase_context *kctx = event->katom->kctx; + int i; + + dev_info(kbdev->dev, "debug job fault seq show:%d_%d, %d", + kctx->tgid, kctx->id, event->reg_offset); + + if (kctx->reg_dump == NULL) { + dev_warn(kbdev->dev, "reg dump is NULL"); + return -1; + } + + if (kctx->reg_dump[event->reg_offset] == + REGISTER_DUMP_TERMINATION_FLAG) { + /* Return the error here to stop the read. And the + * following next() will not be called. 
The stop can + * get the real event resource and release it + */ + return -1; + } + + if (event->reg_offset == 0) + seq_printf(m, "%d_%d\n", kctx->tgid, kctx->id); + + for (i = 0; i < 50; i++) { + if (kctx->reg_dump[event->reg_offset] == + REGISTER_DUMP_TERMINATION_FLAG) { + break; + } + seq_printf(m, "%08x: %08x\n", + kctx->reg_dump[event->reg_offset], + kctx->reg_dump[1+event->reg_offset]); + event->reg_offset += 2; + + } + + + return 0; +} +static void *debug_job_fault_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct kbase_device *kbdev = m->private; + struct base_job_fault_event *event = (struct base_job_fault_event *)v; + + dev_info(kbdev->dev, "debug job fault seq next:%d, %d", + event->reg_offset, (int)*pos); + + return event; +} + +static void *debug_job_fault_start(struct seq_file *m, loff_t *pos) +{ + struct kbase_device *kbdev = m->private; + struct base_job_fault_event *event; + + dev_info(kbdev->dev, "fault job seq start:%d", (int)*pos); + + /* The condition is trick here. It needs make sure the + * fault hasn't happened and the dumping hasn't been started, + * or the dumping has finished + */ + if (*pos == 0) { + event = kmalloc(sizeof(*event), GFP_KERNEL); + if (!event) + return NULL; + event->reg_offset = 0; + if (kbase_job_fault_event_wait(kbdev, event)) { + kfree(event); + return NULL; + } + + /* The cache flush workaround is called in bottom half of + * job done but we delayed it. Now we should clean cache + * earlier. Then the GPU memory dump should be correct. + */ + kbase_backend_cache_clean(kbdev, event->katom); + } else + return NULL; + + return event; +} + +static void debug_job_fault_stop(struct seq_file *m, void *v) +{ + struct kbase_device *kbdev = m->private; + + /* here we wake up the kbase_jd_done_worker after stop, it needs + * get the memory dump before the register dump in debug daemon, + * otherwise, the memory dump may be incorrect. 
+ */ + + if (v != NULL) { + kfree(v); + dev_info(kbdev->dev, "debug job fault seq stop stage 1"); + + } else { + unsigned long flags; + + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); + if (!list_empty(&kbdev->job_fault_event_list)) { + kbase_job_fault_event_dequeue(kbdev, + &kbdev->job_fault_event_list); + wake_up(&kbdev->job_fault_resume_wq); + } + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + dev_info(kbdev->dev, "debug job fault seq stop stage 2"); + } + +} + +static const struct seq_operations ops = { + .start = debug_job_fault_start, + .next = debug_job_fault_next, + .stop = debug_job_fault_stop, + .show = debug_job_fault_show, +}; + +/* + * Open handler for the job_fault debugfs file. + * + * Sets up the seq_file, claims the single-client slot by switching + * job_fault_debug from 0 to 1, and stores the device pointer as the + * seq_file private data. + * + * Return: 0 on success, the error returned by seq_open() if the seq_file + * could not be set up, or -EBUSY if another client already holds the file. + */ +static int debug_job_fault_open(struct inode *in, struct file *file) +{ + struct kbase_device *kbdev = in->i_private; + int ret; + + /* Set up the seq_file first: if this fails there is nothing to undo + * and file->private_data is never dereferenced without a seq_file + * behind it. + */ + ret = seq_open(file, &ops); + if (ret) + return ret; + + /* Only now claim the single-client slot, so that a failed open can + * never leave job fault dumping enabled with no reader attached. + */ + if (atomic_cmpxchg(&kbdev->job_fault_debug, 0, 1) == 1) { + dev_warn(kbdev->dev, "debug job fault is busy, only a single client is allowed"); + seq_release(in, file); + return -EBUSY; + } + + ((struct seq_file *)file->private_data)->private = kbdev; + dev_info(kbdev->dev, "debug job fault seq open"); + + return 0; +} + +static int debug_job_fault_release(struct inode *in, struct file *file) +{ + struct kbase_device *kbdev = in->i_private; + struct list_head *event_list = &kbdev->job_fault_event_list; + unsigned long flags; + + seq_release(in, file); + + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); + + /* Disable job fault dumping. This will let kbase run jobs as normal, + * without blocking waiting for a job_fault client to read failed jobs. + * + * After this a new client may open the file, and may re-enable job + * fault dumping, but the job_fault_event_lock we hold here will block + * that from interfering until after we've completed the cleanup. + */ + atomic_dec(&kbdev->job_fault_debug); + + /* Clean the unprocessed job fault. After that, all the suspended + * contexts could be rescheduled.
Remove all the failed atoms that + * belong to different contexts Resume all the contexts that were + * suspend due to failed job. + */ + while (!list_empty(event_list)) { + kbase_job_fault_event_dequeue(kbdev, event_list); + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + wake_up(&kbdev->job_fault_resume_wq); + spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); + } + + spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + + dev_info(kbdev->dev, "debug job fault seq close"); + + return 0; +} + +static const struct file_operations kbasep_debug_job_fault_fops = { + .owner = THIS_MODULE, + .open = debug_job_fault_open, + .read = seq_read, + .llseek = seq_lseek, + .release = debug_job_fault_release, +}; + +/* + * Initialize debugfs entry for job fault dump + */ +void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev) +{ + debugfs_create_file("job_fault", 0400, + kbdev->mali_debugfs_directory, kbdev, + &kbasep_debug_job_fault_fops); +} + + +int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev) +{ + + INIT_LIST_HEAD(&kbdev->job_fault_event_list); + + init_waitqueue_head(&(kbdev->job_fault_wq)); + init_waitqueue_head(&(kbdev->job_fault_resume_wq)); + spin_lock_init(&kbdev->job_fault_event_lock); + + kbdev->job_fault_resume_workq = alloc_workqueue( + "kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1); + if (!kbdev->job_fault_resume_workq) + return -ENOMEM; + + atomic_set(&kbdev->job_fault_debug, 0); + + return 0; +} + +/* + * Release the relevant resource per device + */ +void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev) +{ + destroy_workqueue(kbdev->job_fault_resume_workq); +} + + +/* + * Initialize the relevant data structure per context + */ +void kbase_debug_job_fault_context_init(struct kbase_context *kctx) +{ + + /* We need allocate double size register range + * Because this memory will keep the register address and value + */ + kctx->reg_dump = vmalloc(0x4000 * 2); + if (kctx->reg_dump == NULL) + 
goto init_state; + + if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) { + vfree(kctx->reg_dump); + kctx->reg_dump = NULL; + } + +init_state: + /* kbase_debug_job_fault_process() reads job_fault_count before it + * looks at the register dump, so the resume list and the counter are + * set up on every path, including the one where the dump buffer + * could not be allocated. + */ + INIT_LIST_HEAD(&kctx->job_fault_resume_event_list); + atomic_set(&kctx->job_fault_count, 0); +} + +/* + * release the relevant resource per context + */ +void kbase_debug_job_fault_context_term(struct kbase_context *kctx) +{ + vfree(kctx->reg_dump); +} + +void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx) +{ + WARN_ON(!kbase_ctx_flag(kctx, KCTX_DYING)); + + kbase_ctx_remove_pending_event(kctx); +} + +#else /* CONFIG_DEBUG_FS */ + +int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev) +{ + return 0; +} + +void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev) +{ +} + +#endif /* CONFIG_DEBUG_FS */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h new file mode 100644 index 0000000..ef69627 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h @@ -0,0 +1,116 @@ +/* + * + * (C) COPYRIGHT 2012-2016, 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html.
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_DEBUG_JOB_FAULT_H +#define _KBASE_DEBUG_JOB_FAULT_H + +#include +#include + +#define REGISTER_DUMP_TERMINATION_FLAG 0xFFFFFFFF + +/** + * kbase_debug_job_fault_dev_init - Create the fault event wait queue + * per device and initialize the required lists. + * @kbdev: Device pointer + * + * Return: Zero on success or a negative error code. + */ +int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev); + +/** + * kbase_debug_job_fault_debugfs_init - Initialize job fault debug sysfs + * @kbdev: Device pointer + */ +void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev); + +/** + * kbase_debug_job_fault_dev_term - Clean up resources created in + * kbase_debug_job_fault_dev_init. + * @kbdev: Device pointer + */ +void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev); + +/** + * kbase_debug_job_fault_context_init - Initialize the relevant + * data structure per context + * @kctx: KBase context pointer + */ +void kbase_debug_job_fault_context_init(struct kbase_context *kctx); + +/** + * kbase_debug_job_fault_context_term - Release the relevant + * resource per context + * @kctx: KBase context pointer + */ +void kbase_debug_job_fault_context_term(struct kbase_context *kctx); + +/** + * kbase_debug_job_fault_kctx_unblock - Unblock the atoms blocked on job fault + * dumping on context termination. + * + * This function is called during context termination to unblock the atom for + * which the job fault occurred and also the atoms following it. This is needed + * otherwise the wait for zero jobs could timeout (leading to an assertion + * failure, kernel panic in debug builds) in the pathological case where + * although the thread/daemon capturing the job fault events is running, + * but for some reasons has stopped consuming the events. 
+ * + * @kctx: KBase context pointer + */ +void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx); + +/** + * kbase_debug_job_fault_process - Process the failed job. + * It will send a event and wake up the job fault waiting queue + * Then create a work queue to wait for job dump finish + * This function should be called in the interrupt handler and before + * jd_done that make sure the jd_done_worker will be delayed until the + * job dump finish + * @katom: The failed atom pointer + * @completion_code: the job status + * @return true if dump is going on + */ +bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom, + u32 completion_code); + + +/** + * kbase_debug_job_fault_reg_snapshot_init - Set the interested registers + * address during the job fault process, the relevant registers will + * be saved when a job fault happen + * @kctx: KBase context pointer + * @reg_range: Maximum register address space + * @return true if initializing successfully + */ +bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, + int reg_range); + +/** + * kbase_job_fault_get_reg_snapshot - Read the interested registers for + * failed job dump + * @kctx: KBase context pointer + * @return true if getting registers successfully + */ +bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx); + +#endif /*_KBASE_DEBUG_JOB_FAULT_H*/ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c new file mode 100644 index 0000000..4788137 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c @@ -0,0 +1,313 @@ +/* + * + * (C) COPYRIGHT 2013-2019 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Debugfs interface to dump the memory visible to the GPU + */ + +#include "mali_kbase_debug_mem_view.h" +#include "mali_kbase.h" + +#include +#include + +#ifdef CONFIG_DEBUG_FS + +#if (KERNEL_VERSION(4, 1, 0) > LINUX_VERSION_CODE) +#define get_file_rcu(x) atomic_long_inc_not_zero(&(x)->f_count) +#endif + +struct debug_mem_mapping { + struct list_head node; + + struct kbase_mem_phy_alloc *alloc; + unsigned long flags; + + u64 start_pfn; + size_t nr_pages; +}; + +struct debug_mem_data { + struct list_head mapping_list; + struct kbase_context *kctx; +}; + +struct debug_mem_seq_off { + struct list_head *lh; + size_t offset; +}; + +static void *debug_mem_start(struct seq_file *m, loff_t *_pos) +{ + struct debug_mem_data *mem_data = m->private; + struct debug_mem_seq_off *data; + struct debug_mem_mapping *map; + loff_t pos = *_pos; + + list_for_each_entry(map, &mem_data->mapping_list, node) { + if (pos >= map->nr_pages) { + pos -= map->nr_pages; + } else { + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return NULL; + data->lh = &map->node; + data->offset = pos; + return data; + } + } + + /* Beyond the end */ + return NULL; +} + +static void debug_mem_stop(struct seq_file *m, void *v) +{ + kfree(v); +} + +static void 
*debug_mem_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct debug_mem_data *mem_data = m->private; + struct debug_mem_seq_off *data = v; + struct debug_mem_mapping *map; + + map = list_entry(data->lh, struct debug_mem_mapping, node); + + /* Advance the position on every call, including the one that ends + * the sequence: debug_mem_start() returns NULL for a position past + * the last page, so a reader resuming at *pos sees the end of the + * sequence instead of being handed the final page again. + */ + ++*pos; + + /* More pages left in the current mapping */ + if (data->offset < map->nr_pages - 1) { + data->offset++; + return data; + } + + /* That was the last page of the last mapping: the iterator is freed + * here because NULL is returned in its place. + */ + if (list_is_last(data->lh, &mem_data->mapping_list)) { + kfree(data); + return NULL; + } + + /* Move on to the first page of the next mapping */ + data->lh = data->lh->next; + data->offset = 0; + + return data; +} + +static int debug_mem_show(struct seq_file *m, void *v) +{ + struct debug_mem_data *mem_data = m->private; + struct debug_mem_seq_off *data = v; + struct debug_mem_mapping *map; + int i, j; + struct page *page; + uint32_t *mapping; + pgprot_t prot = PAGE_KERNEL; + + map = list_entry(data->lh, struct debug_mem_mapping, node); + + kbase_gpu_vm_lock(mem_data->kctx); + + if (data->offset >= map->alloc->nents) { + seq_printf(m, "%016llx: Unbacked page\n\n", (map->start_pfn + + data->offset) << PAGE_SHIFT); + goto out; + } + + if (!(map->flags & KBASE_REG_CPU_CACHED)) + prot = pgprot_writecombine(prot); + + page = as_page(map->alloc->pages[data->offset]); + mapping = vmap(&page, 1, VM_MAP, prot); + if (!mapping) + goto out; + + for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) { + seq_printf(m, "%016llx:", i + ((map->start_pfn + + data->offset) << PAGE_SHIFT)); + + for (j = 0; j < 4*sizeof(*mapping); j += sizeof(*mapping)) + seq_printf(m, " %08x", mapping[(i+j)/sizeof(*mapping)]); + seq_putc(m, '\n'); + } + + vunmap(mapping); + + seq_putc(m, '\n'); + +out: + kbase_gpu_vm_unlock(mem_data->kctx); + return 0; +} + +static const struct seq_operations ops = { + .start = debug_mem_start, + .next = debug_mem_next, + .stop = debug_mem_stop, + .show = debug_mem_show, +}; + +static int debug_mem_zone_open(struct rb_root *rbtree, + struct debug_mem_data *mem_data) +{ + int ret = 0; + struct rb_node *p; + struct kbase_va_region *reg; + struct debug_mem_mapping *mapping; + + for
(p = rb_first(rbtree); p; p = rb_next(p)) { + reg = rb_entry(p, struct kbase_va_region, rblink); + + if (reg->gpu_alloc == NULL) + /* Empty region - ignore */ + continue; + + mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); + if (!mapping) { + ret = -ENOMEM; + goto out; + } + + mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + mapping->start_pfn = reg->start_pfn; + mapping->nr_pages = reg->nr_pages; + mapping->flags = reg->flags; + list_add_tail(&mapping->node, &mem_data->mapping_list); + } + +out: + return ret; +} + +static int debug_mem_open(struct inode *i, struct file *file) +{ + struct kbase_context *const kctx = i->i_private; + struct debug_mem_data *mem_data; + int ret; + + if (get_file_rcu(kctx->filp) == 0) + return -ENOENT; + + ret = seq_open(file, &ops); + if (ret) + goto open_fail; + + mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL); + if (!mem_data) { + ret = -ENOMEM; + goto out; + } + + mem_data->kctx = kctx; + + INIT_LIST_HEAD(&mem_data->mapping_list); + + kbase_gpu_vm_lock(kctx); + + ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data); + if (0 != ret) { + kbase_gpu_vm_unlock(kctx); + goto out; + } + + ret = debug_mem_zone_open(&kctx->reg_rbtree_custom, mem_data); + if (0 != ret) { + kbase_gpu_vm_unlock(kctx); + goto out; + } + + ret = debug_mem_zone_open(&kctx->reg_rbtree_exec, mem_data); + if (0 != ret) { + kbase_gpu_vm_unlock(kctx); + goto out; + } + + kbase_gpu_vm_unlock(kctx); + + ((struct seq_file *)file->private_data)->private = mem_data; + + return 0; + +out: + if (mem_data) { + while (!list_empty(&mem_data->mapping_list)) { + struct debug_mem_mapping *mapping; + + mapping = list_first_entry(&mem_data->mapping_list, + struct debug_mem_mapping, node); + kbase_mem_phy_alloc_put(mapping->alloc); + list_del(&mapping->node); + kfree(mapping); + } + kfree(mem_data); + } + seq_release(i, file); +open_fail: + fput(kctx->filp); + + return ret; +} + +static int debug_mem_release(struct inode *inode, struct file *file) +{ + struct 
kbase_context *const kctx = inode->i_private; + struct seq_file *sfile = file->private_data; + struct debug_mem_data *mem_data = sfile->private; + struct debug_mem_mapping *mapping; + + seq_release(inode, file); + + while (!list_empty(&mem_data->mapping_list)) { + mapping = list_first_entry(&mem_data->mapping_list, + struct debug_mem_mapping, node); + kbase_mem_phy_alloc_put(mapping->alloc); + list_del(&mapping->node); + kfree(mapping); + } + + kfree(mem_data); + + fput(kctx->filp); + + return 0; +} + +static const struct file_operations kbase_debug_mem_view_fops = { + .owner = THIS_MODULE, + .open = debug_mem_open, + .release = debug_mem_release, + .read = seq_read, + .llseek = seq_lseek +}; + +void kbase_debug_mem_view_init(struct kbase_context *const kctx) +{ + /* Caller already ensures this, but we keep the pattern for + * maintenance safety. + */ + if (WARN_ON(!kctx) || + WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry))) + return; + + debugfs_create_file("mem_view", 0400, kctx->kctx_dentry, kctx, + &kbase_debug_mem_view_fops); +} + +#endif diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h new file mode 100644 index 0000000..b948b7c --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h @@ -0,0 +1,40 @@ +/* + * + * (C) COPYRIGHT 2013-2015, 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_DEBUG_MEM_VIEW_H +#define _KBASE_DEBUG_MEM_VIEW_H + +#include + +/** + * kbase_debug_mem_view_init - Initialize the mem_view sysfs file + * @kctx: Pointer to kernel base context + * + * This function creates a "mem_view" file which can be used to get a view of + * the context's memory as the GPU sees it (i.e. using the GPU's page tables). + * + * The file is cleaned up by a call to debugfs_remove_recursive() deleting the + * parent directory. + */ +void kbase_debug_mem_view_init(struct kbase_context *kctx); + +#endif diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.c new file mode 100644 index 0000000..37e507b --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.c @@ -0,0 +1,183 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include +#include + +#include "mali_kbase_debugfs_helper.h" + +/* Arbitrary maximum size to prevent user space allocating too much kernel + * memory + */ +#define DEBUGFS_MEM_POOLS_MAX_WRITE_SIZE (256u) + +/** + * set_attr_from_string - Parse a string to set elements of an array + * + * This is the core of the implementation of + * kbase_debugfs_helper_set_attr_from_string. The only difference between the + * two functions is that this one requires the input string to be writable. + * + * @buf: Input string to parse. Must be nul-terminated! + * @array: Address of an object that can be accessed like an array. + * @nelems: Number of elements in the array. + * @set_attr_fn: Function to be called back for each array element. + * + * Return: 0 if success, negative error code otherwise. + */ +static int set_attr_from_string( + char *const buf, + void *const array, size_t const nelems, + kbase_debugfs_helper_set_attr_fn const set_attr_fn) +{ + size_t index, err = 0; + char *ptr = buf; + + for (index = 0; index < nelems && *ptr; ++index) { + unsigned long new_size; + size_t len; + char sep; + + /* Drop leading spaces */ + while (*ptr == ' ') + ptr++; + + len = strcspn(ptr, "\n "); + if (len == 0) { + /* No more values (allow this) */ + break; + } + + /* Substitute a nul terminator for a space character + * to make the substring valid for kstrtoul. 
+ */ + sep = ptr[len]; + if (sep == ' ') + ptr[len++] = '\0'; + + err = kstrtoul(ptr, 0, &new_size); + if (err) + break; + + /* Skip the substring (including any premature nul terminator) + */ + ptr += len; + + set_attr_fn(array, index, new_size); + } + + return err; +} + +int kbase_debugfs_helper_set_attr_from_string( + const char *const buf, void *const array, size_t const nelems, + kbase_debugfs_helper_set_attr_fn const set_attr_fn) +{ + char *const wbuf = kstrdup(buf, GFP_KERNEL); + int err = 0; + + if (!wbuf) + return -ENOMEM; + + err = set_attr_from_string(wbuf, array, nelems, + set_attr_fn); + + kfree(wbuf); + return err; +} + +ssize_t kbase_debugfs_helper_get_attr_to_string( + char *const buf, size_t const size, + void *const array, size_t const nelems, + kbase_debugfs_helper_get_attr_fn const get_attr_fn) +{ + ssize_t total = 0; + size_t index; + + for (index = 0; index < nelems; ++index) { + const char *postfix = " "; + + if (index == (nelems-1)) + postfix = "\n"; + + total += scnprintf(buf + total, size - total, "%zu%s", + get_attr_fn(array, index), postfix); + } + + return total; +} + +int kbase_debugfs_helper_seq_write(struct file *const file, + const char __user *const ubuf, size_t const count, + size_t const nelems, + kbase_debugfs_helper_set_attr_fn const set_attr_fn) +{ + const struct seq_file *const sfile = file->private_data; + void *const array = sfile->private; + int err = 0; + char *buf; + + if (WARN_ON(!array)) + return -EINVAL; + + if (WARN_ON(count > DEBUGFS_MEM_POOLS_MAX_WRITE_SIZE)) + return -EINVAL; + + buf = kmalloc(count + 1, GFP_KERNEL); + if (buf == NULL) + return -ENOMEM; + + if (copy_from_user(buf, ubuf, count)) { + kfree(buf); + return -EFAULT; + } + + buf[count] = '\0'; + err = set_attr_from_string(buf, + array, nelems, set_attr_fn); + kfree(buf); + + return err; +} + +int kbase_debugfs_helper_seq_read(struct seq_file *const sfile, + size_t const nelems, + kbase_debugfs_helper_get_attr_fn const get_attr_fn) +{ + void *const array 
= sfile->private; + size_t index; + + if (WARN_ON(!array)) + return -EINVAL; + + for (index = 0; index < nelems; ++index) { + const char *postfix = " "; + + if (index == (nelems-1)) + postfix = "\n"; + + seq_printf(sfile, "%zu%s", get_attr_fn(array, index), postfix); + } + return 0; +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.h new file mode 100644 index 0000000..c3c9efa --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.h @@ -0,0 +1,141 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_DEBUGFS_HELPER_H_ +#define _KBASE_DEBUGFS_HELPER_H_ + +/** + * typedef kbase_debugfs_helper_set_attr_fn - Type of function to set an + * attribute value from an array + * + * @array: Address of an object that can be accessed like an array. + * @index: An element index. The valid range depends on the use-case. + * @value: Attribute value to be set. 
+ */ +typedef void (*kbase_debugfs_helper_set_attr_fn)( + void *array, size_t index, size_t value); + +/** + * kbase_debugfs_helper_set_attr_from_string - Parse a string to reconfigure an + * array + * + * The given function is called once for each attribute value found in the + * input string. It is not an error if the string specifies fewer attribute + * values than the specified number of array elements. + * + * The number base of each attribute value is detected automatically + * according to the standard rules (e.g. prefix "0x" for hexadecimal). + * Attribute values are separated by one or more space characters. + * Additional leading and trailing spaces are ignored. + * + * @buf: Input string to parse. Must be nul-terminated! + * @array: Address of an object that can be accessed like an array. + * @nelems: Number of elements in the array. + * @set_attr_fn: Function to be called back for each array element. + * + * Return: 0 if success, negative error code otherwise. + */ +int kbase_debugfs_helper_set_attr_from_string( + const char *buf, void *array, size_t nelems, + kbase_debugfs_helper_set_attr_fn set_attr_fn); + +/** + * typedef kbase_debugfs_helper_get_attr_fn - Type of function to get an + * attribute value from an array + * + * @array: Address of an object that can be accessed like an array. + * @index: An element index. The valid range depends on the use-case. + * + * Return: Value of attribute. + */ +typedef size_t (*kbase_debugfs_helper_get_attr_fn)( + void *array, size_t index); + +/** + * kbase_debugfs_helper_get_attr_to_string - Construct a formatted string + * from elements in an array + * + * The given function is called once for each array element to get the + * value of the attribute to be inspected. The attribute values are + * written to the buffer as a formatted string of decimal numbers + * separated by spaces and terminated by a linefeed. + * + * @buf: Buffer in which to store the formatted output string. 
+ * @size: The size of the buffer, in bytes. + * @array: Address of an object that can be accessed like an array. + * @nelems: Number of elements in the array. + * @get_attr_fn: Function to be called back for each array element. + * + * Return: Number of characters written excluding the nul terminator. + */ +ssize_t kbase_debugfs_helper_get_attr_to_string( + char *buf, size_t size, void *array, size_t nelems, + kbase_debugfs_helper_get_attr_fn get_attr_fn); + +/** + * kbase_debugfs_helper_seq_read - Implements reads from a virtual file for an + * array + * + * The virtual file must have been opened by calling single_open and passing + * the address of an object that can be accessed like an array. + * + * The given function is called once for each array element to get the + * value of the attribute to be inspected. The attribute values are + * written to the buffer as a formatted string of decimal numbers + * separated by spaces and terminated by a linefeed. + * + * @sfile: A virtual file previously opened by calling single_open. + * @nelems: Number of elements in the array. + * @get_attr_fn: Function to be called back for each array element. + * + * Return: 0 if success, negative error code otherwise. + */ +int kbase_debugfs_helper_seq_read( + struct seq_file *const sfile, size_t const nelems, + kbase_debugfs_helper_get_attr_fn const get_attr_fn); + +/** + * kbase_debugfs_helper_seq_write - Implements writes to a virtual file for an + * array + * + * The virtual file must have been opened by calling single_open and passing + * the address of an object that can be accessed like an array. + * + * The given function is called once for each attribute value found in the + * data written to the virtual file. For further details, refer to the + * description of set_attr_from_string. + * + * @file: A virtual file previously opened by calling single_open. + * @ubuf: Source address in user space. + * @count: Number of bytes written to the virtual file. 
+ * @nelems: Number of elements in the array. + * @set_attr_fn: Function to be called back for each array element. + * + * Return: 0 if success, negative error code otherwise. + */ +int kbase_debugfs_helper_seq_write(struct file *const file, + const char __user *const ubuf, size_t const count, + size_t const nelems, + kbase_debugfs_helper_set_attr_fn const set_attr_fn); + +#endif /*_KBASE_DEBUGFS_HELPER_H_ */ + diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h new file mode 100644 index 0000000..7056d80 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h @@ -0,0 +1,1681 @@ +/* + * + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file mali_kbase_defs.h + * + * Definitions (types, defines, etc.) common to Kbase. They are placed here to + * allow the hierarchy of header files to work.
+ */ + +#ifndef _KBASE_DEFS_H_ +#define _KBASE_DEFS_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#ifdef CONFIG_MALI_BUSLOG +#include +#endif + +#if defined(CONFIG_SYNC) +#include +#else +#include "mali_kbase_fence_defs.h" +#endif + +#ifdef CONFIG_DEBUG_FS +#include +#endif /* CONFIG_DEBUG_FS */ + +#ifdef CONFIG_MALI_DEVFREQ +#include +#endif /* CONFIG_MALI_DEVFREQ */ + +#ifdef CONFIG_MALI_ARBITER_SUPPORT +#include +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + +#include +#include +#include + +#if defined(CONFIG_PM_RUNTIME) || \ + (defined(CONFIG_PM) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) +#define KBASE_PM_RUNTIME 1 +#endif + +#include "debug/mali_kbase_debug_ktrace_defs.h" + +/** Number of milliseconds before we time out on a GPU soft/hard reset */ +#define RESET_TIMEOUT 500 + +/** + * The maximum number of Job Slots to support in the Hardware. + * + * You can optimize this down if your target devices will only ever support a + * small number of job slots. + */ +#define BASE_JM_MAX_NR_SLOTS 3 + +/** + * The maximum number of Address Spaces to support in the Hardware. 
+ * + * You can optimize this down if your target devices will only ever support a + * small number of Address Spaces + */ +#define BASE_MAX_NR_AS 16 + +/* mmu */ +#define MIDGARD_MMU_LEVEL(x) (x) + +#define MIDGARD_MMU_TOPLEVEL MIDGARD_MMU_LEVEL(0) + +#define MIDGARD_MMU_BOTTOMLEVEL MIDGARD_MMU_LEVEL(3) + +#define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR) + +/** setting in kbase_context::as_nr that indicates it's invalid */ +#define KBASEP_AS_NR_INVALID (-1) + +/** + * Maximum size in bytes of a MMU lock region, as a logarithm + */ +#define KBASE_LOCK_REGION_MAX_SIZE_LOG2 (64) + +/** + * Minimum size in bytes of a MMU lock region, as a logarithm + */ +#define KBASE_LOCK_REGION_MIN_SIZE_LOG2 (15) + +#include "mali_kbase_hwaccess_defs.h" + +/* Maximum number of pages of memory that require a permanent mapping, per + * kbase_context + */ +#define KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES ((32 * 1024ul * 1024ul) >> \ + PAGE_SHIFT) +/* Minimum threshold period for hwcnt dumps between different hwcnt virtualizer + * clients, to reduce undesired system load. + * If a virtualizer client requests a dump within this threshold period after + * some other client has performed a dump, a new dump won't be performed and + * the accumulated counter values for that client will be returned instead. + */ +#define KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS (200 * NSEC_PER_USEC) + +/* Maximum number of clock/regulator pairs that may be referenced by + * the device node. + * This is dependent on support for of_property_read_u64_array() in the + * kernel. 
+ */ +#if (KERNEL_VERSION(4, 0, 0) <= LINUX_VERSION_CODE) || \ + defined(LSK_OPPV2_BACKPORT) +#define BASE_MAX_NR_CLOCKS_REGULATORS (2) +#else +#define BASE_MAX_NR_CLOCKS_REGULATORS (1) +#endif + +/* Forward declarations */ +struct kbase_context; +struct kbase_device; +struct kbase_as; +struct kbase_mmu_setup; +struct kbase_ipa_model_vinstr_data; + +/** + * struct kbase_io_access - holds information about 1 register access + * + * @addr: first bit indicates r/w (r=0, w=1) + * @value: value written or read + */ +struct kbase_io_access { + uintptr_t addr; + u32 value; +}; + +/** + * struct kbase_io_history - keeps track of all recent register accesses + * + * @enabled: true if register accesses are recorded, false otherwise + * @lock: spinlock protecting kbase_io_access array + * @count: number of registers read/written + * @size: number of elements in kbase_io_access array + * @buf: array of kbase_io_access + */ +struct kbase_io_history { +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) + bool enabled; +#else + u32 enabled; +#endif + + spinlock_t lock; + size_t count; + u16 size; + struct kbase_io_access *buf; +}; + +/** + * struct kbase_debug_copy_buffer - information about the buffer to be copied. + * + * @size: size of the buffer in bytes + * @pages: pointer to an array of pointers to the pages which contain + * the buffer + * @is_vmalloc: true if @pages was allocated with vzalloc. 
false if @pages was + * allocated with kcalloc + * @nr_pages: number of pages + * @offset: offset into the pages + * @gpu_alloc: pointer to physical memory allocated by the GPU + * @extres_pages: array of pointers to the pages containing external resources + * for this buffer + * @nr_extres_pages: number of pages in @extres_pages + */ +struct kbase_debug_copy_buffer { + size_t size; + struct page **pages; + bool is_vmalloc; + int nr_pages; + size_t offset; + struct kbase_mem_phy_alloc *gpu_alloc; + + struct page **extres_pages; + int nr_extres_pages; +}; + +struct kbase_device_info { + u32 features; +}; + +struct kbase_mmu_setup { + u64 transtab; + u64 memattr; + u64 transcfg; +}; + +/** + * struct kbase_fault - object containing data relating to a page or bus fault. + * @addr: Records the faulting address. + * @extra_addr: Records the secondary fault address. + * @status: Records the fault status as reported by Hw. + * @protected_mode: Flag indicating whether the fault occurred in protected mode + * or not. + */ +struct kbase_fault { + u64 addr; + u64 extra_addr; + u32 status; + bool protected_mode; +}; + +/** + * struct kbase_as - object representing an address space of GPU. + * @number: Index at which this address space structure is present + * in an array of address space structures embedded inside the + * struct kbase_device. + * @pf_wq: Workqueue for processing work items related to Bus fault + * and Page fault handling. + * @work_pagefault: Work item for the Page fault handling. + * @work_busfault: Work item for the Bus fault handling. + * @pf_data: Data relating to page fault. + * @bf_data: Data relating to bus fault. + * @current_setup: Stores the MMU configuration for this address space. 
+ */ +struct kbase_as { + int number; + struct workqueue_struct *pf_wq; + struct work_struct work_pagefault; + struct work_struct work_busfault; + struct kbase_fault pf_data; + struct kbase_fault bf_data; + struct kbase_mmu_setup current_setup; +}; + +/** + * struct kbase_mmu_table - object representing a set of GPU page tables + * @mmu_teardown_pages: Buffer of 4 Pages in size, used to cache the entries + * of top & intermediate level page tables to avoid + * repeated calls to kmap_atomic during the MMU teardown. + * @mmu_lock: Lock to serialize the accesses made to multi level GPU + * page tables + * @pgd: Physical address of the page allocated for the top + * level page table of the context, this is used for + * MMU HW programming as the address translation will + * start from the top level page table. + * @group_id: A memory group ID to be passed to a platform-specific + * memory group manager. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * @kctx: If this set of MMU tables belongs to a context then + * this is a back-reference to the context, otherwise + * it is NULL + */ +struct kbase_mmu_table { + u64 *mmu_teardown_pages; + struct mutex mmu_lock; + phys_addr_t pgd; + u8 group_id; + struct kbase_context *kctx; +}; + +#include "jm/mali_kbase_jm_defs.h" + +static inline int kbase_as_has_bus_fault(struct kbase_as *as, + struct kbase_fault *fault) +{ + return (fault == &as->bf_data); +} + +static inline int kbase_as_has_page_fault(struct kbase_as *as, + struct kbase_fault *fault) +{ + return (fault == &as->pf_data); +} + +/** + * struct kbasep_mem_device - Data stored per device for memory allocation + * + * @used_pages: Tracks usage of OS shared memory. Updated when OS memory is + * allocated/freed. + * @ir_threshold: Fraction of the maximum size of an allocation that grows + * on GPU page fault that can be used before the driver + * switches to incremental rendering, in 1/256ths. + * 0 means disabled. 
+ */ +struct kbasep_mem_device { + atomic_t used_pages; + atomic_t ir_threshold; +}; + +/** + * Data stored per device for power management. + * + * This structure contains data for the power management framework. There is one + * instance of this structure per device in the system. + */ +struct kbase_pm_device_data { + /** + * The lock protecting Power Management structures accessed outside of + * IRQ. + * + * This lock must also be held whenever the GPU is being powered on or + * off. + */ + struct mutex lock; + + /** + * The reference count of active contexts on this device. Note that + * some code paths keep shaders/the tiler powered whilst this is 0. Use + * kbase_pm_is_active() instead to check for such cases. + */ + int active_count; + /** Flag indicating suspending/suspended */ + bool suspending; +#ifdef CONFIG_MALI_ARBITER_SUPPORT + /* Flag indicating gpu lost */ + bool gpu_lost; +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + /* Wait queue set when active_count == 0 */ + wait_queue_head_t zero_active_count_wait; + + /** + * Bit masks identifying the available shader cores that are specified + * via sysfs. One mask per job slot. + */ + u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS]; + u64 debug_core_mask_all; + + /** + * Callback for initializing the runtime power management. + * + * @param kbdev The kbase device + * + * @return 0 on success, else error code + */ + int (*callback_power_runtime_init)(struct kbase_device *kbdev); + + /** + * Callback for terminating the runtime power management. 
+ * + * @param kbdev The kbase device + */ + void (*callback_power_runtime_term)(struct kbase_device *kbdev); + + /* Time in milliseconds between each dvfs sample */ + u32 dvfs_period; + + struct kbase_pm_backend_data backend; + +#ifdef CONFIG_MALI_ARBITER_SUPPORT + /** + * The state of the arbiter VM machine + */ + struct kbase_arbiter_vm_state *arb_vm_state; +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ +}; + +/** + * struct kbase_mem_pool - Page based memory pool for kctx/kbdev + * @kbdev: Kbase device where memory is used + * @cur_size: Number of free pages currently in the pool (may exceed + * @max_size in some corner cases) + * @max_size: Maximum number of free pages in the pool + * @order: order = 0 refers to a pool of 4 KB pages + * order = 9 refers to a pool of 2 MB pages (2^9 * 4KB = 2 MB) + * @group_id: A memory group ID to be passed to a platform-specific + * memory group manager, if present. Immutable. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * @pool_lock: Lock protecting the pool - must be held when modifying + * @cur_size and @page_list + * @page_list: List of free pages in the pool + * @reclaim: Shrinker for kernel reclaim of free pages + * @next_pool: Pointer to next pool where pages can be allocated when this + * pool is empty. Pages will spill over to the next pool when + * this pool is full. Can be NULL if there is no next pool. + * @dying: true if the pool is being terminated, and any ongoing + * operations should be abandoned + * @dont_reclaim: true if the shrinker is forbidden from reclaiming memory from + * this pool, eg during a grow operation + */ +struct kbase_mem_pool { + struct kbase_device *kbdev; + size_t cur_size; + size_t max_size; + u8 order; + u8 group_id; + spinlock_t pool_lock; + struct list_head page_list; + struct shrinker reclaim; + + struct kbase_mem_pool *next_pool; + + bool dying; + bool dont_reclaim; +}; + +/** + * struct kbase_mem_pool_group - a complete set of physical memory pools. 
+ * + * Memory pools are used to allow efficient reallocation of previously-freed + * physical pages. A pair of memory pools is initialized for each physical + * memory group: one for 4 KiB pages and one for 2 MiB pages. These arrays + * should be indexed by physical memory group ID, the meaning of which is + * defined by the systems integrator. + * + * @small: Array of objects containing the state for pools of 4 KiB size + * physical pages. + * @large: Array of objects containing the state for pools of 2 MiB size + * physical pages. + */ +struct kbase_mem_pool_group { + struct kbase_mem_pool small[MEMORY_GROUP_MANAGER_NR_GROUPS]; + struct kbase_mem_pool large[MEMORY_GROUP_MANAGER_NR_GROUPS]; +}; + +/** + * struct kbase_mem_pool_config - Initial configuration for a physical memory + * pool + * + * @max_size: Maximum number of free pages that the pool can hold. + */ +struct kbase_mem_pool_config { + size_t max_size; +}; + +/** + * struct kbase_mem_pool_group_config - Initial configuration for a complete + * set of physical memory pools + * + * This array should be indexed by physical memory group ID, the meaning + * of which is defined by the systems integrator. + * + * @small: Array of initial configuration for pools of 4 KiB pages. + * @large: Array of initial configuration for pools of 2 MiB pages. + */ +struct kbase_mem_pool_group_config { + struct kbase_mem_pool_config small[MEMORY_GROUP_MANAGER_NR_GROUPS]; + struct kbase_mem_pool_config large[MEMORY_GROUP_MANAGER_NR_GROUPS]; +}; + +/** + * struct kbase_devfreq_opp - Lookup table for converting between nominal OPP + * frequency, real frequencies and core mask + * @real_freqs: Real GPU frequencies. + * @opp_volts: OPP voltages. 
+ * @opp_freq: Nominal OPP frequency + * @core_mask: Shader core mask + */ +struct kbase_devfreq_opp { + u64 opp_freq; + u64 core_mask; + u64 real_freqs[BASE_MAX_NR_CLOCKS_REGULATORS]; + u32 opp_volts[BASE_MAX_NR_CLOCKS_REGULATORS]; +}; + +/* MMU mode flags */ +#define KBASE_MMU_MODE_HAS_NON_CACHEABLE (1ul << 0) /* Has NON_CACHEABLE MEMATTR */ + +/** + * struct kbase_mmu_mode - object containing pointer to methods invoked for + * programming the MMU, as per the MMU mode supported + * by Hw. + * @update: enable & setup/configure one of the GPU address space. + * @get_as_setup: retrieve the configuration of one of the GPU address space. + * @disable_as: disable one of the GPU address space. + * @pte_to_phy_addr: retrieve the physical address encoded in the page table entry. + * @ate_is_valid: check if the pte is a valid address translation entry + * encoding the physical address of the actual mapped page. + * @pte_is_valid: check if the pte is a valid entry encoding the physical + * address of the next lower level page table. + * @entry_set_ate: program the pte to be a valid address translation entry to + * encode the physical address of the actual page being mapped. + * @entry_set_pte: program the pte to be a valid entry to encode the physical + * address of the next lower level page table. + * @entry_invalidate: clear out or invalidate the pte. + * @flags: bitmask of MMU mode flags. Refer to KBASE_MMU_MODE_ constants. 
+ */ +struct kbase_mmu_mode { + void (*update)(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + int as_nr); + void (*get_as_setup)(struct kbase_mmu_table *mmut, + struct kbase_mmu_setup * const setup); + void (*disable_as)(struct kbase_device *kbdev, int as_nr); + phys_addr_t (*pte_to_phy_addr)(u64 entry); + int (*ate_is_valid)(u64 ate, int level); + int (*pte_is_valid)(u64 pte, int level); + void (*entry_set_ate)(u64 *entry, struct tagged_addr phy, + unsigned long flags, int level); + void (*entry_set_pte)(u64 *entry, phys_addr_t phy); + void (*entry_invalidate)(u64 *entry); + unsigned long flags; +}; + +struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void); +struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); + +#define DEVNAME_SIZE 16 + +/** + * enum kbase_devfreq_work_type - The type of work to perform in the devfreq + * suspend/resume worker. + * @DEVFREQ_WORK_NONE: Initialisation state. + * @DEVFREQ_WORK_SUSPEND: Call devfreq_suspend_device(). + * @DEVFREQ_WORK_RESUME: Call devfreq_resume_device(). + */ +enum kbase_devfreq_work_type { + DEVFREQ_WORK_NONE, + DEVFREQ_WORK_SUSPEND, + DEVFREQ_WORK_RESUME +}; + +/** + * struct kbase_devfreq_queue_info - Object representing an instance for managing + * the queued devfreq suspend/resume works. + * @workq: Workqueue for devfreq suspend/resume requests + * @work: Work item for devfreq suspend & resume + * @req_type: Requested work type to be performed by the devfreq + * suspend/resume worker + * @acted_type: Work type has been acted on by the worker, i.e. the + * internal recorded state of the suspend/resume + */ +struct kbase_devfreq_queue_info { + struct workqueue_struct *workq; + struct work_struct work; + enum kbase_devfreq_work_type req_type; + enum kbase_devfreq_work_type acted_type; +}; + +/** + * struct kbase_device - Object representing an instance of GPU platform device, + * allocated from the probe method of mali driver.
+ * @hw_quirks_sc: Configuration to be used for the shader cores as per + * the HW issues present in the GPU. + * @hw_quirks_tiler: Configuration to be used for the Tiler as per the HW + * issues present in the GPU. + * @hw_quirks_mmu: Configuration to be used for the MMU as per the HW + * issues present in the GPU. + * @hw_quirks_jm: Configuration to be used for the Job Manager as per + * the HW issues present in the GPU. + * @entry: Links the device instance to the global list of GPU + * devices. The list would have as many entries as there + * are GPU device instances. + * @dev: Pointer to the kernel's generic/base representation + * of the GPU platform device. + * @mdev: Pointer to the miscellaneous device registered to + * provide Userspace access to kernel driver through the + * device file /dev/malixx. + * @reg_start: Base address of the region in physical address space + * where GPU registers have been mapped. + * @reg_size: Size of the region containing GPU registers + * @reg: Kernel virtual address of the region containing GPU + * registers, using which Driver will access the registers. + * @irqs: Array containing IRQ resource info for 3 types of + * interrupts : Job scheduling, MMU & GPU events (like + * power management, cache etc.) + * @clocks: Pointer to the input clock resources referenced by + * the GPU device node. + * @nr_clocks: Number of clocks set in the clocks array. + * @regulators: Pointer to the structs corresponding to the + * regulators referenced by the GPU device node. + * @nr_regulators: Number of regulators set in the regulators array. + * @opp_table: Pointer to the device OPP structure maintaining the + * link to OPPs attached to a device. This is obtained + * after setting regulator names for the device. + * @devname: string containing the name used for GPU device instance, + * miscellaneous device is registered using the same name. 
+ * @id: Unique identifier for the device, indicates the number of + * devices which have been created so far. + * @model: Pointer, valid only when Driver is compiled to not access + * the real GPU Hw, to the dummy model which tries to mimic + * to some extent the state & behavior of GPU Hw in response + * to the register accesses made by the Driver. + * @irq_slab: slab cache for allocating the work items queued when + * model mimics raising of IRQ to cause an interrupt on CPU. + * @irq_workq: workqueue for processing the irq work items. + * @serving_job_irq: function to execute work items queued when model mimics + * the raising of JS irq, mimics the interrupt handler + * processing JS interrupts. + * @serving_gpu_irq: function to execute work items queued when model mimics + * the raising of GPU irq, mimics the interrupt handler + * processing GPU interrupts. + * @serving_mmu_irq: function to execute work items queued when model mimics + * the raising of MMU irq, mimics the interrupt handler + * processing MMU interrupts. + * @reg_op_lock: lock used by model to serialize the handling of register + * accesses made by the driver. + * @pm: Per device object for storing data for power management + * framework. + * @js_data: Per device object encapsulating the current context of + * Job Scheduler, which is global to the device and is not + * tied to any particular struct kbase_context running on + * the device + * @mem_pools: Global pools of free physical memory pages which can + * be used by all the contexts. + * @memdev: keeps track of the in use physical pages allocated by + * the Driver. + * @mmu_mode: Pointer to the object containing methods for programming + * the MMU, depending on the type of MMU supported by Hw. + * @mgm_dev: Pointer to the memory group manager device attached + * to the GPU device. This points to an internal memory + * group manager if no platform-specific memory group + * manager was retrieved through device tree. 
+ * @as: Array of objects representing address spaces of GPU. + * @as_free: Bitpattern of free/available GPU address spaces. + * @as_to_kctx: Array of pointers to struct kbase_context, having + * GPU address spaces assigned to them. + * @mmu_mask_change: Lock to serialize the access to MMU interrupt mask + * register used in the handling of Bus & Page faults. + * @gpu_props: Object containing complete information about the + * configuration/properties of GPU HW device in use. + * @hw_issues_mask: List of SW workarounds for HW issues + * @hw_features_mask: List of available HW features. + * @disjoint_event: struct for keeping track of the disjoint information, + * that whether the GPU is in a disjoint state and the + * number of disjoint events that have occurred on GPU. + * @nr_hw_address_spaces: Number of address spaces actually available in the + * GPU, remains constant after driver initialisation. + * @nr_user_address_spaces: Number of address spaces available to user contexts + * @hwcnt: Structure used for instrumentation and HW counters + * dumping + * @hwcnt_gpu_iface: Backend interface for GPU hardware counter access. + * @hwcnt_gpu_ctx: Context for GPU hardware counter access. + * @hwaccess_lock must be held when calling + * kbase_hwcnt_context_enable() with @hwcnt_gpu_ctx. + * @hwcnt_gpu_virt: Virtualizer for GPU hardware counters. + * @vinstr_ctx: vinstr context created per device. + * @timeline_flags: Bitmask defining which sets of timeline tracepoints + * are enabled. If zero, there is no timeline client and + * therefore timeline is disabled. + * @timeline: Timeline context created per device. + * @trace_lock: Lock to serialize the access to trace buffer. + * @trace_first_out: Index/offset in the trace buffer at which the first + * unread message is present. + * @trace_next_in: Index/offset in the trace buffer at which the new + * message will be written.
+ * @trace_rbuf: Pointer to the buffer storing debug messages/prints + * tracing the various events in Driver. + * The buffer is filled in circular fashion. + * @reset_timeout_ms: Number of milliseconds to wait for the soft stop to + * complete for the GPU jobs before proceeding with the + * GPU reset. + * @cache_clean_in_progress: Set when a cache clean has been started, and + * cleared when it has finished. This prevents multiple + * cache cleans being done simultaneously. + * @cache_clean_queued: Set if a cache clean is invoked while another is in + * progress. If this happens, another cache clean needs + * to be triggered immediately after completion of the + * current one. + * @cache_clean_wait: Signalled when a cache clean has finished. + * @platform_context: Platform specific private data to be accessed by + * platform specific config files only. + * @kctx_list: List of kbase_contexts created for the device, + * including any contexts that might be created for + * hardware counters. + * @kctx_list_lock: Lock protecting concurrent accesses to @kctx_list. + * @devfreq_profile: Describes devfreq profile for the Mali GPU device, passed + * to devfreq_add_device() to add devfreq feature to Mali + * GPU device. + * @devfreq: Pointer to devfreq structure for Mali GPU device, + * returned on the call to devfreq_add_device(). + * @current_freqs: The real frequencies, corresponding to + * @current_nominal_freq, at which the Mali GPU device + * is currently operating, as retrieved from + * @devfreq_table in the target callback of + * @devfreq_profile. + * @current_nominal_freq: The nominal frequency currently used for the Mali GPU + * device as retrieved through devfreq_recommended_opp() + * using the freq value passed as an argument to target + * callback of @devfreq_profile + * @current_voltages: The voltages corresponding to @current_nominal_freq, + * as retrieved from @devfreq_table in the target + * callback of @devfreq_profile. 
+ * @current_core_mask: bitmask of shader cores that are currently desired & + * enabled, corresponding to @current_nominal_freq as + * retrieved from @devfreq_table in the target callback + * of @devfreq_profile. + * @devfreq_table: Pointer to the lookup table for converting between + * nominal OPP (operating performance point) frequency, + * and real frequency and core mask. This table is + * constructed according to operating-points-v2-mali + * table in devicetree. + * @num_opps: Number of operating performance points available for the Mali + * GPU device. + * @devfreq_queue: Per device object for storing data that manages devfreq + * suspend & resume request queue and the related items. + * @devfreq_cooling: Pointer returned on registering devfreq cooling device + * corresponding to @devfreq. + * @ipa_protection_mode_switched: is set to TRUE when GPU is put into protected + * mode. It is a sticky flag which is cleared by IPA + * once it has made use of information that GPU had + * previously entered protected mode. + * @ipa: Top level structure for IPA, containing pointers to both + * configured & fallback models. + * @previous_frequency: Previous frequency of GPU clock used for + * BASE_HW_ISSUE_GPU2017_1336 workaround, This clock is + * restored when L2 is powered on. + * @job_fault_debug: Flag to control the dumping of debug data for job faults, + * set when the 'job_fault' debugfs file is opened. + * @mali_debugfs_directory: Root directory for the debugfs files created by the driver + * @debugfs_ctx_directory: Directory inside the @mali_debugfs_directory containing + * a sub-directory for every context. + * @debugfs_as_read_bitmap: bitmap of address spaces for which the bus or page fault + * has occurred. + * @job_fault_wq: Waitqueue to block the job fault dumping daemon till the + * occurrence of a job fault. 
+ * @job_fault_resume_wq: Waitqueue on which every context with a faulty job waits + * for the job fault dumping to complete before it can + * do bottom half of job done for the atoms which followed + * the faulty atom. + * @job_fault_resume_workq: workqueue to process the work items queued for the faulty + * atoms, whereby the work item function waits for the dumping + * to get completed. + * @job_fault_event_list: List of atoms, each belonging to a different context, which + * generated a job fault. + * @job_fault_event_lock: Lock to protect concurrent accesses to @job_fault_event_list + * @regs_dump_debugfs_data: Contains the offset of register to be read through debugfs + * file "read_register". + * @ctx_num: Total number of contexts created for the device. + * @io_history: Pointer to an object keeping a track of all recent + * register accesses. The history of register accesses + * can be read through "regs_history" debugfs file. + * @hwaccess: Contains a pointer to active kbase context and GPU + * backend specific data for HW access layer. + * @faults_pending: Count of page/bus faults waiting for bottom half processing + * via workqueues. + * @poweroff_pending: Set when power off operation for GPU is started, reset when + * power on for GPU is started. + * @infinite_cache_active_default: Set to enable using infinite cache for all the + * allocations of a new context. + * @mem_pool_defaults: Default configuration for the group of memory pools + * created for a new context. + * @current_gpu_coherency_mode: coherency mode in use, which can be different + * from @system_coherency, when using protected mode. + * @system_coherency: coherency mode as retrieved from the device tree. + * @cci_snoop_enabled: Flag to track when CCI snoops have been enabled. + * @snoop_enable_smc: SMC function ID to call into Trusted firmware to + * enable cache snooping. Value of 0 indicates that it + * is not used.
+ * @snoop_disable_smc: SMC function ID to call to disable cache snooping. + * @protected_ops: Pointer to the methods for switching in or out of the + * protected mode, as per the @protected_dev being used. + * @protected_dev: Pointer to the protected mode switcher device attached + * to the GPU device retrieved through device tree if + * GPU does not support protected mode switching natively. + * @protected_mode: set to TRUE when GPU is put into protected mode + * @protected_mode_transition: set to TRUE when GPU is transitioning into or + * out of protected mode. + * @protected_mode_hwcnt_desired: True if we want GPU hardware counters to be + * enabled. Counters must be disabled before transition + * into protected mode. + * @protected_mode_hwcnt_disabled: True if GPU hardware counters are not + * enabled. + * @protected_mode_hwcnt_disable_work: Work item to disable GPU hardware + * counters, used if atomic disable is not possible. + * @buslogger: Pointer to the structure required for interfacing + * with the bus logger module to set the size of buffer + * used by the module for capturing bus logs. + * @irq_reset_flush: Flag to indicate that GPU reset is in-flight and flush of + * IRQ + bottom half is being done, to prevent the writes + * to MMU_IRQ_CLEAR & MMU_IRQ_MASK registers. + * @inited_subsys: Bitmap of inited sub systems at the time of device probe. + * Used during device remove or for handling error in probe. + * @hwaccess_lock: Lock, which can be taken from IRQ context, to serialize + * the updates made to Job dispatcher + scheduler states. + * @mmu_hw_mutex: Protects access to MMU operations and address space + * related state. + * @serialize_jobs: Currently used mode for serialization of jobs, both + * intra & inter slots serialization is supported. + * @backup_serialize_jobs: Copy of the original value of @serialize_jobs taken + * when GWT is enabled. Used to restore the original value + * on disabling of GWT.
+ * @js_ctx_scheduling_mode: Context scheduling mode currently being used by + * Job Scheduler + * @l2_size_override: Used to set L2 cache size via device tree blob + * @l2_hash_override: Used to set L2 cache hash via device tree blob + */ +struct kbase_device { + u32 hw_quirks_sc; + u32 hw_quirks_tiler; + u32 hw_quirks_mmu; + u32 hw_quirks_jm; + + struct list_head entry; + struct device *dev; + struct miscdevice mdev; + u64 reg_start; + size_t reg_size; + void __iomem *reg; + + struct { + int irq; + int flags; + } irqs[3]; + + struct clk *clocks[BASE_MAX_NR_CLOCKS_REGULATORS]; + unsigned int nr_clocks; +#ifdef CONFIG_REGULATOR + struct regulator *regulators[BASE_MAX_NR_CLOCKS_REGULATORS]; + unsigned int nr_regulators; +#if (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) + struct opp_table *opp_table; +#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) */ +#endif /* CONFIG_REGULATOR */ + char devname[DEVNAME_SIZE]; + u32 id; + +#ifdef CONFIG_MALI_NO_MALI + void *model; + struct kmem_cache *irq_slab; + struct workqueue_struct *irq_workq; + atomic_t serving_job_irq; + atomic_t serving_gpu_irq; + atomic_t serving_mmu_irq; + spinlock_t reg_op_lock; +#endif /* CONFIG_MALI_NO_MALI */ + + struct kbase_pm_device_data pm; + + struct kbase_mem_pool_group mem_pools; + struct kbasep_mem_device memdev; + struct kbase_mmu_mode const *mmu_mode; + + struct memory_group_manager_device *mgm_dev; + + struct kbase_as as[BASE_MAX_NR_AS]; + u16 as_free; /* Bitpattern of free Address Spaces */ + struct kbase_context *as_to_kctx[BASE_MAX_NR_AS]; + + spinlock_t mmu_mask_change; + + struct kbase_gpu_props gpu_props; + + unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; + unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; + + struct { + atomic_t count; + atomic_t state; + } disjoint_event; + + s8 nr_hw_address_spaces; + s8 nr_user_address_spaces; + + struct kbase_hwcnt { + /* The lock should be used when
accessing any of the following members */ + spinlock_t lock; + + struct kbase_context *kctx; + u64 addr; + u64 addr_bytes; + + struct kbase_instr_backend backend; + } hwcnt; + + struct kbase_hwcnt_backend_interface hwcnt_gpu_iface; + struct kbase_hwcnt_context *hwcnt_gpu_ctx; + struct kbase_hwcnt_virtualizer *hwcnt_gpu_virt; + struct kbase_vinstr_context *vinstr_ctx; + + atomic_t timeline_flags; + struct kbase_timeline *timeline; + +#if KBASE_KTRACE_TARGET_RBUF + struct kbase_ktrace ktrace; +#endif /* KBASE_KTRACE_TARGET_RBUF */ + u32 reset_timeout_ms; + + bool cache_clean_in_progress; + bool cache_clean_queued; + wait_queue_head_t cache_clean_wait; + + void *platform_context; + + struct list_head kctx_list; + struct mutex kctx_list_lock; + +#ifdef CONFIG_MALI_DEVFREQ + struct devfreq_dev_profile devfreq_profile; + struct devfreq *devfreq; + unsigned long current_freqs[BASE_MAX_NR_CLOCKS_REGULATORS]; + unsigned long current_nominal_freq; + unsigned long current_voltages[BASE_MAX_NR_CLOCKS_REGULATORS]; + u64 current_core_mask; + struct kbase_devfreq_opp *devfreq_table; + int num_opps; + struct kbasep_pm_metrics last_devfreq_metrics; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) + struct kbase_devfreq_queue_info devfreq_queue; +#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */ + +#ifdef CONFIG_DEVFREQ_THERMAL +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) + struct devfreq_cooling_device *devfreq_cooling; +#else + struct thermal_cooling_device *devfreq_cooling; +#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) */ + bool ipa_protection_mode_switched; + struct { + /* Access to this struct must be with ipa.lock held */ + struct mutex lock; + struct kbase_ipa_model *configured_model; + struct kbase_ipa_model *fallback_model; + + /* Values of the PM utilization metrics from last time the + * power model was invoked. The utilization is calculated as + * the difference between last_metrics and the current values.
+ */ + struct kbasep_pm_metrics last_metrics; + /* Model data to pass to ipa_gpu_active/idle() */ + struct kbase_ipa_model_vinstr_data *model_data; + + /* true if use of fallback model has been forced by the User */ + bool force_fallback_model; + } ipa; +#endif /* CONFIG_DEVFREQ_THERMAL */ +#endif /* CONFIG_MALI_DEVFREQ */ + unsigned long previous_frequency; + + atomic_t job_fault_debug; + +#ifdef CONFIG_DEBUG_FS + struct dentry *mali_debugfs_directory; + struct dentry *debugfs_ctx_directory; + +#ifdef CONFIG_MALI_DEBUG + u64 debugfs_as_read_bitmap; +#endif /* CONFIG_MALI_DEBUG */ + + wait_queue_head_t job_fault_wq; + wait_queue_head_t job_fault_resume_wq; + struct workqueue_struct *job_fault_resume_workq; + struct list_head job_fault_event_list; + spinlock_t job_fault_event_lock; + +#if !MALI_CUSTOMER_RELEASE + struct { + u16 reg_offset; + } regs_dump_debugfs_data; +#endif /* !MALI_CUSTOMER_RELEASE */ +#endif /* CONFIG_DEBUG_FS */ + + atomic_t ctx_num; + +#ifdef CONFIG_DEBUG_FS + struct kbase_io_history io_history; +#endif /* CONFIG_DEBUG_FS */ + + struct kbase_hwaccess_data hwaccess; + + atomic_t faults_pending; + + bool poweroff_pending; + + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) + bool infinite_cache_active_default; +#else + u32 infinite_cache_active_default; +#endif /* (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) */ + struct kbase_mem_pool_group_config mem_pool_defaults; + + u32 current_gpu_coherency_mode; + u32 system_coherency; + + bool cci_snoop_enabled; + + u32 snoop_enable_smc; + u32 snoop_disable_smc; + + const struct protected_mode_ops *protected_ops; + + struct protected_mode_device *protected_dev; + + bool protected_mode; + + bool protected_mode_transition; + + bool protected_mode_hwcnt_desired; + + bool protected_mode_hwcnt_disabled; + + struct work_struct protected_mode_hwcnt_disable_work; + +#ifdef CONFIG_MALI_BUSLOG + struct bus_logger_client *buslogger; +#endif /* CONFIG_MALI_BUSLOG */ + + bool irq_reset_flush; + + u32 inited_subsys; + + spinlock_t hwaccess_lock; + + struct mutex mmu_hw_mutex; + + u8
l2_size_override; + u8 l2_hash_override; + + struct kbasep_js_device_data js_data; + + /* See KBASE_JS_*_PRIORITY_MODE for details. */ + u32 js_ctx_scheduling_mode; + + /* See KBASE_SERIALIZE_* for details */ + u8 serialize_jobs; + +#ifdef CONFIG_MALI_CINSTR_GWT + u8 backup_serialize_jobs; +#endif /* CONFIG_MALI_CINSTR_GWT */ + + + struct { + struct kbase_context *ctx; + u64 jc; + int slot; + u64 flags; + } dummy_job_wa; + +#ifdef CONFIG_MALI_ARBITER_SUPPORT + /* Arbiter device data, embedded by value (not a pointer) */ + struct kbase_arbiter_device arb; +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ +}; + +#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \ + (((minor) & 0xFFF) << 8) | \ + ((0 & 0xFF) << 0)) /* major -> bits 31:20, minor -> bits 19:8, bits 7:0 are zero */ + +/** + * enum kbase_file_state - Initialization state of a file opened by @kbase_open + * + * @KBASE_FILE_NEED_VSN: Initial state, awaiting API version. + * @KBASE_FILE_VSN_IN_PROGRESS: Indicates if setting an API version is in + * progress and other setup calls shall be + * rejected. + * @KBASE_FILE_NEED_CTX: Indicates if the API version handshake has + * completed, awaiting context creation flags. + * @KBASE_FILE_CTX_IN_PROGRESS: Indicates if the context's setup is in progress + * and other setup calls shall be rejected. + * @KBASE_FILE_COMPLETE: Indicates if the setup for context has + * completed, i.e. flags have been set for the + * context. + * + * The driver allows only limited interaction with user-space until setup + * is complete. + */ +enum kbase_file_state { + KBASE_FILE_NEED_VSN, + KBASE_FILE_VSN_IN_PROGRESS, + KBASE_FILE_NEED_CTX, + KBASE_FILE_CTX_IN_PROGRESS, + KBASE_FILE_COMPLETE +}; + +/** + * struct kbase_file - Object representing a file opened by @kbase_open + * + * @kbdev: Object representing an instance of GPU platform device, + * allocated from the probe method of the Mali driver. + * @filp: Pointer to the struct file corresponding to device file + * /dev/malixx instance, passed to the file's open method.
+ * @kctx: Object representing an entity, among which GPU is + * scheduled and which gets its own GPU address space. + * Invalid until @setup_state is KBASE_FILE_COMPLETE. + * @api_version: Contains the version number for User/kernel interface, + * used for compatibility check. Invalid until + * @setup_state is KBASE_FILE_NEED_CTX. + * @setup_state: Initialization state of the file. Values come from + * the kbase_file_state enumeration. + */ +struct kbase_file { + struct kbase_device *kbdev; + struct file *filp; + struct kbase_context *kctx; + unsigned long api_version; + atomic_t setup_state; /* holds an enum kbase_file_state value */ +}; + +/** + * enum kbase_context_flags - Flags for kbase contexts + * + * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit + * process on a 64-bit kernel. + * + * @KCTX_RUNNABLE_REF: Set when context is counted in + * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing. + * + * @KCTX_ACTIVE: Set when the context is active. + * + * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this + * context. + * + * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been + * initialized. + * + * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new + * allocations. Existing allocations will not change. + * + * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs. + * + * @KCTX_PRIVILEGED:Set if the context uses an address space and should be kept + * scheduled in. + * + * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool. + * This is only ever updated whilst the jsctx_mutex is held. + * + * @KCTX_DYING: Set when the context process is in the process of being evicted. + * + * @KCTX_NO_IMPLICIT_SYNC: Set when explicit Android fences are in use on this + * context, to disable use of implicit dma-buf fences. This is used to avoid + * potential synchronization deadlocks.
+ * + * @KCTX_FORCE_SAME_VA: Set when BASE_MEM_SAME_VA should be forced on memory + * allocations. For 64-bit clients it is enabled by default, and disabled by + * default on 32-bit clients. Being able to clear this flag is only used for + * testing purposes of the custom zone allocation on 64-bit user-space builds, + * where we also require more control than is available through e.g. the JIT + * allocation mechanism. However, the 64-bit user-space client must still + * reserve a JIT region using KBASE_IOCTL_MEM_JIT_INIT. + * + * @KCTX_PULLED_SINCE_ACTIVE_JS0: Set when the context has had an atom pulled + * from it for job slot 0. This is reset when the context first goes active or + * is re-activated on that slot. + * + * @KCTX_PULLED_SINCE_ACTIVE_JS1: Set when the context has had an atom pulled + * from it for job slot 1. This is reset when the context first goes active or + * is re-activated on that slot. + * + * @KCTX_PULLED_SINCE_ACTIVE_JS2: Set when the context has had an atom pulled + * from it for job slot 2. This is reset when the context first goes active or + * is re-activated on that slot. + * + * @KCTX_AS_DISABLED_ON_FAULT: Set when the GPU address space is disabled for + * the context due to unhandled page (or bus) fault. It is cleared when the + * refcount for the context drops to 0 or when the address spaces are + * re-enabled on GPU reset or power cycle. + * + * All members need to be separate bits. This enum is intended for use in a + * bitmask where multiple values get OR-ed together.
+ */ +enum kbase_context_flags { + KCTX_COMPAT = 1U << 0, + KCTX_RUNNABLE_REF = 1U << 1, + KCTX_ACTIVE = 1U << 2, + KCTX_PULLED = 1U << 3, + KCTX_MEM_PROFILE_INITIALIZED = 1U << 4, + KCTX_INFINITE_CACHE = 1U << 5, + KCTX_SUBMIT_DISABLED = 1U << 6, + KCTX_PRIVILEGED = 1U << 7, + KCTX_SCHEDULED = 1U << 8, + KCTX_DYING = 1U << 9, + KCTX_NO_IMPLICIT_SYNC = 1U << 10, + KCTX_FORCE_SAME_VA = 1U << 11, + KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12, + KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13, + KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14, + KCTX_AS_DISABLED_ON_FAULT = 1U << 15, +}; + +struct kbase_sub_alloc { + struct list_head link; + struct page *page; + DECLARE_BITMAP(sub_pages, SZ_2M / SZ_4K); /* SZ_2M / SZ_4K bits; presumably one bit per 4KB sub-page of a 2MB page - TODO confirm */ +}; + +/** + * struct kbase_context - Kernel base context + * + * @filp: Pointer to the struct file corresponding to device file + * /dev/malixx instance, passed to the file's open method. + * @kbdev: Pointer to the Kbase device for which the context is created. + * @kctx_list_link: Node into Kbase device list of contexts. + * @mmu: Structure holding details of the MMU tables for this + * context + * @id: Unique identifier for the context, indicates the number of + * contexts which have been created for the device so far. + * @api_version: contains the version number for User/kernel interface, + * used for compatibility check. + * @event_list: list of posted events about completed atoms, to be sent to + * event handling thread of Userpsace. + * @event_coalesce_list: list containing events corresponding to successive atoms + * which have requested deferred delivery of the completion + * events to Userspace. + * @event_mutex: Lock to protect the concurrent access to @event_list & + * @event_mutex. + * @event_closed: Flag set through POST_TERM ioctl, indicates that Driver + * should stop posting events and also inform event handling + * thread that context termination is in progress.
+ * @event_workq: Workqueue for processing work items corresponding to atoms + * that do not return an event to userspace. + * @event_count: Count of the posted events to be consumed by Userspace. + * @event_coalesce_count: Count of the events present in @event_coalesce_list. + * @flags: bitmap of enums from kbase_context_flags, indicating the + * state & attributes for the context. + * @aliasing_sink_page: Special page used for KBASE_MEM_TYPE_ALIAS allocations, + * which can alias a number of memory regions. The page + * represents a region where it is mapped with a write-alloc + * cache setup, typically used when the write result of the + * GPU isn't needed, but the GPU must write anyway. + * @mem_partials_lock: Lock for protecting the operations done on the elements + * added to @mem_partials list. + * @mem_partials: List head for the list of large pages, 2MB in size, + * which have been split into 4 KB pages and are used + * partially for the allocations >= 2 MB in size. + * @reg_lock: Lock used for GPU virtual address space management operations, + * like adding/freeing a memory region in the address space. + * Can be converted to a rwlock ?. + * @reg_rbtree_same: RB tree of the memory regions allocated from the SAME_VA + * zone of the GPU virtual address space. Used for allocations + * having the same value for GPU & CPU virtual address. + * @reg_rbtree_custom: RB tree of the memory regions allocated from the CUSTOM_VA + * zone of the GPU virtual address space. + * @reg_rbtree_exec: RB tree of the memory regions allocated from the EXEC_VA + * zone of the GPU virtual address space. Used for GPU-executable + * allocations which don't need the SAME_VA property. + * @cookies: Bitmask consisting of BITS_PER_LONG bits, used mainly for + * SAME_VA allocations to defer the reservation of memory region + * (from the GPU virtual address space) from base_mem_alloc + * ioctl to mmap system call.
This helps returning unique + * handles, disguised as GPU VA, to Userspace from base_mem_alloc + * and later retrieving the pointer to memory region structure + * in the mmap handler. + * @pending_regions: Array containing pointers to memory region structures, + * used in conjunction with @cookies bitmask mainly for + * providing a mechanism to have the same value for CPU & + * GPU virtual address. + * @event_queue: Wait queue used for blocking the thread, which consumes + * the base_jd_event corresponding to an atom, when there + * are no more posted events. + * @tgid: Thread group ID of the process whose thread created + * the context (by calling KBASE_IOCTL_VERSION_CHECK or + * KBASE_IOCTL_SET_FLAGS, depending on the @api_version). + * This is usually, but not necessarily, the same as the + * process whose thread opened the device file + * /dev/malixx instance. + * @pid: ID of the thread, corresponding to process @tgid, + * which actually created the context. This is usually, + * but not necessarily, the same as the thread which + * opened the device file /dev/malixx instance. + * @jctx: object encapsulating all the Job dispatcher related state, + * including the array of atoms. + * @used_pages: Keeps a track of the number of 4KB physical pages in use + * for the context. + * @nonmapped_pages: Updated in the same way as @used_pages, except for the case + * when special tracking page is freed by userspace where it + * is reset to 0. + * @permanent_mapped_pages: Usage count of permanently mapped memory + * @mem_pools: Context-specific pools of free physical memory pages. + * @reclaim: Shrinker object registered with the kernel containing + * the pointer to callback function which is invoked under + * low memory conditions. In the callback function Driver + * frees up the memory for allocations marked as + * evictable/reclaimable. + * @evict_list: List head for the list containing the allocations which + * can be evicted or freed up in the shrinker callback.
+ * @waiting_soft_jobs: List head for the list containing softjob atoms, which + * are either waiting for the event set operation, or waiting + * for the signaling of input fence or waiting for the GPU + * device to be powered on so as to dump the CPU/GPU timestamps. + * @waiting_soft_jobs_lock: Lock to protect @waiting_soft_jobs list from concurrent + * accesses. + * @dma_fence: Object containing list head for the list of dma-buf fence + * waiting atoms and the waitqueue to process the work item + * queued for the atoms blocked on the signaling of dma-buf + * fences. + * @as_nr: id of the address space being used for the scheduled in + * context. This is effectively part of the Run Pool, because + * it only has a valid setting (!=KBASEP_AS_NR_INVALID) whilst + * the context is scheduled in. The hwaccess_lock must be held + * whilst accessing this. + * If the context relating to this value of as_nr is required, + * then the context must be retained to ensure that it doesn't + * disappear whilst it is being used. Alternatively, hwaccess_lock + * can be held to ensure the context doesn't disappear (but this + * has restrictions on what other locks can be taken simultaneously). + * @refcount: Keeps track of the number of users of this context. A user + * can be a job that is available for execution, instrumentation + * needing to 'pin' a context for counter collection, etc. + * If the refcount reaches 0 then this context is considered + * inactive and the previously programmed AS might be cleared + * at any point. + * Generally the reference count is incremented when the context + * is scheduled in and an atom is pulled from the context's per + * slot runnable tree. + * @mm_update_lock: lock used for handling of special tracking page. + * @process_mm: Pointer to the memory descriptor of the process which + * created the context. Used for accounting the physical + * pages used for GPU allocations, done for the context, + * to the memory consumed by the process.
+ * @same_va_end: End address of the SAME_VA zone (in 4KB page units) + * @exec_va_start: Start address of the EXEC_VA zone (in 4KB page units) + * or U64_MAX if the EXEC_VA zone is uninitialized. + * @gpu_va_end: End address of the GPU va space (in 4KB page units) + * @jit_va: Indicates if a JIT_VA zone has been created. + * @mem_profile_data: Buffer containing the profiling information provided by + * Userspace, can be read through the mem_profile debugfs file. + * @mem_profile_size: Size of the @mem_profile_data. + * @mem_profile_lock: Lock to serialize the operations related to mem_profile + * debugfs file. + * @kctx_dentry: Pointer to the debugfs directory created for every context, + * inside kbase_device::debugfs_ctx_directory, containing + * context specific files. + * @reg_dump: Buffer containing a register offset & value pair, used + * for dumping job fault debug info. + * @job_fault_count: Indicates that a job fault occurred for the context and + * dumping of its debug info is in progress. + * @job_fault_resume_event_list: List containing atoms completed after the faulty + * atom but before the debug data for faulty atom was dumped. + * @jsctx_queue: Per slot & priority arrays of object containing the root + * of RB-tree holding currently runnable atoms on the job slot + * and the head item of the linked list of atoms blocked on + * cross-slot dependencies. + * @atoms_pulled: Total number of atoms currently pulled from the context. + * @atoms_pulled_slot: Per slot count of the number of atoms currently pulled + * from the context. + * @atoms_pulled_slot_pri: Per slot & priority count of the number of atoms currently + * pulled from the context. hwaccess_lock shall be held when + * accessing it. + * @blocked_js: Indicates if the context is blocked from submitting atoms + * on a slot at a given priority. 
This is set to true, when + * the atom corresponding to context is soft/hard stopped or + * removed from the HEAD_NEXT register in response to + * soft/hard stop. + * @slots_pullable: Bitmask of slots, indicating the slots for which the + * context has pullable atoms in the runnable tree. + * @work: Work structure used for deferred ASID assignment. + * @legacy_hwcnt_cli: Pointer to the legacy userspace hardware counters + * client, there can be only one such client per kbase + * context. + * @legacy_hwcnt_lock: Lock used to prevent concurrent access to + * @legacy_hwcnt_cli. + * @completed_jobs: List containing completed atoms for which base_jd_event is + * to be posted. + * @work_count: Number of work items, corresponding to atoms, currently + * pending on job_done workqueue of @jctx. + * @soft_job_timeout: Timer object used for failing/cancelling the waiting + * soft-jobs which have been blocked for more than the + * timeout value used for the soft-jobs + * @jit_alloc: Array of 256 pointers to GPU memory regions, used for + * just-in-time memory allocations. + * @jit_max_allocations: Maximum allowed number of in-flight + * just-in-time memory allocations. + * @jit_current_allocations: Current number of in-flight just-in-time + * memory allocations. + * @jit_current_allocations_per_bin: Current number of in-flight just-in-time + * memory allocations per bin. + * @jit_version: Version number indicating whether userspace is using + * old or new version of interface for just-in-time + * memory allocations. + * 1 -> client used KBASE_IOCTL_MEM_JIT_INIT_10_2 + * 2 -> client used KBASE_IOCTL_MEM_JIT_INIT_11_5 + * 3 -> client used KBASE_IOCTL_MEM_JIT_INIT + * @jit_group_id: A memory group ID to be passed to a platform-specific + * memory group manager. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * @jit_phys_pages_limit: Limit of physical pages to apply across all + * just-in-time memory allocations, applied to + * @jit_current_phys_pressure.
+ * @jit_current_phys_pressure: Current 'pressure' on physical pages, which is + * the sum of the worst case estimate of pages that + * could be used (i.e. the + * &struct_kbase_va_region.nr_pages for all in-use + * just-in-time memory regions that have not yet had + * a usage report) and the actual number of pages + * that were used (i.e. the + * &struct_kbase_va_region.used_pages for regions + * that have had a usage report). + * @jit_active_head: List containing the just-in-time memory allocations + * which are in use. + * @jit_pool_head: List containing the just-in-time memory allocations + * which have been freed up by userspace and so not being + * used by them. + * Driver caches them to quickly fulfill requests for new + * JIT allocations. They are released in case of memory + * pressure as they are put on the @evict_list when they + * are freed up by userspace. + * @jit_destroy_head: List containing the just-in-time memory allocations + * which were moved to it from @jit_pool_head, in the + * shrinker callback, after freeing their backing + * physical pages. + * @jit_evict_lock: Lock used for operations done on just-in-time memory + * allocations and also for accessing @evict_list. + * @jit_work: Work item queued to defer the freeing of a memory + * region when a just-in-time memory allocation is moved + * to @jit_destroy_head. + * @ext_res_meta_head: A list of sticky external resources which were requested to + * be mapped on GPU side, through a softjob atom of type + * EXT_RES_MAP or STICKY_RESOURCE_MAP ioctl. + * @age_count: Counter incremented on every call to jd_submit_atom, + * atom is assigned the snapshot of this counter, which + * is used to determine the atom's age when it is added to + * the runnable RB-tree. + * @trim_level: Level of JIT allocation trimming to perform on free (0-100%) + * @gwt_enabled: Indicates if tracking of GPU writes is enabled, protected by + * kbase_context.reg_lock. 
+ * @gwt_was_enabled: Simple sticky bit flag to know if GWT was ever enabled. + * @gwt_current_list: A list of addresses for which GPU has generated write faults, + * after the last snapshot of it was sent to userspace. + * @gwt_snapshot_list: Snapshot of the @gwt_current_list for sending to user space. + * @priority: Indicates the context priority. Used along with @atoms_count + * for context scheduling, protected by hwaccess_lock. + * @atoms_count: Number of GPU atoms currently in use, per priority + * @create_flags: Flags used in context creation. + * + * A kernel base context is an entity among which the GPU is scheduled. + * Each context has its own GPU address space. + * Up to one context can be created for each client that opens the device file + * /dev/malixx. Context creation is deferred until a special ioctl() system call + * is made on the device file. + */ +struct kbase_context { + struct file *filp; + struct kbase_device *kbdev; + struct list_head kctx_list_link; + struct kbase_mmu_table mmu; + + u32 id; + unsigned long api_version; + struct list_head event_list; + struct list_head event_coalesce_list; + struct mutex event_mutex; + atomic_t event_closed; + struct workqueue_struct *event_workq; + atomic_t event_count; + int event_coalesce_count; + + atomic_t flags; + + struct tagged_addr aliasing_sink_page; + + spinlock_t mem_partials_lock; + struct list_head mem_partials; + + struct mutex reg_lock; + + struct rb_root reg_rbtree_same; + struct rb_root reg_rbtree_custom; + struct rb_root reg_rbtree_exec; + + struct kbase_jd_context jctx; + struct jsctx_queue jsctx_queue + [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS]; + + struct list_head completed_jobs; + atomic_t work_count; + struct timer_list soft_job_timeout; + + atomic_t atoms_pulled; + atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS]; + int atoms_pulled_slot_pri[BASE_JM_MAX_NR_SLOTS][ + KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + int priority; + bool 
blocked_js[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + s16 atoms_count[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + u32 slots_pullable; + u32 age_count; + + DECLARE_BITMAP(cookies, BITS_PER_LONG); + struct kbase_va_region *pending_regions[BITS_PER_LONG]; + + wait_queue_head_t event_queue; + pid_t tgid; + pid_t pid; + atomic_t used_pages; + atomic_t nonmapped_pages; + atomic_t permanent_mapped_pages; + + struct kbase_mem_pool_group mem_pools; + + struct shrinker reclaim; + struct list_head evict_list; + + struct list_head waiting_soft_jobs; + spinlock_t waiting_soft_jobs_lock; +#ifdef CONFIG_MALI_DMA_FENCE + struct { + struct list_head waiting_resource; + struct workqueue_struct *wq; + } dma_fence; +#endif /* CONFIG_MALI_DMA_FENCE */ + + int as_nr; + + atomic_t refcount; + + spinlock_t mm_update_lock; + struct mm_struct __rcu *process_mm; + u64 same_va_end; + u64 exec_va_start; + u64 gpu_va_end; + bool jit_va; + +#ifdef CONFIG_DEBUG_FS + char *mem_profile_data; + size_t mem_profile_size; + struct mutex mem_profile_lock; + struct dentry *kctx_dentry; + + unsigned int *reg_dump; + atomic_t job_fault_count; + struct list_head job_fault_resume_event_list; + +#endif /* CONFIG_DEBUG_FS */ + + struct kbase_hwcnt_legacy_client *legacy_hwcnt_cli; + struct mutex legacy_hwcnt_lock; + + struct kbase_va_region *jit_alloc[1 + BASE_JIT_ALLOC_COUNT]; + u8 jit_max_allocations; + u8 jit_current_allocations; + u8 jit_current_allocations_per_bin[256]; + u8 jit_version; + u8 jit_group_id; +#if MALI_JIT_PRESSURE_LIMIT + u64 jit_phys_pages_limit; + u64 jit_current_phys_pressure; +#endif /* MALI_JIT_PRESSURE_LIMIT */ + struct list_head jit_active_head; + struct list_head jit_pool_head; + struct list_head jit_destroy_head; + struct mutex jit_evict_lock; + struct work_struct jit_work; + + struct list_head ext_res_meta_head; + + u8 trim_level; + +#ifdef CONFIG_MALI_CINSTR_GWT + bool gwt_enabled; + bool gwt_was_enabled; + struct list_head gwt_current_list; + struct list_head 
/**
 * struct kbasep_gwt_list_element - One recorded GPU write fault.
 * @link:      List node used to chain the recorded write faults together.
 * @region:    Region that contains the faulting page address.
 * @page_addr: Page address where the GPU write fault occurred.
 * @num_pages: The number of pages modified.
 *
 * All GPU write faults are stored in a list of these elements.
 * NOTE(review): presumably the lists are kbase_context.gwt_current_list and
 * kbase_context.gwt_snapshot_list - confirm against the GWT implementation.
 */
struct kbasep_gwt_list_element {
	struct list_head link;
	struct kbase_va_region *region;
	u64 page_addr;
	u64 num_pages;
};
+ */ +struct kbase_ctx_ext_res_meta { + struct list_head ext_res_node; + struct kbase_mem_phy_alloc *alloc; + u64 gpu_addr; + u32 ref; +}; + +enum kbase_reg_access_type { + REG_READ, + REG_WRITE +}; + +enum kbase_share_attr_bits { + /* (1ULL << 8) bit is reserved */ + SHARE_BOTH_BITS = (2ULL << 8), /* inner and outer shareable coherency */ + SHARE_INNER_BITS = (3ULL << 8) /* inner shareable coherency */ +}; + +/** + * kbase_device_is_cpu_coherent - Returns if the device is CPU coherent. + * @kbdev: kbase device + * + * Return: true if the device access are coherent, false if not. + */ +static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev) +{ + if ((kbdev->system_coherency == COHERENCY_ACE_LITE) || + (kbdev->system_coherency == COHERENCY_ACE)) + return true; + + return false; +} + +/* Conversion helpers for setting up high resolution timers */ +#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U)) +#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x)) + +/* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */ +#define KBASE_CLEAN_CACHE_MAX_LOOPS 100000 +/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */ +#define KBASE_AS_INACTIVE_MAX_LOOPS 100000000 + +/* JobDescriptorHeader - taken from the architecture specifications, the layout + * is currently identical for all GPU archs. 
*/ +struct job_descriptor_header { + u32 exception_status; + u32 first_incomplete_task; + u64 fault_pointer; + u8 job_descriptor_size : 1; + u8 job_type : 7; + u8 job_barrier : 1; + u8 _reserved_01 : 1; + u8 _reserved_1 : 1; + u8 _reserved_02 : 1; + u8 _reserved_03 : 1; + u8 _reserved_2 : 1; + u8 _reserved_04 : 1; + u8 _reserved_05 : 1; + u16 job_index; + u16 job_dependency_index_1; + u16 job_dependency_index_2; + union { + u64 _64; + u32 _32; + } next_job; +}; + +#endif /* _KBASE_DEFS_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c new file mode 100644 index 0000000..b5ac414 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c @@ -0,0 +1,81 @@ +/* + * + * (C) COPYRIGHT 2014, 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Base kernel disjoint events helper functions + */ + +#include + +void kbase_disjoint_init(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + atomic_set(&kbdev->disjoint_event.count, 0); + atomic_set(&kbdev->disjoint_event.state, 0); +} + +/* increment the disjoint event count */ +void kbase_disjoint_event(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + atomic_inc(&kbdev->disjoint_event.count); +} + +/* increment the state and the event counter */ +void kbase_disjoint_state_up(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + atomic_inc(&kbdev->disjoint_event.state); + + kbase_disjoint_event(kbdev); +} + +/* decrement the state */ +void kbase_disjoint_state_down(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(atomic_read(&kbdev->disjoint_event.state) > 0); + + kbase_disjoint_event(kbdev); + + atomic_dec(&kbdev->disjoint_event.state); +} + +/* increments the count only if the state is > 0 */ +void kbase_disjoint_event_potential(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + if (atomic_read(&kbdev->disjoint_event.state)) + kbase_disjoint_event(kbdev); +} + +u32 kbase_disjoint_event_get(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + return atomic_read(&kbdev->disjoint_event.count); +} +KBASE_EXPORT_TEST_API(kbase_disjoint_event_get); diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c new file mode 100644 index 0000000..25acbcb --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c @@ -0,0 +1,456 @@ +/* + * + * (C) COPYRIGHT 2011-2017,2020 ARM Limited. All rights reserved. 
/**
 * kbase_dma_fence_lock_reservations() - Lock every reservation object in @info
 * @info: Reservation info holding the resv_objs array and its element count
 * @ctx:  Wound/wait acquire context; initialised by this function
 *
 * Takes the ww_mutex of each entry of @info->resv_objs in array order. If a
 * lock attempt fails, every lock taken so far is dropped. When the failure is
 * -EDEADLK the contended object is then locked with ww_mutex_lock_slow() and
 * the whole pass is restarted, skipping the object that is already held.
 *
 * Return: 0 with all reservation objects locked, or the error returned by
 * ww_mutex_lock() with nothing left locked and @ctx finalised.
 */
static int
kbase_dma_fence_lock_reservations(struct kbase_dma_fence_resv_info *info,
				  struct ww_acquire_ctx *ctx)
{
	/* Object taken through the slow path and not yet reached by the loop */
	struct reservation_object *content_res = NULL;
	unsigned int content_res_idx = 0;
	unsigned int r;
	int err = 0;

	ww_acquire_init(ctx, &reservation_ww_class);

retry:
	for (r = 0; r < info->dma_fence_resv_count; r++) {
		if (info->resv_objs[r] == content_res) {
			/* Already held from the slow-path lock; from here on
			 * it is accounted for by the index-based unlock below.
			 */
			content_res = NULL;
			continue;
		}

		err = ww_mutex_lock(&info->resv_objs[r]->lock, ctx);
		if (err)
			goto error;
	}

	ww_acquire_done(ctx);
	return err;

error:
	/* Remember which object we failed on before r is consumed below */
	content_res_idx = r;

	/* Unlock the ones locked so far in this pass */
	while (r--)
		ww_mutex_unlock(&info->resv_objs[r]->lock);

	/* Still set only if this pass failed before reaching the object that
	 * was locked through the slow path, so it has to be dropped here.
	 */
	if (content_res)
		ww_mutex_unlock(&content_res->lock);

	/* If we deadlock try with lock_slow and retry */
	if (err == -EDEADLK) {
		content_res = info->resv_objs[content_res_idx];
		ww_mutex_lock_slow(&content_res->lock, ctx);
		goto retry;
	}

	/* If we are here the function failed */
	ww_acquire_fini(ctx);
	return err;
}
+ */ + + if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) { + /* Wait was cancelled - zap the atom */ + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + if (jd_done_nolock(katom, NULL)) + kbase_js_sched_all(katom->kctx->kbdev); + } +} + +/** + * kbase_dma_fence_work() - Worker thread called when a fence is signaled + * @pwork: work_struct containing a pointer to a katom + * + * This function will clean and mark all dependencies as satisfied + */ +static void +kbase_dma_fence_work(struct work_struct *pwork) +{ + struct kbase_jd_atom *katom; + struct kbase_jd_context *ctx; + + katom = container_of(pwork, struct kbase_jd_atom, work); + ctx = &katom->kctx->jctx; + + mutex_lock(&ctx->lock); + if (kbase_fence_dep_count_read(katom) != 0) + goto out; + + kbase_fence_dep_count_set(katom, -1); + + /* Remove atom from list of dma-fence waiting atoms. */ + kbase_dma_fence_waiters_remove(katom); + /* Cleanup callbacks. */ + kbase_fence_free_callbacks(katom); + /* + * Queue atom on GPU, unless it has already completed due to a failing + * dependency. Run jd_done_nolock() on the katom if it is completed. + */ + if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED)) + jd_done_nolock(katom, NULL); + else + kbase_jd_dep_clear_locked(katom); + +out: + mutex_unlock(&ctx->lock); +} + +static void +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb) +#else +kbase_dma_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) +#endif +{ + struct kbase_fence_cb *kcb = container_of(cb, + struct kbase_fence_cb, + fence_cb); + struct kbase_jd_atom *katom = kcb->katom; + + /* If the atom is zapped dep_count will be forced to a negative number + * preventing this callback from ever scheduling work. Which in turn + * would reschedule the atom. 
+ */ + + if (kbase_fence_dep_count_dec_and_test(katom)) + kbase_dma_fence_queue_work(katom); +} + +static int +kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom, + struct reservation_object *resv, + bool exclusive) +{ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) + struct fence *excl_fence = NULL; + struct fence **shared_fences = NULL; +#else + struct dma_fence *excl_fence = NULL; + struct dma_fence **shared_fences = NULL; +#endif + unsigned int shared_count = 0; + int err, i; + + err = reservation_object_get_fences_rcu(resv, + &excl_fence, + &shared_count, + &shared_fences); + if (err) + return err; + + if (excl_fence) { + err = kbase_fence_add_callback(katom, + excl_fence, + kbase_dma_fence_cb); + + /* Release our reference, taken by reservation_object_get_fences_rcu(), + * to the fence. We have set up our callback (if that was possible), + * and it's the fence's owner is responsible for singling the fence + * before allowing it to disappear. + */ + dma_fence_put(excl_fence); + + if (err) + goto out; + } + + if (exclusive) { + for (i = 0; i < shared_count; i++) { + err = kbase_fence_add_callback(katom, + shared_fences[i], + kbase_dma_fence_cb); + if (err) + goto out; + } + } + + /* Release all our references to the shared fences, taken by + * reservation_object_get_fences_rcu(). We have set up our callback (if + * that was possible), and it's the fence's owner is responsible for + * signaling the fence before allowing it to disappear. + */ +out: + for (i = 0; i < shared_count; i++) + dma_fence_put(shared_fences[i]); + kfree(shared_fences); + + if (err) { + /* + * On error, cancel and clean up all callbacks that was set up + * before the error. 
+ */ + kbase_fence_free_callbacks(katom); + } + + return err; +} + +void kbase_dma_fence_add_reservation(struct reservation_object *resv, + struct kbase_dma_fence_resv_info *info, + bool exclusive) +{ + unsigned int i; + + for (i = 0; i < info->dma_fence_resv_count; i++) { + /* Duplicate resource, ignore */ + if (info->resv_objs[i] == resv) + return; + } + + info->resv_objs[info->dma_fence_resv_count] = resv; + if (exclusive) + set_bit(info->dma_fence_resv_count, + info->dma_fence_excl_bitmap); + (info->dma_fence_resv_count)++; +} + +int kbase_dma_fence_wait(struct kbase_jd_atom *katom, + struct kbase_dma_fence_resv_info *info) +{ + int err, i; +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) + struct fence *fence; +#else + struct dma_fence *fence; +#endif + struct ww_acquire_ctx ww_ctx; + + lockdep_assert_held(&katom->kctx->jctx.lock); + + fence = kbase_fence_out_new(katom); + if (!fence) { + err = -ENOMEM; + dev_err(katom->kctx->kbdev->dev, + "Error %d creating fence.\n", err); + return err; + } + + kbase_fence_dep_count_set(katom, 1); + + err = kbase_dma_fence_lock_reservations(info, &ww_ctx); + if (err) { + dev_err(katom->kctx->kbdev->dev, + "Error %d locking reservations.\n", err); + kbase_fence_dep_count_set(katom, -1); + kbase_fence_out_remove(katom); + return err; + } + + for (i = 0; i < info->dma_fence_resv_count; i++) { + struct reservation_object *obj = info->resv_objs[i]; + + if (!test_bit(i, info->dma_fence_excl_bitmap)) { + err = reservation_object_reserve_shared(obj); + if (err) { + dev_err(katom->kctx->kbdev->dev, + "Error %d reserving space for shared fence.\n", err); + goto end; + } + + err = kbase_dma_fence_add_reservation_callback(katom, obj, false); + if (err) { + dev_err(katom->kctx->kbdev->dev, + "Error %d adding reservation to callback.\n", err); + goto end; + } + + reservation_object_add_shared_fence(obj, fence); + } else { + err = kbase_dma_fence_add_reservation_callback(katom, obj, true); + if (err) { + 
dev_err(katom->kctx->kbdev->dev, + "Error %d adding reservation to callback.\n", err); + goto end; + } + + reservation_object_add_excl_fence(obj, fence); + } + } + +end: + kbase_dma_fence_unlock_reservations(info, &ww_ctx); + + if (likely(!err)) { + /* Test if the callbacks are already triggered */ + if (kbase_fence_dep_count_dec_and_test(katom)) { + kbase_fence_dep_count_set(katom, -1); + kbase_fence_free_callbacks(katom); + } else { + /* Add katom to the list of dma-buf fence waiting atoms + * only if it is still waiting. + */ + kbase_dma_fence_waiters_add(katom); + } + } else { + /* There was an error, cancel callbacks, set dep_count to -1 to + * indicate that the atom has been handled (the caller will + * kill it for us), signal the fence, free callbacks and the + * fence. + */ + kbase_fence_free_callbacks(katom); + kbase_fence_dep_count_set(katom, -1); + kbase_dma_fence_signal(katom); + } + + return err; +} + +void kbase_dma_fence_cancel_all_atoms(struct kbase_context *kctx) +{ + struct list_head *list = &kctx->dma_fence.waiting_resource; + + while (!list_empty(list)) { + struct kbase_jd_atom *katom; + + katom = list_first_entry(list, struct kbase_jd_atom, queue); + kbase_dma_fence_waiters_remove(katom); + kbase_dma_fence_cancel_atom(katom); + } +} + +void kbase_dma_fence_cancel_callbacks(struct kbase_jd_atom *katom) +{ + /* Cancel callbacks and clean up. */ + if (kbase_fence_free_callbacks(katom)) + kbase_dma_fence_queue_work(katom); +} + +void kbase_dma_fence_signal(struct kbase_jd_atom *katom) +{ + if (!katom->dma_fence.fence) + return; + + /* Signal the atom's fence. 
*/ + dma_fence_signal(katom->dma_fence.fence); + + kbase_fence_out_remove(katom); + + kbase_fence_free_callbacks(katom); +} + +void kbase_dma_fence_term(struct kbase_context *kctx) +{ + destroy_workqueue(kctx->dma_fence.wq); + kctx->dma_fence.wq = NULL; +} + +int kbase_dma_fence_init(struct kbase_context *kctx) +{ + INIT_LIST_HEAD(&kctx->dma_fence.waiting_resource); + + kctx->dma_fence.wq = alloc_workqueue("mali-fence-%d", + WQ_UNBOUND, 1, kctx->pid); + if (!kctx->dma_fence.wq) + return -ENOMEM; + + return 0; +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h new file mode 100644 index 0000000..2a4d6fc --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h @@ -0,0 +1,136 @@ +/* + * + * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
/**
 * struct kbase_dma_fence_resv_info - Structure with list of reservation objects
 * @resv_objs:             Array of reservation objects to attach the
 *                         new fence to.
 * @dma_fence_resv_count:  Number of reservation objects in the array.
 * @dma_fence_excl_bitmap: Specifies which resv_obj are exclusive; bit i
 *                         corresponds to resv_objs[i].
 *
 * This is used by some functions to pass around a collection of data about
 * reservation objects.
 *
 * kbase_dma_fence_add_reservation() appends to @resv_objs and sets bits in
 * @dma_fence_excl_bitmap without checking any capacity, so whoever provides
 * the storage must size both for the largest number of objects to be added.
 */
struct kbase_dma_fence_resv_info {
	struct reservation_object **resv_objs;
	unsigned int dma_fence_resv_count;
	unsigned long *dma_fence_excl_bitmap;
};
/**
 * kbase_dma_fence_cancel_all_atoms() - Cancel all dma-fence blocked atoms on
 *                                      kctx
 * @kctx: Pointer to kbase context
 *
 * This function will cancel and clean up all katoms on @kctx that are waiting
 * on dma-buf fences.
 *
 * Locking: jctx.lock needs to be held when calling this function.
 */
*/ + +static inline int kbase_dma_fence_init(struct kbase_context *kctx) +{ + return 0; +} + +static inline void kbase_dma_fence_term(struct kbase_context *kctx) {} +#endif /* CONFIG_MALI_DMA_FENCE */ +#endif diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.c new file mode 100644 index 0000000..188e53b --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.c @@ -0,0 +1,442 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Implementation of the dummy job execution workaround for the GPU hang issue. 
/**
 * in_range - Check that a span lies entirely inside a firmware image
 * @base: First byte of the image
 * @end:  One past the last byte of the image
 * @off:  Offset of the span, relative to @base
 * @sz:   Length of the span in bytes
 *
 * Return: true if the @sz bytes starting at @base + @off all lie within
 * [@base, @end); false if @off is negative, points past the end of the image,
 * or leaves fewer than @sz bytes.
 */
static bool in_range(const u8 *base, const u8 *end, off_t off, size_t sz)
{
	size_t len;

	if (end < base || off < 0)
		return false;

	len = (size_t)(end - base);

	/* Reject offsets past the end before subtracting: the remaining
	 * length is unsigned, so a too-large offset would wrap around to a
	 * huge value and make any size look as if it fitted.
	 */
	if ((unsigned long long)off > len)
		return false;

	return len - (size_t)off >= sz;
}
/**
 * kbase_dummy_job_wa_execute() - Run the dummy workaround job on shader cores
 * @kbdev: kbase device; must have a workaround context loaded
 * @cores: Bitmask of shader cores to power up and run the job on
 *
 * To be called after power up & MMU init, but before everything else.
 *
 * With the GPU and job IRQs masked, the requested cores are powered on and
 * the workaround job is run on the slot and job chain address stored in
 * kbdev->dummy_job_wa: once per set bit of @cores when the SERIALIZE flag is
 * set, otherwise once for the whole mask. The cores are powered off again
 * only when the LOGICAL_SHADER_POWER flag is set. The saved IRQ masks are
 * restored before returning.
 *
 * Return: 0 if every run succeeded, -EFAULT if @kbdev is NULL, no workaround
 * context is loaded, or at least one run failed.
 */
int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores)
{
	int as;
	int slot;
	u64 jc;
	int failed = 0;
	int runs = 0;
	u32 old_gpu_mask;
	u32 old_job_mask;

	if (!kbdev)
		return -EFAULT;

	if (!kbdev->dummy_job_wa.ctx)
		return -EFAULT;

	as = kbdev->dummy_job_wa.ctx->as_nr;
	slot = kbdev->dummy_job_wa.slot;
	jc = kbdev->dummy_job_wa.jc;

	/* mask off all but MMU IRQs */
	old_gpu_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
	old_job_mask = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK));
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0);
	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0);

	/* power up requested cores */
	kbase_reg_write(kbdev, SHADER_PWRON_LO, (cores & U32_MAX));
	kbase_reg_write(kbdev, SHADER_PWRON_HI, (cores >> 32));

	if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP) {
		/* wait for power-ups; a timeout is logged by wait() but its
		 * return value is not acted upon here
		 */
		wait(kbdev, SHADER_READY_LO, (cores & U32_MAX), true);
		if (cores >> 32)
			wait(kbdev, SHADER_READY_HI, (cores >> 32), true);
	}

	if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE) {
		int i;

		/* do for each requested core */
		for (i = 0; i < sizeof(cores) * 8; i++) {
			u64 affinity;

			affinity = 1ull << i;

			if (!(cores & affinity))
				continue;

			if (run_job(kbdev, as, slot, affinity, jc))
				failed++;
			runs++;
		}

	} else {
		/* single run with the whole core mask as affinity */
		if (run_job(kbdev, as, slot, cores, jc))
			failed++;
		runs++;
	}

	if (kbdev->dummy_job_wa.flags &
			KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) {
		/* power off shader cores (to reduce any dynamic leakage) */
		kbase_reg_write(kbdev, SHADER_PWROFF_LO, (cores & U32_MAX));
		kbase_reg_write(kbdev, SHADER_PWROFF_HI, (cores >> 32));

		/* wait for power off complete */
		wait(kbdev, SHADER_READY_LO, (cores & U32_MAX), false);
		wait(kbdev, SHADER_PWRTRANS_LO, (cores & U32_MAX), false);
		if (cores >> 32) {
			wait(kbdev, SHADER_READY_HI, (cores >> 32), false);
			wait(kbdev, SHADER_PWRTRANS_HI, (cores >> 32), false);
		}
		/* clear every GPU IRQ bit before the mask is restored */
		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), U32_MAX);
	}

	/* restore IRQ masks */
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), old_gpu_mask);
	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), old_job_mask);

	if (failed)
		dev_err(kbdev->dev,
			"WA complete with %d failures out of %d runs\n", failed,
			runs);

	return failed ? -EFAULT : 0;
}
Please refer to the Arm Mali DDK Valhall Release Notes, " + "Part number DC-06002 or contact support-mali@arm.com - driver probe will be failed\n"); + return -ENODEV; + } + + kctx = kbase_create_context(kbdev, true, + BASE_CONTEXT_CREATE_FLAG_NONE, 0, + NULL); + + if (!kctx) { + dev_err(kbdev->dev, "Failed to create WA context\n"); + goto no_ctx; + } + + fw = firmware->data; + fw_end = fw + firmware->size; + + dev_dbg(kbdev->dev, "Loaded firmware of size %zu bytes\n", + firmware->size); + + if (!in_range(fw, fw_end, 0, sizeof(*header))) { + dev_err(kbdev->dev, "WA too small\n"); + goto bad_fw; + } + + header = (const struct wa_header *)(fw + 0); + + if (header->signature != signature) { + dev_err(kbdev->dev, "WA signature failure: 0x%lx\n", + (unsigned long)header->signature); + goto bad_fw; + } + + if (header->version != version) { + dev_err(kbdev->dev, "WA version 0x%lx not supported\n", + (unsigned long)header->version); + goto bad_fw; + } + + if (!in_range(fw, fw_end, header->info_offset, sizeof(*v2_info))) { + dev_err(kbdev->dev, "WA info offset out of bounds\n"); + goto bad_fw; + } + + v2_info = (const struct wa_v2_info *)(fw + header->info_offset); + + if (v2_info->flags & ~KBASE_DUMMY_JOB_WA_FLAGS) { + dev_err(kbdev->dev, "Unsupported WA flag(s): 0x%llx\n", + (unsigned long long)v2_info->flags); + goto bad_fw; + } + + kbdev->dummy_job_wa.slot = v2_info->js; + kbdev->dummy_job_wa.jc = v2_info->jc; + kbdev->dummy_job_wa.flags = v2_info->flags; + + blob_offset = v2_info->blob_offset; + + while (blob_offset) { + const struct wa_blob *blob; + size_t nr_pages; + u64 flags; + u64 gpu_va; + struct kbase_va_region *va_region; + + if (!in_range(fw, fw_end, blob_offset, sizeof(*blob))) { + dev_err(kbdev->dev, "Blob offset out-of-range: 0x%lx\n", + (unsigned long)blob_offset); + goto bad_fw; + } + + blob = (const struct wa_blob *)(fw + blob_offset); + if (!in_range(fw, fw_end, blob->payload_offset, blob->size)) { + dev_err(kbdev->dev, "Payload out-of-bounds\n"); + goto
bad_fw; + } + + gpu_va = blob->base; + if (PAGE_ALIGN(gpu_va) != gpu_va) { + dev_err(kbdev->dev, "blob not page aligned\n"); + goto bad_fw; + } + nr_pages = PFN_UP(blob->size); + flags = blob->map_flags | BASE_MEM_FLAG_MAP_FIXED; + + va_region = kbase_mem_alloc(kctx, nr_pages, nr_pages, + 0, &flags, &gpu_va); + + if (!va_region) { + dev_err(kbdev->dev, "Failed to allocate for blob\n"); + } else { + struct kbase_vmap_struct vmap = { 0 }; + const u8 *payload; + void *dst; + + /* copy the payload, */ + payload = fw + blob->payload_offset; + + dst = kbase_vmap(kctx, + va_region->start_pfn << PAGE_SHIFT, + nr_pages << PAGE_SHIFT, &vmap); + + if (dst) { + memcpy(dst, payload, blob->size); + kbase_vunmap(kctx, &vmap); + } else { + dev_err(kbdev->dev, + "Failed to copy payload\n"); + } + + } + blob_offset = blob->blob_offset; /* follow chain */ + } + + release_firmware(firmware); + + kbasep_js_schedule_privileged_ctx(kbdev, kctx); + + kbdev->dummy_job_wa.ctx = kctx; + + err = sysfs_create_file(&kbdev->dev->kobj, + &dev_attr_dummy_job_wa_info.attr); + if (err) + dev_err(kbdev->dev, "SysFS file creation for dummy job wa failed\n"); + + return 0; + +bad_fw: + kbase_destroy_context(kctx); +no_ctx: + release_firmware(firmware); + return -EFAULT; +} + +void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev) +{ + struct kbase_context *wa_ctx; + + /* Can be safely called even if the file wasn't created on probe */ + sysfs_remove_file(&kbdev->dev->kobj, &dev_attr_dummy_job_wa_info.attr); + + wa_ctx = READ_ONCE(kbdev->dummy_job_wa.ctx); + WRITE_ONCE(kbdev->dummy_job_wa.ctx, NULL); + /* make this write visible before we tear down the ctx */ + smp_mb(); + + if (wa_ctx) { + kbasep_js_release_privileged_ctx(kbdev, wa_ctx); + kbase_destroy_context(wa_ctx); + } +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.h new file mode 100644 index 0000000..5bbe37d --- /dev/null +++ 
b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.h @@ -0,0 +1,45 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_DUMMY_JOB_WORKAROUND_ +#define _KBASE_DUMMY_JOB_WORKAROUND_ + +#define KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE (1ull << 0) +#define KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP (1ull << 1) +#define KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER (1ull << 2) + +#define KBASE_DUMMY_JOB_WA_FLAGS (KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE | \ + KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP | \ + KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) + + +int kbase_dummy_job_wa_load(struct kbase_device *kbdev); +void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev); +int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores); + +static inline bool kbase_dummy_job_wa_enabled(struct kbase_device *kbdev) +{ + return (kbdev->dummy_job_wa.ctx != NULL); +} + + +#endif /* _KBASE_DUMMY_JOB_WORKAROUND_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c new file mode 100644 index 0000000..c8b8f22 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c @@ -0,0 +1,264 @@ +/* + * + * 
(C) COPYRIGHT 2010-2016,2018-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +#include +#include +#include +#include + +static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ + struct base_jd_udata data; + struct kbase_device *kbdev; + + lockdep_assert_held(&kctx->jctx.lock); + + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(katom != NULL); + KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED); + + kbdev = kctx->kbdev; + data = katom->udata; + + KBASE_TLSTREAM_TL_NRET_ATOM_CTX(kbdev, katom, kctx); + KBASE_TLSTREAM_TL_DEL_ATOM(kbdev, katom); + + katom->status = KBASE_JD_ATOM_STATE_UNUSED; + dev_dbg(kbdev->dev, "Atom %p status to unused\n", (void *)katom); + wake_up(&katom->completed); + + return data; +} + +int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent) +{ + struct kbase_jd_atom *atom; + + KBASE_DEBUG_ASSERT(ctx); + + mutex_lock(&ctx->event_mutex); + + if (list_empty(&ctx->event_list)) { + if (!atomic_read(&ctx->event_closed)) { + mutex_unlock(&ctx->event_mutex); + return -1; + } + + /* generate the BASE_JD_EVENT_DRV_TERMINATED message on the fly */ + mutex_unlock(&ctx->event_mutex); + uevent->event_code = 
BASE_JD_EVENT_DRV_TERMINATED; + memset(&uevent->udata, 0, sizeof(uevent->udata)); + dev_dbg(ctx->kbdev->dev, + "event system closed, returning BASE_JD_EVENT_DRV_TERMINATED(0x%X)\n", + BASE_JD_EVENT_DRV_TERMINATED); + return 0; + } + + /* normal event processing */ + atomic_dec(&ctx->event_count); + atom = list_entry(ctx->event_list.next, struct kbase_jd_atom, dep_item[0]); + list_del(ctx->event_list.next); + + mutex_unlock(&ctx->event_mutex); + + dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom); + uevent->event_code = atom->event_code; + + uevent->atom_number = (atom - ctx->jctx.atoms); + + if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) + kbase_jd_free_external_resources(atom); + + mutex_lock(&ctx->jctx.lock); + uevent->udata = kbase_event_process(ctx, atom); + mutex_unlock(&ctx->jctx.lock); + + return 0; +} + +KBASE_EXPORT_TEST_API(kbase_event_dequeue); + +/** + * kbase_event_process_noreport_worker - Worker for processing atoms that do not + * return an event but do have external + * resources + * @data: Work structure + */ +static void kbase_event_process_noreport_worker(struct work_struct *data) +{ + struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, + work); + struct kbase_context *kctx = katom->kctx; + + if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) + kbase_jd_free_external_resources(katom); + + mutex_lock(&kctx->jctx.lock); + kbase_event_process(kctx, katom); + mutex_unlock(&kctx->jctx.lock); +} + +/** + * kbase_event_process_noreport - Process atoms that do not return an event + * @kctx: Context pointer + * @katom: Atom to be processed + * + * Atoms that do not have external resources will be processed immediately. + * Atoms that do have external resources will be processed on a workqueue, in + * order to avoid locking issues. 
+ */ +static void kbase_event_process_noreport(struct kbase_context *kctx, + struct kbase_jd_atom *katom) +{ + if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { + INIT_WORK(&katom->work, kbase_event_process_noreport_worker); + queue_work(kctx->event_workq, &katom->work); + } else { + kbase_event_process(kctx, katom); + } +} + +/** + * kbase_event_coalesce - Move pending events to the main event list + * @kctx: Context pointer + * + * kctx->event_list and kctx->event_coalesce_count must be protected + * by a lock unless this is the last thread using them + * (and we're about to terminate the lock). + * + * Return: The number of pending events moved to the main event list + */ +static int kbase_event_coalesce(struct kbase_context *kctx) +{ + const int event_count = kctx->event_coalesce_count; + + /* Join the list of pending events onto the tail of the main list + and reset it */ + list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list); + kctx->event_coalesce_count = 0; + + /* Return the number of events moved */ + return event_count; +} + +void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) +{ + struct kbase_device *kbdev = ctx->kbdev; + + dev_dbg(kbdev->dev, "Posting event for atom %p\n", (void *)atom); + + if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) { + if (atom->event_code == BASE_JD_EVENT_DONE) { + dev_dbg(kbdev->dev, "Suppressing event (atom done)\n"); + kbase_event_process_noreport(ctx, atom); + return; + } + } + + if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) { + dev_dbg(kbdev->dev, "Suppressing event (never)\n"); + kbase_event_process_noreport(ctx, atom); + return; + } + KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, atom, TL_ATOM_STATE_POSTED); + if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) { + /* Don't report the event until other event(s) have completed */ + dev_dbg(kbdev->dev, "Deferring event (coalesced)\n"); + mutex_lock(&ctx->event_mutex); + list_add_tail(&atom->dep_item[0], 
&ctx->event_coalesce_list); + ++ctx->event_coalesce_count; + mutex_unlock(&ctx->event_mutex); + } else { + /* Report the event and any pending events now */ + int event_count = 1; + + mutex_lock(&ctx->event_mutex); + event_count += kbase_event_coalesce(ctx); + list_add_tail(&atom->dep_item[0], &ctx->event_list); + atomic_add(event_count, &ctx->event_count); + mutex_unlock(&ctx->event_mutex); + dev_dbg(kbdev->dev, "Reporting %d events\n", event_count); + + kbase_event_wakeup(ctx); + + /* Post-completion latency */ + trace_sysgraph(SGR_POST, ctx->id, + kbase_jd_atom_id(ctx, atom)); + } +} +KBASE_EXPORT_TEST_API(kbase_event_post); + +void kbase_event_close(struct kbase_context *kctx) +{ + mutex_lock(&kctx->event_mutex); + atomic_set(&kctx->event_closed, true); + mutex_unlock(&kctx->event_mutex); + kbase_event_wakeup(kctx); +} + +int kbase_event_init(struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kctx); + + INIT_LIST_HEAD(&kctx->event_list); + INIT_LIST_HEAD(&kctx->event_coalesce_list); + mutex_init(&kctx->event_mutex); + kctx->event_coalesce_count = 0; + kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1); + + if (NULL == kctx->event_workq) + return -EINVAL; + + return 0; +} + +KBASE_EXPORT_TEST_API(kbase_event_init); + +void kbase_event_cleanup(struct kbase_context *kctx) +{ + int event_count; + + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(kctx->event_workq); + + flush_workqueue(kctx->event_workq); + destroy_workqueue(kctx->event_workq); + + /* We use kbase_event_dequeue to remove the remaining events as that + * deals with all the cleanup needed for the atoms. 
+ * + * Note: use of kctx->event_list without a lock is safe because this must be the last + * thread using it (because we're about to terminate the lock) + */ + event_count = kbase_event_coalesce(kctx); + atomic_add(event_count, &kctx->event_count); + + while (!list_empty(&kctx->event_list)) { + struct base_jd_event_v2 event; + + kbase_event_dequeue(kctx, &event); + } +} + +KBASE_EXPORT_TEST_API(kbase_event_cleanup); diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.c new file mode 100644 index 0000000..7a715b3 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.c @@ -0,0 +1,214 @@ +/* + * + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include +#include +#include +#include + +/* Spin lock protecting all Mali fences as fence->lock. 
*/ +static DEFINE_SPINLOCK(kbase_fence_lock); + +static const char * +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +kbase_fence_get_driver_name(struct fence *fence) +#else +kbase_fence_get_driver_name(struct dma_fence *fence) +#endif +{ + return kbase_drv_name; +} + +static const char * +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +kbase_fence_get_timeline_name(struct fence *fence) +#else +kbase_fence_get_timeline_name(struct dma_fence *fence) +#endif +{ + return kbase_timeline_name; +} + +static bool +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +kbase_fence_enable_signaling(struct fence *fence) +#else +kbase_fence_enable_signaling(struct dma_fence *fence) +#endif +{ + return true; +} + +static void +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +kbase_fence_fence_value_str(struct fence *fence, char *str, int size) +#else +kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size) +#endif +{ +#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) + snprintf(str, size, "%u", fence->seqno); +#else + snprintf(str, size, "%llu", fence->seqno); +#endif +} + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +const struct fence_ops kbase_fence_ops = { + .wait = fence_default_wait, +#else +const struct dma_fence_ops kbase_fence_ops = { + .wait = dma_fence_default_wait, +#endif + .get_driver_name = kbase_fence_get_driver_name, + .get_timeline_name = kbase_fence_get_timeline_name, + .enable_signaling = kbase_fence_enable_signaling, + .fence_value_str = kbase_fence_fence_value_str +}; + + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +struct fence * +kbase_fence_out_new(struct kbase_jd_atom *katom) +#else +struct dma_fence * +kbase_fence_out_new(struct kbase_jd_atom *katom) +#endif +{ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) + struct fence *fence; +#else + struct dma_fence *fence; +#endif + + WARN_ON(katom->dma_fence.fence); + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return NULL; + + 
dma_fence_init(fence, + &kbase_fence_ops, + &kbase_fence_lock, + katom->dma_fence.context, + atomic_inc_return(&katom->dma_fence.seqno)); + + katom->dma_fence.fence = fence; + + return fence; +} + +bool +kbase_fence_free_callbacks(struct kbase_jd_atom *katom) +{ + struct kbase_fence_cb *cb, *tmp; + bool res = false; + + lockdep_assert_held(&katom->kctx->jctx.lock); + + /* Clean up and free callbacks. */ + list_for_each_entry_safe(cb, tmp, &katom->dma_fence.callbacks, node) { + bool ret; + + /* Cancel callbacks that haven't been called yet. */ + ret = dma_fence_remove_callback(cb->fence, &cb->fence_cb); + if (ret) { + int remaining; + + /* Fence had not signaled, clean up after + * canceling. + */ + remaining = atomic_dec_return(&katom->dma_fence.dep_count); + + if (unlikely(remaining == 0)) + res = true; + } + + /* + * Release the reference taken in + * kbase_fence_add_callback(). + */ + dma_fence_put(cb->fence); + list_del(&cb->node); + kfree(cb); + } + + return res; +} + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +int +kbase_fence_add_callback(struct kbase_jd_atom *katom, + struct fence *fence, + fence_func_t callback) +#else +int +kbase_fence_add_callback(struct kbase_jd_atom *katom, + struct dma_fence *fence, + dma_fence_func_t callback) +#endif +{ + int err = 0; + struct kbase_fence_cb *kbase_fence_cb; + + if (!fence) + return -EINVAL; + + kbase_fence_cb = kmalloc(sizeof(*kbase_fence_cb), GFP_KERNEL); + if (!kbase_fence_cb) + return -ENOMEM; + + kbase_fence_cb->fence = fence; + kbase_fence_cb->katom = katom; + INIT_LIST_HEAD(&kbase_fence_cb->node); + atomic_inc(&katom->dma_fence.dep_count); + + err = dma_fence_add_callback(fence, &kbase_fence_cb->fence_cb, + callback); + if (err == -ENOENT) { + /* Fence signaled, get the completion result */ + err = dma_fence_get_status(fence); + + /* remap success completion to err code */ + if (err == 1) + err = 0; + + kfree(kbase_fence_cb); + atomic_dec(&katom->dma_fence.dep_count); + } else if (err) { + kfree(kbase_fence_cb); +
atomic_dec(&katom->dma_fence.dep_count); + } else { + /* + * Get reference to fence that will be kept until callback gets + * cleaned up in kbase_fence_free_callbacks(). + */ + dma_fence_get(fence); + /* Add callback to katom's list of callbacks */ + list_add(&kbase_fence_cb->node, &katom->dma_fence.callbacks); + } + + return err; +} + diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.h new file mode 100644 index 0000000..8e7024e --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.h @@ -0,0 +1,282 @@ +/* + * + * (C) COPYRIGHT 2010-2018, 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_FENCE_H_ +#define _KBASE_FENCE_H_ + +/* + * mali_kbase_fence.[hc] has common fence code used by both + * - CONFIG_MALI_DMA_FENCE - implicit DMA fences + * - CONFIG_SYNC_FILE - explicit fences beginning with 4.9 kernel + */ + +#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE) + +#include <linux/list.h> +#include "mali_kbase_fence_defs.h" +#include "mali_kbase.h" + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +extern const struct fence_ops kbase_fence_ops; +#else +extern const struct dma_fence_ops kbase_fence_ops; +#endif + +/** +* struct kbase_fence_cb - Mali dma-fence callback data struct +* @fence_cb: Callback function +* @katom: Pointer to katom that is waiting on this callback +* @fence: Pointer to the fence object on which this callback is waiting +* @node: List head for linking this callback to the katom +*/ +struct kbase_fence_cb { +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) + struct fence_cb fence_cb; + struct fence *fence; +#else + struct dma_fence_cb fence_cb; + struct dma_fence *fence; +#endif + struct kbase_jd_atom *katom; + struct list_head node; +}; + +/** + * kbase_fence_out_new() - Creates a new output fence and puts it on the atom + * @katom: Atom to create an output fence for + * + * Return: A new fence object on success, NULL on failure. + */ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +struct fence *kbase_fence_out_new(struct kbase_jd_atom *katom); +#else +struct dma_fence *kbase_fence_out_new(struct kbase_jd_atom *katom); +#endif + +#if defined(CONFIG_SYNC_FILE) +/** + * kbase_fence_fence_in_set() - Assign input fence to atom + * @katom: Atom to assign input fence to + * @fence: Input fence to assign to atom + * + * This function will take ownership of one fence reference!
+ */ +#define kbase_fence_fence_in_set(katom, fence) \ + do { \ + WARN_ON((katom)->dma_fence.fence_in); \ + (katom)->dma_fence.fence_in = fence; \ + } while (0) +#endif + + +/** + * kbase_fence_out_remove() - Removes the output fence from atom + * @katom: Atom to remove output fence for + * + * This will also release the reference to this fence which the atom keeps + */ +static inline void kbase_fence_out_remove(struct kbase_jd_atom *katom) +{ + if (katom->dma_fence.fence) { + dma_fence_put(katom->dma_fence.fence); + katom->dma_fence.fence = NULL; + } +} + +#if defined(CONFIG_SYNC_FILE) +/** + * kbase_fence_in_remove() - Removes the input fence from atom + * @katom: Atom to remove input fence for + * + * This will also release the reference to this fence which the atom keeps + */ +static inline void kbase_fence_in_remove(struct kbase_jd_atom *katom) +{ + if (katom->dma_fence.fence_in) { + dma_fence_put(katom->dma_fence.fence_in); + katom->dma_fence.fence_in = NULL; + } +} +#endif + +/** + * kbase_fence_out_is_ours() - Check if atom has a valid fence created by us + * @katom: Atom to check output fence for + * + * Return: true if fence exists and is valid, otherwise false + */ +static inline bool kbase_fence_out_is_ours(struct kbase_jd_atom *katom) +{ + return katom->dma_fence.fence && + katom->dma_fence.fence->ops == &kbase_fence_ops; +} + +/** + * kbase_fence_out_signal() - Signal output fence of atom + * @katom: Atom to signal output fence for + * @status: Status to signal with (0 for success, < 0 for error) + * + * Return: 0 on success, < 0 on error + */ +static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom, + int status) +{ + if (status) { +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ + KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE) + fence_set_error(katom->dma_fence.fence, status); +#elif (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE) + dma_fence_set_error(katom->dma_fence.fence, status); +#else + katom->dma_fence.fence->status =
status; +#endif + } + return dma_fence_signal(katom->dma_fence.fence); +} + +/** + * kbase_fence_add_callback() - Add callback on @fence to block @katom + * @katom: Pointer to katom that will be blocked by @fence + * @fence: Pointer to fence on which to set up the callback + * @callback: Pointer to function to be called when fence is signaled + * + * Caller needs to hold a reference to @fence when calling this function, and + * the caller is responsible for releasing that reference. An additional + * reference to @fence will be taken when the callback was successfully set up + * and @fence needs to be kept valid until the callback has been called and + * cleanup have been done. + * + * Return: 0 on success: fence was either already signaled, or callback was + * set up. Negative error code is returned on error. + */ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +int kbase_fence_add_callback(struct kbase_jd_atom *katom, + struct fence *fence, + fence_func_t callback); +#else +int kbase_fence_add_callback(struct kbase_jd_atom *katom, + struct dma_fence *fence, + dma_fence_func_t callback); +#endif + +/** + * kbase_fence_dep_count_set() - Set dep_count value on atom to specified value + * @katom: Atom to set dep_count for + * @val: value to set dep_count to + * + * The dep_count is available to the users of this module so that they can + * synchronize completion of the wait with cancellation and adding of more + * callbacks. For instance, a user could do the following: + * + * dep_count set to 1 + * callback #1 added, dep_count is increased to 2 + * callback #1 happens, dep_count decremented to 1 + * since dep_count > 0, no completion is done + * callback #2 is added, dep_count is increased to 2 + * dep_count decremented to 1 + * callback #2 happens, dep_count decremented to 0 + * since dep_count now is zero, completion executes + * + * The dep_count can also be used to make sure that the completion only + * executes once. 
This is typically done by setting dep_count to -1 for the + * thread that takes on this responsibility. + */ +static inline void +kbase_fence_dep_count_set(struct kbase_jd_atom *katom, int val) +{ + atomic_set(&katom->dma_fence.dep_count, val); +} + +/** + * kbase_fence_dep_count_dec_and_test() - Decrements dep_count + * @katom: Atom to decrement dep_count for + * + * See @kbase_fence_dep_count_set for general description about dep_count + * + * Return: true if value was decremented to zero, otherwise false + */ +static inline bool +kbase_fence_dep_count_dec_and_test(struct kbase_jd_atom *katom) +{ + return atomic_dec_and_test(&katom->dma_fence.dep_count); +} + +/** + * kbase_fence_dep_count_read() - Returns the current dep_count value + * @katom: Pointer to katom + * + * See @kbase_fence_dep_count_set for general description about dep_count + * + * Return: The current dep_count value + */ +static inline int kbase_fence_dep_count_read(struct kbase_jd_atom *katom) +{ + return atomic_read(&katom->dma_fence.dep_count); +} + +/** + * kbase_fence_free_callbacks() - Free dma-fence callbacks on a katom + * @katom: Pointer to katom + * + * This function will free all fence callbacks on the katom's list of + * callbacks. Callbacks that have not yet been called, because their fence + * hasn't yet signaled, will first be removed from the fence. + * + * Locking: katom->dma_fence.callbacks list assumes jctx.lock is held. + * + * Return: true if dep_count reached 0, otherwise false. + */ +bool kbase_fence_free_callbacks(struct kbase_jd_atom *katom); + +#if defined(CONFIG_SYNC_FILE) +/** + * kbase_fence_in_get() - Retrieve input fence for atom. 
+ * @katom: Atom to get input fence from + * + * A ref will be taken for the fence, so use @kbase_fence_put() to release it + * + * Return: The fence, or NULL if there is no input fence for atom + */ +#define kbase_fence_in_get(katom) dma_fence_get((katom)->dma_fence.fence_in) +#endif + +/** + * kbase_fence_out_get() - Retrieve output fence for atom. + * @katom: Atom to get output fence from + * + * A ref will be taken for the fence, so use @kbase_fence_put() to release it + * + * Return: The fence, or NULL if there is no output fence for atom + */ +#define kbase_fence_out_get(katom) dma_fence_get((katom)->dma_fence.fence) + + +/** + * kbase_fence_put() - Releases a reference to a fence + * @fence: Fence to release reference for. + */ +#define kbase_fence_put(fence) dma_fence_put(fence) + + +#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE */ + +#endif /* _KBASE_FENCE_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h new file mode 100644 index 0000000..607a95c --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h @@ -0,0 +1,68 @@ +/* + * + * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html.
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_FENCE_DEFS_H_ +#define _KBASE_FENCE_DEFS_H_ + +/* + * There was a big rename in the 4.10 kernel (fence* -> dma_fence*) + * This file hides the compatibility issues with this for the rest of the driver + */ + +#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE) + +#include <linux/version.h> + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) + +#include <linux/fence.h> + +#define dma_fence_context_alloc(a) fence_context_alloc(a) +#define dma_fence_init(a, b, c, d, e) fence_init(a, b, c, d, e) +#define dma_fence_get(a) fence_get(a) +#define dma_fence_put(a) fence_put(a) +#define dma_fence_signal(a) fence_signal(a) +#define dma_fence_is_signaled(a) fence_is_signaled(a) +#define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c) +#define dma_fence_remove_callback(a, b) fence_remove_callback(a, b) + +#if (KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE) +#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->error ?: 1 : 0) +#else +#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->status ?: 1 : 0) +#endif + +#else + +#include <linux/dma-fence.h> + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)) +#define dma_fence_get_status(a) (dma_fence_is_signaled(a) ? \ + (a)->status ?: 1 \ + : 0) +#endif + +#endif /* < 4.10.0 */ + +#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE */ + +#endif /* _KBASE_FENCE_DEFS_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h new file mode 100644 index 0000000..6428f08 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h @@ -0,0 +1,53 @@ +/* + * + * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* NB taken from gator */ +/* + * List of possible actions to be controlled by DS-5 Streamline. + * The following numbers are used by gator to control the frame buffer dumping + * and s/w counter reporting. We cannot use the enums in mali_uk_types.h because + * they are unknown inside gator. + */ + +#ifndef _KBASE_GATOR_H_ +#define _KBASE_GATOR_H_ + +#include <linux/types.h> + +#define GATOR_JOB_SLOT_START 1 +#define GATOR_JOB_SLOT_STOP 2 +#define GATOR_JOB_SLOT_SOFT_STOPPED 3 + +#ifdef CONFIG_MALI_GATOR_SUPPORT + +#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16)) + +struct kbase_context; + +void kbase_trace_mali_job_slots_event(u32 dev_id, u32 event, const struct kbase_context *kctx, u8 atom_id); +void kbase_trace_mali_pm_status(u32 dev_id, u32 event, u64 value); +void kbase_trace_mali_page_fault_insert_pages(u32 dev_id, int event, u32 value); +void kbase_trace_mali_total_alloc_pages_change(u32 dev_id, long long int event); + +#endif /* CONFIG_MALI_GATOR_SUPPORT */ + +#endif /* _KBASE_GATOR_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c new file mode 100755 index 0000000..93f1565 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c @@ -0,0 +1,110 @@ +/* + * + * (C) COPYRIGHT 2012-2017, 2019 ARM Limited. All rights reserved.
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include + +#ifdef CONFIG_DEBUG_FS +/** Show callback for the @c gpu_memory debugfs file. + * + * This function is called to get the contents of the @c gpu_memory debugfs + * file. This is a report of current gpu memory usage. + * + * @param sfile The debugfs entry + * @param data Data associated with the entry + * + * @return 0 if successfully prints data in debugfs entry file + * -1 if it encountered an error + */ + +static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data) +{ + struct list_head *entry; + const struct list_head *kbdev_list; + + kbdev_list = kbase_device_get_list(); + list_for_each(entry, kbdev_list) { + struct kbase_device *kbdev = NULL; + struct kbase_context *kctx; + + kbdev = list_entry(entry, struct kbase_device, entry); + /* output the total memory usage and cap for this device */ + seq_printf(sfile, "%-16s %-16s %10u\n", + kbdev->devname, + "total used_pages", + atomic_read(&(kbdev->memdev.used_pages))); + seq_puts(sfile, "----------------------------------------------------\n"); + seq_printf(sfile, "%-16s %-16s %-16s\n", + "kctx", "pid", "used_pages"); + seq_puts(sfile, "----------------------------------------------------\n"); + mutex_lock(&kbdev->kctx_list_lock); + 
list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { + /* output the memory usage and cap for each kctx + * opened on this device */ + seq_printf(sfile, "%p %10u %10u\n", + kctx, + kctx->tgid, + atomic_read(&(kctx->used_pages))); + } + mutex_unlock(&kbdev->kctx_list_lock); + } + + kbase_device_put_list(kbdev_list); + return 0; +} + +/* + * File operations related to debugfs entry for gpu_memory + */ +static int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file) +{ + return single_open(file, kbasep_gpu_memory_seq_show, NULL); +} + +static const struct file_operations kbasep_gpu_memory_debugfs_fops = { + .owner = THIS_MODULE, + .open = kbasep_gpu_memory_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/* + * Initialize debugfs entry for gpu_memory + */ +void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev) +{ + debugfs_create_file("gpu_memory", S_IRUGO, + kbdev->mali_debugfs_directory, NULL, + &kbasep_gpu_memory_debugfs_fops); + return; +} + +#else +/* + * Stub functions for when debugfs is disabled + */ +void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev) +{ + return; +} +#endif diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h new file mode 100644 index 0000000..28a871a --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h @@ -0,0 +1,42 @@ +/* + * + * (C) COPYRIGHT 2012-2014, 2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file mali_kbase_gpu_memory_debugfs.h + * Header file for gpu_memory entry in debugfs + * + */ + +#ifndef _KBASE_GPU_MEMORY_DEBUGFS_H +#define _KBASE_GPU_MEMORY_DEBUGFS_H + +#include +#include + +/** + * @brief Initialize gpu_memory debugfs entry + */ +void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev); + +#endif /*_KBASE_GPU_MEMORY_DEBUGFS_H*/ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c new file mode 100644 index 0000000..ae2458f --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c @@ -0,0 +1,636 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * Base kernel property query APIs + */ + +#include +#include +#include +#include +#include +#include "mali_kbase_ioctl.h" +#include +#include +#include +#include + + +static void kbase_gpuprops_construct_coherent_groups( + struct base_gpu_props * const props) +{ + struct mali_base_gpu_coherent_group *current_group; + u64 group_present; + u64 group_mask; + u64 first_set, first_set_prev; + u32 num_groups = 0; + + KBASE_DEBUG_ASSERT(NULL != props); + + props->coherency_info.coherency = props->raw_props.mem_features; + props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present); + + if (props->coherency_info.coherency & GROUPS_L2_COHERENT) { + /* Group is l2 coherent */ + group_present = props->raw_props.l2_present; + } else { + /* Group is l1 coherent */ + group_present = props->raw_props.shader_present; + } + + /* + * The coherent group mask can be computed from the l2 present + * register. + * + * For the coherent group n: + * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1) + * where first_set is group_present with only its nth set-bit kept + * (i.e. the position from where a new group starts). + * + * For instance if the groups are l2 coherent and l2_present=0x0..01111: + * The first mask is: + * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1) + * = (0x0..010 - 1) & ~(0x0..01 - 1) + * = 0x0..00f + * The second mask is: + * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1) + * = (0x0..100 - 1) & ~(0x0..010 - 1) + * = 0x0..0f0 + * And so on until all the bits from group_present have been cleared + * (i.e. there is no group left). 
+ */ + + current_group = props->coherency_info.group; + first_set = group_present & ~(group_present - 1); + + while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) { + group_present -= first_set; /* Clear the current group bit */ + first_set_prev = first_set; + + first_set = group_present & ~(group_present - 1); + group_mask = (first_set - 1) & ~(first_set_prev - 1); + + /* Populate the coherent_group structure for each group */ + current_group->core_mask = group_mask & props->raw_props.shader_present; + current_group->num_cores = hweight64(current_group->core_mask); + + num_groups++; + current_group++; + } + + if (group_present != 0) + pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS); + + props->coherency_info.num_groups = num_groups; +} + +/** + * kbase_gpuprops_get_props - Get the GPU configuration + * @gpu_props: The &struct base_gpu_props structure + * @kbdev: The &struct kbase_device structure for the device + * + * Fill the &struct base_gpu_props structure with values from the GPU + * configuration registers. Only the raw properties are filled in this function. 
+ * + * Return: Zero on success, Linux error code on failure + */ +static int kbase_gpuprops_get_props(struct base_gpu_props * const gpu_props, + struct kbase_device *kbdev) +{ + struct kbase_gpuprops_regdump regdump; + int i; + int err; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + KBASE_DEBUG_ASSERT(NULL != gpu_props); + + /* Dump relevant registers */ + err = kbase_backend_gpuprops_get(kbdev, ®dump); + if (err) + return err; + + gpu_props->raw_props.gpu_id = regdump.gpu_id; + gpu_props->raw_props.tiler_features = regdump.tiler_features; + gpu_props->raw_props.mem_features = regdump.mem_features; + gpu_props->raw_props.mmu_features = regdump.mmu_features; + gpu_props->raw_props.l2_features = regdump.l2_features; + gpu_props->raw_props.core_features = regdump.core_features; + + gpu_props->raw_props.as_present = regdump.as_present; + gpu_props->raw_props.js_present = regdump.js_present; + gpu_props->raw_props.shader_present = + ((u64) regdump.shader_present_hi << 32) + + regdump.shader_present_lo; + gpu_props->raw_props.tiler_present = + ((u64) regdump.tiler_present_hi << 32) + + regdump.tiler_present_lo; + gpu_props->raw_props.l2_present = + ((u64) regdump.l2_present_hi << 32) + + regdump.l2_present_lo; + gpu_props->raw_props.stack_present = + ((u64) regdump.stack_present_hi << 32) + + regdump.stack_present_lo; + + for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) + gpu_props->raw_props.js_features[i] = regdump.js_features[i]; + + for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) + gpu_props->raw_props.texture_features[i] = regdump.texture_features[i]; + + gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size; + gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads; + gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size; + gpu_props->raw_props.thread_features = regdump.thread_features; + gpu_props->raw_props.thread_tls_alloc = regdump.thread_tls_alloc; + + return 0; +} + +void 
kbase_gpuprops_update_core_props_gpu_id( + struct base_gpu_props * const gpu_props) +{ + gpu_props->core_props.version_status = + KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4); + gpu_props->core_props.minor_revision = + KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8); + gpu_props->core_props.major_revision = + KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4); + gpu_props->core_props.product_id = + KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16); +} + +/** + * kbase_gpuprops_calculate_props - Calculate the derived properties + * @gpu_props: The &struct base_gpu_props structure + * @kbdev: The &struct kbase_device structure for the device + * + * Fill the &struct base_gpu_props structure with values derived from the GPU + * configuration registers + */ +static void kbase_gpuprops_calculate_props( + struct base_gpu_props * const gpu_props, struct kbase_device *kbdev) +{ + int i; + u32 gpu_id; + u32 product_id; + + /* Populate the base_gpu_props structure */ + kbase_gpuprops_update_core_props_gpu_id(gpu_props); + gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2; +#if KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE + gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT; +#else + gpu_props->core_props.gpu_available_memory_size = + totalram_pages() << PAGE_SHIFT; +#endif + + gpu_props->core_props.num_exec_engines = + KBASE_UBFX32(gpu_props->raw_props.core_features, 0, 4); + + for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) + gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i]; + + gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8); + gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8); + + /* Field with number of l2 slices is added to MEM_FEATURES register + * since t76x. Below code assumes that for older GPU reserved bits will + * be read as zero. 
*/ + gpu_props->l2_props.num_l2_slices = + KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1; + + gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6); + gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4); + + if (gpu_props->raw_props.thread_max_threads == 0) + gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT; + else + gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads; + + if (gpu_props->raw_props.thread_max_workgroup_size == 0) + gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT; + else + gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size; + + if (gpu_props->raw_props.thread_max_barrier_size == 0) + gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT; + else + gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size; + + if (gpu_props->raw_props.thread_tls_alloc == 0) + gpu_props->thread_props.tls_alloc = + gpu_props->thread_props.max_threads; + else + gpu_props->thread_props.tls_alloc = + gpu_props->raw_props.thread_tls_alloc; + + /* MIDHARC-2364 was intended for tULx. + * Workaround for the incorrectly applied THREAD_FEATURES to tDUx. 
+ */ + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID; + product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT; + + if ((gpu_id & GPU_ID2_PRODUCT_MODEL) == GPU_ID2_PRODUCT_TDUX) { + gpu_props->thread_props.max_registers = + KBASE_UBFX32(gpu_props->raw_props.thread_features, + 0U, 22); + gpu_props->thread_props.impl_tech = + KBASE_UBFX32(gpu_props->raw_props.thread_features, + 22U, 2); + gpu_props->thread_props.max_task_queue = + KBASE_UBFX32(gpu_props->raw_props.thread_features, + 24U, 8); + gpu_props->thread_props.max_thread_group_split = 0; + } else { + gpu_props->thread_props.max_registers = + KBASE_UBFX32(gpu_props->raw_props.thread_features, + 0U, 16); + gpu_props->thread_props.max_task_queue = + KBASE_UBFX32(gpu_props->raw_props.thread_features, + 16U, 8); + gpu_props->thread_props.max_thread_group_split = + KBASE_UBFX32(gpu_props->raw_props.thread_features, + 24U, 6); + gpu_props->thread_props.impl_tech = + KBASE_UBFX32(gpu_props->raw_props.thread_features, + 30U, 2); + } + + /* If values are not specified, then use defaults */ + if (gpu_props->thread_props.max_registers == 0) { + gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT; + gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT; + gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT; + } + /* Initialize the coherent_group structure for each group */ + kbase_gpuprops_construct_coherent_groups(gpu_props); +} + +void kbase_gpuprops_set(struct kbase_device *kbdev) +{ + struct kbase_gpu_props *gpu_props; + struct gpu_raw_gpu_props *raw; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + gpu_props = &kbdev->gpu_props; + raw = &gpu_props->props.raw_props; + + /* Initialize the base_gpu_props structure from the hardware */ + kbase_gpuprops_get_props(&gpu_props->props, kbdev); + + /* Populate the derived properties */ + kbase_gpuprops_calculate_props(&gpu_props->props, kbdev); + + /* Populate kbase-only fields */ + 
gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8); + gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8); + + gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1); + + gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8); + gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8); + + gpu_props->num_cores = hweight64(raw->shader_present); + gpu_props->num_core_groups = hweight64(raw->l2_present); + gpu_props->num_address_spaces = hweight32(raw->as_present); + gpu_props->num_job_slots = hweight32(raw->js_present); +} + +int kbase_gpuprops_set_features(struct kbase_device *kbdev) +{ + struct base_gpu_props *gpu_props; + struct kbase_gpuprops_regdump regdump; + int err; + + gpu_props = &kbdev->gpu_props.props; + + /* Dump relevant registers */ + err = kbase_backend_gpuprops_get_features(kbdev, ®dump); + if (err) + return err; + + /* + * Copy the raw value from the register, later this will get turned + * into the selected coherency mode. + * Additionally, add non-coherent mode, as this is always supported. + */ + gpu_props->raw_props.coherency_mode = regdump.coherency_features | + COHERENCY_FEATURE_BIT(COHERENCY_NONE); + + if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_THREAD_GROUP_SPLIT)) + gpu_props->thread_props.max_thread_group_split = 0; + + return err; +} + +/* + * Module parameters to allow the L2 size and hash configuration to be + * overridden. + * + * These parameters must be set on insmod to take effect, and are not visible + * in sysfs. + */ +static u8 override_l2_size; +module_param(override_l2_size, byte, 0); +MODULE_PARM_DESC(override_l2_size, "Override L2 size config for testing"); + +static u8 override_l2_hash; +module_param(override_l2_hash, byte, 0); +MODULE_PARM_DESC(override_l2_hash, "Override L2 hash config for testing"); + +/** + * kbase_read_l2_config_from_dt - Read L2 configuration + * @kbdev: The kbase device for which to get the L2 configuration. 
+ * + * Check for L2 configuration overrides in module parameters and device tree. + * Override values in module parameters take priority over override values in + * device tree. + * + * Return: true if either size or hash was overridden, false if no overrides + * were found. + */ +static bool kbase_read_l2_config_from_dt(struct kbase_device * const kbdev) +{ + struct device_node *np = kbdev->dev->of_node; + + if (!np) + return false; + + if (override_l2_size) + kbdev->l2_size_override = override_l2_size; + else if (of_property_read_u8(np, "l2-size", &kbdev->l2_size_override)) + kbdev->l2_size_override = 0; + + if (override_l2_hash) + kbdev->l2_hash_override = override_l2_hash; + else if (of_property_read_u8(np, "l2-hash", &kbdev->l2_hash_override)) + kbdev->l2_hash_override = 0; + + if (kbdev->l2_size_override || kbdev->l2_hash_override) + return true; + + return false; +} + +int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev) +{ + int err = 0; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) { + struct kbase_gpuprops_regdump regdump; + struct base_gpu_props *gpu_props = &kbdev->gpu_props.props; + + /* Check for L2 cache size & hash overrides */ + if (!kbase_read_l2_config_from_dt(kbdev)) + return 0; + + /* Need L2 to get powered to reflect to L2_FEATURES */ + kbase_pm_context_active(kbdev); + + /* Wait for the completion of L2 power transition */ + kbase_pm_wait_for_l2_powered(kbdev); + + /* Dump L2_FEATURES register */ + err = kbase_backend_gpuprops_get_l2_features(kbdev, ®dump); + if (err) + goto idle_gpu; + + dev_info(kbdev->dev, "Reflected L2_FEATURES is 0x%x\n", + regdump.l2_features); + + /* Update gpuprops with reflected L2_FEATURES */ + gpu_props->raw_props.l2_features = regdump.l2_features; + gpu_props->l2_props.log2_cache_size = + KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8); + +idle_gpu: + /* Let GPU idle */ + kbase_pm_context_idle(kbdev); + } + + return err; +} + +static struct { + u32 type; + size_t offset; + 
int size; +} gpu_property_mapping[] = { +#define PROP(name, member) \ + {KBASE_GPUPROP_ ## name, offsetof(struct base_gpu_props, member), \ + sizeof(((struct base_gpu_props *)0)->member)} + PROP(PRODUCT_ID, core_props.product_id), + PROP(VERSION_STATUS, core_props.version_status), + PROP(MINOR_REVISION, core_props.minor_revision), + PROP(MAJOR_REVISION, core_props.major_revision), + PROP(GPU_FREQ_KHZ_MAX, core_props.gpu_freq_khz_max), + PROP(LOG2_PROGRAM_COUNTER_SIZE, core_props.log2_program_counter_size), + PROP(TEXTURE_FEATURES_0, core_props.texture_features[0]), + PROP(TEXTURE_FEATURES_1, core_props.texture_features[1]), + PROP(TEXTURE_FEATURES_2, core_props.texture_features[2]), + PROP(TEXTURE_FEATURES_3, core_props.texture_features[3]), + PROP(GPU_AVAILABLE_MEMORY_SIZE, core_props.gpu_available_memory_size), + PROP(NUM_EXEC_ENGINES, core_props.num_exec_engines), + + PROP(L2_LOG2_LINE_SIZE, l2_props.log2_line_size), + PROP(L2_LOG2_CACHE_SIZE, l2_props.log2_cache_size), + PROP(L2_NUM_L2_SLICES, l2_props.num_l2_slices), + + PROP(TILER_BIN_SIZE_BYTES, tiler_props.bin_size_bytes), + PROP(TILER_MAX_ACTIVE_LEVELS, tiler_props.max_active_levels), + + PROP(MAX_THREADS, thread_props.max_threads), + PROP(MAX_WORKGROUP_SIZE, thread_props.max_workgroup_size), + PROP(MAX_BARRIER_SIZE, thread_props.max_barrier_size), + PROP(MAX_REGISTERS, thread_props.max_registers), + PROP(MAX_TASK_QUEUE, thread_props.max_task_queue), + PROP(MAX_THREAD_GROUP_SPLIT, thread_props.max_thread_group_split), + PROP(IMPL_TECH, thread_props.impl_tech), + PROP(TLS_ALLOC, thread_props.tls_alloc), + + PROP(RAW_SHADER_PRESENT, raw_props.shader_present), + PROP(RAW_TILER_PRESENT, raw_props.tiler_present), + PROP(RAW_L2_PRESENT, raw_props.l2_present), + PROP(RAW_STACK_PRESENT, raw_props.stack_present), + PROP(RAW_L2_FEATURES, raw_props.l2_features), + PROP(RAW_CORE_FEATURES, raw_props.core_features), + PROP(RAW_MEM_FEATURES, raw_props.mem_features), + PROP(RAW_MMU_FEATURES, raw_props.mmu_features), + 
PROP(RAW_AS_PRESENT, raw_props.as_present), + PROP(RAW_JS_PRESENT, raw_props.js_present), + PROP(RAW_JS_FEATURES_0, raw_props.js_features[0]), + PROP(RAW_JS_FEATURES_1, raw_props.js_features[1]), + PROP(RAW_JS_FEATURES_2, raw_props.js_features[2]), + PROP(RAW_JS_FEATURES_3, raw_props.js_features[3]), + PROP(RAW_JS_FEATURES_4, raw_props.js_features[4]), + PROP(RAW_JS_FEATURES_5, raw_props.js_features[5]), + PROP(RAW_JS_FEATURES_6, raw_props.js_features[6]), + PROP(RAW_JS_FEATURES_7, raw_props.js_features[7]), + PROP(RAW_JS_FEATURES_8, raw_props.js_features[8]), + PROP(RAW_JS_FEATURES_9, raw_props.js_features[9]), + PROP(RAW_JS_FEATURES_10, raw_props.js_features[10]), + PROP(RAW_JS_FEATURES_11, raw_props.js_features[11]), + PROP(RAW_JS_FEATURES_12, raw_props.js_features[12]), + PROP(RAW_JS_FEATURES_13, raw_props.js_features[13]), + PROP(RAW_JS_FEATURES_14, raw_props.js_features[14]), + PROP(RAW_JS_FEATURES_15, raw_props.js_features[15]), + PROP(RAW_TILER_FEATURES, raw_props.tiler_features), + PROP(RAW_TEXTURE_FEATURES_0, raw_props.texture_features[0]), + PROP(RAW_TEXTURE_FEATURES_1, raw_props.texture_features[1]), + PROP(RAW_TEXTURE_FEATURES_2, raw_props.texture_features[2]), + PROP(RAW_TEXTURE_FEATURES_3, raw_props.texture_features[3]), + PROP(RAW_GPU_ID, raw_props.gpu_id), + PROP(RAW_THREAD_MAX_THREADS, raw_props.thread_max_threads), + PROP(RAW_THREAD_MAX_WORKGROUP_SIZE, + raw_props.thread_max_workgroup_size), + PROP(RAW_THREAD_MAX_BARRIER_SIZE, raw_props.thread_max_barrier_size), + PROP(RAW_THREAD_FEATURES, raw_props.thread_features), + PROP(RAW_THREAD_TLS_ALLOC, raw_props.thread_tls_alloc), + PROP(RAW_COHERENCY_MODE, raw_props.coherency_mode), + + PROP(COHERENCY_NUM_GROUPS, coherency_info.num_groups), + PROP(COHERENCY_NUM_CORE_GROUPS, coherency_info.num_core_groups), + PROP(COHERENCY_COHERENCY, coherency_info.coherency), + PROP(COHERENCY_GROUP_0, coherency_info.group[0].core_mask), + PROP(COHERENCY_GROUP_1, coherency_info.group[1].core_mask), + 
PROP(COHERENCY_GROUP_2, coherency_info.group[2].core_mask), + PROP(COHERENCY_GROUP_3, coherency_info.group[3].core_mask), + PROP(COHERENCY_GROUP_4, coherency_info.group[4].core_mask), + PROP(COHERENCY_GROUP_5, coherency_info.group[5].core_mask), + PROP(COHERENCY_GROUP_6, coherency_info.group[6].core_mask), + PROP(COHERENCY_GROUP_7, coherency_info.group[7].core_mask), + PROP(COHERENCY_GROUP_8, coherency_info.group[8].core_mask), + PROP(COHERENCY_GROUP_9, coherency_info.group[9].core_mask), + PROP(COHERENCY_GROUP_10, coherency_info.group[10].core_mask), + PROP(COHERENCY_GROUP_11, coherency_info.group[11].core_mask), + PROP(COHERENCY_GROUP_12, coherency_info.group[12].core_mask), + PROP(COHERENCY_GROUP_13, coherency_info.group[13].core_mask), + PROP(COHERENCY_GROUP_14, coherency_info.group[14].core_mask), + PROP(COHERENCY_GROUP_15, coherency_info.group[15].core_mask), + +#undef PROP +}; + +int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev) +{ + struct kbase_gpu_props *kprops = &kbdev->gpu_props; + struct base_gpu_props *props = &kprops->props; + u32 count = ARRAY_SIZE(gpu_property_mapping); + u32 i; + u32 size = 0; + u8 *p; + + for (i = 0; i < count; i++) { + /* 4 bytes for the ID, and the size of the property */ + size += 4 + gpu_property_mapping[i].size; + } + + kprops->prop_buffer_size = size; + kprops->prop_buffer = kmalloc(size, GFP_KERNEL); + + if (!kprops->prop_buffer) { + kprops->prop_buffer_size = 0; + return -ENOMEM; + } + + p = kprops->prop_buffer; + +#define WRITE_U8(v) (*p++ = (v) & 0xFF) +#define WRITE_U16(v) do { WRITE_U8(v); WRITE_U8((v) >> 8); } while (0) +#define WRITE_U32(v) do { WRITE_U16(v); WRITE_U16((v) >> 16); } while (0) +#define WRITE_U64(v) do { WRITE_U32(v); WRITE_U32((v) >> 32); } while (0) + + for (i = 0; i < count; i++) { + u32 type = gpu_property_mapping[i].type; + u8 type_size; + void *field = ((u8 *)props) + gpu_property_mapping[i].offset; + + switch (gpu_property_mapping[i].size) { + case 1: + type_size = 
KBASE_GPUPROP_VALUE_SIZE_U8; + break; + case 2: + type_size = KBASE_GPUPROP_VALUE_SIZE_U16; + break; + case 4: + type_size = KBASE_GPUPROP_VALUE_SIZE_U32; + break; + case 8: + type_size = KBASE_GPUPROP_VALUE_SIZE_U64; + break; + default: + dev_err(kbdev->dev, + "Invalid gpu_property_mapping type=%d size=%d", + type, gpu_property_mapping[i].size); + return -EINVAL; + } + + WRITE_U32((type<<2) | type_size); + + switch (type_size) { + case KBASE_GPUPROP_VALUE_SIZE_U8: + WRITE_U8(*((u8 *)field)); + break; + case KBASE_GPUPROP_VALUE_SIZE_U16: + WRITE_U16(*((u16 *)field)); + break; + case KBASE_GPUPROP_VALUE_SIZE_U32: + WRITE_U32(*((u32 *)field)); + break; + case KBASE_GPUPROP_VALUE_SIZE_U64: + WRITE_U64(*((u64 *)field)); + break; + default: /* Cannot be reached */ + WARN_ON(1); + return -EINVAL; + } + } + + return 0; +} + +void kbase_gpuprops_free_user_buffer(struct kbase_device *kbdev) +{ + kfree(kbdev->gpu_props.prop_buffer); +} + +int kbase_device_populate_max_freq(struct kbase_device *kbdev) +{ + struct mali_base_gpu_core_props *core_props; + + /* obtain max configured gpu frequency, if devfreq is enabled then + * this will be overridden by the highest operating point found + */ + core_props = &(kbdev->gpu_props.props.core_props); +#ifdef GPU_FREQ_KHZ_MAX + core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX; +#else + core_props->gpu_freq_khz_max = DEFAULT_GPU_FREQ_KHZ_MAX; +#endif + + return 0; +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h new file mode 100644 index 0000000..5eee794 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h @@ -0,0 +1,135 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + *//* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * (C) COPYRIGHT 2011-2015, 2017, 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * @file mali_kbase_gpuprops.h + * Base kernel property query APIs + */ + +#ifndef _KBASE_GPUPROPS_H_ +#define _KBASE_GPUPROPS_H_ + +#include "mali_kbase_gpuprops_types.h" + +/* Forward definition - see mali_kbase.h */ +struct kbase_device; + +/** + * KBASE_UBFX32 - Extracts bits from a 32-bit bitfield. + * @value: The value from which to extract bits. + * @offset: The first bit to extract (0 being the LSB). + * @size: The number of bits to extract. + * + * Context: @offset + @size <= 32. + * + * Return: Bits [@offset, @offset + @size) from @value. 
+ */ +/* from mali_cdsb.h */ +#define KBASE_UBFX32(value, offset, size) \ + (((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1)) + +/** + * @brief Set up Kbase GPU properties. + * + * Set up Kbase GPU properties with information from the GPU registers + * + * @param kbdev The struct kbase_device structure for the device + */ +void kbase_gpuprops_set(struct kbase_device *kbdev); + +/** + * kbase_gpuprops_set_features - Set up Kbase GPU properties + * @kbdev: Device pointer + * + * This function sets up GPU properties that are dependent on the hardware + * features bitmask. This function must be preceded by a call to + * kbase_hw_set_features_mask(). + * + * Return: Zero on success, Linux error code on failure + */ +int kbase_gpuprops_set_features(struct kbase_device *kbdev); + +/** + * kbase_gpuprops_update_l2_features - Update GPU property of L2_FEATURES + * @kbdev: Device pointer + * + * This function updates l2_features and the log2 cache size. + * + * Return: Zero on success, Linux error code for failure + */ +int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev); + +/** + * kbase_gpuprops_populate_user_buffer - Populate the GPU properties buffer + * @kbdev: The kbase device + * + * Fills prop_buffer with the GPU properties for user space to read. + */ +int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev); + +/** + * kbase_gpuprops_free_user_buffer - Free the GPU properties buffer. + * @kbdev: kbase device pointer + * + * Free the GPU properties buffer allocated from + * kbase_gpuprops_populate_user_buffer. + */ +void kbase_gpuprops_free_user_buffer(struct kbase_device *kbdev); + +/** + * kbase_device_populate_max_freq - Populate max gpu frequency. + * @kbdev: kbase device pointer + * + * Populate the maximum gpu frequency to be used when devfreq is disabled. + * + * Return: 0 on success and non-zero value on failure.
+ */ +int kbase_device_populate_max_freq(struct kbase_device *kbdev); + +/** + * kbase_gpuprops_update_core_props_gpu_id - break down gpu id value + * @gpu_props: the &base_gpu_props structure + * + * Break down gpu_id value stored in base_gpu_props::raw_props.gpu_id into + * separate fields (version_status, minor_revision, major_revision, product_id) + * stored in base_gpu_props::core_props. + */ +void kbase_gpuprops_update_core_props_gpu_id( + struct base_gpu_props * const gpu_props); + +#endif /* _KBASE_GPUPROPS_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h new file mode 100644 index 0000000..ec6f1c3 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h @@ -0,0 +1,98 @@ +/* + * + * (C) COPYRIGHT 2011-2018, 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file mali_kbase_gpuprops_types.h + * Base kernel property query APIs + */ + +#ifndef _KBASE_GPUPROPS_TYPES_H_ +#define _KBASE_GPUPROPS_TYPES_H_ + +#include "mali_base_kernel.h" + +#define KBASE_GPU_SPEED_MHZ 123 +#define KBASE_GPU_PC_SIZE_LOG2 24U + +struct kbase_gpuprops_regdump { + u32 gpu_id; + u32 l2_features; + u32 core_features; + u32 tiler_features; + u32 mem_features; + u32 mmu_features; + u32 as_present; + u32 js_present; + u32 thread_max_threads; + u32 thread_max_workgroup_size; + u32 thread_max_barrier_size; + u32 thread_features; + u32 thread_tls_alloc; + u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; + u32 js_features[GPU_MAX_JOB_SLOTS]; + u32 shader_present_lo; + u32 shader_present_hi; + u32 tiler_present_lo; + u32 tiler_present_hi; + u32 l2_present_lo; + u32 l2_present_hi; + u32 stack_present_lo; + u32 stack_present_hi; + u32 coherency_features; +}; + +struct kbase_gpu_cache_props { + u8 associativity; + u8 external_bus_width; +}; + +struct kbase_gpu_mem_props { + u8 core_group; +}; + +struct kbase_gpu_mmu_props { + u8 va_bits; + u8 pa_bits; +}; + +struct kbase_gpu_props { + /* kernel-only properties */ + u8 num_cores; + u8 num_core_groups; + u8 num_address_spaces; + u8 num_job_slots; + + struct kbase_gpu_cache_props l2_props; + + struct kbase_gpu_mem_props mem; + struct kbase_gpu_mmu_props mmu; + + /* Properties shared with userspace */ + struct base_gpu_props props; + + u32 prop_buffer_size; + void *prop_buffer; +}; + +#endif /* _KBASE_GPUPROPS_TYPES_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.c new file mode 100644 index 0000000..6a47c9d --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.c @@ -0,0 +1,269 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_gwt.h" +#include + +static inline void kbase_gpu_gwt_setup_page_permission( + struct kbase_context *kctx, + unsigned long flag, + struct rb_node *node) +{ + struct rb_node *rbnode = node; + + while (rbnode) { + struct kbase_va_region *reg; + int err = 0; + + reg = rb_entry(rbnode, struct kbase_va_region, rblink); + if (reg->nr_pages && !kbase_is_region_invalid_or_free(reg) && + (reg->flags & KBASE_REG_GPU_WR)) { + err = kbase_mmu_update_pages(kctx, reg->start_pfn, + kbase_get_gpu_phy_pages(reg), + reg->gpu_alloc->nents, + reg->flags & flag, + reg->gpu_alloc->group_id); + if (err) + dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages failure\n"); + } + + rbnode = rb_next(rbnode); + } +} + +static void kbase_gpu_gwt_setup_pages(struct kbase_context *kctx, + unsigned long flag) +{ + kbase_gpu_gwt_setup_page_permission(kctx, flag, + rb_first(&(kctx->reg_rbtree_same))); + kbase_gpu_gwt_setup_page_permission(kctx, flag, + rb_first(&(kctx->reg_rbtree_custom))); +} + + +int kbase_gpu_gwt_start(struct kbase_context *kctx) +{ + kbase_gpu_vm_lock(kctx); + if (kctx->gwt_enabled) { + kbase_gpu_vm_unlock(kctx); + return -EBUSY; + } + + INIT_LIST_HEAD(&kctx->gwt_current_list); + INIT_LIST_HEAD(&kctx->gwt_snapshot_list); + + 
/* If GWT is enabled using new vector dumping format + * from user space, back up status of the job serialization flag and + * use full serialisation of jobs for dumping. + * Status will be restored on end of dumping in gwt_stop. + */ + kctx->kbdev->backup_serialize_jobs = kctx->kbdev->serialize_jobs; + kctx->kbdev->serialize_jobs = KBASE_SERIALIZE_INTRA_SLOT | + KBASE_SERIALIZE_INTER_SLOT; + + /* Mark gwt enabled before making pages read only in case a + write page fault is triggered while we're still in this loop. + (kbase_gpu_vm_lock() doesn't prevent this!) + */ + kctx->gwt_enabled = true; + kctx->gwt_was_enabled = true; + + kbase_gpu_gwt_setup_pages(kctx, ~KBASE_REG_GPU_WR); + + kbase_gpu_vm_unlock(kctx); + return 0; +} + +int kbase_gpu_gwt_stop(struct kbase_context *kctx) +{ + struct kbasep_gwt_list_element *pos, *n; + + kbase_gpu_vm_lock(kctx); + if (!kctx->gwt_enabled) { + kbase_gpu_vm_unlock(kctx); + return -EINVAL; + } + + list_for_each_entry_safe(pos, n, &kctx->gwt_current_list, link) { + list_del(&pos->link); + kfree(pos); + } + + list_for_each_entry_safe(pos, n, &kctx->gwt_snapshot_list, link) { + list_del(&pos->link); + kfree(pos); + } + + kctx->kbdev->serialize_jobs = kctx->kbdev->backup_serialize_jobs; + + kbase_gpu_gwt_setup_pages(kctx, ~0UL); + + kctx->gwt_enabled = false; + kbase_gpu_vm_unlock(kctx); + return 0; +} + + +static int list_cmp_function(void *priv, struct list_head *a, + struct list_head *b) +{ + struct kbasep_gwt_list_element *elementA = container_of(a, + struct kbasep_gwt_list_element, link); + struct kbasep_gwt_list_element *elementB = container_of(b, + struct kbasep_gwt_list_element, link); + + CSTD_UNUSED(priv); + + if (elementA->page_addr > elementB->page_addr) + return 1; + return -1; +} + +static void kbase_gpu_gwt_collate(struct kbase_context *kctx, + struct list_head *snapshot_list) +{ + struct kbasep_gwt_list_element *pos, *n; + struct kbasep_gwt_list_element *collated = NULL; + + /* Sort the list */ + list_sort(NULL, 
snapshot_list, list_cmp_function); + + /* Combine contiguous areas. */ + list_for_each_entry_safe(pos, n, snapshot_list, link) { + if (collated == NULL || collated->region != + pos->region || + (collated->page_addr + + (collated->num_pages * PAGE_SIZE)) != + pos->page_addr) { + /* This is the first time through, a new region or + * is not contiguous - start collating to this element + */ + collated = pos; + } else { + /* contiguous so merge */ + collated->num_pages += pos->num_pages; + /* remove element from list */ + list_del(&pos->link); + kfree(pos); + } + } +} + +int kbase_gpu_gwt_dump(struct kbase_context *kctx, + union kbase_ioctl_cinstr_gwt_dump *gwt_dump) +{ + const u32 ubuf_size = gwt_dump->in.len; + u32 ubuf_count = 0; + __user void *user_addr = (__user void *) + (uintptr_t)gwt_dump->in.addr_buffer; + __user void *user_sizes = (__user void *) + (uintptr_t)gwt_dump->in.size_buffer; + + kbase_gpu_vm_lock(kctx); + + if (!kctx->gwt_enabled) { + kbase_gpu_vm_unlock(kctx); + /* gwt_dump shouldn't be called when gwt is disabled */ + return -EPERM; + } + + if (!gwt_dump->in.len || !gwt_dump->in.addr_buffer + || !gwt_dump->in.size_buffer) { + kbase_gpu_vm_unlock(kctx); + /* We don't have any valid user space buffer to copy the + * write modified addresses. + */ + return -EINVAL; + } + + if (list_empty(&kctx->gwt_snapshot_list) && + !list_empty(&kctx->gwt_current_list)) { + + list_replace_init(&kctx->gwt_current_list, + &kctx->gwt_snapshot_list); + + /* We have collected all write faults so far + * and they will be passed on to user space. + * Reset the page flags state to allow collection of + * further write faults. 
+ */ + kbase_gpu_gwt_setup_pages(kctx, ~KBASE_REG_GPU_WR); + + /* Sort and combine consecutive pages in the dump list*/ + kbase_gpu_gwt_collate(kctx, &kctx->gwt_snapshot_list); + } + + while ((!list_empty(&kctx->gwt_snapshot_list))) { + u64 addr_buffer[32]; + u64 num_page_buffer[32]; + u32 count = 0; + int err; + struct kbasep_gwt_list_element *dump_info, *n; + + list_for_each_entry_safe(dump_info, n, + &kctx->gwt_snapshot_list, link) { + addr_buffer[count] = dump_info->page_addr; + num_page_buffer[count] = dump_info->num_pages; + count++; + list_del(&dump_info->link); + kfree(dump_info); + if (ARRAY_SIZE(addr_buffer) == count || + ubuf_size == (ubuf_count + count)) + break; + } + + if (count) { + err = copy_to_user((user_addr + + (ubuf_count * sizeof(u64))), + (void *)addr_buffer, + count * sizeof(u64)); + if (err) { + dev_err(kctx->kbdev->dev, "Copy to user failure\n"); + kbase_gpu_vm_unlock(kctx); + return -EFAULT; /* copy_to_user() returns a byte count, not an errno */ + } + err = copy_to_user((user_sizes + + (ubuf_count * sizeof(u64))), + (void *)num_page_buffer, + count * sizeof(u64)); + if (err) { + dev_err(kctx->kbdev->dev, "Copy to user failure\n"); + kbase_gpu_vm_unlock(kctx); + return -EFAULT; /* copy_to_user() returns a byte count, not an errno */ + } + + ubuf_count += count; + } + + if (ubuf_count == ubuf_size) + break; + } + + if (!list_empty(&kctx->gwt_snapshot_list)) + gwt_dump->out.more_data_available = 1; + else + gwt_dump->out.more_data_available = 0; + + gwt_dump->out.no_of_addr_collected = ubuf_count; + kbase_gpu_vm_unlock(kctx); + return 0; +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.h new file mode 100644 index 0000000..7e7746e --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.h @@ -0,0 +1,55 @@ +/* + * + * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved.
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#if !defined(_KBASE_GWT_H) +#define _KBASE_GWT_H + +#include +#include + +/** + * kbase_gpu_gwt_start - Start the GPU write tracking + * @kctx: Pointer to kernel context + * + * @return 0 on success, error on failure. + */ +int kbase_gpu_gwt_start(struct kbase_context *kctx); + +/** + * kbase_gpu_gwt_stop - Stop the GPU write tracking + * @kctx: Pointer to kernel context + * + * @return 0 on success, error on failure. + */ +int kbase_gpu_gwt_stop(struct kbase_context *kctx); + +/** + * kbase_gpu_gwt_dump - Pass page address of faulting addresses to user space. + * @kctx: Pointer to kernel context + * @gwt_dump: User space data to be passed. + * + * @return 0 on success, error on failure. + */ +int kbase_gpu_gwt_dump(struct kbase_context *kctx, + union kbase_ioctl_cinstr_gwt_dump *gwt_dump); + +#endif /* _KBASE_GWT_H */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c new file mode 100644 index 0000000..f8a9248 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c @@ -0,0 +1,425 @@ +/* + * + * (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * Run-time work-arounds helpers + */ + +#include +#include +#include "gpu/mali_kbase_gpu_regmap.h" +#include "mali_kbase.h" +#include "mali_kbase_hw.h" + +void kbase_hw_set_features_mask(struct kbase_device *kbdev) +{ + const enum base_hw_feature *features; + u32 gpu_id; + + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + + switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { + case GPU_ID2_PRODUCT_TMIX: + features = base_hw_features_tMIx; + break; + case GPU_ID2_PRODUCT_THEX: + features = base_hw_features_tHEx; + break; + case GPU_ID2_PRODUCT_TSIX: + features = base_hw_features_tSIx; + break; + case GPU_ID2_PRODUCT_TDVX: + features = base_hw_features_tDVx; + break; + case GPU_ID2_PRODUCT_TNOX: + features = base_hw_features_tNOx; + break; + case GPU_ID2_PRODUCT_TGOX: + features = base_hw_features_tGOx; + break; + case GPU_ID2_PRODUCT_TTRX: + features = base_hw_features_tTRx; + break; + case GPU_ID2_PRODUCT_TNAX: + features = base_hw_features_tNAx; + break; + case GPU_ID2_PRODUCT_LBEX: + case GPU_ID2_PRODUCT_TBEX: + features = base_hw_features_tBEx; + break; + case GPU_ID2_PRODUCT_TDUX: + features = base_hw_features_tDUx; + break; + case GPU_ID2_PRODUCT_TODX: + case GPU_ID2_PRODUCT_LODX: + features = base_hw_features_tODx; + 
break; + case GPU_ID2_PRODUCT_TGRX: + features = base_hw_features_tGRx; + break; + case GPU_ID2_PRODUCT_TVAX: + features = base_hw_features_tVAx; + break; + case GPU_ID2_PRODUCT_TTUX: + /* Fallthrough */ + case GPU_ID2_PRODUCT_LTUX: + features = base_hw_features_tTUx; + break; + case GPU_ID2_PRODUCT_TE2X: + features = base_hw_features_tE2x; + break; + default: + features = base_hw_features_generic; + break; + } + + for (; *features != BASE_HW_FEATURE_END; features++) + set_bit(*features, &kbdev->hw_features_mask[0]); + +#if defined(CONFIG_MALI_VECTOR_DUMP) + /* When dumping is enabled, need to disable flush reduction optimization + * for GPUs on which it is safe to have only cache clean operation at + * the end of job chain. + * This is required to make vector dump work. There is some discrepancy + * in the implementation of flush reduction optimization due to + * unclear or ambiguous ARCH spec. + */ + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CLEAN_ONLY_SAFE)) + clear_bit(BASE_HW_FEATURE_FLUSH_REDUCTION, + &kbdev->hw_features_mask[0]); +#endif +} + +/** + * kbase_hw_get_issues_for_new_id - Get the hardware issues for a new GPU ID + * @kbdev: Device pointer + * + * Return: pointer to an array of hardware issues, terminated by + * BASE_HW_ISSUE_END. + * + * In debugging versions of the driver, unknown versions of a known GPU will + * be treated as the most recent known version not later than the actual + * version. In such circumstances, the GPU ID in @kbdev will also be replaced + * with the most recent known version. + * + * Note: The GPU configuration must have been read by kbase_gpuprops_get_props() + * before calling this function. 
+ */ +static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( + struct kbase_device *kbdev) +{ + const enum base_hw_issue *issues = NULL; + + struct base_hw_product { + u32 product_model; + struct { + u32 version; + const enum base_hw_issue *issues; + } map[7]; + }; + + static const struct base_hw_product base_hw_products[] = { + {GPU_ID2_PRODUCT_TMIX, + {{GPU_ID2_VERSION_MAKE(0, 0, 1), + base_hw_issues_tMIx_r0p0_05dev0}, + {GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tMIx_r0p1}, + {U32_MAX /* sentinel value */, NULL} } }, + + {GPU_ID2_PRODUCT_THEX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tHEx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tHEx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tHEx_r0p1}, + {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tHEx_r0p1}, + {GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tHEx_r0p2}, + {GPU_ID2_VERSION_MAKE(0, 3, 0), base_hw_issues_tHEx_r0p3}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TSIX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tSIx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tSIx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tSIx_r0p1}, + {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tSIx_r1p0}, + {GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tSIx_r1p1}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TDVX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDVx_r0p0}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TNOX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNOx_r0p0}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TGOX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGOx_r0p0}, + {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tGOx_r1p0}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TTRX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tTRx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTRx_r0p1}, + 
{GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tTRx_r0p1}, + {GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tTRx_r0p2}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TNAX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNAx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tNAx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tNAx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tNAx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tNAx_r0p1}, + {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tNAx_r0p1}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_LBEX, + {{GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_lBEx_r1p0}, + {GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_lBEx_r1p1}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TBEX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBEx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tBEx_r0p0}, + {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tBEx_r0p1}, + {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBEx_r1p0}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TDUX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDUx_r0p0}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TODX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_LODX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TGRX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGRx_r0p0}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TVAX, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tVAx_r0p0}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TTUX, + {{GPU_ID2_VERSION_MAKE(2, 0, 0), base_hw_issues_tTUx_r0p0}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_LTUX, + {{GPU_ID2_VERSION_MAKE(3, 0, 0), base_hw_issues_tTUx_r0p0}, + {U32_MAX, NULL} } }, + + {GPU_ID2_PRODUCT_TE2X, + {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tE2x_r0p0}, + {U32_MAX, NULL} } }, + }; + + u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + const u32 
product_model = gpu_id & GPU_ID2_PRODUCT_MODEL; + const struct base_hw_product *product = NULL; + size_t p; + + /* Stop when we reach the end of the products array. */ + for (p = 0; p < ARRAY_SIZE(base_hw_products); ++p) { + if (product_model == base_hw_products[p].product_model) { + product = &base_hw_products[p]; + break; + } + } + + if (product != NULL) { + /* Found a matching product. */ + const u32 version = gpu_id & GPU_ID2_VERSION; + u32 fallback_version = 0; + const enum base_hw_issue *fallback_issues = NULL; + size_t v; + + /* Stop when we reach the end of the map. */ + for (v = 0; product->map[v].version != U32_MAX; ++v) { + + if (version == product->map[v].version) { + /* Exact match so stop. */ + issues = product->map[v].issues; + break; + } + + /* Check whether this is a candidate for most recent + known version not later than the actual + version. */ + if ((version > product->map[v].version) && + (product->map[v].version >= fallback_version)) { +#if MALI_CUSTOMER_RELEASE + /* Match on version's major and minor fields */ + if (((version ^ product->map[v].version) >> + GPU_ID2_VERSION_MINOR_SHIFT) == 0) +#endif + { + fallback_version = product->map[v].version; + fallback_issues = product->map[v].issues; + } + } + } + + if ((issues == NULL) && (fallback_issues != NULL)) { + /* Fall back to the issue set of the most recent known + version not later than the actual version. 
*/ + issues = fallback_issues; + +#if MALI_CUSTOMER_RELEASE + dev_warn(kbdev->dev, + "GPU hardware issue table may need updating:\n" +#else + dev_info(kbdev->dev, +#endif + "r%dp%d status %d is unknown; treating as r%dp%d status %d", + (gpu_id & GPU_ID2_VERSION_MAJOR) >> + GPU_ID2_VERSION_MAJOR_SHIFT, + (gpu_id & GPU_ID2_VERSION_MINOR) >> + GPU_ID2_VERSION_MINOR_SHIFT, + (gpu_id & GPU_ID2_VERSION_STATUS) >> + GPU_ID2_VERSION_STATUS_SHIFT, + (fallback_version & GPU_ID2_VERSION_MAJOR) >> + GPU_ID2_VERSION_MAJOR_SHIFT, + (fallback_version & GPU_ID2_VERSION_MINOR) >> + GPU_ID2_VERSION_MINOR_SHIFT, + (fallback_version & GPU_ID2_VERSION_STATUS) >> + GPU_ID2_VERSION_STATUS_SHIFT); + + gpu_id &= ~GPU_ID2_VERSION; + gpu_id |= fallback_version; + kbdev->gpu_props.props.raw_props.gpu_id = gpu_id; + + kbase_gpuprops_update_core_props_gpu_id( + &kbdev->gpu_props.props); + } + } + return issues; +} + +int kbase_hw_set_issues_mask(struct kbase_device *kbdev) +{ + const enum base_hw_issue *issues; + u32 gpu_id; + u32 impl_tech; + + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + impl_tech = kbdev->gpu_props.props.thread_props.impl_tech; + + if (impl_tech != IMPLEMENTATION_MODEL) { + issues = kbase_hw_get_issues_for_new_id(kbdev); + if (issues == NULL) { + dev_err(kbdev->dev, + "Unknown GPU ID %x", gpu_id); + return -EINVAL; + } + +#if !MALI_CUSTOMER_RELEASE + /* The GPU ID might have been replaced with the last + known version of the same GPU. 
*/ + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; +#endif + } else { + /* Software model */ + switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { + case GPU_ID2_PRODUCT_TMIX: + issues = base_hw_issues_model_tMIx; + break; + case GPU_ID2_PRODUCT_THEX: + issues = base_hw_issues_model_tHEx; + break; + case GPU_ID2_PRODUCT_TSIX: + issues = base_hw_issues_model_tSIx; + break; + case GPU_ID2_PRODUCT_TDVX: + issues = base_hw_issues_model_tDVx; + break; + case GPU_ID2_PRODUCT_TNOX: + issues = base_hw_issues_model_tNOx; + break; + case GPU_ID2_PRODUCT_TGOX: + issues = base_hw_issues_model_tGOx; + break; + case GPU_ID2_PRODUCT_TTRX: + issues = base_hw_issues_model_tTRx; + break; + case GPU_ID2_PRODUCT_TNAX: + issues = base_hw_issues_model_tNAx; + break; + case GPU_ID2_PRODUCT_LBEX: + case GPU_ID2_PRODUCT_TBEX: + issues = base_hw_issues_model_tBEx; + break; + case GPU_ID2_PRODUCT_TDUX: + issues = base_hw_issues_model_tDUx; + break; + case GPU_ID2_PRODUCT_TODX: + case GPU_ID2_PRODUCT_LODX: + issues = base_hw_issues_model_tODx; + break; + case GPU_ID2_PRODUCT_TGRX: + issues = base_hw_issues_model_tGRx; + break; + case GPU_ID2_PRODUCT_TVAX: + issues = base_hw_issues_model_tVAx; + break; + case GPU_ID2_PRODUCT_TTUX: + case GPU_ID2_PRODUCT_LTUX: + issues = base_hw_issues_model_tTUx; + break; + case GPU_ID2_PRODUCT_TE2X: + issues = base_hw_issues_model_tE2x; + break; + default: + dev_err(kbdev->dev, + "Unknown GPU ID %x", gpu_id); + return -EINVAL; + } + } + + dev_info(kbdev->dev, + "GPU identified as 0x%x arch %d.%d.%d r%dp%d status %d", + (gpu_id & GPU_ID2_PRODUCT_MAJOR) >> + GPU_ID2_PRODUCT_MAJOR_SHIFT, + (gpu_id & GPU_ID2_ARCH_MAJOR) >> + GPU_ID2_ARCH_MAJOR_SHIFT, + (gpu_id & GPU_ID2_ARCH_MINOR) >> + GPU_ID2_ARCH_MINOR_SHIFT, + (gpu_id & GPU_ID2_ARCH_REV) >> + GPU_ID2_ARCH_REV_SHIFT, + (gpu_id & GPU_ID2_VERSION_MAJOR) >> + GPU_ID2_VERSION_MAJOR_SHIFT, + (gpu_id & GPU_ID2_VERSION_MINOR) >> + GPU_ID2_VERSION_MINOR_SHIFT, + (gpu_id & GPU_ID2_VERSION_STATUS) >> + 
GPU_ID2_VERSION_STATUS_SHIFT); + + for (; *issues != BASE_HW_ISSUE_END; issues++) + set_bit(*issues, &kbdev->hw_issues_mask[0]); + + return 0; +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h new file mode 100644 index 0000000..f386b16 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h @@ -0,0 +1,70 @@ +/* + * + * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file + * Run-time work-arounds helpers + */ + +#ifndef _KBASE_HW_H_ +#define _KBASE_HW_H_ + +#include "mali_kbase_defs.h" + +/** + * @brief Tell whether a work-around should be enabled + */ +#define kbase_hw_has_issue(kbdev, issue)\ + test_bit(issue, &(kbdev)->hw_issues_mask[0]) + +/** + * @brief Tell whether a feature is supported + */ +#define kbase_hw_has_feature(kbdev, feature)\ + test_bit(feature, &(kbdev)->hw_features_mask[0]) + +/** + * kbase_hw_set_issues_mask - Set the hardware issues mask based on the GPU ID + * @kbdev: Device pointer + * + * Return: 0 if the GPU ID was recognized, otherwise -EINVAL. + * + * The GPU ID is read from the @kbdev. 
+ * + * In debugging versions of the driver, unknown versions of a known GPU with a + * new-format ID will be treated as the most recent known version not later + * than the actual version. In such circumstances, the GPU ID in @kbdev will + * also be replaced with the most recent known version. + * + * Note: The GPU configuration must have been read by + * kbase_gpuprops_get_props() before calling this function. + */ +int kbase_hw_set_issues_mask(struct kbase_device *kbdev); + +/** + * @brief Set the features mask depending on the GPU ID + */ +void kbase_hw_set_features_mask(struct kbase_device *kbdev); + +#endif /* _KBASE_HW_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h new file mode 100644 index 0000000..89df251 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h @@ -0,0 +1,45 @@ +/* + * + * (C) COPYRIGHT 2014-2015, 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/* + * HW access backend common APIs + */ + +#ifndef _KBASE_HWACCESS_BACKEND_H_ +#define _KBASE_HWACCESS_BACKEND_H_ + +/** + * kbase_backend_devfreq_init - Perform backend devfreq related initialization. 
+ * @kbdev: Device pointer + * + * Return: 0 on success, or an error code on failure. + */ +int kbase_backend_devfreq_init(struct kbase_device *kbdev); + +/** + * kbase_backend_devfreq_term - Perform backend-devfreq termination. + * @kbdev: Device pointer + */ +void kbase_backend_devfreq_term(struct kbase_device *kbdev); + +#endif /* _KBASE_HWACCESS_BACKEND_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h new file mode 100644 index 0000000..124a2d9 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h @@ -0,0 +1,51 @@ +/* + * + * (C) COPYRIGHT 2014, 2016, 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/** + * @file mali_kbase_hwaccess_gpu_defs.h + * HW access common definitions + */ + +#ifndef _KBASE_HWACCESS_DEFS_H_ +#define _KBASE_HWACCESS_DEFS_H_ + +#include + +/** + * struct kbase_hwaccess_data - object encapsulating the GPU backend specific + * data for the HW access layer. + * hwaccess_lock (a spinlock) must be held when + * accessing this structure. 
+ * @active_kctx: pointer to active kbase context which last submitted an + * atom to GPU and while the context is active it can + * submit new atoms to GPU from the irq context also, without + * going through the bottom half of job completion path. + * @backend: GPU backend specific data for HW access layer + */ +struct kbase_hwaccess_data { + struct kbase_context *active_kctx[BASE_JM_MAX_NR_SLOTS]; + + struct kbase_backend_data backend; +}; + +#endif /* _KBASE_HWACCESS_DEFS_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h new file mode 100644 index 0000000..3ae0dbe --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h @@ -0,0 +1,87 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + *//* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * (C) COPYRIGHT 2014-2015, 2018, 2019-2020 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + +/** + * Base kernel property query backend APIs + */ + +#ifndef _KBASE_HWACCESS_GPUPROPS_H_ +#define _KBASE_HWACCESS_GPUPROPS_H_ + +/** + * kbase_backend_gpuprops_get() - Fill @regdump with GPU properties read from + * GPU + * @kbdev: Device pointer + * @regdump: Pointer to struct kbase_gpuprops_regdump structure + * + * The caller should ensure that GPU remains powered-on during this function. + * + * Return: Zero on success, Linux error code on failure + */ +int kbase_backend_gpuprops_get(struct kbase_device *kbdev, + struct kbase_gpuprops_regdump *regdump); + +/** + * kbase_backend_gpuprops_get_features - Fill @regdump with GPU properties read + * from GPU + * @kbdev: Device pointer + * @regdump: Pointer to struct kbase_gpuprops_regdump structure + * + * This function reads GPU properties that are dependent on the hardware + * features bitmask. It will power-on the GPU if required. + * + * Return: Zero on success, Linux error code on failure + */ +int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, + struct kbase_gpuprops_regdump *regdump); + +/** + * kbase_backend_gpuprops_get_l2_features - Fill @regdump with L2_FEATURES read + * from GPU + * @kbdev: Device pointer + * @regdump: Pointer to struct kbase_gpuprops_regdump structure + * + * This function reads L2_FEATURES register that is dependent on the hardware + * features bitmask. It will power-on the GPU if required.
+ * + * Return: Zero on success, Linux error code on failure + */ +int kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev, + struct kbase_gpuprops_regdump *regdump); + + +#endif /* _KBASE_HWACCESS_GPUPROPS_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h new file mode 100644 index 0000000..be85491 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h @@ -0,0 +1,151 @@ +/* + * + * (C) COPYRIGHT 2014-2015, 2017-2018, 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * HW Access instrumentation common APIs + */ + +#ifndef _KBASE_HWACCESS_INSTR_H_ +#define _KBASE_HWACCESS_INSTR_H_ + +#include + +/** + * struct kbase_instr_hwcnt_enable - Enable hardware counter collection. + * @dump_buffer: GPU address to write counters to. + * @dump_buffer_bytes: Size in bytes of the buffer pointed to by dump_buffer. + * @jm_bm: counters selection bitmask (JM). + * @shader_bm: counters selection bitmask (Shader). + * @tiler_bm: counters selection bitmask (Tiler). + * @mmu_l2_bm: counters selection bitmask (MMU_L2). 
+ * @use_secondary: use secondary performance counters set for applicable + * counter blocks. + */ +struct kbase_instr_hwcnt_enable { + u64 dump_buffer; + u64 dump_buffer_bytes; + u32 jm_bm; + u32 shader_bm; + u32 tiler_bm; + u32 mmu_l2_bm; + bool use_secondary; +}; + +/** + * kbase_instr_hwcnt_enable_internal() - Enable HW counters collection + * @kbdev: Kbase device + * @kctx: Kbase context + * @enable: HW counter setup parameters + * + * Context: might sleep, waiting for reset to complete + * + * Return: 0 on success + */ +int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, + struct kbase_context *kctx, + struct kbase_instr_hwcnt_enable *enable); + +/** + * kbase_instr_hwcnt_disable_internal() - Disable HW counters collection + * @kctx: Kbase context + * + * Context: might sleep, waiting for an ongoing dump to complete + * + * Return: 0 on success + */ +int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx); + +/** + * kbase_instr_hwcnt_request_dump() - Request HW counter dump from GPU + * @kctx: Kbase context + * + * Caller must either wait for kbase_instr_hwcnt_dump_complete() to return true, + * or call kbase_instr_hwcnt_wait_for_dump(). + * + * Return: 0 on success + */ +int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx); + +/** + * kbase_instr_hwcnt_wait_for_dump() - Wait until pending HW counter dump has + * completed. + * @kctx: Kbase context + * + * Context: will sleep, waiting for dump to complete + * + * Return: 0 on success + */ +int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx); + +/** + * kbase_instr_hwcnt_dump_complete - Tell whether the HW counters dump has + * completed + * @kctx: Kbase context + * @success: Set to true if successful + * + * Context: does not sleep.
+ * + * Return: true if the dump is complete + */ +bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx, + bool * const success); + +/** + * kbase_instr_hwcnt_clear() - Clear HW counters + * @kctx: Kbase context + * + * Context: might sleep, waiting for reset to complete + * + * Return: 0 on success + */ +int kbase_instr_hwcnt_clear(struct kbase_context *kctx); + +/** + * kbase_instr_backend_init() - Initialise the instrumentation backend + * @kbdev: Kbase device + * + * This function should be called during driver initialization. + * + * Return: 0 on success + */ +int kbase_instr_backend_init(struct kbase_device *kbdev); + +/** + * kbase_instr_backend_term() - Terminate the instrumentation backend + * @kbdev: Kbase device + * + * This function should be called during driver termination. + */ +void kbase_instr_backend_term(struct kbase_device *kbdev); + +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS +/** + * kbase_instr_backend_debugfs_init() - Add a debugfs entry for the + * hardware counter set. + * @kbdev: kbase device + */ +void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev); +#endif + +#endif /* _KBASE_HWACCESS_INSTR_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h new file mode 100644 index 0000000..3d5934e --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h @@ -0,0 +1,302 @@ +/* + * + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/* + * HW access job manager common APIs + */ + +#ifndef _KBASE_HWACCESS_JM_H_ +#define _KBASE_HWACCESS_JM_H_ + +/** + * kbase_backend_run_atom() - Run an atom on the GPU + * @kbdev: Device pointer + * @katom: Atom to run + * + * Caller must hold the HW access lock + */ +void kbase_backend_run_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom); + +/** + * kbase_backend_slot_update - Update state based on slot ringbuffers + * + * @kbdev: Device pointer + * + * Inspect the jobs in the slot ringbuffers and update state. + * + * This will cause jobs to be submitted to hardware if they are unblocked + */ +void kbase_backend_slot_update(struct kbase_device *kbdev); + +/** + * kbase_backend_find_and_release_free_address_space() - Release a free AS + * @kbdev: Device pointer + * @kctx: Context pointer + * + * This function can evict an idle context from the runpool, freeing up the + * address space it was using. + * + * The address space is marked as in use. The caller must either assign a + * context using kbase_gpu_use_ctx(), or release it using + * kbase_ctx_sched_release() + * + * Return: Number of free address space, or KBASEP_AS_NR_INVALID if none + * available + */ +int kbase_backend_find_and_release_free_address_space( + struct kbase_device *kbdev, struct kbase_context *kctx); + +/** + * kbase_backend_use_ctx() - Activate a currently unscheduled context, using the + * provided address space. + * @kbdev: Device pointer + * @kctx: Context pointer.
May be NULL + * @as_nr: Free address space to use + * + * kbase_gpu_next_job() will pull atoms from the active context. + * + * Return: true if successful, false if ASID not assigned. + */ +bool kbase_backend_use_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx, + int as_nr); + +/** + * kbase_backend_use_ctx_sched() - Activate a context. + * @kbdev: Device pointer + * @kctx: Context pointer + * @js: Job slot to activate context on + * + * kbase_gpu_next_job() will pull atoms from the active context. + * + * The context must already be scheduled and assigned to an address space. If + * the context is not scheduled, then kbase_gpu_use_ctx() should be used + * instead. + * + * Caller must hold hwaccess_lock + * + * Return: true if context is now active, false otherwise (ie if context does + * not have an address space assigned) + */ +bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, + struct kbase_context *kctx, int js); + +/** + * kbase_backend_release_ctx_irq - Release a context from the GPU. This will + * de-assign the assigned address space. + * @kbdev: Device pointer + * @kctx: Context pointer + * + * Caller must hold kbase_device->mmu_hw_mutex and hwaccess_lock + */ +void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, + struct kbase_context *kctx); + +/** + * kbase_backend_release_ctx_noirq - Release a context from the GPU. This will + * de-assign the assigned address space. + * @kbdev: Device pointer + * @kctx: Context pointer + * + * Caller must hold kbase_device->mmu_hw_mutex + * + * This function must perform any operations that could not be performed in IRQ + * context by kbase_backend_release_ctx_irq(). 
+ */ +void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, + struct kbase_context *kctx); + +/** + * kbase_backend_cache_clean - Perform a cache clean if the given atom requires + * one + * @kbdev: Device pointer + * @katom: Pointer to the failed atom + * + * On some GPUs, the GPU cache must be cleaned following a failed atom. This + * function performs a clean if it is required by @katom. + */ +void kbase_backend_cache_clean(struct kbase_device *kbdev, + struct kbase_jd_atom *katom); + + +/** + * kbase_backend_complete_wq() - Perform backend-specific actions required on + * completing an atom. + * @kbdev: Device pointer + * @katom: Pointer to the atom to complete + * + * This function should only be called from kbase_jd_done_worker() or + * js_return_worker(). + * + * This function is declared void and does not return a value. + */ +void kbase_backend_complete_wq(struct kbase_device *kbdev, + struct kbase_jd_atom *katom); + +/** + * kbase_backend_complete_wq_post_sched - Perform backend-specific actions + * required on completing an atom, after + * any scheduling has taken place. + * @kbdev: Device pointer + * @core_req: Core requirements of atom + * + * This function should only be called from kbase_jd_done_worker() or + * js_return_worker(). + */ +void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, + base_jd_core_req core_req); + +/** + * kbase_backend_reset() - The GPU is being reset. Cancel all jobs on the GPU + * and remove any others from the ringbuffers.
+ * @kbdev: Device pointer + * @end_timestamp: Timestamp of reset + */ +void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp); + +/** + * kbase_backend_inspect_tail - Return the atom currently at the tail of slot + * @js + * @kbdev: Device pointer + * @js: Job slot to inspect + * + * Return : Atom currently at the tail of slot @js, or NULL + */ +struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, + int js); + +/** + * kbase_backend_nr_atoms_on_slot() - Return the number of atoms currently on a + * slot. + * @kbdev: Device pointer + * @js: Job slot to inspect + * + * Return : Number of atoms currently on slot + */ +int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js); + +/** + * kbase_backend_nr_atoms_submitted() - Return the number of atoms on a slot + * that are currently on the GPU. + * @kbdev: Device pointer + * @js: Job slot to inspect + * + * Return : Number of atoms currently on slot @js that are currently on the GPU. + */ +int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js); + +/** + * kbase_backend_ctx_count_changed() - Number of contexts ready to submit jobs + * has changed. + * @kbdev: Device pointer + * + * Perform any required backend-specific actions (eg starting/stopping + * scheduling timers). + */ +void kbase_backend_ctx_count_changed(struct kbase_device *kbdev); + +/** + * kbase_backend_timeouts_changed() - Job Scheduler timeouts have changed. + * @kbdev: Device pointer + * + * Perform any required backend-specific actions (eg updating timeouts of + * currently running atoms). + */ +void kbase_backend_timeouts_changed(struct kbase_device *kbdev); + +/** + * kbase_backend_slot_free() - Return the number of jobs that can be currently + * submitted to slot @js. + * @kbdev: Device pointer + * @js: Job slot to inspect + * + * Return : Number of jobs that can be submitted.
+ */ +int kbase_backend_slot_free(struct kbase_device *kbdev, int js); + +/** + * kbase_job_check_leave_disjoint - potentially leave disjoint state + * @kbdev: kbase device + * @target_katom: atom which is finishing + * + * Work out whether to leave disjoint state when finishing an atom that was + * originated by kbase_job_check_enter_disjoint(). + */ +void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, + struct kbase_jd_atom *target_katom); + +/** + * kbase_backend_jm_kill_running_jobs_from_kctx - Kill all jobs that are + * currently running on GPU from a context + * @kctx: Context pointer + * + * This is used in response to a page fault to remove all jobs from the faulting + * context from the hardware. + * + * Caller must hold hwaccess_lock. + */ +void kbase_backend_jm_kill_running_jobs_from_kctx(struct kbase_context *kctx); + +/** + * kbase_jm_wait_for_zero_jobs - Wait for context to have zero jobs running, and + * to be descheduled. + * @kctx: Context pointer + * + * This should be called following kbase_js_zap_context(), to ensure the context + * can be safely destroyed. + */ +void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx); + +/** + * kbase_backend_get_current_flush_id - Return the current flush ID + * + * @kbdev: Device pointer + * + * Return: the current flush ID to be recorded for each job chain + */ +u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev); + +/** + * kbase_job_slot_hardstop - Hard-stop the specified job slot + * @kctx: The kbase context that contains the job(s) that should + * be hard-stopped + * @js: The job slot to hard-stop + * @target_katom: The job that should be hard-stopped (or NULL for all + * jobs from the context) + * Context: + * The job slot lock must be held when calling this function.
+ */ +void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, + struct kbase_jd_atom *target_katom); + +/** + * kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms + * currently on the GPU + * @kbdev: Device pointer + * + * Return: true if there are any atoms on the GPU, false otherwise + */ +bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev); + +#endif /* _KBASE_HWACCESS_JM_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h new file mode 100644 index 0000000..bbaf6ea --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h @@ -0,0 +1,229 @@ +/* + * + * (C) COPYRIGHT 2014-2015, 2018-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/** + * @file mali_kbase_hwaccess_pm.h + * HW access power manager common APIs + */ + +#ifndef _KBASE_HWACCESS_PM_H_ +#define _KBASE_HWACCESS_PM_H_ + +#include +#include + +#include + +/* Forward definition - see mali_kbase.h */ +struct kbase_device; + +/* Functions common to all HW access backends */ + +/** + * Initialize the power management framework. 
+ * + * Must be called before any other power management function + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Return: 0 if the power management framework was successfully initialized. + */ +int kbase_hwaccess_pm_init(struct kbase_device *kbdev); + +/** + * Terminate the power management framework. + * + * No power management functions may be called after this + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_hwaccess_pm_term(struct kbase_device *kbdev); + +/** + * kbase_hwaccess_pm_powerup - Power up the GPU. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @flags: Flags to pass on to kbase_pm_init_hw + * + * Power up GPU after all modules have been initialized and interrupt handlers + * installed. + * + * Return: 0 if powerup was successful. + */ +int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, + unsigned int flags); + +/** + * Halt the power management framework. + * + * Should ensure that no new interrupts are generated, but allow any currently + * running interrupt handlers to complete successfully. The GPU is forced off by + * the time this function returns, regardless of whether or not the active power + * policy asks for the GPU to be powered off. 
+ * + * @param kbdev The kbase device structure for the device (must be a valid + * pointer) + */ +void kbase_hwaccess_pm_halt(struct kbase_device *kbdev); + +/** + * Perform any backend-specific actions to suspend the GPU + * + * @param kbdev The kbase device structure for the device (must be a valid + * pointer) + */ +void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev); + +/** + * Perform any backend-specific actions to resume the GPU from a suspend + * + * @param kbdev The kbase device structure for the device (must be a valid + * pointer) + */ +void kbase_hwaccess_pm_resume(struct kbase_device *kbdev); + +/** + * Perform any required actions for activating the GPU. Called when the first + * context goes active. + * + * @param kbdev The kbase device structure for the device (must be a valid + * pointer) + */ +void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev); + +/** + * Perform any required actions for idling the GPU. Called when the last + * context goes idle. + * + * @param kbdev The kbase device structure for the device (must be a valid + * pointer) + */ +void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev); + + +/** + * Set the debug core mask. + * + * This determines which cores the power manager is allowed to use. + * + * @param kbdev The kbase device structure for the device (must be a + * valid pointer) + * @param new_core_mask_js0 The core mask to use for job slot 0 + * @param new_core_mask_js1 The core mask to use for job slot 1 + * @param new_core_mask_js2 The core mask to use for job slot 2 + */ +void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, + u64 new_core_mask_js0, u64 new_core_mask_js1, + u64 new_core_mask_js2); + + +/** + * Get the current policy. + * + * Returns the policy that is currently active.
+ * + * @param kbdev The kbase device structure for the device (must be a valid + * pointer) + * + * @return The current policy + */ +const struct kbase_pm_ca_policy +*kbase_pm_ca_get_policy(struct kbase_device *kbdev); + +/** + * Change the policy to the one specified. + * + * @param kbdev The kbase device structure for the device (must be a valid + * pointer) + * @param policy The policy to change to (valid pointer returned from + * @ref kbase_pm_ca_list_policies) + */ +void kbase_pm_ca_set_policy(struct kbase_device *kbdev, + const struct kbase_pm_ca_policy *policy); + +/** + * Retrieve a static list of the available policies. + * + * @param[out] policies An array pointer to take the list of policies. This may + * be NULL. The contents of this array must not be + * modified. + * + * @return The number of policies + */ +int +kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **policies); + + +/** + * Get the current policy. + * + * Returns the policy that is currently active. + * + * @param kbdev The kbase device structure for the device (must be a valid + * pointer) + * + * @return The current policy + */ +const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev); + +/** + * Change the policy to the one specified. + * + * @param kbdev The kbase device structure for the device (must be a valid + * pointer) + * @param policy The policy to change to (valid pointer returned from + * @ref kbase_pm_list_policies) + */ +void kbase_pm_set_policy(struct kbase_device *kbdev, + const struct kbase_pm_policy *policy); + +/** + * kbase_pm_list_policies - Retrieve a static list of the available policies. + * + * @kbdev: The kbase device structure for the device. + * @list: An array pointer to take the list of policies. This may be NULL. + * The contents of this array must not be modified. 
+ * + * Return: The number of policies + */ +int kbase_pm_list_policies(struct kbase_device *kbdev, + const struct kbase_pm_policy * const **list); + +/** + * kbase_pm_protected_mode_enable - Enable protected mode + * + * @kbdev: Address of the instance of a GPU platform device. + * + * Return: Zero on success or an error code + */ +int kbase_pm_protected_mode_enable(struct kbase_device *kbdev); + +/** + * kbase_pm_protected_mode_disable - Disable protected mode + * + * @kbdev: Address of the instance of a GPU platform device. + * + * Return: Zero on success or an error code + */ +int kbase_pm_protected_mode_disable(struct kbase_device *kbdev); + +#endif /* _KBASE_HWACCESS_PM_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h new file mode 100644 index 0000000..a61e5b9 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h @@ -0,0 +1,42 @@ +/* + * + * (C) COPYRIGHT 2014,2018-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html.
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/** + * + */ + +#ifndef _KBASE_BACKEND_TIME_H_ +#define _KBASE_BACKEND_TIME_H_ + +/** + * kbase_backend_get_gpu_time() - Get current GPU time + * @kbdev: Device pointer + * @cycle_counter: Pointer to u64 to store cycle counter in + * @system_time: Pointer to u64 to store system time in + * @ts: Pointer to struct timespec64 to store current monotonic + * time in + */ +void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, + u64 *system_time, struct timespec64 *ts); + +#endif /* _KBASE_BACKEND_TIME_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt.c new file mode 100644 index 0000000..14ec5cb --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt.c @@ -0,0 +1,800 @@ +/* + * + * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Implementation of hardware counter context and accumulator APIs.
+ */ + +#include "mali_kbase_hwcnt_context.h" +#include "mali_kbase_hwcnt_accumulator.h" +#include "mali_kbase_hwcnt_backend.h" +#include "mali_kbase_hwcnt_types.h" +#include "mali_malisw.h" +#include "mali_kbase_debug.h" +#include "mali_kbase_linux.h" + +#include +#include +#include + +/** + * enum kbase_hwcnt_accum_state - Hardware counter accumulator states. + * @ACCUM_STATE_ERROR: Error state, where all accumulator operations fail. + * @ACCUM_STATE_DISABLED: Disabled state, where dumping is always disabled. + * @ACCUM_STATE_ENABLED: Enabled state, where dumping is enabled if there are + * any enabled counters. + */ +enum kbase_hwcnt_accum_state { + ACCUM_STATE_ERROR, + ACCUM_STATE_DISABLED, + ACCUM_STATE_ENABLED +}; + +/** + * struct kbase_hwcnt_accumulator - Hardware counter accumulator structure. + * @backend: Pointer to created counter backend. + * @state: The current state of the accumulator. + * - State transition from disabled->enabled or + * disabled->error requires state_lock. + * - State transition from enabled->disabled or + * enabled->error requires both accum_lock and + * state_lock. + * - Error state persists until next disable. + * @enable_map: The current set of enabled counters. + * - Must only be modified while holding both + * accum_lock and state_lock. + * - Can be read while holding either lock. + * - Must stay in sync with enable_map_any_enabled. + * @enable_map_any_enabled: True if any counters in the map are enabled, else + * false. If true, and state is ACCUM_STATE_ENABLED, + * then the counter backend will be enabled. + * - Must only be modified while holding both + * accum_lock and state_lock. + * - Can be read while holding either lock. + * - Must stay in sync with enable_map. + * @scratch_map: Scratch enable map, used as temporary enable map + * storage during dumps. + * - Must only be read or modified while holding + * accum_lock. 
+ * @accum_buf: Accumulation buffer, where dumps will be accumulated + * into on transition to a disable state. + * - Must only be read or modified while holding + * accum_lock. + * @accumulated: True if the accumulation buffer has been accumulated + * into and not subsequently read from yet, else false. + * - Must only be read or modified while holding + * accum_lock. + * @ts_last_dump_ns: Timestamp (ns) of the end time of the most recent + * dump that was requested by the user. + * - Must only be read or modified while holding + * accum_lock. + */ +struct kbase_hwcnt_accumulator { + struct kbase_hwcnt_backend *backend; + enum kbase_hwcnt_accum_state state; + struct kbase_hwcnt_enable_map enable_map; + bool enable_map_any_enabled; + struct kbase_hwcnt_enable_map scratch_map; + struct kbase_hwcnt_dump_buffer accum_buf; + bool accumulated; + u64 ts_last_dump_ns; +}; + +/** + * struct kbase_hwcnt_context - Hardware counter context structure. + * @iface: Pointer to hardware counter backend interface. + * @state_lock: Spinlock protecting state. + * @disable_count: Disable count of the context. Initialised to 1. + * Decremented when the accumulator is acquired, and incremented + * on release. Incremented on calls to + * kbase_hwcnt_context_disable[_atomic], and decremented on + * calls to kbase_hwcnt_context_enable. + * - Must only be read or modified while holding state_lock. + * @accum_lock: Mutex protecting accumulator. + * @accum_inited: Flag to prevent concurrent accumulator initialisation and/or + * termination. Set to true before accumulator initialisation, + * and false after accumulator termination. + * - Must only be modified while holding both accum_lock and + * state_lock. + * - Can be read while holding either lock. + * @accum: Hardware counter accumulator structure. 
+ */ +struct kbase_hwcnt_context { + const struct kbase_hwcnt_backend_interface *iface; + spinlock_t state_lock; + size_t disable_count; + struct mutex accum_lock; + bool accum_inited; + struct kbase_hwcnt_accumulator accum; +}; + +int kbase_hwcnt_context_init( + const struct kbase_hwcnt_backend_interface *iface, + struct kbase_hwcnt_context **out_hctx) +{ + struct kbase_hwcnt_context *hctx = NULL; + + if (!iface || !out_hctx) + return -EINVAL; + + hctx = kzalloc(sizeof(*hctx), GFP_KERNEL); + if (!hctx) + return -ENOMEM; + + hctx->iface = iface; + spin_lock_init(&hctx->state_lock); + hctx->disable_count = 1; + mutex_init(&hctx->accum_lock); + hctx->accum_inited = false; + + *out_hctx = hctx; + + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_context_init); + +void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx) +{ + if (!hctx) + return; + + /* Make sure we didn't leak the accumulator */ + WARN_ON(hctx->accum_inited); + kfree(hctx); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_context_term); + +/** + * kbasep_hwcnt_accumulator_term() - Terminate the accumulator for the context. + * @hctx: Non-NULL pointer to hardware counter context. + */ +static void kbasep_hwcnt_accumulator_term(struct kbase_hwcnt_context *hctx) +{ + WARN_ON(!hctx); + WARN_ON(!hctx->accum_inited); + + kbase_hwcnt_enable_map_free(&hctx->accum.scratch_map); + kbase_hwcnt_dump_buffer_free(&hctx->accum.accum_buf); + kbase_hwcnt_enable_map_free(&hctx->accum.enable_map); + hctx->iface->term(hctx->accum.backend); + memset(&hctx->accum, 0, sizeof(hctx->accum)); +} + +/** + * kbasep_hwcnt_accumulator_init() - Initialise the accumulator for the context. + * @hctx: Non-NULL pointer to hardware counter context. + * + * Return: 0 on success, else error code. 
+ */ +static int kbasep_hwcnt_accumulator_init(struct kbase_hwcnt_context *hctx) +{ + int errcode; + + WARN_ON(!hctx); + WARN_ON(!hctx->accum_inited); + + errcode = hctx->iface->init( + hctx->iface->info, &hctx->accum.backend); + if (errcode) + goto error; + + hctx->accum.state = ACCUM_STATE_ERROR; + + errcode = kbase_hwcnt_enable_map_alloc( + hctx->iface->metadata, &hctx->accum.enable_map); + if (errcode) + goto error; + + hctx->accum.enable_map_any_enabled = false; + + errcode = kbase_hwcnt_dump_buffer_alloc( + hctx->iface->metadata, &hctx->accum.accum_buf); + if (errcode) + goto error; + + errcode = kbase_hwcnt_enable_map_alloc( + hctx->iface->metadata, &hctx->accum.scratch_map); + if (errcode) + goto error; + + hctx->accum.accumulated = false; + + hctx->accum.ts_last_dump_ns = + hctx->iface->timestamp_ns(hctx->accum.backend); + + return 0; + +error: + kbasep_hwcnt_accumulator_term(hctx); + return errcode; +} + +/** + * kbasep_hwcnt_accumulator_disable() - Transition the accumulator into the + * disabled state, from the enabled or + * error states. + * @hctx: Non-NULL pointer to hardware counter context. + * @accumulate: True if we should accumulate before disabling, else false. 
+ */ +static void kbasep_hwcnt_accumulator_disable( + struct kbase_hwcnt_context *hctx, bool accumulate) +{ + int errcode = 0; + bool backend_enabled = false; + struct kbase_hwcnt_accumulator *accum; + unsigned long flags; + + WARN_ON(!hctx); + lockdep_assert_held(&hctx->accum_lock); + WARN_ON(!hctx->accum_inited); + + accum = &hctx->accum; + + spin_lock_irqsave(&hctx->state_lock, flags); + + WARN_ON(hctx->disable_count != 0); + WARN_ON(hctx->accum.state == ACCUM_STATE_DISABLED); + + if ((hctx->accum.state == ACCUM_STATE_ENABLED) && + (accum->enable_map_any_enabled)) + backend_enabled = true; + + if (!backend_enabled) + hctx->accum.state = ACCUM_STATE_DISABLED; + + spin_unlock_irqrestore(&hctx->state_lock, flags); + + /* Early out if the backend is not already enabled */ + if (!backend_enabled) + return; + + if (!accumulate) + goto disable; + + /* Try and accumulate before disabling */ + errcode = hctx->iface->dump_request(accum->backend); + if (errcode) + goto disable; + + errcode = hctx->iface->dump_wait(accum->backend); + if (errcode) + goto disable; + + errcode = hctx->iface->dump_get(accum->backend, + &accum->accum_buf, &accum->enable_map, accum->accumulated); + if (errcode) + goto disable; + + accum->accumulated = true; + +disable: + hctx->iface->dump_disable(accum->backend); + + /* Regardless of any errors during the accumulate, put the accumulator + * in the disabled state. + */ + spin_lock_irqsave(&hctx->state_lock, flags); + + hctx->accum.state = ACCUM_STATE_DISABLED; + + spin_unlock_irqrestore(&hctx->state_lock, flags); +} + +/** + * kbasep_hwcnt_accumulator_enable() - Transition the accumulator into the + * enabled state, from the disabled state. + * @hctx: Non-NULL pointer to hardware counter context. 
+ */
+static void kbasep_hwcnt_accumulator_enable(struct kbase_hwcnt_context *hctx)
+{
+	int errcode = 0;
+	struct kbase_hwcnt_accumulator *accum;
+
+	WARN_ON(!hctx);
+	lockdep_assert_held(&hctx->state_lock);
+	WARN_ON(!hctx->accum_inited);
+	WARN_ON(hctx->accum.state != ACCUM_STATE_DISABLED);
+
+	accum = &hctx->accum;
+
+	/* The backend only needs enabling if any counters are enabled. The
+	 * nolock variant is used as state_lock is held (asserted above). */
+	if (accum->enable_map_any_enabled)
+		errcode = hctx->iface->dump_enable_nolock(
+			accum->backend, &accum->enable_map);
+
+	if (!errcode) /* also true when no backend call was needed */
+		accum->state = ACCUM_STATE_ENABLED;
+	else
+		accum->state = ACCUM_STATE_ERROR;
+}
+
+/**
+ * kbasep_hwcnt_accumulator_dump() - Perform a dump with the most up-to-date
+ *                                   values of enabled counters possible, and
+ *                                   optionally update the set of enabled
+ *                                   counters.
+ * @hctx :       Non-NULL pointer to the hardware counter context
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ *               be written out to on success
+ * @ts_end_ns:   Non-NULL pointer where the end timestamp of the dump will
+ *               be written out to on success
+ * @dump_buf:    Pointer to the buffer where the dump will be written out to on
+ *               success. If non-NULL, must have the same metadata as the
+ *               accumulator. If NULL, the dump will be discarded.
+ * @new_map:     Pointer to the new counter enable map. If non-NULL, must have
+ *               the same metadata as the accumulator. If NULL, the set of
+ *               enabled counters will be unchanged.
+ */
+static int kbasep_hwcnt_accumulator_dump(
+	struct kbase_hwcnt_context *hctx,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf,
+	const struct kbase_hwcnt_enable_map *new_map)
+{
+	int errcode = 0;
+	unsigned long flags;
+	enum kbase_hwcnt_accum_state state;
+	bool dump_requested = false;
+	bool dump_written = false;
+	bool cur_map_any_enabled;
+	struct kbase_hwcnt_enable_map *cur_map;
+	bool new_map_any_enabled = false;
+	u64 dump_time_ns;
+	struct kbase_hwcnt_accumulator *accum;
+
+	WARN_ON(!hctx);
+	WARN_ON(!ts_start_ns);
+	WARN_ON(!ts_end_ns);
+	WARN_ON(dump_buf && (dump_buf->metadata != hctx->iface->metadata));
+	WARN_ON(new_map && (new_map->metadata != hctx->iface->metadata));
+	WARN_ON(!hctx->accum_inited);
+	lockdep_assert_held(&hctx->accum_lock);
+
+	accum = &hctx->accum;
+	cur_map = &accum->scratch_map;
+
+	/* Snapshot the current enable map into the scratch map */
+	cur_map_any_enabled = accum->enable_map_any_enabled;
+	kbase_hwcnt_enable_map_copy(cur_map, &accum->enable_map);
+
+	if (new_map)
+		new_map_any_enabled =
+			kbase_hwcnt_enable_map_any_enabled(new_map);
+
+	/*
+	 * We're holding accum_lock, so the accumulator state might transition
+	 * from disabled to enabled during this function (as enabling is lock
+	 * free), but it will never disable (as disabling needs to hold the
+	 * accum_lock), nor will it ever transition from enabled to error (as
+	 * an enable while we're already enabled is impossible).
+	 *
+	 * If we're already disabled, we'll only look at the accumulation buffer
+	 * rather than do a real dump, so a concurrent enable does not affect
+	 * us.
+	 *
+	 * If a concurrent enable fails, we might transition to the error
+	 * state, but again, as we're only looking at the accumulation buffer,
+	 * it's not an issue.
+	 */
+	spin_lock_irqsave(&hctx->state_lock, flags);
+
+	state = accum->state;
+
+	/*
+	 * Update the new map now, such that if an enable occurs during this
+	 * dump then that enable will set the new map. If we're already enabled,
+	 * then we'll do it ourselves after the dump.
+	 */
+	if (new_map) {
+		kbase_hwcnt_enable_map_copy(
+			&accum->enable_map, new_map);
+		accum->enable_map_any_enabled = new_map_any_enabled;
+	}
+
+	spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+	/* Error state, so early out. No need to roll back any map updates */
+	if (state == ACCUM_STATE_ERROR)
+		return -EIO;
+
+	/* Initiate the dump if the backend is enabled. */
+	if ((state == ACCUM_STATE_ENABLED) && cur_map_any_enabled) {
+		/* Disable pre-emption, to make the timestamp as accurate as
+		 * possible.
+		 */
+		preempt_disable();
+		{
+			dump_time_ns = hctx->iface->timestamp_ns(
+				accum->backend);
+			if (dump_buf) {
+				errcode = hctx->iface->dump_request(
+					accum->backend);
+				dump_requested = true;
+			} else {
+				errcode = hctx->iface->dump_clear(
+					accum->backend);
+			}
+		}
+		preempt_enable();
+		if (errcode)
+			goto error;
+	} else {
+		dump_time_ns = hctx->iface->timestamp_ns(accum->backend);
+	}
+
+	/* Copy any earlier accumulation (accum_buf) into the dest buffer */
+	if (accum->accumulated && dump_buf) {
+		kbase_hwcnt_dump_buffer_copy(
+			dump_buf, &accum->accum_buf, cur_map);
+		dump_written = true;
+	}
+
+	/* Wait for any requested dumps to complete */
+	if (dump_requested) {
+		WARN_ON(state != ACCUM_STATE_ENABLED);
+		errcode = hctx->iface->dump_wait(accum->backend);
+		if (errcode)
+			goto error;
+	}
+
+	/* If we're enabled and there's a new enable map, change the enabled set
+	 * as soon after the dump has completed as possible.
+	 */
+	if ((state == ACCUM_STATE_ENABLED) && new_map) {
+		/* Backend is only enabled if there were any enabled counters */
+		if (cur_map_any_enabled)
+			hctx->iface->dump_disable(accum->backend);
+
+		/* (Re-)enable the backend if the new map has enabled counters.
+		 * No need to acquire the spinlock, as concurrent enable while
+		 * we're already enabled and holding accum_lock is impossible.
+		 */
+		if (new_map_any_enabled) {
+			errcode = hctx->iface->dump_enable(
+				accum->backend, new_map);
+			if (errcode)
+				goto error;
+		}
+	}
+
+	/* Copy, accumulate, or zero into the dest buffer to finish */
+	if (dump_buf) {
+		/* If we dumped, copy or accumulate it into the destination */
+		if (dump_requested) {
+			WARN_ON(state != ACCUM_STATE_ENABLED);
+			errcode = hctx->iface->dump_get(
+				accum->backend,
+				dump_buf,
+				cur_map,
+				dump_written);
+			if (errcode)
+				goto error;
+			dump_written = true;
+		}
+
+		/* If we've not written anything into the dump buffer so far, it
+		 * means there was nothing to write. Zero any enabled counters.
+		 */
+		if (!dump_written)
+			kbase_hwcnt_dump_buffer_zero(dump_buf, cur_map);
+	}
+
+	/* Write out timestamps */
+	*ts_start_ns = accum->ts_last_dump_ns;
+	*ts_end_ns = dump_time_ns;
+
+	accum->accumulated = false; /* accumulation used or dropped */
+	accum->ts_last_dump_ns = dump_time_ns;
+
+	return 0;
+error:
+	/* An error was only physically possible if the backend was enabled */
+	WARN_ON(state != ACCUM_STATE_ENABLED);
+
+	/* Disable the backend, and transition to the error state */
+	hctx->iface->dump_disable(accum->backend);
+	spin_lock_irqsave(&hctx->state_lock, flags);
+
+	accum->state = ACCUM_STATE_ERROR;
+
+	spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+	return errcode;
+}
+
+/**
+ * kbasep_hwcnt_context_disable() - Increment the disable count of the context.
+ * @hctx:       Non-NULL pointer to hardware counter context.
+ * @accumulate: True if we should accumulate before disabling, else false.
+ */
+static void kbasep_hwcnt_context_disable(
+	struct kbase_hwcnt_context *hctx, bool accumulate)
+{
+	unsigned long flags;
+
+	WARN_ON(!hctx);
+	lockdep_assert_held(&hctx->accum_lock);
+
+	if (!kbase_hwcnt_context_disable_atomic(hctx)) {
+		kbasep_hwcnt_accumulator_disable(hctx, accumulate);
+
+		spin_lock_irqsave(&hctx->state_lock, flags);
+
+		/* Atomic disable failed and we're holding the mutex, so current
+		 * disable count must be 0.
+		 */
+		WARN_ON(hctx->disable_count != 0);
+		hctx->disable_count++;
+
+		spin_unlock_irqrestore(&hctx->state_lock, flags);
+	}
+}
+
+int kbase_hwcnt_accumulator_acquire(
+	struct kbase_hwcnt_context *hctx,
+	struct kbase_hwcnt_accumulator **accum)
+{
+	int errcode = 0;
+	unsigned long flags;
+
+	if (!hctx || !accum)
+		return -EINVAL;
+
+	mutex_lock(&hctx->accum_lock);
+	spin_lock_irqsave(&hctx->state_lock, flags);
+
+	if (!hctx->accum_inited)
+		/* Claim the accumulator now to prevent a concurrent init */
+		hctx->accum_inited = true;
+	else
+		/* An accumulator already exists or is being initialised */
+		errcode = -EBUSY;
+
+	spin_unlock_irqrestore(&hctx->state_lock, flags);
+	mutex_unlock(&hctx->accum_lock);
+
+	if (errcode)
+		return errcode;
+
+	errcode = kbasep_hwcnt_accumulator_init(hctx); /* no locks held */
+
+	if (errcode) {
+		mutex_lock(&hctx->accum_lock);
+		spin_lock_irqsave(&hctx->state_lock, flags);
+
+		hctx->accum_inited = false;
+
+		spin_unlock_irqrestore(&hctx->state_lock, flags);
+		mutex_unlock(&hctx->accum_lock);
+
+		return errcode;
+	}
+
+	spin_lock_irqsave(&hctx->state_lock, flags);
+
+	WARN_ON(hctx->disable_count == 0);
+	WARN_ON(hctx->accum.enable_map_any_enabled);
+
+	/* Decrement the disable count to allow the accumulator to be accessible
+	 * now that it's fully constructed.
+	 */
+	hctx->disable_count--;
+
+	/*
+	 * Make sure the accumulator is initialised to the correct state.
+	 * Regardless of initial state, counters don't need to be enabled via
+	 * the backend, as the initial enable map has no enabled counters.
+	 */
+	hctx->accum.state = (hctx->disable_count == 0) ?
+		ACCUM_STATE_ENABLED :
+		ACCUM_STATE_DISABLED;
+
+	spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+	*accum = &hctx->accum;
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_acquire);
+
+void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum)
+{
+	unsigned long flags;
+	struct kbase_hwcnt_context *hctx;
+
+	if (!accum)
+		return;
+
+	hctx = container_of(accum, struct kbase_hwcnt_context, accum);
+
+	mutex_lock(&hctx->accum_lock);
+
+	/* Double release is a programming error */
+	WARN_ON(!hctx->accum_inited);
+
+	/* Disable the context to ensure the accumulator is inaccessible while
+	 * we're destroying it. This performs the corresponding disable count
+	 * increment to the decrement done during acquisition.
+	 */
+	kbasep_hwcnt_context_disable(hctx, false);
+
+	mutex_unlock(&hctx->accum_lock);
+
+	kbasep_hwcnt_accumulator_term(hctx); /* accum_lock not held */
+
+	mutex_lock(&hctx->accum_lock);
+	spin_lock_irqsave(&hctx->state_lock, flags);
+
+	hctx->accum_inited = false;
+
+	spin_unlock_irqrestore(&hctx->state_lock, flags);
+	mutex_unlock(&hctx->accum_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_release);
+
+void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx)
+{
+	if (WARN_ON(!hctx))
+		return;
+
+	/* Try and atomically disable first, so we can avoid locking the mutex
+	 * if we don't need to.
+	 */
+	if (kbase_hwcnt_context_disable_atomic(hctx))
+		return;
+
+	mutex_lock(&hctx->accum_lock);
+
+	kbasep_hwcnt_context_disable(hctx, true);
+
+	mutex_unlock(&hctx->accum_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_context_disable);
+
+bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx)
+{
+	unsigned long flags;
+	bool atomic_disabled = false;
+
+	if (WARN_ON(!hctx))
+		return false;
+
+	spin_lock_irqsave(&hctx->state_lock, flags);
+
+	if (!WARN_ON(hctx->disable_count == SIZE_MAX)) {
+		/*
+		 * If disable count is non-zero, we can just bump the disable
+		 * count.
+		 *
+		 * Otherwise, we can't disable in an atomic context.
+		 */
+		if (hctx->disable_count != 0) {
+			hctx->disable_count++;
+			atomic_disabled = true;
+		}
+	}
+
+	spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+	return atomic_disabled;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_context_disable_atomic);
+
+void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx)
+{
+	unsigned long flags;
+
+	if (WARN_ON(!hctx))
+		return;
+
+	spin_lock_irqsave(&hctx->state_lock, flags);
+
+	if (!WARN_ON(hctx->disable_count == 0)) {
+		if (hctx->disable_count == 1) /* last outstanding disable */
+			kbasep_hwcnt_accumulator_enable(hctx);
+
+		hctx->disable_count--;
+	}
+
+	spin_unlock_irqrestore(&hctx->state_lock, flags);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_context_enable);
+
+const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(
+	struct kbase_hwcnt_context *hctx)
+{
+	if (!hctx)
+		return NULL;
+
+	return hctx->iface->metadata;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_context_metadata);
+
+int kbase_hwcnt_accumulator_set_counters(
+	struct kbase_hwcnt_accumulator *accum,
+	const struct kbase_hwcnt_enable_map *new_map,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	int errcode;
+	struct kbase_hwcnt_context *hctx;
+
+	if (!accum || !new_map || !ts_start_ns || !ts_end_ns)
+		return -EINVAL;
+
+	hctx = container_of(accum, struct kbase_hwcnt_context, accum);
+
+	if ((new_map->metadata != hctx->iface->metadata) ||
+	    (dump_buf && (dump_buf->metadata != hctx->iface->metadata)))
+		return -EINVAL;
+
+	mutex_lock(&hctx->accum_lock);
+
+	errcode = kbasep_hwcnt_accumulator_dump(
+		hctx, ts_start_ns, ts_end_ns, dump_buf, new_map);
+
+	mutex_unlock(&hctx->accum_lock);
+
+	return errcode;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_set_counters);
+
+int kbase_hwcnt_accumulator_dump(
+	struct kbase_hwcnt_accumulator *accum,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	int errcode;
+	struct kbase_hwcnt_context *hctx;
+
+	if (!accum || !ts_start_ns || !ts_end_ns)
+		return -EINVAL;
+
+	hctx = container_of(accum, struct kbase_hwcnt_context, accum);
+
+	if (dump_buf && (dump_buf->metadata != hctx->iface->metadata))
+		return -EINVAL;
+
+	mutex_lock(&hctx->accum_lock);
+
+	errcode = kbasep_hwcnt_accumulator_dump(
+		hctx, ts_start_ns, ts_end_ns, dump_buf, NULL);
+
+	mutex_unlock(&hctx->accum_lock);
+
+	return errcode;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_dump);
+
+u64 kbase_hwcnt_accumulator_timestamp_ns(struct kbase_hwcnt_accumulator *accum)
+{
+	struct kbase_hwcnt_context *hctx;
+
+	if (WARN_ON(!accum))
+		return 0;
+
+	hctx = container_of(accum, struct kbase_hwcnt_context, accum);
+	return hctx->iface->timestamp_ns(accum->backend);
+}
diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_accumulator.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_accumulator.h
new file mode 100644
index 0000000..eb82ea4
--- /dev/null
+++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_accumulator.h
@@ -0,0 +1,146 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Hardware counter accumulator API.
+ */ + +#ifndef _KBASE_HWCNT_ACCUMULATOR_H_ +#define _KBASE_HWCNT_ACCUMULATOR_H_ + +#include + +struct kbase_hwcnt_context; +struct kbase_hwcnt_accumulator; +struct kbase_hwcnt_enable_map; +struct kbase_hwcnt_dump_buffer; + +/** + * kbase_hwcnt_accumulator_acquire() - Acquire the hardware counter accumulator + * for a hardware counter context. + * @hctx: Non-NULL pointer to a hardware counter context. + * @accum: Non-NULL pointer to where the pointer to the created accumulator + * will be stored on success. + * + * There can exist at most one instance of the hardware counter accumulator per + * context at a time. + * + * If multiple clients need access to the hardware counters at the same time, + * then an abstraction built on top of the single instance to the hardware + * counter accumulator is required. + * + * No counters will be enabled with the returned accumulator. A subsequent call + * to kbase_hwcnt_accumulator_set_counters must be used to turn them on. + * + * There are four components to a hardware counter dump: + * - A set of enabled counters + * - A start time + * - An end time + * - A dump buffer containing the accumulated counter values for all enabled + * counters between the start and end times. + * + * For each dump, it is guaranteed that all enabled counters were active for the + * entirety of the period between the start and end times. + * + * It is also guaranteed that the start time of dump "n" is always equal to the + * end time of dump "n - 1". + * + * For all dumps, the values of any counters that were not enabled is undefined. + * + * Return: 0 on success or error code. + */ +int kbase_hwcnt_accumulator_acquire( + struct kbase_hwcnt_context *hctx, + struct kbase_hwcnt_accumulator **accum); + +/** + * kbase_hwcnt_accumulator_release() - Release a hardware counter accumulator. + * @accum: Non-NULL pointer to the hardware counter accumulator. 
+ * + * The accumulator must be released before the context the accumulator was + * created from is terminated. + */ +void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum); + +/** + * kbase_hwcnt_accumulator_set_counters() - Perform a dump of the currently + * enabled counters, and enable a new + * set of counters that will be used + * for subsequent dumps. + * @accum: Non-NULL pointer to the hardware counter accumulator. + * @new_map: Non-NULL pointer to the new counter enable map. Must have the + * same metadata as the accumulator. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * If this function fails for some unexpected reason (i.e. anything other than + * invalid args), then the accumulator will be put into the error state until + * the parent context is next disabled. + * + * Return: 0 on success or error code. + */ +int kbase_hwcnt_accumulator_set_counters( + struct kbase_hwcnt_accumulator *accum, + const struct kbase_hwcnt_enable_map *new_map, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf); + +/** + * kbase_hwcnt_accumulator_dump() - Perform a dump of the currently enabled + * counters. + * @accum: Non-NULL pointer to the hardware counter accumulator. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. 
If NULL, the dump will be discarded. + * + * If this function fails for some unexpected reason (i.e. anything other than + * invalid args), then the accumulator will be put into the error state until + * the parent context is next disabled. + * + * Return: 0 on success or error code. + */ +int kbase_hwcnt_accumulator_dump( + struct kbase_hwcnt_accumulator *accum, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf); + +/** + * kbase_hwcnt_accumulator_timestamp_ns() - Get the current accumulator backend + * timestamp. + * @accum: Non-NULL pointer to the hardware counter accumulator. + * + * Return: Accumulator backend timestamp in nanoseconds. + */ +u64 kbase_hwcnt_accumulator_timestamp_ns(struct kbase_hwcnt_accumulator *accum); + +#endif /* _KBASE_HWCNT_ACCUMULATOR_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend.h new file mode 100644 index 0000000..b7aa0e1 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend.h @@ -0,0 +1,217 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Virtual interface for hardware counter backends. 
+ */ + +#ifndef _KBASE_HWCNT_BACKEND_H_ +#define _KBASE_HWCNT_BACKEND_H_ + +#include + +struct kbase_hwcnt_metadata; +struct kbase_hwcnt_enable_map; +struct kbase_hwcnt_dump_buffer; + +/* + * struct kbase_hwcnt_backend_info - Opaque pointer to information used to + * create an instance of a hardware counter + * backend. + */ +struct kbase_hwcnt_backend_info; + +/* + * struct kbase_hwcnt_backend_info - Opaque pointer to a hardware counter + * backend, used to perform dumps. + */ +struct kbase_hwcnt_backend; + +/** + * typedef kbase_hwcnt_backend_init_fn - Initialise a counter backend. + * @info: Non-NULL pointer to backend info. + * @out_backend: Non-NULL pointer to where backend is stored on success. + * + * All uses of the created hardware counter backend must be externally + * synchronised. + * + * Return: 0 on success, else error code. + */ +typedef int (*kbase_hwcnt_backend_init_fn)( + const struct kbase_hwcnt_backend_info *info, + struct kbase_hwcnt_backend **out_backend); + +/** + * typedef kbase_hwcnt_backend_term_fn - Terminate a counter backend. + * @backend: Pointer to backend to be terminated. + */ +typedef void (*kbase_hwcnt_backend_term_fn)( + struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_timestamp_ns_fn - Get the current backend + * timestamp. + * @backend: Non-NULL pointer to backend. + * + * Return: Backend timestamp in nanoseconds. + */ +typedef u64 (*kbase_hwcnt_backend_timestamp_ns_fn)( + struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_dump_enable_fn - Start counter dumping with the + * backend. + * @backend: Non-NULL pointer to backend. + * @enable_map: Non-NULL pointer to enable map specifying enabled counters. + * + * The enable_map must have been created using the interface's metadata. + * If the backend has already been enabled, an error is returned. + * + * May be called in an atomic context. + * + * Return: 0 on success, else error code. 
+ */ +typedef int (*kbase_hwcnt_backend_dump_enable_fn)( + struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map); + +/** + * typedef kbase_hwcnt_backend_dump_enable_nolock_fn - Start counter dumping + * with the backend. + * @backend: Non-NULL pointer to backend. + * @enable_map: Non-NULL pointer to enable map specifying enabled counters. + * + * Exactly the same as kbase_hwcnt_backend_dump_enable_fn(), except must be + * called in an atomic context with the spinlock documented by the specific + * backend interface held. + * + * Return: 0 on success, else error code. + */ +typedef int (*kbase_hwcnt_backend_dump_enable_nolock_fn)( + struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map); + +/** + * typedef kbase_hwcnt_backend_dump_disable_fn - Disable counter dumping with + * the backend. + * @backend: Non-NULL pointer to backend. + * + * If the backend is already disabled, does nothing. + * Any undumped counter values since the last dump get will be lost. + */ +typedef void (*kbase_hwcnt_backend_dump_disable_fn)( + struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_dump_clear_fn - Reset all the current undumped + * counters. + * @backend: Non-NULL pointer to backend. + * + * If the backend is not enabled, returns an error. + * + * Return: 0 on success, else error code. + */ +typedef int (*kbase_hwcnt_backend_dump_clear_fn)( + struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_dump_request_fn - Request an asynchronous counter + * dump. + * @backend: Non-NULL pointer to backend. + * + * If the backend is not enabled or another dump is already in progress, + * returns an error. + * + * Return: 0 on success, else error code. + */ +typedef int (*kbase_hwcnt_backend_dump_request_fn)( + struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_dump_wait_fn - Wait until the last requested + * counter dump has completed. 
+ * @backend: Non-NULL pointer to backend.
+ *
+ * If the backend is not enabled, returns an error.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int (*kbase_hwcnt_backend_dump_wait_fn)(
+	struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_get_fn - Copy or accumulate the enabled
+ *                                           counters dumped after the last dump
+ *                                           request into the dump buffer.
+ * @backend:     Non-NULL pointer to backend.
+ * @dump_buffer: Non-NULL pointer to destination dump buffer.
+ * @enable_map:  Non-NULL pointer to enable map specifying enabled values.
+ * @accumulate:  True if counters should be accumulated into dump_buffer, rather
+ *               than copied.
+ *
+ * If the backend is not enabled, returns an error.
+ * If a dump is in progress (i.e. dump_wait has not yet returned successfully)
+ * then the resultant contents of the dump buffer will be undefined.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int (*kbase_hwcnt_backend_dump_get_fn)(
+	struct kbase_hwcnt_backend *backend,
+	struct kbase_hwcnt_dump_buffer *dump_buffer,
+	const struct kbase_hwcnt_enable_map *enable_map,
+	bool accumulate);
+
+/**
+ * struct kbase_hwcnt_backend_interface - Hardware counter backend virtual
+ *                                        interface.
+ * @metadata:           Immutable hardware counter metadata.
+ * @info:               Immutable info used to initialise an instance of the
+ *                      backend.
+ * @init:               Function ptr to initialise an instance of the backend.
+ * @term:               Function ptr to terminate an instance of the backend.
+ * @timestamp_ns:       Function ptr to get the current backend timestamp.
+ * @dump_enable:        Function ptr to enable dumping.
+ * @dump_enable_nolock: Function ptr to enable dumping while the
+ *                      backend-specific spinlock is already held.
+ * @dump_disable:       Function ptr to disable dumping.
+ * @dump_clear:         Function ptr to clear counters.
+ * @dump_request:       Function ptr to request a dump.
+ * @dump_wait:          Function ptr to wait until dump to complete.
+ * @dump_get: Function ptr to copy or accumulate dump into a dump + * buffer. + */ +struct kbase_hwcnt_backend_interface { + const struct kbase_hwcnt_metadata *metadata; + const struct kbase_hwcnt_backend_info *info; + kbase_hwcnt_backend_init_fn init; + kbase_hwcnt_backend_term_fn term; + kbase_hwcnt_backend_timestamp_ns_fn timestamp_ns; + kbase_hwcnt_backend_dump_enable_fn dump_enable; + kbase_hwcnt_backend_dump_enable_nolock_fn dump_enable_nolock; + kbase_hwcnt_backend_dump_disable_fn dump_disable; + kbase_hwcnt_backend_dump_clear_fn dump_clear; + kbase_hwcnt_backend_dump_request_fn dump_request; + kbase_hwcnt_backend_dump_wait_fn dump_wait; + kbase_hwcnt_backend_dump_get_fn dump_get; +}; + +#endif /* _KBASE_HWCNT_BACKEND_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.c similarity index 100% rename from dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.c rename to bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.c diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.h similarity index 100% rename from dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.h rename to bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.h diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_context.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_context.h new file mode 100644 index 0000000..bc50ad1 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_context.h @@ -0,0 +1,119 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Hardware counter context API. + */ + +#ifndef _KBASE_HWCNT_CONTEXT_H_ +#define _KBASE_HWCNT_CONTEXT_H_ + +#include + +struct kbase_hwcnt_backend_interface; +struct kbase_hwcnt_context; + +/** + * kbase_hwcnt_context_init() - Initialise a hardware counter context. + * @iface: Non-NULL pointer to a hardware counter backend interface. + * @out_hctx: Non-NULL pointer to where the pointer to the created context will + * be stored on success. + * + * On creation, the disable count of the context will be 0. + * A hardware counter accumulator can be acquired using a created context. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_context_init( + const struct kbase_hwcnt_backend_interface *iface, + struct kbase_hwcnt_context **out_hctx); + +/** + * kbase_hwcnt_context_term() - Terminate a hardware counter context. + * @hctx: Pointer to context to be terminated. + */ +void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx); + +/** + * kbase_hwcnt_context_metadata() - Get the hardware counter metadata used by + * the context, so related counter data + * structures can be created. + * @hctx: Non-NULL pointer to the hardware counter context. + * + * Return: Non-NULL pointer to metadata, or NULL on error. 
+ */ +const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata( + struct kbase_hwcnt_context *hctx); + +/** + * kbase_hwcnt_context_disable() - Increment the disable count of the context. + * @hctx: Pointer to the hardware counter context. + * + * If a call to this function increments the disable count from 0 to 1, and + * an accumulator has been acquired, then a counter dump will be performed + * before counters are disabled via the backend interface. + * + * Subsequent dumps via the accumulator while counters are disabled will first + * return the accumulated dump, then will return dumps with zeroed counters. + * + * After this function call returns, it is guaranteed that counters will not be + * enabled via the backend interface. + */ +void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx); + +/** + * kbase_hwcnt_context_disable_atomic() - Increment the disable count of the + * context if possible in an atomic + * context. + * @hctx: Pointer to the hardware counter context. + * + * This function will only succeed if hardware counters are effectively already + * disabled, i.e. there is no accumulator, the disable count is already + * non-zero, or the accumulator has no counters set. + * + * After this function call returns true, it is guaranteed that counters will + * not be enabled via the backend interface. + * + * Return: True if the disable count was incremented, else False. + */ +bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx); + +/** + * kbase_hwcnt_context_enable() - Decrement the disable count of the context. + * @hctx: Pointer to the hardware counter context. + * + * If a call to this function decrements the disable count from 1 to 0, and + * an accumulator has been acquired, then counters will be re-enabled via the + * backend interface. + * + * If an accumulator has been acquired and enabling counters fails for some + * reason, the accumulator will be placed into an error state. 
+ * + * It is only valid to call this function one time for each prior returned call + * to kbase_hwcnt_context_disable. + * + * The spinlock documented in the backend interface that was passed in to + * kbase_hwcnt_context_init() must be held before calling this function. + */ +void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx); + +#endif /* _KBASE_HWCNT_CONTEXT_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.c new file mode 100644 index 0000000..095c765 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.c @@ -0,0 +1,773 @@ +/* + * + * (C) COPYRIGHT 2018-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_hwcnt_gpu.h" +#include "mali_kbase_hwcnt_types.h" +#include "mali_kbase.h" +#ifdef CONFIG_MALI_NO_MALI +#include "backend/gpu/mali_kbase_model_dummy.h" +#endif + +#define KBASE_HWCNT_V4_BLOCKS_PER_GROUP 8 +#define KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP 4 +#define KBASE_HWCNT_V4_MAX_GROUPS \ + (KBASE_HWCNT_AVAIL_MASK_BITS / KBASE_HWCNT_V4_BLOCKS_PER_GROUP) +#define KBASE_HWCNT_V4_HEADERS_PER_BLOCK 4 +#define KBASE_HWCNT_V4_COUNTERS_PER_BLOCK 60 +#define KBASE_HWCNT_V4_VALUES_PER_BLOCK \ + (KBASE_HWCNT_V4_HEADERS_PER_BLOCK + KBASE_HWCNT_V4_COUNTERS_PER_BLOCK) +/* Index of the PRFCNT_EN header into a V4 counter block */ +#define KBASE_HWCNT_V4_PRFCNT_EN_HEADER 2 + +#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4 +#define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4 +#define KBASE_HWCNT_V5_COUNTERS_PER_BLOCK 60 +#define KBASE_HWCNT_V5_VALUES_PER_BLOCK \ + (KBASE_HWCNT_V5_HEADERS_PER_BLOCK + KBASE_HWCNT_V5_COUNTERS_PER_BLOCK) +/* Index of the PRFCNT_EN header into a V5 counter block */ +#define KBASE_HWCNT_V5_PRFCNT_EN_HEADER 2 + +/** + * kbasep_hwcnt_backend_gpu_metadata_v4_create() - Create hardware counter + * metadata for a v4 GPU. + * @v4_info: Non-NULL pointer to hwcnt info for a v4 GPU. + * @metadata: Non-NULL pointer to where created metadata is stored on success. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_backend_gpu_metadata_v4_create( + const struct kbase_hwcnt_gpu_v4_info *v4_info, + const struct kbase_hwcnt_metadata **metadata) +{ + size_t grp; + int errcode = -ENOMEM; + struct kbase_hwcnt_description desc; + struct kbase_hwcnt_group_description *grps; + size_t avail_mask_bit; + + WARN_ON(!v4_info); + WARN_ON(!metadata); + + /* Check if there are enough bits in the availability mask to represent + * all the hardware counter blocks in the system. 
+ */ + if (v4_info->cg_count > KBASE_HWCNT_V4_MAX_GROUPS) + return -EINVAL; + + grps = kcalloc(v4_info->cg_count, sizeof(*grps), GFP_KERNEL); + if (!grps) + goto clean_up; + + desc.grp_cnt = v4_info->cg_count; + desc.grps = grps; + + for (grp = 0; grp < v4_info->cg_count; grp++) { + size_t blk; + size_t sc; + const u64 core_mask = v4_info->cgs[grp].core_mask; + struct kbase_hwcnt_block_description *blks = kcalloc( + KBASE_HWCNT_V4_BLOCKS_PER_GROUP, + sizeof(*blks), + GFP_KERNEL); + + if (!blks) + goto clean_up; + + grps[grp].type = KBASE_HWCNT_GPU_GROUP_TYPE_V4; + grps[grp].blk_cnt = KBASE_HWCNT_V4_BLOCKS_PER_GROUP; + grps[grp].blks = blks; + + for (blk = 0; blk < KBASE_HWCNT_V4_BLOCKS_PER_GROUP; blk++) { + blks[blk].inst_cnt = 1; + blks[blk].hdr_cnt = + KBASE_HWCNT_V4_HEADERS_PER_BLOCK; + blks[blk].ctr_cnt = + KBASE_HWCNT_V4_COUNTERS_PER_BLOCK; + } + + for (sc = 0; sc < KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP; sc++) { + blks[sc].type = core_mask & (1ull << sc) ? + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER : + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED; + } + + blks[4].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER; + blks[5].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2; + blks[6].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED; + blks[7].type = (grp == 0) ? 
+ KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM : + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED; + + WARN_ON(KBASE_HWCNT_V4_BLOCKS_PER_GROUP != 8); + } + + /* Initialise the availability mask */ + desc.avail_mask = 0; + avail_mask_bit = 0; + + for (grp = 0; grp < desc.grp_cnt; grp++) { + size_t blk; + const struct kbase_hwcnt_block_description *blks = + desc.grps[grp].blks; + for (blk = 0; blk < desc.grps[grp].blk_cnt; blk++) { + WARN_ON(blks[blk].inst_cnt != 1); + if (blks[blk].type != + KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED) + desc.avail_mask |= (1ull << avail_mask_bit); + + avail_mask_bit++; + } + } + + errcode = kbase_hwcnt_metadata_create(&desc, metadata); + + /* Always clean up, as metadata will make a copy of the input args */ +clean_up: + if (grps) { + for (grp = 0; grp < v4_info->cg_count; grp++) + kfree(grps[grp].blks); + kfree(grps); + } + return errcode; +} + +/** + * kbasep_hwcnt_backend_gpu_v4_dump_bytes() - Get the raw dump buffer size for a + * V4 GPU. + * @v4_info: Non-NULL pointer to hwcnt info for a v4 GPU. + * + * Return: Size of buffer the V4 GPU needs to perform a counter dump. + */ +static size_t kbasep_hwcnt_backend_gpu_v4_dump_bytes( + const struct kbase_hwcnt_gpu_v4_info *v4_info) +{ + return v4_info->cg_count * + KBASE_HWCNT_V4_BLOCKS_PER_GROUP * + KBASE_HWCNT_V4_VALUES_PER_BLOCK * + KBASE_HWCNT_VALUE_BYTES; +} + +/** + * kbasep_hwcnt_backend_gpu_metadata_v5_create() - Create hardware counter + * metadata for a v5 GPU. + * @v5_info: Non-NULL pointer to hwcnt info for a v5 GPU. + * @use_secondary: True if secondary performance counters should be used, else + * false. Ignored if secondary counters are not supported. + * @metadata: Non-NULL pointer to where created metadata is stored + * on success. + * + * Return: 0 on success, else error code. 
+ */ +static int kbasep_hwcnt_backend_gpu_metadata_v5_create( + const struct kbase_hwcnt_gpu_v5_info *v5_info, + bool use_secondary, + const struct kbase_hwcnt_metadata **metadata) +{ + struct kbase_hwcnt_description desc; + struct kbase_hwcnt_group_description group; + struct kbase_hwcnt_block_description + blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT]; + size_t non_sc_block_count; + size_t sc_block_count; + + WARN_ON(!v5_info); + WARN_ON(!metadata); + + /* Calculate number of block instances that aren't shader cores */ + non_sc_block_count = 2 + v5_info->l2_count; + /* Calculate number of block instances that are shader cores */ + sc_block_count = fls64(v5_info->core_mask); + + /* + * A system can have up to 64 shader cores, but the 64-bit + * availability mask can't physically represent that many cores as well + * as the other hardware blocks. + * Error out if there are more blocks than our implementation can + * support. + */ + if ((sc_block_count + non_sc_block_count) > KBASE_HWCNT_AVAIL_MASK_BITS) + return -EINVAL; + + /* One Job Manager block */ + blks[0].type = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM; + blks[0].inst_cnt = 1; + blks[0].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[0].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; + + /* One Tiler block */ + blks[1].type = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER; + blks[1].inst_cnt = 1; + blks[1].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[1].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; + + /* l2_count memsys blks */ + blks[2].type = use_secondary ? + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2 : + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS; + blks[2].inst_cnt = v5_info->l2_count; + blks[2].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[2].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; + + /* + * There are as many shader cores in the system as there are bits set in + * the core mask. However, the dump buffer memory requirements need to + * take into account the fact that the core mask may be non-contiguous. 
+ * + * For example, a system with a core mask of 0b1011 has the same dump + * buffer memory requirements as a system with 0b1111, but requires more + * memory than a system with 0b0111. However, core 2 of the system with + * 0b1011 doesn't physically exist, and the dump buffer memory that + * accounts for that core will never be written to when we do a counter + * dump. + * + * We find the core mask's last set bit to determine the memory + * requirements, and embed the core mask into the availability mask so + * we can determine later which shader cores physically exist. + */ + blks[3].type = use_secondary ? + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 : + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC; + blks[3].inst_cnt = sc_block_count; + blks[3].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[3].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; + + WARN_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 4); + + group.type = KBASE_HWCNT_GPU_GROUP_TYPE_V5; + group.blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; + group.blks = blks; + + desc.grp_cnt = 1; + desc.grps = &group; + + /* The JM, Tiler, and L2s are always available, and are before cores */ + desc.avail_mask = (1ull << non_sc_block_count) - 1; + /* Embed the core mask directly in the availability mask */ + desc.avail_mask |= (v5_info->core_mask << non_sc_block_count); + + return kbase_hwcnt_metadata_create(&desc, metadata); +} + +/** + * kbasep_hwcnt_backend_gpu_v5_dump_bytes() - Get the raw dump buffer size for a + * V5 GPU. + * @v5_info: Non-NULL pointer to hwcnt info for a v5 GPU. + * + * Return: Size of buffer the V5 GPU needs to perform a counter dump. 
+ */
+static size_t kbasep_hwcnt_backend_gpu_v5_dump_bytes(
+	const struct kbase_hwcnt_gpu_v5_info *v5_info)
+{
+	WARN_ON(!v5_info);
+	return (2 + v5_info->l2_count + fls64(v5_info->core_mask)) *
+		KBASE_HWCNT_V5_VALUES_PER_BLOCK *
+		KBASE_HWCNT_VALUE_BYTES;
+}
+
+int kbase_hwcnt_gpu_info_init(
+	struct kbase_device *kbdev,
+	struct kbase_hwcnt_gpu_info *info)
+{
+	if (!kbdev || !info)
+		return -EINVAL;
+
+#ifdef CONFIG_MALI_NO_MALI
+	/* NO_MALI uses V5 layout, regardless of the underlying platform. */
+	info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V5;
+	info->v5.l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
+	info->v5.core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
+#else
+	{
+		const struct base_gpu_props *props = &kbdev->gpu_props.props;
+		const size_t l2_count = props->l2_props.num_l2_slices;
+		/* u64, not size_t: a 32-bit size_t would truncate the mask */
+		const u64 core_mask = props->coherency_info.group[0].core_mask;
+
+		info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V5;
+		info->v5.l2_count = l2_count;
+		info->v5.core_mask = core_mask;
+	}
+#endif
+	return 0;
+}
+
+int kbase_hwcnt_gpu_metadata_create(
+	const struct kbase_hwcnt_gpu_info *info,
+	bool use_secondary,
+	const struct kbase_hwcnt_metadata **out_metadata,
+	size_t *out_dump_bytes)
+{
+	int errcode;
+	const struct kbase_hwcnt_metadata *metadata;
+	size_t dump_bytes;
+
+	if (!info || !out_metadata || !out_dump_bytes)
+		return -EINVAL;
+
+	switch (info->type) {
+	case KBASE_HWCNT_GPU_GROUP_TYPE_V4:
+		dump_bytes = kbasep_hwcnt_backend_gpu_v4_dump_bytes(&info->v4);
+		errcode = kbasep_hwcnt_backend_gpu_metadata_v4_create(
+			&info->v4, &metadata);
+		break;
+	case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
+		dump_bytes = kbasep_hwcnt_backend_gpu_v5_dump_bytes(&info->v5);
+		errcode = kbasep_hwcnt_backend_gpu_metadata_v5_create(
+			&info->v5, use_secondary, &metadata);
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (errcode)
+		return errcode;
+
+	/*
+	 * Dump abstraction size should be exactly the same size and layout as
+	 * the physical dump size, for backwards
compatibility. + */ + WARN_ON(dump_bytes != metadata->dump_buf_bytes); + + *out_metadata = metadata; + *out_dump_bytes = dump_bytes; + + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_metadata_create); + +void kbase_hwcnt_gpu_metadata_destroy( + const struct kbase_hwcnt_metadata *metadata) +{ + if (!metadata) + return; + + kbase_hwcnt_metadata_destroy(metadata); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_metadata_destroy); + +static bool is_block_type_shader( + const u64 grp_type, + const u64 blk_type, + const size_t blk) +{ + bool is_shader = false; + + switch (grp_type) { + case KBASE_HWCNT_GPU_GROUP_TYPE_V4: + /* blk-value in [0, KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP-1] + * corresponds to a shader, or its implementation + * reserved. As such, here we use the blk index value to + * tell the reserved case. + */ + if (blk_type == KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER || + (blk < KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP && + blk_type == KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED)) + is_shader = true; + break; + case KBASE_HWCNT_GPU_GROUP_TYPE_V5: + if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC || + blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2) + is_shader = true; + break; + default: + /* Warn on unknown group type */ + WARN_ON(true); + } + + return is_shader; +} + +int kbase_hwcnt_gpu_dump_get( + struct kbase_hwcnt_dump_buffer *dst, + void *src, + const struct kbase_hwcnt_enable_map *dst_enable_map, + u64 pm_core_mask, + bool accumulate) +{ + const struct kbase_hwcnt_metadata *metadata; + const u32 *dump_src; + size_t src_offset, grp, blk, blk_inst; + size_t grp_prev = 0; + u64 core_mask = pm_core_mask; + + if (!dst || !src || !dst_enable_map || + (dst_enable_map->metadata != dst->metadata)) + return -EINVAL; + + metadata = dst->metadata; + dump_src = (const u32 *)src; + src_offset = 0; + + kbase_hwcnt_metadata_for_each_block( + metadata, grp, blk, blk_inst) { + const size_t hdr_cnt = + kbase_hwcnt_metadata_block_headers_count( + metadata, grp, blk); + const 
size_t ctr_cnt = + kbase_hwcnt_metadata_block_counters_count( + metadata, grp, blk); + const u64 blk_type = kbase_hwcnt_metadata_block_type( + metadata, grp, blk); + const bool is_shader_core = is_block_type_shader( + kbase_hwcnt_metadata_group_type(metadata, grp), + blk_type, blk); + + if (grp != grp_prev) { + /* grp change would only happen with V4. V5 and + * further are envisaged to be single group + * scenario only. Here needs to drop the lower + * group core-mask by shifting right with + * KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP. + */ + core_mask = pm_core_mask >> + KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP; + grp_prev = grp; + } + + /* Early out if no values in the dest block are enabled */ + if (kbase_hwcnt_enable_map_block_enabled( + dst_enable_map, grp, blk, blk_inst)) { + u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + const u32 *src_blk = dump_src + src_offset; + + if (!is_shader_core || (core_mask & 1)) { + if (accumulate) { + kbase_hwcnt_dump_buffer_block_accumulate( + dst_blk, src_blk, hdr_cnt, + ctr_cnt); + } else { + kbase_hwcnt_dump_buffer_block_copy( + dst_blk, src_blk, + (hdr_cnt + ctr_cnt)); + } + } else if (!accumulate) { + kbase_hwcnt_dump_buffer_block_zero( + dst_blk, (hdr_cnt + ctr_cnt)); + } + } + + src_offset += (hdr_cnt + ctr_cnt); + if (is_shader_core) + core_mask = core_mask >> 1; + } + + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_dump_get); + +/** + * kbasep_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block + * enable map abstraction to + * a physical block enable + * map. + * @lo: Low 64 bits of block enable map abstraction. + * @hi: High 64 bits of block enable map abstraction. + * + * The abstraction uses 128 bits to enable 128 block values, whereas the + * physical uses just 32 bits, as bit n enables values [n*4, n*4+3]. + * Therefore, this conversion is lossy. + * + * Return: 32-bit physical block enable map. 
+ */ +static inline u32 kbasep_hwcnt_backend_gpu_block_map_to_physical( + u64 lo, + u64 hi) +{ + u32 phys = 0; + u64 dwords[2] = {lo, hi}; + size_t dword_idx; + + for (dword_idx = 0; dword_idx < 2; dword_idx++) { + const u64 dword = dwords[dword_idx]; + u16 packed = 0; + + size_t hword_bit; + + for (hword_bit = 0; hword_bit < 16; hword_bit++) { + const size_t dword_bit = hword_bit * 4; + const u16 mask = + ((dword >> (dword_bit + 0)) & 0x1) | + ((dword >> (dword_bit + 1)) & 0x1) | + ((dword >> (dword_bit + 2)) & 0x1) | + ((dword >> (dword_bit + 3)) & 0x1); + packed |= (mask << hword_bit); + } + phys |= ((u32)packed) << (16 * dword_idx); + } + return phys; +} + +/** + * kbasep_hwcnt_backend_gpu_block_map_from_physical() - Convert from a physical + * block enable map to a + * block enable map + * abstraction. + * @phys: Physical 32-bit block enable map + * @lo: Non-NULL pointer to where low 64 bits of block enable map abstraction + * will be stored. + * @hi: Non-NULL pointer to where high 64 bits of block enable map abstraction + * will be stored. 
+ */ +static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical( + u32 phys, + u64 *lo, + u64 *hi) +{ + u64 dwords[2] = {0, 0}; + + size_t dword_idx; + + for (dword_idx = 0; dword_idx < 2; dword_idx++) { + const u16 packed = phys >> (16 * dword_idx); + u64 dword = 0; + + size_t hword_bit; + + for (hword_bit = 0; hword_bit < 16; hword_bit++) { + const size_t dword_bit = hword_bit * 4; + const u64 mask = (packed >> (hword_bit)) & 0x1; + + dword |= mask << (dword_bit + 0); + dword |= mask << (dword_bit + 1); + dword |= mask << (dword_bit + 2); + dword |= mask << (dword_bit + 3); + } + dwords[dword_idx] = dword; + } + *lo = dwords[0]; + *hi = dwords[1]; +} + +void kbase_hwcnt_gpu_enable_map_to_physical( + struct kbase_hwcnt_physical_enable_map *dst, + const struct kbase_hwcnt_enable_map *src) +{ + const struct kbase_hwcnt_metadata *metadata; + + u64 jm_bm = 0; + u64 shader_bm = 0; + u64 tiler_bm = 0; + u64 mmu_l2_bm = 0; + + size_t grp, blk, blk_inst; + + if (WARN_ON(!src) || WARN_ON(!dst)) + return; + + metadata = src->metadata; + + kbase_hwcnt_metadata_for_each_block( + metadata, grp, blk, blk_inst) { + const u64 grp_type = kbase_hwcnt_metadata_group_type( + metadata, grp); + const u64 blk_type = kbase_hwcnt_metadata_block_type( + metadata, grp, blk); + const size_t blk_val_cnt = + kbase_hwcnt_metadata_block_values_count( + metadata, grp, blk); + const u64 *blk_map = kbase_hwcnt_enable_map_block_instance( + src, grp, blk, blk_inst); + + switch ((enum kbase_hwcnt_gpu_group_type)grp_type) { + case KBASE_HWCNT_GPU_GROUP_TYPE_V4: + WARN_ON(blk_val_cnt != KBASE_HWCNT_V4_VALUES_PER_BLOCK); + switch ((enum kbase_hwcnt_gpu_v4_block_type)blk_type) { + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER: + shader_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER: + tiler_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2: + mmu_l2_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM: + jm_bm |= *blk_map; + break; + case 
KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: + break; + default: + WARN_ON(true); + } + break; + case KBASE_HWCNT_GPU_GROUP_TYPE_V5: + WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK); + switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM: + jm_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: + tiler_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: + shader_bm |= *blk_map; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: + mmu_l2_bm |= *blk_map; + break; + default: + WARN_ON(true); + } + break; + default: + WARN_ON(true); + } + } + + dst->jm_bm = + kbasep_hwcnt_backend_gpu_block_map_to_physical(jm_bm, 0); + dst->shader_bm = + kbasep_hwcnt_backend_gpu_block_map_to_physical(shader_bm, 0); + dst->tiler_bm = + kbasep_hwcnt_backend_gpu_block_map_to_physical(tiler_bm, 0); + dst->mmu_l2_bm = + kbasep_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm, 0); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_enable_map_to_physical); + +void kbase_hwcnt_gpu_enable_map_from_physical( + struct kbase_hwcnt_enable_map *dst, + const struct kbase_hwcnt_physical_enable_map *src) +{ + const struct kbase_hwcnt_metadata *metadata; + + u64 ignored_hi; + u64 jm_bm; + u64 shader_bm; + u64 tiler_bm; + u64 mmu_l2_bm; + size_t grp, blk, blk_inst; + + if (WARN_ON(!src) || WARN_ON(!dst)) + return; + + metadata = dst->metadata; + + kbasep_hwcnt_backend_gpu_block_map_from_physical( + src->jm_bm, &jm_bm, &ignored_hi); + kbasep_hwcnt_backend_gpu_block_map_from_physical( + src->shader_bm, &shader_bm, &ignored_hi); + kbasep_hwcnt_backend_gpu_block_map_from_physical( + src->tiler_bm, &tiler_bm, &ignored_hi); + kbasep_hwcnt_backend_gpu_block_map_from_physical( + src->mmu_l2_bm, &mmu_l2_bm, &ignored_hi); + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + const u64 grp_type = 
kbase_hwcnt_metadata_group_type( + metadata, grp); + const u64 blk_type = kbase_hwcnt_metadata_block_type( + metadata, grp, blk); + const size_t blk_val_cnt = + kbase_hwcnt_metadata_block_values_count( + metadata, grp, blk); + u64 *blk_map = kbase_hwcnt_enable_map_block_instance( + dst, grp, blk, blk_inst); + + switch ((enum kbase_hwcnt_gpu_group_type)grp_type) { + case KBASE_HWCNT_GPU_GROUP_TYPE_V4: + WARN_ON(blk_val_cnt != KBASE_HWCNT_V4_VALUES_PER_BLOCK); + switch ((enum kbase_hwcnt_gpu_v4_block_type)blk_type) { + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER: + *blk_map = shader_bm; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER: + *blk_map = tiler_bm; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2: + *blk_map = mmu_l2_bm; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM: + *blk_map = jm_bm; + break; + case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: + break; + default: + WARN_ON(true); + } + break; + case KBASE_HWCNT_GPU_GROUP_TYPE_V5: + WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK); + switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM: + *blk_map = jm_bm; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: + *blk_map = tiler_bm; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: + *blk_map = shader_bm; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: + *blk_map = mmu_l2_bm; + break; + default: + WARN_ON(true); + } + break; + default: + WARN_ON(true); + } + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_enable_map_from_physical); + +void kbase_hwcnt_gpu_patch_dump_headers( + struct kbase_hwcnt_dump_buffer *buf, + const struct kbase_hwcnt_enable_map *enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + + if (WARN_ON(!buf) || WARN_ON(!enable_map) || + WARN_ON(buf->metadata != enable_map->metadata)) + return; + + metadata = buf->metadata; + + 
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + const u64 grp_type = + kbase_hwcnt_metadata_group_type(metadata, grp); + u32 *buf_blk = kbase_hwcnt_dump_buffer_block_instance( + buf, grp, blk, blk_inst); + const u64 *blk_map = kbase_hwcnt_enable_map_block_instance( + enable_map, grp, blk, blk_inst); + const u32 prfcnt_en = + kbasep_hwcnt_backend_gpu_block_map_to_physical( + blk_map[0], 0); + + switch ((enum kbase_hwcnt_gpu_group_type)grp_type) { + case KBASE_HWCNT_GPU_GROUP_TYPE_V4: + buf_blk[KBASE_HWCNT_V4_PRFCNT_EN_HEADER] = prfcnt_en; + break; + case KBASE_HWCNT_GPU_GROUP_TYPE_V5: + buf_blk[KBASE_HWCNT_V5_PRFCNT_EN_HEADER] = prfcnt_en; + break; + default: + WARN_ON(true); + } + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_patch_dump_headers); diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.h new file mode 100644 index 0000000..12891e0 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.h @@ -0,0 +1,251 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_HWCNT_GPU_H_
+#define _KBASE_HWCNT_GPU_H_
+
+#include <linux/types.h>
+
+struct kbase_device;
+struct kbase_hwcnt_metadata;
+struct kbase_hwcnt_enable_map;
+struct kbase_hwcnt_dump_buffer;
+
+/**
+ * enum kbase_hwcnt_gpu_group_type - GPU hardware counter group types, used to
+ *                                   identify metadata groups.
+ * @KBASE_HWCNT_GPU_GROUP_TYPE_V4: GPU V4 group type.
+ * @KBASE_HWCNT_GPU_GROUP_TYPE_V5: GPU V5 group type.
+ */
+enum kbase_hwcnt_gpu_group_type {
+	KBASE_HWCNT_GPU_GROUP_TYPE_V4 = 0x10,
+	KBASE_HWCNT_GPU_GROUP_TYPE_V5,
+};
+
+/**
+ * enum kbase_hwcnt_gpu_v4_block_type - GPU V4 hardware counter block types,
+ *                                      used to identify metadata blocks.
+ * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER: Shader block.
+ * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER: Tiler block.
+ * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2: MMU/L2 block.
+ * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM: Job Manager block.
+ * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: Reserved block.
+ */
+enum kbase_hwcnt_gpu_v4_block_type {
+	KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER = 0x20,
+	KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER,
+	KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2,
+	KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM,
+	KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED,
+};
+
+/**
+ * enum kbase_hwcnt_gpu_v5_block_type - GPU V5 hardware counter block types,
+ *                                      used to identify metadata blocks.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM: Job Manager block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: Tiler block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: Shader Core block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: Secondary Shader Core block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: Memsys block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: Secondary Memsys block.
+ */ +enum kbase_hwcnt_gpu_v5_block_type { + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM = 0x40, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2, +}; + +/** + * struct kbase_hwcnt_physical_enable_map - Representation of enable map + * directly used by GPU. + * @jm_bm: Job Manager counters selection bitmask. + * @shader_bm: Shader counters selection bitmask. + * @tiler_bm: Tiler counters selection bitmask. + * @mmu_l2_bm: MMU_L2 counters selection bitmask. + */ +struct kbase_hwcnt_physical_enable_map { + u32 jm_bm; + u32 shader_bm; + u32 tiler_bm; + u32 mmu_l2_bm; +}; + +/** + * struct kbase_hwcnt_gpu_v4_info - Information about hwcnt blocks on v4 GPUs. + * @cg_count: Core group count. + * @cgs: Non-NULL pointer to array of cg_count coherent group structures. + * + * V4 devices are Mali-T6xx or Mali-T72x, and have one or more core groups, + * where each core group may have a physically different layout. + */ +struct kbase_hwcnt_gpu_v4_info { + size_t cg_count; + const struct mali_base_gpu_coherent_group *cgs; +}; + +/** + * struct kbase_hwcnt_gpu_v5_info - Information about hwcnt blocks on v5 GPUs. + * @l2_count: L2 cache count. + * @core_mask: Shader core mask. May be sparse. + */ +struct kbase_hwcnt_gpu_v5_info { + size_t l2_count; + u64 core_mask; +}; + +/** + * struct kbase_hwcnt_gpu_info - Tagged union with information about the current + * GPU's hwcnt blocks. + * @type: GPU type. + * @v4: Info filled in if a v4 GPU. + * @v5: Info filled in if a v5 GPU. + */ +struct kbase_hwcnt_gpu_info { + enum kbase_hwcnt_gpu_group_type type; + union { + struct kbase_hwcnt_gpu_v4_info v4; + struct kbase_hwcnt_gpu_v5_info v5; + }; +}; + +/** + * kbase_hwcnt_gpu_info_init() - Initialise an info structure used to create the + * hwcnt metadata. + * @kbdev: Non-NULL pointer to kbase device. 
+ * @info:  Non-NULL pointer to data structure to be filled in.
+ *
+ * The initialised info struct will only be valid for use while kbdev is valid.
+ */
+int kbase_hwcnt_gpu_info_init(
+	struct kbase_device *kbdev,
+	struct kbase_hwcnt_gpu_info *info);
+
+/**
+ * kbase_hwcnt_gpu_metadata_create() - Create hardware counter metadata for the
+ *                                     current GPU.
+ * @info:           Non-NULL pointer to info struct initialised by
+ *                  kbase_hwcnt_gpu_info_init.
+ * @use_secondary:  True if secondary performance counters should be used, else
+ *                  false. Ignored if secondary counters are not supported.
+ * @out_metadata:   Non-NULL pointer to where created metadata is stored on
+ *                  success.
+ * @out_dump_bytes: Non-NULL pointer to where the size of the GPU counter dump
+ *                  buffer is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_gpu_metadata_create(
+	const struct kbase_hwcnt_gpu_info *info,
+	bool use_secondary,
+	const struct kbase_hwcnt_metadata **out_metadata,
+	size_t *out_dump_bytes);
+
+/**
+ * kbase_hwcnt_gpu_metadata_destroy() - Destroy GPU hardware counter metadata.
+ * @metadata: Pointer to metadata to destroy.
+ */
+void kbase_hwcnt_gpu_metadata_destroy(
+	const struct kbase_hwcnt_metadata *metadata);
+
+/**
+ * kbase_hwcnt_gpu_dump_get() - Copy or accumulate enabled counters from the raw
+ *                              dump buffer in src into the dump buffer
+ *                              abstraction in dst.
+ * @dst:            Non-NULL pointer to dst dump buffer.
+ * @src:            Non-NULL pointer to src raw dump buffer, of same length
+ *                  as returned in out_dump_bytes parameter of
+ *                  kbase_hwcnt_gpu_metadata_create.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ * @pm_core_mask:   Shader core mask from PM state, synchronized with the dump.
+ * @accumulate:     True if counters in src should be accumulated into dst,
+ *                  rather than copied.
+ *
+ * The dst and dst_enable_map MUST have been created from the same metadata as
+ * returned from the call to kbase_hwcnt_gpu_metadata_create as was used to get
+ * the length of src.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_gpu_dump_get(
+	struct kbase_hwcnt_dump_buffer *dst,
+	void *src,
+	const struct kbase_hwcnt_enable_map *dst_enable_map,
+	u64 pm_core_mask,
+	bool accumulate);
+
+/**
+ * kbase_hwcnt_gpu_enable_map_to_physical() - Convert an enable map abstraction
+ *                                            into a physical enable map.
+ * @dst: Non-NULL pointer to dst physical enable map.
+ * @src: Non-NULL pointer to src enable map abstraction.
+ *
+ * The src must have been created from a metadata returned from a call to
+ * kbase_hwcnt_gpu_metadata_create.
+ *
+ * This is a lossy conversion, as the enable map abstraction has one bit per
+ * individual counter block value, but the physical enable map uses 1 bit for
+ * every 4 counters, shared over all instances of a block.
+ */
+void kbase_hwcnt_gpu_enable_map_to_physical(
+	struct kbase_hwcnt_physical_enable_map *dst,
+	const struct kbase_hwcnt_enable_map *src);
+
+/**
+ * kbase_hwcnt_gpu_enable_map_from_physical() - Convert a physical enable map to
+ *                                              an enable map abstraction.
+ * @dst: Non-NULL pointer to dst enable map abstraction.
+ * @src: Non-NULL pointer to src physical enable map.
+ *
+ * The dst must have been created from a metadata returned from a call to
+ * kbase_hwcnt_gpu_metadata_create.
+ *
+ * This is a lossy conversion, as the physical enable map can technically
+ * support counter blocks with 128 counters each, but no hardware actually uses
+ * more than 64, so the enable map abstraction has nowhere to store the enable
+ * information for the 64 non-existent counters.
+ */ +void kbase_hwcnt_gpu_enable_map_from_physical( + struct kbase_hwcnt_enable_map *dst, + const struct kbase_hwcnt_physical_enable_map *src); + +/** + * kbase_hwcnt_gpu_patch_dump_headers() - Patch all the performance counter + * enable headers in a dump buffer to + * reflect the specified enable map. + * @buf: Non-NULL pointer to dump buffer to patch. + * @enable_map: Non-NULL pointer to enable map. + * + * The buf and enable_map must have been created from a metadata returned from + * a call to kbase_hwcnt_gpu_metadata_create. + * + * This function should be used before handing off a dump buffer over the + * kernel-user boundary, to ensure the header is accurate for the enable map + * used by the user. + */ +void kbase_hwcnt_gpu_patch_dump_headers( + struct kbase_hwcnt_dump_buffer *buf, + const struct kbase_hwcnt_enable_map *enable_map); + +#endif /* _KBASE_HWCNT_GPU_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.c new file mode 100644 index 0000000..b0e6aee --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.c @@ -0,0 +1,152 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_hwcnt_legacy.h" +#include "mali_kbase_hwcnt_virtualizer.h" +#include "mali_kbase_hwcnt_types.h" +#include "mali_kbase_hwcnt_gpu.h" +#include "mali_kbase_ioctl.h" + +#include +#include + +/** + * struct kbase_hwcnt_legacy_client - Legacy hardware counter client. + * @user_dump_buf: Pointer to a non-NULL user buffer, where dumps are returned. + * @enable_map: Counter enable map. + * @dump_buf: Dump buffer used to manipulate dumps before copied to user. + * @hvcli: Hardware counter virtualizer client. + */ +struct kbase_hwcnt_legacy_client { + void __user *user_dump_buf; + struct kbase_hwcnt_enable_map enable_map; + struct kbase_hwcnt_dump_buffer dump_buf; + struct kbase_hwcnt_virtualizer_client *hvcli; +}; + +int kbase_hwcnt_legacy_client_create( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_ioctl_hwcnt_enable *enable, + struct kbase_hwcnt_legacy_client **out_hlcli) +{ + int errcode; + struct kbase_hwcnt_legacy_client *hlcli; + const struct kbase_hwcnt_metadata *metadata; + struct kbase_hwcnt_physical_enable_map phys_em; + + if (!hvirt || !enable || !enable->dump_buffer || !out_hlcli) + return -EINVAL; + + metadata = kbase_hwcnt_virtualizer_metadata(hvirt); + + hlcli = kzalloc(sizeof(*hlcli), GFP_KERNEL); + if (!hlcli) + return -ENOMEM; + + hlcli->user_dump_buf = (void __user *)(uintptr_t)enable->dump_buffer; + + errcode = kbase_hwcnt_enable_map_alloc(metadata, &hlcli->enable_map); + if (errcode) + goto error; + + /* Translate from the ioctl enable map to the internal one */ + phys_em.jm_bm = enable->jm_bm; + phys_em.shader_bm = enable->shader_bm; + phys_em.tiler_bm = enable->tiler_bm; + phys_em.mmu_l2_bm = enable->mmu_l2_bm; + kbase_hwcnt_gpu_enable_map_from_physical(&hlcli->enable_map, &phys_em); + + errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &hlcli->dump_buf); + if (errcode) + goto error; + + errcode = kbase_hwcnt_virtualizer_client_create( + hvirt, &hlcli->enable_map, 
&hlcli->hvcli); + if (errcode) + goto error; + + *out_hlcli = hlcli; + return 0; + +error: + kbase_hwcnt_legacy_client_destroy(hlcli); + return errcode; +} + +void kbase_hwcnt_legacy_client_destroy(struct kbase_hwcnt_legacy_client *hlcli) +{ + if (!hlcli) + return; + + kbase_hwcnt_virtualizer_client_destroy(hlcli->hvcli); + kbase_hwcnt_dump_buffer_free(&hlcli->dump_buf); + kbase_hwcnt_enable_map_free(&hlcli->enable_map); + kfree(hlcli); +} + +int kbase_hwcnt_legacy_client_dump(struct kbase_hwcnt_legacy_client *hlcli) +{ + int errcode; + u64 ts_start_ns; + u64 ts_end_ns; + + if (!hlcli) + return -EINVAL; + + /* Dump into the kernel buffer */ + errcode = kbase_hwcnt_virtualizer_client_dump(hlcli->hvcli, + &ts_start_ns, &ts_end_ns, &hlcli->dump_buf); + if (errcode) + return errcode; + + /* Patch the dump buf headers, to hide the counters that other hwcnt + * clients are using. + */ + kbase_hwcnt_gpu_patch_dump_headers( + &hlcli->dump_buf, &hlcli->enable_map); + + /* Zero all non-enabled counters (current values are undefined) */ + kbase_hwcnt_dump_buffer_zero_non_enabled( + &hlcli->dump_buf, &hlcli->enable_map); + + /* Copy into the user's buffer */ + errcode = copy_to_user(hlcli->user_dump_buf, hlcli->dump_buf.dump_buf, + hlcli->dump_buf.metadata->dump_buf_bytes); + /* Non-zero errcode implies user buf was invalid or too small */ + if (errcode) + return -EFAULT; + + return 0; +} + +int kbase_hwcnt_legacy_client_clear(struct kbase_hwcnt_legacy_client *hlcli) +{ + u64 ts_start_ns; + u64 ts_end_ns; + + if (!hlcli) + return -EINVAL; + + /* Dump with a NULL buffer to clear this client's counters */ + return kbase_hwcnt_virtualizer_client_dump(hlcli->hvcli, + &ts_start_ns, &ts_end_ns, NULL); +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.h new file mode 100644 index 0000000..7a610ae --- /dev/null +++ 
b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.h @@ -0,0 +1,94 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Legacy hardware counter interface, giving userspace clients simple, + * synchronous access to hardware counters. + * + * Any functions operating on a single legacy hardware counter client instance + * must be externally synchronised. + * Different clients may safely be used concurrently. + */ + +#ifndef _KBASE_HWCNT_LEGACY_H_ +#define _KBASE_HWCNT_LEGACY_H_ + +struct kbase_hwcnt_legacy_client; +struct kbase_ioctl_hwcnt_enable; +struct kbase_hwcnt_virtualizer; + +/** + * kbase_hwcnt_legacy_client_create() - Create a legacy hardware counter client. + * @hvirt: Non-NULL pointer to hardware counter virtualizer the client + * should be attached to. + * @enable: Non-NULL pointer to hwcnt_enable structure, containing a valid + * pointer to a user dump buffer large enough to hold a dump, and + * the counters that should be enabled. + * @out_hlcli: Non-NULL pointer to where the pointer to the created client will + * be stored on success. + * + * Return: 0 on success, else error code.
+ */ +int kbase_hwcnt_legacy_client_create( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_ioctl_hwcnt_enable *enable, + struct kbase_hwcnt_legacy_client **out_hlcli); + +/** + * kbase_hwcnt_legacy_client_destroy() - Destroy a legacy hardware counter + * client. + * @hlcli: Pointer to the legacy hardware counter client. + * + * Will safely destroy a client in any partial state of construction. + */ +void kbase_hwcnt_legacy_client_destroy(struct kbase_hwcnt_legacy_client *hlcli); + +/** + * kbase_hwcnt_legacy_client_dump() - Perform a hardware counter dump into the + * client's user buffer. + * @hlcli: Non-NULL pointer to the legacy hardware counter client. + * + * This function will synchronously dump hardware counters into the user buffer + * specified on client creation, with the counters specified on client creation. + * + * The counters are automatically cleared after each dump, such that the next + * dump performed will return the counter values accumulated between the time of + * this function call and the next dump. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_legacy_client_dump(struct kbase_hwcnt_legacy_client *hlcli); + +/** + * kbase_hwcnt_legacy_client_clear() - Perform and discard a hardware counter + * dump. + * @hlcli: Non-NULL pointer to the legacy hardware counter client. + * + * This function will synchronously clear the hardware counters, such that the + * next dump performed will return the counter values accumulated between the + * time of this function call and the next dump. + * + * Return: 0 on success, else error code. 
+ */ +int kbase_hwcnt_legacy_client_clear(struct kbase_hwcnt_legacy_client *hlcli); + +#endif /* _KBASE_HWCNT_LEGACY_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h similarity index 100% rename from dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h rename to bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_reader.h diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.c new file mode 100644 index 0000000..1e9efde --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.c @@ -0,0 +1,538 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_hwcnt_types.h" +#include "mali_kbase.h" + +/* Minimum alignment of each block of hardware counters */ +#define KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT \ + (KBASE_HWCNT_BITFIELD_BITS * KBASE_HWCNT_VALUE_BYTES) + +/** + * KBASE_HWCNT_ALIGN_UPWARDS() - Align a value to an alignment. + * @value: The value to align upwards. + * @alignment: The alignment. 
+ * + * Return: A number greater than or equal to value that is aligned to alignment. + */ +#define KBASE_HWCNT_ALIGN_UPWARDS(value, alignment) \ + (value + ((alignment - (value % alignment)) % alignment)) + +int kbase_hwcnt_metadata_create( + const struct kbase_hwcnt_description *desc, + const struct kbase_hwcnt_metadata **out_metadata) +{ + char *buf; + struct kbase_hwcnt_metadata *metadata; + struct kbase_hwcnt_group_metadata *grp_mds; + size_t grp; + size_t enable_map_count; /* Number of u64 bitfields (inc padding) */ + size_t dump_buf_count; /* Number of u32 values (inc padding) */ + size_t avail_mask_bits; /* Number of availability mask bits */ + + size_t size; + size_t offset; + + if (!desc || !out_metadata) + return -EINVAL; + + /* Calculate the bytes needed to tightly pack the metadata */ + + /* Top level metadata */ + size = 0; + size += sizeof(struct kbase_hwcnt_metadata); + + /* Group metadata */ + size += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt; + + /* Block metadata */ + for (grp = 0; grp < desc->grp_cnt; grp++) { + size += sizeof(struct kbase_hwcnt_block_metadata) * + desc->grps[grp].blk_cnt; + } + + /* Single allocation for the entire metadata */ + buf = kmalloc(size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* Use the allocated memory for the metadata and its members */ + + /* Bump allocate the top level metadata */ + offset = 0; + metadata = (struct kbase_hwcnt_metadata *)(buf + offset); + offset += sizeof(struct kbase_hwcnt_metadata); + + /* Bump allocate the group metadata */ + grp_mds = (struct kbase_hwcnt_group_metadata *)(buf + offset); + offset += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt; + + enable_map_count = 0; + dump_buf_count = 0; + avail_mask_bits = 0; + + for (grp = 0; grp < desc->grp_cnt; grp++) { + size_t blk; + + const struct kbase_hwcnt_group_description *grp_desc = + desc->grps + grp; + struct kbase_hwcnt_group_metadata *grp_md = grp_mds + grp; + + size_t group_enable_map_count = 0; + 
size_t group_dump_buffer_count = 0; + size_t group_avail_mask_bits = 0; + + /* Bump allocate this group's block metadata */ + struct kbase_hwcnt_block_metadata *blk_mds = + (struct kbase_hwcnt_block_metadata *)(buf + offset); + offset += sizeof(struct kbase_hwcnt_block_metadata) * + grp_desc->blk_cnt; + + /* Fill in each block in the group's information */ + for (blk = 0; blk < grp_desc->blk_cnt; blk++) { + const struct kbase_hwcnt_block_description *blk_desc = + grp_desc->blks + blk; + struct kbase_hwcnt_block_metadata *blk_md = + blk_mds + blk; + const size_t n_values = + blk_desc->hdr_cnt + blk_desc->ctr_cnt; + + blk_md->type = blk_desc->type; + blk_md->inst_cnt = blk_desc->inst_cnt; + blk_md->hdr_cnt = blk_desc->hdr_cnt; + blk_md->ctr_cnt = blk_desc->ctr_cnt; + blk_md->enable_map_index = group_enable_map_count; + blk_md->enable_map_stride = + kbase_hwcnt_bitfield_count(n_values); + blk_md->dump_buf_index = group_dump_buffer_count; + blk_md->dump_buf_stride = + KBASE_HWCNT_ALIGN_UPWARDS( + n_values, + (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / + KBASE_HWCNT_VALUE_BYTES)); + blk_md->avail_mask_index = group_avail_mask_bits; + + group_enable_map_count += + blk_md->enable_map_stride * blk_md->inst_cnt; + group_dump_buffer_count += + blk_md->dump_buf_stride * blk_md->inst_cnt; + group_avail_mask_bits += blk_md->inst_cnt; + } + + /* Fill in the group's information */ + grp_md->type = grp_desc->type; + grp_md->blk_cnt = grp_desc->blk_cnt; + grp_md->blk_metadata = blk_mds; + grp_md->enable_map_index = enable_map_count; + grp_md->dump_buf_index = dump_buf_count; + grp_md->avail_mask_index = avail_mask_bits; + + enable_map_count += group_enable_map_count; + dump_buf_count += group_dump_buffer_count; + avail_mask_bits += group_avail_mask_bits; + } + + /* Fill in the top level metadata's information */ + metadata->grp_cnt = desc->grp_cnt; + metadata->grp_metadata = grp_mds; + metadata->enable_map_bytes = + enable_map_count * KBASE_HWCNT_BITFIELD_BYTES; + metadata->dump_buf_bytes 
= dump_buf_count * KBASE_HWCNT_VALUE_BYTES; + metadata->avail_mask = desc->avail_mask; + + WARN_ON(size != offset); + /* Due to the block alignment, there should be exactly one enable map + * bit per 4 bytes in the dump buffer. + */ + WARN_ON(metadata->dump_buf_bytes != + (metadata->enable_map_bytes * + BITS_PER_BYTE * KBASE_HWCNT_VALUE_BYTES)); + + *out_metadata = metadata; + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_metadata_create); + +void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata) +{ + kfree(metadata); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_metadata_destroy); + +int kbase_hwcnt_enable_map_alloc( + const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_enable_map *enable_map) +{ + u64 *enable_map_buf; + + if (!metadata || !enable_map) + return -EINVAL; + + enable_map_buf = kzalloc(metadata->enable_map_bytes, GFP_KERNEL); + if (!enable_map_buf) + return -ENOMEM; + + enable_map->metadata = metadata; + enable_map->enable_map = enable_map_buf; + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_alloc); + +void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map) +{ + if (!enable_map) + return; + + kfree(enable_map->enable_map); + enable_map->enable_map = NULL; + enable_map->metadata = NULL; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_free); + +int kbase_hwcnt_dump_buffer_alloc( + const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + u32 *buf; + + if (!metadata || !dump_buf) + return -EINVAL; + + buf = kmalloc(metadata->dump_buf_bytes, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + dump_buf->metadata = metadata; + dump_buf->dump_buf = buf; + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_alloc); + +void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf) +{ + if (!dump_buf) + return; + + kfree(dump_buf->dump_buf); + memset(dump_buf, 0, sizeof(*dump_buf)); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_free); + +int 
kbase_hwcnt_dump_buffer_array_alloc( + const struct kbase_hwcnt_metadata *metadata, + size_t n, + struct kbase_hwcnt_dump_buffer_array *dump_bufs) +{ + struct kbase_hwcnt_dump_buffer *buffers; + size_t buf_idx; + unsigned int order; + unsigned long addr; + + if (!metadata || !dump_bufs) + return -EINVAL; + + /* Allocate memory for the dump buffer struct array */ + buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL); + if (!buffers) + return -ENOMEM; + + /* Allocate pages for the actual dump buffers, as they tend to be fairly + * large. + */ + order = get_order(metadata->dump_buf_bytes * n); + addr = __get_free_pages(GFP_KERNEL, order); + + if (!addr) { + kfree(buffers); + return -ENOMEM; + } + + dump_bufs->page_addr = addr; + dump_bufs->page_order = order; + dump_bufs->buf_cnt = n; + dump_bufs->bufs = buffers; + + /* Set the buffer of each dump buf */ + for (buf_idx = 0; buf_idx < n; buf_idx++) { + const size_t offset = metadata->dump_buf_bytes * buf_idx; + + buffers[buf_idx].metadata = metadata; + buffers[buf_idx].dump_buf = (u32 *)(addr + offset); + } + + return 0; +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_array_alloc); + +void kbase_hwcnt_dump_buffer_array_free( + struct kbase_hwcnt_dump_buffer_array *dump_bufs) +{ + if (!dump_bufs) + return; + + kfree(dump_bufs->bufs); + free_pages(dump_bufs->page_addr, dump_bufs->page_order); + memset(dump_bufs, 0, sizeof(*dump_bufs)); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_array_free); + +void kbase_hwcnt_dump_buffer_zero( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + + if (WARN_ON(!dst) || + WARN_ON(!dst_enable_map) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + u32 *dst_blk; + size_t val_cnt; + + if (!kbase_hwcnt_enable_map_block_enabled( + 
dst_enable_map, grp, blk, blk_inst)) + continue; + + dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + val_cnt = kbase_hwcnt_metadata_block_values_count( + metadata, grp, blk); + + kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt); + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero); + +void kbase_hwcnt_dump_buffer_zero_strict( + struct kbase_hwcnt_dump_buffer *dst) +{ + if (WARN_ON(!dst)) + return; + + memset(dst->dump_buf, 0, dst->metadata->dump_buf_bytes); +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero_strict); + +void kbase_hwcnt_dump_buffer_zero_non_enabled( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + + if (WARN_ON(!dst) || + WARN_ON(!dst_enable_map) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + const u64 *blk_em = kbase_hwcnt_enable_map_block_instance( + dst_enable_map, grp, blk, blk_inst); + size_t val_cnt = kbase_hwcnt_metadata_block_values_count( + metadata, grp, blk); + + /* Align upwards to include padding bytes */ + val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(val_cnt, + (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / + KBASE_HWCNT_VALUE_BYTES)); + + if (kbase_hwcnt_metadata_block_instance_avail( + metadata, grp, blk, blk_inst)) { + /* Block available, so only zero non-enabled values */ + kbase_hwcnt_dump_buffer_block_zero_non_enabled( + dst_blk, blk_em, val_cnt); + } else { + /* Block not available, so zero the entire thing */ + kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt); + } + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero_non_enabled); + +void kbase_hwcnt_dump_buffer_copy( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const 
struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + + if (WARN_ON(!dst) || + WARN_ON(!src) || + WARN_ON(!dst_enable_map) || + WARN_ON(dst == src) || + WARN_ON(dst->metadata != src->metadata) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + u32 *dst_blk; + const u32 *src_blk; + size_t val_cnt; + + if (!kbase_hwcnt_enable_map_block_enabled( + dst_enable_map, grp, blk, blk_inst)) + continue; + + dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + src_blk = kbase_hwcnt_dump_buffer_block_instance( + src, grp, blk, blk_inst); + val_cnt = kbase_hwcnt_metadata_block_values_count( + metadata, grp, blk); + + kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, val_cnt); + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy); + +void kbase_hwcnt_dump_buffer_copy_strict( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + + if (WARN_ON(!dst) || + WARN_ON(!src) || + WARN_ON(!dst_enable_map) || + WARN_ON(dst == src) || + WARN_ON(dst->metadata != src->metadata) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + const u32 *src_blk = kbase_hwcnt_dump_buffer_block_instance( + src, grp, blk, blk_inst); + const u64 *blk_em = kbase_hwcnt_enable_map_block_instance( + dst_enable_map, grp, blk, blk_inst); + size_t val_cnt = kbase_hwcnt_metadata_block_values_count( + metadata, grp, blk); + /* Align upwards to include padding bytes */ + val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(val_cnt, + 
(KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / + KBASE_HWCNT_VALUE_BYTES)); + + kbase_hwcnt_dump_buffer_block_copy_strict( + dst_blk, src_blk, blk_em, val_cnt); + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy_strict); + +void kbase_hwcnt_dump_buffer_accumulate( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + + if (WARN_ON(!dst) || + WARN_ON(!src) || + WARN_ON(!dst_enable_map) || + WARN_ON(dst == src) || + WARN_ON(dst->metadata != src->metadata) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + u32 *dst_blk; + const u32 *src_blk; + size_t hdr_cnt; + size_t ctr_cnt; + + if (!kbase_hwcnt_enable_map_block_enabled( + dst_enable_map, grp, blk, blk_inst)) + continue; + + dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + src_blk = kbase_hwcnt_dump_buffer_block_instance( + src, grp, blk, blk_inst); + hdr_cnt = kbase_hwcnt_metadata_block_headers_count( + metadata, grp, blk); + ctr_cnt = kbase_hwcnt_metadata_block_counters_count( + metadata, grp, blk); + + kbase_hwcnt_dump_buffer_block_accumulate( + dst_blk, src_blk, hdr_cnt, ctr_cnt); + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate); + +void kbase_hwcnt_dump_buffer_accumulate_strict( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + + if (WARN_ON(!dst) || + WARN_ON(!src) || + WARN_ON(!dst_enable_map) || + WARN_ON(dst == src) || + WARN_ON(dst->metadata != src->metadata) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, 
blk_inst) { + u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + const u32 *src_blk = kbase_hwcnt_dump_buffer_block_instance( + src, grp, blk, blk_inst); + const u64 *blk_em = kbase_hwcnt_enable_map_block_instance( + dst_enable_map, grp, blk, blk_inst); + size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count( + metadata, grp, blk); + size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count( + metadata, grp, blk); + /* Align upwards to include padding bytes */ + ctr_cnt = KBASE_HWCNT_ALIGN_UPWARDS(hdr_cnt + ctr_cnt, + (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / + KBASE_HWCNT_VALUE_BYTES) - hdr_cnt); + + kbase_hwcnt_dump_buffer_block_accumulate_strict( + dst_blk, src_blk, blk_em, hdr_cnt, ctr_cnt); + } +} +KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate_strict); diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.h new file mode 100644 index 0000000..4d78c84 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.h @@ -0,0 +1,1087 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Hardware counter types. 
+ * Contains structures for describing the physical layout of hardware counter + * dump buffers and enable maps within a system. + * + * Also contains helper functions for manipulation of these dump buffers and + * enable maps. + * + * Through use of these structures and functions, hardware counters can be + * enabled, copied, accumulated, and generally manipulated in a generic way, + * regardless of the physical counter dump layout. + * + * Terminology: + * + * Hardware Counter System: + * A collection of hardware counter groups, making a full hardware counter + * system. + * Hardware Counter Group: + * A group of Hardware Counter Blocks (e.g. a t62x might have more than one + * core group, so has one counter group per core group, where each group + * may have a different number and layout of counter blocks). + * Hardware Counter Block: + * A block of hardware counters (e.g. shader block, tiler block). + * Hardware Counter Block Instance: + * An instance of a Hardware Counter Block (e.g. an MP4 GPU might have + * 4 shader block instances). + * + * Block Header: + * A header value inside a counter block. Headers don't count anything, + * so it is only valid to copy or zero them. Headers are always the first + * values in the block. + * Block Counter: + * A counter value inside a counter block. Counters can be zeroed, copied, + * or accumulated. Counters are always immediately after the headers in the + * block. + * Block Value: + * A catch-all term for block headers and block counters. + * + * Enable Map: + * An array of u64 bitfields, where each bit either enables exactly one + * block value, or is unused (padding). + * Dump Buffer: + * An array of u32 values, where each u32 corresponds either to one block + * value, or is unused (padding). + * Availability Mask: + * A bitfield, where each bit corresponds to whether a block instance is + * physically available (e.g. 
an MP3 GPU may have a sparse core mask of + * 0b1011, meaning it only has 3 cores but for hardware counter dumps has the + * same dump buffer layout as an MP4 GPU with a core mask of 0b1111. In this + * case, the availability mask might be 0b1011111 (the exact layout will + * depend on the specific hardware architecture), with the 3 extra early bits + * corresponding to other block instances in the hardware counter system). + * Metadata: + * Structure describing the physical layout of the enable map and dump buffers + * for a specific hardware counter system. + * + */ + +#ifndef _KBASE_HWCNT_TYPES_H_ +#define _KBASE_HWCNT_TYPES_H_ + +#include +#include +#include +#include +#include "mali_malisw.h" + +/* Number of bytes in each bitfield */ +#define KBASE_HWCNT_BITFIELD_BYTES (sizeof(u64)) + +/* Number of bits in each bitfield */ +#define KBASE_HWCNT_BITFIELD_BITS (KBASE_HWCNT_BITFIELD_BYTES * BITS_PER_BYTE) + +/* Number of bytes for each counter value */ +#define KBASE_HWCNT_VALUE_BYTES (sizeof(u32)) + +/* Number of bits in an availability mask (i.e. max total number of block + * instances supported in a Hardware Counter System) + */ +#define KBASE_HWCNT_AVAIL_MASK_BITS (sizeof(u64) * BITS_PER_BYTE) + +/** + * struct kbase_hwcnt_block_description - Description of one or more identical, + * contiguous, Hardware Counter Blocks. + * @type: The arbitrary identifier used to identify the type of the block. + * @inst_cnt: The number of Instances of the block. + * @hdr_cnt: The number of 32-bit Block Headers in the block. + * @ctr_cnt: The number of 32-bit Block Counters in the block. + */ +struct kbase_hwcnt_block_description { + u64 type; + size_t inst_cnt; + size_t hdr_cnt; + size_t ctr_cnt; +}; + +/** + * struct kbase_hwcnt_group_description - Description of one or more identical, + * contiguous Hardware Counter Groups. + * @type: The arbitrary identifier used to identify the type of the group. + * @blk_cnt: The number of types of Hardware Counter Block in the group. 
+ * @blks: Non-NULL pointer to an array of blk_cnt block descriptions, + * describing each type of Hardware Counter Block in the group. + */ +struct kbase_hwcnt_group_description { + u64 type; + size_t blk_cnt; + const struct kbase_hwcnt_block_description *blks; +}; + +/** + * struct kbase_hwcnt_description - Description of a Hardware Counter System. + * @grp_cnt: The number of Hardware Counter Groups. + * @grps: Non-NULL pointer to an array of grp_cnt group descriptions, + * describing each Hardware Counter Group in the system. + * @avail_mask: Flat Availability Mask for all block instances in the system. + */ +struct kbase_hwcnt_description { + size_t grp_cnt; + const struct kbase_hwcnt_group_description *grps; + u64 avail_mask; +}; + +/** + * struct kbase_hwcnt_block_metadata - Metadata describing the physical layout + * of a block in a Hardware Counter System's + * Dump Buffers and Enable Maps. + * @type: The arbitrary identifier used to identify the type of the + * block. + * @inst_cnt: The number of Instances of the block. + * @hdr_cnt: The number of 32-bit Block Headers in the block. + * @ctr_cnt: The number of 32-bit Block Counters in the block. + * @enable_map_index: Index in u64s into the parent's Enable Map where the + * Enable Map bitfields of the Block Instances described by + * this metadata start. + * @enable_map_stride: Stride in u64s between the Enable Maps of each of the + * Block Instances described by this metadata. + * @dump_buf_index: Index in u32s into the parent's Dump Buffer where the + * Dump Buffers of the Block Instances described by this + * metadata start. + * @dump_buf_stride: Stride in u32s between the Dump Buffers of each of the + * Block Instances described by this metadata. + * @avail_mask_index: Index in bits into the parent's Availability Mask where + * the Availability Masks of the Block Instances described + * by this metadata start. 
+ */ +struct kbase_hwcnt_block_metadata { + u64 type; + size_t inst_cnt; + size_t hdr_cnt; + size_t ctr_cnt; + size_t enable_map_index; + size_t enable_map_stride; + size_t dump_buf_index; + size_t dump_buf_stride; + size_t avail_mask_index; +}; + +/** + * struct kbase_hwcnt_group_metadata - Metadata describing the physical layout + * of a group of blocks in a Hardware + * Counter System's Dump Buffers and Enable + * Maps. + * @type: The arbitrary identifier used to identify the type of the + * group. + * @blk_cnt: The number of types of Hardware Counter Block in the + * group. + * @blk_metadata: Non-NULL pointer to an array of blk_cnt block metadata, + * describing the physical layout of each type of Hardware + * Counter Block in the group. + * @enable_map_index: Index in u64s into the parent's Enable Map where the + * Enable Maps of the blocks within the group described by + * this metadata start. + * @dump_buf_index: Index in u32s into the parent's Dump Buffer where the + * Dump Buffers of the blocks within the group described by + * this metadata start. + * @avail_mask_index: Index in bits into the parent's Availability Mask where + * the Availability Masks of the blocks within the group + * described by this metadata start. + */ +struct kbase_hwcnt_group_metadata { + u64 type; + size_t blk_cnt; + const struct kbase_hwcnt_block_metadata *blk_metadata; + size_t enable_map_index; + size_t dump_buf_index; + size_t avail_mask_index; +}; + +/** + * struct kbase_hwcnt_metadata - Metadata describing the physical layout + * of Dump Buffers and Enable Maps within a + * Hardware Counter System. + * @grp_cnt: The number of Hardware Counter Groups. + * @grp_metadata: Non-NULL pointer to an array of grp_cnt group metadata, + * describing the physical layout of each Hardware Counter + * Group in the system. + * @enable_map_bytes: The size in bytes of an Enable Map needed for the system. + * @dump_buf_bytes: The size in bytes of a Dump Buffer needed for the system.
+ * @avail_mask: The Availability Mask for the system. + */ +struct kbase_hwcnt_metadata { + size_t grp_cnt; + const struct kbase_hwcnt_group_metadata *grp_metadata; + size_t enable_map_bytes; + size_t dump_buf_bytes; + u64 avail_mask; +}; + +/** + * struct kbase_hwcnt_enable_map - Hardware Counter Enable Map. Array of u64 + * bitfields. + * @metadata: Non-NULL pointer to metadata used to identify, and to describe + * the layout of the enable map. + * @enable_map: Non-NULL pointer of size metadata->enable_map_bytes to an array + * of u64 bitfields, each bit of which enables one hardware + * counter. + */ +struct kbase_hwcnt_enable_map { + const struct kbase_hwcnt_metadata *metadata; + u64 *enable_map; +}; + +/** + * struct kbase_hwcnt_dump_buffer - Hardware Counter Dump Buffer. Array of u32 + * values. + * @metadata: Non-NULL pointer to metadata used to identify, and to describe + * the layout of the Dump Buffer. + * @dump_buf: Non-NULL pointer of size metadata->dump_buf_bytes to an array + * of u32 values. + */ +struct kbase_hwcnt_dump_buffer { + const struct kbase_hwcnt_metadata *metadata; + u32 *dump_buf; +}; + +/** + * struct kbase_hwcnt_dump_buffer_array - Hardware Counter Dump Buffer array. + * @page_addr: Address of allocated pages. A single allocation is used for all + * Dump Buffers in the array. + * @page_order: The allocation order of the pages. + * @buf_cnt: The number of allocated Dump Buffers. + * @bufs: Non-NULL pointer to the array of Dump Buffers. + */ +struct kbase_hwcnt_dump_buffer_array { + unsigned long page_addr; + unsigned int page_order; + size_t buf_cnt; + struct kbase_hwcnt_dump_buffer *bufs; +}; + +/** + * kbase_hwcnt_metadata_create() - Create a hardware counter metadata object + * from a description. + * @desc: Non-NULL pointer to a hardware counter description. + * @metadata: Non-NULL pointer to where created metadata will be stored on + * success. + * + * Return: 0 on success, else error code. 
+ */ +int kbase_hwcnt_metadata_create( + const struct kbase_hwcnt_description *desc, + const struct kbase_hwcnt_metadata **metadata); + +/** + * kbase_hwcnt_metadata_destroy() - Destroy a hardware counter metadata object. + * @metadata: Pointer to hardware counter metadata + */ +void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata); + +/** + * kbase_hwcnt_metadata_group_count() - Get the number of groups. + * @metadata: Non-NULL pointer to metadata. + * + * Return: Number of hardware counter groups described by metadata. + */ +#define kbase_hwcnt_metadata_group_count(metadata) \ + ((metadata)->grp_cnt) + +/** + * kbase_hwcnt_metadata_group_type() - Get the arbitrary type of a group. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * + * Return: Type of the group grp. + */ +#define kbase_hwcnt_metadata_group_type(metadata, grp) \ + ((metadata)->grp_metadata[(grp)].type) + +/** + * kbase_hwcnt_metadata_block_count() - Get the number of blocks in a group. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * + * Return: Number of blocks in group grp. + */ +#define kbase_hwcnt_metadata_block_count(metadata, grp) \ + ((metadata)->grp_metadata[(grp)].blk_cnt) + +/** + * kbase_hwcnt_metadata_block_type() - Get the arbitrary type of a block. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: Type of the block blk in group grp. + */ +#define kbase_hwcnt_metadata_block_type(metadata, grp, blk) \ + ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].type) + +/** + * kbase_hwcnt_metadata_block_instance_count() - Get the number of instances of + * a block. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: Number of instances of block blk in group grp. 
+ */ +#define kbase_hwcnt_metadata_block_instance_count(metadata, grp, blk) \ + ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].inst_cnt) + +/** + * kbase_hwcnt_metadata_block_headers_count() - Get the number of counter + * headers. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: Number of u32 counter headers in each instance of block blk in + * group grp. + */ +#define kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk) \ + ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].hdr_cnt) + +/** + * kbase_hwcnt_metadata_block_counters_count() - Get the number of counters. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: Number of u32 counters in each instance of block blk in group + * grp. + */ +#define kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk) \ + ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].ctr_cnt) + +/** + * kbase_hwcnt_metadata_block_values_count() - Get the number of values. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: Number of u32 headers plus counters in each instance of block blk + * in group grp. + */ +#define kbase_hwcnt_metadata_block_values_count(metadata, grp, blk) \ + (kbase_hwcnt_metadata_block_counters_count((metadata), (grp), (blk)) \ + + kbase_hwcnt_metadata_block_headers_count((metadata), (grp), (blk))) + +/** + * kbase_hwcnt_metadata_for_each_block() - Iterate over each block instance in + * the metadata. + * @md: Non-NULL pointer to metadata. + * @grp: size_t variable used as group iterator. + * @blk: size_t variable used as block iterator. + * @blk_inst: size_t variable used as block instance iterator. + * + * Iteration order is group, then block, then block instance (i.e. 
linearly + * through memory). + */ +#define kbase_hwcnt_metadata_for_each_block(md, grp, blk, blk_inst) \ + for ((grp) = 0; (grp) < kbase_hwcnt_metadata_group_count((md)); (grp)++) \ + for ((blk) = 0; (blk) < kbase_hwcnt_metadata_block_count((md), (grp)); (blk)++) \ + for ((blk_inst) = 0; (blk_inst) < kbase_hwcnt_metadata_block_instance_count((md), (grp), (blk)); (blk_inst)++) + +/** + * kbase_hwcnt_metadata_block_avail_bit() - Get the bit index into the avail + * mask corresponding to the block. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: The bit index into the avail mask for the block. + */ +static inline size_t kbase_hwcnt_metadata_block_avail_bit( + const struct kbase_hwcnt_metadata *metadata, + size_t grp, + size_t blk) +{ + const size_t bit = + metadata->grp_metadata[grp].avail_mask_index + + metadata->grp_metadata[grp].blk_metadata[blk].avail_mask_index; + + return bit; +} + +/** + * kbase_hwcnt_metadata_block_instance_avail() - Check if a block instance is + * available. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. + * + * Return: true if the block instance is available, else false. + */ +static inline bool kbase_hwcnt_metadata_block_instance_avail( + const struct kbase_hwcnt_metadata *metadata, + size_t grp, + size_t blk, + size_t blk_inst) +{ + const size_t bit = kbase_hwcnt_metadata_block_avail_bit( + metadata, grp, blk) + blk_inst; + const u64 mask = 1ull << bit; + + return (metadata->avail_mask & mask) != 0; +} + +/** + * kbase_hwcnt_enable_map_alloc() - Allocate an enable map. + * @metadata: Non-NULL pointer to metadata describing the system. + * @enable_map: Non-NULL pointer to enable map to be initialised. Will be + * initialised to all zeroes (i.e. all counters disabled). 
+ * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_enable_map_alloc( + const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_enable_map *enable_map); + +/** + * kbase_hwcnt_enable_map_free() - Free an enable map. + * @enable_map: Enable map to be freed. + * + * Can be safely called on an all-zeroed enable map structure, or on an already + * freed enable map. + */ +void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map); + +/** + * kbase_hwcnt_enable_map_block_instance() - Get the pointer to a block + * instance's enable map. + * @map: Non-NULL pointer to (const) enable map. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. + * + * Return: (const) u64* to the bitfield(s) used as the enable map for the + * block instance. + */ +#define kbase_hwcnt_enable_map_block_instance(map, grp, blk, blk_inst) \ + ((map)->enable_map + \ + (map)->metadata->grp_metadata[(grp)].enable_map_index + \ + (map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_index + \ + (map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_stride * (blk_inst)) + +/** + * kbase_hwcnt_bitfield_count() - Calculate the number of u64 bitfields required + * to have at minimum one bit per value. + * @val_cnt: Number of values. + * + * Return: Number of required bitfields. + */ +static inline size_t kbase_hwcnt_bitfield_count(size_t val_cnt) +{ + return (val_cnt + KBASE_HWCNT_BITFIELD_BITS - 1) / + KBASE_HWCNT_BITFIELD_BITS; +} + +/** + * kbase_hwcnt_enable_map_block_disable_all() - Disable all values in a block. + * @dst: Non-NULL pointer to enable map. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. 
+ */ +static inline void kbase_hwcnt_enable_map_block_disable_all( + struct kbase_hwcnt_enable_map *dst, + size_t grp, + size_t blk, + size_t blk_inst) +{ + const size_t val_cnt = kbase_hwcnt_metadata_block_values_count( + dst->metadata, grp, blk); + const size_t bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); + u64 *block_enable_map = kbase_hwcnt_enable_map_block_instance( + dst, grp, blk, blk_inst); + + memset(block_enable_map, 0, bitfld_cnt * KBASE_HWCNT_BITFIELD_BYTES); +} + +/** + * kbase_hwcnt_enable_map_disable_all() - Disable all values in the enable map. + * @dst: Non-NULL pointer to enable map to zero. + */ +static inline void kbase_hwcnt_enable_map_disable_all( + struct kbase_hwcnt_enable_map *dst) +{ + memset(dst->enable_map, 0, dst->metadata->enable_map_bytes); +} + +/** + * kbase_hwcnt_enable_map_block_enable_all() - Enable all values in a block. + * @dst: Non-NULL pointer to enable map. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. + */ +static inline void kbase_hwcnt_enable_map_block_enable_all( + struct kbase_hwcnt_enable_map *dst, + size_t grp, + size_t blk, + size_t blk_inst) +{ + const size_t val_cnt = kbase_hwcnt_metadata_block_values_count( + dst->metadata, grp, blk); + const size_t bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); + u64 *block_enable_map = kbase_hwcnt_enable_map_block_instance( + dst, grp, blk, blk_inst); + + size_t bitfld_idx; + + for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) { + const u64 remaining_values = val_cnt - + (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS); + u64 block_enable_map_mask = U64_MAX; + + if (remaining_values < KBASE_HWCNT_BITFIELD_BITS) + block_enable_map_mask = (1ull << remaining_values) - 1; + + block_enable_map[bitfld_idx] = block_enable_map_mask; + } +} + +/** + * kbase_hwcnt_enable_map_enable_all() - Enable all values in an enable + * map. + * @dst: Non-NULL pointer to enable map.
+ */ +static inline void kbase_hwcnt_enable_map_enable_all( + struct kbase_hwcnt_enable_map *dst) +{ + size_t grp, blk, blk_inst; + + kbase_hwcnt_metadata_for_each_block(dst->metadata, grp, blk, blk_inst) + kbase_hwcnt_enable_map_block_enable_all( + dst, grp, blk, blk_inst); +} + +/** + * kbase_hwcnt_enable_map_copy() - Copy an enable map to another. + * @dst: Non-NULL pointer to destination enable map. + * @src: Non-NULL pointer to source enable map. + * + * The dst and src MUST have been created from the same metadata. + */ +static inline void kbase_hwcnt_enable_map_copy( + struct kbase_hwcnt_enable_map *dst, + const struct kbase_hwcnt_enable_map *src) +{ + memcpy(dst->enable_map, + src->enable_map, + dst->metadata->enable_map_bytes); +} + +/** + * kbase_hwcnt_enable_map_union() - Union dst and src enable maps into dst. + * @dst: Non-NULL pointer to destination enable map. + * @src: Non-NULL pointer to source enable map. + * + * The dst and src MUST have been created from the same metadata. + */ +static inline void kbase_hwcnt_enable_map_union( + struct kbase_hwcnt_enable_map *dst, + const struct kbase_hwcnt_enable_map *src) +{ + const size_t bitfld_count = + dst->metadata->enable_map_bytes / KBASE_HWCNT_BITFIELD_BYTES; + size_t i; + + for (i = 0; i < bitfld_count; i++) + dst->enable_map[i] |= src->enable_map[i]; +} + +/** + * kbase_hwcnt_enable_map_block_enabled() - Check if any values in a block + * instance are enabled. + * @enable_map: Non-NULL pointer to enable map. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. + * + * Return: true if any values in the block are enabled, else false. 
+ */ +static inline bool kbase_hwcnt_enable_map_block_enabled( + const struct kbase_hwcnt_enable_map *enable_map, + size_t grp, + size_t blk, + size_t blk_inst) +{ + bool any_enabled = false; + const size_t val_cnt = kbase_hwcnt_metadata_block_values_count( + enable_map->metadata, grp, blk); + const size_t bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); + const u64 *block_enable_map = kbase_hwcnt_enable_map_block_instance( + enable_map, grp, blk, blk_inst); + + size_t bitfld_idx; + + for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) { + const u64 remaining_values = val_cnt - + (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS); + u64 block_enable_map_mask = U64_MAX; + + if (remaining_values < KBASE_HWCNT_BITFIELD_BITS) + block_enable_map_mask = (1ull << remaining_values) - 1; + + any_enabled = any_enabled || + (block_enable_map[bitfld_idx] & block_enable_map_mask); + } + + return any_enabled; +} + +/** + * kbase_hwcnt_enable_map_any_enabled() - Check if any values are enabled. + * @enable_map: Non-NULL pointer to enable map. + * + * Return: true if any values are enabled, else false. + */ +static inline bool kbase_hwcnt_enable_map_any_enabled( + const struct kbase_hwcnt_enable_map *enable_map) +{ + size_t grp, blk, blk_inst; + + kbase_hwcnt_metadata_for_each_block( + enable_map->metadata, grp, blk, blk_inst) { + if (kbase_hwcnt_enable_map_block_enabled( + enable_map, grp, blk, blk_inst)) + return true; + } + + return false; +} + +/** + * kbase_hwcnt_enable_map_block_value_enabled() - Check if a value in a block + * instance is enabled. + * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @val_idx: Index of the value to check in the block instance. + * + * Return: true if the value was enabled, else false. 
+ */ +static inline bool kbase_hwcnt_enable_map_block_value_enabled( + const u64 *bitfld, + size_t val_idx) +{ + const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS; + const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS; + const u64 mask = 1ull << bit; + + return (bitfld[idx] & mask) != 0; +} + +/** + * kbase_hwcnt_enable_map_block_enable_value() - Enable a value in a block + * instance. + * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @val_idx: Index of the value to enable in the block instance. + */ +static inline void kbase_hwcnt_enable_map_block_enable_value( + u64 *bitfld, + size_t val_idx) +{ + const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS; + const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS; + const u64 mask = 1ull << bit; + + bitfld[idx] |= mask; +} + +/** + * kbase_hwcnt_enable_map_block_disable_value() - Disable a value in a block + * instance. + * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @val_idx: Index of the value to disable in the block instance. + */ +static inline void kbase_hwcnt_enable_map_block_disable_value( + u64 *bitfld, + size_t val_idx) +{ + const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS; + const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS; + const u64 mask = 1ull << bit; + + bitfld[idx] &= ~mask; +} + +/** + * kbase_hwcnt_dump_buffer_alloc() - Allocate a dump buffer. + * @metadata: Non-NULL pointer to metadata describing the system. + * @dump_buf: Non-NULL pointer to dump buffer to be initialised. Will be + * initialised to undefined values, so must be used as a copy dest, + * or cleared before use. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_dump_buffer_alloc( + const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_dump_buffer *dump_buf); + +/** + * kbase_hwcnt_dump_buffer_free() - Free a dump buffer. 
+ * @dump_buf: Dump buffer to be freed. + * + * Can be safely called on an all-zeroed dump buffer structure, or on an already + * freed dump buffer. + */ +void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf); + +/** + * kbase_hwcnt_dump_buffer_array_alloc() - Allocate an array of dump buffers. + * @metadata: Non-NULL pointer to metadata describing the system. + * @n: Number of dump buffers to allocate + * @dump_bufs: Non-NULL pointer to dump buffer array to be initialised. Each + * dump buffer in the array will be initialised to undefined values, + * so must be used as a copy dest, or cleared before use. + * + * A single contiguous page allocation will be used for all of the buffers + * inside the array, where: + * dump_bufs[n].dump_buf == page_addr + n * metadata.dump_buf_bytes + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_dump_buffer_array_alloc( + const struct kbase_hwcnt_metadata *metadata, + size_t n, + struct kbase_hwcnt_dump_buffer_array *dump_bufs); + +/** + * kbase_hwcnt_dump_buffer_array_free() - Free a dump buffer array. + * @dump_bufs: Dump buffer array to be freed. + * + * Can be safely called on an all-zeroed dump buffer array structure, or on an + * already freed dump buffer array. + */ +void kbase_hwcnt_dump_buffer_array_free( + struct kbase_hwcnt_dump_buffer_array *dump_bufs); + +/** + * kbase_hwcnt_dump_buffer_block_instance() - Get the pointer to a block + * instance's dump buffer. + * @buf: Non-NULL pointer to (const) dump buffer. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. + * + * Return: (const) u32* to the dump buffer for the block instance. 
+ */ +#define kbase_hwcnt_dump_buffer_block_instance(buf, grp, blk, blk_inst) \ + ((buf)->dump_buf + \ + (buf)->metadata->grp_metadata[(grp)].dump_buf_index + \ + (buf)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].dump_buf_index + \ + (buf)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].dump_buf_stride * (blk_inst)) + +/** + * kbase_hwcnt_dump_buffer_zero() - Zero all enabled values in dst. + * After the operation, all non-enabled values + * will be undefined. + * @dst: Non-NULL pointer to dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst and dst_enable_map MUST have been created from the same metadata. + */ +void kbase_hwcnt_dump_buffer_zero( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_zero() - Zero all values in a block. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @val_cnt: Number of values in the block. + */ +static inline void kbase_hwcnt_dump_buffer_block_zero( + u32 *dst_blk, + size_t val_cnt) +{ + memset(dst_blk, 0, (val_cnt * KBASE_HWCNT_VALUE_BYTES)); +} + +/** + * kbase_hwcnt_dump_buffer_zero_strict() - Zero all values in dst. + * After the operation, all values + * (including padding bytes) will be + * zero. + * Slower than the non-strict variant. + * @dst: Non-NULL pointer to dump buffer. + */ +void kbase_hwcnt_dump_buffer_zero_strict( + struct kbase_hwcnt_dump_buffer *dst); + +/** + * kbase_hwcnt_dump_buffer_zero_non_enabled() - Zero all non-enabled values in + * dst (including padding bytes and + * unavailable blocks). + * After the operation, all enabled + * values will be unchanged. + * @dst: Non-NULL pointer to dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst and dst_enable_map MUST have been created from the same metadata. 
+ */ +void kbase_hwcnt_dump_buffer_zero_non_enabled( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_zero_non_enabled() - Zero all non-enabled + * values in a block. + * After the operation, all + * enabled values will be + * unchanged. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @val_cnt: Number of values in the block. + */ +static inline void kbase_hwcnt_dump_buffer_block_zero_non_enabled( + u32 *dst_blk, + const u64 *blk_em, + size_t val_cnt) +{ + size_t val; + + for (val = 0; val < val_cnt; val++) { + if (!kbase_hwcnt_enable_map_block_value_enabled(blk_em, val)) + dst_blk[val] = 0; + } +} + +/** + * kbase_hwcnt_dump_buffer_copy() - Copy all enabled values from src to dst. + * After the operation, all non-enabled values + * will be undefined. + * @dst: Non-NULL pointer to dst dump buffer. + * @src: Non-NULL pointer to src dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst, src, and dst_enable_map MUST have been created from the same + * metadata. + */ +void kbase_hwcnt_dump_buffer_copy( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_copy() - Copy all block values from src to dst. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @src_blk: Non-NULL pointer to src block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @val_cnt: Number of values in the block. 
+ */ +static inline void kbase_hwcnt_dump_buffer_block_copy( + u32 *dst_blk, + const u32 *src_blk, + size_t val_cnt) +{ + /* Copy all the counters in the block instance. + * Values of non-enabled counters are undefined. + */ + memcpy(dst_blk, src_blk, (val_cnt * KBASE_HWCNT_VALUE_BYTES)); +} + +/** + * kbase_hwcnt_dump_buffer_copy_strict() - Copy all enabled values from src to + * dst. + * After the operation, all non-enabled + * values (including padding bytes) will + * be zero. + * Slower than the non-strict variant. + * @dst: Non-NULL pointer to dst dump buffer. + * @src: Non-NULL pointer to src dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst, src, and dst_enable_map MUST have been created from the same + * metadata. + */ +void kbase_hwcnt_dump_buffer_copy_strict( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_copy_strict() - Copy all enabled block values + * from src to dst. + * After the operation, all + * non-enabled values will be + * zero. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @src_blk: Non-NULL pointer to src block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @val_cnt: Number of values in the block. + * + * After the copy, any disabled values in dst will be zero. + */ +static inline void kbase_hwcnt_dump_buffer_block_copy_strict( + u32 *dst_blk, + const u32 *src_blk, + const u64 *blk_em, + size_t val_cnt) +{ + size_t val; + + for (val = 0; val < val_cnt; val++) { + bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled( + blk_em, val); + + dst_blk[val] = val_enabled ? 
src_blk[val] : 0; + } +} + +/** + * kbase_hwcnt_dump_buffer_accumulate() - Copy all enabled headers and + * accumulate all enabled counters from + * src to dst. + * After the operation, all non-enabled + * values will be undefined. + * @dst: Non-NULL pointer to dst dump buffer. + * @src: Non-NULL pointer to src dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst, src, and dst_enable_map MUST have been created from the same + * metadata. + */ +void kbase_hwcnt_dump_buffer_accumulate( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_accumulate() - Copy all block headers and + * accumulate all block counters + * from src to dst. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @src_blk: Non-NULL pointer to src block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @hdr_cnt: Number of headers in the block. + * @ctr_cnt: Number of counters in the block. + */ +static inline void kbase_hwcnt_dump_buffer_block_accumulate( + u32 *dst_blk, + const u32 *src_blk, + size_t hdr_cnt, + size_t ctr_cnt) +{ + size_t ctr; + /* Copy all the headers in the block instance. + * Values of non-enabled headers are undefined. + */ + memcpy(dst_blk, src_blk, hdr_cnt * KBASE_HWCNT_VALUE_BYTES); + + /* Accumulate all the counters in the block instance. + * Values of non-enabled counters are undefined. 
+ */ + for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++) { + u32 *dst_ctr = dst_blk + ctr; + const u32 *src_ctr = src_blk + ctr; + + const u32 src_counter = *src_ctr; + const u32 dst_counter = *dst_ctr; + + /* Saturating add */ + u32 accumulated = src_counter + dst_counter; + + if (accumulated < src_counter) + accumulated = U32_MAX; + + *dst_ctr = accumulated; + } +} + +/** + * kbase_hwcnt_dump_buffer_accumulate_strict() - Copy all enabled headers and + * accumulate all enabled counters + * from src to dst. + * After the operation, all + * non-enabled values (including + * padding bytes) will be zero. + * Slower than the non-strict + * variant. + * @dst: Non-NULL pointer to dst dump buffer. + * @src: Non-NULL pointer to src dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst, src, and dst_enable_map MUST have been created from the same + * metadata. + */ +void kbase_hwcnt_dump_buffer_accumulate_strict( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_accumulate_strict() - Copy all enabled block + * headers and accumulate + * all block counters from + * src to dst. + * After the operation, all + * non-enabled values will + * be zero. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @src_blk: Non-NULL pointer to src block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @hdr_cnt: Number of headers in the block. + * @ctr_cnt: Number of counters in the block. 
+ */ +static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict( + u32 *dst_blk, + const u32 *src_blk, + const u64 *blk_em, + size_t hdr_cnt, + size_t ctr_cnt) +{ + size_t ctr; + + kbase_hwcnt_dump_buffer_block_copy_strict( + dst_blk, src_blk, blk_em, hdr_cnt); + + for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++) { + bool ctr_enabled = kbase_hwcnt_enable_map_block_value_enabled( + blk_em, ctr); + + u32 *dst_ctr = dst_blk + ctr; + const u32 *src_ctr = src_blk + ctr; + + const u32 src_counter = *src_ctr; + const u32 dst_counter = *dst_ctr; + + /* Saturating add */ + u32 accumulated = src_counter + dst_counter; + + if (accumulated < src_counter) + accumulated = U32_MAX; + + *dst_ctr = ctr_enabled ? accumulated : 0; + } +} + +#endif /* _KBASE_HWCNT_TYPES_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.c new file mode 100644 index 0000000..917e47c --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.c @@ -0,0 +1,790 @@ +/* + * + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_hwcnt_virtualizer.h" +#include "mali_kbase_hwcnt_accumulator.h" +#include "mali_kbase_hwcnt_context.h" +#include "mali_kbase_hwcnt_types.h" +#include "mali_malisw.h" +#include "mali_kbase_debug.h" +#include "mali_kbase_linux.h" + +#include +#include + +/** + * struct kbase_hwcnt_virtualizer - Hardware counter virtualizer structure. + * @hctx: Hardware counter context being virtualized. + * @dump_threshold_ns: Minimum threshold period for dumps between different + * clients where a new accumulator dump will not be + * performed, and instead accumulated values will be used. + * If 0, rate limiting is disabled. + * @metadata: Hardware counter metadata. + * @lock: Lock acquired at all entrypoints, to protect mutable + * state. + * @client_count: Current number of virtualizer clients. + * @clients: List of virtualizer clients. + * @accum: Hardware counter accumulator. NULL if no clients. + * @scratch_map: Enable map used as scratch space during counter changes. + * @scratch_buf: Dump buffer used as scratch space during dumps. + * @ts_last_dump_ns: End time of most recent dump across all clients. + */ +struct kbase_hwcnt_virtualizer { + struct kbase_hwcnt_context *hctx; + u64 dump_threshold_ns; + const struct kbase_hwcnt_metadata *metadata; + struct mutex lock; + size_t client_count; + struct list_head clients; + struct kbase_hwcnt_accumulator *accum; + struct kbase_hwcnt_enable_map scratch_map; + struct kbase_hwcnt_dump_buffer scratch_buf; + u64 ts_last_dump_ns; +}; + +/** + * struct kbase_hwcnt_virtualizer_client - Virtualizer client structure. + * @node: List node used for virtualizer client list. + * @hvirt: Hardware counter virtualizer. + * @enable_map: Enable map with client's current enabled counters. + * @accum_buf: Dump buffer with client's current accumulated counters. + * @has_accum: True if accum_buf contains any accumulated counters. 
+ * @ts_start_ns: Counter collection start time of current dump.
+ */
+struct kbase_hwcnt_virtualizer_client {
+	struct list_head node;
+	struct kbase_hwcnt_virtualizer *hvirt;
+	struct kbase_hwcnt_enable_map enable_map;
+	struct kbase_hwcnt_dump_buffer accum_buf;
+	bool has_accum;
+	u64 ts_start_ns;
+};
+
+const struct kbase_hwcnt_metadata *kbase_hwcnt_virtualizer_metadata(
+	struct kbase_hwcnt_virtualizer *hvirt)
+{
+	/* A NULL virtualizer has no metadata to report */
+	return hvirt ? hvirt->metadata : NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_metadata);
+
+/**
+ * kbasep_hwcnt_virtualizer_client_free - Release all memory owned by a
+ *                                        virtualizer client.
+ * @hvcli: Pointer to virtualizer client. May be NULL, in which case nothing
+ *         is done.
+ *
+ * Safe to call on a client that was only partially constructed.
+ */
+static void kbasep_hwcnt_virtualizer_client_free(
+	struct kbase_hwcnt_virtualizer_client *hvcli)
+{
+	if (hvcli) {
+		/* Release the embedded buffers before the client itself */
+		kbase_hwcnt_dump_buffer_free(&hvcli->accum_buf);
+		kbase_hwcnt_enable_map_free(&hvcli->enable_map);
+		kfree(hvcli);
+	}
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_alloc - Allocate memory for a virtualizer
+ *                                         client.
+ * @metadata:  Non-NULL pointer to counter metadata.
+ * @out_hvcli: Non-NULL pointer to where created client will be stored on
+ *             success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_alloc(
+	const struct kbase_hwcnt_metadata *metadata,
+	struct kbase_hwcnt_virtualizer_client **out_hvcli)
+{
+	struct kbase_hwcnt_virtualizer_client *client;
+	int errcode;
+
+	WARN_ON(!metadata);
+	WARN_ON(!out_hvcli);
+
+	client = kzalloc(sizeof(*client), GFP_KERNEL);
+	if (!client)
+		return -ENOMEM;
+
+	/* Allocate the enable map first, then the accumulation buffer */
+	errcode = kbase_hwcnt_enable_map_alloc(metadata, &client->enable_map);
+	if (!errcode)
+		errcode = kbase_hwcnt_dump_buffer_alloc(
+			metadata, &client->accum_buf);
+
+	if (errcode) {
+		/* The free helper copes with a partially built client */
+		kbasep_hwcnt_virtualizer_client_free(client);
+		return errcode;
+	}
+
+	*out_hvcli = client;
+	return 0;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_accumulate - Fold a dump buffer into a
+ *                                              client's accumulation buffer.
+ * @hvcli:    Non-NULL pointer to virtualizer client.
+ * @dump_buf: Non-NULL pointer to dump buffer to accumulate from.
+ *
+ * Must be called with the owning virtualizer's lock held. Afterwards the
+ * client is marked as having accumulated counters.
+ */
+static void kbasep_hwcnt_virtualizer_client_accumulate(
+	struct kbase_hwcnt_virtualizer_client *hvcli,
+	const struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	WARN_ON(!hvcli);
+	WARN_ON(!dump_buf);
+	lockdep_assert_held(&hvcli->hvirt->lock);
+
+	if (!hvcli->has_accum) {
+		/* Nothing accumulated yet, so the dump becomes the total */
+		kbase_hwcnt_dump_buffer_copy(
+			&hvcli->accum_buf, dump_buf, &hvcli->enable_map);
+	} else {
+		/* Add the dump on top of what is already accumulated */
+		kbase_hwcnt_dump_buffer_accumulate(
+			&hvcli->accum_buf, dump_buf, &hvcli->enable_map);
+	}
+	hvcli->has_accum = true;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_accumulator_term - Terminate the hardware counter
+ *                                             accumulator after final client
+ *                                             removal.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ *
+ * Will safely terminate the accumulator in any partial state of initialisation.
+ */
+static void kbasep_hwcnt_virtualizer_accumulator_term(
+	struct kbase_hwcnt_virtualizer *hvirt)
+{
+	WARN_ON(!hvirt);
+	lockdep_assert_held(&hvirt->lock);
+	WARN_ON(hvirt->client_count);
+
+	/* Tear down in the reverse order of accumulator_init() */
+	kbase_hwcnt_dump_buffer_free(&hvirt->scratch_buf);
+	kbase_hwcnt_enable_map_free(&hvirt->scratch_map);
+	kbase_hwcnt_accumulator_release(hvirt->accum);
+
+	/* A NULL accumulator marks the virtualizer as having no clients */
+	hvirt->accum = NULL;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_accumulator_init - Initialise the hardware counter
+ *                                             accumulator before first client
+ *                                             addition.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ *
+ * Must be called with hvirt->lock held, with no clients and no accumulator
+ * present. On failure everything set up so far is torn down again.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_virtualizer_accumulator_init(
+	struct kbase_hwcnt_virtualizer *hvirt)
+{
+	int errcode;
+
+	WARN_ON(!hvirt);
+	lockdep_assert_held(&hvirt->lock);
+	WARN_ON(hvirt->client_count);
+	WARN_ON(hvirt->accum);
+
+	/* Each step only runs if every previous step succeeded */
+	errcode = kbase_hwcnt_accumulator_acquire(
+		hvirt->hctx, &hvirt->accum);
+	if (!errcode)
+		errcode = kbase_hwcnt_enable_map_alloc(
+			hvirt->metadata, &hvirt->scratch_map);
+	if (!errcode)
+		errcode = kbase_hwcnt_dump_buffer_alloc(
+			hvirt->metadata, &hvirt->scratch_buf);
+
+	/* Undo any partial initialisation on failure */
+	if (errcode)
+		kbasep_hwcnt_virtualizer_accumulator_term(hvirt);
+
+	return errcode;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_add - Add a newly allocated client to the
+ *                                       virtualizer.
+ * @hvirt:      Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli:      Non-NULL pointer to the virtualizer client to add.
+ * @enable_map: Non-NULL pointer to client's initial enable map.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_add(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	struct kbase_hwcnt_virtualizer_client *hvcli,
+	const struct kbase_hwcnt_enable_map *enable_map)
+{
+	struct kbase_hwcnt_virtualizer_client *iter;
+	int errcode;
+	u64 ts_start_ns;
+	u64 ts_end_ns;
+
+	WARN_ON(!hvirt);
+	WARN_ON(!hvcli);
+	WARN_ON(!enable_map);
+	lockdep_assert_held(&hvirt->lock);
+
+	if (hvirt->client_count == 0) {
+		/* First client added, so initialise the accumulator */
+		errcode = kbasep_hwcnt_virtualizer_accumulator_init(hvirt);
+		if (errcode)
+			return errcode;
+	}
+
+	hvirt->client_count += 1;
+
+	if (hvirt->client_count == 1) {
+		/* Sole client: its enable map is handed over unchanged and
+		 * the dump taken by the counter change is discarded
+		 */
+		errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum,
+			enable_map, &ts_start_ns, &ts_end_ns, NULL);
+		if (errcode)
+			goto error;
+	} else {
+		/* Build the union of the new map and every existing one */
+		kbase_hwcnt_enable_map_copy(
+			&hvirt->scratch_map, enable_map);
+		list_for_each_entry(iter, &hvirt->clients, node)
+			kbase_hwcnt_enable_map_union(
+				&hvirt->scratch_map, &iter->enable_map);
+
+		/* Program the union, capturing the dump that this causes */
+		errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum,
+			&hvirt->scratch_map,
+			&ts_start_ns, &ts_end_ns,
+			&hvirt->scratch_buf);
+		if (errcode)
+			goto error;
+
+		/* The new client is not on the list yet, so only existing
+		 * clients receive the captured dump
+		 */
+		list_for_each_entry(iter, &hvirt->clients, node)
+			kbasep_hwcnt_virtualizer_client_accumulate(
+				iter, &hvirt->scratch_buf);
+	}
+
+	/* Publish the client and start its collection period now */
+	list_add(&hvcli->node, &hvirt->clients);
+	hvcli->hvirt = hvirt;
+	kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map);
+	hvcli->has_accum = false;
+	hvcli->ts_start_ns = ts_end_ns;
+
+	/* Store the most recent dump time for rate limiting */
+	hvirt->ts_last_dump_ns = ts_end_ns;
+
+	return 0;
+error:
+	/* Roll back the count, dropping the accumulator if it is now unused */
+	hvirt->client_count -= 1;
+	if (hvirt->client_count == 0)
+		kbasep_hwcnt_virtualizer_accumulator_term(hvirt);
+	return errcode;
+}
+
+/**
+ * 
kbasep_hwcnt_virtualizer_client_remove - Remove a client from the
+ *                                          virtualizer.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli: Non-NULL pointer to the virtualizer client to remove.
+ *
+ * Must be called with hvirt->lock held. Removing the last client terminates
+ * the accumulator. Otherwise the accumulator is reprogrammed with the union of
+ * the remaining clients' enable maps, and the dump captured by that change is
+ * accumulated into every remaining client.
+ *
+ * A failure to reprogram the accumulator cannot be reported to the caller, so
+ * it is only flagged with WARN_ON(); in that case neither the remaining
+ * clients' accumulation buffers nor the last-dump timestamp are modified.
+ */
+static void kbasep_hwcnt_virtualizer_client_remove(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	struct kbase_hwcnt_virtualizer_client *hvcli)
+{
+	int errcode = 0;
+	u64 ts_start_ns;
+	u64 ts_end_ns;
+
+	WARN_ON(!hvirt);
+	WARN_ON(!hvcli);
+	lockdep_assert_held(&hvirt->lock);
+
+	list_del(&hvcli->node);
+	hvirt->client_count -= 1;
+
+	if (hvirt->client_count == 0) {
+		/* Last client removed, so terminate the accumulator */
+		kbasep_hwcnt_virtualizer_accumulator_term(hvirt);
+	} else {
+		struct kbase_hwcnt_virtualizer_client *pos;
+
+		/* Make the scratch enable map the union of all enable maps */
+		kbase_hwcnt_enable_map_disable_all(&hvirt->scratch_map);
+		list_for_each_entry(pos, &hvirt->clients, node)
+			kbase_hwcnt_enable_map_union(
+				&hvirt->scratch_map, &pos->enable_map);
+
+		/* Set the counters with the new union enable map */
+		errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum,
+			&hvirt->scratch_map,
+			&ts_start_ns, &ts_end_ns,
+			&hvirt->scratch_buf);
+
+		if (!errcode) {
+			/* Accumulate into remaining clients' accumulation
+			 * bufs
+			 */
+			list_for_each_entry(pos, &hvirt->clients, node)
+				kbasep_hwcnt_virtualizer_client_accumulate(
+					pos, &hvirt->scratch_buf);
+
+			/* Store the most recent dump time for rate limiting.
+			 * This must stay inside the success path: ts_end_ns
+			 * is an uninitialised local unless set_counters()
+			 * succeeded (the timestamps are documented as being
+			 * written out on success only).
+			 */
+			hvirt->ts_last_dump_ns = ts_end_ns;
+		}
+	}
+	WARN_ON(errcode);
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's
+ *                                                currently enabled counters,
+ *                                                and enable a new set of
+ *                                                counters that will be used for
+ *                                                subsequent dumps.
+ * @hvirt:       Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli:       Non-NULL pointer to the virtualizer client.
+ * @enable_map:  Non-NULL pointer to the new counter enable map for the client.
+ *               Must have the same metadata as the virtualizer.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ *               be written out to on success.
+ * @ts_end_ns:   Non-NULL pointer where the end timestamp of the dump will
+ *               be written out to on success.
+ * @dump_buf:    Pointer to the buffer where the dump will be written out to on
+ *               success. If non-NULL, must have the same metadata as the
+ *               accumulator. If NULL, the dump will be discarded.
+ *
+ * Must be called with hvirt->lock held. The accumulator is programmed with the
+ * union of @enable_map and every other client's enable map; the dump captured
+ * by that change is accumulated into the other clients and, combined with
+ * anything @hvcli had already accumulated, written to @dump_buf.
+ *
+ * Return: 0 on success or error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_set_counters(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	struct kbase_hwcnt_virtualizer_client *hvcli,
+	const struct kbase_hwcnt_enable_map *enable_map,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	int errcode;
+	struct kbase_hwcnt_virtualizer_client *pos;
+
+	WARN_ON(!hvirt);
+	WARN_ON(!hvcli);
+	WARN_ON(!enable_map);
+	WARN_ON(!ts_start_ns);
+	WARN_ON(!ts_end_ns);
+	WARN_ON(enable_map->metadata != hvirt->metadata);
+	WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata));
+	lockdep_assert_held(&hvirt->lock);
+
+	/* Make the scratch enable map the union of all enable maps, starting
+	 * from the selected client's new map rather than its current one
+	 */
+	kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map);
+	list_for_each_entry(pos, &hvirt->clients, node)
+		/* Ignore the enable map of the selected client */
+		if (pos != hvcli)
+			kbase_hwcnt_enable_map_union(
+				&hvirt->scratch_map, &pos->enable_map);
+
+	/* Set the counters with the new union enable map. The dump this
+	 * produces lands in the scratch buffer.
+	 */
+	errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum,
+		&hvirt->scratch_map, ts_start_ns, ts_end_ns,
+		&hvirt->scratch_buf);
+	if (errcode)
+		return errcode;
+
+	/* Accumulate into all accumulation bufs except the selected client's */
+	list_for_each_entry(pos, &hvirt->clients, node)
+		if (pos != hvcli)
+			kbasep_hwcnt_virtualizer_client_accumulate(
+				pos, &hvirt->scratch_buf);
+
+	/* Finally, write into the dump buf. If the client already holds
+	 * accumulated counters, this dump is folded into them first and the
+	 * total is what gets copied out.
+	 */
+	if (dump_buf) {
+		const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf;
+
+		if (hvcli->has_accum) {
+			kbase_hwcnt_dump_buffer_accumulate(
+				&hvcli->accum_buf, src, &hvcli->enable_map);
+			src = &hvcli->accum_buf;
+		}
+		kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map);
+	}
+	/* Cleared even when dump_buf is NULL, i.e. the accumulation is
+	 * discarded along with the dump
+	 */
+	hvcli->has_accum = false;
+
+	/* Update the selected client's enable map */
+	kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map);
+
+	/* Fix up the timestamps: report the client's own collection start
+	 * time, and start its next collection period at the end of this dump
+	 */
+	*ts_start_ns = hvcli->ts_start_ns;
+	hvcli->ts_start_ns = *ts_end_ns;
+
+	/* Store the most recent dump time for rate limiting */
+	hvirt->ts_last_dump_ns = *ts_end_ns;
+
+	return errcode;
+}
+
+int kbase_hwcnt_virtualizer_client_set_counters(
+	struct kbase_hwcnt_virtualizer_client *hvcli,
+	const struct kbase_hwcnt_enable_map *enable_map,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	int errcode;
+	struct kbase_hwcnt_virtualizer *hvirt;
+
+	if (!hvcli || !enable_map || !ts_start_ns || !ts_end_ns)
+		return -EINVAL;
+
+	hvirt = hvcli->hvirt;
+
+	/* All counter structures must share the virtualizer's metadata */
+	if ((enable_map->metadata != hvirt->metadata) ||
+	    (dump_buf && (dump_buf->metadata != hvirt->metadata)))
+		return -EINVAL;
+
+	mutex_lock(&hvirt->lock);
+
+	if ((hvirt->client_count == 1) && (!hvcli->has_accum)) {
+		/*
+		 * If there's only one client with no prior accumulation, we can
+		 * completely skip the virtualize and just pass through the call
+		 * to the accumulator, saving a fair few copies and
+		 * accumulations.
+		 */
+		errcode = kbase_hwcnt_accumulator_set_counters(
+			hvirt->accum, enable_map,
+			ts_start_ns, ts_end_ns, dump_buf);
+
+		if (!errcode) {
+			/* Update the selected client's enable map */
+			kbase_hwcnt_enable_map_copy(
+				&hvcli->enable_map, enable_map);
+
+			/* Fix up the timestamps */
+			*ts_start_ns = hvcli->ts_start_ns;
+			hvcli->ts_start_ns = *ts_end_ns;
+
+			/* Store the most recent dump time for rate limiting */
+			hvirt->ts_last_dump_ns = *ts_end_ns;
+		}
+	} else {
+		/* Otherwise, do the full virtualize */
+		errcode = kbasep_hwcnt_virtualizer_client_set_counters(
+			hvirt, hvcli, enable_map,
+			ts_start_ns, ts_end_ns, dump_buf);
+	}
+
+	mutex_unlock(&hvirt->lock);
+
+	return errcode;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_set_counters);
+
+/**
+ * kbasep_hwcnt_virtualizer_client_dump - Perform a dump of the client's
+ *                                        currently enabled counters.
+ * @hvirt:       Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli:       Non-NULL pointer to the virtualizer client.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ *               be written out to on success.
+ * @ts_end_ns:   Non-NULL pointer where the end timestamp of the dump will
+ *               be written out to on success.
+ * @dump_buf:    Pointer to the buffer where the dump will be written out to on
+ *               success. If non-NULL, must have the same metadata as the
+ *               accumulator. If NULL, the dump will be discarded.
+ *
+ * Return: 0 on success or error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_dump(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	struct kbase_hwcnt_virtualizer_client *hvcli,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	struct kbase_hwcnt_virtualizer_client *iter;
+	int errcode;
+
+	WARN_ON(!hvirt);
+	WARN_ON(!hvcli);
+	WARN_ON(!ts_start_ns);
+	WARN_ON(!ts_end_ns);
+	WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata));
+	lockdep_assert_held(&hvirt->lock);
+
+	/* Dump the accumulator into the shared scratch buffer */
+	errcode = kbase_hwcnt_accumulator_dump(hvirt->accum,
+		ts_start_ns, ts_end_ns, &hvirt->scratch_buf);
+	if (errcode)
+		return errcode;
+
+	/* Every other client keeps the new values for its own next dump */
+	list_for_each_entry(iter, &hvirt->clients, node) {
+		if (iter == hvcli)
+			continue;
+		kbasep_hwcnt_virtualizer_client_accumulate(
+			iter, &hvirt->scratch_buf);
+	}
+
+	/* Hand the result to the caller, if it wants one */
+	if (dump_buf) {
+		if (hvcli->has_accum) {
+			/* Fold the new dump into the earlier accumulation and
+			 * report the total
+			 */
+			kbase_hwcnt_dump_buffer_accumulate(&hvcli->accum_buf,
+				&hvirt->scratch_buf, &hvcli->enable_map);
+			kbase_hwcnt_dump_buffer_copy(dump_buf,
+				&hvcli->accum_buf, &hvcli->enable_map);
+		} else {
+			kbase_hwcnt_dump_buffer_copy(dump_buf,
+				&hvirt->scratch_buf, &hvcli->enable_map);
+		}
+	}
+	hvcli->has_accum = false;
+
+	/* Report the client's own start time and begin its next period at
+	 * the end of this dump
+	 */
+	*ts_start_ns = hvcli->ts_start_ns;
+	hvcli->ts_start_ns = *ts_end_ns;
+
+	/* Store the most recent dump time for rate limiting */
+	hvirt->ts_last_dump_ns = *ts_end_ns;
+
+	return 0;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_dump_rate_limited - Perform a dump of the
+ *                                           client's currently enabled counters
+ *                                           if it hasn't been rate limited,
+ *                                           otherwise return the client's most
+ *                                           recent accumulation.
+ * @hvirt:       Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli:       Non-NULL pointer to the virtualizer client.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ *               be written out to on success.
+ * @ts_end_ns:   Non-NULL pointer where the end timestamp of the dump will
+ *               be written out to on success.
+ * @dump_buf:    Pointer to the buffer where the dump will be written out to on
+ *               success. If non-NULL, must have the same metadata as the
+ *               accumulator. If NULL, the dump will be discarded.
+ *
+ * Must be called with hvirt->lock held. A dump is not rate limited when the
+ * threshold is 0, when the most recent dump was made by this same client, or
+ * when at least dump_threshold_ns has elapsed since the most recent dump.
+ *
+ * Return: 0 on success or error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_dump_rate_limited(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	struct kbase_hwcnt_virtualizer_client *hvcli,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	bool rate_limited = true;
+
+	WARN_ON(!hvirt);
+	WARN_ON(!hvcli);
+	WARN_ON(!ts_start_ns);
+	WARN_ON(!ts_end_ns);
+	WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata));
+	lockdep_assert_held(&hvirt->lock);
+
+	if (hvirt->dump_threshold_ns == 0) {
+		/* Threshold == 0, so rate limiting disabled */
+		rate_limited = false;
+	} else if (hvirt->ts_last_dump_ns == hvcli->ts_start_ns) {
+		/* Last dump was performed by this client, and dumps from an
+		 * individual client are never rate limited
+		 */
+		rate_limited = false;
+	} else {
+		const u64 ts_ns =
+			kbase_hwcnt_accumulator_timestamp_ns(hvirt->accum);
+		const u64 time_since_last_dump_ns =
+			ts_ns - hvirt->ts_last_dump_ns;
+
+		/* Dump period equals or exceeds the threshold */
+		if (time_since_last_dump_ns >= hvirt->dump_threshold_ns)
+			rate_limited = false;
+	}
+
+	if (!rate_limited)
+		return kbasep_hwcnt_virtualizer_client_dump(
+			hvirt, hvcli, ts_start_ns, ts_end_ns, dump_buf);
+
+	/* If we've gotten this far, the client must have something accumulated
+	 * otherwise it is a logic error
+	 */
+	WARN_ON(!hvcli->has_accum);
+
+	/* Rate limited: serve the client from what other clients' dumps have
+	 * already accumulated for it, without touching the accumulator
+	 */
+	if (dump_buf)
+		kbase_hwcnt_dump_buffer_copy(
+			dump_buf, &hvcli->accum_buf, &hvcli->enable_map);
+	hvcli->has_accum = false;
+
+	/* The reported period ends at the most recent real dump */
+	*ts_start_ns = hvcli->ts_start_ns;
+	*ts_end_ns = hvirt->ts_last_dump_ns;
+	hvcli->ts_start_ns = hvirt->ts_last_dump_ns;
+
+	return 0;
+}
+
+int kbase_hwcnt_virtualizer_client_dump(
+	struct kbase_hwcnt_virtualizer_client *hvcli,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	int errcode;
+	struct kbase_hwcnt_virtualizer *hvirt;
+
+	if (!hvcli || !ts_start_ns || !ts_end_ns)
+		return -EINVAL;
+
+	hvirt = hvcli->hvirt;
+
+	/* The dump buffer, if any, must share the virtualizer's metadata */
+	if (dump_buf && (dump_buf->metadata != hvirt->metadata))
+		return -EINVAL;
+
+	mutex_lock(&hvirt->lock);
+
+	if ((hvirt->client_count == 1) && (!hvcli->has_accum)) {
+		/*
+		 * If there's only one client with no prior accumulation, we can
+		 * completely skip the virtualize and just pass through the call
+		 * to the accumulator, saving a fair few copies and
+		 * accumulations.
+		 */
+		errcode = kbase_hwcnt_accumulator_dump(
+			hvirt->accum, ts_start_ns, ts_end_ns, dump_buf);
+
+		if (!errcode) {
+			/* Fix up the timestamps */
+			*ts_start_ns = hvcli->ts_start_ns;
+			hvcli->ts_start_ns = *ts_end_ns;
+
+			/* Store the most recent dump time for rate limiting */
+			hvirt->ts_last_dump_ns = *ts_end_ns;
+		}
+	} else {
+		/* Otherwise, do the full virtualize */
+		errcode = kbasep_hwcnt_virtualizer_client_dump_rate_limited(
+			hvirt, hvcli, ts_start_ns, ts_end_ns, dump_buf);
+	}
+
+	mutex_unlock(&hvirt->lock);
+
+	return errcode;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_dump);
+
+int kbase_hwcnt_virtualizer_client_create(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	const struct kbase_hwcnt_enable_map *enable_map,
+	struct kbase_hwcnt_virtualizer_client **out_hvcli)
+{
+	int errcode;
+	struct kbase_hwcnt_virtualizer_client *hvcli;
+
+	if (!hvirt || !enable_map || !out_hvcli ||
+	    (enable_map->metadata != hvirt->metadata))
+		return -EINVAL;
+
+	/* The client is allocated before the lock is taken; only adding it to
+	 * the virtualizer is done under the lock
+	 */
+	errcode = kbasep_hwcnt_virtualizer_client_alloc(
+		hvirt->metadata, &hvcli);
+	if (errcode)
+		return errcode;
+
+	mutex_lock(&hvirt->lock);
+
+	errcode = kbasep_hwcnt_virtualizer_client_add(hvirt, hvcli, enable_map);
+
+	mutex_unlock(&hvirt->lock);
+
+	if (errcode) {
+		kbasep_hwcnt_virtualizer_client_free(hvcli);
+		return errcode;
+	}
+
+	*out_hvcli = hvcli;
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_create);
+
+void kbase_hwcnt_virtualizer_client_destroy(
+	struct kbase_hwcnt_virtualizer_client *hvcli)
+{
+	if (!hvcli)
+		return;
+
+	mutex_lock(&hvcli->hvirt->lock);
+
+	kbasep_hwcnt_virtualizer_client_remove(hvcli->hvirt, hvcli);
+
+	mutex_unlock(&hvcli->hvirt->lock);
+
+	/* The client is off the list now, so it is freed outside the lock */
+	kbasep_hwcnt_virtualizer_client_free(hvcli);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_destroy);
+
+int kbase_hwcnt_virtualizer_init(
+	struct kbase_hwcnt_context *hctx,
+	u64 dump_threshold_ns,
+	struct kbase_hwcnt_virtualizer **out_hvirt)
+{
+	struct kbase_hwcnt_virtualizer *virt;
+	const struct kbase_hwcnt_metadata *metadata;
+
+	if (!hctx || !out_hvirt)
+		return -EINVAL;
+
+	metadata = kbase_hwcnt_context_metadata(hctx);
+	if (!metadata)
+		return -EINVAL;
+
+	/* Zero-initialised, so client_count starts at 0 and accum at NULL */
+	virt = kzalloc(sizeof(*virt), GFP_KERNEL);
+	if (!virt)
+		return -ENOMEM;
+
+	virt->hctx = hctx;
+	virt->dump_threshold_ns = dump_threshold_ns;
+	virt->metadata = metadata;
+
+	mutex_init(&virt->lock);
+	INIT_LIST_HEAD(&virt->clients);
+
+	*out_hvirt = virt;
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_init);
+
+void kbase_hwcnt_virtualizer_term(
+	struct kbase_hwcnt_virtualizer *hvirt)
+{
+	if (!hvirt)
+		return;
+
+	/* Non-zero client count implies client leak */
+	if (WARN_ON(hvirt->client_count != 0)) {
+		struct kbase_hwcnt_virtualizer_client *pos, *n;
+
+		/* The _safe iterator is needed because destroy unlinks and
+		 * frees each entry
+		 */
+		list_for_each_entry_safe(pos, n, &hvirt->clients, node)
+			kbase_hwcnt_virtualizer_client_destroy(pos);
+	}
+
+	WARN_ON(hvirt->client_count != 0);
+	WARN_ON(hvirt->accum);
+
+	kfree(hvirt);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_term);
diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.h
new file mode 100644
index 0000000..8f628c3
--- /dev/null
+++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.h
@@ -0,0 +1,145 @@
+/*
+ *
+ * (C) 
COPYRIGHT 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Hardware counter virtualizer API. + * + * Virtualizes a hardware counter context, so multiple clients can access + * a single hardware counter resource as though each was the exclusive user. + */ + +#ifndef _KBASE_HWCNT_VIRTUALIZER_H_ +#define _KBASE_HWCNT_VIRTUALIZER_H_ + +#include + +struct kbase_hwcnt_context; +struct kbase_hwcnt_virtualizer; +struct kbase_hwcnt_virtualizer_client; +struct kbase_hwcnt_enable_map; +struct kbase_hwcnt_dump_buffer; + +/** + * kbase_hwcnt_virtualizer_init - Initialise a hardware counter virtualizer. + * @hctx: Non-NULL pointer to the hardware counter context to + * virtualize. + * @dump_threshold_ns: Minimum threshold period for dumps between different + * clients where a new accumulator dump will not be + * performed, and instead accumulated values will be used. + * If 0, rate limiting will be disabled. + * @out_hvirt: Non-NULL pointer to where the pointer to the created + * virtualizer will be stored on success. + * + * Return: 0 on success, else error code. 
+ */ +int kbase_hwcnt_virtualizer_init( + struct kbase_hwcnt_context *hctx, + u64 dump_threshold_ns, + struct kbase_hwcnt_virtualizer **out_hvirt); + +/** + * kbase_hwcnt_virtualizer_term - Terminate a hardware counter virtualizer. + * @hvirt: Pointer to virtualizer to be terminated. + */ +void kbase_hwcnt_virtualizer_term( + struct kbase_hwcnt_virtualizer *hvirt); + +/** + * kbase_hwcnt_virtualizer_metadata - Get the hardware counter metadata used by + * the virtualizer, so related counter data + * structures can be created. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * + * Return: Non-NULL pointer to metadata, or NULL on error. + */ +const struct kbase_hwcnt_metadata *kbase_hwcnt_virtualizer_metadata( + struct kbase_hwcnt_virtualizer *hvirt); + +/** + * kbase_hwcnt_virtualizer_client_create - Create a new virtualizer client. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @enable_map: Non-NULL pointer to the enable map for the client. Must have the + * same metadata as the virtualizer. + * @out_hvcli: Non-NULL pointer to where the pointer to the created client will + * be stored on success. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_virtualizer_client_create( + struct kbase_hwcnt_virtualizer *hvirt, + const struct kbase_hwcnt_enable_map *enable_map, + struct kbase_hwcnt_virtualizer_client **out_hvcli); + +/** + * kbase_hwcnt_virtualizer_client_destroy() - Destroy a virtualizer client. + * @hvcli: Pointer to the hardware counter client. + */ +void kbase_hwcnt_virtualizer_client_destroy( + struct kbase_hwcnt_virtualizer_client *hvcli); + +/** + * kbase_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's + * currently enabled counters, and + * enable a new set of counters + * that will be used for + * subsequent dumps. + * @hvcli: Non-NULL pointer to the virtualizer client. + * @enable_map: Non-NULL pointer to the new counter enable map for the client. 
+ * Must have the same metadata as the virtualizer. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * Return: 0 on success or error code. + */ +int kbase_hwcnt_virtualizer_client_set_counters( + struct kbase_hwcnt_virtualizer_client *hvcli, + const struct kbase_hwcnt_enable_map *enable_map, + u64 *ts_start_ns, + u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf); + +/** + * kbase_hwcnt_virtualizer_client_dump - Perform a dump of the client's + * currently enabled counters. + * @hvcli: Non-NULL pointer to the virtualizer client. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * Return: 0 on success or error code. 
+ */
+int kbase_hwcnt_virtualizer_client_dump(
+	struct kbase_hwcnt_virtualizer_client *hvcli,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf);
+
+#endif /* _KBASE_HWCNT_VIRTUALIZER_H_ */
diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ioctl.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ioctl.h
similarity index 100%
rename from dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ioctl.h
rename to bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_ioctl.h
diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c
new file mode 100644
index 0000000..43f4b4d
--- /dev/null
+++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c
@@ -0,0 +1,1804 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#include
+#ifdef CONFIG_COMPAT
+#include
+#endif
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include "mali_kbase_dma_fence.h"
+#include
+
+#define beenthere(kctx, f, a...)  dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+/* Return whether katom will run on the GPU or not. Currently only soft jobs and
+ * dependency-only atoms do not run on the GPU.
+ *
+ * Note: the katom argument is expanded twice and without parentheses, so only
+ * pass a plain pointer variable.
+ */
+#define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) ||  \
+			((katom->core_req & BASE_JD_REQ_ATOM_TYPE) ==    \
+							BASE_JD_REQ_DEP)))
+
+/* Minimum API version that supports the just-in-time memory allocation pressure
+ * limit feature.
+ */
+#define MIN_API_VERSION_WITH_JPL KBASE_API_VERSION(11, 20)
+
+/*
+ * This is the kernel side of the API. Only entry points are:
+ * - kbase_jd_submit(): Called from userspace to submit a single bag
+ * - kbase_jd_done(): Called from interrupt context to track the
+ *   completion of a job.
+ * Callouts:
+ * - to the job manager (enqueue a job)
+ * - to the event subsystem (signals the completion/failure of bag/job-chains).
+ *
+ * get_compat_pointer() below converts a 64-bit value supplied by userspace
+ * into a user-space pointer. When CONFIG_COMPAT is enabled and the context has
+ * the KCTX_COMPAT flag set, the conversion goes through compat_ptr();
+ * otherwise u64_to_user_ptr() is used.
+ */
+
+static void __user *
+get_compat_pointer(struct kbase_context *kctx, const u64 p)
+{
+#ifdef CONFIG_COMPAT
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		return compat_ptr(p);
+#endif
+	return u64_to_user_ptr(p);
+}
+
+/* Runs an atom, either by handing to the JS or by immediately running it in the case of soft-jobs
+ *
+ * Returns whether the JS needs a reschedule.
+ *
+ * Note that the caller must also check the atom status and
+ * if it is KBASE_JD_ATOM_STATE_COMPLETED must call jd_done_nolock
+ */
+static bool jd_run_atom(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	dev_dbg(kctx->kbdev->dev, "JD run atom %p in kctx %p\n",
+		(void *)katom, (void *)kctx);
+
+	KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+	if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) {
+		/* Dependency-only atoms complete immediately */
+		trace_sysgraph(SGR_SUBMIT, kctx->id,
+				kbase_jd_atom_id(katom->kctx, katom));
+		katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+		dev_dbg(kctx->kbdev->dev, "Atom %p status to completed\n",
+			(void *)katom);
+		return false;
+	}
+
+	if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+		/* A soft job completes here either because it is already
+		 * marked to fail (in which case it is not processed at all)
+		 * or because processing it returned 0
+		 */
+		if (katom->will_fail_event_code ||
+		    kbase_process_soft_job(katom) == 0) {
+			kbase_finish_soft_job(katom);
+			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+			dev_dbg(kctx->kbdev->dev,
+				"Atom %p status to completed\n", (void *)katom);
+		}
+		return false;
+	}
+
+	/* Everything else is handed to the job scheduler */
+	katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+	dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n", (void *)katom);
+	/* Queue an action about whether we should try scheduling a context */
+	return kbasep_js_add_job(kctx, katom);
+}
+
+void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom)
+{
+	struct kbase_device *kbdev;
+	bool need_resched;
+
+	KBASE_DEBUG_ASSERT(katom);
+	kbdev = katom->kctx->kbdev;
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	/* A GPU atom may be submitted before its dependencies are met, since
+	 * the job scheduler may be able to represent them. Any other atom
+	 * must wait until both of its dependencies have been resolved.
+	 */
+	if (!IS_GPU_ATOM(katom) &&
+	    (kbase_jd_katom_dep_atom(&katom->dep[0]) ||
+	     kbase_jd_katom_dep_atom(&katom->dep[1])))
+		return;
+
+	/* katom dep complete, attempt to run it */
+	need_resched = jd_run_atom(katom);
+
+	/* The atom has already finished, so complete it straight away */
+	if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED)
+		need_resched |= jd_done_nolock(katom, NULL);
+
+	if (need_resched)
+		kbase_js_sched_all(kbdev);
+}
+
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom)
+{
+#ifdef CONFIG_MALI_DMA_FENCE
+	/* An atom that completed successfully had all of its dma-fence
+	 * callbacks completed before it ran, so there is nothing to wait for.
+	 */
+	if (katom->event_code == BASE_JD_EVENT_DONE)
+		return;
+
+	/* Failed atom: flush the dma-fence workqueue so that any callbacks
+	 * that may have been queued are done before continuing.
+	 */
+	flush_workqueue(katom->kctx->dma_fence.wq);
+#endif /* CONFIG_MALI_DMA_FENCE */
+}
+
+static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx;
+	u32 remaining;
+
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	kbase_dma_fence_signal(katom);
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	kctx = katom->kctx;
+
+	kbase_gpu_vm_lock(kctx);
+	/* only roll back if extres is non-NULL */
+	if (katom->extres) {
+		/* Unmap from the last resource down to the first */
+		for (remaining = katom->nr_extres; remaining > 0; remaining--) {
+			const u32 idx = remaining - 1;
+			struct kbase_mem_phy_alloc *alloc =
+				katom->extres[idx].alloc;
+			struct kbase_va_region *reg;
+
+			reg = kbase_region_tracker_find_region_base_address(
+				kctx, katom->extres[idx].gpu_address);
+			kbase_unmap_external_resource(kctx, reg, alloc);
+		}
+		kfree(katom->extres);
+		katom->extres = NULL;
+	}
+	kbase_gpu_vm_unlock(kctx);
+}
+
+/*
+ * Set up external resources needed by this job.
+ *
+ * jctx.lock must be held when this is called.
+ *
+ * The user's resource list is copied into the tail of a freshly allocated
+ * katom->extres array and then converted in place, element by element, into
+ * the kernel-side representation (region base address plus mapped
+ * allocation).
+ *
+ * Return: 0 on success. -EINVAL when no resources are encoded, the user copy
+ * fails, a region is invalid or free, a resource cannot be mapped, or (with
+ * CONFIG_MALI_DMA_FENCE) the fence wait setup fails. -ENOMEM when an
+ * allocation fails. On every failure path that runs after the array was
+ * allocated, katom->extres is freed and reset to NULL.
+ */
+
+static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom_v2 *user_atom)
+{
+	int err_ret_val = -EINVAL;
+	u32 res_no;
+#ifdef CONFIG_MALI_DMA_FENCE
+	struct kbase_dma_fence_resv_info info = {
+		.resv_objs = NULL,
+		.dma_fence_resv_count = 0,
+		.dma_fence_excl_bitmap = NULL
+	};
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+	/*
+	 * When both dma-buf fence and Android native sync is enabled, we
+	 * disable dma-buf fence for contexts that are using Android native
+	 * fences.
+	 */
+	const bool implicit_sync = !kbase_ctx_flag(katom->kctx,
+						   KCTX_NO_IMPLICIT_SYNC);
+#else /* CONFIG_SYNC || CONFIG_SYNC_FILE*/
+	const bool implicit_sync = true;
+#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
+#endif /* CONFIG_MALI_DMA_FENCE */
+	struct base_external_resource *input_extres;
+
+	KBASE_DEBUG_ASSERT(katom);
+	KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+	/* no resources encoded, early out */
+	if (!katom->nr_extres)
+		return -EINVAL;
+
+	katom->extres = kmalloc_array(katom->nr_extres, sizeof(*katom->extres), GFP_KERNEL);
+	if (!katom->extres)
+		return -ENOMEM;
+
+	/* copy user buffer to the end of our real buffer.
+	 * Make sure the struct sizes haven't changed in a way
+	 * we don't support */
+	BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres));
+	input_extres = (struct base_external_resource *)
+			(((unsigned char *)katom->extres) +
+			(sizeof(*katom->extres) - sizeof(*input_extres)) *
+			katom->nr_extres);
+
+	if (copy_from_user(input_extres,
+			get_compat_pointer(katom->kctx, user_atom->extres_list),
+			sizeof(*input_extres) * katom->nr_extres) != 0) {
+		err_ret_val = -EINVAL;
+		goto early_err_out;
+	}
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (implicit_sync) {
+		info.resv_objs = kmalloc_array(katom->nr_extres,
+					sizeof(struct reservation_object *),
+					GFP_KERNEL);
+		if (!info.resv_objs) {
+			err_ret_val = -ENOMEM;
+			goto early_err_out;
+		}
+
+		info.dma_fence_excl_bitmap =
+				kcalloc(BITS_TO_LONGS(katom->nr_extres),
+					sizeof(unsigned long), GFP_KERNEL);
+		if (!info.dma_fence_excl_bitmap) {
+			err_ret_val = -ENOMEM;
+			goto early_err_out;
+		}
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* Take the processes mmap lock */
+	down_read(&current->mm->mmap_sem);
+
+	/* need to keep the GPU VM locked while we set up UMM buffers */
+	kbase_gpu_vm_lock(katom->kctx);
+	for (res_no = 0; res_no < katom->nr_extres; res_no++) {
+		struct base_external_resource *res = &input_extres[res_no];
+		struct kbase_va_region *reg;
+		struct kbase_mem_phy_alloc *alloc;
+#ifdef CONFIG_MALI_DMA_FENCE
+		bool exclusive;
+		exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE)
+				? true : false;
+#endif
+		/* The exclusive-access flag shares the word with the address,
+		 * so it is masked off before the region lookup.
+		 */
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx,
+				res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+		/* did we find a matching region object? */
+		if (kbase_is_region_invalid_or_free(reg)) {
+			/* roll back (err_ret_val still holds -EINVAL) */
+			goto failed_loop;
+		}
+
+		if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) &&
+				(reg->flags & KBASE_REG_PROTECTED)) {
+			katom->atom_flags |= KBASE_KATOM_FLAG_PROTECTED;
+		}
+
+		alloc = kbase_map_external_resource(katom->kctx, reg,
+				current->mm);
+		if (!alloc) {
+			err_ret_val = -EINVAL;
+			goto failed_loop;
+		}
+
+#ifdef CONFIG_MALI_DMA_FENCE
+		if (implicit_sync &&
+		    reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+			struct reservation_object *resv;
+
+			resv = reg->gpu_alloc->imported.umm.dma_buf->resv;
+			if (resv)
+				kbase_dma_fence_add_reservation(resv, &info,
+								exclusive);
+		}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+		/* finish with updating our array with the data we found */
+		/* NOTE: It is important that this is the last thing we do (or
+		 * at least not before the first write) as we overwrite elements
+		 * as we loop and could be overwriting ourself, so no writes
+		 * until the last read for an element.
+		 * */
+		katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */
+		katom->extres[res_no].alloc = alloc;
+	}
+	/* successfully parsed the extres array */
+	/* drop the vm lock now */
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	/* Release the processes mmap lock */
+	up_read(&current->mm->mmap_sem);
+
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (implicit_sync) {
+		if (info.dma_fence_resv_count) {
+			int ret;
+
+			ret = kbase_dma_fence_wait(katom, &info);
+			if (ret < 0)
+				goto failed_dma_fence_setup;
+		}
+
+		kfree(info.resv_objs);
+		kfree(info.dma_fence_excl_bitmap);
+	}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+	/* all done OK */
+	return 0;
+
+/* error handling section */
+
+#ifdef CONFIG_MALI_DMA_FENCE
+failed_dma_fence_setup:
+	/* Both locks were dropped after the loop, so they are re-taken here
+	 * before falling through to the shared unwind code. res_no equals
+	 * nr_extres at this point, so every mapped resource is unmapped.
+	 */
+	/* Lock the processes mmap lock */
+	down_read(&current->mm->mmap_sem);
+
+	/* lock before we unmap */
+	kbase_gpu_vm_lock(katom->kctx);
+#endif
+
+ failed_loop:
+	/* undo the loop work */
+	while (res_no-- > 0) {
+		struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+
+		kbase_unmap_external_resource(katom->kctx, NULL, alloc);
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+
+	/* Release the processes mmap lock */
+	up_read(&current->mm->mmap_sem);
+
+ early_err_out:
+	kfree(katom->extres);
+	katom->extres = NULL;
+#ifdef CONFIG_MALI_DMA_FENCE
+	if (implicit_sync) {
+		kfree(info.resv_objs);
+		kfree(info.dma_fence_excl_bitmap);
+	}
+#endif
+	return err_ret_val;
+}
+
+/**
+ * jd_resolve_dep - Resolve one dependency slot of the atoms waiting on @katom
+ * @out_list:     List that receives the dependents that can now be handled
+ * @katom:        Atom that has completed
+ * @d:            Dependency slot (0 or 1) to resolve
+ * @ctx_is_dying: Whether the owning context is being torn down
+ *
+ * Every atom on katom->dep_head[@d] is unlinked and has its dep[@d] cleared.
+ * If @katom did not finish with BASE_JD_EVENT_DONE and the dependency is not
+ * an order-only one, the failure code is copied to the dependent and recorded
+ * in will_fail_event_code. A dependent that is not already on a list is
+ * appended to @out_list when its other slot is empty, or when it is a GPU
+ * atom, the context is not dying and neither it nor the other dependency is
+ * marked to fail. With CONFIG_MALI_DMA_FENCE it is appended only if its fence
+ * dependency count reads -1.
+ */
+static inline void jd_resolve_dep(struct list_head *out_list,
+					struct kbase_jd_atom *katom,
+					u8 d, bool ctx_is_dying)
+{
+	u8 other_d = !d;
+
+	while (!list_empty(&katom->dep_head[d])) {
+		struct kbase_jd_atom *dep_atom;
+		struct kbase_jd_atom *other_dep_atom;
+		u8 dep_type;
+
+		dep_atom = list_entry(katom->dep_head[d].next,
+				struct kbase_jd_atom, dep_item[d]);
+		list_del(katom->dep_head[d].next);
+
+		/* Read the type before the slot is cleared */
+		dep_type = kbase_jd_katom_dep_type(&dep_atom->dep[d]);
+		kbase_jd_katom_dep_clear(&dep_atom->dep[d]);
+
+		if (katom->event_code != BASE_JD_EVENT_DONE &&
+			(dep_type != BASE_JD_DEP_TYPE_ORDER)) {
+#ifdef CONFIG_MALI_DMA_FENCE
+			kbase_dma_fence_cancel_callbacks(dep_atom);
+#endif
+
+			dep_atom->event_code = katom->event_code;
+			KBASE_DEBUG_ASSERT(dep_atom->status !=
+						KBASE_JD_ATOM_STATE_UNUSED);
+
+			dep_atom->will_fail_event_code = dep_atom->event_code;
+		}
+		other_dep_atom = (struct kbase_jd_atom *)
+			kbase_jd_katom_dep_atom(&dep_atom->dep[other_d]);
+
+		if (!dep_atom->in_jd_list && (!other_dep_atom ||
+				(IS_GPU_ATOM(dep_atom) && !ctx_is_dying &&
+				!dep_atom->will_fail_event_code &&
+				!other_dep_atom->will_fail_event_code))) {
+			bool dep_satisfied = true;
+#ifdef CONFIG_MALI_DMA_FENCE
+			int dep_count;
+
+			dep_count = kbase_fence_dep_count_read(dep_atom);
+			if (likely(dep_count == -1)) {
+				dep_satisfied = true;
+			} else {
+				/*
+				 * There are either still active callbacks, or
+				 * all fences for this @dep_atom have signaled,
+				 * but the worker that will queue the atom has
+				 * not yet run.
+				 *
+				 * Wait for the fences to signal and the fence
+				 * worker to run and handle @dep_atom. If
+				 * @dep_atom was completed due to error on
+				 * @katom, then the fence worker will pick up
+				 * the complete status and error code set on
+				 * @dep_atom above.
+				 */
+				dep_satisfied = false;
+			}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+			if (dep_satisfied) {
+				trace_sysgraph(SGR_DEP_RES,
+				    dep_atom->kctx->id,
+				    kbase_jd_atom_id(katom->kctx, dep_atom));
+				dep_atom->in_jd_list = true;
+				list_add_tail(&dep_atom->jd_item, out_list);
+			}
+		}
+	}
+}
+
+/**
+ * is_dep_valid - Validate that a dependency is valid for early dependency
+ *                submission
+ * @katom: Dependency atom to validate
+ *
+ * A dependency is valid if any of the following are true :
+ * - It does not exist (a non-existent dependency does not block submission)
+ * - It is in the job scheduler
+ * - It has completed, does not have a failure event code, and has not been
+ *   marked to fail in the future
+ *
+ * Return: true if valid, false otherwise
+ */
+static bool is_dep_valid(struct kbase_jd_atom *katom)
+{
+	/* If there's no dependency then this is 'valid' from the perspective of
+	 * early dependency submission */
+	if (!katom)
+		return true;
+
+	/* Dependency must have reached the job scheduler */
+	if (katom->status < KBASE_JD_ATOM_STATE_IN_JS)
+		return false;
+
+	/* If dependency has completed and has failed or will fail then it is
+	 * not valid */
+	if (katom->status >= KBASE_JD_ATOM_STATE_HW_COMPLETED &&
+			(katom->event_code != BASE_JD_EVENT_DONE ||
+			katom->will_fail_event_code))
+		return false;
+
+	return true;
+}
+
+/**
+ * jd_try_submitting_deps - Queue dependents of a freshly submitted atom
+ * @out_list: List that receives the dependents that can be submitted early
+ * @node:     Atom whose dependents (both dependency slots) are examined
+ *
+ * Each GPU atom waiting on @node that is not already on a list is added to
+ * the head of @out_list when both of its dependencies pass is_dep_valid()
+ * and, with CONFIG_MALI_DMA_FENCE, its fence dependency count reads -1. The
+ * dependents are not unlinked from @node's dependency lists here.
+ */
+static void jd_try_submitting_deps(struct list_head *out_list,
+		struct kbase_jd_atom *node)
+{
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct list_head *pos;
+
+		list_for_each(pos, &node->dep_head[i]) {
+			struct kbase_jd_atom *dep_atom = list_entry(pos,
+					struct kbase_jd_atom, dep_item[i]);
+
+			if (IS_GPU_ATOM(dep_atom) && !dep_atom->in_jd_list) {
+				/*Check if atom deps look sane*/
+				bool dep0_valid = is_dep_valid(
+						dep_atom->dep[0].atom);
+				bool dep1_valid = is_dep_valid(
+						dep_atom->dep[1].atom);
+				bool dep_satisfied = true;
+#ifdef CONFIG_MALI_DMA_FENCE
+				int dep_count;
+
+				dep_count = kbase_fence_dep_count_read(
+								dep_atom);
+				if (likely(dep_count == -1)) {
+					dep_satisfied = true;
+				} else {
+				/*
+				 * There are either still active callbacks, or
+				 * all fences for this @dep_atom have signaled,
+				 * but the worker that will queue the atom has
+				 * not yet run.
+				 *
+				 * Wait for the fences to signal and the fence
+				 * worker to run and handle @dep_atom. If
+				 * @dep_atom was completed due to error on
+				 * @katom, then the fence worker will pick up
+				 * the complete status and error code set on
+				 * @dep_atom above.
+				 */
+					dep_satisfied = false;
+				}
+#endif /* CONFIG_MALI_DMA_FENCE */
+
+				if (dep0_valid && dep1_valid && dep_satisfied) {
+					trace_sysgraph(SGR_DEP_RES,
+					    dep_atom->kctx->id,
+					    kbase_jd_atom_id(dep_atom->kctx,
+						dep_atom));
+					dep_atom->in_jd_list = true;
+					list_add(&dep_atom->jd_item, out_list);
+				}
+			}
+		}
+	}
+}
+
+#if MALI_JIT_PRESSURE_LIMIT
+/**
+ * jd_update_jit_usage - Update just-in-time physical memory usage for an atom.
+ *
+ * @katom: An atom that has just finished.
+ *
+ * Read back actual just-in-time memory region usage from atoms that provide
+ * this information, and update the current physical page pressure.
+ *
+ * The caller must hold the kbase_jd_context.lock.
+ */
+static void jd_update_jit_usage(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_va_region *reg;
+	struct kbase_vmap_struct mapping;
+	u64 *ptr;
+	u64 used_pages;
+	unsigned int idx;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+
+	/* If this atom wrote to JIT memory, find out how much it has written
+	 * and update the usage information in the region.
+	 */
+	/* The walk stops at the first JIT id that is zero */
+	for (idx = 0;
+		idx < ARRAY_SIZE(katom->jit_ids) && katom->jit_ids[idx];
+		idx++) {
+		enum heap_pointer { LOW = 0, HIGH, COUNT };
+		size_t size_to_read;
+		u64 read_val;
+
+		reg = kctx->jit_alloc[katom->jit_ids[idx]];
+
+		if (!reg) {
+			dev_warn(kctx->kbdev->dev,
+					"%s: JIT id[%u]=%u has no region\n",
+					__func__, idx, katom->jit_ids[idx]);
+			continue;
+		}
+
+		if (reg == KBASE_RESERVED_REG_JIT_ALLOC) {
+			dev_warn(kctx->kbdev->dev,
+					"%s: JIT id[%u]=%u has failed to allocate a region\n",
+					__func__, idx, katom->jit_ids[idx]);
+			continue;
+		}
+
+		/* Regions without a heap-info address report nothing */
+		if (!reg->heap_info_gpu_addr)
+			continue;
+
+		/* One u64 by default, a u32 when the value is a size, or a
+		 * LOW/HIGH pair of u64 for tiler-align-top regions.
+		 */
+		size_to_read = sizeof(*ptr);
+		if (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE)
+			size_to_read = sizeof(u32);
+		else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP)
+			size_to_read = sizeof(u64[COUNT]);
+
+		ptr = kbase_vmap(kctx, reg->heap_info_gpu_addr, size_to_read,
+				&mapping);
+
+		if (!ptr) {
+			dev_warn(kctx->kbdev->dev,
+					"%s: JIT id[%u]=%u start=0x%llx unable to map end marker %llx\n",
+					__func__, idx, katom->jit_ids[idx],
+					reg->start_pfn << PAGE_SHIFT,
+					reg->heap_info_gpu_addr);
+			continue;
+		}
+
+		if (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) {
+			read_val = READ_ONCE(*(u32 *)ptr);
+			used_pages = PFN_UP(read_val);
+		} else {
+			u64 addr_end;
+
+			if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) {
+				const unsigned long extent_bytes = reg->extent
+					<< PAGE_SHIFT;
+				const u64 low_ptr = ptr[LOW];
+				const u64 high_ptr = ptr[HIGH];
+
+				/* As either the low or high pointer could
+				 * consume their partition and move onto the
+				 * next chunk, we need to account for both.
+				 * In the case where nothing has been allocated
+				 * from the high pointer the whole chunk could
+				 * be backed unnecessarily - but the granularity
+				 * is the chunk size anyway and any non-zero
+				 * offset of low pointer from the start of the
+				 * chunk would result in the whole chunk being
+				 * backed.
+				 */
+				read_val = max(high_ptr, low_ptr);
+
+				/* kbase_check_alloc_sizes() already satisfies
+				 * this, but here to avoid future maintenance
+				 * hazards
+				 */
+				WARN_ON(!is_power_of_2(extent_bytes));
+				addr_end = ALIGN(read_val, extent_bytes);
+			} else {
+				addr_end = read_val = READ_ONCE(*ptr);
+			}
+
+			/* An end address below the region start keeps the
+			 * previously recorded usage.
+			 */
+			if (addr_end >= (reg->start_pfn << PAGE_SHIFT))
+				used_pages = PFN_UP(addr_end) - reg->start_pfn;
+			else
+				used_pages = reg->used_pages;
+		}
+
+		trace_mali_jit_report(katom, reg, idx, read_val, used_pages);
+		kbase_trace_jit_report_gpu_mem(kctx, reg, 0u);
+
+		/* We can never have used more pages than the VA size of the
+		 * region
+		 */
+		if (used_pages > reg->nr_pages) {
+			dev_warn(kctx->kbdev->dev,
+					"%s: JIT id[%u]=%u start=0x%llx used_pages %llx > %zx (read 0x%llx as %s%s)\n",
+					__func__, idx, katom->jit_ids[idx],
+					reg->start_pfn << PAGE_SHIFT,
+					used_pages, reg->nr_pages, read_val,
+					(reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) ?
+						"size" : "addr",
+					(reg->flags & KBASE_REG_TILER_ALIGN_TOP) ?
+						" with align" : "");
+			used_pages = reg->nr_pages;
+		}
+		/* Note: one real use case has an atom correctly reporting 0
+		 * pages in use. This happens in normal use-cases but may only
+		 * happen for a few of the application's frames.
+		 */
+
+		kbase_vunmap(kctx, &mapping);
+
+		kbase_jit_report_update_pressure(kctx, reg, used_pages, 0u);
+	}
+
+	kbase_jit_retry_pending_alloc(kctx);
+}
+#endif /* MALI_JIT_PRESSURE_LIMIT */
+
+/*
+ * Perform the necessary handling of an atom that has finished running
+ * on the GPU.
+ *
+ * Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller
+ * is responsible for calling kbase_finish_soft_job *before* calling this function.
+ *
+ * The caller must hold the kbase_jd_context.lock.
+ */ +bool jd_done_nolock(struct kbase_jd_atom *katom, + struct list_head *completed_jobs_ctx) +{ + struct kbase_context *kctx = katom->kctx; + struct list_head completed_jobs; + struct list_head runnable_jobs; + bool need_to_try_schedule_context = false; + int i; + + INIT_LIST_HEAD(&completed_jobs); + INIT_LIST_HEAD(&runnable_jobs); + + KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); + +#if MALI_JIT_PRESSURE_LIMIT + jd_update_jit_usage(katom); +#endif /* MALI_JIT_PRESSURE_LIMIT */ + + /* This is needed in case an atom is failed due to being invalid, this + * can happen *before* the jobs that the atom depends on have completed */ + for (i = 0; i < 2; i++) { + if (kbase_jd_katom_dep_atom(&katom->dep[i])) { + list_del(&katom->dep_item[i]); + kbase_jd_katom_dep_clear(&katom->dep[i]); + } + } + + katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + dev_dbg(kctx->kbdev->dev, "Atom %p status to completed\n", + (void *)katom); + list_add_tail(&katom->jd_item, &completed_jobs); + + while (!list_empty(&completed_jobs)) { + katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, jd_item); + list_del(completed_jobs.prev); + KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED); + + for (i = 0; i < 2; i++) + jd_resolve_dep(&runnable_jobs, katom, i, + kbase_ctx_flag(kctx, KCTX_DYING)); + + if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) + kbase_jd_post_external_resources(katom); + + while (!list_empty(&runnable_jobs)) { + struct kbase_jd_atom *node; + + node = list_entry(runnable_jobs.next, + struct kbase_jd_atom, jd_item); + list_del(runnable_jobs.next); + node->in_jd_list = false; + + dev_dbg(kctx->kbdev->dev, "List node %p has status %d\n", + node, node->status); + + KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED); + if (node->status == KBASE_JD_ATOM_STATE_IN_JS) + continue; + + if (node->status != KBASE_JD_ATOM_STATE_COMPLETED && + !kbase_ctx_flag(kctx, KCTX_DYING)) { + need_to_try_schedule_context |= jd_run_atom(node); + 
} else { + node->event_code = katom->event_code; + + if (node->core_req & + BASE_JD_REQ_SOFT_JOB) { + WARN_ON(!list_empty(&node->queue)); + kbase_finish_soft_job(node); + } + node->status = KBASE_JD_ATOM_STATE_COMPLETED; + } + + if (node->status == KBASE_JD_ATOM_STATE_COMPLETED) { + list_add_tail(&node->jd_item, &completed_jobs); + } else if (node->status == KBASE_JD_ATOM_STATE_IN_JS && + !node->will_fail_event_code) { + /* Node successfully submitted, try submitting + * dependencies as they may now be representable + * in JS */ + jd_try_submitting_deps(&runnable_jobs, node); + } + } + + /* Register a completed job as a disjoint event when the GPU + * is in a disjoint state (ie. being reset). + */ + kbase_disjoint_event_potential(kctx->kbdev); + if (completed_jobs_ctx) + list_add_tail(&katom->jd_item, completed_jobs_ctx); + else + kbase_event_post(kctx, katom); + + /* Decrement and check the TOTAL number of jobs. This includes + * those not tracked by the scheduler: 'not ready to run' and + * 'dependency-only' jobs. 
*/ + if (--kctx->jctx.job_nr == 0) + wake_up(&kctx->jctx.zero_jobs_wait); /* All events are safely queued now, and we can signal any waiter + * that we've got no more jobs (so we can be safely terminated) */ + } + + return need_to_try_schedule_context; +} + +KBASE_EXPORT_TEST_API(jd_done_nolock); + +#ifdef CONFIG_GPU_TRACEPOINTS +enum { + CORE_REQ_DEP_ONLY, + CORE_REQ_SOFT, + CORE_REQ_COMPUTE, + CORE_REQ_FRAGMENT, + CORE_REQ_VERTEX, + CORE_REQ_TILER, + CORE_REQ_FRAGMENT_VERTEX, + CORE_REQ_FRAGMENT_VERTEX_TILER, + CORE_REQ_FRAGMENT_TILER, + CORE_REQ_VERTEX_TILER, + CORE_REQ_UNKNOWN +}; +static const char * const core_req_strings[] = { + "Dependency Only Job", + "Soft Job", + "Compute Shader Job", + "Fragment Shader Job", + "Vertex/Geometry Shader Job", + "Tiler Job", + "Fragment Shader + Vertex/Geometry Shader Job", + "Fragment Shader + Vertex/Geometry Shader Job + Tiler Job", + "Fragment Shader + Tiler Job", + "Vertex/Geometry Shader Job + Tiler Job", + "Unknown Job" +}; +static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req) +{ + if (core_req & BASE_JD_REQ_SOFT_JOB) + return core_req_strings[CORE_REQ_SOFT]; + if (core_req & BASE_JD_REQ_ONLY_COMPUTE) + return core_req_strings[CORE_REQ_COMPUTE]; + switch (core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) { + case BASE_JD_REQ_DEP: + return core_req_strings[CORE_REQ_DEP_ONLY]; + case BASE_JD_REQ_FS: + return core_req_strings[CORE_REQ_FRAGMENT]; + case BASE_JD_REQ_CS: + return core_req_strings[CORE_REQ_VERTEX]; + case BASE_JD_REQ_T: + return core_req_strings[CORE_REQ_TILER]; + case (BASE_JD_REQ_FS | BASE_JD_REQ_CS): + return core_req_strings[CORE_REQ_FRAGMENT_VERTEX]; + case (BASE_JD_REQ_FS | BASE_JD_REQ_T): + return core_req_strings[CORE_REQ_FRAGMENT_TILER]; + case (BASE_JD_REQ_CS | BASE_JD_REQ_T): + return core_req_strings[CORE_REQ_VERTEX_TILER]; + case (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T): + return core_req_strings[CORE_REQ_FRAGMENT_VERTEX_TILER]; + } + return 
core_req_strings[CORE_REQ_UNKNOWN]; +} +#endif + +static bool jd_submit_atom(struct kbase_context *const kctx, + const struct base_jd_atom_v2 *const user_atom, + const struct base_jd_fragment *const user_jc_incr, + struct kbase_jd_atom *const katom) +{ + struct kbase_device *kbdev = kctx->kbdev; + struct kbase_jd_context *jctx = &kctx->jctx; + int queued = 0; + int i; + int sched_prio; + bool will_fail = false; + + dev_dbg(kbdev->dev, "User did JD submit atom %p\n", (void *)katom); + + /* Update the TOTAL number of jobs. This includes those not tracked by + * the scheduler: 'not ready to run' and 'dependency-only' jobs. */ + jctx->job_nr++; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) + katom->start_timestamp.tv64 = 0; +#else + katom->start_timestamp = 0; +#endif + katom->udata = user_atom->udata; + katom->kctx = kctx; + katom->nr_extres = user_atom->nr_extres; + katom->extres = NULL; + katom->device_nr = user_atom->device_nr; + katom->jc = user_atom->jc; + katom->core_req = user_atom->core_req; + katom->jobslot = user_atom->jobslot; + katom->atom_flags = 0; + katom->retry_count = 0; + katom->need_cache_flush_cores_retained = 0; + katom->pre_dep = NULL; + katom->post_dep = NULL; + katom->x_pre_dep = NULL; + katom->x_post_dep = NULL; + katom->will_fail_event_code = BASE_JD_EVENT_NOT_STARTED; + katom->softjob_data = NULL; + + trace_sysgraph(SGR_ARRIVE, kctx->id, user_atom->atom_number); + +#if MALI_JIT_PRESSURE_LIMIT + /* Older API version atoms might have random values where jit_id now + * lives, but we must maintain backwards compatibility - handle the + * issue. + */ + if (kctx->api_version < MIN_API_VERSION_WITH_JPL) { + katom->jit_ids[0] = 0; + katom->jit_ids[1] = 0; + } else { + katom->jit_ids[0] = user_atom->jit_id[0]; + katom->jit_ids[1] = user_atom->jit_id[1]; + } +#endif /* MALI_JIT_PRESSURE_LIMIT */ + + katom->renderpass_id = user_atom->renderpass_id; + + /* Implicitly sets katom->protected_state.enter as well. 
*/ + katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; + + katom->age = kctx->age_count++; + + INIT_LIST_HEAD(&katom->queue); + INIT_LIST_HEAD(&katom->jd_item); +#ifdef CONFIG_MALI_DMA_FENCE + kbase_fence_dep_count_set(katom, -1); +#endif + + /* Don't do anything if there is a mess up with dependencies. + This is done in a separate cycle to check both the dependencies at ones, otherwise + it will be extra complexity to deal with 1st dependency ( just added to the list ) + if only the 2nd one has invalid config. + */ + for (i = 0; i < 2; i++) { + int dep_atom_number = user_atom->pre_dep[i].atom_id; + base_jd_dep_type dep_atom_type = user_atom->pre_dep[i].dependency_type; + + if (dep_atom_number) { + if (dep_atom_type != BASE_JD_DEP_TYPE_ORDER && + dep_atom_type != BASE_JD_DEP_TYPE_DATA) { + katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT; + katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + dev_dbg(kbdev->dev, + "Atom %p status to completed\n", + (void *)katom); + + /* Wrong dependency setup. Atom will be sent + * back to user space. Do not record any + * dependencies. 
*/ + KBASE_TLSTREAM_TL_NEW_ATOM( + kbdev, + katom, + kbase_jd_atom_id(kctx, katom)); + KBASE_TLSTREAM_TL_RET_ATOM_CTX( + kbdev, + katom, kctx); + KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE( + kbdev, + katom, + TL_ATOM_STATE_IDLE); + + return jd_done_nolock(katom, NULL); + } + } + } + + /* Add dependencies */ + for (i = 0; i < 2; i++) { + int dep_atom_number = user_atom->pre_dep[i].atom_id; + base_jd_dep_type dep_atom_type; + struct kbase_jd_atom *dep_atom = &jctx->atoms[dep_atom_number]; + + dep_atom_type = user_atom->pre_dep[i].dependency_type; + kbase_jd_katom_dep_clear(&katom->dep[i]); + + if (!dep_atom_number) + continue; + + if (dep_atom->status == KBASE_JD_ATOM_STATE_UNUSED || + dep_atom->status == KBASE_JD_ATOM_STATE_COMPLETED) { + + if (dep_atom->event_code == BASE_JD_EVENT_DONE) + continue; + /* don't stop this atom if it has an order dependency + * only to the failed one, try to submit it through + * the normal path + */ + if (dep_atom_type == BASE_JD_DEP_TYPE_ORDER && + dep_atom->event_code > BASE_JD_EVENT_ACTIVE) { + continue; + } + + /* Atom has completed, propagate the error code if any */ + katom->event_code = dep_atom->event_code; + katom->status = KBASE_JD_ATOM_STATE_QUEUED; + dev_dbg(kbdev->dev, "Atom %p status to queued\n", + (void *)katom); + + /* This atom will be sent back to user space. + * Do not record any dependencies. 
+ */ + KBASE_TLSTREAM_TL_NEW_ATOM( + kbdev, + katom, + kbase_jd_atom_id(kctx, katom)); + KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx); + KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, + TL_ATOM_STATE_IDLE); + + will_fail = true; + + } else { + /* Atom is in progress, add this atom to the list */ + list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]); + kbase_jd_katom_dep_set(&katom->dep[i], dep_atom, dep_atom_type); + queued = 1; + } + } + + if (will_fail) { + if (!queued) { + if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { + /* This softjob has failed due to a previous + * dependency, however we should still run the + * prepare & finish functions + */ + int err = kbase_prepare_soft_job(katom); + + if (err >= 0) + kbase_finish_soft_job(katom); + } + + return jd_done_nolock(katom, NULL); + } + + if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { + /* This softjob has failed due to a previous + * dependency, however we should still run the + * prepare & finish functions + */ + if (kbase_prepare_soft_job(katom) != 0) { + katom->event_code = + BASE_JD_EVENT_JOB_INVALID; + return jd_done_nolock(katom, NULL); + } + } + + katom->will_fail_event_code = katom->event_code; + return false; + } + + /* These must occur after the above loop to ensure that an atom + * that depends on a previous atom with the same number behaves + * as expected + */ + katom->event_code = BASE_JD_EVENT_DONE; + katom->status = KBASE_JD_ATOM_STATE_QUEUED; + dev_dbg(kbdev->dev, "Atom %p status to queued\n", (void *)katom); + + /* For invalid priority, be most lenient and choose the default */ + sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio); + if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID) + sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT; + katom->sched_priority = sched_prio; + + /* Create a new atom. 
*/ + KBASE_TLSTREAM_TL_NEW_ATOM( + kbdev, + katom, + kbase_jd_atom_id(kctx, katom)); + KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_IDLE); + KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(kbdev, katom, katom->sched_priority); + KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx); + +#if !MALI_INCREMENTAL_RENDERING + /* Reject atoms for incremental rendering if not supported */ + if (katom->core_req & + (BASE_JD_REQ_START_RENDERPASS|BASE_JD_REQ_END_RENDERPASS)) { + dev_err(kctx->kbdev->dev, + "Rejecting atom with unsupported core_req 0x%x\n", + katom->core_req); + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return jd_done_nolock(katom, NULL); + } +#endif /* !MALI_INCREMENTAL_RENDERING */ + + if (katom->core_req & BASE_JD_REQ_END_RENDERPASS) { + WARN_ON(katom->jc != 0); + katom->jc_fragment = *user_jc_incr; + } else if (!katom->jc && + (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { + /* Reject atoms with job chain = NULL, as these cause issues + * with soft-stop + */ + dev_err(kctx->kbdev->dev, "Rejecting atom with jc = NULL\n"); + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return jd_done_nolock(katom, NULL); + } + + /* Reject atoms with an invalid device_nr */ + if ((katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) && + (katom->device_nr >= kctx->kbdev->gpu_props.num_core_groups)) { + dev_err(kctx->kbdev->dev, + "Rejecting atom with invalid device_nr %d\n", + katom->device_nr); + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return jd_done_nolock(katom, NULL); + } + + /* Reject atoms with invalid core requirements */ + if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) && + (katom->core_req & BASE_JD_REQ_EVENT_COALESCE)) { + dev_err(kctx->kbdev->dev, + "Rejecting atom with invalid core requirements\n"); + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE; + return jd_done_nolock(katom, NULL); + } + + /* Reject soft-job atom of certain types from accessing external 
resources */ + if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) && + (((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_FENCE_WAIT) || + ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_ALLOC) || + ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == BASE_JD_REQ_SOFT_JIT_FREE))) { + dev_err(kctx->kbdev->dev, + "Rejecting soft-job atom accessing external resources\n"); + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return jd_done_nolock(katom, NULL); + } + + if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { + /* handle what we need to do to access the external resources */ + if (kbase_jd_pre_external_resources(katom, user_atom) != 0) { + /* setup failed (no access, bad resource, unknown resource types, etc.) */ + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return jd_done_nolock(katom, NULL); + } + } + +#if !MALI_JIT_PRESSURE_LIMIT + if ((kctx->api_version >= MIN_API_VERSION_WITH_JPL) && + (user_atom->jit_id[0] || user_atom->jit_id[1])) { + /* JIT pressure limit is disabled, but we are receiving non-0 + * JIT IDs - atom is invalid. + */ + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return jd_done_nolock(katom, NULL); + } +#endif /* MALI_JIT_PRESSURE_LIMIT */ + + /* Validate the atom. Function will return error if the atom is + * malformed. + * + * Soft-jobs never enter the job scheduler but have their own initialize method. + * + * If either fail then we immediately complete the atom with an error. 
+ */ + if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) { + if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) { + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return jd_done_nolock(katom, NULL); + } + } else { + /* Soft-job */ + if (kbase_prepare_soft_job(katom) != 0) { + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return jd_done_nolock(katom, NULL); + } + } + +#ifdef CONFIG_GPU_TRACEPOINTS + katom->work_id = atomic_inc_return(&jctx->work_id); + trace_gpu_job_enqueue(kctx->id, katom->work_id, + kbasep_map_core_reqs_to_string(katom->core_req)); +#endif + + if (queued && !IS_GPU_ATOM(katom)) + return false; + +#ifdef CONFIG_MALI_DMA_FENCE + if (kbase_fence_dep_count_read(katom) != -1) + return false; + +#endif /* CONFIG_MALI_DMA_FENCE */ + + if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { + if (kbase_process_soft_job(katom) == 0) { + kbase_finish_soft_job(katom); + return jd_done_nolock(katom, NULL); + } + return false; + } + + if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { + bool need_to_try_schedule_context; + + katom->status = KBASE_JD_ATOM_STATE_IN_JS; + dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n", + (void *)katom); + + need_to_try_schedule_context = kbasep_js_add_job(kctx, katom); + /* If job was cancelled then resolve immediately */ + if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED) + return need_to_try_schedule_context; + } + + /* This is a pure dependency. 
Resolve it immediately */ + return jd_done_nolock(katom, NULL); +} + +int kbase_jd_submit(struct kbase_context *kctx, + void __user *user_addr, u32 nr_atoms, u32 stride, + bool uk6_atom) +{ + struct kbase_jd_context *jctx = &kctx->jctx; + int err = 0; + int i; + bool need_to_try_schedule_context = false; + struct kbase_device *kbdev; + u32 latest_flush; + + /* + * kbase_jd_submit isn't expected to fail and so all errors with the + * jobs are reported by immediately failing them (through event system) + */ + kbdev = kctx->kbdev; + + beenthere(kctx, "%s", "Enter"); + + if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) { + dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it\n"); + return -EINVAL; + } + + if (stride != offsetof(struct base_jd_atom_v2, renderpass_id) && + stride != sizeof(struct base_jd_atom_v2)) { + dev_err(kbdev->dev, + "Stride %u passed to job_submit isn't supported by the kernel\n", + stride); + return -EINVAL; + } + + /* All atoms submitted in this call have the same flush ID */ + latest_flush = kbase_backend_get_current_flush_id(kbdev); + + for (i = 0; i < nr_atoms; i++) { + struct base_jd_atom_v2 user_atom; + struct base_jd_fragment user_jc_incr; + struct kbase_jd_atom *katom; + + if (copy_from_user(&user_atom, user_addr, stride) != 0) { + dev_err(kbdev->dev, + "Invalid atom address %p passed to job_submit\n", + user_addr); + err = -EFAULT; + break; + } + + if (stride == offsetof(struct base_jd_atom_v2, renderpass_id)) { + dev_dbg(kbdev->dev, "No renderpass ID: use 0\n"); + user_atom.renderpass_id = 0; + } else { + /* Ensure all padding bytes are 0 for potential future + * extension + */ + size_t j; + + dev_dbg(kbdev->dev, "Renderpass ID is %d\n", + user_atom.renderpass_id); + for (j = 0; j < sizeof(user_atom.padding); j++) { + if (user_atom.padding[j]) { + dev_err(kbdev->dev, + "Bad padding byte %zu: %d\n", + j, user_atom.padding[j]); + err = -EINVAL; + break; + } + } + if (err) + break; + } + + /* In this case 
'jc' is the CPU address of a struct + * instead of a GPU address of a job chain. + */ + if (user_atom.core_req & BASE_JD_REQ_END_RENDERPASS) { + if (copy_from_user(&user_jc_incr, + u64_to_user_ptr(user_atom.jc), + sizeof(user_jc_incr))) { + dev_err(kbdev->dev, + "Invalid jc address 0x%llx passed to job_submit\n", + user_atom.jc); + err = -EFAULT; + break; + } + dev_dbg(kbdev->dev, "Copied IR jobchain addresses\n"); + user_atom.jc = 0; + } + + user_addr = (void __user *)((uintptr_t) user_addr + stride); + + mutex_lock(&jctx->lock); +#ifndef compiletime_assert +#define compiletime_assert_defined +#define compiletime_assert(x, msg) do { switch (0) { case 0: case (x):; } } \ +while (false) +#endif + compiletime_assert((1 << (8*sizeof(user_atom.atom_number))) == + BASE_JD_ATOM_COUNT, + "BASE_JD_ATOM_COUNT and base_atom_id type out of sync"); + compiletime_assert(sizeof(user_atom.pre_dep[0].atom_id) == + sizeof(user_atom.atom_number), + "BASE_JD_ATOM_COUNT and base_atom_id type out of sync"); +#ifdef compiletime_assert_defined +#undef compiletime_assert +#undef compiletime_assert_defined +#endif + katom = &jctx->atoms[user_atom.atom_number]; + + /* Record the flush ID for the cache flush optimisation */ + katom->flush_id = latest_flush; + + while (katom->status != KBASE_JD_ATOM_STATE_UNUSED) { + /* Atom number is already in use, wait for the atom to + * complete + */ + mutex_unlock(&jctx->lock); + + /* This thread will wait for the atom to complete. Due + * to thread scheduling we are not sure that the other + * thread that owns the atom will also schedule the + * context, so we force the scheduler to be active and + * hence eventually schedule this context at some point + * later. 
+ */ + kbase_js_sched_all(kbdev); + + if (wait_event_killable(katom->completed, + katom->status == + KBASE_JD_ATOM_STATE_UNUSED) != 0) { + /* We're being killed so the result code + * doesn't really matter + */ + return 0; + } + mutex_lock(&jctx->lock); + } + + need_to_try_schedule_context |= jd_submit_atom(kctx, &user_atom, + &user_jc_incr, katom); + + /* Register a completed job as a disjoint event when the GPU is in a disjoint state + * (i.e. being reset). + */ + kbase_disjoint_event_potential(kbdev); + + mutex_unlock(&jctx->lock); + } + + if (need_to_try_schedule_context) + kbase_js_sched_all(kbdev); + + return err; +} + +KBASE_EXPORT_TEST_API(kbase_jd_submit); + +void kbase_jd_done_worker(struct work_struct *data) +{ + struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work); + struct kbase_jd_context *jctx; + struct kbase_context *kctx; + struct kbasep_js_kctx_info *js_kctx_info; + struct kbase_device *kbdev; + struct kbasep_js_device_data *js_devdata; + u64 cache_jc = katom->jc; + struct kbasep_js_atom_retained_state katom_retained_state; + bool context_idle; + base_jd_core_req core_req = katom->core_req; + + /* Soft jobs should never reach this function */ + KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0); + + kctx = katom->kctx; + jctx = &kctx->jctx; + kbdev = kctx->kbdev; + js_kctx_info = &kctx->jctx.sched_info; + js_devdata = &kbdev->js_data; + + dev_dbg(kbdev->dev, "Enter atom %p done worker for kctx %p\n", + (void *)katom, (void *)kctx); + + KBASE_KTRACE_ADD_JM(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0); + + kbase_backend_complete_wq(kbdev, katom); + + /* + * Begin transaction on JD context and JS context + */ + mutex_lock(&jctx->lock); + KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_DONE); + mutex_lock(&js_devdata->queue_mutex); + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + + /* This worker only gets called on contexts that are scheduled *in*.
This is + * because it only happens in response to an IRQ from a job that was + * running. + */ + KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + + if (katom->event_code == BASE_JD_EVENT_STOPPED) { + unsigned long flags; + + dev_dbg(kbdev->dev, "Atom %p has been promoted to stopped\n", + (void *)katom); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + katom->status = KBASE_JD_ATOM_STATE_IN_JS; + dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n", + (void *)katom); + kbase_js_unpull(kctx, katom); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&jctx->lock); + + return; + } + + if ((katom->event_code != BASE_JD_EVENT_DONE) && + (!kbase_ctx_flag(katom->kctx, KCTX_DYING))) { + meson_gpu_fault ++; + dev_err(kbdev->dev, + "t6xx: GPU fault 0x%02lx from job slot %d\n", + (unsigned long)katom->event_code, + katom->slot_nr); + } + + /* Retain state before the katom disappears */ + kbasep_js_atom_retained_state_copy(&katom_retained_state, katom); + + context_idle = kbase_js_complete_atom_wq(kctx, katom); + + KBASE_DEBUG_ASSERT(kbasep_js_has_atom_finished(&katom_retained_state)); + + kbasep_js_remove_job(kbdev, kctx, katom); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF; + /* jd_done_nolock() requires the jsctx_mutex lock to be dropped */ + jd_done_nolock(katom, &kctx->completed_jobs); + + /* katom may have been freed now, do not use! */ + + if (context_idle) { + unsigned long flags; + + context_idle = false; + mutex_lock(&js_devdata->queue_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + /* If kbase_sched() has scheduled this context back in then + * KCTX_ACTIVE will have been set after we marked it as + * inactive, and another pm reference will have been taken, so + * drop our reference.
But do not call kbase_jm_idle_ctx(), as + * the context is active and fast-starting is allowed. + * + * If an atom has been fast-started then kctx->atoms_pulled will + * be non-zero but KCTX_ACTIVE will still be false (as the + * previous pm reference has been inherited). Do NOT drop our + * reference, as it has been re-used, and leave the context as + * active. + * + * If no new atoms have been started then KCTX_ACTIVE will still + * be false and atoms_pulled will be zero, so drop the reference + * and call kbase_jm_idle_ctx(). + * + * As the checks are done under both the queue_mutex and + * hwaccess_lock it should be impossible for this to race + * with the scheduler code. + */ + if (kbase_ctx_flag(kctx, KCTX_ACTIVE) || + !atomic_read(&kctx->atoms_pulled)) { + /* Calling kbase_jm_idle_ctx() here will ensure that + * atoms are not fast-started when we drop the + * hwaccess_lock. This is not performed if + * KCTX_ACTIVE is set as in that case another pm + * reference has been taken and a fast-start would be + * valid.
+ */ + if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) + kbase_jm_idle_ctx(kbdev, kctx); + context_idle = true; + } else { + kbase_ctx_flag_set(kctx, KCTX_ACTIVE); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&js_devdata->queue_mutex); + } + + /* + * Transaction complete + */ + mutex_unlock(&jctx->lock); + + /* Job is now no longer running, so can now safely release the context + * reference, and handle any actions that were logged against the atom's retained state */ + + kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state); + + kbase_js_sched_all(kbdev); + + if (!atomic_dec_return(&kctx->work_count)) { + /* If worker now idle then post all events that jd_done_nolock() + * has queued */ + mutex_lock(&jctx->lock); + while (!list_empty(&kctx->completed_jobs)) { + struct kbase_jd_atom *atom = list_entry( + kctx->completed_jobs.next, + struct kbase_jd_atom, jd_item); + list_del(kctx->completed_jobs.next); + + kbase_event_post(kctx, atom); + } + mutex_unlock(&jctx->lock); + } + + kbase_backend_complete_wq_post_sched(kbdev, core_req); + + if (context_idle) + kbase_pm_context_idle(kbdev); + + KBASE_KTRACE_ADD_JM(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0); + + dev_dbg(kbdev->dev, "Leave atom %p done worker for kctx %p\n", + (void *)katom, (void *)kctx); +} + +/** + * jd_cancel_worker - Work queue job cancel function. + * @data: a &struct work_struct + * + * Only called as part of 'Zapping' a context (which occurs on termination). + * Operates serially with the kbase_jd_done_worker() on the work queue. + * + * This can only be called on contexts that aren't scheduled. + * + * We don't need to release most of the resources that would occur on + * kbase_jd_done() or kbase_jd_done_worker(), because the atoms here must not be + * running (by virtue of only being called on contexts that aren't + * scheduled).
+ */ +static void jd_cancel_worker(struct work_struct *data) +{ + struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work); + struct kbase_jd_context *jctx; + struct kbase_context *kctx; + struct kbasep_js_kctx_info *js_kctx_info; + bool need_to_try_schedule_context; + bool attr_state_changed; + struct kbase_device *kbdev; + + /* Soft jobs should never reach this function */ + KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0); + + kctx = katom->kctx; + kbdev = kctx->kbdev; + jctx = &kctx->jctx; + js_kctx_info = &kctx->jctx.sched_info; + + KBASE_KTRACE_ADD_JM(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0); + + /* This only gets called on contexts that are scheduled out. Hence, we must + * make sure we don't de-ref the number of running jobs (there aren't + * any), nor must we try to schedule out the context (it's already + * scheduled out). + */ + KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + + /* Scheduler: Remove the job from the system */ + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + attr_state_changed = kbasep_js_remove_cancelled_job(kbdev, kctx, katom); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + mutex_lock(&jctx->lock); + + need_to_try_schedule_context = jd_done_nolock(katom, NULL); + /* Because we're zapping, we're not adding any more jobs to this ctx, so no need to + * schedule the context. There's also no need for the jsctx_mutex to have been taken + * around this too. */ + KBASE_DEBUG_ASSERT(!need_to_try_schedule_context); + + /* katom may have been freed now, do not use! 
*/ + mutex_unlock(&jctx->lock); + + if (attr_state_changed) + kbase_js_sched_all(kbdev); +} + +/** + * kbase_jd_done - Complete a job that has been removed from the Hardware + * @katom: atom which has been completed + * @slot_nr: slot the atom was on + * @end_timestamp: completion time (not read by this function's body) + * @done_code: completion code + * + * This must be used whenever a job has been removed from the Hardware, e.g.: + * An IRQ indicates that the job finished (for both error and 'done' codes), or + * the job was evicted from the JS_HEAD_NEXT registers during a Soft/Hard stop. + * + * Some work is carried out immediately, and the rest is deferred onto a + * workqueue + * + * Context: + * This can be called safely from atomic context. + * The caller must hold kbdev->hwaccess_lock + */ +void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, + ktime_t *end_timestamp, kbasep_js_atom_done_code done_code) +{ + struct kbase_context *kctx; + struct kbase_device *kbdev; + + KBASE_DEBUG_ASSERT(katom); + kctx = katom->kctx; + KBASE_DEBUG_ASSERT(kctx); + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev); + + if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) + katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; + + KBASE_KTRACE_ADD_JM(kbdev, JD_DONE, kctx, katom, katom->jc, 0); + + kbase_job_check_leave_disjoint(kbdev, katom); + + katom->slot_nr = slot_nr; + + atomic_inc(&kctx->work_count); + +#ifdef CONFIG_DEBUG_FS + /* A failed job is waiting to be dumped: return without queuing work */ + if (!katom->will_fail_event_code && + kbase_debug_job_fault_process(katom, katom->event_code)) + return; +#endif + + WARN_ON(work_pending(&katom->work)); + INIT_WORK(&katom->work, kbase_jd_done_worker); + queue_work(kctx->jctx.job_done_wq, &katom->work); +} + +KBASE_EXPORT_TEST_API(kbase_jd_done); + +void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + KBASE_DEBUG_ASSERT(NULL != katom); + kctx = katom->kctx; +
KBASE_DEBUG_ASSERT(NULL != kctx); + + dev_dbg(kbdev->dev, "JD: cancelling atom %p\n", (void *)katom); + KBASE_KTRACE_ADD_JM(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0); + + /* This should only be done from a context that is not scheduled */ + KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + + WARN_ON(work_pending(&katom->work)); + + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + + INIT_WORK(&katom->work, jd_cancel_worker); + queue_work(kctx->jctx.job_done_wq, &katom->work); +} + + +void kbase_jd_zap_context(struct kbase_context *kctx) +{ + struct kbase_jd_atom *katom; + struct list_head *entry, *tmp; + struct kbase_device *kbdev; + + KBASE_DEBUG_ASSERT(kctx); + + kbdev = kctx->kbdev; + + KBASE_KTRACE_ADD_JM(kbdev, JD_ZAP_CONTEXT, kctx, NULL, 0u, 0u); + + kbase_js_zap_context(kctx); + + mutex_lock(&kctx->jctx.lock); + + /* + * While holding the struct kbase_jd_context lock clean up jobs which are known to kbase but are + * queued outside the job scheduler. + */ + + del_timer_sync(&kctx->soft_job_timeout); + list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { + katom = list_entry(entry, struct kbase_jd_atom, queue); + kbase_cancel_soft_job(katom); + } + + +#ifdef CONFIG_MALI_DMA_FENCE + kbase_dma_fence_cancel_all_atoms(kctx); +#endif + + mutex_unlock(&kctx->jctx.lock); + +#ifdef CONFIG_MALI_DMA_FENCE + /* Flush dma-fence workqueue to ensure that any callbacks that may have + * been queued are done before continuing. 
+ */ + flush_workqueue(kctx->dma_fence.wq); +#endif + +#ifdef CONFIG_DEBUG_FS + kbase_debug_job_fault_kctx_unblock(kctx); +#endif + + kbase_jm_wait_for_zero_jobs(kctx); +} + +KBASE_EXPORT_TEST_API(kbase_jd_zap_context); + +int kbase_jd_init(struct kbase_context *kctx) +{ + int i; + int mali_err = 0; + + KBASE_DEBUG_ASSERT(kctx); + + kctx->jctx.job_done_wq = alloc_workqueue("mali_jd", + WQ_HIGHPRI | WQ_UNBOUND, 1); + if (NULL == kctx->jctx.job_done_wq) { + mali_err = -ENOMEM; + goto out1; + } + + for (i = 0; i < BASE_JD_ATOM_COUNT; i++) { + init_waitqueue_head(&kctx->jctx.atoms[i].completed); + + INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[0]); + INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[1]); + + /* Catch userspace attempting to use an atom which doesn't exist as a pre-dependency */ + kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID; + kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED; + +#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE) + kctx->jctx.atoms[i].dma_fence.context = + dma_fence_context_alloc(1); + atomic_set(&kctx->jctx.atoms[i].dma_fence.seqno, 0); + INIT_LIST_HEAD(&kctx->jctx.atoms[i].dma_fence.callbacks); +#endif + } + + for (i = 0; i < BASE_JD_RP_COUNT; i++) + kctx->jctx.renderpasses[i].state = KBASE_JD_RP_COMPLETE; + + mutex_init(&kctx->jctx.lock); + + init_waitqueue_head(&kctx->jctx.zero_jobs_wait); + + spin_lock_init(&kctx->jctx.tb_lock); + + kctx->jctx.job_nr = 0; + INIT_LIST_HEAD(&kctx->completed_jobs); + atomic_set(&kctx->work_count, 0); + + return 0; + + out1: + return mali_err; +} + +KBASE_EXPORT_TEST_API(kbase_jd_init); + +void kbase_jd_exit(struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kctx); + + /* Work queue is emptied by this */ + destroy_workqueue(kctx->jctx.job_done_wq); +} + +KBASE_EXPORT_TEST_API(kbase_jd_exit); diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c new file mode 100644 index 
0000000..e9a161f --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c @@ -0,0 +1,244 @@ +/* + * + * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifdef CONFIG_DEBUG_FS + +#include +#include +#include +#include +#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) +#include +#endif +#include + +struct kbase_jd_debugfs_depinfo { + u8 id; + char type; +}; + +static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom, + struct seq_file *sfile) +{ +#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) + struct kbase_sync_fence_info info; + int res; + + switch (atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { + case BASE_JD_REQ_SOFT_FENCE_TRIGGER: + res = kbase_sync_fence_out_info_get(atom, &info); + if (res == 0) + seq_printf(sfile, "Sa([%p]%d) ", + info.fence, info.status); + break; + case BASE_JD_REQ_SOFT_FENCE_WAIT: + res = kbase_sync_fence_in_info_get(atom, &info); + if (res == 0) + seq_printf(sfile, "Wa([%p]%d) ", + info.fence, info.status); + break; + default: + break; + } +#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ + +#ifdef CONFIG_MALI_DMA_FENCE + if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { + struct kbase_fence_cb *cb; + + if (atom->dma_fence.fence) { +#if 
(LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) + struct fence *fence = atom->dma_fence.fence; +#else + struct dma_fence *fence = atom->dma_fence.fence; +#endif + + seq_printf(sfile, +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) + "Sd(%u#%u: %s) ", +#else + "Sd(%llu#%u: %s) ", +#endif + fence->context, + fence->seqno, + dma_fence_is_signaled(fence) ? + "signaled" : "active"); + } + + list_for_each_entry(cb, &atom->dma_fence.callbacks, + node) { +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) + struct fence *fence = cb->fence; +#else + struct dma_fence *fence = cb->fence; +#endif + + seq_printf(sfile, +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) + "Wd(%u#%u: %s) ", +#else + "Wd(%llu#%u: %s) ", +#endif + fence->context, + fence->seqno, + dma_fence_is_signaled(fence) ? + "signaled" : "active"); + } + } +#endif /* CONFIG_MALI_DMA_FENCE */ + +} + +static void kbasep_jd_debugfs_atom_deps( + struct kbase_jd_debugfs_depinfo *deps, + struct kbase_jd_atom *atom) +{ + struct kbase_context *kctx = atom->kctx; + int i; + + for (i = 0; i < 2; i++) { + deps[i].id = (unsigned)(atom->dep[i].atom ? + kbase_jd_atom_id(kctx, atom->dep[i].atom) : 0); + + switch (atom->dep[i].dep_type) { + case BASE_JD_DEP_TYPE_INVALID: + deps[i].type = ' '; + break; + case BASE_JD_DEP_TYPE_DATA: + deps[i].type = 'D'; + break; + case BASE_JD_DEP_TYPE_ORDER: + deps[i].type = '>'; + break; + default: + deps[i].type = '?'; + break; + } + } +} +/** + * kbasep_jd_debugfs_atoms_show - Show callback for the JD atoms debugfs file. + * @sfile: The debugfs entry + * @data: Data associated with the entry + * + * This function is called to get the contents of the JD atoms debugfs file. 
+ * This is a report of all atoms managed by kbase_jd_context.atoms + * + * Return: always 0; this function has no error path, it only emits text + * through the seq_file helpers + */ +static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data) +{ + struct kbase_context *kctx = sfile->private; + struct kbase_jd_atom *atoms; + unsigned long irq_flags; + int i; + + KBASE_DEBUG_ASSERT(kctx != NULL); + + /* Print version */ + seq_printf(sfile, "v%u\n", MALI_JD_DEBUGFS_VERSION); + + /* Print U/K API version */ + seq_printf(sfile, "ukv%u.%u\n", BASE_UK_VERSION_MAJOR, + BASE_UK_VERSION_MINOR); + + /* Print table heading */ + seq_puts(sfile, " ID, Core req, St, CR, Predeps, Start time, Additional info...\n"); + + atoms = kctx->jctx.atoms; + /* General atom states */ + mutex_lock(&kctx->jctx.lock); + /* JS-related states */ + spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); + for (i = 0; i != BASE_JD_ATOM_COUNT; ++i) { + struct kbase_jd_atom *atom = &atoms[i]; + s64 start_timestamp = 0; + struct kbase_jd_debugfs_depinfo deps[2]; + + if (atom->status == KBASE_JD_ATOM_STATE_UNUSED) + continue; + + /* start_timestamp is cleared as soon as the atom leaves UNUSED state + * and set before a job is submitted to the h/w, a non-zero value means + * it is valid */ + if (ktime_to_ns(atom->start_timestamp)) + start_timestamp = ktime_to_ns( + ktime_sub(ktime_get(), atom->start_timestamp)); + + kbasep_jd_debugfs_atom_deps(deps, atom); + + seq_printf(sfile, + "%3u, %8x, %2u, %c%3u %c%3u, %20lld, ", + i, atom->core_req, atom->status, + deps[0].type, deps[0].id, + deps[1].type, deps[1].id, + start_timestamp); + + + kbase_jd_debugfs_fence_info(atom, sfile); + + seq_puts(sfile, "\n"); + } + spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); + mutex_unlock(&kctx->jctx.lock); + + return 0; +} + + +/** + * kbasep_jd_debugfs_atoms_open - open operation for atom debugfs file + * @in: &struct inode pointer + * @file: &struct file pointer + * + * Return: the value returned by single_open() + */
+static int kbasep_jd_debugfs_atoms_open(struct inode *in, struct file *file) +{ + return single_open(file, kbasep_jd_debugfs_atoms_show, in->i_private); +} + +static const struct file_operations kbasep_jd_debugfs_atoms_fops = { + .owner = THIS_MODULE, + .open = kbasep_jd_debugfs_atoms_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx) +{ + /* Caller already ensures this, but we keep the pattern for + * maintenance safety. + */ + if (WARN_ON(!kctx) || + WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry))) + return; + + /* Expose all atoms */ + debugfs_create_file("atoms", S_IRUGO, kctx->kctx_dentry, kctx, + &kbasep_jd_debugfs_atoms_fops); + +} + +#endif /* CONFIG_DEBUG_FS */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h new file mode 100644 index 0000000..697bdef --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h @@ -0,0 +1,45 @@ +/* + * + * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * @file mali_kbase_jd_debugfs.h + * Header file for job dispatcher-related entries in debugfs + */ + +#ifndef _KBASE_JD_DEBUGFS_H +#define _KBASE_JD_DEBUGFS_H + +#include + +#define MALI_JD_DEBUGFS_VERSION 3 + +/* Forward declarations */ +struct kbase_context; + +/** + * kbasep_jd_debugfs_ctx_init() - Add debugfs entries for JD system + * + * @kctx: Pointer to kbase_context + */ +void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx); + +#endif /*_KBASE_JD_DEBUGFS_H*/ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c new file mode 100644 index 0000000..3f17dd7 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c @@ -0,0 +1,151 @@ +/* + * + * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/* + * HW access job manager common APIs + */ + +#include +#include "mali_kbase_hwaccess_jm.h" +#include "mali_kbase_jm.h" + +/** + * kbase_jm_next_job() - Attempt to run the next @nr_jobs_to_submit jobs on slot + * @js on the active context.
+ * @kbdev: Device pointer + * @js: Job slot to run on + * @nr_jobs_to_submit: Number of jobs to attempt to submit + * + * Return: true if slot can still be submitted on, false if slot is now full. + */ +static bool kbase_jm_next_job(struct kbase_device *kbdev, int js, + int nr_jobs_to_submit) +{ + struct kbase_context *kctx; + int i; + + kctx = kbdev->hwaccess.active_kctx[js]; + dev_dbg(kbdev->dev, + "Trying to run the next %d jobs in kctx %p (s:%d)\n", + nr_jobs_to_submit, (void *)kctx, js); + + if (!kctx) + return true; + + for (i = 0; i < nr_jobs_to_submit; i++) { + struct kbase_jd_atom *katom = kbase_js_pull(kctx, js); + + if (!katom) + return true; /* Context has no jobs on this slot */ + + kbase_backend_run_atom(kbdev, katom); + } + + dev_dbg(kbdev->dev, "Slot ringbuffer should now be full (s:%d)\n", js); + return false; +} + +u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask) +{ + u32 ret_mask = 0; + + lockdep_assert_held(&kbdev->hwaccess_lock); + dev_dbg(kbdev->dev, "JM kick slot mask 0x%x\n", js_mask); + + while (js_mask) { + int js = ffs(js_mask) - 1; + int nr_jobs_to_submit = kbase_backend_slot_free(kbdev, js); + + if (kbase_jm_next_job(kbdev, js, nr_jobs_to_submit)) + ret_mask |= (1u << js); /* 1u keeps the shift defined when js == 31 */ + + js_mask &= ~(1u << js); /* clear the slot bit just handled */ + } + + dev_dbg(kbdev->dev, "Can still submit to mask 0x%x\n", ret_mask); + return ret_mask; +} + +void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (!down_trylock(&js_devdata->schedule_sem)) { + kbase_jm_kick(kbdev, js_mask); + up(&js_devdata->schedule_sem); + } +} + +void kbase_jm_try_kick_all(struct kbase_device *kbdev) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (!down_trylock(&js_devdata->schedule_sem)) { + kbase_jm_kick_all(kbdev); + up(&js_devdata->schedule_sem); + } +} + +void kbase_jm_idle_ctx(struct kbase_device
*kbdev, struct kbase_context *kctx) +{ + int js; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { + if (kbdev->hwaccess.active_kctx[js] == kctx) { + dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n", + (void *)kctx, js); + kbdev->hwaccess.active_kctx[js] = NULL; + } + } +} + +struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + dev_dbg(kbdev->dev, "Atom %p is returning with event code 0x%x\n", + (void *)katom, katom->event_code); + + if (katom->event_code != BASE_JD_EVENT_STOPPED && + katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) { + return kbase_js_complete_atom(katom, NULL); + } else { + kbase_js_unpull(katom->kctx, katom); + return NULL; + } +} + +struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, ktime_t *end_timestamp) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + return kbase_js_complete_atom(katom, end_timestamp); +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h new file mode 100644 index 0000000..a3c7744 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h @@ -0,0 +1,115 @@ +/* + * + * (C) COPYRIGHT 2014, 2016, 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +/* + * Job manager common APIs + */ + +#ifndef _KBASE_JM_H_ +#define _KBASE_JM_H_ + +/** + * kbase_jm_kick() - Indicate that there are jobs ready to run. + * @kbdev: Device pointer + * @js_mask: Mask of the job slots that can be pulled from. + * + * Caller must hold the hwaccess_lock and schedule_sem semaphore + * + * Return: Mask of the job slots that can still be submitted to. + */ +u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask); + +/** + * kbase_jm_kick_all() - Indicate that there are jobs ready to run on all job + * slots. + * @kbdev: Device pointer + * + * Caller must hold the hwaccess_lock and schedule_sem semaphore + * + * Return: Mask of the job slots that can still be submitted to. + */ +static inline u32 kbase_jm_kick_all(struct kbase_device *kbdev) +{ + return kbase_jm_kick(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1); +} + +/** + * kbase_jm_try_kick - Attempt to call kbase_jm_kick + * @kbdev: Device pointer + * @js_mask: Mask of the job slots that can be pulled from + * Context: Caller must hold hwaccess_lock + * + * If schedule_sem can be immediately obtained then this function will call + * kbase_jm_kick() otherwise it will do nothing. + */ +void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask); + +/** + * kbase_jm_try_kick_all() - Attempt to call kbase_jm_kick_all + * @kbdev: Device pointer + * Context: Caller must hold hwaccess_lock + * + * If schedule_sem can be immediately obtained then this function will call + * kbase_jm_kick_all() otherwise it will do nothing. + */ +void kbase_jm_try_kick_all(struct kbase_device *kbdev); + +/** + * kbase_jm_idle_ctx() - Mark a context as idle. 
+ * @kbdev: Device pointer + * @kctx: Context to mark as idle + * + * No more atoms will be pulled from this context until it is marked as active + * by kbase_js_use_ctx(). + * + * The context should have no atoms currently pulled from it + * (kctx->atoms_pulled == 0). + * + * Caller must hold the hwaccess_lock + */ +void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); + +/** + * kbase_jm_return_atom_to_js() - Return an atom to the job scheduler that has + * been soft-stopped or will fail due to a + * dependency + * @kbdev: Device pointer + * @katom: Atom that has been stopped or will be failed + * + * Return: Atom that has now been unblocked and can now be run, or NULL if none + */ +struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, + struct kbase_jd_atom *katom); + +/** + * kbase_jm_complete() - Complete an atom + * @kbdev: Device pointer + * @katom: Atom that has completed + * @end_timestamp: Timestamp of atom completion + * + * Return: Atom that has now been unblocked and can now be run, or NULL if none + */ +struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, ktime_t *end_timestamp); + +#endif /* _KBASE_JM_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c new file mode 100644 index 0000000..0b0c5bf --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c @@ -0,0 +1,3760 @@ +/* + * + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/* + * Job Scheduler Implementation + */ +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "mali_kbase_jm.h" +#include "mali_kbase_hwaccess_jm.h" + +/* + * Private types + */ + +/* Bitpattern indicating the result of releasing a context */ +enum { + /* The context was descheduled - caller should try scheduling in a new + * one to keep the runpool full */ + KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED = (1u << 0), + /* Ctx attributes were changed - caller should try scheduling all + * contexts */ + KBASEP_JS_RELEASE_RESULT_SCHED_ALL = (1u << 1) +}; + +typedef u32 kbasep_js_release_result; + +const int kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS] = { + KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */ + KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */ + KBASE_JS_ATOM_SCHED_PRIO_LOW /* BASE_JD_PRIO_LOW */ +}; + +const base_jd_prio +kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT] = { + BASE_JD_PRIO_HIGH, /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */ + BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */ + BASE_JD_PRIO_LOW /* KBASE_JS_ATOM_SCHED_PRIO_LOW */ +}; + + +/* + * Private function prototypes + */ +static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( + struct kbase_device *kbdev, struct kbase_context *kctx, + struct kbasep_js_atom_retained_state *katom_retained_state); + +static int kbase_js_get_slot(struct kbase_device *kbdev, + struct kbase_jd_atom *katom); + +static void 
kbase_js_foreach_ctx_job(struct kbase_context *kctx, + kbasep_js_ctx_job_cb callback); + +/* Helper for ktrace: returns the context's refcount when ktrace is enabled, 0 otherwise */ +#if KBASE_KTRACE_ENABLE +static int kbase_ktrace_get_ctx_refcnt(struct kbase_context *kctx) +{ + return atomic_read(&kctx->refcount); +} +#else /* KBASE_KTRACE_ENABLE */ +static int kbase_ktrace_get_ctx_refcnt(struct kbase_context *kctx) +{ + CSTD_UNUSED(kctx); + return 0; +} +#endif /* KBASE_KTRACE_ENABLE */ + +/* + * Private functions + */ + +/** + * core_reqs_from_jsn_features - Convert JSn_FEATURES to core requirements + * @features: JSn_FEATURES register value + * + * Given a JSn_FEATURES register value, returns the core requirements that + * match. Each JS_FEATURE_* bit set in @features sets one BASE_JD_REQ_* bit in + * the result: SET_VALUE_JOB sets V, CACHE_FLUSH_JOB sets CF, COMPUTE_JOB sets + * CS, TILER_JOB sets T and FRAGMENT_JOB sets FS. + * + * Return: Core requirement bit mask + */ +static base_jd_core_req core_reqs_from_jsn_features(u16 features) +{ + base_jd_core_req core_req = 0u; + + if ((features & JS_FEATURE_SET_VALUE_JOB) != 0) + core_req |= BASE_JD_REQ_V; + + if ((features & JS_FEATURE_CACHE_FLUSH_JOB) != 0) + core_req |= BASE_JD_REQ_CF; + + if ((features & JS_FEATURE_COMPUTE_JOB) != 0) + core_req |= BASE_JD_REQ_CS; + + if ((features & JS_FEATURE_TILER_JOB) != 0) + core_req |= BASE_JD_REQ_T; + + if ((features & JS_FEATURE_FRAGMENT_JOB) != 0) + core_req |= BASE_JD_REQ_FS; + + return core_req; +} +/* Call kbase_backend_ctx_count_changed() with runpool_mutex held */ +static void kbase_js_sync_timers(struct kbase_device *kbdev) +{ + mutex_lock(&kbdev->js_data.runpool_mutex); + kbase_backend_ctx_count_changed(kbdev); + mutex_unlock(&kbdev->js_data.runpool_mutex); +} + +/** + * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms + * @kctx: Pointer to kbase context with ring buffer. + * @js: Job slot id to check. + * @prio: Priority to check. + * + * Returns true if there are no atoms to pull. There may be running atoms in the + * ring buffer even if there are no atoms to pull. It is also possible for the + * ring buffer to be full (with running atoms) when this function returns + * true. + * + * Return: true if there are no atoms to pull, false otherwise.
+ */ +static inline bool +jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) +{ + bool none_to_pull; + struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + none_to_pull = RB_EMPTY_ROOT(&rb->runnable_tree); + + dev_dbg(kctx->kbdev->dev, + "Slot %d (prio %d) is %spullable in kctx %p\n", + js, prio, none_to_pull ? "not " : "", kctx); + + return none_to_pull; +} + +/** + * jsctx_rb_none_to_pull(): - Check if all priority ring buffers have no + * pullable atoms + * @kctx: Pointer to kbase context with ring buffer. + * @js: Job slot id to check. + * + * Caller must hold hwaccess_lock + * + * Return: true if the ring buffers for all priorities have no pullable atoms, + * false otherwise. + */ +static inline bool +jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) +{ + int prio; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH; + prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { + if (!jsctx_rb_none_to_pull_prio(kctx, js, prio)) + return false; + } + + return true; +} + +/** + * jsctx_queue_foreach_prio(): - Execute callback for each entry in the queue. + * @kctx: Pointer to kbase context with the queue. + * @js: Job slot id to iterate. + * @prio: Priority id to iterate. + * @callback: Function pointer to callback. + * + * Iterate over a queue and invoke @callback for each entry in the queue, and + * remove the entry from the queue. + * + * If entries are added to the queue while this is running those entries may, or + * may not be covered. To ensure that all entries in the buffer have been + * enumerated when this function returns jsctx->lock must be held when calling + * this function. + * + * The HW access lock must always be held when calling this function. 
+ */ +static void +jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, + kbasep_js_ctx_job_cb callback) +{ + struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + while (!RB_EMPTY_ROOT(&queue->runnable_tree)) { + struct rb_node *node = rb_first(&queue->runnable_tree); + struct kbase_jd_atom *entry = rb_entry(node, + struct kbase_jd_atom, runnable_tree_node); + + rb_erase(node, &queue->runnable_tree); + callback(kctx->kbdev, entry); + + /* Runnable end-of-renderpass atoms can also be in the linked + * list of atoms blocked on cross-slot dependencies. Remove them + * to avoid calling the callback twice. + */ + if (entry->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST) { + WARN_ON(!(entry->core_req & + BASE_JD_REQ_END_RENDERPASS)); + dev_dbg(kctx->kbdev->dev, + "Del runnable atom %p from X_DEP list\n", + (void *)entry); + + list_del(&entry->queue); + entry->atom_flags &= + ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; + } + } + + while (!list_empty(&queue->x_dep_head)) { + struct kbase_jd_atom *entry = list_entry(queue->x_dep_head.next, + struct kbase_jd_atom, queue); + + WARN_ON(!(entry->atom_flags & + KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)); + dev_dbg(kctx->kbdev->dev, + "Del blocked atom %p from X_DEP list\n", + (void *)entry); + + list_del(queue->x_dep_head.next); + entry->atom_flags &= + ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; + + callback(kctx->kbdev, entry); + } +} + +/** + * jsctx_queue_foreach(): - Execute callback for each entry in every queue + * @kctx: Pointer to kbase context with queue. + * @js: Job slot id to iterate. + * @callback: Function pointer to callback. + * + * Iterate over all the different priorities, and for each call + * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback + * for each entry, and remove the entry from the queue. 
+ */ +static inline void +jsctx_queue_foreach(struct kbase_context *kctx, int js, + kbasep_js_ctx_job_cb callback) +{ + int prio; + + for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH; + prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) + jsctx_queue_foreach_prio(kctx, js, prio, callback); +} + +/** + * jsctx_rb_peek_prio(): - Check buffer and get next atom + * @kctx: Pointer to kbase context with ring buffer. + * @js: Job slot id to check. + * @prio: Priority id to check. + * + * Check the runnable tree for the specified @js and @prio and return a pointer + * to its first atom, unless the tree is empty. The atom is not removed. + * + * Caller must hold the hwaccess_lock. + * + * Return: Pointer to next atom in buffer, or NULL if there is no atom. + */ +static inline struct kbase_jd_atom * +jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) +{ + struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; + struct rb_node *node; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + dev_dbg(kctx->kbdev->dev, + "Peeking runnable tree of kctx %p for prio %d (s:%d)\n", + (void *)kctx, prio, js); + + node = rb_first(&rb->runnable_tree); + if (!node) { + dev_dbg(kctx->kbdev->dev, "Tree is empty\n"); + return NULL; + } + + return rb_entry(node, struct kbase_jd_atom, runnable_tree_node); +} + +/** + * jsctx_rb_peek(): - Check all priority buffers and get next atom + * @kctx: Pointer to kbase context with ring buffer. + * @js: Job slot id to check. + * + * Check the ring buffers for all priorities, starting from + * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and return a pointer + * to the next atom, unless the ring buffers of all priorities are empty. + * + * Caller must hold the hwaccess_lock. + * + * Return: Pointer to next atom in buffer, or NULL if there is no atom.
+ */ +static inline struct kbase_jd_atom * +jsctx_rb_peek(struct kbase_context *kctx, int js) +{ + int prio; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH; + prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { + struct kbase_jd_atom *katom; + + katom = jsctx_rb_peek_prio(kctx, js, prio); + if (katom) + return katom; + } + + return NULL; +} + +/** + * jsctx_rb_pull(): - Mark atom in list as running + * @kctx: Pointer to kbase context with ring buffer. + * @katom: Pointer to katom to pull. + * + * Mark an atom previously obtained from jsctx_rb_peek() as running. + * + * @katom must currently be at the head of the ring buffer. + */ +static inline void +jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ + int prio = katom->sched_priority; + int js = katom->slot_nr; + struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + dev_dbg(kctx->kbdev->dev, "Erasing atom %p from runnable tree of kctx %p\n", + (void *)katom, (void *)kctx); + + /* Atoms must be pulled in the correct order. 
*/ + WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio)); + + rb_erase(&katom->runnable_tree_node, &rb->runnable_tree); +} + +/* + * Wrap-safe "less than": true if the difference (a - b), interpreted as a + * signed 32-bit value, is negative. Arguments are parenthesised so that + * expression arguments are evaluated as a unit. + */ +#define LESS_THAN_WRAP(a, b) ((s32)((a) - (b)) < 0) + +/* + * jsctx_tree_add() - Insert @katom into the runnable tree selected by its + * sched_priority and slot_nr, ordered by atom age (compared with + * LESS_THAN_WRAP()). Caller must hold hwaccess_lock. + */ +static void +jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ + struct kbase_device *kbdev = kctx->kbdev; + int prio = katom->sched_priority; + int js = katom->slot_nr; + struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; + struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + dev_dbg(kbdev->dev, "Adding atom %p to runnable tree of kctx %p (s:%d)\n", + (void *)katom, (void *)kctx, js); + + /* Descend to the leaf position: atoms with a lower age go left. */ + while (*new) { + struct kbase_jd_atom *entry = container_of(*new, + struct kbase_jd_atom, runnable_tree_node); + + parent = *new; + if (LESS_THAN_WRAP(katom->age, entry->age)) + new = &((*new)->rb_left); + else + new = &((*new)->rb_right); + } + + /* Add new node and rebalance tree. */ + rb_link_node(&katom->runnable_tree_node, parent, new); + rb_insert_color(&katom->runnable_tree_node, &queue->runnable_tree); + + KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_READY); +} + +/** + * jsctx_rb_unpull(): - Undo marking of atom in list as running + * @kctx: Pointer to kbase context with ring buffer. + * @katom: Pointer to katom to unpull. + * + * Undo jsctx_rb_pull() and put @katom back in the queue. + * + * jsctx_rb_unpull() must be called on atoms in the same order the atoms were + * pulled.
+ */ +static inline void +jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + jsctx_tree_add(kctx, katom); +} + +static bool kbase_js_ctx_pullable(struct kbase_context *kctx, + int js, + bool is_scheduled); +static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js); +static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js); + +/* + * Functions private to KBase ('Protected' functions) + */ +int kbasep_js_devdata_init(struct kbase_device * const kbdev) +{ + struct kbasep_js_device_data *jsdd; + int i, j; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + jsdd = &kbdev->js_data; + +#ifdef CONFIG_MALI_DEBUG + /* Soft-stop will be disabled on a single context by default unless + * softstop_always is set */ + jsdd->softstop_always = false; +#endif /* CONFIG_MALI_DEBUG */ + jsdd->nr_all_contexts_running = 0; + jsdd->nr_user_contexts_running = 0; + jsdd->nr_contexts_pullable = 0; + atomic_set(&jsdd->nr_contexts_runnable, 0); + /* No ctx allowed to submit */ + jsdd->runpool_irq.submit_allowed = 0u; + memset(jsdd->runpool_irq.ctx_attr_ref_count, 0, + sizeof(jsdd->runpool_irq.ctx_attr_ref_count)); + memset(jsdd->runpool_irq.slot_affinities, 0, + sizeof(jsdd->runpool_irq.slot_affinities)); + memset(jsdd->runpool_irq.slot_affinity_refcount, 0, + sizeof(jsdd->runpool_irq.slot_affinity_refcount)); + INIT_LIST_HEAD(&jsdd->suspended_soft_jobs_list); + + /* Config attributes */ + jsdd->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS; + jsdd->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS; + jsdd->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL; + jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS; + jsdd->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL; + jsdd->hard_stop_ticks_dumping = DEFAULT_JS_HARD_STOP_TICKS_DUMPING; + jsdd->gpu_reset_ticks_ss = 
DEFAULT_JS_RESET_TICKS_SS; + jsdd->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL; + jsdd->gpu_reset_ticks_dumping = DEFAULT_JS_RESET_TICKS_DUMPING; + jsdd->ctx_timeslice_ns = DEFAULT_JS_CTX_TIMESLICE_NS; + atomic_set(&jsdd->soft_job_timeout_ms, DEFAULT_JS_SOFT_JOB_TIMEOUT); + + dev_dbg(kbdev->dev, "JS Config Attribs: "); + dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u", + jsdd->scheduling_period_ns); + dev_dbg(kbdev->dev, "\tsoft_stop_ticks:%u", + jsdd->soft_stop_ticks); + dev_dbg(kbdev->dev, "\tsoft_stop_ticks_cl:%u", + jsdd->soft_stop_ticks_cl); + dev_dbg(kbdev->dev, "\thard_stop_ticks_ss:%u", + jsdd->hard_stop_ticks_ss); + dev_dbg(kbdev->dev, "\thard_stop_ticks_cl:%u", + jsdd->hard_stop_ticks_cl); + dev_dbg(kbdev->dev, "\thard_stop_ticks_dumping:%u", + jsdd->hard_stop_ticks_dumping); + dev_dbg(kbdev->dev, "\tgpu_reset_ticks_ss:%u", + jsdd->gpu_reset_ticks_ss); + dev_dbg(kbdev->dev, "\tgpu_reset_ticks_cl:%u", + jsdd->gpu_reset_ticks_cl); + dev_dbg(kbdev->dev, "\tgpu_reset_ticks_dumping:%u", + jsdd->gpu_reset_ticks_dumping); + dev_dbg(kbdev->dev, "\tctx_timeslice_ns:%u", + jsdd->ctx_timeslice_ns); + dev_dbg(kbdev->dev, "\tsoft_job_timeout:%i", + atomic_read(&jsdd->soft_job_timeout_ms)); + + if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss && + jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss && + jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_dumping && + jsdd->hard_stop_ticks_dumping < + jsdd->gpu_reset_ticks_dumping)) { + dev_err(kbdev->dev, "Job scheduler timeouts invalid; soft/hard/reset tick counts should be in increasing order\n"); + return -EINVAL; + } + +#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS + dev_dbg(kbdev->dev, "Job Scheduling Soft-stops disabled, ignoring value for soft_stop_ticks==%u at %uns per tick. 
Other soft-stops may still occur.", + jsdd->soft_stop_ticks, + jsdd->scheduling_period_ns); +#endif +#if KBASE_DISABLE_SCHEDULING_HARD_STOPS + dev_dbg(kbdev->dev, "Job Scheduling Hard-stops disabled, ignoring values for hard_stop_ticks_ss==%d and hard_stop_ticks_dumping==%u at %uns per tick. Other hard-stops may still occur.", + jsdd->hard_stop_ticks_ss, + jsdd->hard_stop_ticks_dumping, + jsdd->scheduling_period_ns); +#endif +#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS && KBASE_DISABLE_SCHEDULING_HARD_STOPS + dev_dbg(kbdev->dev, "Note: The JS tick timer (if coded) will still be run, but do nothing."); +#endif + + for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) + jsdd->js_reqs[i] = core_reqs_from_jsn_features( + kbdev->gpu_props.props.raw_props.js_features[i]); + + /* On error, we could continue on: providing none of the below resources + * rely on the ones above */ + + mutex_init(&jsdd->runpool_mutex); + mutex_init(&jsdd->queue_mutex); + sema_init(&jsdd->schedule_sem, 1); + + for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) { + for (j = 0; j < KBASE_JS_ATOM_SCHED_PRIO_COUNT; ++j) { + INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i][j]); + INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i][j]); + } + } + + return 0; +} + +void kbasep_js_devdata_halt(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +void kbasep_js_devdata_term(struct kbase_device *kbdev) +{ + struct kbasep_js_device_data *js_devdata; + s8 zero_ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT] = { 0, }; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + js_devdata = &kbdev->js_data; + + /* The caller must de-register all contexts before calling this + */ + KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running == 0); + KBASE_DEBUG_ASSERT(memcmp( + js_devdata->runpool_irq.ctx_attr_ref_count, + zero_ctx_attr_ref_count, + sizeof(zero_ctx_attr_ref_count)) == 0); + CSTD_UNUSED(zero_ctx_attr_ref_count); +} + +int kbasep_js_kctx_init(struct kbase_context *const kctx) +{ + struct kbase_device *kbdev; + struct 
kbasep_js_kctx_info *js_kctx_info; + int i, j; + + KBASE_DEBUG_ASSERT(kctx != NULL); + + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev != NULL); + + for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i) + INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]); + + js_kctx_info = &kctx->jctx.sched_info; + + kctx->slots_pullable = 0; + js_kctx_info->ctx.nr_jobs = 0; + kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); + kbase_ctx_flag_clear(kctx, KCTX_DYING); + memset(js_kctx_info->ctx.ctx_attr_ref_count, 0, + sizeof(js_kctx_info->ctx.ctx_attr_ref_count)); + + /* Initially, the context is disabled from submission until the create + * flags are set */ + kbase_ctx_flag_set(kctx, KCTX_SUBMIT_DISABLED); + + /* On error, we could continue on: providing none of the below resources + * rely on the ones above */ + mutex_init(&js_kctx_info->ctx.jsctx_mutex); + + init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait); + + for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { + for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) { + INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].x_dep_head); + kctx->jsctx_queue[i][j].runnable_tree = RB_ROOT; + } + } + + return 0; +} + +void kbasep_js_kctx_term(struct kbase_context *kctx) +{ + struct kbase_device *kbdev; + struct kbasep_js_kctx_info *js_kctx_info; + int js; + bool update_ctx_count = false; + unsigned long flags; + + KBASE_DEBUG_ASSERT(kctx != NULL); + + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev != NULL); + + js_kctx_info = &kctx->jctx.sched_info; + + /* The caller must de-register all jobs before calling this */ + KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs == 0); + + mutex_lock(&kbdev->js_data.queue_mutex); + mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) + list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, 
flags); + + if (kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)) { + WARN_ON(atomic_read(&kbdev->js_data.nr_contexts_runnable) <= 0); + atomic_dec(&kbdev->js_data.nr_contexts_runnable); + update_ctx_count = true; + kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); + } + + mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); + mutex_unlock(&kbdev->js_data.queue_mutex); + + if (update_ctx_count) { + mutex_lock(&kbdev->js_data.runpool_mutex); + kbase_backend_ctx_count_changed(kbdev); + mutex_unlock(&kbdev->js_data.runpool_mutex); + } +} + +/** + * kbase_js_ctx_list_add_pullable_nolock - Add context to the tail of the + * per-slot pullable context queue, + * where the caller must hold + * hwaccess_lock + * @kbdev: Device pointer + * @kctx: Context to add to queue + * @js: Job slot to use + * + * If the context is already on a per-slot list for @js it is removed first, + * then appended to the tail of the pullable list for its current priority. + * + * If the context had no pullable slots before this call, the device's count + * of pullable contexts is incremented and, when the context has no atoms + * pulled, it takes a runnable reference (KCTX_RUNNABLE_REF). + * + * Caller must hold hwaccess_lock + * + * Return: true if this was the first slot to become pullable for the context, + * in which case the caller should call + * kbase_backend_ctx_count_changed() + */ +static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js) +{ + bool ret = false; + + lockdep_assert_held(&kbdev->hwaccess_lock); + dev_dbg(kbdev->dev, "Add pullable tail kctx %p (s:%d)\n", + (void *)kctx, js); + + if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) + list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + + list_add_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], + &kbdev->js_data.ctx_list_pullable[js][kctx->priority]); + + if (!kctx->slots_pullable) { + kbdev->js_data.nr_contexts_pullable++; + ret = true; + if (!atomic_read(&kctx->atoms_pulled)) { + WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); + kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF); + atomic_inc(&kbdev->js_data.nr_contexts_runnable); + } + } + kctx->slots_pullable |= (1 << js); + + return ret; +} + +/** + * kbase_js_ctx_list_add_pullable_head_nolock - Variant of + * kbase_js_ctx_list_add_pullable_head() + * where the caller must hold + * hwaccess_lock + * @kbdev: Device pointer + * @kctx: Context to add to queue + * @js: Job slot to
use + * + * Caller must hold hwaccess_lock + * + * Return: true if caller should call kbase_backend_ctx_count_changed() + */ +static bool kbase_js_ctx_list_add_pullable_head_nolock( + struct kbase_device *kbdev, struct kbase_context *kctx, int js) +{ + bool ret = false; + + lockdep_assert_held(&kbdev->hwaccess_lock); + dev_dbg(kbdev->dev, "Add pullable head kctx %p (s:%d)\n", + (void *)kctx, js); + + if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) + list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + + list_add(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], + &kbdev->js_data.ctx_list_pullable[js][kctx->priority]); + + if (!kctx->slots_pullable) { + kbdev->js_data.nr_contexts_pullable++; + ret = true; + if (!atomic_read(&kctx->atoms_pulled)) { + WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); + kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF); + atomic_inc(&kbdev->js_data.nr_contexts_runnable); + } + } + kctx->slots_pullable |= (1 << js); + + return ret; +} + +/** + * kbase_js_ctx_list_add_pullable_head - Add context to the head of the + * per-slot pullable context queue + * @kbdev: Device pointer + * @kctx: Context to add to queue + * @js: Job slot to use + * + * If the context is on either the pullable or unpullable queues, then it is + * removed before being added to the head. + * + * This function should be used when a context has been scheduled, but no jobs + * can currently be pulled from it. 
+ * + * Return: true if caller should call kbase_backend_ctx_count_changed() + */ +static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js) +{ + bool ret; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + ret = kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, js); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return ret; +} + +/** + * kbase_js_ctx_list_add_unpullable_nolock - Add context to the tail of the + * per-slot unpullable context queue + * @kbdev: Device pointer + * @kctx: Context to add to queue + * @js: Job slot to use + * + * The context must already be on the per-slot pullable queue. It will be + * removed from the pullable queue before being added to the unpullable queue. + * + * This function should be used when a context has been pulled from, and there + * are no jobs remaining on the specified slot. + * + * If @js was the only slot on which the context was pullable, the device's + * count of pullable contexts is decremented and, when the context has no + * atoms pulled, its runnable reference (KCTX_RUNNABLE_REF) is dropped. + * + * Caller must hold hwaccess_lock + * + * Return: true if @js was the context's only pullable slot, in which case the + * caller should call kbase_backend_ctx_count_changed() + */ +static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js) +{ + bool ret = false; + + lockdep_assert_held(&kbdev->hwaccess_lock); + dev_dbg(kbdev->dev, "Add unpullable tail kctx %p (s:%d)\n", + (void *)kctx, js); + + list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], + &kbdev->js_data.ctx_list_unpullable[js][kctx->priority]); + + if (kctx->slots_pullable == (1 << js)) { + kbdev->js_data.nr_contexts_pullable--; + ret = true; + if (!atomic_read(&kctx->atoms_pulled)) { + WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); + kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); + atomic_dec(&kbdev->js_data.nr_contexts_runnable); + } + } + kctx->slots_pullable &= ~(1 << js); + + return ret; +} + +/** + * kbase_js_ctx_list_remove_nolock - Remove context from the per-slot pullable + * or unpullable context queues + * @kbdev: Device pointer + * @kctx: Context to remove from
queue + * @js: Job slot to use + * + * The context must already be on one of the queues. + * + * This function should be used when a context has no jobs on the GPU, and no + * jobs remaining for the specified slot. + * + * Caller must hold hwaccess_lock + * + * Return: true if caller should call kbase_backend_ctx_count_changed() + */ +static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js) +{ + bool ret = false; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])); + + list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + + if (kctx->slots_pullable == (1 << js)) { + kbdev->js_data.nr_contexts_pullable--; + ret = true; + if (!atomic_read(&kctx->atoms_pulled)) { + WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); + kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); + atomic_dec(&kbdev->js_data.nr_contexts_runnable); + } + } + kctx->slots_pullable &= ~(1 << js); + + return ret; +} + +/** + * kbase_js_ctx_list_pop_head_nolock - Variant of kbase_js_ctx_list_pop_head() + * where the caller must hold + * hwaccess_lock + * @kbdev: Device pointer + * @js: Job slot to use + * + * Caller must hold hwaccess_lock + * + * Return: Context to use for specified slot. 
+ * NULL if no contexts present for specified slot + */ +static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( + struct kbase_device *kbdev, + int js) +{ + struct kbase_context *kctx; + int i; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { + if (list_empty(&kbdev->js_data.ctx_list_pullable[js][i])) + continue; + + kctx = list_entry(kbdev->js_data.ctx_list_pullable[js][i].next, + struct kbase_context, + jctx.sched_info.ctx.ctx_list_entry[js]); + + list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + dev_dbg(kbdev->dev, + "Popped %p from the pullable queue (s:%d)\n", + (void *)kctx, js); + return kctx; + } + return NULL; +} + +/** + * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable + * queue. + * @kbdev: Device pointer + * @js: Job slot to use + * + * Return: Context to use for specified slot. + * NULL if no contexts present for specified slot + */ +static struct kbase_context *kbase_js_ctx_list_pop_head( + struct kbase_device *kbdev, int js) +{ + struct kbase_context *kctx; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kctx = kbase_js_ctx_list_pop_head_nolock(kbdev, js); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return kctx; +} + +/** + * kbase_js_ctx_pullable - Return if a context can be pulled from on the + * specified slot + * @kctx: Context pointer + * @js: Job slot to use + * @is_scheduled: true if the context is currently scheduled + * + * Caller must hold hwaccess_lock + * + * Return: true if context can be pulled from on specified slot + * false otherwise + */ +static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, + bool is_scheduled) +{ + struct kbasep_js_device_data *js_devdata; + struct kbase_jd_atom *katom; + struct kbase_device *kbdev = kctx->kbdev; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + js_devdata = &kbdev->js_data; + + if (is_scheduled) { + if 
(!kbasep_js_is_submit_allowed(js_devdata, kctx)) { + dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %p\n", + (void *)kctx); + return false; + } + } + katom = jsctx_rb_peek(kctx, js); + if (!katom) { + dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %p (s:%d)\n", + (void *)kctx, js); + return false; /* No pullable atoms */ + } + if (kctx->blocked_js[js][katom->sched_priority]) { + dev_dbg(kbdev->dev, + "JS: kctx %p is blocked from submitting atoms at priority %d (s:%d)\n", + (void *)kctx, katom->sched_priority, js); + return false; + } + if (atomic_read(&katom->blocked)) { + dev_dbg(kbdev->dev, "JS: Atom %p is blocked in js_ctx_pullable\n", + (void *)katom); + return false; /* next atom blocked */ + } + if (kbase_js_atom_blocked_on_x_dep(katom)) { + if (katom->x_pre_dep->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || + katom->x_pre_dep->will_fail_event_code) { + dev_dbg(kbdev->dev, + "JS: X pre-dep %p is not present in slot FIFO or will fail\n", + (void *)katom->x_pre_dep); + return false; + } + if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && + kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) { + dev_dbg(kbdev->dev, + "JS: Atom %p has cross-slot fail dependency and atoms on slot (s:%d)\n", + (void *)katom, js); + return false; + } + } + + dev_dbg(kbdev->dev, "JS: Atom %p is pullable in kctx %p (s:%d)\n", + (void *)katom, (void *)kctx, js); + + return true; +} + +static bool kbase_js_dep_validate(struct kbase_context *kctx, + struct kbase_jd_atom *katom) +{ + struct kbase_device *kbdev = kctx->kbdev; + bool ret = true; + bool has_dep = false, has_x_dep = false; + int js = kbase_js_get_slot(kbdev, katom); + int prio = katom->sched_priority; + int i; + + for (i = 0; i < 2; i++) { + struct kbase_jd_atom *dep_atom = katom->dep[i].atom; + + if (dep_atom) { + int dep_js = kbase_js_get_slot(kbdev, dep_atom); + int dep_prio = dep_atom->sched_priority; + + dev_dbg(kbdev->dev, + "Checking dep %d of atom %p (s:%d) on %p (s:%d)\n", + i, (void *)katom, 
js, (void *)dep_atom, dep_js); + + /* Dependent atom must already have been submitted */ + if (!(dep_atom->atom_flags & + KBASE_KATOM_FLAG_JSCTX_IN_TREE)) { + dev_dbg(kbdev->dev, + "Blocker not submitted yet\n"); + ret = false; + break; + } + + /* Dependencies with different priorities can't + be represented in the ringbuffer */ + if (prio != dep_prio) { + dev_dbg(kbdev->dev, + "Different atom priorities\n"); + ret = false; + break; + } + + if (js == dep_js) { + /* Only one same-slot dependency can be + * represented in the ringbuffer */ + if (has_dep) { + dev_dbg(kbdev->dev, + "Too many same-slot deps\n"); + ret = false; + break; + } + /* Each dependee atom can only have one + * same-slot dependency */ + if (dep_atom->post_dep) { + dev_dbg(kbdev->dev, + "Too many same-slot successors\n"); + ret = false; + break; + } + has_dep = true; + } else { + /* Only one cross-slot dependency can be + * represented in the ringbuffer */ + if (has_x_dep) { + dev_dbg(kbdev->dev, + "Too many cross-slot deps\n"); + ret = false; + break; + } + /* Each dependee atom can only have one + * cross-slot dependency */ + if (dep_atom->x_post_dep) { + dev_dbg(kbdev->dev, + "Too many cross-slot successors\n"); + ret = false; + break; + } + /* The dependee atom can not already be in the + * HW access ringbuffer */ + if (dep_atom->gpu_rb_state != + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + dev_dbg(kbdev->dev, + "Blocker already in ringbuffer (state:%d)\n", + dep_atom->gpu_rb_state); + ret = false; + break; + } + /* The dependee atom can not already have + * completed */ + if (dep_atom->status != + KBASE_JD_ATOM_STATE_IN_JS) { + dev_dbg(kbdev->dev, + "Blocker already completed (status:%d)\n", + dep_atom->status); + ret = false; + break; + } + + has_x_dep = true; + } + + /* Dependency can be represented in ringbuffers */ + } + } + + /* If dependencies can be represented by ringbuffer then clear them from + * atom structure */ + if (ret) { + for (i = 0; i < 2; i++) { + struct kbase_jd_atom *dep_atom = 
katom->dep[i].atom; + + if (dep_atom) { + int dep_js = kbase_js_get_slot(kbdev, dep_atom); + + dev_dbg(kbdev->dev, + "Clearing dep %d of atom %p (s:%d) on %p (s:%d)\n", + i, (void *)katom, js, (void *)dep_atom, + dep_js); + + if ((js != dep_js) && + (dep_atom->status != + KBASE_JD_ATOM_STATE_COMPLETED) + && (dep_atom->status != + KBASE_JD_ATOM_STATE_HW_COMPLETED) + && (dep_atom->status != + KBASE_JD_ATOM_STATE_UNUSED)) { + + katom->atom_flags |= + KBASE_KATOM_FLAG_X_DEP_BLOCKED; + + dev_dbg(kbdev->dev, "Set X_DEP flag on atom %p\n", + (void *)katom); + + katom->x_pre_dep = dep_atom; + dep_atom->x_post_dep = katom; + if (kbase_jd_katom_dep_type( + &katom->dep[i]) == + BASE_JD_DEP_TYPE_DATA) + katom->atom_flags |= + KBASE_KATOM_FLAG_FAIL_BLOCKER; + } + if ((kbase_jd_katom_dep_type(&katom->dep[i]) + == BASE_JD_DEP_TYPE_DATA) && + (js == dep_js)) { + katom->pre_dep = dep_atom; + dep_atom->post_dep = katom; + } + + list_del(&katom->dep_item[i]); + kbase_jd_katom_dep_clear(&katom->dep[i]); + } + } + } else { + dev_dbg(kbdev->dev, + "Deps of atom %p (s:%d) could not be represented\n", + (void *)katom, js); + } + + return ret; +} + +void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority) +{ + struct kbase_device *kbdev = kctx->kbdev; + int js; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* For every job slot, move kctx to the tail of the pullable or unpullable list (chosen by its slots_pullable bit) of the new priority */ + if (new_priority != kctx->priority) { + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + if (kctx->slots_pullable & (1 << js)) + list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], + &kbdev->js_data.ctx_list_pullable[js][new_priority]); + else + list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], + &kbdev->js_data.ctx_list_unpullable[js][new_priority]); + } + + kctx->priority = new_priority; + } +} + +void kbase_js_update_ctx_priority(struct kbase_context *kctx) +{ + struct kbase_device *kbdev = kctx->kbdev; + int new_priority = KBASE_JS_ATOM_SCHED_PRIO_LOW; +
int prio; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (kbdev->js_ctx_scheduling_mode == KBASE_JS_SYSTEM_PRIORITY_MODE) { + /* Determine the new priority for context, as per the priority + * of currently in-use atoms. + */ + for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH; + prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { + if (kctx->atoms_count[prio]) { + new_priority = prio; + break; + } + } + } + + kbase_js_set_ctx_priority(kctx, new_priority); +} + +/** + * js_add_start_rp() - Add an atom that starts a renderpass to the job scheduler + * @start_katom: Pointer to the atom to be added. + * Return: 0 if successful or a negative value on failure. + */ +static int js_add_start_rp(struct kbase_jd_atom *const start_katom) +{ + struct kbase_context *const kctx = start_katom->kctx; + struct kbase_jd_renderpass *rp; + struct kbase_device *const kbdev = kctx->kbdev; + unsigned long flags; + + lockdep_assert_held(&kctx->jctx.lock); + + if (WARN_ON(!(start_katom->core_req & BASE_JD_REQ_START_RENDERPASS))) + return -EINVAL; + + if (start_katom->core_req & BASE_JD_REQ_END_RENDERPASS) + return -EINVAL; + + compiletime_assert((1ull << (sizeof(start_katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[start_katom->renderpass_id]; + + if (rp->state != KBASE_JD_RP_COMPLETE) + return -EINVAL; + + dev_dbg(kctx->kbdev->dev, "JS add start atom %p of RP %d\n", + (void *)start_katom, start_katom->renderpass_id); + + /* The following members are read when updating the job slot + * ringbuffer/fifo therefore they require additional locking. 
+ */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + rp->state = KBASE_JD_RP_START; + rp->start_katom = start_katom; + rp->end_katom = NULL; + INIT_LIST_HEAD(&rp->oom_reg_list); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return 0; +} + +/** + * js_add_end_rp() - Add an atom that ends a renderpass to the job scheduler + * @end_katom: Pointer to the atom to be added. + * Return: 0 if successful or a negative value on failure. + */ +static int js_add_end_rp(struct kbase_jd_atom *const end_katom) +{ + struct kbase_context *const kctx = end_katom->kctx; + struct kbase_jd_renderpass *rp; + struct kbase_device *const kbdev = kctx->kbdev; + + lockdep_assert_held(&kctx->jctx.lock); + + if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) + return -EINVAL; + + if (end_katom->core_req & BASE_JD_REQ_START_RENDERPASS) + return -EINVAL; + + compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; + + dev_dbg(kbdev->dev, "JS add end atom %p in state %d of RP %d\n", + (void *)end_katom, (int)rp->state, end_katom->renderpass_id); + + if (rp->state == KBASE_JD_RP_COMPLETE) + return -EINVAL; + + if (rp->end_katom == NULL) { + /* We can't be in a retry state until the fragment job chain + * has completed. 
+ */ + unsigned long flags; + + WARN_ON(rp->state == KBASE_JD_RP_RETRY); + WARN_ON(rp->state == KBASE_JD_RP_RETRY_PEND_OOM); + WARN_ON(rp->state == KBASE_JD_RP_RETRY_OOM); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + rp->end_katom = end_katom; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } else + WARN_ON(rp->end_katom != end_katom); + + return 0; +} + +bool kbasep_js_add_job(struct kbase_context *kctx, + struct kbase_jd_atom *atom) +{ + unsigned long flags; + struct kbasep_js_kctx_info *js_kctx_info; + struct kbase_device *kbdev; + struct kbasep_js_device_data *js_devdata; + int err = 0; + + bool enqueue_required = false; + bool timer_sync = false; + + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(atom != NULL); + lockdep_assert_held(&kctx->jctx.lock); + + kbdev = kctx->kbdev; + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + mutex_lock(&js_devdata->queue_mutex); + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + + if (atom->core_req & BASE_JD_REQ_START_RENDERPASS) + err = js_add_start_rp(atom); + else if (atom->core_req & BASE_JD_REQ_END_RENDERPASS) + err = js_add_end_rp(atom); + + if (err < 0) { + atom->event_code = BASE_JD_EVENT_JOB_INVALID; + atom->status = KBASE_JD_ATOM_STATE_COMPLETED; + goto out_unlock; + } + + /* + * Begin Runpool transaction + */ + mutex_lock(&js_devdata->runpool_mutex); + + /* Refcount ctx.nr_jobs */ + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX); + ++(js_kctx_info->ctx.nr_jobs); + dev_dbg(kbdev->dev, "Add atom %p to kctx %p; now %d in ctx\n", + (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); + + /* Lock for state available during IRQ */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (++kctx->atoms_count[atom->sched_priority] == 1) + kbase_js_update_ctx_priority(kctx); + + if (!kbase_js_dep_validate(kctx, atom)) { + /* Dependencies could not be represented */ + --(js_kctx_info->ctx.nr_jobs); + dev_dbg(kbdev->dev, + "Remove atom %p from kctx %p; now %d in 
ctx\n", + (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); + + /* Setting atom status back to queued as it still has unresolved + * dependencies */ + atom->status = KBASE_JD_ATOM_STATE_QUEUED; + dev_dbg(kbdev->dev, "Atom %p status to queued\n", (void *)atom); + + /* Undo the count, as the atom will get added again later but + * leave the context priority adjusted or boosted, in case if + * this was the first higher priority atom received for this + * context. + * This will prevent the scenario of priority inversion, where + * another context having medium priority atoms keeps getting + * scheduled over this context, which is having both lower and + * higher priority atoms, but higher priority atoms are blocked + * due to dependency on lower priority atoms. With priority + * boost the high priority atom will get to run at earliest. + */ + kctx->atoms_count[atom->sched_priority]--; + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&js_devdata->runpool_mutex); + + goto out_unlock; + } + + enqueue_required = kbase_js_dep_resolved_submit(kctx, atom); + + KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc, + kbase_ktrace_get_ctx_refcnt(kctx)); + + /* Context Attribute Refcounting */ + kbasep_js_ctx_attr_ctx_retain_atom(kbdev, kctx, atom); + + if (enqueue_required) { + if (kbase_js_ctx_pullable(kctx, atom->slot_nr, false)) + timer_sync = kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, atom->slot_nr); + else + timer_sync = kbase_js_ctx_list_add_unpullable_nolock( + kbdev, kctx, atom->slot_nr); + } + /* If this context is active and the atom is the first on its slot, + * kick the job manager to attempt to fast-start the atom */ + if (enqueue_required && kctx == + kbdev->hwaccess.active_kctx[atom->slot_nr]) + kbase_jm_try_kick(kbdev, 1 << atom->slot_nr); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + if (timer_sync) + kbase_backend_ctx_count_changed(kbdev); + mutex_unlock(&js_devdata->runpool_mutex); + /* End 
runpool transaction */ + + if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) { + if (kbase_ctx_flag(kctx, KCTX_DYING)) { + /* A job got added while/after kbase_job_zap_context() + * was called on a non-scheduled context. Kill that job + * by killing the context. */ + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, + false); + } else if (js_kctx_info->ctx.nr_jobs == 1) { + /* Handle Refcount going from 0 to 1: schedule the + * context on the Queue */ + KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx); + + /* Queue was updated - caller must try to + * schedule the head context */ + WARN_ON(!enqueue_required); + } + } +out_unlock: + dev_dbg(kbdev->dev, "Enqueue of kctx %p is %srequired\n", + kctx, enqueue_required ? "" : "not "); + + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + mutex_unlock(&js_devdata->queue_mutex); + + return enqueue_required; +} + +void kbasep_js_remove_job(struct kbase_device *kbdev, + struct kbase_context *kctx, struct kbase_jd_atom *atom) +{ + struct kbasep_js_kctx_info *js_kctx_info; + unsigned long flags; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(atom != NULL); + + js_kctx_info = &kctx->jctx.sched_info; + + KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, JS_REMOVE_JOB, kctx, atom, atom->jc, + kbase_ktrace_get_ctx_refcnt(kctx)); + + /* De-refcount ctx.nr_jobs */ + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0); + --(js_kctx_info->ctx.nr_jobs); + dev_dbg(kbdev->dev, + "Remove atom %p from kctx %p; now %d in ctx\n", + (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + if (--kctx->atoms_count[atom->sched_priority] == 0) + kbase_js_update_ctx_priority(kctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, + struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ + unsigned long flags; + struct 
kbasep_js_atom_retained_state katom_retained_state; + bool attr_state_changed; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(katom != NULL); + + kbasep_js_atom_retained_state_copy(&katom_retained_state, katom); + kbasep_js_remove_job(kbdev, kctx, katom); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + /* The atom has 'finished' (will not be re-run), so no need to call + * kbasep_js_has_atom_finished(). + * + * This is because it returns false for soft-stopped atoms, but we + * want to override that, because we're cancelling an atom regardless of + * whether it was soft-stopped or not */ + attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx, + &katom_retained_state); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return attr_state_changed; +} + +/** + * kbasep_js_run_jobs_after_ctx_and_atom_release - Try running more jobs after + * releasing a context and/or atom + * @kbdev: The kbase_device to operate on + * @kctx: The kbase_context to operate on + * @katom_retained_state: Retained state from the atom + * @runpool_ctx_attr_change: True if the runpool context attributes have changed + * + * This collates a set of actions that must happen whilst hwaccess_lock is held. + * + * This includes running more jobs when: + * - The previously released kctx caused a ctx attribute change, + * - The released atom caused a ctx attribute change, + * - Slots were previously blocked due to affinity restrictions, + * - Submission during IRQ handling failed. + * + * Return: %KBASEP_JS_RELEASE_RESULT_SCHED_ALL if context attributes were + * changed. 
The caller should try scheduling all contexts + */ +static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release( + struct kbase_device *kbdev, + struct kbase_context *kctx, + struct kbasep_js_atom_retained_state *katom_retained_state, + bool runpool_ctx_attr_change) +{ + struct kbasep_js_device_data *js_devdata; + kbasep_js_release_result result = 0; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(katom_retained_state != NULL); + js_devdata = &kbdev->js_data; + + lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); + lockdep_assert_held(&js_devdata->runpool_mutex); + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (js_devdata->nr_user_contexts_running != 0 && runpool_ctx_attr_change) { + /* A change in runpool ctx attributes might mean we can + * run more jobs than before */ + result = KBASEP_JS_RELEASE_RESULT_SCHED_ALL; + + KBASE_KTRACE_ADD_JM_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB, + kctx, NULL, 0u, 0); + } + return result; +} + +/** + * kbasep_js_runpool_release_ctx_internal - Internal function to release the reference + * on a ctx and an atom's "retained state", only + * taking the runpool and as transaction mutexes + * @kbdev: The kbase_device to operate on + * @kctx: The kbase_context to operate on + * @katom_retained_state: Retained state from the atom + * + * This also starts more jobs running in the case of an ctx-attribute state change + * + * This does none of the followup actions for scheduling: + * - It does not schedule in a new context + * - It does not requeue or handle dying contexts + * + * For those tasks, just call kbasep_js_runpool_release_ctx() instead + * + * Has following requirements + * - Context is scheduled in, and kctx->as_nr matches kctx_as_nr + * - Context has a non-zero refcount + * - Caller holds js_kctx_info->ctx.jsctx_mutex + * - Caller holds js_devdata->runpool_mutex + * + * Return: A bitpattern, containing KBASEP_JS_RELEASE_RESULT_* flags, indicating + * 
the result of releasing a context that whether the caller should try + * scheduling a new context or should try scheduling all contexts. + */ +static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( + struct kbase_device *kbdev, + struct kbase_context *kctx, + struct kbasep_js_atom_retained_state *katom_retained_state) +{ + unsigned long flags; + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + + kbasep_js_release_result release_result = 0u; + bool runpool_ctx_attr_change = false; + int kctx_as_nr; + int new_ref_count; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + js_kctx_info = &kctx->jctx.sched_info; + js_devdata = &kbdev->js_data; + + /* Ensure context really is scheduled in */ + KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + + kctx_as_nr = kctx->as_nr; + KBASE_DEBUG_ASSERT(kctx_as_nr != KBASEP_AS_NR_INVALID); + KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0); + + /* + * Transaction begins on AS and runpool_irq + * + * Assert about out calling contract + */ + mutex_lock(&kbdev->pm.lock); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + KBASE_DEBUG_ASSERT(kctx_as_nr == kctx->as_nr); + KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0); + + /* Update refcount */ + kbase_ctx_sched_release_ctx(kctx); + new_ref_count = atomic_read(&kctx->refcount); + + /* Release the atom if it finished (i.e. 
wasn't soft-stopped) */ + if (kbasep_js_has_atom_finished(katom_retained_state)) + runpool_ctx_attr_change |= kbasep_js_ctx_attr_ctx_release_atom( + kbdev, kctx, katom_retained_state); + + if (new_ref_count == 2 && kbase_ctx_flag(kctx, KCTX_PRIVILEGED) && +#ifdef CONFIG_MALI_ARBITER_SUPPORT + !kbase_pm_is_gpu_lost(kbdev) && +#endif + !kbase_pm_is_suspending(kbdev)) { + /* Context is kept scheduled into an address space even when + * there are no jobs, in this case we have to handle the + * situation where all jobs have been evicted from the GPU and + * submission is disabled. + * + * At this point we re-enable submission to allow further jobs + * to be executed + */ + kbasep_js_set_submit_allowed(js_devdata, kctx); + } + + /* Make a set of checks to see if the context should be scheduled out. + * Note that there'll always be at least 1 reference to the context + * which was previously acquired by kbasep_js_schedule_ctx(). */ + if (new_ref_count == 1 && + (!kbasep_js_is_submit_allowed(js_devdata, kctx) || +#ifdef CONFIG_MALI_ARBITER_SUPPORT + kbase_pm_is_gpu_lost(kbdev) || +#endif + kbase_pm_is_suspending(kbdev))) { + int num_slots = kbdev->gpu_props.num_job_slots; + int slot; + + /* Last reference, and we've been told to remove this context + * from the Run Pool */ + dev_dbg(kbdev->dev, "JS: RunPool Remove Context %p because refcount=%d, jobs=%d, allowed=%d", + kctx, new_ref_count, js_kctx_info->ctx.nr_jobs, + kbasep_js_is_submit_allowed(js_devdata, kctx)); + + KBASE_TLSTREAM_TL_NRET_AS_CTX(kbdev, &kbdev->as[kctx->as_nr], kctx); + + kbase_backend_release_ctx_irq(kbdev, kctx); + + for (slot = 0; slot < num_slots; slot++) { + if (kbdev->hwaccess.active_kctx[slot] == kctx) { + dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n", + (void *)kctx, slot); + kbdev->hwaccess.active_kctx[slot] = NULL; + } + } + + /* Ctx Attribute handling + * + * Releasing atoms attributes must either happen before this, or + * after the KCTX_SHEDULED flag is changed, otherwise we + * 
double-decount the attributes + */ + runpool_ctx_attr_change |= + kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx); + + /* Releasing the context and katom retained state can allow + * more jobs to run */ + release_result |= + kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, + kctx, katom_retained_state, + runpool_ctx_attr_change); + + /* + * Transaction ends on AS and runpool_irq: + * + * By this point, the AS-related data is now clear and ready + * for re-use. + * + * Since releases only occur once for each previous successful + * retain, and no more retains are allowed on this context, no + * other thread will be operating in this + * code whilst we are + */ + + /* Recalculate pullable status for all slots */ + for (slot = 0; slot < num_slots; slot++) { + if (kbase_js_ctx_pullable(kctx, slot, false)) + kbase_js_ctx_list_add_pullable_nolock(kbdev, + kctx, slot); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + kbase_backend_release_ctx_noirq(kbdev, kctx); + + mutex_unlock(&kbdev->pm.lock); + + /* Note: Don't reuse kctx_as_nr now */ + + /* Synchronize with any timers */ + kbase_backend_ctx_count_changed(kbdev); + + /* update book-keeping info */ + kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); + /* Signal any waiter that the context is not scheduled, so is + * safe for termination - once the jsctx_mutex is also dropped, + * and jobs have finished. 
*/ + wake_up(&js_kctx_info->ctx.is_scheduled_wait); + + /* Queue an action to occur after we've dropped the lock */ + release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED | + KBASEP_JS_RELEASE_RESULT_SCHED_ALL; + } else { + kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx, + katom_retained_state, runpool_ctx_attr_change); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->pm.lock); + } + + return release_result; +} + +void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_atom_retained_state katom_retained_state; + + /* Setup a dummy katom_retained_state */ + kbasep_js_atom_retained_state_init_invalid(&katom_retained_state); + + kbasep_js_runpool_release_ctx_internal(kbdev, kctx, + &katom_retained_state); +} + +void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx, bool has_pm_ref) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + /* This is called if and only if you've you've detached the context from + * the Runpool Queue, and not added it back to the Runpool + */ + KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + + if (kbase_ctx_flag(kctx, KCTX_DYING)) { + /* Dying: don't requeue, but kill all jobs on the context. 
This + * happens asynchronously */ + dev_dbg(kbdev->dev, + "JS: ** Killing Context %p on RunPool Remove **", kctx); + kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel); + } +} + +void kbasep_js_runpool_release_ctx_and_katom_retained_state( + struct kbase_device *kbdev, struct kbase_context *kctx, + struct kbasep_js_atom_retained_state *katom_retained_state) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + kbasep_js_release_result release_result; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + js_kctx_info = &kctx->jctx.sched_info; + js_devdata = &kbdev->js_data; + + mutex_lock(&js_devdata->queue_mutex); + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + + release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx, + katom_retained_state); + + /* Drop the runpool mutex to allow requeing kctx */ + mutex_unlock(&js_devdata->runpool_mutex); + + if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u) + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true); + + /* Drop the jsctx_mutex to allow scheduling in a new context */ + + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + + if (release_result & KBASEP_JS_RELEASE_RESULT_SCHED_ALL) + kbase_js_sched_all(kbdev); +} + +void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_atom_retained_state katom_retained_state; + + kbasep_js_atom_retained_state_init_invalid(&katom_retained_state); + + kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, + &katom_retained_state); +} + +/* Variant of kbasep_js_runpool_release_ctx() that doesn't call into + * kbase_js_sched_all() */ +static void kbasep_js_runpool_release_ctx_no_schedule( + struct kbase_device *kbdev, struct kbase_context *kctx) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + 
kbasep_js_release_result release_result; + struct kbasep_js_atom_retained_state katom_retained_state_struct; + struct kbasep_js_atom_retained_state *katom_retained_state = + &katom_retained_state_struct; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + js_kctx_info = &kctx->jctx.sched_info; + js_devdata = &kbdev->js_data; + kbasep_js_atom_retained_state_init_invalid(katom_retained_state); + + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + + release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx, + katom_retained_state); + + /* Drop the runpool mutex to allow requeing kctx */ + mutex_unlock(&js_devdata->runpool_mutex); + if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u) + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true); + + /* Drop the jsctx_mutex to allow scheduling in a new context */ + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + /* NOTE: could return release_result if the caller would like to know + * whether it should schedule a new context, but currently no callers do + */ +} + +void kbase_js_set_timeouts(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbase_backend_timeouts_changed(kbdev); +} + +static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + unsigned long flags; + bool kctx_suspended = false; + int as_nr; + + dev_dbg(kbdev->dev, "Scheduling kctx %p (s:%d)\n", kctx, js); + + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + /* Pick available address space for this context */ + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + as_nr = kbase_ctx_sched_retain_ctx(kctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); + if (as_nr == KBASEP_AS_NR_INVALID) { + as_nr 
= kbase_backend_find_and_release_free_address_space( + kbdev, kctx); + if (as_nr != KBASEP_AS_NR_INVALID) { + /* Attempt to retain the context again, this should + * succeed */ + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + as_nr = kbase_ctx_sched_retain_ctx(kctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); + + WARN_ON(as_nr == KBASEP_AS_NR_INVALID); + } + } + if (as_nr == KBASEP_AS_NR_INVALID) + return false; /* No address spaces currently available */ + + /* + * Atomic transaction on the Context and Run Pool begins + */ + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + /* Check to see if context is dying due to kbase_job_zap_context() */ + if (kbase_ctx_flag(kctx, KCTX_DYING)) { + /* Roll back the transaction so far and return */ + kbase_ctx_sched_release_ctx(kctx); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + return false; + } + + KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, JS_TRY_SCHEDULE_HEAD_CTX, kctx, NULL, + 0u, + kbase_ktrace_get_ctx_refcnt(kctx)); + + kbase_ctx_flag_set(kctx, KCTX_SCHEDULED); + + /* Assign context to previously chosen address space */ + if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) { + /* Roll back the transaction so far and return */ + kbase_ctx_sched_release_ctx(kctx); + kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + return false; + } + + kbdev->hwaccess.active_kctx[js] = kctx; + + KBASE_TLSTREAM_TL_RET_AS_CTX(kbdev, &kbdev->as[kctx->as_nr], kctx); + + /* Cause any future waiter-on-termination 
to wait until the context is + * descheduled */ + wake_up(&js_kctx_info->ctx.is_scheduled_wait); + + /* Re-check for suspending: a suspend could've occurred, and all the + * contexts could've been removed from the runpool before we took this + * lock. In this case, we don't want to allow this context to run jobs, + * we just want it out immediately. + * + * The DMB required to read the suspend flag was issued recently as part + * of the hwaccess_lock locking. If a suspend occurs *after* that lock + * was taken (i.e. this condition doesn't execute), then the + * kbasep_js_suspend() code will cleanup this context instead (by virtue + * of it being called strictly after the suspend flag is set, and will + * wait for this lock to drop) */ +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbase_pm_is_suspending(kbdev) || kbase_pm_is_gpu_lost(kbdev)) { +#else + if (kbase_pm_is_suspending(kbdev)) { +#endif + /* Cause it to leave at some later point */ + bool retained; + + retained = kbase_ctx_sched_inc_refcount_nolock(kctx); + KBASE_DEBUG_ASSERT(retained); + + kbasep_js_clear_submit_allowed(js_devdata, kctx); + kctx_suspended = true; + } + + kbase_ctx_flag_clear(kctx, KCTX_PULLED_SINCE_ACTIVE_JS0 << js); + + /* Transaction complete */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); + + /* Synchronize with any timers */ + kbase_backend_ctx_count_changed(kbdev); + + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + /* Note: after this point, the context could potentially get scheduled + * out immediately */ + + if (kctx_suspended) { + /* Finishing forcing out the context due to a suspend. 
Use a + * variant of kbasep_js_runpool_release_ctx() that doesn't + * schedule a new context, to prevent a risk of recursion back + * into this function */ + kbasep_js_runpool_release_ctx_no_schedule(kbdev, kctx); + return false; + } + return true; +} + +static bool kbase_js_use_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && + kbase_backend_use_ctx_sched(kbdev, kctx, js)) { + + dev_dbg(kbdev->dev, + "kctx %p already has ASID - mark as active (s:%d)\n", + (void *)kctx, js); + + if (kbdev->hwaccess.active_kctx[js] != kctx) { + kbdev->hwaccess.active_kctx[js] = kctx; + kbase_ctx_flag_clear(kctx, + KCTX_PULLED_SINCE_ACTIVE_JS0 << js); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return true; /* Context already scheduled */ + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return kbasep_js_schedule_ctx(kbdev, kctx, js); +} + +void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_kctx_info *js_kctx_info; + struct kbasep_js_device_data *js_devdata; + bool is_scheduled; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + +#ifdef CONFIG_MALI_ARBITER_SUPPORT + /* This should only happen in response to a system call + * from a user-space thread. + * In a non-arbitrated environment this can never happen + * whilst suspending. + * + * In an arbitrated environment, user-space threads can run + * while we are suspended (for example GPU not available + * to this VM), however in that case we will block on + * the wait event for KCTX_SCHEDULED, since no context + * can be scheduled until we have the GPU again. 
+ */ + if (kbdev->arb.arb_if == NULL) + if (WARN_ON(kbase_pm_is_suspending(kbdev))) + return; +#else + /* This should only happen in response to a system call + * from a user-space thread. + * In a non-arbitrated environment this can never happen + * whilst suspending. + */ + if (WARN_ON(kbase_pm_is_suspending(kbdev))) + return; +#endif + + mutex_lock(&js_devdata->queue_mutex); + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + + /* Mark the context as privileged */ + kbase_ctx_flag_set(kctx, KCTX_PRIVILEGED); + + is_scheduled = kbase_ctx_flag(kctx, KCTX_SCHEDULED); + if (!is_scheduled) { + /* Add the context to the pullable list */ + if (kbase_js_ctx_list_add_pullable_head(kbdev, kctx, 0)) + kbase_js_sync_timers(kbdev); + + /* Fast-starting requires the jsctx_mutex to be dropped, + * because it works on multiple ctxs */ + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + + /* Try to schedule the context in */ + kbase_js_sched_all(kbdev); + + /* Wait for the context to be scheduled in */ + wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait, + kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + } else { + /* Already scheduled in - We need to retain it to keep the + * corresponding address space */ + WARN_ON(!kbase_ctx_sched_inc_refcount(kctx)); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + } +} +KBASE_EXPORT_TEST_API(kbasep_js_schedule_privileged_ctx); + +void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_kctx_info *js_kctx_info; + + KBASE_DEBUG_ASSERT(kctx != NULL); + js_kctx_info = &kctx->jctx.sched_info; + + /* We don't need to use the address space anymore */ + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + kbase_ctx_flag_clear(kctx, KCTX_PRIVILEGED); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + /* Release the context - it will be scheduled out */ + kbasep_js_runpool_release_ctx(kbdev, kctx); + + 
kbase_js_sched_all(kbdev); +} +KBASE_EXPORT_TEST_API(kbasep_js_release_privileged_ctx); + +void kbasep_js_suspend(struct kbase_device *kbdev) +{ + unsigned long flags; + struct kbasep_js_device_data *js_devdata; + int i; + u16 retained = 0u; + + KBASE_DEBUG_ASSERT(kbdev); + KBASE_DEBUG_ASSERT(kbase_pm_is_suspending(kbdev)); + js_devdata = &kbdev->js_data; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + /* Prevent all contexts from submitting */ + js_devdata->runpool_irq.submit_allowed = 0; + + /* Retain each of the contexts, so we can cause it to leave even if it + * had no refcount to begin with */ + for (i = BASE_MAX_NR_AS - 1; i >= 0; --i) { + struct kbase_context *kctx = kbdev->as_to_kctx[i]; + + retained = retained << 1; + + if (kctx && !(kbdev->as_free & (1u << i))) { + kbase_ctx_sched_retain_ctx_refcount(kctx); + retained |= 1u; + /* This loop will not have an effect on the privileged + * contexts as they would have an extra ref count + * compared to the normal contexts, so they will hold + * on to their address spaces. MMU will re-enabled for + * them on resume. + */ + } + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* De-ref the previous retain to ensure each context gets pulled out + * sometime later. 
*/ + for (i = 0; + i < BASE_MAX_NR_AS; + ++i, retained = retained >> 1) { + struct kbase_context *kctx = kbdev->as_to_kctx[i]; + + if (retained & 1u) + kbasep_js_runpool_release_ctx(kbdev, kctx); + } + + /* Caller must wait for all Power Manager active references to be + * dropped */ +} + +void kbasep_js_resume(struct kbase_device *kbdev) +{ + struct kbasep_js_device_data *js_devdata; + int js, prio; + + KBASE_DEBUG_ASSERT(kbdev); + js_devdata = &kbdev->js_data; + KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); + + mutex_lock(&js_devdata->queue_mutex); + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH; + prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { + struct kbase_context *kctx, *n; + unsigned long flags; + +#ifndef CONFIG_MALI_ARBITER_SUPPORT + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + list_for_each_entry_safe(kctx, n, + &kbdev->js_data.ctx_list_unpullable[js][prio], + jctx.sched_info.ctx.ctx_list_entry[js]) { + struct kbasep_js_kctx_info *js_kctx_info; + bool timer_sync = false; + + /* Drop lock so we can take kctx mutexes */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, + flags); + + js_kctx_info = &kctx->jctx.sched_info; + + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) && + kbase_js_ctx_pullable(kctx, js, false)) + timer_sync = + kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, js); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, + flags); + + if (timer_sync) + kbase_backend_ctx_count_changed(kbdev); + + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + /* Take lock before accessing list again */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#else + bool timer_sync = false; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + 
list_for_each_entry_safe(kctx, n, + &kbdev->js_data.ctx_list_unpullable[js][prio], + jctx.sched_info.ctx.ctx_list_entry[js]) { + + if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED) && + kbase_js_ctx_pullable(kctx, js, false)) + timer_sync |= + kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, js); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (timer_sync) { + mutex_lock(&js_devdata->runpool_mutex); + kbase_backend_ctx_count_changed(kbdev); + mutex_unlock(&js_devdata->runpool_mutex); + } +#endif + } + } + mutex_unlock(&js_devdata->queue_mutex); + + /* Restart atom processing */ + kbase_js_sched_all(kbdev); + + /* JS Resume complete */ +} + +bool kbase_js_is_atom_valid(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + if ((katom->core_req & BASE_JD_REQ_FS) && + (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | + BASE_JD_REQ_T))) + return false; + + if ((katom->core_req & BASE_JD_REQ_JOB_SLOT) && + (katom->jobslot >= BASE_JM_MAX_NR_SLOTS)) + return false; + + return true; +} + +static int kbase_js_get_slot(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + if (katom->core_req & BASE_JD_REQ_JOB_SLOT) + return katom->jobslot; + + if (katom->core_req & BASE_JD_REQ_FS) + return 0; + + if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { + if (katom->device_nr == 1 && + kbdev->gpu_props.num_core_groups == 2) + return 2; + } + + return 1; +} + +bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, + struct kbase_jd_atom *katom) +{ + bool enqueue_required, add_required = true; + + katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom); + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + lockdep_assert_held(&kctx->jctx.lock); + + /* If slot will transition from unpullable to pullable then add to + * pullable list */ + if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) { + enqueue_required = true; + } else { + enqueue_required = false; + } + + if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) || + 
(katom->pre_dep && (katom->pre_dep->atom_flags & + KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { + int prio = katom->sched_priority; + int js = katom->slot_nr; + struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; + + dev_dbg(kctx->kbdev->dev, "Add atom %p to X_DEP list (s:%d)\n", + (void *)katom, js); + + list_add_tail(&katom->queue, &queue->x_dep_head); + katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; + if (kbase_js_atom_blocked_on_x_dep(katom)) { + enqueue_required = false; + add_required = false; + } + } else { + dev_dbg(kctx->kbdev->dev, "Atom %p not added to X_DEP list\n", + (void *)katom); + } + + if (add_required) { + /* Check if there are lower priority jobs to soft stop */ + kbase_job_slot_ctx_priority_check_locked(kctx, katom); + + /* Add atom to ring buffer. */ + jsctx_tree_add(kctx, katom); + katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE; + } + + dev_dbg(kctx->kbdev->dev, + "Enqueue of kctx %p is %srequired to submit atom %p\n", + kctx, enqueue_required ? "" : "not ", katom); + + return enqueue_required; +} + +/** + * kbase_js_move_to_tree - Move atom (and any dependent atoms) to the + * runnable_tree, ready for execution + * @katom: Atom to submit + * + * It is assumed that @katom does not have KBASE_KATOM_FLAG_X_DEP_BLOCKED set, + * but is still present in the x_dep list. If @katom has a same-slot dependent + * atom then that atom (and any dependents) will also be moved. 
+ */ +static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) +{ + struct kbase_context *const kctx = katom->kctx; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + while (katom) { + WARN_ON(!(katom->atom_flags & + KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)); + + if (!kbase_js_atom_blocked_on_x_dep(katom)) { + dev_dbg(kctx->kbdev->dev, + "Del atom %p from X_DEP list in js_move_to_tree\n", + (void *)katom); + + list_del(&katom->queue); + katom->atom_flags &= + ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; + /* For incremental rendering, an end-of-renderpass atom + * may have had its dependency on start-of-renderpass + * ignored and may therefore already be in the tree. + */ + if (!(katom->atom_flags & + KBASE_KATOM_FLAG_JSCTX_IN_TREE)) { + jsctx_tree_add(kctx, katom); + katom->atom_flags |= + KBASE_KATOM_FLAG_JSCTX_IN_TREE; + } + } else { + dev_dbg(kctx->kbdev->dev, + "Atom %p blocked on x-dep in js_move_to_tree\n", + (void *)katom); + break; + } + + katom = katom->post_dep; + } +} + + +/** + * kbase_js_evict_deps - Evict dependencies of a failed atom. + * @kctx: Context pointer + * @katom: Pointer to the atom that has failed. + * @js: The job slot the katom was run on. + * @prio: Priority of the katom. + * + * Remove all post dependencies of an atom from the context ringbuffers. + * + * The original atom's event_code will be propagated to all dependent atoms. + * + * Context: Caller must hold the HW access lock + */ +static void kbase_js_evict_deps(struct kbase_context *kctx, + struct kbase_jd_atom *katom, int js, int prio) +{ + struct kbase_jd_atom *x_dep = katom->x_post_dep; + struct kbase_jd_atom *next_katom = katom->post_dep; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + if (next_katom) { + KBASE_DEBUG_ASSERT(next_katom->status != + KBASE_JD_ATOM_STATE_HW_COMPLETED); + next_katom->will_fail_event_code = katom->event_code; + + } + + /* Has cross slot dependency.
*/ + if (x_dep && (x_dep->atom_flags & (KBASE_KATOM_FLAG_JSCTX_IN_TREE | + KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { + /* Remove dependency.*/ + x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; + trace_sysgraph(SGR_DEP_RES, kctx->id, + kbase_jd_atom_id(kctx, x_dep)); + + dev_dbg(kctx->kbdev->dev, "Cleared X_DEP flag on atom %p\n", + (void *)x_dep); + + /* Fail if it had a data dependency. */ + if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) { + x_dep->will_fail_event_code = katom->event_code; + } + if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST) + kbase_js_move_to_tree(x_dep); + } +} + +struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) +{ + struct kbase_jd_atom *katom; + struct kbasep_js_device_data *js_devdata; + struct kbase_device *kbdev; + int pulled; + + KBASE_DEBUG_ASSERT(kctx); + + kbdev = kctx->kbdev; + dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %p (s:%d)\n", + (void *)kctx, js); + + js_devdata = &kbdev->js_data; + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) { + dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %p\n", + (void *)kctx); + return NULL; + } +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbase_pm_is_suspending(kbdev) || kbase_pm_is_gpu_lost(kbdev)) +#else + if (kbase_pm_is_suspending(kbdev)) +#endif + return NULL; + + katom = jsctx_rb_peek(kctx, js); + if (!katom) { + dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %p (s:%d)\n", + (void *)kctx, js); + return NULL; + } + if (kctx->blocked_js[js][katom->sched_priority]) { + dev_dbg(kbdev->dev, + "JS: kctx %p is blocked from submitting atoms at priority %d (s:%d)\n", + (void *)kctx, katom->sched_priority, js); + return NULL; + } + if (atomic_read(&katom->blocked)) { + dev_dbg(kbdev->dev, "JS: Atom %p is blocked in js_pull\n", + (void *)katom); + return NULL; + } + + /* Due to ordering restrictions when unpulling atoms on failure, we do + * not allow multiple runs of fail-dep atoms from the 
same context to be + * present on the same slot */ + if (katom->pre_dep && atomic_read(&kctx->atoms_pulled_slot[js])) { + struct kbase_jd_atom *prev_atom = + kbase_backend_inspect_tail(kbdev, js); + + if (prev_atom && prev_atom->kctx != kctx) + return NULL; + } + + if (kbase_js_atom_blocked_on_x_dep(katom)) { + if (katom->x_pre_dep->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || + katom->x_pre_dep->will_fail_event_code) { + dev_dbg(kbdev->dev, + "JS: X pre-dep %p is not present in slot FIFO or will fail\n", + (void *)katom->x_pre_dep); + return NULL; + } + if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && + kbase_backend_nr_atoms_on_slot(kbdev, js)) { + dev_dbg(kbdev->dev, + "JS: Atom %p has cross-slot fail dependency and atoms on slot (s:%d)\n", + (void *)katom, js); + return NULL; + } + } + + kbase_ctx_flag_set(kctx, KCTX_PULLED); + kbase_ctx_flag_set(kctx, (KCTX_PULLED_SINCE_ACTIVE_JS0 << js)); + + pulled = atomic_inc_return(&kctx->atoms_pulled); + if (pulled == 1 && !kctx->slots_pullable) { + WARN_ON(kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); + kbase_ctx_flag_set(kctx, KCTX_RUNNABLE_REF); + atomic_inc(&kbdev->js_data.nr_contexts_runnable); + } + atomic_inc(&kctx->atoms_pulled_slot[katom->slot_nr]); + kctx->atoms_pulled_slot_pri[katom->slot_nr][katom->sched_priority]++; + jsctx_rb_pull(kctx, katom); + + kbase_ctx_sched_retain_ctx_refcount(kctx); + + katom->atom_flags |= KBASE_KATOM_FLAG_HOLDING_CTX_REF; + + katom->ticks = 0; + + dev_dbg(kbdev->dev, "JS: successfully pulled atom %p from kctx %p (s:%d)\n", + (void *)katom, (void *)kctx, js); + + return katom; +} + +/** + * js_return_of_start_rp() - Handle soft-stop of an atom that starts a + * renderpass + * @start_katom: Pointer to the start-of-renderpass atom that was soft-stopped + * + * This function is called to switch to incremental rendering if the tiler job + * chain at the start of a renderpass has used too much memory. 
It prevents the + * tiler job being pulled for execution in the job scheduler again until the + * next phase of incremental rendering is complete. + * + * If the end-of-renderpass atom is already in the job scheduler (because a + * previous attempt at tiling used too much memory during the same renderpass) + * then it is unblocked; otherwise, it is run by handing it to the scheduler. + */ +static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom) +{ + struct kbase_context *const kctx = start_katom->kctx; + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_jd_renderpass *rp; + struct kbase_jd_atom *end_katom; + unsigned long flags; + + lockdep_assert_held(&kctx->jctx.lock); + + if (WARN_ON(!(start_katom->core_req & BASE_JD_REQ_START_RENDERPASS))) + return; + + compiletime_assert((1ull << (sizeof(start_katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[start_katom->renderpass_id]; + + if (WARN_ON(rp->start_katom != start_katom)) + return; + + dev_dbg(kctx->kbdev->dev, + "JS return start atom %p in state %d of RP %d\n", + (void *)start_katom, (int)rp->state, + start_katom->renderpass_id); + + if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE)) + return; + + /* The tiler job might have been soft-stopped for some reason other + * than running out of memory. + */ + if (rp->state == KBASE_JD_RP_START || rp->state == KBASE_JD_RP_RETRY) { + dev_dbg(kctx->kbdev->dev, + "JS return isn't OOM in state %d of RP %d\n", + (int)rp->state, start_katom->renderpass_id); + return; + } + + dev_dbg(kctx->kbdev->dev, + "JS return confirm OOM in state %d of RP %d\n", + (int)rp->state, start_katom->renderpass_id); + + if (WARN_ON(rp->state != KBASE_JD_RP_PEND_OOM && + rp->state != KBASE_JD_RP_RETRY_PEND_OOM)) + return; + + /* Prevent the tiler job being pulled for execution in the + * job scheduler again. 
+ */ + dev_dbg(kbdev->dev, "Blocking start atom %p\n", + (void *)start_katom); + atomic_inc(&start_katom->blocked); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + rp->state = (rp->state == KBASE_JD_RP_PEND_OOM) ? + KBASE_JD_RP_OOM : KBASE_JD_RP_RETRY_OOM; + + /* Was the fragment job chain submitted to kbase yet? */ + end_katom = rp->end_katom; + if (end_katom) { + dev_dbg(kctx->kbdev->dev, "JS return add end atom %p\n", + (void *)end_katom); + + if (rp->state == KBASE_JD_RP_RETRY_OOM) { + /* Allow the end of the renderpass to be pulled for + * execution again to continue incremental rendering. + */ + dev_dbg(kbdev->dev, "Unblocking end atom %p\n", + (void *)end_katom); + atomic_dec(&end_katom->blocked); + WARN_ON(!(end_katom->atom_flags & + KBASE_KATOM_FLAG_JSCTX_IN_TREE)); + WARN_ON(end_katom->status != KBASE_JD_ATOM_STATE_IN_JS); + + kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, + end_katom->slot_nr); + + /* Expect the fragment job chain to be scheduled without + * further action because this function is called when + * returning an atom to the job scheduler ringbuffer. + */ + end_katom = NULL; + } else { + WARN_ON(end_katom->status != + KBASE_JD_ATOM_STATE_QUEUED && + end_katom->status != KBASE_JD_ATOM_STATE_IN_JS); + } + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (end_katom) + kbase_jd_dep_clear_locked(end_katom); +} + +/** + * js_return_of_end_rp() - Handle completion of an atom that ends a renderpass + * @end_katom: Pointer to the end-of-renderpass atom that was completed + * + * This function is called to continue incremental rendering if the tiler job + * chain at the start of a renderpass used too much memory. It resets the + * mechanism for detecting excessive memory usage then allows the soft-stopped + * tiler job chain to be pulled for execution again. + * + * The start-of-renderpass atom must already have been submitted to kbase.
+ */ +static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom) +{ + struct kbase_context *const kctx = end_katom->kctx; + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_jd_renderpass *rp; + struct kbase_jd_atom *start_katom; + unsigned long flags; + + lockdep_assert_held(&kctx->jctx.lock); + + if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) + return; + + compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; + + if (WARN_ON(rp->end_katom != end_katom)) + return; + + dev_dbg(kctx->kbdev->dev, + "JS return end atom %p in state %d of RP %d\n", + (void *)end_katom, (int)rp->state, end_katom->renderpass_id); + + if (WARN_ON(rp->state != KBASE_JD_RP_OOM && + rp->state != KBASE_JD_RP_RETRY_OOM)) + return; + + /* Reduce the number of mapped pages in the memory regions that + * triggered out-of-memory last time so that we can detect excessive + * memory usage again. 
+ */ + kbase_gpu_vm_lock(kctx); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + while (!list_empty(&rp->oom_reg_list)) { + struct kbase_va_region *reg = + list_first_entry(&rp->oom_reg_list, + struct kbase_va_region, link); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + dev_dbg(kbdev->dev, + "Reset backing to %zu pages for region %p\n", + reg->threshold_pages, (void *)reg); + + if (!WARN_ON(reg->flags & KBASE_REG_VA_FREED)) + kbase_mem_shrink(kctx, reg, reg->threshold_pages); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + dev_dbg(kbdev->dev, "Deleting region %p from list\n", + (void *)reg); + list_del_init(&reg->link); + kbase_va_region_alloc_put(kctx, reg); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + kbase_gpu_vm_unlock(kctx); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + rp->state = KBASE_JD_RP_RETRY; + dev_dbg(kbdev->dev, "Changed state to %d for retry\n", rp->state); + + /* Allow the start of the renderpass to be pulled for execution again + * to begin/continue incremental rendering.
+ */ + start_katom = rp->start_katom; + if (!WARN_ON(!start_katom)) { + dev_dbg(kbdev->dev, "Unblocking start atom %p\n", + (void *)start_katom); + atomic_dec(&start_katom->blocked); + (void)kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, + start_katom->slot_nr); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +static void js_return_worker(struct work_struct *data) +{ + struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, + work); + struct kbase_context *kctx = katom->kctx; + struct kbase_device *kbdev = kctx->kbdev; + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info; + struct kbasep_js_atom_retained_state retained_state; + int js = katom->slot_nr; + int prio = katom->sched_priority; + bool timer_sync = false; + bool context_idle = false; + unsigned long flags; + base_jd_core_req core_req = katom->core_req; + + dev_dbg(kbdev->dev, "%s for atom %p with event code 0x%x\n", + __func__, (void *)katom, katom->event_code); + + if (katom->event_code != BASE_JD_EVENT_END_RP_DONE) + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(kbdev, katom); + + kbase_backend_complete_wq(kbdev, katom); + + kbasep_js_atom_retained_state_copy(&retained_state, katom); + + mutex_lock(&js_devdata->queue_mutex); + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + + atomic_dec(&kctx->atoms_pulled); + atomic_dec(&kctx->atoms_pulled_slot[js]); + + if (katom->event_code != BASE_JD_EVENT_END_RP_DONE) + atomic_dec(&katom->blocked); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + kctx->atoms_pulled_slot_pri[js][katom->sched_priority]--; + + if (!atomic_read(&kctx->atoms_pulled_slot[js]) && + jsctx_rb_none_to_pull(kctx, js)) + timer_sync |= kbase_js_ctx_list_remove_nolock(kbdev, kctx, js); + + /* If this slot has been blocked due to soft-stopped atoms, and all + * atoms have now been processed, then unblock the slot */ + if (!kctx->atoms_pulled_slot_pri[js][prio] && + 
kctx->blocked_js[js][prio]) { + kctx->blocked_js[js][prio] = false; + + /* Only mark the slot as pullable if the context is not idle - + * that case is handled below */ + if (atomic_read(&kctx->atoms_pulled) && + kbase_js_ctx_pullable(kctx, js, true)) + timer_sync |= kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, js); + } + + if (!atomic_read(&kctx->atoms_pulled)) { + dev_dbg(kbdev->dev, + "No atoms currently pulled from context %p\n", + (void *)kctx); + + if (!kctx->slots_pullable) { + dev_dbg(kbdev->dev, + "Context %p %s counted as runnable\n", + (void *)kctx, + kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF) ? + "is" : "isn't"); + + WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); + kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); + atomic_dec(&kbdev->js_data.nr_contexts_runnable); + timer_sync = true; + } + + if (kctx->as_nr != KBASEP_AS_NR_INVALID && + !kbase_ctx_flag(kctx, KCTX_DYING)) { + int num_slots = kbdev->gpu_props.num_job_slots; + int slot; + + if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) + kbasep_js_set_submit_allowed(js_devdata, kctx); + + for (slot = 0; slot < num_slots; slot++) { + if (kbase_js_ctx_pullable(kctx, slot, true)) + timer_sync |= + kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, slot); + } + } + + kbase_jm_idle_ctx(kbdev, kctx); + + context_idle = true; + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (context_idle) { + dev_dbg(kbdev->dev, + "Context %p %s counted as active\n", + (void *)kctx, + kbase_ctx_flag(kctx, KCTX_ACTIVE) ? 
+ "is" : "isn't"); + WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); + kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); + kbase_pm_context_idle(kbdev); + } + + if (timer_sync) + kbase_js_sync_timers(kbdev); + + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + + if (katom->core_req & BASE_JD_REQ_START_RENDERPASS) { + mutex_lock(&kctx->jctx.lock); + js_return_of_start_rp(katom); + mutex_unlock(&kctx->jctx.lock); + } else if (katom->event_code == BASE_JD_EVENT_END_RP_DONE) { + mutex_lock(&kctx->jctx.lock); + js_return_of_end_rp(katom); + mutex_unlock(&kctx->jctx.lock); + } + + katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF; + dev_dbg(kbdev->dev, "JS: retained state %s finished", + kbasep_js_has_atom_finished(&retained_state) ? + "has" : "hasn't"); + + WARN_ON(kbasep_js_has_atom_finished(&retained_state)); + + kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, + &retained_state); + + kbase_js_sched_all(kbdev); + + kbase_backend_complete_wq_post_sched(kbdev, core_req); + + dev_dbg(kbdev->dev, "Leaving %s for atom %p\n", + __func__, (void *)katom); +} + +void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ + dev_dbg(kctx->kbdev->dev, "Unpulling atom %p in kctx %p\n", + (void *)katom, (void *)kctx); + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + jsctx_rb_unpull(kctx, katom); + + WARN_ON(work_pending(&katom->work)); + + /* Block re-submission until workqueue has run */ + atomic_inc(&katom->blocked); + + kbase_job_check_leave_disjoint(kctx->kbdev, katom); + + INIT_WORK(&katom->work, js_return_worker); + queue_work(kctx->jctx.job_done_wq, &katom->work); +} + +/** + * js_complete_start_rp() - Handle completion of atom that starts a renderpass + * @kctx: Context pointer + * @start_katom: Pointer to the atom that completed + * + * Put any references to virtual memory regions that might have been added by + * kbase_job_slot_softstop_start_rp() because the tiler job chain completed + * 
despite any pending soft-stop request. + * + * If the atom that just completed was soft-stopped during a previous attempt to + * run it then there should be a blocked end-of-renderpass atom waiting for it, + * which we must unblock to process the output of the tiler job chain. + * + * Return: true if caller should call kbase_backend_ctx_count_changed() + */ +static bool js_complete_start_rp(struct kbase_context *kctx, + struct kbase_jd_atom *const start_katom) +{ + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_jd_renderpass *rp; + bool timer_sync = false; + + lockdep_assert_held(&kctx->jctx.lock); + + if (WARN_ON(!(start_katom->core_req & BASE_JD_REQ_START_RENDERPASS))) + return false; + + compiletime_assert((1ull << (sizeof(start_katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[start_katom->renderpass_id]; + + if (WARN_ON(rp->start_katom != start_katom)) + return false; + + dev_dbg(kctx->kbdev->dev, + "Start atom %p is done in state %d of RP %d\n", + (void *)start_katom, (int)rp->state, + start_katom->renderpass_id); + + if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE)) + return false; + + if (rp->state == KBASE_JD_RP_PEND_OOM || + rp->state == KBASE_JD_RP_RETRY_PEND_OOM) { + unsigned long flags; + + dev_dbg(kctx->kbdev->dev, + "Start atom %p completed before soft-stop\n", + (void *)start_katom); + + kbase_gpu_vm_lock(kctx); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + while (!list_empty(&rp->oom_reg_list)) { + struct kbase_va_region *reg = + list_first_entry(&rp->oom_reg_list, + struct kbase_va_region, link); + + WARN_ON(reg->flags & KBASE_REG_VA_FREED); + dev_dbg(kctx->kbdev->dev, "Deleting region %p from list\n", + (void *)reg); + list_del_init(&reg->link); + kbase_va_region_alloc_put(kctx, reg); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + kbase_gpu_vm_unlock(kctx); + } else { + dev_dbg(kctx->kbdev->dev, + "Start
atom %p did not exceed memory threshold\n", + (void *)start_katom); + + WARN_ON(rp->state != KBASE_JD_RP_START && + rp->state != KBASE_JD_RP_RETRY); + } + + if (rp->state == KBASE_JD_RP_RETRY || + rp->state == KBASE_JD_RP_RETRY_PEND_OOM) { + struct kbase_jd_atom *const end_katom = rp->end_katom; + + if (!WARN_ON(!end_katom)) { + unsigned long flags; + + /* Allow the end of the renderpass to be pulled for + * execution again to continue incremental rendering. + */ + dev_dbg(kbdev->dev, "Unblocking end atom %p!\n", + (void *)end_katom); + atomic_dec(&end_katom->blocked); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + timer_sync = kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, end_katom->slot_nr); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + } + + return timer_sync; +} + +/** + * js_complete_end_rp() - Handle final completion of atom that ends a renderpass + * @kctx: Context pointer + * @end_katom: Pointer to the atom that completed for the last time + * + * This function must only be called if the renderpass actually completed + * without the tiler job chain at the start using too much memory; otherwise + * completion of the end-of-renderpass atom is handled similarly to a soft-stop. 
+ */ +static void js_complete_end_rp(struct kbase_context *kctx, + struct kbase_jd_atom *const end_katom) +{ + struct kbase_device *const kbdev = kctx->kbdev; + unsigned long flags; + struct kbase_jd_renderpass *rp; + + lockdep_assert_held(&kctx->jctx.lock); + + if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) + return; + + compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; + + if (WARN_ON(rp->end_katom != end_katom)) + return; + + dev_dbg(kbdev->dev, "End atom %p is done in state %d of RP %d\n", + (void *)end_katom, (int)rp->state, end_katom->renderpass_id); + + if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE) || + WARN_ON(rp->state == KBASE_JD_RP_OOM) || + WARN_ON(rp->state == KBASE_JD_RP_RETRY_OOM)) + return; + + /* Rendering completed without running out of memory. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + WARN_ON(!list_empty(&rp->oom_reg_list)); + rp->state = KBASE_JD_RP_COMPLETE; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + dev_dbg(kbdev->dev, "Renderpass %d is complete\n", + end_katom->renderpass_id); +} + +bool kbase_js_complete_atom_wq(struct kbase_context *kctx, + struct kbase_jd_atom *katom) +{ + struct kbasep_js_kctx_info *js_kctx_info; + struct kbasep_js_device_data *js_devdata; + struct kbase_device *kbdev; + unsigned long flags; + bool timer_sync = false; + int atom_slot; + bool context_idle = false; + int prio = katom->sched_priority; + + kbdev = kctx->kbdev; + atom_slot = katom->slot_nr; + + dev_dbg(kbdev->dev, "%s for atom %p (s:%d)\n", + __func__, (void *)katom, atom_slot); + + /* Update the incremental rendering state machine. 
+ */ + if (katom->core_req & BASE_JD_REQ_START_RENDERPASS) + timer_sync |= js_complete_start_rp(kctx, katom); + else if (katom->core_req & BASE_JD_REQ_END_RENDERPASS) + js_complete_end_rp(kctx, katom); + + js_kctx_info = &kctx->jctx.sched_info; + js_devdata = &kbdev->js_data; + + lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); + + mutex_lock(&js_devdata->runpool_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { + dev_dbg(kbdev->dev, "Atom %p is in runnable_tree\n", + (void *)katom); + + context_idle = !atomic_dec_return(&kctx->atoms_pulled); + atomic_dec(&kctx->atoms_pulled_slot[atom_slot]); + kctx->atoms_pulled_slot_pri[atom_slot][prio]--; + + if (!atomic_read(&kctx->atoms_pulled) && + !kctx->slots_pullable) { + WARN_ON(!kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF)); + kbase_ctx_flag_clear(kctx, KCTX_RUNNABLE_REF); + atomic_dec(&kbdev->js_data.nr_contexts_runnable); + timer_sync = true; + } + + /* If this slot has been blocked due to soft-stopped atoms, and + * all atoms have now been processed, then unblock the slot */ + if (!kctx->atoms_pulled_slot_pri[atom_slot][prio] + && kctx->blocked_js[atom_slot][prio]) { + dev_dbg(kbdev->dev, + "kctx %p is no longer blocked from submitting on slot %d at priority %d\n", + (void *)kctx, atom_slot, prio); + + kctx->blocked_js[atom_slot][prio] = false; + if (kbase_js_ctx_pullable(kctx, atom_slot, true)) + timer_sync |= + kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, atom_slot); + } + } + WARN_ON(!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)); + + if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) && + jsctx_rb_none_to_pull(kctx, atom_slot)) { + if (!list_empty( + &kctx->jctx.sched_info.ctx.ctx_list_entry[atom_slot])) + timer_sync |= kbase_js_ctx_list_remove_nolock( + kctx->kbdev, kctx, atom_slot); + } + + /* + * If submission is disabled on this context (most likely due to an + * atom failure) and there are now no atoms left in the system 
then + * re-enable submission so that context can be scheduled again. + */ + if (!kbasep_js_is_submit_allowed(js_devdata, kctx) && + !atomic_read(&kctx->atoms_pulled) && + !kbase_ctx_flag(kctx, KCTX_DYING)) { + int js; + + kbasep_js_set_submit_allowed(js_devdata, kctx); + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + if (kbase_js_ctx_pullable(kctx, js, true)) + timer_sync |= + kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, js); + } + } else if (katom->x_post_dep && + kbasep_js_is_submit_allowed(js_devdata, kctx)) { + int js; + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + if (kbase_js_ctx_pullable(kctx, js, true)) + timer_sync |= + kbase_js_ctx_list_add_pullable_nolock( + kbdev, kctx, js); + } + } + + /* Mark context as inactive. The pm reference will be dropped later in + * jd_done_worker(). + */ + if (context_idle) { + dev_dbg(kbdev->dev, "kctx %p is no longer active\n", + (void *)kctx); + kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + if (timer_sync) + kbase_backend_ctx_count_changed(kbdev); + mutex_unlock(&js_devdata->runpool_mutex); + + dev_dbg(kbdev->dev, "Leaving %s\n", __func__); + return context_idle; +} + +/** + * js_end_rp_is_complete() - Check whether an atom that ends a renderpass has + * completed for the last time. + * + * @end_katom: Pointer to the atom that completed on the hardware. + * + * An atom that ends a renderpass may be run on the hardware several times + * before notifying userspace or allowing dependent atoms to be executed. + * + * This function is used to decide whether or not to allow end-of-renderpass + * atom completion. It only returns false if the atom at the start of the + * renderpass was soft-stopped because it used too much memory during the most + * recent attempt at tiling. + * + * Return: True if the atom completed for the last time. 
+ */ +static bool js_end_rp_is_complete(struct kbase_jd_atom *const end_katom) +{ + struct kbase_context *const kctx = end_katom->kctx; + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_jd_renderpass *rp; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + if (WARN_ON(!(end_katom->core_req & BASE_JD_REQ_END_RENDERPASS))) + return true; + + compiletime_assert((1ull << (sizeof(end_katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; + + if (WARN_ON(rp->end_katom != end_katom)) + return true; + + dev_dbg(kbdev->dev, + "JS complete end atom %p in state %d of RP %d\n", + (void *)end_katom, (int)rp->state, + end_katom->renderpass_id); + + if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE)) + return true; + + /* Failure of end-of-renderpass atoms must not return to the + * start of the renderpass. + */ + if (end_katom->event_code != BASE_JD_EVENT_DONE) + return true; + + if (rp->state != KBASE_JD_RP_OOM && + rp->state != KBASE_JD_RP_RETRY_OOM) + return true; + + dev_dbg(kbdev->dev, "Suppressing end atom completion\n"); + return false; +} + +struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, + ktime_t *end_timestamp) +{ + struct kbase_device *kbdev; + struct kbase_context *kctx = katom->kctx; + struct kbase_jd_atom *x_dep = katom->x_post_dep; + + kbdev = kctx->kbdev; + dev_dbg(kbdev->dev, "Atom %p complete in kctx %p (post-dep %p)\n", + (void *)katom, (void *)kctx, (void *)x_dep); + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + if ((katom->core_req & BASE_JD_REQ_END_RENDERPASS) && + !js_end_rp_is_complete(katom)) { + katom->event_code = BASE_JD_EVENT_END_RP_DONE; + kbase_js_unpull(kctx, katom); + return NULL; + } + + if (katom->will_fail_event_code) + katom->event_code = katom->will_fail_event_code; + + katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED; + dev_dbg(kbdev->dev, "Atom %p status to HW 
completed\n", (void *)katom); + + if (katom->event_code != BASE_JD_EVENT_DONE) { + kbase_js_evict_deps(kctx, katom, katom->slot_nr, + katom->sched_priority); + } + + KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, NULL, + katom->slot_nr, 0, TL_JS_EVENT_STOP); + + trace_sysgraph_gpu(SGR_COMPLETE, kctx->id, + kbase_jd_atom_id(katom->kctx, katom), katom->slot_nr); + + kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0); + + /* Unblock cross dependency if present */ + if (x_dep && (katom->event_code == BASE_JD_EVENT_DONE || + !(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) && + (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)) { + bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr, + false); + x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; + trace_sysgraph(SGR_DEP_RES, kctx->id, + kbase_jd_atom_id(katom->kctx, x_dep)); + dev_dbg(kbdev->dev, "Cleared X_DEP flag on atom %p\n", + (void *)x_dep); + + kbase_js_move_to_tree(x_dep); + + if (!was_pullable && kbase_js_ctx_pullable(kctx, x_dep->slot_nr, + false)) + kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, + x_dep->slot_nr); + + if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { + dev_dbg(kbdev->dev, "Atom %p is in runnable tree\n", + (void *)x_dep); + return x_dep; + } + } else { + dev_dbg(kbdev->dev, + "No cross-slot dep to unblock for atom %p\n", + (void *)katom); + } + + return NULL; +} + +/** + * kbase_js_atom_blocked_on_x_dep - Decide whether to ignore a cross-slot + * dependency + * @katom: Pointer to an atom in the slot ringbuffer + * + * A cross-slot dependency is ignored if necessary to unblock incremental + * rendering. If the atom at the start of a renderpass used too much memory + * and was soft-stopped then the atom at the end of a renderpass is submitted + * to hardware regardless of its dependency on the start-of-renderpass atom. + * This can happen multiple times for the same pair of atoms. 
+ * + * Return: true to block the atom or false to allow it to be submitted to + * hardware + */ +bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom) +{ + struct kbase_context *const kctx = katom->kctx; + struct kbase_device *kbdev = kctx->kbdev; + struct kbase_jd_renderpass *rp; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (!(katom->atom_flags & + KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { + dev_dbg(kbdev->dev, "Atom %p is not blocked on a cross-slot dependency", + (void *)katom); + return false; + } + + if (!(katom->core_req & BASE_JD_REQ_END_RENDERPASS)) { + dev_dbg(kbdev->dev, "Atom %p is blocked on a cross-slot dependency", + (void *)katom); + return true; + } + + compiletime_assert((1ull << (sizeof(katom->renderpass_id) * 8)) <= + ARRAY_SIZE(kctx->jctx.renderpasses), + "Should check invalid access to renderpasses"); + + rp = &kctx->jctx.renderpasses[katom->renderpass_id]; + /* We can read a subset of renderpass state without holding + * higher-level locks (but not end_katom, for example). + */ + + WARN_ON(rp->state == KBASE_JD_RP_COMPLETE); + + dev_dbg(kbdev->dev, "End atom has cross-slot dep in state %d\n", + (int)rp->state); + + if (rp->state != KBASE_JD_RP_OOM && rp->state != KBASE_JD_RP_RETRY_OOM) + return true; + + /* Tiler ran out of memory so allow the fragment job chain to run + * if it only depends on the tiler job chain. 
+ */ + if (katom->x_pre_dep != rp->start_katom) { + dev_dbg(kbdev->dev, "Dependency is on %p not start atom %p\n", + (void *)katom->x_pre_dep, (void *)rp->start_katom); + return true; + } + + dev_dbg(kbdev->dev, "Ignoring cross-slot dep on atom %p\n", + (void *)katom->x_pre_dep); + + return false; +} + +void kbase_js_sched(struct kbase_device *kbdev, int js_mask) +{ + struct kbasep_js_device_data *js_devdata; + struct kbase_context *last_active[BASE_JM_MAX_NR_SLOTS]; + bool timer_sync = false; + bool ctx_waiting[BASE_JM_MAX_NR_SLOTS]; + int js; + + dev_dbg(kbdev->dev, "%s kbdev %p mask 0x%x\n", + __func__, (void *)kbdev, (unsigned int)js_mask); + + js_devdata = &kbdev->js_data; + + down(&js_devdata->schedule_sem); + mutex_lock(&js_devdata->queue_mutex); + + for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { + last_active[js] = kbdev->hwaccess.active_kctx[js]; + ctx_waiting[js] = false; + } + + while (js_mask) { + js = ffs(js_mask) - 1; + + while (1) { + struct kbase_context *kctx; + unsigned long flags; + bool context_idle = false; + + kctx = kbase_js_ctx_list_pop_head(kbdev, js); + + if (!kctx) { + js_mask &= ~(1 << js); + dev_dbg(kbdev->dev, + "No kctx on pullable list (s:%d)\n", + js); + break; + } + + if (!kbase_ctx_flag(kctx, KCTX_ACTIVE)) { + context_idle = true; + + dev_dbg(kbdev->dev, + "kctx %p is not active (s:%d)\n", + (void *)kctx, js); + + if (kbase_pm_context_active_handle_suspend( + kbdev, + KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { + dev_dbg(kbdev->dev, + "Suspend pending (s:%d)\n", js); + /* Suspend pending - return context to + * queue and stop scheduling */ + mutex_lock( + &kctx->jctx.sched_info.ctx.jsctx_mutex); + if (kbase_js_ctx_list_add_pullable_head( + kctx->kbdev, kctx, js)) + kbase_js_sync_timers(kbdev); + mutex_unlock( + &kctx->jctx.sched_info.ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + up(&js_devdata->schedule_sem); + return; + } + kbase_ctx_flag_set(kctx, KCTX_ACTIVE); + } + + if (!kbase_js_use_ctx(kbdev, kctx, js)) { + 
mutex_lock( + &kctx->jctx.sched_info.ctx.jsctx_mutex); + + dev_dbg(kbdev->dev, + "kctx %p cannot be used at this time\n", + kctx); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + if (kbase_js_ctx_pullable(kctx, js, false) + || kbase_ctx_flag(kctx, KCTX_PRIVILEGED)) + timer_sync |= + kbase_js_ctx_list_add_pullable_head_nolock( + kctx->kbdev, kctx, js); + else + timer_sync |= + kbase_js_ctx_list_add_unpullable_nolock( + kctx->kbdev, kctx, js); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, + flags); + mutex_unlock( + &kctx->jctx.sched_info.ctx.jsctx_mutex); + if (context_idle) { + WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); + kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); + kbase_pm_context_idle(kbdev); + } + + /* No more jobs can be submitted on this slot */ + js_mask &= ~(1 << js); + break; + } + mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + kbase_ctx_flag_clear(kctx, KCTX_PULLED); + + if (!kbase_jm_kick(kbdev, 1 << js)) { + dev_dbg(kbdev->dev, + "No more jobs can be submitted (s:%d)\n", + js); + js_mask &= ~(1 << js); + } + if (!kbase_ctx_flag(kctx, KCTX_PULLED)) { + bool pullable; + + dev_dbg(kbdev->dev, + "No atoms pulled from kctx %p (s:%d)\n", + (void *)kctx, js); + + pullable = kbase_js_ctx_pullable(kctx, js, + true); + + /* Failed to pull jobs - push to head of list. + * Unless this context is already 'active', in + * which case it's effectively already scheduled + * so push it to the back of the list. 
*/ + if (pullable && kctx == last_active[js] && + kbase_ctx_flag(kctx, + (KCTX_PULLED_SINCE_ACTIVE_JS0 << + js))) + timer_sync |= + kbase_js_ctx_list_add_pullable_nolock( + kctx->kbdev, + kctx, js); + else if (pullable) + timer_sync |= + kbase_js_ctx_list_add_pullable_head_nolock( + kctx->kbdev, + kctx, js); + else + timer_sync |= + kbase_js_ctx_list_add_unpullable_nolock( + kctx->kbdev, + kctx, js); + + /* If this context is not the active context, + * but the active context is pullable on this + * slot, then we need to remove the active + * marker to prevent it from submitting atoms in + * the IRQ handler, which would prevent this + * context from making progress. */ + if (last_active[js] && kctx != last_active[js] + && kbase_js_ctx_pullable( + last_active[js], js, true)) + ctx_waiting[js] = true; + + if (context_idle) { + kbase_jm_idle_ctx(kbdev, kctx); + spin_unlock_irqrestore( + &kbdev->hwaccess_lock, + flags); + WARN_ON(!kbase_ctx_flag(kctx, KCTX_ACTIVE)); + kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); + kbase_pm_context_idle(kbdev); + } else { + spin_unlock_irqrestore( + &kbdev->hwaccess_lock, + flags); + } + mutex_unlock( + &kctx->jctx.sched_info.ctx.jsctx_mutex); + + js_mask &= ~(1 << js); + break; /* Could not run atoms on this slot */ + } + + dev_dbg(kbdev->dev, "Push kctx %p to back of list\n", + (void *)kctx); + if (kbase_js_ctx_pullable(kctx, js, true)) + timer_sync |= + kbase_js_ctx_list_add_pullable_nolock( + kctx->kbdev, kctx, js); + else + timer_sync |= + kbase_js_ctx_list_add_unpullable_nolock( + kctx->kbdev, kctx, js); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); + } + } + + if (timer_sync) + kbase_js_sync_timers(kbdev); + + for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { + if (kbdev->hwaccess.active_kctx[js] == last_active[js] && + ctx_waiting[js]) { + dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n", + (void *)last_active[js], js); + kbdev->hwaccess.active_kctx[js] = 
NULL; + } + } + + mutex_unlock(&js_devdata->queue_mutex); + up(&js_devdata->schedule_sem); +} + +void kbase_js_zap_context(struct kbase_context *kctx) +{ + struct kbase_device *kbdev = kctx->kbdev; + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info; + + /* + * Critical assumption: No more submission is possible outside of the + * workqueue. This is because the OS *must* prevent U/K calls (IOCTLs) + * whilst the struct kbase_context is terminating. + */ + + /* First, atomically do the following: + * - mark the context as dying + * - try to evict it from the queue */ + mutex_lock(&kctx->jctx.lock); + mutex_lock(&js_devdata->queue_mutex); + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + kbase_ctx_flag_set(kctx, KCTX_DYING); + + dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx); + + /* + * At this point we know: + * - If eviction succeeded, it was in the queue, but now no + * longer is + * - We must cancel the jobs here. No Power Manager active reference to + * release. + * - This happens asynchronously - kbase_jd_zap_context() will wait for + * those jobs to be killed. + * - If eviction failed, then it wasn't in the queue. It is one + * of the following: + * - a. it didn't have any jobs, and so is not in the Queue or + * the Run Pool (not scheduled) + * - Hence, no more work required to cancel jobs. No Power Manager + * active reference to release. + * - b. it was in the middle of a scheduling transaction (and thus must + * have at least 1 job). This can happen from a syscall or a + * kernel thread. We still hold the jsctx_mutex, and so the thread + * must be waiting inside kbasep_js_try_schedule_head_ctx(), + * before checking whether the runpool is full. That thread will + * continue after we drop the mutex, and will notice the context + * is dying. It will rollback the transaction, killing all jobs at + * the same time. kbase_jd_zap_context() will wait for those jobs + * to be killed. 
+ * - Hence, no more work required to cancel jobs, or to release the + * Power Manager active reference. + * - c. it is scheduled, and may or may not be running jobs + * - We must cause it to leave the runpool by stopping it from + * submitting any more jobs. When it finally does leave, + * kbasep_js_runpool_requeue_or_kill_ctx() will kill all remaining jobs + * (because it is dying), release the Power Manager active reference, + * and will not requeue the context in the queue. + * kbase_jd_zap_context() will wait for those jobs to be killed. + * - Hence, work required just to make it leave the runpool. Cancelling + * jobs and releasing the Power manager active reference will be + * handled when it leaves the runpool. + */ + if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) { + unsigned long flags; + int js; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + if (!list_empty( + &kctx->jctx.sched_info.ctx.ctx_list_entry[js])) + list_del_init( + &kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* The following events require us to kill off remaining jobs + * and update PM book-keeping: + * - we evicted it correctly (it must have jobs to be in the + * Queue) + * + * These events need no action, but take this path anyway: + * - Case a: it didn't have any jobs, and was never in the Queue + * - Case b: scheduling transaction will be partially rolled- + * back (this already cancels the jobs) + */ + + KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u, kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + + dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx); + + /* Only cancel jobs when we evicted from the + * queue. No Power Manager active reference was held. 
+ * + * Having is_dying set ensures that this kills, and + * doesn't requeue */ + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, false); + + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + mutex_unlock(&kctx->jctx.lock); + } else { + unsigned long flags; + bool was_retained; + + /* Case c: didn't evict, but it is scheduled - it's in the Run + * Pool */ + KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u, kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx); + + /* Disable the ctx from submitting any more jobs */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + kbasep_js_clear_submit_allowed(js_devdata, kctx); + + /* Retain and (later) release the context whilst it is now + * disallowed from submitting jobs - ensures that someone + * somewhere will be removing the context later on */ + was_retained = kbase_ctx_sched_inc_refcount_nolock(kctx); + + /* Since it's scheduled and we have the jsctx_mutex, it must be + * retained successfully */ + KBASE_DEBUG_ASSERT(was_retained); + + dev_dbg(kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx); + + /* Cancel any remaining running jobs for this kctx - if any. + * Submit is disallowed which takes effect immediately, so no + * more new jobs will appear after we do this.
*/ + kbase_backend_jm_kill_running_jobs_from_kctx(kctx); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + mutex_unlock(&kctx->jctx.lock); + + dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)", + kctx); + + kbasep_js_runpool_release_ctx(kbdev, kctx); + } + + KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_DONE, kctx, NULL, 0u, 0u); + + /* After this, you must wait on both the + * kbase_jd_context::zero_jobs_wait and the + * kbasep_js_kctx_info::ctx::is_scheduled_waitq - to wait for the jobs + * to be destroyed, and the context to be de-scheduled (if it was on the + * runpool). + * + * kbase_jd_zap_context() will do this. */ +} + +static inline int trace_get_refcnt(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + return atomic_read(&kctx->refcount); +} + +/** + * kbase_js_foreach_ctx_job() - Call a function on all jobs in context + * @kctx: Pointer to context. + * @callback: Pointer to function to call for each job. + * + * Call a function on all jobs belonging to a non-queued, non-running + * context, and detach the jobs from the context as it goes. + * + * Due to the locks that might be held at the time of the call, the callback + * may need to defer work on a workqueue to complete its actions (e.g. when + * cancelling jobs) + * + * Atoms will be removed from the queue, so this must only be called when + * cancelling jobs (which occurs as part of context destruction). + * + * The locking conditions on the caller are as follows: + * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ */ +static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, + kbasep_js_ctx_job_cb callback) +{ + struct kbase_device *kbdev; + unsigned long flags; + u32 js; + + kbdev = kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + KBASE_KTRACE_ADD_JM_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL, + 0u, trace_get_refcnt(kbdev, kctx)); + + /* Invoke callback on jobs on each slot in turn */ + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) + jsctx_queue_foreach(kctx, js, callback); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h new file mode 100644 index 0000000..541acd4 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h @@ -0,0 +1,40 @@ +/* + * + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file mali_kbase_js.h + * Job Scheduler APIs. 
+ */ + +#ifndef _KBASE_JS_H_ +#define _KBASE_JS_H_ + +#include "context/mali_kbase_context.h" +#include "mali_kbase_defs.h" +#include "mali_kbase_debug.h" +#include +#include "jm/mali_kbase_jm_js.h" +#include "jm/mali_kbase_js_defs.h" + +#endif /* _KBASE_JS_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c new file mode 100644 index 0000000..141d04a --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c @@ -0,0 +1,283 @@ +/* + * + * (C) COPYRIGHT 2012-2016, 2018, 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +#include +#include + +/* + * Private functions follow + */ + +/** + * @brief Check whether a ctx has a certain attribute, and if so, retain that + * attribute on the runpool. + * + * Requires: + * - jsctx mutex + * - runpool_irq spinlock + * - ctx is scheduled on the runpool + * + * @return true indicates a change in ctx attributes state of the runpool. + * In this state, the scheduler might be able to submit more jobs than + * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() + * or similar is called sometime later. 
+ * @return false indicates no change in ctx attributes state of the runpool. + */ +static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + bool runpool_state_changed = false; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); + lockdep_assert_held(&kbdev->hwaccess_lock); + + KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + + if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) { + KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] < S8_MAX); + ++(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]); + + if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 1) { + /* First refcount indicates a state change */ + runpool_state_changed = true; + KBASE_KTRACE_ADD_JM(kbdev, JS_CTX_ATTR_NOW_ON_RUNPOOL, kctx, NULL, 0u, attribute); + } + } + + return runpool_state_changed; +} + +/** + * @brief Check whether a ctx has a certain attribute, and if so, release that + * attribute on the runpool. + * + * Requires: + * - jsctx mutex + * - runpool_irq spinlock + * - ctx is scheduled on the runpool + * + * @return true indicates a change in ctx attributes state of the runpool. + * In this state, the scheduler might be able to submit more jobs than + * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() + * or similar is called sometime later. + * @return false indicates no change in ctx attributes state of the runpool. 
+ */ +static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + bool runpool_state_changed = false; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); + lockdep_assert_held(&kbdev->hwaccess_lock); + KBASE_DEBUG_ASSERT(kbase_ctx_flag(kctx, KCTX_SCHEDULED)); + + if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) { + KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] > 0); + --(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]); + + if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 0) { + /* Last de-refcount indicates a state change */ + runpool_state_changed = true; + KBASE_KTRACE_ADD_JM(kbdev, JS_CTX_ATTR_NOW_OFF_RUNPOOL, kctx, NULL, 0u, attribute); + } + } + + return runpool_state_changed; +} + +/** + * @brief Retain a certain attribute on a ctx, also retaining it on the runpool + * if the context is scheduled. + * + * Requires: + * - jsctx mutex + * - If the context is scheduled, then runpool_irq spinlock must also be held + * + * @return true indicates a change in ctx attributes state of the runpool. + * This may allow the scheduler to submit more jobs than previously. + * @return false indicates no change in ctx attributes state of the runpool. 
+ */ +static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) +{ + struct kbasep_js_kctx_info *js_kctx_info; + bool runpool_state_changed = false; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); + js_kctx_info = &kctx->jctx.sched_info; + + lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] < U32_MAX); + + ++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]); + + if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { + /* Only ref-count the attribute on the runpool for the first time this contexts sees this attribute */ + KBASE_KTRACE_ADD_JM(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute); + runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute); + } + + return runpool_state_changed; +} + +/* + * @brief Release a certain attribute on a ctx, also releasing it from the runpool + * if the context is scheduled. + * + * Requires: + * - jsctx mutex + * - If the context is scheduled, then runpool_irq spinlock must also be held + * + * @return true indicates a change in ctx attributes state of the runpool. + * This may allow the scheduler to submit more jobs than previously. + * @return false indicates no change in ctx attributes state of the runpool. 
+ */ +static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) +{ + struct kbasep_js_kctx_info *js_kctx_info; + bool runpool_state_changed = false; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); + js_kctx_info = &kctx->jctx.sched_info; + + lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0); + + if (kbase_ctx_flag(kctx, KCTX_SCHEDULED) && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) { + lockdep_assert_held(&kbdev->hwaccess_lock); + /* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */ + runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute); + KBASE_KTRACE_ADD_JM(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute); + } + + /* De-ref must happen afterwards, because kbasep_js_ctx_attr_runpool_release() needs to check it too */ + --(js_kctx_info->ctx.ctx_attr_ref_count[attribute]); + + return runpool_state_changed; +} + +/* + * More commonly used public functions + */ + +void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) +{ + bool runpool_state_changed; + int i; + + /* Retain any existing attributes */ + for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) { + if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) { + /* The context is being scheduled in, so update the runpool with the new attributes */ + runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i); + + /* We don't need to know about state changed, because retaining a + * context occurs on scheduling it, and that itself will also try + * to run new atoms */ + CSTD_UNUSED(runpool_state_changed); + } + } +} + +bool kbasep_js_ctx_attr_runpool_release_ctx(struct 
kbase_device *kbdev, struct kbase_context *kctx) +{ + bool runpool_state_changed = false; + int i; + + /* Release any existing attributes */ + for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) { + if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) { + /* The context is being scheduled out, so update the runpool on the removed attributes */ + runpool_state_changed |= kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i); + } + } + + return runpool_state_changed; +} + +void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ + bool runpool_state_changed = false; + base_jd_core_req core_req; + + KBASE_DEBUG_ASSERT(katom); + core_req = katom->core_req; + + if (core_req & BASE_JD_REQ_ONLY_COMPUTE) + runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE); + else + runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE); + + if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) { + /* Atom that can run on slot1 or slot2, and can use all cores */ + runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES); + } + + /* We don't need to know about state changed, because retaining an + * atom occurs on adding it, and that itself will also try to run + * new atoms */ + CSTD_UNUSED(runpool_state_changed); +} + +bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state) +{ + bool runpool_state_changed = false; + base_jd_core_req core_req; + + KBASE_DEBUG_ASSERT(katom_retained_state); + core_req = katom_retained_state->core_req; + + /* No-op for invalid atoms */ + if 
(kbasep_js_atom_retained_state_is_valid(katom_retained_state) == false) + return false; + + if (core_req & BASE_JD_REQ_ONLY_COMPUTE) + runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE); + else + runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE); + + if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) { + /* Atom that can run on slot1 or slot2, and can use all cores */ + runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES); + } + + return runpool_state_changed; +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h new file mode 100644 index 0000000..25fd397 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h @@ -0,0 +1,155 @@ +/* + * + * (C) COPYRIGHT 2012-2015, 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file mali_kbase_js_ctx_attr.h + * Job Scheduler Context Attribute APIs + */ + +#ifndef _KBASE_JS_CTX_ATTR_H_ +#define _KBASE_JS_CTX_ATTR_H_ + +/** + * @addtogroup base_api + * @{ + */ + +/** + * @addtogroup base_kbase_api + * @{ + */ + +/** + * @addtogroup kbase_js + * @{ + */ + +/** + * Retain all attributes of a context + * + * This occurs on scheduling in the context on the runpool (but after + * is_scheduled is set) + * + * Requires: + * - jsctx mutex + * - runpool_irq spinlock + * - ctx->is_scheduled is true + */ +void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); + +/** + * Release all attributes of a context + * + * This occurs on scheduling out the context from the runpool (but before + * is_scheduled is cleared) + * + * Requires: + * - jsctx mutex + * - runpool_irq spinlock + * - ctx->is_scheduled is true + * + * @return true indicates a change in ctx attributes state of the runpool. + * In this state, the scheduler might be able to submit more jobs than + * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() + * or similar is called sometime later. + * @return false indicates no change in ctx attributes state of the runpool. + */ +bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); + +/** + * Retain all attributes of an atom + * + * This occurs on adding an atom to a context + * + * Requires: + * - jsctx mutex + * - If the context is scheduled, then runpool_irq spinlock must also be held + */ +void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom); + +/** + * Release all attributes of an atom, given its retained state. 
+ * + * This occurs after (permanently) removing an atom from a context + * + * Requires: + * - jsctx mutex + * - If the context is scheduled, then runpool_irq spinlock must also be held + * + * This is a no-op when \a katom_retained_state is invalid. + * + * @return true indicates a change in ctx attributes state of the runpool. + * In this state, the scheduler might be able to submit more jobs than + * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() + * or similar is called sometime later. + * @return false indicates no change in ctx attributes state of the runpool. + */ +bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state); + +/** + * Requires: + * - runpool_irq spinlock + */ +static inline s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute) +{ + struct kbasep_js_device_data *js_devdata; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); + js_devdata = &kbdev->js_data; + + return js_devdata->runpool_irq.ctx_attr_ref_count[attribute]; +} + +/** + * Requires: + * - runpool_irq spinlock + */ +static inline bool kbasep_js_ctx_attr_is_attr_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute) +{ + /* In general, attributes are 'on' when they have a non-zero refcount (note: the refcount will never be < 0) */ + return (bool) kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute); +} + +/** + * Requires: + * - jsctx mutex + */ +static inline bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) +{ + struct kbasep_js_kctx_info *js_kctx_info; + + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); + js_kctx_info = &kctx->jctx.sched_info; + + /* In general, attributes are 'on' when they have a refcount (which should never be < 0) */ + 
return (bool) (js_kctx_info->ctx.ctx_attr_ref_count[attribute]); +} + + /** @} *//* end group kbase_js */ + /** @} *//* end group base_kbase_api */ + /** @} *//* end group base_api */ + +#endif /* _KBASE_JS_CTX_ATTR_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h new file mode 100644 index 0000000..003ac9e --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h @@ -0,0 +1,48 @@ +/* + * + * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file mali_kbase_linux.h + * Base kernel APIs, Linux implementation. + */ + +#ifndef _KBASE_LINUX_H_ +#define _KBASE_LINUX_H_ + +/* All things that are needed for the Linux port.
*/
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * KBASE_EXPORT_TEST_API(func) expands to EXPORT_SYMBOL(func) only when
+ * MALI_KERNEL_TEST_API is defined as 1; otherwise it expands to nothing.
+ * KBASE_EXPORT_SYMBOL(func) always expands to EXPORT_SYMBOL(func).
+ */
+#if (defined(MALI_KERNEL_TEST_API) && (1 == MALI_KERNEL_TEST_API))
+	#define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func)
+#else
+	#define KBASE_EXPORT_TEST_API(func)
+#endif
+
+#define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func)
+
+#endif /* _KBASE_LINUX_H_ */
diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c
new file mode 100644
index 0000000..4a1004b
--- /dev/null
+++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c
@@ -0,0 +1,4517 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Base kernel memory APIs
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#ifdef CONFIG_OF
+#include
+#endif
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * Alignment of objects allocated by the GPU inside a just-in-time memory
+ * region whose size is given by an end address
+ *
+ * This is the alignment of objects allocated by the GPU, but possibly not
+ * fully written to.
When taken into account with
+ * KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES it gives the maximum number of bytes
+ * that the JIT memory report size can exceed the actual backed memory size.
+ */
+#define KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES (128u)
+
+/*
+ * Maximum size of objects allocated by the GPU inside a just-in-time memory
+ * region whose size is given by an end address
+ *
+ * This is the maximum size of objects allocated by the GPU, but possibly not
+ * fully written to. When taken into account with
+ * KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES it gives the maximum number of bytes
+ * that the JIT memory report size can exceed the actual backed memory size.
+ */
+#define KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES (512u)
+
+
+/* Forward declarations */
+static void free_partial_locked(struct kbase_context *kctx,
+		struct kbase_mem_pool *pool, struct tagged_addr tp);
+
+/*
+ * Return the number of CPU virtual address bits to assume for kctx.
+ *
+ * The base value is selected at compile time per architecture; on
+ * CONFIG_64BIT kernels it is overridden to 32 for KCTX_COMPAT contexts.
+ */
+static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx)
+{
+#if defined(CONFIG_ARM64)
+	/* VA_BITS can be as high as 48 bits, but all bits are available for
+	 * both user and kernel.
+	 */
+	size_t cpu_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+	/* x86_64 can access 48 bits of VA, but the 48th is used to denote
+	 * kernel (1) vs userspace (0), so the max here is 47.
+	 */
+	size_t cpu_va_bits = 47;
+#elif defined(CONFIG_ARM) || defined(CONFIG_X86_32)
+	size_t cpu_va_bits = sizeof(void *) * BITS_PER_BYTE;
+#else
+#error "Unknown CPU VA width for this architecture"
+#endif
+
+#ifdef CONFIG_64BIT
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+		cpu_va_bits = 32;
+#endif
+
+	return cpu_va_bits;
+}
+
+/* This function finds out which RB tree the given pfn from the GPU VA belongs
+ * to based on the memory zone the pfn refers to.
+ *
+ * Order of the checks: pfns at or above kctx->exec_va_start map to the EXEC
+ * tree; otherwise pfns at or above the end of SAME_VA map to the CUSTOM tree;
+ * everything else maps to the SAME tree. */
+static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx,
+								    u64 gpu_pfn)
+{
+	struct rb_root *rbtree = NULL;
+
+	/* The gpu_pfn can only be greater than the starting pfn of the EXEC_VA
+	 * zone if this has been initialized.
+	 */
+	if (gpu_pfn >= kctx->exec_va_start)
+		rbtree = &kctx->reg_rbtree_exec;
+	else {
+		u64 same_va_end;
+
+		/* Without CONFIG_64BIT the assignment below is unconditional;
+		 * with it, only KCTX_COMPAT contexts use the fixed base.
+		 */
+#ifdef CONFIG_64BIT
+		if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+#endif /* CONFIG_64BIT */
+			same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE;
+#ifdef CONFIG_64BIT
+		else
+			same_va_end = kctx->same_va_end;
+#endif /* CONFIG_64BIT */
+
+		if (gpu_pfn >= same_va_end)
+			rbtree = &kctx->reg_rbtree_custom;
+		else
+			rbtree = &kctx->reg_rbtree_same;
+	}
+
+	return rbtree;
+}
+
+/* This function inserts a region into the tree.
+ *
+ * The tree is taken from new_reg->rbtree and nodes are ordered by start_pfn.
+ */
+static void kbase_region_tracker_insert(struct kbase_va_region *new_reg)
+{
+	u64 start_pfn = new_reg->start_pfn;
+	struct rb_node **link = NULL;
+	struct rb_node *parent = NULL;
+	struct rb_root *rbtree = NULL;
+
+	rbtree = new_reg->rbtree;
+
+	link = &(rbtree->rb_node);
+	/* Find the right place in the tree using tree search */
+	while (*link) {
+		struct kbase_va_region *old_reg;
+
+		parent = *link;
+		old_reg = rb_entry(parent, struct kbase_va_region, rblink);
+
+		/* RBTree requires no duplicate entries. */
+		KBASE_DEBUG_ASSERT(old_reg->start_pfn != start_pfn);
+
+		if (old_reg->start_pfn > start_pfn)
+			link = &(*link)->rb_left;
+		else
+			link = &(*link)->rb_right;
+	}
+
+	/* Put the new node there, and rebalance tree */
+	rb_link_node(&(new_reg->rblink), parent, link);
+
+	rb_insert_color(&(new_reg->rblink), rbtree);
+}
+
+/*
+ * Find the region in rbtree that fully encloses the page range
+ * [start_pfn, start_pfn + nr_pages). Returns NULL when no single region
+ * encloses the whole range.
+ */
+static struct kbase_va_region *find_region_enclosing_range_rbtree(
+		struct rb_root *rbtree, u64 start_pfn, size_t nr_pages)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+	u64 end_pfn = start_pfn + nr_pages; /* exclusive end of the range */
+
+	rbnode = rbtree->rb_node;
+
+	while (rbnode) {
+		u64 tmp_start_pfn, tmp_end_pfn;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		tmp_start_pfn = reg->start_pfn;
+		tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+		/* If start is lower than this, go left. */
+		if (start_pfn < tmp_start_pfn)
+			rbnode = rbnode->rb_left;
+		/* If end is higher than this, then go right. */
+		else if (end_pfn > tmp_end_pfn)
+			rbnode = rbnode->rb_right;
+		else	/* Enclosing */
+			return reg;
+	}
+
+	return NULL;
+}
+
+/*
+ * Find the region in rbtree whose page range contains gpu_addr (a byte
+ * address, converted to a pfn with PAGE_SHIFT). Returns NULL if none does.
+ */
+struct kbase_va_region *kbase_find_region_enclosing_address(
+		struct rb_root *rbtree, u64 gpu_addr)
+{
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	rbnode = rbtree->rb_node;
+
+	while (rbnode) {
+		u64 tmp_start_pfn, tmp_end_pfn;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		tmp_start_pfn = reg->start_pfn;
+		tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+		/* If start is lower than this, go left. */
+		if (gpu_pfn < tmp_start_pfn)
+			rbnode = rbnode->rb_left;
+		/* If end is higher than this, then go right. */
+		else if (gpu_pfn >= tmp_end_pfn)
+			rbnode = rbnode->rb_right;
+		else	/* Enclosing */
+			return reg;
+	}
+
+	return NULL;
+}
+
+/* Find region enclosing given address.
+ *
+ * Selects the tree with kbase_gpu_va_to_rbtree() and then searches it.
+ * Asserts (lockdep) that kctx->reg_lock is held.
+ */
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(
+		struct kbase_context *kctx, u64 gpu_addr)
+{
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_root *rbtree = NULL;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);
+
+	return kbase_find_region_enclosing_address(rbtree, gpu_addr);
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address);
+
+/*
+ * Find the region in rbtree whose start_pfn equals the pfn of gpu_addr.
+ * Returns NULL if no region starts exactly there.
+ */
+struct kbase_va_region *kbase_find_region_base_address(
+		struct rb_root *rbtree, u64 gpu_addr)
+{
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_node *rbnode = NULL;
+	struct kbase_va_region *reg = NULL;
+
+	rbnode = rbtree->rb_node;
+
+	while (rbnode) {
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if (reg->start_pfn > gpu_pfn)
+			rbnode = rbnode->rb_left;
+		else if (reg->start_pfn < gpu_pfn)
+			rbnode = rbnode->rb_right;
+		else
+			return reg;
+	}
+
+	return NULL;
+}
+
+/* Find region with given base address.
+ *
+ * Selects the tree with kbase_gpu_va_to_rbtree() and then searches it.
+ * Asserts (lockdep) that kctx->reg_lock is held.
+ */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(
+		struct kbase_context *kctx, u64 gpu_addr)
+{
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_root *rbtree = NULL;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);
+
+	return kbase_find_region_base_address(rbtree, gpu_addr);
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address);
+
+/* Find region meeting given requirements.
+ *
+ * Walks reg_reqs->rbtree in order and returns the first KBASE_REG_FREE region
+ * of at least nr_pages in which an aligned run of nr_pages fits. On success
+ * the chosen first pfn is written to *out_start_pfn; NULL is returned when no
+ * region qualifies (in which case *out_start_pfn is not written).
+ */
+static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(
+		struct kbase_va_region *reg_reqs,
+		size_t nr_pages, size_t align_offset, size_t align_mask,
+		u64 *out_start_pfn)
+{
+	struct rb_node *rbnode = NULL;
+	struct kbase_va_region *reg = NULL;
+	struct rb_root *rbtree = NULL;
+
+	/* Note that this search is a linear search, as we do not have a target
+	   address in mind, so does not benefit from the rbtree search */
+	rbtree = reg_reqs->rbtree;
+
+	for (rbnode = rb_first(rbtree); rbnode; rbnode = rb_next(rbnode)) {
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if ((reg->nr_pages >= nr_pages) &&
+				(reg->flags & KBASE_REG_FREE)) {
+			/* Check alignment */
+			u64 start_pfn = reg->start_pfn;
+
+			/* When align_offset == align, this sequence is
+			 * equivalent to:
+			 *   (start_pfn + align_mask) & ~(align_mask)
+			 *
+			 * Otherwise, it aligns to n*align + offset, for the
+			 * lowest value n that makes this still >start_pfn */
+			start_pfn += align_mask;
+			start_pfn -= (start_pfn - align_offset) & (align_mask);
+
+			if (!(reg_reqs->flags & KBASE_REG_GPU_NX)) {
+				/* Can't end at 4GB boundary */
+				if (0 == ((start_pfn + nr_pages) & BASE_MEM_PFN_MASK_4GB))
+					start_pfn += align_offset;
+
+				/* Can't start at 4GB boundary */
+				if (0 == (start_pfn & BASE_MEM_PFN_MASK_4GB))
+					start_pfn += align_offset;
+
+				/* Still on a boundary after both nudges:
+				 * give up on this region and try the next.
+				 */
+				if (!((start_pfn + nr_pages) & BASE_MEM_PFN_MASK_4GB) ||
+				    !(start_pfn & BASE_MEM_PFN_MASK_4GB))
+					continue;
+			} else if (reg_reqs->flags &
+					KBASE_REG_GPU_VA_SAME_4GB_PAGE) {
+				u64 end_pfn = start_pfn + nr_pages - 1;
+
+				/* First and last page differ in the bits above
+				 * the 4GB mask: restart at the masked pfn of
+				 * the last page.
+				 */
+				if ((start_pfn & ~BASE_MEM_PFN_MASK_4GB) !=
+				    (end_pfn & ~BASE_MEM_PFN_MASK_4GB))
+					start_pfn = end_pfn & ~BASE_MEM_PFN_MASK_4GB;
+			}
+
+			/* Accept only if the adjusted run still lies entirely
+			 * inside this free region.
+			 */
+			if ((start_pfn >= reg->start_pfn) &&
+					(start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) &&
+					((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1))) {
+				*out_start_pfn = start_pfn;
+				return reg;
+			}
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * @brief Remove a region object from the global list.
+ *
+ * The region reg is removed, possibly by merging with other free and
+ * compatible adjacent regions.  It must be called with the context
+ * region lock held. The associated memory is not released (see
+ * kbase_free_alloced_region). Internal use only.
+ *
+ * Returns 0 on success, or -ENOMEM when no neighbour could absorb the range
+ * and allocating the replacement free placeholder failed.
+ */
+int kbase_remove_va_region(struct kbase_va_region *reg)
+{
+	struct rb_node *rbprev;
+	struct kbase_va_region *prev = NULL;
+	struct rb_node *rbnext;
+	struct kbase_va_region *next = NULL;
+	struct rb_root *reg_rbtree = NULL;
+
+	int merged_front = 0;
+	int merged_back = 0;
+	int err = 0;
+
+	reg_rbtree = reg->rbtree;
+
+	/* Try to merge with the previous block first */
+	rbprev = rb_prev(&(reg->rblink));
+	if (rbprev) {
+		prev = rb_entry(rbprev, struct kbase_va_region, rblink);
+		if (prev->flags & KBASE_REG_FREE) {
+			/* We're compatible with the previous VMA,
+			 * merge with it */
+			WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) !=
+					    (reg->flags & KBASE_REG_ZONE_MASK));
+			prev->nr_pages += reg->nr_pages;
+			rb_erase(&(reg->rblink), reg_rbtree);
+			/* From here on reg refers to the merged free block */
+			reg = prev;
+			merged_front = 1;
+		}
+	}
+
+	/* Try to merge with the next block second */
+	/* Note we do the lookup here as the tree may have been rebalanced. */
+	rbnext = rb_next(&(reg->rblink));
+	if (rbnext) {
+		/* We're compatible with the next VMA, merge with it */
+		next = rb_entry(rbnext, struct kbase_va_region, rblink);
+		if (next->flags & KBASE_REG_FREE) {
+			WARN_ON((next->flags & KBASE_REG_ZONE_MASK) !=
+					    (reg->flags & KBASE_REG_ZONE_MASK));
+			next->start_pfn = reg->start_pfn;
+			next->nr_pages += reg->nr_pages;
+			rb_erase(&(reg->rblink), reg_rbtree);
+			merged_back = 1;
+			if (merged_front) {
+				/* We already merged with prev, free it */
+				kfree(reg);
+			}
+		}
+	}
+
+	/* If we failed to merge then we need to add a new block */
+	if (!(merged_front || merged_back)) {
+		/*
+		 * We didn't merge anything. Add a new free
+		 * placeholder and remove the original one.
+		 */
+		struct kbase_va_region *free_reg;
+
+		free_reg = kbase_alloc_free_region(reg_rbtree,
+				reg->start_pfn, reg->nr_pages,
+				reg->flags & KBASE_REG_ZONE_MASK);
+		if (!free_reg) {
+			err = -ENOMEM;
+			goto out;
+		}
+		rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree);
+	}
+
+ out:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_remove_va_region);
+
+/**
+ * kbase_insert_va_region_nolock - Insert a VA region to the list,
+ * replacing the existing one.
+ *
+ * @new_reg: The new region to insert
+ * @at_reg: The region to replace
+ * @start_pfn: The Page Frame Number to insert at
+ * @nr_pages: The number of pages of the region
+ */
+static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg,
+		struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
+{
+	struct rb_root *reg_rbtree = NULL;
+	int err = 0;
+
+	reg_rbtree = at_reg->rbtree;
+
+	/* Must be a free region */
+	KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0);
+	/* start_pfn should be contained within at_reg */
+	KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages));
+	/* at least nr_pages from start_pfn should be contained within at_reg */
+	KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages);
+
+	new_reg->start_pfn = start_pfn;
+	new_reg->nr_pages = nr_pages;
+
+	/* Four cases follow: exact fit, carve from the front, carve from the
+	 * back, or split in the middle. Only the split case allocates and can
+	 * therefore fail with -ENOMEM.
+	 */
+
+	/* Regions are a whole use, so swap and delete old one. */
+	if (at_reg->start_pfn == start_pfn && at_reg->nr_pages == nr_pages) {
+		rb_replace_node(&(at_reg->rblink), &(new_reg->rblink),
+								reg_rbtree);
+		kfree(at_reg);
+	}
+	/* New region replaces the start of the old one, so insert before. */
+	else if (at_reg->start_pfn == start_pfn) {
+		at_reg->start_pfn += nr_pages;
+		KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages);
+		at_reg->nr_pages -= nr_pages;
+
+		kbase_region_tracker_insert(new_reg);
+	}
+	/* New region replaces the end of the old one, so insert after. */
+	else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) {
+		at_reg->nr_pages -= nr_pages;
+
+		kbase_region_tracker_insert(new_reg);
+	}
+	/* New region splits the old one, so insert and create new */
+	else {
+		struct kbase_va_region *new_front_reg;
+
+		new_front_reg = kbase_alloc_free_region(reg_rbtree,
+				at_reg->start_pfn,
+				start_pfn - at_reg->start_pfn,
+				at_reg->flags & KBASE_REG_ZONE_MASK);
+
+		if (new_front_reg) {
+			/* at_reg becomes the free tail after new_reg */
+			at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages;
+			at_reg->start_pfn = start_pfn + nr_pages;
+
+			kbase_region_tracker_insert(new_front_reg);
+			kbase_region_tracker_insert(new_reg);
+		} else {
+			err = -ENOMEM;
+		}
+	}
+
+	return err;
+}
+
+/**
+ * kbase_add_va_region - Add a VA region to the region list for a context.
+ *
+ * @kctx: kbase context containing the region
+ * @reg: the region to add
+ * @addr: the address to insert the region at
+ * @nr_pages: the number of pages in the region
+ * @align: the minimum alignment in pages
+ *
+ * Wraps kbase_add_va_region_rbtree(). When that call fails with -ENOMEM and
+ * the region belongs to KBASE_REG_ZONE_CUSTOM_VA, the insert is retried for
+ * as long as kbase_jit_evict() returns non-zero. Asserts (lockdep) that
+ * kctx->reg_lock is held.
+ *
+ * Return: the result of the last kbase_add_va_region_rbtree() call.
+ */
+int kbase_add_va_region(struct kbase_context *kctx,
+		struct kbase_va_region *reg, u64 addr,
+		size_t nr_pages, size_t align)
+{
+	int err = 0;
+	struct kbase_device *kbdev = kctx->kbdev;
+	int cpu_va_bits = kbase_get_num_cpu_va_bits(kctx);
+	int gpu_pc_bits =
+		kbdev->gpu_props.props.core_props.log2_program_counter_size;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+	KBASE_DEBUG_ASSERT(NULL != reg);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* The executable allocation from the SAME_VA zone would already have an
+	 * appropriately aligned GPU VA chosen for it.
+	 * Also the executable allocation from EXEC_VA zone doesn't need the
+	 * special alignment.
+	 */
+	if (!(reg->flags & KBASE_REG_GPU_NX) && !addr &&
+	    ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_VA)) {
+		if (cpu_va_bits > gpu_pc_bits) {
+			/* Raise the alignment to 2^gpu_pc_bits bytes,
+			 * expressed in pages.
+			 */
+			align = max(align, (size_t)((1ULL << gpu_pc_bits)
+						>> PAGE_SHIFT));
+		}
+	}
+
+	do {
+		err = kbase_add_va_region_rbtree(kbdev, reg, addr, nr_pages,
+				align);
+		if (err != -ENOMEM)
+			break;
+
+		/*
+		 * If the allocation is not from the same zone as JIT
+		 * then don't retry, we're out of VA and there is
+		 * nothing which can be done about it.
+		 */
+		if ((reg->flags & KBASE_REG_ZONE_MASK) !=
+				KBASE_REG_ZONE_CUSTOM_VA)
+			break;
+	} while (kbase_jit_evict(kctx));
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_add_va_region);
+
+/**
+ * kbase_add_va_region_rbtree - Insert a region into its corresponding rbtree
+ *
+ * Insert a region into the rbtree that was specified when the region was
+ * created. If addr is 0 a free area in the rbtree is used, otherwise the
+ * specified address is used.
+ *
+ * @kbdev: The kbase device
+ * @reg: The region to add
+ * @addr: The address to add the region at, or 0 to map at any available address
+ * @nr_pages: The size of the region in pages
+ * @align: The minimum alignment in pages
+ *
+ * An @align of 0 is treated as 1. Note that the fixed-address path is chosen
+ * on the pfn of @addr being non-zero, so any @addr below one page behaves
+ * like 0.
+ *
+ * Return: 0 on success, -ENOMEM when no suitable free region was found, when
+ * the enclosing region is missing, invalid or not free, or when inserting at
+ * a fixed address failed; on the any-address path a failed insert returns the
+ * error from kbase_insert_va_region_nolock() unchanged.
+ */
+int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
+		struct kbase_va_region *reg,
+		u64 addr, size_t nr_pages, size_t align)
+{
+	struct device *const dev = kbdev->dev;
+	struct rb_root *rbtree = NULL;
+	struct kbase_va_region *tmp;
+	u64 gpu_pfn = addr >> PAGE_SHIFT;
+	int err = 0;
+
+	rbtree = reg->rbtree;
+
+	if (!align)
+		align = 1;
+
+	/* must be a power of 2 */
+	KBASE_DEBUG_ASSERT(is_power_of_2(align));
+	KBASE_DEBUG_ASSERT(nr_pages > 0);
+
+	/* Path 1: Map a specific address. Find the enclosing region,
+	 * which *must* be free.
+	 */
+	if (gpu_pfn) {
+		KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1)));
+
+		tmp = find_region_enclosing_range_rbtree(rbtree, gpu_pfn,
+				nr_pages);
+		if (kbase_is_region_invalid(tmp)) {
+			dev_warn(dev, "Enclosing region not found or invalid: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages);
+			err = -ENOMEM;
+			goto exit;
+		} else if (!kbase_is_region_free(tmp)) {
+			dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n",
+					tmp->start_pfn, tmp->flags,
+					tmp->nr_pages, gpu_pfn, nr_pages);
+			err = -ENOMEM;
+			goto exit;
+		}
+
+		err = kbase_insert_va_region_nolock(reg, tmp, gpu_pfn,
+				nr_pages);
+		if (err) {
+			dev_warn(dev, "Failed to insert va region");
+			err = -ENOMEM;
+		}
+	} else {
+		/* Path 2: Map any free address which meets the requirements. */
+		u64 start_pfn;
+		size_t align_offset = align;
+		size_t align_mask = align - 1;
+
+		if ((reg->flags & KBASE_REG_TILER_ALIGN_TOP)) {
+			WARN(align > 1, "%s with align %lx might not be honored for KBASE_REG_TILER_ALIGN_TOP memory",
+					__func__,
+					(unsigned long)align);
+			/* Alignment is derived from the region's extent and
+			 * initial commit instead of from align.
+			 */
+			align_mask = reg->extent - 1;
+			align_offset = reg->extent - reg->initial_commit;
+		}
+
+		tmp = kbase_region_tracker_find_region_meeting_reqs(reg,
+				nr_pages, align_offset, align_mask,
+				&start_pfn);
+		if (tmp) {
+			err = kbase_insert_va_region_nolock(reg, tmp,
+							start_pfn, nr_pages);
+			if (unlikely(err)) {
+				dev_warn(dev, "Failed to insert region: 0x%08llx start_pfn, %zu nr_pages",
+					start_pfn, nr_pages);
+			}
+		} else {
+			dev_dbg(dev, "Failed to find a suitable region: %zu nr_pages, %zu align_offset, %zu align_mask\n",
+				nr_pages, align_offset, align_mask);
+			err = -ENOMEM;
+		}
+	}
+
+exit:
+	return err;
+}
+
+/**
+ * @brief Initialize the internal region tracker data structure.
+ */
+static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
+		struct kbase_va_region *same_va_reg,
+		struct kbase_va_region *custom_va_reg)
+{
+	kctx->reg_rbtree_same = RB_ROOT;
+	kbase_region_tracker_insert(same_va_reg);
+
+	/* Although custom_va_reg and exec_va_reg don't always exist,
+	 * initialize unconditionally because of the mem_view debugfs
+	 * implementation which relies on them being empty.
+	 *
+	 * The difference between the two is that the EXEC_VA region
+	 * is never initialized at this stage.
+	 */
+	kctx->reg_rbtree_custom = RB_ROOT;
+	kctx->reg_rbtree_exec = RB_ROOT;
+
+	if (custom_va_reg)
+		kbase_region_tracker_insert(custom_va_reg);
+}
+
+/*
+ * Empty rbtree: repeatedly erase the first node and hand its region to
+ * kbase_free_alloced_region(). Warns if a region's va_refcnt is not 1.
+ */
+static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	do {
+		rbnode = rb_first(rbtree);
+		if (rbnode) {
+			rb_erase(rbnode, rbtree);
+			reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+			WARN_ON(reg->va_refcnt != 1);
+			/* Reset the start_pfn - as the rbtree is being
+			 * destroyed and we've already erased this region, there
+			 * is no further need to attempt to remove it.
+			 * This won't affect the cleanup if the region was
+			 * being used as a sticky resource as the cleanup
+			 * related to sticky resources anyways need to be
+			 * performed before the term of region tracker.
+			 */
+			reg->start_pfn = 0;
+			kbase_free_alloced_region(reg);
+		}
+	} while (rbnode);
+}
+
+/* Tear down all three zone trees of kctx under the GPU VM lock. */
+void kbase_region_tracker_term(struct kbase_context *kctx)
+{
+	kbase_gpu_vm_lock(kctx);
+	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same);
+	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom);
+	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec);
+	kbase_gpu_vm_unlock(kctx);
+}
+
+/* Tear down a single tree; unlike kbase_region_tracker_term() this takes
+ * no lock itself.
+ */
+void kbase_region_tracker_term_rbtree(struct rb_root *rbtree)
+{
+	kbase_region_tracker_erase_rbtree(rbtree);
+}
+
+/* Number of VA bits usable for SAME_VA: the smaller of the CPU VA width
+ * and the GPU MMU's va_bits.
+ */
+static size_t kbase_get_same_va_bits(struct kbase_context *kctx)
+{
+	return min(kbase_get_num_cpu_va_bits(kctx),
+			(size_t) kctx->kbdev->gpu_props.mmu.va_bits);
+}
+
+/*
+ * Create the initial free regions for kctx: a SAME_VA region starting at
+ * pfn 1 and, for KCTX_COMPAT contexts (or always when CONFIG_64BIT is not
+ * set), a CUSTOM_VA region clamped to the GPU VA limit. Also initialises
+ * same_va_end, gpu_va_end, exec_va_start (U64_MAX) and jit_va (false).
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or -EINVAL when the
+ * GPU VA limit does not reach past KBASE_REG_ZONE_CUSTOM_VA_BASE.
+ */
+int kbase_region_tracker_init(struct kbase_context *kctx)
+{
+	struct kbase_va_region *same_va_reg;
+	struct kbase_va_region *custom_va_reg = NULL;
+	size_t same_va_bits = kbase_get_same_va_bits(kctx);
+	u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
+	u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT;
+	u64 same_va_pages;
+	int err;
+
+	/* Take the lock as kbase_free_alloced_region requires it */
+	kbase_gpu_vm_lock(kctx);
+
+	/* One page fewer than the full range because the region starts at
+	 * pfn 1 rather than pfn 0.
+	 */
+	same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
+	/* all have SAME_VA */
+	same_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 1,
+			same_va_pages,
+			KBASE_REG_ZONE_SAME_VA);
+
+	if (!same_va_reg) {
+		err = -ENOMEM;
+		goto fail_unlock;
+	}
+
+#ifdef CONFIG_64BIT
+	/* 32-bit clients have custom VA zones */
+	if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+#endif
+		if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
+			err = -EINVAL;
+			goto fail_free_same_va;
+		}
+		/* If the current size of TMEM is out of range of the
+		 * virtual address space addressable by the MMU then
+		 * we should shrink it to fit
+		 */
+		if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
+			custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
+
+		custom_va_reg = kbase_alloc_free_region(
+				&kctx->reg_rbtree_custom,
+				KBASE_REG_ZONE_CUSTOM_VA_BASE,
+				custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
+
+		if (!custom_va_reg) {
+			err = -ENOMEM;
+			goto fail_free_same_va;
+		}
+#ifdef CONFIG_64BIT
+	} else {
+		custom_va_size = 0;
+	}
+#endif
+
+	kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg);
+
+	kctx->same_va_end = same_va_pages + 1;
+	kctx->gpu_va_end = kctx->same_va_end + custom_va_size;
+	kctx->exec_va_start = U64_MAX;
+	kctx->jit_va = false;
+
+
+	kbase_gpu_vm_unlock(kctx);
+	return 0;
+
+fail_free_same_va:
+	kbase_free_alloced_region(same_va_reg);
+fail_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+
+#ifdef CONFIG_64BIT
+/*
+ * Carve jit_va_pages off the end of the SAME_VA free region and turn them
+ * into a CUSTOM_VA free region for JIT. Asserts (lockdep) that
+ * kctx->reg_lock is held.
+ *
+ * Returns 0 on success, -EINVAL if kctx->jit_va is already set, or -ENOMEM
+ * if the SAME_VA region is not found at PAGE_SIZE, is too small, or the new
+ * region cannot be allocated.
+ */
+static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
+		u64 jit_va_pages)
+{
+	struct kbase_va_region *same_va;
+	struct kbase_va_region *custom_va_reg;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* First verify that a JIT_VA zone has not been created already. */
+	if (kctx->jit_va)
+		return -EINVAL;
+
+	/*
+	 * Modify the same VA free region after creation. Be careful to ensure
+	 * that allocations haven't been made as they could cause an overlap
+	 * to happen with existing same VA allocations and the custom VA zone.
+	 */
+	same_va = kbase_region_tracker_find_region_base_address(kctx,
+			PAGE_SIZE);
+	if (!same_va)
+		return -ENOMEM;
+
+	if (same_va->nr_pages < jit_va_pages || kctx->same_va_end < jit_va_pages)
+		return -ENOMEM;
+
+	/* It's safe to adjust the same VA zone now */
+	same_va->nr_pages -= jit_va_pages;
+	kctx->same_va_end -= jit_va_pages;
+
+	/*
+	 * Create a custom VA zone at the end of the VA for allocations which
+	 * JIT can use so it doesn't have to allocate VA from the kernel.
+	 */
+	custom_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom,
+				kctx->same_va_end,
+				jit_va_pages,
+				KBASE_REG_ZONE_CUSTOM_VA);
+
+	/*
+	 * The context will be destroyed if we fail here so no point
+	 * reverting the change we made to same_va.
+	 */
+	if (!custom_va_reg)
+		return -ENOMEM;
+
+	kbase_region_tracker_insert(custom_va_reg);
+	return 0;
+}
+#endif
+
+/*
+ * Validate the JIT parameters and record them in kctx. On CONFIG_64BIT
+ * kernels, non-KCTX_COMPAT contexts additionally get a JIT zone created by
+ * kbase_region_tracker_init_jit_64().
+ *
+ * Returns -EINVAL when trim_level or group_id is out of range, or when
+ * phys_pages_limit fails the check selected by MALI_JIT_PRESSURE_LIMIT
+ * (must not exceed jit_va_pages when enabled, must equal it otherwise);
+ * otherwise 0 or the error from the 64-bit zone setup.
+ */
+int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
+		int max_allocations, int trim_level, int group_id,
+		u64 phys_pages_limit)
+{
+	int err = 0;
+
+	if (trim_level < 0 || trim_level > BASE_JIT_MAX_TRIM_LEVEL)
+		return -EINVAL;
+
+	if (group_id < 0 || group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)
+		return -EINVAL;
+
+#if MALI_JIT_PRESSURE_LIMIT
+	if (phys_pages_limit > jit_va_pages)
+#else
+	if (phys_pages_limit != jit_va_pages)
+#endif /* MALI_JIT_PRESSURE_LIMIT */
+		return -EINVAL;
+
+	kbase_gpu_vm_lock(kctx);
+
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
+		err = kbase_region_tracker_init_jit_64(kctx, jit_va_pages);
+#endif
+	/*
+	 * Nothing to do for 32-bit clients, JIT uses the existing
+	 * custom VA zone.
+	 */
+
+	if (!err) {
+		kctx->jit_max_allocations = max_allocations;
+		kctx->trim_level = trim_level;
+		kctx->jit_va = true;
+		kctx->jit_group_id = group_id;
+#if MALI_JIT_PRESSURE_LIMIT
+		kctx->jit_phys_pages_limit = phys_pages_limit;
+		dev_dbg(kctx->kbdev->dev, "phys_pages_limit set to %llu\n",
+				phys_pages_limit);
+#endif /* MALI_JIT_PRESSURE_LIMIT */
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return err;
+}
+
+/*
+ * Create the EXEC_VA zone of exec_va_pages pages at the top of the current
+ * GPU VA range by shrinking the region that currently covers that range.
+ *
+ * Returns 0 on success; -EINVAL for a zero or over-large page count; -EPERM
+ * if a JIT zone already exists or the covering region is already EXEC_VA;
+ * -ENOMEM if the range does not fit or an allocation fails.
+ */
+int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages)
+{
+	struct kbase_va_region *shrinking_va_reg;
+	struct kbase_va_region *exec_va_reg;
+	u64 exec_va_start, exec_va_base_addr;
+	int err;
+
+	/* The EXEC_VA zone shall be created by making space at the end of the
+	 * address space. Firstly, verify that the number of EXEC_VA pages
+	 * requested by the client is reasonable and then make sure that it is
+	 * not greater than the address space itself before calculating the base
+	 * address of the new zone.
+	 */
+	if (exec_va_pages == 0 || exec_va_pages > KBASE_REG_ZONE_EXEC_VA_MAX_PAGES)
+		return -EINVAL;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* First verify that a JIT_VA zone has not been created already. */
+	if (kctx->jit_va) {
+		err = -EPERM;
+		goto exit_unlock;
+	}
+
+	if (exec_va_pages > kctx->gpu_va_end) {
+		err = -ENOMEM;
+		goto exit_unlock;
+	}
+
+	exec_va_start = kctx->gpu_va_end - exec_va_pages;
+	exec_va_base_addr = exec_va_start << PAGE_SHIFT;
+
+	shrinking_va_reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+			exec_va_base_addr);
+	if (!shrinking_va_reg) {
+		err = -ENOMEM;
+		goto exit_unlock;
+	}
+
+	/* Make sure that the EXEC_VA region is still uninitialized */
+	if ((shrinking_va_reg->flags & KBASE_REG_ZONE_MASK) ==
+			KBASE_REG_ZONE_EXEC_VA) {
+		err = -EPERM;
+		goto exit_unlock;
+	}
+
+	if (shrinking_va_reg->nr_pages <= exec_va_pages) {
+		err = -ENOMEM;
+		goto exit_unlock;
+	}
+
+	exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec,
+			exec_va_start,
+			exec_va_pages,
+			KBASE_REG_ZONE_EXEC_VA);
+	if (!exec_va_reg) {
+		err = -ENOMEM;
+		goto exit_unlock;
+	}
+
+	shrinking_va_reg->nr_pages -= exec_va_pages;
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
+		kctx->same_va_end -= exec_va_pages;
+#endif
+	kctx->exec_va_start = exec_va_start;
+
+	kbase_region_tracker_insert(exec_va_reg);
+	err = 0;
+
+exit_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
+}
+
+
+/*
+ * Device-level memory initialisation: set the default pool sizes, reset the
+ * used-page counter and IR threshold, select the memory group manager (the
+ * native one unless CONFIG_OF finds a "physical-memory-group-manager"
+ * phandle) and create the device memory pools.
+ *
+ * Returns 0 on success, -EPROBE_DEFER if the configured manager has no
+ * driver data yet, -ENODEV if its module reference cannot be taken, or the
+ * error from kbase_mem_pool_group_init().
+ */
+int kbase_mem_init(struct kbase_device *kbdev)
+{
+	int err = 0;
+	struct kbasep_mem_device *memdev;
+#ifdef CONFIG_OF
+	struct device_node *mgm_node = NULL;
+#endif
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	memdev = &kbdev->memdev;
+
+	kbase_mem_pool_group_config_set_max_size(&kbdev->mem_pool_defaults,
+		KBASE_MEM_POOL_MAX_SIZE_KCTX);
+
+	/* Initialize memory usage */
+	atomic_set(&memdev->used_pages, 0);
+
+#ifdef IR_THRESHOLD
+	atomic_set(&memdev->ir_threshold, IR_THRESHOLD);
+#else
+	atomic_set(&memdev->ir_threshold, DEFAULT_IR_THRESHOLD);
+#endif
+
+	/* Default; replaced below if a platform manager is configured */
+	kbdev->mgm_dev = &kbase_native_mgm_dev;
+
+#ifdef CONFIG_OF
+	/* Check to see whether or not a platform-specific memory group manager
+	 * is configured and available.
+	 */
+	mgm_node = of_parse_phandle(kbdev->dev->of_node,
+		"physical-memory-group-manager", 0);
+	if (!mgm_node) {
+		dev_info(kbdev->dev,
+			"No memory group manager is configured\n");
+	} else {
+		struct platform_device *const pdev =
+			of_find_device_by_node(mgm_node);
+
+		if (!pdev) {
+			/* Logged only: err stays 0 and the native manager
+			 * remains selected.
+			 */
+			dev_err(kbdev->dev,
+				"The configured memory group manager was not found\n");
+		} else {
+			kbdev->mgm_dev = platform_get_drvdata(pdev);
+			if (!kbdev->mgm_dev) {
+				dev_info(kbdev->dev,
+					"Memory group manager is not ready\n");
+				err = -EPROBE_DEFER;
+			} else if (!try_module_get(kbdev->mgm_dev->owner)) {
+				dev_err(kbdev->dev,
+					"Failed to get memory group manger module\n");
+				err = -ENODEV;
+				kbdev->mgm_dev = NULL;
+			} else {
+				dev_info(kbdev->dev,
+					"Memory group manager successfully loaded\n");
+			}
+		}
+		of_node_put(mgm_node);
+	}
+#endif
+
+	if (likely(!err)) {
+		struct kbase_mem_pool_group_config mem_pool_defaults;
+
+		kbase_mem_pool_group_config_set_max_size(&mem_pool_defaults,
+			KBASE_MEM_POOL_MAX_SIZE_KBDEV);
+
+		err = kbase_mem_pool_group_init(&kbdev->mem_pools, kbdev,
+			&mem_pool_defaults, NULL);
+	}
+
+	return err;
+}
+
+/* Intentionally empty: kbdev is only marked as unused. */
+void kbase_mem_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/*
+ * Device-level memory teardown: warn if pages are still accounted as used,
+ * terminate the device memory pools and drop the module reference on the
+ * memory group manager when one is set.
+ */
+void kbase_mem_term(struct kbase_device *kbdev)
+{
+	struct kbasep_mem_device *memdev;
+	int pages;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	memdev = &kbdev->memdev;
+
+	pages = atomic_read(&memdev->used_pages);
+	if (pages != 0)
+		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+
+	kbase_mem_pool_group_term(&kbdev->mem_pools);
+
+	if (kbdev->mgm_dev)
+		module_put(kbdev->mgm_dev->owner);
+}
+KBASE_EXPORT_TEST_API(kbase_mem_term);
+
+/**
+ * @brief Allocate a free region object.
+ *
+ * The allocated object is not part of any list yet, and is flagged as
+ * KBASE_REG_FREE. No mapping is allocated yet.
+ *
+ * zone is KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA.
+ *
+ */
+struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
+		u64 start_pfn, size_t nr_pages, int zone)
+{
+	struct kbase_va_region *new_reg;
+
+	KBASE_DEBUG_ASSERT(rbtree != NULL);
+
+	/* zone argument should only contain zone related region flags */
+	/* NOTE(review): callers in this file also pass KBASE_REG_ZONE_EXEC_VA
+	 * (see kbase_region_tracker_init_exec), which the header comment
+	 * does not list.
+	 */
+	KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0);
+	KBASE_DEBUG_ASSERT(nr_pages > 0);
+	/* 64-bit address range is the max */
+	KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE));
+
+	/* Zeroed allocation; returns NULL to the caller on failure */
+	new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL);
+
+	if (!new_reg)
+		return NULL;
+
+	new_reg->va_refcnt = 1;
+	new_reg->cpu_alloc = NULL;	/* no alloc bound yet */
+	new_reg->gpu_alloc = NULL;	/* no alloc bound yet */
+	new_reg->rbtree = rbtree;	/* remembered only; not inserted here */
+	new_reg->flags = zone | KBASE_REG_FREE;
+
+	new_reg->flags |= KBASE_REG_GROWABLE;
+
+	new_reg->start_pfn = start_pfn;
+	new_reg->nr_pages = nr_pages;
+
+	INIT_LIST_HEAD(&new_reg->jit_node);
+	INIT_LIST_HEAD(&new_reg->link);
+
+	return new_reg;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
+
+/*
+ * Recover the owning kbase_context from a region.
+ *
+ * Uses the zone bits in reg->flags to decide which reg_rbtree_* member of
+ * struct kbase_context reg->rbtree points at, then applies container_of().
+ * Returns NULL (after a WARN) for an unknown zone.
+ */
+static struct kbase_context *kbase_reg_flags_to_kctx(
+		struct kbase_va_region *reg)
+{
+	struct kbase_context *kctx = NULL;
+	struct rb_root *rbtree = reg->rbtree;
+
+	switch (reg->flags & KBASE_REG_ZONE_MASK) {
+	case KBASE_REG_ZONE_CUSTOM_VA:
+		kctx = container_of(rbtree, struct kbase_context,
+				reg_rbtree_custom);
+		break;
+	case KBASE_REG_ZONE_SAME_VA:
+		kctx = container_of(rbtree, struct kbase_context,
+				reg_rbtree_same);
+		break;
+	case KBASE_REG_ZONE_EXEC_VA:
+		kctx = container_of(rbtree, struct kbase_context,
+				reg_rbtree_exec);
+		break;
+	default:
+		WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags);
+		break;
+	}
+
+	return kctx;
+}
+
+/**
+ * @brief Free a region object.
+ *
+ * The described region must be freed of any mapping.
+ *
+ * If the region is not flagged as KBASE_REG_FREE, the region's
+ * alloc object will be released.
+ * It is a bug if no alloc object exists for non-free regions. + * + */ +void kbase_free_alloced_region(struct kbase_va_region *reg) +{ + if (!(reg->flags & KBASE_REG_FREE)) { + struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg); + + if (WARN_ON(!kctx)) + return; + + if (WARN_ON(kbase_is_region_invalid(reg))) + return; + + dev_dbg(kctx->kbdev->dev, "Freeing memory region %p\n", + (void *)reg); + + mutex_lock(&kctx->jit_evict_lock); + + /* + * The physical allocation should have been removed from the + * eviction list before this function is called. However, in the + * case of abnormal process termination or the app leaking the + * memory kbase_mem_free_region is not called so it can still be + * on the list at termination time of the region tracker. + */ + if (!list_empty(&reg->gpu_alloc->evict_node)) { + mutex_unlock(&kctx->jit_evict_lock); + + /* + * Unlink the physical allocation before unmaking it + * evictable so that the allocation isn't grown back to + * its last backed size as we're going to unmap it + * anyway. + */ + reg->cpu_alloc->reg = NULL; + if (reg->cpu_alloc != reg->gpu_alloc) + reg->gpu_alloc->reg = NULL; + + /* + * If a region has been made evictable then we must + * unmake it before trying to free it. + * If the memory hasn't been reclaimed it will be + * unmapped and freed below, if it has been reclaimed + * then the operations below are no-ops. + */ + if (reg->flags & KBASE_REG_DONT_NEED) { + KBASE_DEBUG_ASSERT(reg->cpu_alloc->type == + KBASE_MEM_TYPE_NATIVE); + kbase_mem_evictable_unmake(reg->gpu_alloc); + } + } else { + mutex_unlock(&kctx->jit_evict_lock); + } + + /* + * Remove the region from the sticky resource metadata + * list should it be there. 
+ */ + kbase_sticky_resource_release_force(kctx, NULL, + reg->start_pfn << PAGE_SHIFT); + + kbase_mem_phy_alloc_put(reg->cpu_alloc); + kbase_mem_phy_alloc_put(reg->gpu_alloc); + + reg->flags |= KBASE_REG_VA_FREED; + kbase_va_region_alloc_put(kctx, reg); + } else { + kfree(reg); + } +} + +KBASE_EXPORT_TEST_API(kbase_free_alloced_region); + +int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align) +{ + int err; + size_t i = 0; + unsigned long attr; + unsigned long mask = ~KBASE_REG_MEMATTR_MASK; + unsigned long gwt_mask = ~0; + int group_id; + struct kbase_mem_phy_alloc *alloc; + +#ifdef CONFIG_MALI_CINSTR_GWT + if (kctx->gwt_enabled) + gwt_mask = ~KBASE_REG_GPU_WR; +#endif + + if ((kctx->kbdev->system_coherency == COHERENCY_ACE) && + (reg->flags & KBASE_REG_SHARE_BOTH)) + attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_OUTER_WA); + else + attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC); + + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(NULL != reg); + + err = kbase_add_va_region(kctx, reg, addr, nr_pages, align); + if (err) + return err; + + alloc = reg->gpu_alloc; + group_id = alloc->group_id; + + if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) { + u64 const stride = alloc->imported.alias.stride; + + KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased); + for (i = 0; i < alloc->imported.alias.nents; i++) { + if (alloc->imported.alias.aliased[i].alloc) { + err = kbase_mmu_insert_pages(kctx->kbdev, + &kctx->mmu, + reg->start_pfn + (i * stride), + alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset, + alloc->imported.alias.aliased[i].length, + reg->flags & gwt_mask, + kctx->as_nr, + group_id); + if (err) + goto bad_insert; + + kbase_mem_phy_alloc_gpu_mapped(alloc->imported.alias.aliased[i].alloc); + } else { + err = kbase_mmu_insert_single_page(kctx, + reg->start_pfn + i * stride, + kctx->aliasing_sink_page, + alloc->imported.alias.aliased[i].length, + 
(reg->flags & mask & gwt_mask) | attr, + group_id); + + if (err) + goto bad_insert; + } + } + } else { + err = kbase_mmu_insert_pages(kctx->kbdev, + &kctx->mmu, + reg->start_pfn, + kbase_get_gpu_phy_pages(reg), + kbase_reg_current_backed_size(reg), + reg->flags & gwt_mask, + kctx->as_nr, + group_id); + if (err) + goto bad_insert; + kbase_mem_phy_alloc_gpu_mapped(alloc); + } + + if (reg->flags & KBASE_REG_IMPORT_PAD && + !WARN_ON(reg->nr_pages < reg->gpu_alloc->nents) && + reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM && + reg->gpu_alloc->imported.umm.current_mapping_usage_count) { + /* For padded imported dma-buf memory, map the dummy aliasing + * page from the end of the dma-buf pages, to the end of the + * region using a read only mapping. + * + * Only map when it's imported dma-buf memory that is currently + * mapped. + * + * Assume reg->gpu_alloc->nents is the number of actual pages + * in the dma-buf memory. + */ + err = kbase_mmu_insert_single_page(kctx, + reg->start_pfn + reg->gpu_alloc->nents, + kctx->aliasing_sink_page, + reg->nr_pages - reg->gpu_alloc->nents, + (reg->flags | KBASE_REG_GPU_RD) & + ~KBASE_REG_GPU_WR, + KBASE_MEM_GROUP_SINK); + if (err) + goto bad_insert; + } + + return err; + +bad_insert: + kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, + reg->start_pfn, reg->nr_pages, + kctx->as_nr); + + if (alloc->type == KBASE_MEM_TYPE_ALIAS) { + KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased); + while (i--) + if (alloc->imported.alias.aliased[i].alloc) + kbase_mem_phy_alloc_gpu_unmapped(alloc->imported.alias.aliased[i].alloc); + } + + kbase_remove_va_region(reg); + + return err; +} + +KBASE_EXPORT_TEST_API(kbase_gpu_mmap); + +static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc, bool writeable); + +int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) +{ + int err = 0; + size_t i; + + if (reg->start_pfn == 0) + return 0; + + if (!reg->gpu_alloc) + return -EINVAL; + + /* 
Tear down GPU page tables, depending on memory type. */ + switch (reg->gpu_alloc->type) { + case KBASE_MEM_TYPE_ALIAS: /* Fall-through */ + case KBASE_MEM_TYPE_IMPORTED_UMM: + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, + reg->start_pfn, reg->nr_pages, kctx->as_nr); + break; + default: + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, + reg->start_pfn, kbase_reg_current_backed_size(reg), + kctx->as_nr); + break; + } + + /* Update tracking, and other cleanup, depending on memory type. */ + switch (reg->gpu_alloc->type) { + case KBASE_MEM_TYPE_ALIAS: + KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased); + for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++) + if (reg->gpu_alloc->imported.alias.aliased[i].alloc) + kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc); + break; + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { + struct kbase_alloc_import_user_buf *user_buf = + &reg->gpu_alloc->imported.user_buf; + + if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) { + user_buf->current_mapping_usage_count &= + ~PINNED_ON_IMPORT; + + /* The allocation could still have active mappings. 
*/ + if (user_buf->current_mapping_usage_count == 0) { + kbase_jd_user_buf_unmap(kctx, reg->gpu_alloc, + (reg->flags & KBASE_REG_GPU_WR)); + } + } + } + /* Fall-through */ + default: + kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc); + break; + } + + return err; +} + +static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping( + struct kbase_context *kctx, + unsigned long uaddr, size_t size, u64 *offset) +{ + struct vm_area_struct *vma; + struct kbase_cpu_mapping *map; + unsigned long vm_pgoff_in_region; + unsigned long vm_off_in_region; + unsigned long map_start; + size_t map_size; + + lockdep_assert_held(&current->mm->mmap_sem); + + if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */ + return NULL; + + vma = find_vma_intersection(current->mm, uaddr, uaddr+size); + + if (!vma || vma->vm_start > uaddr) + return NULL; + if (vma->vm_ops != &kbase_vm_ops) + /* Not ours! */ + return NULL; + + map = vma->vm_private_data; + + if (map->kctx != kctx) + /* Not from this context! 
*/ + return NULL; + + vm_pgoff_in_region = vma->vm_pgoff - map->region->start_pfn; + vm_off_in_region = vm_pgoff_in_region << PAGE_SHIFT; + map_start = vma->vm_start - vm_off_in_region; + map_size = map->region->nr_pages << PAGE_SHIFT; + + if ((uaddr + size) > (map_start + map_size)) + /* Not within the CPU mapping */ + return NULL; + + *offset = (uaddr - vma->vm_start) + vm_off_in_region; + + return map; +} + +int kbasep_find_enclosing_cpu_mapping_offset( + struct kbase_context *kctx, + unsigned long uaddr, size_t size, u64 *offset) +{ + struct kbase_cpu_mapping *map; + + kbase_os_mem_map_lock(kctx); + + map = kbasep_find_enclosing_cpu_mapping(kctx, uaddr, size, offset); + + kbase_os_mem_map_unlock(kctx); + + if (!map) + return -EINVAL; + + return 0; +} + +KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset); + +int kbasep_find_enclosing_gpu_mapping_start_and_offset(struct kbase_context *kctx, + u64 gpu_addr, size_t size, u64 *start, u64 *offset) +{ + struct kbase_va_region *region; + + kbase_gpu_vm_lock(kctx); + + region = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr); + + if (!region) { + kbase_gpu_vm_unlock(kctx); + return -EINVAL; + } + + *start = region->start_pfn << PAGE_SHIFT; + + *offset = gpu_addr - *start; + + if (((region->start_pfn + region->nr_pages) << PAGE_SHIFT) < (gpu_addr + size)) { + kbase_gpu_vm_unlock(kctx); + return -EINVAL; + } + + kbase_gpu_vm_unlock(kctx); + + return 0; +} + +KBASE_EXPORT_TEST_API(kbasep_find_enclosing_gpu_mapping_start_and_offset); + +void kbase_sync_single(struct kbase_context *kctx, + struct tagged_addr t_cpu_pa, struct tagged_addr t_gpu_pa, + off_t offset, size_t size, enum kbase_sync_type sync_fn) +{ + struct page *cpu_page; + phys_addr_t cpu_pa = as_phys_addr_t(t_cpu_pa); + phys_addr_t gpu_pa = as_phys_addr_t(t_gpu_pa); + + cpu_page = pfn_to_page(PFN_DOWN(cpu_pa)); + + if (likely(cpu_pa == gpu_pa)) { + dma_addr_t dma_addr; + + BUG_ON(!cpu_page); + BUG_ON(offset + size > PAGE_SIZE); + + 
dma_addr = kbase_dma_addr(cpu_page) + offset; + if (sync_fn == KBASE_SYNC_TO_CPU) + dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, + size, DMA_BIDIRECTIONAL); + else if (sync_fn == KBASE_SYNC_TO_DEVICE) + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, + size, DMA_BIDIRECTIONAL); + } else { + void *src = NULL; + void *dst = NULL; + struct page *gpu_page; + + if (WARN(!gpu_pa, "No GPU PA found for infinite cache op")) + return; + + gpu_page = pfn_to_page(PFN_DOWN(gpu_pa)); + + if (sync_fn == KBASE_SYNC_TO_DEVICE) { + src = ((unsigned char *)kmap(cpu_page)) + offset; + dst = ((unsigned char *)kmap(gpu_page)) + offset; + } else if (sync_fn == KBASE_SYNC_TO_CPU) { + dma_sync_single_for_cpu(kctx->kbdev->dev, + kbase_dma_addr(gpu_page) + offset, + size, DMA_BIDIRECTIONAL); + src = ((unsigned char *)kmap(gpu_page)) + offset; + dst = ((unsigned char *)kmap(cpu_page)) + offset; + } + memcpy(dst, src, size); + kunmap(gpu_page); + kunmap(cpu_page); + if (sync_fn == KBASE_SYNC_TO_DEVICE) + dma_sync_single_for_device(kctx->kbdev->dev, + kbase_dma_addr(gpu_page) + offset, + size, DMA_BIDIRECTIONAL); + } +} + +static int kbase_do_syncset(struct kbase_context *kctx, + struct basep_syncset *sset, enum kbase_sync_type sync_fn) +{ + int err = 0; + struct kbase_va_region *reg; + struct kbase_cpu_mapping *map; + unsigned long start; + size_t size; + struct tagged_addr *cpu_pa; + struct tagged_addr *gpu_pa; + u64 page_off, page_count; + u64 i; + u64 offset; + + kbase_os_mem_map_lock(kctx); + kbase_gpu_vm_lock(kctx); + + /* find the region where the virtual address is contained */ + reg = kbase_region_tracker_find_region_enclosing_address(kctx, + sset->mem_handle.basep.handle); + if (kbase_is_region_invalid_or_free(reg)) { + dev_warn(kctx->kbdev->dev, "Can't find a valid region at VA 0x%016llX", + sset->mem_handle.basep.handle); + err = -EINVAL; + goto out_unlock; + } + + /* + * Handle imported memory before checking for KBASE_REG_CPU_CACHED. 
The + * CPU mapping cacheability is defined by the owner of the imported + * memory, and not by kbase, therefore we must assume that any imported + * memory may be cached. + */ + if (kbase_mem_is_imported(reg->gpu_alloc->type)) { + err = kbase_mem_do_sync_imported(kctx, reg, sync_fn); + goto out_unlock; + } + + if (!(reg->flags & KBASE_REG_CPU_CACHED)) + goto out_unlock; + + start = (uintptr_t)sset->user_addr; + size = (size_t)sset->size; + + map = kbasep_find_enclosing_cpu_mapping(kctx, start, size, &offset); + if (!map) { + dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX", + start, sset->mem_handle.basep.handle); + err = -EINVAL; + goto out_unlock; + } + + page_off = offset >> PAGE_SHIFT; + offset &= ~PAGE_MASK; + page_count = (size + offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + cpu_pa = kbase_get_cpu_phy_pages(reg); + gpu_pa = kbase_get_gpu_phy_pages(reg); + + if (page_off > reg->nr_pages || + page_off + page_count > reg->nr_pages) { + /* Sync overflows the region */ + err = -EINVAL; + goto out_unlock; + } + + /* Sync first page */ + if (as_phys_addr_t(cpu_pa[page_off])) { + size_t sz = MIN(((size_t) PAGE_SIZE - offset), size); + + kbase_sync_single(kctx, cpu_pa[page_off], gpu_pa[page_off], + offset, sz, sync_fn); + } + + /* Sync middle pages (if any) */ + for (i = 1; page_count > 2 && i < page_count - 1; i++) { + /* we grow upwards, so bail on first non-present page */ + if (!as_phys_addr_t(cpu_pa[page_off + i])) + break; + + kbase_sync_single(kctx, cpu_pa[page_off + i], + gpu_pa[page_off + i], 0, PAGE_SIZE, sync_fn); + } + + /* Sync last page (if any) */ + if (page_count > 1 && + as_phys_addr_t(cpu_pa[page_off + page_count - 1])) { + size_t sz = ((start + size - 1) & ~PAGE_MASK) + 1; + + kbase_sync_single(kctx, cpu_pa[page_off + page_count - 1], + gpu_pa[page_off + page_count - 1], 0, sz, + sync_fn); + } + +out_unlock: + kbase_gpu_vm_unlock(kctx); + kbase_os_mem_map_unlock(kctx); + return err; +} + +int kbase_sync_now(struct 
kbase_context *kctx, struct basep_syncset *sset) +{ + int err = -EINVAL; + + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(sset != NULL); + + if (sset->mem_handle.basep.handle & ~PAGE_MASK) { + dev_warn(kctx->kbdev->dev, + "mem_handle: passed parameter is invalid"); + return -EINVAL; + } + + switch (sset->type) { + case BASE_SYNCSET_OP_MSYNC: + err = kbase_do_syncset(kctx, sset, KBASE_SYNC_TO_DEVICE); + break; + + case BASE_SYNCSET_OP_CSYNC: + err = kbase_do_syncset(kctx, sset, KBASE_SYNC_TO_CPU); + break; + + default: + dev_warn(kctx->kbdev->dev, "Unknown msync op %d\n", sset->type); + break; + } + + return err; +} + +KBASE_EXPORT_TEST_API(kbase_sync_now); + +/* vm lock must be held */ +int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg) +{ + int err; + + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(NULL != reg); + dev_dbg(kctx->kbdev->dev, "%s %p in kctx %p\n", + __func__, (void *)reg, (void *)kctx); + lockdep_assert_held(&kctx->reg_lock); + + if (reg->flags & KBASE_REG_NO_USER_FREE) { + dev_warn(kctx->kbdev->dev, "Attempt to free GPU memory whose freeing by user space is forbidden!\n"); + return -EINVAL; + } + + /* + * Unlink the physical allocation before unmaking it evictable so + * that the allocation isn't grown back to its last backed size + * as we're going to unmap it anyway. + */ + reg->cpu_alloc->reg = NULL; + if (reg->cpu_alloc != reg->gpu_alloc) + reg->gpu_alloc->reg = NULL; + + /* + * If a region has been made evictable then we must unmake it + * before trying to free it. + * If the memory hasn't been reclaimed it will be unmapped and freed + * below, if it has been reclaimed then the operations below are no-ops. 
+ */ + if (reg->flags & KBASE_REG_DONT_NEED) { + KBASE_DEBUG_ASSERT(reg->cpu_alloc->type == + KBASE_MEM_TYPE_NATIVE); + kbase_mem_evictable_unmake(reg->gpu_alloc); + } + + err = kbase_gpu_munmap(kctx, reg); + if (err) { + dev_warn(kctx->kbdev->dev, "Could not unmap from the GPU...\n"); + goto out; + } + + /* This will also free the physical pages */ + kbase_free_alloced_region(reg); + + out: + return err; +} + +KBASE_EXPORT_TEST_API(kbase_mem_free_region); + +/** + * @brief Free the region from the GPU and unregister it. + * + * This function implements the free operation on a memory segment. + * It will loudly fail if called with outstanding mappings. + */ +int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) +{ + int err = 0; + struct kbase_va_region *reg; + + KBASE_DEBUG_ASSERT(kctx != NULL); + dev_dbg(kctx->kbdev->dev, "%s 0x%llx in kctx %p\n", + __func__, gpu_addr, (void *)kctx); + + if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) { + dev_warn(kctx->kbdev->dev, "kbase_mem_free: gpu_addr parameter is invalid"); + return -EINVAL; + } + + if (0 == gpu_addr) { + dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n"); + return -EINVAL; + } + kbase_gpu_vm_lock(kctx); + + if (gpu_addr >= BASE_MEM_COOKIE_BASE && + gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) { + int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE); + + reg = kctx->pending_regions[cookie]; + if (!reg) { + err = -EINVAL; + goto out_unlock; + } + + /* ask to unlink the cookie as we'll free it */ + + kctx->pending_regions[cookie] = NULL; + bitmap_set(kctx->cookies, cookie, 1); + + kbase_free_alloced_region(reg); + } else { + /* A real GPU va */ + /* Validate the region */ + reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); + if (kbase_is_region_invalid_or_free(reg)) { + dev_warn(kctx->kbdev->dev, "kbase_mem_free called with nonexistent gpu_addr 0x%llX", + gpu_addr); + err = -EINVAL; + goto 
out_unlock; + } + + if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) { + /* SAME_VA must be freed through munmap */ + dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__, + gpu_addr); + err = -EINVAL; + goto out_unlock; + } + err = kbase_mem_free_region(kctx, reg); + } + + out_unlock: + kbase_gpu_vm_unlock(kctx); + return err; +} + +KBASE_EXPORT_TEST_API(kbase_mem_free); + +int kbase_update_region_flags(struct kbase_context *kctx, + struct kbase_va_region *reg, unsigned long flags) +{ + KBASE_DEBUG_ASSERT(NULL != reg); + KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0); + + reg->flags |= kbase_cache_enabled(flags, reg->nr_pages); + /* all memory is now growable */ + reg->flags |= KBASE_REG_GROWABLE; + + if (flags & BASE_MEM_GROW_ON_GPF) + reg->flags |= KBASE_REG_PF_GROW; + + if (flags & BASE_MEM_PROT_CPU_WR) + reg->flags |= KBASE_REG_CPU_WR; + + if (flags & BASE_MEM_PROT_CPU_RD) + reg->flags |= KBASE_REG_CPU_RD; + + if (flags & BASE_MEM_PROT_GPU_WR) + reg->flags |= KBASE_REG_GPU_WR; + + if (flags & BASE_MEM_PROT_GPU_RD) + reg->flags |= KBASE_REG_GPU_RD; + + if (0 == (flags & BASE_MEM_PROT_GPU_EX)) + reg->flags |= KBASE_REG_GPU_NX; + + if (!kbase_device_is_cpu_coherent(kctx->kbdev)) { + if (flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED && + !(flags & BASE_MEM_UNCACHED_GPU)) + return -EINVAL; + } else if (flags & (BASE_MEM_COHERENT_SYSTEM | + BASE_MEM_COHERENT_SYSTEM_REQUIRED)) { + reg->flags |= KBASE_REG_SHARE_BOTH; + } + + if (!(reg->flags & KBASE_REG_SHARE_BOTH) && + flags & BASE_MEM_COHERENT_LOCAL) { + reg->flags |= KBASE_REG_SHARE_IN; + } + + if (flags & BASE_MEM_TILER_ALIGN_TOP) + reg->flags |= KBASE_REG_TILER_ALIGN_TOP; + + + /* Set up default MEMATTR usage */ + if (!(reg->flags & KBASE_REG_GPU_CACHED)) { + if (kctx->kbdev->mmu_mode->flags & + KBASE_MMU_MODE_HAS_NON_CACHEABLE) { + /* Override shareability, and MEMATTR for uncached */ + reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH); + 
reg->flags |= KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); + } else { + dev_warn(kctx->kbdev->dev, + "Can't allocate GPU uncached memory due to MMU in Legacy Mode\n"); + return -EINVAL; + } + } else if (kctx->kbdev->system_coherency == COHERENCY_ACE && + (reg->flags & KBASE_REG_SHARE_BOTH)) { + reg->flags |= + KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE); + } else { + reg->flags |= + KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT); + } + + if (flags & BASEP_MEM_PERMANENT_KERNEL_MAPPING) + reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING; + + if (flags & BASEP_MEM_NO_USER_FREE) + reg->flags |= KBASE_REG_NO_USER_FREE; + + if (flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) + reg->flags |= KBASE_REG_GPU_VA_SAME_4GB_PAGE; + + return 0; +} + +int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, + size_t nr_pages_requested) +{ + int new_page_count __maybe_unused; + size_t nr_left = nr_pages_requested; + int res; + struct kbase_context *kctx; + struct kbase_device *kbdev; + struct tagged_addr *tp; + + if (WARN_ON(alloc->type != KBASE_MEM_TYPE_NATIVE) || + WARN_ON(alloc->imported.native.kctx == NULL) || + WARN_ON(alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) { + return -EINVAL; + } + + if (alloc->reg) { + if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents) + goto invalid_request; + } + + kctx = alloc->imported.native.kctx; + kbdev = kctx->kbdev; + + if (nr_pages_requested == 0) + goto done; /*nothing to do*/ + + new_page_count = atomic_add_return( + nr_pages_requested, &kctx->used_pages); + atomic_add(nr_pages_requested, + &kctx->kbdev->memdev.used_pages); + + /* Increase mm counters before we allocate pages so that this + * allocation is visible to the OOM killer */ + kbase_process_page_usage_inc(kctx, nr_pages_requested); + + tp = alloc->pages + alloc->nents; + +#ifdef CONFIG_MALI_2MB_ALLOC + /* Check if we have enough pages requested so we can allocate a large + * page (512 * 4KB = 2MB ) + */ + if (nr_left >= (SZ_2M / SZ_4K)) 
{ + int nr_lp = nr_left / (SZ_2M / SZ_4K); + + res = kbase_mem_pool_alloc_pages( + &kctx->mem_pools.large[alloc->group_id], + nr_lp * (SZ_2M / SZ_4K), + tp, + true); + + if (res > 0) { + nr_left -= res; + tp += res; + } + + if (nr_left) { + struct kbase_sub_alloc *sa, *temp_sa; + + spin_lock(&kctx->mem_partials_lock); + + list_for_each_entry_safe(sa, temp_sa, + &kctx->mem_partials, link) { + int pidx = 0; + + while (nr_left) { + pidx = find_next_zero_bit(sa->sub_pages, + SZ_2M / SZ_4K, + pidx); + bitmap_set(sa->sub_pages, pidx, 1); + *tp++ = as_tagged_tag(page_to_phys(sa->page + + pidx), + FROM_PARTIAL); + nr_left--; + + if (bitmap_full(sa->sub_pages, SZ_2M / SZ_4K)) { + /* unlink from partial list when full */ + list_del_init(&sa->link); + break; + } + } + } + spin_unlock(&kctx->mem_partials_lock); + } + + /* only if we actually have a chunk left <512. If more it indicates + * that we couldn't allocate a 2MB above, so no point to retry here. + */ + if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) { + /* create a new partial and suballocate the rest from it */ + struct page *np = NULL; + + do { + int err; + + np = kbase_mem_pool_alloc( + &kctx->mem_pools.large[ + alloc->group_id]); + if (np) + break; + + err = kbase_mem_pool_grow( + &kctx->mem_pools.large[alloc->group_id], + 1); + if (err) + break; + } while (1); + + if (np) { + int i; + struct kbase_sub_alloc *sa; + struct page *p; + + sa = kmalloc(sizeof(*sa), GFP_KERNEL); + if (!sa) { + kbase_mem_pool_free( + &kctx->mem_pools.large[ + alloc->group_id], + np, + false); + goto no_new_partial; + } + + /* store pointers back to the control struct */ + np->lru.next = (void *)sa; + for (p = np; p < np + SZ_2M / SZ_4K; p++) + p->lru.prev = (void *)np; + INIT_LIST_HEAD(&sa->link); + bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K); + sa->page = np; + + for (i = 0; i < nr_left; i++) + *tp++ = as_tagged_tag(page_to_phys(np + i), FROM_PARTIAL); + + bitmap_set(sa->sub_pages, 0, nr_left); + nr_left = 0; + + /* expose for later use */ 
+ spin_lock(&kctx->mem_partials_lock); + list_add(&sa->link, &kctx->mem_partials); + spin_unlock(&kctx->mem_partials_lock); + } + } + } +no_new_partial: +#endif + + if (nr_left) { + res = kbase_mem_pool_alloc_pages( + &kctx->mem_pools.small[alloc->group_id], + nr_left, tp, false); + if (res <= 0) + goto alloc_failed; + } + + KBASE_TLSTREAM_AUX_PAGESALLOC( + kbdev, + kctx->id, + (u64)new_page_count); + + alloc->nents += nr_pages_requested; +done: + return 0; + +alloc_failed: + /* rollback needed if got one or more 2MB but failed later */ + if (nr_left != nr_pages_requested) { + size_t nr_pages_to_free = nr_pages_requested - nr_left; + + alloc->nents += nr_pages_to_free; + + kbase_process_page_usage_inc(kctx, nr_pages_to_free); + atomic_add(nr_pages_to_free, &kctx->used_pages); + atomic_add(nr_pages_to_free, + &kctx->kbdev->memdev.used_pages); + + kbase_free_phy_pages_helper(alloc, nr_pages_to_free); + } + + kbase_process_page_usage_dec(kctx, nr_pages_requested); + atomic_sub(nr_pages_requested, &kctx->used_pages); + atomic_sub(nr_pages_requested, + &kctx->kbdev->memdev.used_pages); + +invalid_request: + return -ENOMEM; +} + +struct tagged_addr *kbase_alloc_phy_pages_helper_locked( + struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool, + size_t nr_pages_requested, + struct kbase_sub_alloc **prealloc_sa) +{ + int new_page_count __maybe_unused; + size_t nr_left = nr_pages_requested; + int res; + struct kbase_context *kctx; + struct kbase_device *kbdev; + struct tagged_addr *tp; + struct tagged_addr *new_pages = NULL; + + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); + KBASE_DEBUG_ASSERT(alloc->imported.native.kctx); + + lockdep_assert_held(&pool->pool_lock); + +#if !defined(CONFIG_MALI_2MB_ALLOC) + WARN_ON(pool->order); +#endif + + if (alloc->reg) { + if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents) + goto invalid_request; + } + + kctx = alloc->imported.native.kctx; + kbdev = kctx->kbdev; + + 
lockdep_assert_held(&kctx->mem_partials_lock); + + if (nr_pages_requested == 0) + goto done; /*nothing to do*/ + + new_page_count = atomic_add_return( + nr_pages_requested, &kctx->used_pages); + atomic_add(nr_pages_requested, + &kctx->kbdev->memdev.used_pages); + + /* Increase mm counters before we allocate pages so that this + * allocation is visible to the OOM killer + */ + kbase_process_page_usage_inc(kctx, nr_pages_requested); + + tp = alloc->pages + alloc->nents; + new_pages = tp; + +#ifdef CONFIG_MALI_2MB_ALLOC + if (pool->order) { + int nr_lp = nr_left / (SZ_2M / SZ_4K); + + res = kbase_mem_pool_alloc_pages_locked(pool, + nr_lp * (SZ_2M / SZ_4K), + tp); + + if (res > 0) { + nr_left -= res; + tp += res; + } + + if (nr_left) { + struct kbase_sub_alloc *sa, *temp_sa; + + list_for_each_entry_safe(sa, temp_sa, + &kctx->mem_partials, link) { + int pidx = 0; + + while (nr_left) { + pidx = find_next_zero_bit(sa->sub_pages, + SZ_2M / SZ_4K, + pidx); + bitmap_set(sa->sub_pages, pidx, 1); + *tp++ = as_tagged_tag(page_to_phys( + sa->page + pidx), + FROM_PARTIAL); + nr_left--; + + if (bitmap_full(sa->sub_pages, + SZ_2M / SZ_4K)) { + /* unlink from partial list when + * full + */ + list_del_init(&sa->link); + break; + } + } + } + } + + /* only if we actually have a chunk left <512. If more it + * indicates that we couldn't allocate a 2MB above, so no point + * to retry here. 
+ */ + if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) { + /* create a new partial and suballocate the rest from it + */ + struct page *np = NULL; + + np = kbase_mem_pool_alloc_locked(pool); + + if (np) { + int i; + struct kbase_sub_alloc *const sa = *prealloc_sa; + struct page *p; + + /* store pointers back to the control struct */ + np->lru.next = (void *)sa; + for (p = np; p < np + SZ_2M / SZ_4K; p++) + p->lru.prev = (void *)np; + INIT_LIST_HEAD(&sa->link); + bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K); + sa->page = np; + + for (i = 0; i < nr_left; i++) + *tp++ = as_tagged_tag( + page_to_phys(np + i), + FROM_PARTIAL); + + bitmap_set(sa->sub_pages, 0, nr_left); + nr_left = 0; + /* Indicate to user that we'll free this memory + * later. + */ + *prealloc_sa = NULL; + + /* expose for later use */ + list_add(&sa->link, &kctx->mem_partials); + } + } + if (nr_left) + goto alloc_failed; + } else { +#endif + res = kbase_mem_pool_alloc_pages_locked(pool, + nr_left, + tp); + if (res <= 0) + goto alloc_failed; +#ifdef CONFIG_MALI_2MB_ALLOC + } +#endif + + KBASE_TLSTREAM_AUX_PAGESALLOC( + kbdev, + kctx->id, + (u64)new_page_count); + + alloc->nents += nr_pages_requested; +done: + return new_pages; + +alloc_failed: + /* rollback needed if got one or more 2MB but failed later */ + if (nr_left != nr_pages_requested) { + size_t nr_pages_to_free = nr_pages_requested - nr_left; + + struct tagged_addr *start_free = alloc->pages + alloc->nents; + +#ifdef CONFIG_MALI_2MB_ALLOC + if (pool->order) { + while (nr_pages_to_free) { + if (is_huge_head(*start_free)) { + kbase_mem_pool_free_pages_locked( + pool, 512, + start_free, + false, /* not dirty */ + true); /* return to pool */ + nr_pages_to_free -= 512; + start_free += 512; + } else if (is_partial(*start_free)) { + free_partial_locked(kctx, pool, + *start_free); + nr_pages_to_free--; + start_free++; + } + } + } else { +#endif + kbase_mem_pool_free_pages_locked(pool, + nr_pages_to_free, + start_free, + false, /* not dirty */ + true); /* 
return to pool */ +#ifdef CONFIG_MALI_2MB_ALLOC + } +#endif + } + + kbase_process_page_usage_dec(kctx, nr_pages_requested); + atomic_sub(nr_pages_requested, &kctx->used_pages); + atomic_sub(nr_pages_requested, &kctx->kbdev->memdev.used_pages); + +invalid_request: + return NULL; +} + +static void free_partial(struct kbase_context *kctx, int group_id, struct + tagged_addr tp) +{ + struct page *p, *head_page; + struct kbase_sub_alloc *sa; + + p = as_page(tp); + head_page = (struct page *)p->lru.prev; + sa = (struct kbase_sub_alloc *)head_page->lru.next; + spin_lock(&kctx->mem_partials_lock); + clear_bit(p - head_page, sa->sub_pages); + if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) { + list_del(&sa->link); + kbase_mem_pool_free( + &kctx->mem_pools.large[group_id], + head_page, + true); + kfree(sa); + } else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) == + SZ_2M / SZ_4K - 1) { + /* expose the partial again */ + list_add(&sa->link, &kctx->mem_partials); + } + spin_unlock(&kctx->mem_partials_lock); +} + +int kbase_free_phy_pages_helper( + struct kbase_mem_phy_alloc *alloc, + size_t nr_pages_to_free) +{ + struct kbase_context *kctx = alloc->imported.native.kctx; + struct kbase_device *kbdev = kctx->kbdev; + bool syncback; + bool reclaimed = (alloc->evicted != 0); + struct tagged_addr *start_free; + int new_page_count __maybe_unused; + size_t freed = 0; + + if (WARN_ON(alloc->type != KBASE_MEM_TYPE_NATIVE) || + WARN_ON(alloc->imported.native.kctx == NULL) || + WARN_ON(alloc->nents < nr_pages_to_free) || + WARN_ON(alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) { + return -EINVAL; + } + + /* early out if nothing to do */ + if (0 == nr_pages_to_free) + return 0; + + start_free = alloc->pages + alloc->nents - nr_pages_to_free; + + syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; + + /* pad start_free to a valid start location */ + while (nr_pages_to_free && is_huge(*start_free) && + !is_huge_head(*start_free)) { + nr_pages_to_free--; + 
start_free++; + } + + while (nr_pages_to_free) { + if (is_huge_head(*start_free)) { + /* This is a 2MB entry, so free all the 512 pages that + * it points to + */ + kbase_mem_pool_free_pages( + &kctx->mem_pools.large[alloc->group_id], + 512, + start_free, + syncback, + reclaimed); + nr_pages_to_free -= 512; + start_free += 512; + freed += 512; + } else if (is_partial(*start_free)) { + free_partial(kctx, alloc->group_id, *start_free); + nr_pages_to_free--; + start_free++; + freed++; + } else { + struct tagged_addr *local_end_free; + + local_end_free = start_free; + while (nr_pages_to_free && + !is_huge(*local_end_free) && + !is_partial(*local_end_free)) { + local_end_free++; + nr_pages_to_free--; + } + kbase_mem_pool_free_pages( + &kctx->mem_pools.small[alloc->group_id], + local_end_free - start_free, + start_free, + syncback, + reclaimed); + freed += local_end_free - start_free; + start_free += local_end_free - start_free; + } + } + + alloc->nents -= freed; + + /* + * If the allocation was not evicted (i.e. evicted == 0) then + * the page accounting needs to be done. 
+ */ + if (!reclaimed) { + kbase_process_page_usage_dec(kctx, freed); + new_page_count = atomic_sub_return(freed, + &kctx->used_pages); + atomic_sub(freed, + &kctx->kbdev->memdev.used_pages); + + KBASE_TLSTREAM_AUX_PAGESALLOC( + kbdev, + kctx->id, + (u64)new_page_count); + } + + return 0; +} + +static void free_partial_locked(struct kbase_context *kctx, + struct kbase_mem_pool *pool, struct tagged_addr tp) +{ + struct page *p, *head_page; + struct kbase_sub_alloc *sa; + + lockdep_assert_held(&pool->pool_lock); + lockdep_assert_held(&kctx->mem_partials_lock); + + p = as_page(tp); + head_page = (struct page *)p->lru.prev; + sa = (struct kbase_sub_alloc *)head_page->lru.next; + clear_bit(p - head_page, sa->sub_pages); + if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) { + list_del(&sa->link); + kbase_mem_pool_free_locked(pool, head_page, true); + kfree(sa); + } else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) == + SZ_2M / SZ_4K - 1) { + /* expose the partial again */ + list_add(&sa->link, &kctx->mem_partials); + } +} + +void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, + struct kbase_mem_pool *pool, struct tagged_addr *pages, + size_t nr_pages_to_free) +{ + struct kbase_context *kctx = alloc->imported.native.kctx; + struct kbase_device *kbdev = kctx->kbdev; + bool syncback; + bool reclaimed = (alloc->evicted != 0); + struct tagged_addr *start_free; + size_t freed = 0; + + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); + KBASE_DEBUG_ASSERT(alloc->imported.native.kctx); + KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free); + + lockdep_assert_held(&pool->pool_lock); + lockdep_assert_held(&kctx->mem_partials_lock); + + /* early out if nothing to do */ + if (!nr_pages_to_free) + return; + + start_free = pages; + + syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; + + /* pad start_free to a valid start location */ + while (nr_pages_to_free && is_huge(*start_free) && + !is_huge_head(*start_free)) { + 
nr_pages_to_free--; + start_free++; + } + + while (nr_pages_to_free) { + if (is_huge_head(*start_free)) { + /* This is a 2MB entry, so free all the 512 pages that + * it points to + */ + WARN_ON(!pool->order); + kbase_mem_pool_free_pages_locked(pool, + 512, + start_free, + syncback, + reclaimed); + nr_pages_to_free -= 512; + start_free += 512; + freed += 512; + } else if (is_partial(*start_free)) { + WARN_ON(!pool->order); + free_partial_locked(kctx, pool, *start_free); + nr_pages_to_free--; + start_free++; + freed++; + } else { + struct tagged_addr *local_end_free; + + WARN_ON(pool->order); + local_end_free = start_free; + while (nr_pages_to_free && + !is_huge(*local_end_free) && + !is_partial(*local_end_free)) { + local_end_free++; + nr_pages_to_free--; + } + kbase_mem_pool_free_pages_locked(pool, + local_end_free - start_free, + start_free, + syncback, + reclaimed); + freed += local_end_free - start_free; + start_free += local_end_free - start_free; + } + } + + alloc->nents -= freed; + + /* + * If the allocation was not evicted (i.e. evicted == 0) then + * the page accounting needs to be done. + */ + if (!reclaimed) { + int new_page_count; + + kbase_process_page_usage_dec(kctx, freed); + new_page_count = atomic_sub_return(freed, + &kctx->used_pages); + atomic_sub(freed, + &kctx->kbdev->memdev.used_pages); + + KBASE_TLSTREAM_AUX_PAGESALLOC( + kbdev, + kctx->id, + (u64)new_page_count); + } +} + + +void kbase_mem_kref_free(struct kref *kref) +{ + struct kbase_mem_phy_alloc *alloc; + + alloc = container_of(kref, struct kbase_mem_phy_alloc, kref); + + switch (alloc->type) { + case KBASE_MEM_TYPE_NATIVE: { + + if (!WARN_ON(!alloc->imported.native.kctx)) { + if (alloc->permanent_map) + kbase_phy_alloc_mapping_term( + alloc->imported.native.kctx, + alloc); + + /* + * The physical allocation must have been removed from + * the eviction list before trying to free it. 
+ */ + mutex_lock( + &alloc->imported.native.kctx->jit_evict_lock); + WARN_ON(!list_empty(&alloc->evict_node)); + mutex_unlock( + &alloc->imported.native.kctx->jit_evict_lock); + + kbase_process_page_usage_dec( + alloc->imported.native.kctx, + alloc->imported.native.nr_struct_pages); + } + kbase_free_phy_pages_helper(alloc, alloc->nents); + break; + } + case KBASE_MEM_TYPE_ALIAS: { + /* just call put on the underlying phy allocs */ + size_t i; + struct kbase_aliased *aliased; + + aliased = alloc->imported.alias.aliased; + if (aliased) { + for (i = 0; i < alloc->imported.alias.nents; i++) + if (aliased[i].alloc) + kbase_mem_phy_alloc_put(aliased[i].alloc); + vfree(aliased); + } + break; + } + case KBASE_MEM_TYPE_RAW: + /* raw pages, external cleanup */ + break; + case KBASE_MEM_TYPE_IMPORTED_UMM: + if (!IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) { + WARN_ONCE(alloc->imported.umm.current_mapping_usage_count != 1, + "WARNING: expected excatly 1 mapping, got %d", + alloc->imported.umm.current_mapping_usage_count); + dma_buf_unmap_attachment( + alloc->imported.umm.dma_attachment, + alloc->imported.umm.sgt, + DMA_BIDIRECTIONAL); + } + dma_buf_detach(alloc->imported.umm.dma_buf, + alloc->imported.umm.dma_attachment); + dma_buf_put(alloc->imported.umm.dma_buf); + break; + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: + if (alloc->imported.user_buf.mm) + mmdrop(alloc->imported.user_buf.mm); + if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE) + vfree(alloc->imported.user_buf.pages); + else + kfree(alloc->imported.user_buf.pages); + break; + default: + WARN(1, "Unexecpted free of type %d\n", alloc->type); + break; + } + + /* Free based on allocation type */ + if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE) + vfree(alloc); + else + kfree(alloc); +} + +KBASE_EXPORT_TEST_API(kbase_mem_kref_free); + +int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size) +{ + KBASE_DEBUG_ASSERT(NULL != reg); + KBASE_DEBUG_ASSERT(vsize > 0); + + /* validate user 
provided arguments */ + if (size > vsize || vsize > reg->nr_pages) + goto out_term; + + /* Prevent vsize*sizeof from wrapping around. + * For instance, if vsize is 2**29+1, we'll allocate 1 byte and the alloc won't fail. + */ + if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->cpu_alloc->pages))) + goto out_term; + + KBASE_DEBUG_ASSERT(0 != vsize); + + if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0) + goto out_term; + + reg->cpu_alloc->reg = reg; + if (reg->cpu_alloc != reg->gpu_alloc) { + if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0) + goto out_rollback; + reg->gpu_alloc->reg = reg; + } + + return 0; + +out_rollback: + kbase_free_phy_pages_helper(reg->cpu_alloc, size); +out_term: + return -1; +} + +KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages); + +bool kbase_check_alloc_flags(unsigned long flags) +{ + /* Only known input flags should be set. */ + if (flags & ~BASE_MEM_FLAGS_INPUT_MASK) + return false; + + /* At least one flag should be set */ + if (flags == 0) + return false; + + /* Either the GPU or CPU must be reading from the allocated memory */ + if ((flags & (BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD)) == 0) + return false; + + /* Either the GPU or CPU must be writing to the allocated memory */ + if ((flags & (BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR)) == 0) + return false; + + /* GPU executable memory cannot: + * - Be written by the GPU + * - Be grown on GPU page fault + * - Have the top of its initial commit aligned to 'extent' */ + if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & + (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF | + BASE_MEM_TILER_ALIGN_TOP))) + return false; + + /* To have an allocation lie within a 4GB chunk is required only for + * TLS memory, which will never be used to contain executable code + * and also used for Tiler heap. 
+ */ + if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags & + (BASE_MEM_PROT_GPU_EX | BASE_MEM_TILER_ALIGN_TOP))) + return false; + + /* GPU should have at least read or write access otherwise there is no + reason for allocating. */ + if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0) + return false; + + /* BASE_MEM_IMPORT_SHARED is only valid for imported memory */ + if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED) + return false; + + /* BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP is only valid for imported + * memory */ + if ((flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) == + BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) + return false; + + /* Should not combine BASE_MEM_COHERENT_LOCAL with + * BASE_MEM_COHERENT_SYSTEM */ + if ((flags & (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) == + (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) + return false; + + return true; +} + +bool kbase_check_import_flags(unsigned long flags) +{ + /* Only known input flags should be set. */ + if (flags & ~BASE_MEM_FLAGS_INPUT_MASK) + return false; + + /* At least one flag should be set */ + if (flags == 0) + return false; + + /* Imported memory cannot be GPU executable */ + if (flags & BASE_MEM_PROT_GPU_EX) + return false; + + /* Imported memory cannot grow on page fault */ + if (flags & BASE_MEM_GROW_ON_GPF) + return false; + + /* Imported memory cannot be aligned to the end of its initial commit */ + if (flags & BASE_MEM_TILER_ALIGN_TOP) + return false; + + /* GPU should have at least read or write access otherwise there is no + reason for importing. 
*/ + if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0) + return false; + + /* Protected memory cannot be read by the CPU */ + if ((flags & BASE_MEM_PROTECTED) && (flags & BASE_MEM_PROT_CPU_RD)) + return false; + + return true; +} + +int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, + u64 va_pages, u64 commit_pages, u64 large_extent) +{ + struct device *dev = kctx->kbdev->dev; + int gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size; + u64 gpu_pc_pages_max = 1ULL << gpu_pc_bits >> PAGE_SHIFT; + struct kbase_va_region test_reg; + + /* kbase_va_region's extent member can be of variable size, so check against that type */ + test_reg.extent = large_extent; + +#define KBASE_MSG_PRE "GPU allocation attempted with " + + if (0 == va_pages) { + dev_warn(dev, KBASE_MSG_PRE "0 va_pages!"); + return -EINVAL; + } + + if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) { + dev_warn(dev, KBASE_MSG_PRE "va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!", + (unsigned long long)va_pages); + return -ENOMEM; + } + + /* Note: commit_pages is checked against va_pages during + * kbase_alloc_phy_pages() */ + + /* Limit GPU executable allocs to GPU PC size */ + if ((flags & BASE_MEM_PROT_GPU_EX) && (va_pages > gpu_pc_pages_max)) { + dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_PROT_GPU_EX and va_pages==%lld larger than GPU PC range %lld", + (unsigned long long)va_pages, + (unsigned long long)gpu_pc_pages_max); + + return -EINVAL; + } + + if ((flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) && + test_reg.extent == 0) { + dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GROW_ON_GPF or BASE_MEM_TILER_ALIGN_TOP but extent == 0\n"); + return -EINVAL; + } + + if (!(flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) && + test_reg.extent != 0) { + dev_warn(dev, KBASE_MSG_PRE "neither BASE_MEM_GROW_ON_GPF nor BASE_MEM_TILER_ALIGN_TOP set but extent != 0\n"); + return -EINVAL; + } + + /* BASE_MEM_TILER_ALIGN_TOP memory has a number of 
restrictions */ + if (flags & BASE_MEM_TILER_ALIGN_TOP) { +#define KBASE_MSG_PRE_FLAG KBASE_MSG_PRE "BASE_MEM_TILER_ALIGN_TOP and " + unsigned long small_extent; + + if (large_extent > BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES) { + dev_warn(dev, KBASE_MSG_PRE_FLAG "extent==%lld pages exceeds limit %lld", + (unsigned long long)large_extent, + BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES); + return -EINVAL; + } + /* For use with is_power_of_2, which takes unsigned long, so + * must ensure e.g. on 32-bit kernel it'll fit in that type */ + small_extent = (unsigned long)large_extent; + + if (!is_power_of_2(small_extent)) { + dev_warn(dev, KBASE_MSG_PRE_FLAG "extent==%ld not a non-zero power of 2", + small_extent); + return -EINVAL; + } + + if (commit_pages > large_extent) { + dev_warn(dev, KBASE_MSG_PRE_FLAG "commit_pages==%ld exceeds extent==%ld", + (unsigned long)commit_pages, + (unsigned long)large_extent); + return -EINVAL; + } +#undef KBASE_MSG_PRE_FLAG + } + + if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && + (va_pages > (BASE_MEM_PFN_MASK_4GB + 1))) { + dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GPU_VA_SAME_4GB_PAGE and va_pages==%lld greater than that needed for 4GB space", + (unsigned long long)va_pages); + return -EINVAL; + } + + return 0; +#undef KBASE_MSG_PRE +} + +/** + * @brief Acquire the per-context region list lock + */ +void kbase_gpu_vm_lock(struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kctx != NULL); + mutex_lock(&kctx->reg_lock); +} + +KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock); + +/** + * @brief Release the per-context region list lock + */ +void kbase_gpu_vm_unlock(struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kctx != NULL); + mutex_unlock(&kctx->reg_lock); +} + +KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock); + +#ifdef CONFIG_DEBUG_FS +struct kbase_jit_debugfs_data { + int (*func)(struct kbase_jit_debugfs_data *); + struct mutex lock; + struct kbase_context *kctx; + u64 active_value; + u64 pool_value; + u64 destroy_value; + char buffer[50]; +}; + 
+static int kbase_jit_debugfs_common_open(struct inode *inode, + struct file *file, int (*func)(struct kbase_jit_debugfs_data *)) +{ + struct kbase_jit_debugfs_data *data; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->func = func; + mutex_init(&data->lock); + data->kctx = (struct kbase_context *) inode->i_private; + + file->private_data = data; + + return nonseekable_open(inode, file); +} + +static ssize_t kbase_jit_debugfs_common_read(struct file *file, + char __user *buf, size_t len, loff_t *ppos) +{ + struct kbase_jit_debugfs_data *data; + size_t size; + int ret; + + data = (struct kbase_jit_debugfs_data *) file->private_data; + mutex_lock(&data->lock); + + if (*ppos) { + size = strnlen(data->buffer, sizeof(data->buffer)); + } else { + if (!data->func) { + ret = -EACCES; + goto out_unlock; + } + + if (data->func(data)) { + ret = -EACCES; + goto out_unlock; + } + + size = scnprintf(data->buffer, sizeof(data->buffer), + "%llu,%llu,%llu", data->active_value, + data->pool_value, data->destroy_value); + } + + ret = simple_read_from_buffer(buf, len, ppos, data->buffer, size); + +out_unlock: + mutex_unlock(&data->lock); + return ret; +} + +static int kbase_jit_debugfs_common_release(struct inode *inode, + struct file *file) +{ + kfree(file->private_data); + return 0; +} + +#define KBASE_JIT_DEBUGFS_DECLARE(__fops, __func) \ +static int __fops ## _open(struct inode *inode, struct file *file) \ +{ \ + return kbase_jit_debugfs_common_open(inode, file, __func); \ +} \ +static const struct file_operations __fops = { \ + .owner = THIS_MODULE, \ + .open = __fops ## _open, \ + .release = kbase_jit_debugfs_common_release, \ + .read = kbase_jit_debugfs_common_read, \ + .write = NULL, \ + .llseek = generic_file_llseek, \ +} + +static int kbase_jit_debugfs_count_get(struct kbase_jit_debugfs_data *data) +{ + struct kbase_context *kctx = data->kctx; + struct list_head *tmp; + + mutex_lock(&kctx->jit_evict_lock); + list_for_each(tmp, 
&kctx->jit_active_head) { + data->active_value++; + } + + list_for_each(tmp, &kctx->jit_pool_head) { + data->pool_value++; + } + + list_for_each(tmp, &kctx->jit_destroy_head) { + data->destroy_value++; + } + mutex_unlock(&kctx->jit_evict_lock); + + return 0; +} +KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops, + kbase_jit_debugfs_count_get); + +static int kbase_jit_debugfs_vm_get(struct kbase_jit_debugfs_data *data) +{ + struct kbase_context *kctx = data->kctx; + struct kbase_va_region *reg; + + mutex_lock(&kctx->jit_evict_lock); + list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { + data->active_value += reg->nr_pages; + } + + list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) { + data->pool_value += reg->nr_pages; + } + + list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) { + data->destroy_value += reg->nr_pages; + } + mutex_unlock(&kctx->jit_evict_lock); + + return 0; +} +KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_vm_fops, + kbase_jit_debugfs_vm_get); + +static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data) +{ + struct kbase_context *kctx = data->kctx; + struct kbase_va_region *reg; + + mutex_lock(&kctx->jit_evict_lock); + list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { + data->active_value += reg->gpu_alloc->nents; + } + + list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) { + data->pool_value += reg->gpu_alloc->nents; + } + + list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) { + data->destroy_value += reg->gpu_alloc->nents; + } + mutex_unlock(&kctx->jit_evict_lock); + + return 0; +} +KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops, + kbase_jit_debugfs_phys_get); + +#if MALI_JIT_PRESSURE_LIMIT +static int kbase_jit_debugfs_used_get(struct kbase_jit_debugfs_data *data) +{ + struct kbase_context *kctx = data->kctx; + struct kbase_va_region *reg; + + mutex_lock(&kctx->jctx.lock); + mutex_lock(&kctx->jit_evict_lock); + list_for_each_entry(reg, &kctx->jit_active_head, jit_node) 
{ + data->active_value += reg->used_pages; + } + mutex_unlock(&kctx->jit_evict_lock); + mutex_unlock(&kctx->jctx.lock); + + return 0; +} + +KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_used_fops, + kbase_jit_debugfs_used_get); + +static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, + struct kbase_va_region *reg, size_t pages_needed, + size_t *freed, bool shrink); + +static int kbase_jit_debugfs_trim_get(struct kbase_jit_debugfs_data *data) +{ + struct kbase_context *kctx = data->kctx; + struct kbase_va_region *reg; + + mutex_lock(&kctx->jctx.lock); + kbase_gpu_vm_lock(kctx); + mutex_lock(&kctx->jit_evict_lock); + list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { + int err; + size_t freed = 0u; + + err = kbase_mem_jit_trim_pages_from_region(kctx, reg, + SIZE_MAX, &freed, false); + + if (err) { + /* Failed to calculate, try the next region */ + continue; + } + + data->active_value += freed; + } + mutex_unlock(&kctx->jit_evict_lock); + kbase_gpu_vm_unlock(kctx); + mutex_unlock(&kctx->jctx.lock); + + return 0; +} + +KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_trim_fops, + kbase_jit_debugfs_trim_get); +#endif /* MALI_JIT_PRESSURE_LIMIT */ + +void kbase_jit_debugfs_init(struct kbase_context *kctx) +{ + /* prevent unprivileged use of debug file system + * in old kernel version + */ +#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) + /* only for newer kernel version debug file system is safe */ + const mode_t mode = 0444; +#else + const mode_t mode = 0400; +#endif + + /* Caller already ensures this, but we keep the pattern for + * maintenance safety. + */ + if (WARN_ON(!kctx) || + WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry))) + return; + + + + /* Debugfs entry for getting the number of JIT allocations. */ + debugfs_create_file("mem_jit_count", mode, kctx->kctx_dentry, + kctx, &kbase_jit_debugfs_count_fops); + + /* + * Debugfs entry for getting the total number of virtual pages + * used by JIT allocations. 
+ */ + debugfs_create_file("mem_jit_vm", mode, kctx->kctx_dentry, + kctx, &kbase_jit_debugfs_vm_fops); + + /* + * Debugfs entry for getting the number of physical pages used + * by JIT allocations. + */ + debugfs_create_file("mem_jit_phys", mode, kctx->kctx_dentry, + kctx, &kbase_jit_debugfs_phys_fops); +#if MALI_JIT_PRESSURE_LIMIT + /* + * Debugfs entry for getting the number of pages used + * by JIT allocations for estimating the physical pressure + * limit. + */ + debugfs_create_file("mem_jit_used", mode, kctx->kctx_dentry, + kctx, &kbase_jit_debugfs_used_fops); + + /* + * Debugfs entry for getting the number of pages that could + * be trimmed to free space for more JIT allocations. + */ + debugfs_create_file("mem_jit_trim", mode, kctx->kctx_dentry, + kctx, &kbase_jit_debugfs_trim_fops); +#endif /* MALI_JIT_PRESSURE_LIMIT */ +} +#endif /* CONFIG_DEBUG_FS */ + +/** + * kbase_jit_destroy_worker - Deferred worker which frees JIT allocations + * @work: Work item + * + * This function does the work of freeing JIT allocations whose physical + * backing has been released. 
+ */ +static void kbase_jit_destroy_worker(struct work_struct *work) +{ + struct kbase_context *kctx; + struct kbase_va_region *reg; + + kctx = container_of(work, struct kbase_context, jit_work); + do { + mutex_lock(&kctx->jit_evict_lock); + if (list_empty(&kctx->jit_destroy_head)) { + mutex_unlock(&kctx->jit_evict_lock); + break; + } + + reg = list_first_entry(&kctx->jit_destroy_head, + struct kbase_va_region, jit_node); + + list_del(&reg->jit_node); + mutex_unlock(&kctx->jit_evict_lock); + + kbase_gpu_vm_lock(kctx); + reg->flags &= ~KBASE_REG_NO_USER_FREE; + kbase_mem_free_region(kctx, reg); + kbase_gpu_vm_unlock(kctx); + } while (1); +} + +int kbase_jit_init(struct kbase_context *kctx) +{ + mutex_lock(&kctx->jit_evict_lock); + INIT_LIST_HEAD(&kctx->jit_active_head); + INIT_LIST_HEAD(&kctx->jit_pool_head); + INIT_LIST_HEAD(&kctx->jit_destroy_head); + INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker); + + INIT_LIST_HEAD(&kctx->jctx.jit_atoms_head); + INIT_LIST_HEAD(&kctx->jctx.jit_pending_alloc); + mutex_unlock(&kctx->jit_evict_lock); + + kctx->jit_max_allocations = 0; + kctx->jit_current_allocations = 0; + kctx->trim_level = 0; + + return 0; +} + +/* Check if the allocation from JIT pool is of the same size as the new JIT + * allocation and also, if BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP is set, meets + * the alignment requirements. 
+ */ +static bool meet_size_and_tiler_align_top_requirements(struct kbase_context *kctx, + struct kbase_va_region *walker, const struct base_jit_alloc_info *info) +{ + bool meet_reqs = true; + + if (walker->nr_pages != info->va_pages) + meet_reqs = false; + else if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) { + size_t align = info->extent; + size_t align_mask = align - 1; + + if ((walker->start_pfn + info->commit_pages) & align_mask) + meet_reqs = false; + } + + return meet_reqs; +} + +#if MALI_JIT_PRESSURE_LIMIT +/* Function will guarantee *@freed will not exceed @pages_needed + */ +static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, + struct kbase_va_region *reg, size_t pages_needed, + size_t *freed, bool shrink) +{ + int err = 0; + size_t available_pages = 0u; + const size_t old_pages = kbase_reg_current_backed_size(reg); + size_t new_pages = old_pages; + size_t to_free = 0u; + size_t max_allowed_pages = old_pages; + + lockdep_assert_held(&kctx->jctx.lock); + lockdep_assert_held(&kctx->reg_lock); + + /* Is this a JIT allocation that has been reported on? */ + if (reg->used_pages == reg->nr_pages) + goto out; + + if (!(reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE)) { + /* For address based memory usage calculation, the GPU + * allocates objects of up to size 's', but aligns every object + * to alignment 'a', with a < s. + * + * It also doesn't have to write to all bytes in an object of + * size 's'. + * + * Hence, we can observe the GPU's address for the end of used + * memory being up to (s - a) bytes into the first unallocated + * page. + * + * We allow for this and only warn when it exceeds this bound + * (rounded up to page sized units). Note, this is allowed to + * exceed reg->nr_pages. 
+ */ + max_allowed_pages += PFN_UP( + KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES - + KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES); + } else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { + /* The GPU could report being ready to write to the next + * 'extent' sized chunk, but didn't actually write to it, so we + * can report up to 'extent' size pages more than the backed + * size. + * + * Note, this is allowed to exceed reg->nr_pages. + */ + max_allowed_pages += reg->extent; + + /* Also note that in these GPUs, the GPU may make a large (>1 + * page) initial allocation but not actually write out to all + * of it. Hence it might report that a much higher amount of + * memory was used than actually was written to. This does not + * result in a real warning because on growing this memory we + * round up the size of the allocation up to an 'extent' sized + * chunk, hence automatically bringing the backed size up to + * the reported size. + */ + } + + if (old_pages < reg->used_pages) { + /* Prevent overflow on available_pages, but only report the + * problem if it's in a scenario where used_pages should have + * been consistent with the backed size + * + * Note: In case of a size-based report, this legitimately + * happens in common use-cases: we allow for up to this size of + * memory being used, but depending on the content it doesn't + * have to use all of it. 
+ * + * Hence, we're much more quiet about that in the size-based + * report case - it's not indicating a real problem, it's just + * for information + */ + if (max_allowed_pages < reg->used_pages) { + if (!(reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE)) + dev_warn(kctx->kbdev->dev, + "%s: current backed pages %zu < reported used pages %zu (allowed to be up to %zu) on JIT 0x%llx vapages %zu\n", + __func__, + old_pages, reg->used_pages, + max_allowed_pages, + reg->start_pfn << PAGE_SHIFT, + reg->nr_pages); + else + dev_dbg(kctx->kbdev->dev, + "%s: no need to trim, current backed pages %zu < reported used pages %zu on size-report for JIT 0x%llx vapages %zu\n", + __func__, + old_pages, reg->used_pages, + reg->start_pfn << PAGE_SHIFT, + reg->nr_pages); + } + /* In any case, no error condition to report here, caller can + * try other regions + */ + + goto out; + } + available_pages = old_pages - reg->used_pages; + to_free = min(available_pages, pages_needed); + + if (shrink) { + new_pages -= to_free; + + err = kbase_mem_shrink(kctx, reg, new_pages); + } +out: + trace_mali_jit_trim_from_region(reg, to_free, old_pages, + available_pages, new_pages); + *freed = to_free; + return err; +} + + +/** + * kbase_mem_jit_trim_pages - Trim JIT regions until sufficient pages have been + * freed + * @kctx: Pointer to the kbase context whose active JIT allocations will be + * checked. + * @pages_needed: The maximum number of pages to trim. + * + * This function checks all active JIT allocations in @kctx for unused pages + * at the end, and trims the backed memory regions of those allocations down to + * the used portion and frees the unused pages into the page pool. + * + * Specifying @pages_needed allows us to stop early when there's enough + * physical memory freed to sufficiently bring down the total JIT physical page + * usage (e.g. 
to below the pressure limit) + * + * Return: Total number of successfully freed pages + */ +static size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx, + size_t pages_needed) +{ + struct kbase_va_region *reg, *tmp; + size_t total_freed = 0; + + kbase_gpu_vm_lock(kctx); + mutex_lock(&kctx->jit_evict_lock); + list_for_each_entry_safe(reg, tmp, &kctx->jit_active_head, jit_node) { + int err; + size_t freed = 0u; + + err = kbase_mem_jit_trim_pages_from_region(kctx, reg, + pages_needed, &freed, true); + + if (err) { + /* Failed to trim, try the next region */ + continue; + } + + total_freed += freed; + WARN_ON(freed > pages_needed); + pages_needed -= freed; + if (!pages_needed) + break; + } + mutex_unlock(&kctx->jit_evict_lock); + kbase_gpu_vm_unlock(kctx); + + trace_mali_jit_trim(total_freed); + + return total_freed; +} +#endif /* MALI_JIT_PRESSURE_LIMIT */ + +static int kbase_jit_grow(struct kbase_context *kctx, + const struct base_jit_alloc_info *info, + struct kbase_va_region *reg) +{ + size_t delta; + size_t pages_required; + size_t old_size; + struct kbase_mem_pool *pool; + int ret = -ENOMEM; + struct tagged_addr *gpu_pages; + struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; + int i; + + if (info->commit_pages > reg->nr_pages) { + /* Attempted to grow larger than maximum size */ + return -EINVAL; + } + + kbase_gpu_vm_lock(kctx); + + /* Make the physical backing no longer reclaimable */ + if (!kbase_mem_evictable_unmake(reg->gpu_alloc)) + goto update_failed; + + if (reg->gpu_alloc->nents >= info->commit_pages) + goto done; + + /* Grow the backing */ + old_size = reg->gpu_alloc->nents; + + /* Allocate some more pages */ + delta = info->commit_pages - reg->gpu_alloc->nents; + pages_required = delta; + +#ifdef CONFIG_MALI_2MB_ALLOC + /* Preallocate memory for the sub-allocation structs */ + for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { + prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), + GFP_KERNEL); + if (!prealloc_sas[i]) + goto update_failed; 
+ } + + if (pages_required >= (SZ_2M / SZ_4K)) { + pool = &kctx->mem_pools.large[kctx->jit_group_id]; + /* Round up to number of 2 MB pages required */ + pages_required += ((SZ_2M / SZ_4K) - 1); + pages_required /= (SZ_2M / SZ_4K); + } else { +#endif + pool = &kctx->mem_pools.small[kctx->jit_group_id]; +#ifdef CONFIG_MALI_2MB_ALLOC + } +#endif + + if (reg->cpu_alloc != reg->gpu_alloc) + pages_required *= 2; + + spin_lock(&kctx->mem_partials_lock); + kbase_mem_pool_lock(pool); + + /* As we can not allocate memory from the kernel with the vm_lock held, + * grow the pool to the required size with the lock dropped. We hold the + * pool lock to prevent another thread from allocating from the pool + * between the grow and allocation. + */ + while (kbase_mem_pool_size(pool) < pages_required) { + int pool_delta = pages_required - kbase_mem_pool_size(pool); + + kbase_mem_pool_unlock(pool); + spin_unlock(&kctx->mem_partials_lock); + kbase_gpu_vm_unlock(kctx); + + if (kbase_mem_pool_grow(pool, pool_delta)) + goto update_failed_unlocked; + + kbase_gpu_vm_lock(kctx); + spin_lock(&kctx->mem_partials_lock); + kbase_mem_pool_lock(pool); + } + + gpu_pages = kbase_alloc_phy_pages_helper_locked(reg->gpu_alloc, pool, + delta, &prealloc_sas[0]); + if (!gpu_pages) { + kbase_mem_pool_unlock(pool); + spin_unlock(&kctx->mem_partials_lock); + goto update_failed; + } + + if (reg->cpu_alloc != reg->gpu_alloc) { + struct tagged_addr *cpu_pages; + + cpu_pages = kbase_alloc_phy_pages_helper_locked(reg->cpu_alloc, + pool, delta, &prealloc_sas[1]); + if (!cpu_pages) { + kbase_free_phy_pages_helper_locked(reg->gpu_alloc, + pool, gpu_pages, delta); + kbase_mem_pool_unlock(pool); + spin_unlock(&kctx->mem_partials_lock); + goto update_failed; + } + } + kbase_mem_pool_unlock(pool); + spin_unlock(&kctx->mem_partials_lock); + + ret = kbase_mem_grow_gpu_mapping(kctx, reg, info->commit_pages, + old_size); + /* + * The grow failed so put the allocation back in the + * pool and return failure. 
+ */ + if (ret) + goto update_failed; + +done: + ret = 0; + + /* Update attributes of JIT allocation taken from the pool */ + reg->initial_commit = info->commit_pages; + reg->extent = info->extent; + +update_failed: + kbase_gpu_vm_unlock(kctx); +update_failed_unlocked: + for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) + kfree(prealloc_sas[i]); + + return ret; +} + +static void trace_jit_stats(struct kbase_context *kctx, + u32 bin_id, u32 max_allocations) +{ + const u32 alloc_count = + kctx->jit_current_allocations_per_bin[bin_id]; + struct kbase_device *kbdev = kctx->kbdev; + + struct kbase_va_region *walker; + u32 va_pages = 0; + u32 ph_pages = 0; + + mutex_lock(&kctx->jit_evict_lock); + list_for_each_entry(walker, &kctx->jit_active_head, jit_node) { + if (walker->jit_bin_id != bin_id) + continue; + + va_pages += walker->nr_pages; + ph_pages += walker->gpu_alloc->nents; + } + mutex_unlock(&kctx->jit_evict_lock); + + KBASE_TLSTREAM_AUX_JIT_STATS(kbdev, kctx->id, bin_id, + max_allocations, alloc_count, va_pages, ph_pages); +} + +#if MALI_JIT_PRESSURE_LIMIT +/** + * get_jit_backed_pressure() - calculate the physical backing of all JIT + * allocations + * + * @kctx: Pointer to the kbase context whose active JIT allocations will be + * checked + * + * Return: number of pages that are committed by JIT allocations + */ +static size_t get_jit_backed_pressure(struct kbase_context *kctx) +{ + size_t backed_pressure = 0; + int jit_id; + + lockdep_assert_held(&kctx->jctx.lock); + + kbase_gpu_vm_lock(kctx); + for (jit_id = 0; jit_id <= BASE_JIT_ALLOC_COUNT; jit_id++) { + struct kbase_va_region *reg = kctx->jit_alloc[jit_id]; + + if (reg && (reg != KBASE_RESERVED_REG_JIT_ALLOC)) { + /* If region has no report, be pessimistic */ + if (reg->used_pages == reg->nr_pages) { + backed_pressure += reg->nr_pages; + } else { + backed_pressure += + kbase_reg_current_backed_size(reg); + } + } + } + kbase_gpu_vm_unlock(kctx); + + return backed_pressure; +} + +/** + * 
jit_trim_necessary_pages() - calculate and trim the least pages possible to + * satisfy a new JIT allocation + * + * @kctx: Pointer to the kbase context + * @info: Pointer to JIT allocation information for the new allocation + * + * Before allocating a new just-in-time memory region or reusing a previous + * one, ensure that the total JIT physical page usage also will not exceed the + * pressure limit. + * + * If there are no reported-on allocations, then we already guarantee this will + * be the case - because our current pressure then only comes from the va_pages + * of each JIT region, hence JIT physical page usage is guaranteed to be + * bounded by this. + * + * However as soon as JIT allocations become "reported on", the pressure is + * lowered to allow new JIT regions to be allocated. It is after such a point + * that the total JIT physical page usage could (either now or in the future on + * a grow-on-GPU-page-fault) exceed the pressure limit, but only on newly + * allocated JIT regions. Hence, trim any "reported on" regions. + * + * Any pages freed will go into the pool and be allocated from there in + * kbase_mem_alloc(). + */ +static void jit_trim_necessary_pages(struct kbase_context *kctx, + const struct base_jit_alloc_info *info) +{ + size_t backed_pressure = 0; + size_t needed_pages = 0; + + backed_pressure = get_jit_backed_pressure(kctx); + + /* It is possible that this is the case - if this is the first + * allocation after "ignore_pressure_limit" allocation. 
+ */ + if (backed_pressure > kctx->jit_phys_pages_limit) { + needed_pages += + (backed_pressure - kctx->jit_phys_pages_limit) + + info->va_pages; + } else { + size_t backed_diff = + kctx->jit_phys_pages_limit - backed_pressure; + + if (info->va_pages > backed_diff) + needed_pages += info->va_pages - backed_diff; + } + + if (needed_pages) { + size_t trimmed_pages = kbase_mem_jit_trim_pages(kctx, + needed_pages); + + /* This should never happen - we already asserted that + * we are not violating JIT pressure limit in earlier + * checks, which means that in-flight JIT allocations + * must have enough unused pages to satisfy the new + * allocation + */ + WARN_ON(trimmed_pages < needed_pages); + } +} +#endif /* MALI_JIT_PRESSURE_LIMIT */ + +/** + * jit_allow_allocate() - check whether basic conditions are satisfied to allow + * a new JIT allocation + * + * @kctx: Pointer to the kbase context + * @info: Pointer to JIT allocation information for the new allocation + * @ignore_pressure_limit: Flag to indicate whether JIT pressure limit check + * should be ignored + * + * Return: true if allocation can be executed, false otherwise + */ +static bool jit_allow_allocate(struct kbase_context *kctx, + const struct base_jit_alloc_info *info, + bool ignore_pressure_limit) +{ + lockdep_assert_held(&kctx->jctx.lock); + +#if MALI_JIT_PRESSURE_LIMIT + if (likely(!ignore_pressure_limit) && + ((kctx->jit_phys_pages_limit <= kctx->jit_current_phys_pressure) || + (info->va_pages > (kctx->jit_phys_pages_limit - kctx->jit_current_phys_pressure)))) { + dev_dbg(kctx->kbdev->dev, + "Max JIT page allocations limit reached: active pages %llu, max pages %llu\n", + kctx->jit_current_phys_pressure + info->va_pages, + kctx->jit_phys_pages_limit); + return false; + } +#endif /* MALI_JIT_PRESSURE_LIMIT */ + + if (kctx->jit_current_allocations >= kctx->jit_max_allocations) { + /* Too many current allocations */ + dev_dbg(kctx->kbdev->dev, + "Max JIT allocations limit reached: active allocations %d, max 
allocations %d\n", + kctx->jit_current_allocations, + kctx->jit_max_allocations); + return false; + } + + if (info->max_allocations > 0 && + kctx->jit_current_allocations_per_bin[info->bin_id] >= + info->max_allocations) { + /* Too many current allocations in this bin */ + dev_dbg(kctx->kbdev->dev, + "Per bin limit of max JIT allocations reached: bin_id %d, active allocations %d, max allocations %d\n", + info->bin_id, + kctx->jit_current_allocations_per_bin[info->bin_id], + info->max_allocations); + return false; + } + + return true; +} + +struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, + const struct base_jit_alloc_info *info, + bool ignore_pressure_limit) +{ + struct kbase_va_region *reg = NULL; + + lockdep_assert_held(&kctx->jctx.lock); + + if (!jit_allow_allocate(kctx, info, ignore_pressure_limit)) + return NULL; + +#if MALI_JIT_PRESSURE_LIMIT + if (!ignore_pressure_limit) + jit_trim_necessary_pages(kctx, info); +#endif /* MALI_JIT_PRESSURE_LIMIT */ + + mutex_lock(&kctx->jit_evict_lock); + + /* + * Scan the pool for an existing allocation which meets our + * requirements and remove it. + */ + if (info->usage_id != 0) { + /* First scan for an allocation with the same usage ID */ + struct kbase_va_region *walker; + size_t current_diff = SIZE_MAX; + + list_for_each_entry(walker, &kctx->jit_pool_head, jit_node) { + + if (walker->jit_usage_id == info->usage_id && + walker->jit_bin_id == info->bin_id && + meet_size_and_tiler_align_top_requirements( + kctx, walker, info)) { + size_t min_size, max_size, diff; + + /* + * The JIT allocations VA requirements have been + * met, it's suitable but other allocations + * might be a better fit. 
+ */ + min_size = min_t(size_t, + walker->gpu_alloc->nents, + info->commit_pages); + max_size = max_t(size_t, + walker->gpu_alloc->nents, + info->commit_pages); + diff = max_size - min_size; + + if (current_diff > diff) { + current_diff = diff; + reg = walker; + } + + /* The allocation is an exact match */ + if (current_diff == 0) + break; + } + } + } + + if (!reg) { + /* No allocation with the same usage ID, or usage IDs not in + * use. Search for an allocation we can reuse. + */ + struct kbase_va_region *walker; + size_t current_diff = SIZE_MAX; + + list_for_each_entry(walker, &kctx->jit_pool_head, jit_node) { + + if (walker->jit_bin_id == info->bin_id && + meet_size_and_tiler_align_top_requirements( + kctx, walker, info)) { + size_t min_size, max_size, diff; + + /* + * The JIT allocations VA requirements have been + * met, it's suitable but other allocations + * might be a better fit. + */ + min_size = min_t(size_t, + walker->gpu_alloc->nents, + info->commit_pages); + max_size = max_t(size_t, + walker->gpu_alloc->nents, + info->commit_pages); + diff = max_size - min_size; + + if (current_diff > diff) { + current_diff = diff; + reg = walker; + } + + /* The allocation is an exact match, so stop + * looking. + */ + if (current_diff == 0) + break; + } + } + } + + if (reg) { + /* + * Remove the found region from the pool and add it to the + * active list. + */ + list_move(®->jit_node, &kctx->jit_active_head); + + /* + * Remove the allocation from the eviction list as it's no + * longer eligible for eviction. This must be done before + * dropping the jit_evict_lock + */ + list_del_init(®->gpu_alloc->evict_node); + mutex_unlock(&kctx->jit_evict_lock); + + if (kbase_jit_grow(kctx, info, reg) < 0) { + /* + * An update to an allocation from the pool failed, + * chances are slim a new allocation would fair any + * better so return the allocation to the pool and + * return the function with failure. 
+ */ + dev_dbg(kctx->kbdev->dev, + "JIT allocation resize failed: va_pages 0x%llx, commit_pages 0x%llx\n", + info->va_pages, info->commit_pages); + mutex_lock(&kctx->jit_evict_lock); + list_move(®->jit_node, &kctx->jit_pool_head); + mutex_unlock(&kctx->jit_evict_lock); + return NULL; + } + } else { + /* No suitable JIT allocation was found so create a new one */ + u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD | + BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF | + BASE_MEM_COHERENT_LOCAL | + BASEP_MEM_NO_USER_FREE; + u64 gpu_addr; + + mutex_unlock(&kctx->jit_evict_lock); + + if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) + flags |= BASE_MEM_TILER_ALIGN_TOP; + + flags |= base_mem_group_id_set(kctx->jit_group_id); + + reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, + info->extent, &flags, &gpu_addr); + if (!reg) { + /* Most likely not enough GPU virtual space left for + * the new JIT allocation. + */ + dev_dbg(kctx->kbdev->dev, + "Failed to allocate JIT memory: va_pages 0x%llx, commit_pages 0x%llx\n", + info->va_pages, info->commit_pages); + return NULL; + } + + mutex_lock(&kctx->jit_evict_lock); + list_add(®->jit_node, &kctx->jit_active_head); + mutex_unlock(&kctx->jit_evict_lock); + } + + trace_mali_jit_alloc(reg, info->id); + + kctx->jit_current_allocations++; + kctx->jit_current_allocations_per_bin[info->bin_id]++; + + trace_jit_stats(kctx, info->bin_id, info->max_allocations); + + reg->jit_usage_id = info->usage_id; + reg->jit_bin_id = info->bin_id; +#if MALI_JIT_PRESSURE_LIMIT + if (info->flags & BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) + reg->flags = reg->flags | KBASE_REG_HEAP_INFO_IS_SIZE; + reg->heap_info_gpu_addr = info->heap_info_gpu_addr; + kbase_jit_report_update_pressure(kctx, reg, info->va_pages, + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); +#endif /* MALI_JIT_PRESSURE_LIMIT */ + + return reg; +} + +void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) +{ + u64 old_pages; + + /* JIT id not immediately available 
here, so use 0u */ + trace_mali_jit_free(reg, 0u); + + /* Get current size of JIT region */ + old_pages = kbase_reg_current_backed_size(reg); + if (reg->initial_commit < old_pages) { + /* Free trim_level % of region, but don't go below initial + * commit size + */ + u64 new_size = MAX(reg->initial_commit, + div_u64(old_pages * (100 - kctx->trim_level), 100)); + u64 delta = old_pages - new_size; + + if (delta) + kbase_mem_shrink(kctx, reg, old_pages - delta); + } + +#if MALI_JIT_PRESSURE_LIMIT + reg->heap_info_gpu_addr = 0; + kbase_jit_report_update_pressure(kctx, reg, 0, + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); +#endif /* MALI_JIT_PRESSURE_LIMIT */ + + kctx->jit_current_allocations--; + kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--; + + trace_jit_stats(kctx, reg->jit_bin_id, UINT_MAX); + + kbase_mem_evictable_mark_reclaim(reg->gpu_alloc); + + kbase_gpu_vm_lock(kctx); + reg->flags |= KBASE_REG_DONT_NEED; + kbase_mem_shrink_cpu_mapping(kctx, reg, 0, reg->gpu_alloc->nents); + kbase_gpu_vm_unlock(kctx); + + /* + * Add the allocation to the eviction list and the jit pool, after this + * point the shrink can reclaim it, or it may be reused. + */ + mutex_lock(&kctx->jit_evict_lock); + + /* This allocation can't already be on a list. */ + WARN_ON(!list_empty(®->gpu_alloc->evict_node)); + list_add(®->gpu_alloc->evict_node, &kctx->evict_list); + + list_move(®->jit_node, &kctx->jit_pool_head); + + mutex_unlock(&kctx->jit_evict_lock); +} + +void kbase_jit_backing_lost(struct kbase_va_region *reg) +{ + struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg); + + if (WARN_ON(!kctx)) + return; + + lockdep_assert_held(&kctx->jit_evict_lock); + + /* + * JIT allocations will always be on a list, if the region + * is not on a list then it's not a JIT allocation. 
+ */ + if (list_empty(®->jit_node)) + return; + + /* + * Freeing the allocation requires locks we might not be able + * to take now, so move the allocation to the free list and kick + * the worker which will do the freeing. + */ + list_move(®->jit_node, &kctx->jit_destroy_head); + + schedule_work(&kctx->jit_work); +} + +bool kbase_jit_evict(struct kbase_context *kctx) +{ + struct kbase_va_region *reg = NULL; + + lockdep_assert_held(&kctx->reg_lock); + + /* Free the oldest allocation from the pool */ + mutex_lock(&kctx->jit_evict_lock); + if (!list_empty(&kctx->jit_pool_head)) { + reg = list_entry(kctx->jit_pool_head.prev, + struct kbase_va_region, jit_node); + list_del(®->jit_node); + list_del_init(®->gpu_alloc->evict_node); + } + mutex_unlock(&kctx->jit_evict_lock); + + if (reg) { + reg->flags &= ~KBASE_REG_NO_USER_FREE; + kbase_mem_free_region(kctx, reg); + } + + return (reg != NULL); +} + +void kbase_jit_term(struct kbase_context *kctx) +{ + struct kbase_va_region *walker; + + /* Free all allocations for this context */ + + kbase_gpu_vm_lock(kctx); + mutex_lock(&kctx->jit_evict_lock); + /* Free all allocations from the pool */ + while (!list_empty(&kctx->jit_pool_head)) { + walker = list_first_entry(&kctx->jit_pool_head, + struct kbase_va_region, jit_node); + list_del(&walker->jit_node); + list_del_init(&walker->gpu_alloc->evict_node); + mutex_unlock(&kctx->jit_evict_lock); + walker->flags &= ~KBASE_REG_NO_USER_FREE; + kbase_mem_free_region(kctx, walker); + mutex_lock(&kctx->jit_evict_lock); + } + + /* Free all allocations from active list */ + while (!list_empty(&kctx->jit_active_head)) { + walker = list_first_entry(&kctx->jit_active_head, + struct kbase_va_region, jit_node); + list_del(&walker->jit_node); + list_del_init(&walker->gpu_alloc->evict_node); + mutex_unlock(&kctx->jit_evict_lock); + walker->flags &= ~KBASE_REG_NO_USER_FREE; + kbase_mem_free_region(kctx, walker); + mutex_lock(&kctx->jit_evict_lock); + } + mutex_unlock(&kctx->jit_evict_lock); + 
kbase_gpu_vm_unlock(kctx); + + /* + * Flush the freeing of allocations whose backing has been freed + * (i.e. everything in jit_destroy_head). + */ + cancel_work_sync(&kctx->jit_work); +} + +#if MALI_JIT_PRESSURE_LIMIT +void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, + struct kbase_va_region *reg, unsigned int flags) +{ + /* Offset to the location used for a JIT report within the GPU memory + * + * This constants only used for this debugging function - not useful + * anywhere else in kbase + */ + const u64 jit_report_gpu_mem_offset = sizeof(u64)*2; + + u64 addr_start; + struct kbase_vmap_struct mapping; + u64 *ptr; + + if (reg->heap_info_gpu_addr == 0ull) + goto out; + + /* Nothing else to trace in the case the memory just contains the + * size. Other tracepoints already record the relevant area of memory. + */ + if (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE) + goto out; + + addr_start = reg->heap_info_gpu_addr - jit_report_gpu_mem_offset; + + ptr = kbase_vmap(kctx, addr_start, KBASE_JIT_REPORT_GPU_MEM_SIZE, + &mapping); + if (!ptr) { + dev_warn(kctx->kbdev->dev, + "%s: JIT start=0x%llx unable to map memory near end pointer %llx\n", + __func__, reg->start_pfn << PAGE_SHIFT, + addr_start); + goto out; + } + + trace_mali_jit_report_gpu_mem(addr_start, reg->start_pfn << PAGE_SHIFT, + ptr, flags); + + kbase_vunmap(kctx, &mapping); +out: + return; +} +#endif /* MALI_JIT_PRESSURE_LIMIT */ + +#if MALI_JIT_PRESSURE_LIMIT +void kbase_jit_report_update_pressure(struct kbase_context *kctx, + struct kbase_va_region *reg, u64 new_used_pages, + unsigned int flags) +{ + u64 diff; + + lockdep_assert_held(&kctx->jctx.lock); + + trace_mali_jit_report_pressure(reg, new_used_pages, + kctx->jit_current_phys_pressure + new_used_pages - + reg->used_pages, + flags); + + if (WARN_ON(new_used_pages > reg->nr_pages)) + return; + + if (reg->used_pages > new_used_pages) { + /* We reduced the number of used pages */ + diff = reg->used_pages - new_used_pages; + + if 
(!WARN_ON(diff > kctx->jit_current_phys_pressure)) + kctx->jit_current_phys_pressure -= diff; + + reg->used_pages = new_used_pages; + } else { + /* We increased the number of used pages */ + diff = new_used_pages - reg->used_pages; + + if (!WARN_ON(diff > U64_MAX - kctx->jit_current_phys_pressure)) + kctx->jit_current_phys_pressure += diff; + + reg->used_pages = new_used_pages; + } + +} +#endif /* MALI_JIT_PRESSURE_LIMIT */ + +bool kbase_has_exec_va_zone(struct kbase_context *kctx) +{ + bool has_exec_va_zone; + + kbase_gpu_vm_lock(kctx); + has_exec_va_zone = (kctx->exec_va_start != U64_MAX); + kbase_gpu_vm_unlock(kctx); + + return has_exec_va_zone; +} + + +int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx, + struct kbase_va_region *reg) +{ + struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; + struct page **pages = alloc->imported.user_buf.pages; + unsigned long address = alloc->imported.user_buf.address; + struct mm_struct *mm = alloc->imported.user_buf.mm; + long pinned_pages; + long i; + + if (WARN_ON(alloc->type != KBASE_MEM_TYPE_IMPORTED_USER_BUF)) + return -EINVAL; + + if (alloc->nents) { + if (WARN_ON(alloc->nents != alloc->imported.user_buf.nr_pages)) + return -EINVAL; + else + return 0; + } + + if (WARN_ON(reg->gpu_alloc->imported.user_buf.mm != current->mm)) + return -EINVAL; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) + pinned_pages = get_user_pages(NULL, mm, + address, + alloc->imported.user_buf.nr_pages, +#if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \ +KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE + reg->flags & KBASE_REG_GPU_WR ? 
FOLL_WRITE : 0, + pages, NULL); +#else + reg->flags & KBASE_REG_GPU_WR, + 0, pages, NULL); +#endif +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) + pinned_pages = get_user_pages_remote(NULL, mm, + address, + alloc->imported.user_buf.nr_pages, + reg->flags & KBASE_REG_GPU_WR, + 0, pages, NULL); +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) + pinned_pages = get_user_pages_remote(NULL, mm, + address, + alloc->imported.user_buf.nr_pages, + reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, + pages, NULL); +#else + pinned_pages = get_user_pages_remote(NULL, mm, + address, + alloc->imported.user_buf.nr_pages, + reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, + pages, NULL, NULL); +#endif + + if (pinned_pages <= 0) + return pinned_pages; + + if (pinned_pages != alloc->imported.user_buf.nr_pages) { + for (i = 0; i < pinned_pages; i++) + put_page(pages[i]); + return -ENOMEM; + } + + alloc->nents = pinned_pages; + + return 0; +} + +static int kbase_jd_user_buf_map(struct kbase_context *kctx, + struct kbase_va_region *reg) +{ + long pinned_pages; + struct kbase_mem_phy_alloc *alloc; + struct page **pages; + struct tagged_addr *pa; + long i; + unsigned long address; + struct device *dev; + unsigned long offset; + unsigned long local_size; + unsigned long gwt_mask = ~0; + int err = kbase_jd_user_buf_pin_pages(kctx, reg); + + if (err) + return err; + + alloc = reg->gpu_alloc; + pa = kbase_get_gpu_phy_pages(reg); + address = alloc->imported.user_buf.address; + pinned_pages = alloc->nents; + pages = alloc->imported.user_buf.pages; + dev = kctx->kbdev->dev; + offset = address & ~PAGE_MASK; + local_size = alloc->imported.user_buf.size; + + for (i = 0; i < pinned_pages; i++) { + dma_addr_t dma_addr; + unsigned long min; + + min = MIN(PAGE_SIZE - offset, local_size); + dma_addr = dma_map_page(dev, pages[i], + offset, min, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, dma_addr)) + goto unwind; + + alloc->imported.user_buf.dma_addrs[i] = dma_addr; + pa[i] = 
as_tagged(page_to_phys(pages[i])); + + local_size -= min; + offset = 0; + } + +#ifdef CONFIG_MALI_CINSTR_GWT + if (kctx->gwt_enabled) + gwt_mask = ~KBASE_REG_GPU_WR; +#endif + + err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + pa, kbase_reg_current_backed_size(reg), + reg->flags & gwt_mask, kctx->as_nr, + alloc->group_id); + if (err == 0) + return 0; + + /* fall down */ +unwind: + alloc->nents = 0; + while (i--) { + dma_unmap_page(kctx->kbdev->dev, + alloc->imported.user_buf.dma_addrs[i], + PAGE_SIZE, DMA_BIDIRECTIONAL); + } + + while (++i < pinned_pages) { + put_page(pages[i]); + pages[i] = NULL; + } + + return err; +} + +/* This function would also perform the work of unpinning pages on Job Manager + * GPUs, which implies that a call to kbase_jd_user_buf_pin_pages() will NOT + * have a corresponding call to kbase_jd_user_buf_unpin_pages(). + */ +static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc, bool writeable) +{ + long i; + struct page **pages; + unsigned long size = alloc->imported.user_buf.size; + + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); + pages = alloc->imported.user_buf.pages; + for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { + unsigned long local_size; + dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; + + local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK)); + dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size, + DMA_BIDIRECTIONAL); + if (writeable) + set_page_dirty_lock(pages[i]); + put_page(pages[i]); + pages[i] = NULL; + + size -= local_size; + } + alloc->nents = 0; +} + +int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, + void *src_page, size_t *to_copy, unsigned int nr_pages, + unsigned int *target_page_nr, size_t offset) +{ + void *target_page = kmap(dest_pages[*target_page_nr]); + size_t chunk = PAGE_SIZE-offset; + + if (!target_page) { + pr_err("%s: kmap failure", __func__); + return -ENOMEM; + } + 
+ chunk = min(chunk, *to_copy); + + memcpy(target_page + offset, src_page, chunk); + *to_copy -= chunk; + + kunmap(dest_pages[*target_page_nr]); + + *target_page_nr += 1; + if (*target_page_nr >= nr_pages || *to_copy == 0) + return 0; + + target_page = kmap(dest_pages[*target_page_nr]); + if (!target_page) { + pr_err("%s: kmap failure", __func__); + return -ENOMEM; + } + + KBASE_DEBUG_ASSERT(target_page); + + chunk = min(offset, *to_copy); + memcpy(target_page, src_page + PAGE_SIZE-offset, chunk); + *to_copy -= chunk; + + kunmap(dest_pages[*target_page_nr]); + + return 0; +} + +struct kbase_mem_phy_alloc *kbase_map_external_resource( + struct kbase_context *kctx, struct kbase_va_region *reg, + struct mm_struct *locked_mm) +{ + int err; + + lockdep_assert_held(&kctx->reg_lock); + + /* decide what needs to happen for this resource */ + switch (reg->gpu_alloc->type) { + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { + if ((reg->gpu_alloc->imported.user_buf.mm != locked_mm) && + (!reg->gpu_alloc->nents)) + goto exit; + + reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++; + if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) { + err = kbase_jd_user_buf_map(kctx, reg); + if (err) { + reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--; + goto exit; + } + } + } + break; + case KBASE_MEM_TYPE_IMPORTED_UMM: { + err = kbase_mem_umm_map(kctx, reg); + if (err) + goto exit; + break; + } + default: + goto exit; + } + + return kbase_mem_phy_alloc_get(reg->gpu_alloc); +exit: + return NULL; +} + +void kbase_unmap_external_resource(struct kbase_context *kctx, + struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc) +{ + switch (alloc->type) { + case KBASE_MEM_TYPE_IMPORTED_UMM: { + kbase_mem_umm_unmap(kctx, reg, alloc); + } + break; + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { + alloc->imported.user_buf.current_mapping_usage_count--; + + if (0 == alloc->imported.user_buf.current_mapping_usage_count) { + bool writeable = true; + + if 
(!kbase_is_region_invalid_or_free(reg) && + reg->gpu_alloc == alloc) + kbase_mmu_teardown_pages( + kctx->kbdev, + &kctx->mmu, + reg->start_pfn, + kbase_reg_current_backed_size(reg), + kctx->as_nr); + + if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0)) + writeable = false; + + kbase_jd_user_buf_unmap(kctx, alloc, writeable); + } + } + break; + default: + break; + } + kbase_mem_phy_alloc_put(alloc); +} + +struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( + struct kbase_context *kctx, u64 gpu_addr) +{ + struct kbase_ctx_ext_res_meta *meta = NULL; + struct kbase_ctx_ext_res_meta *walker; + + lockdep_assert_held(&kctx->reg_lock); + + /* + * Walk the per context external resource metadata list for the + * metadata which matches the region which is being acquired. + */ + list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) { + if (walker->gpu_addr == gpu_addr) { + meta = walker; + meta->ref++; + break; + } + } + + /* No metadata exists so create one. */ + if (!meta) { + struct kbase_va_region *reg; + + /* Find the region */ + reg = kbase_region_tracker_find_region_enclosing_address( + kctx, gpu_addr); + if (kbase_is_region_invalid_or_free(reg)) + goto failed; + + /* Allocate the metadata object */ + meta = kzalloc(sizeof(*meta), GFP_KERNEL); + if (!meta) + goto failed; + + /* + * Fill in the metadata object and acquire a reference + * for the physical resource. 
+ */ + meta->alloc = kbase_map_external_resource(kctx, reg, NULL); + meta->ref = 1; + + if (!meta->alloc) + goto fail_map; + + meta->gpu_addr = reg->start_pfn << PAGE_SHIFT; + + list_add(&meta->ext_res_node, &kctx->ext_res_meta_head); + } + + return meta; + +fail_map: + kfree(meta); +failed: + return NULL; +} + +static struct kbase_ctx_ext_res_meta * +find_sticky_resource_meta(struct kbase_context *kctx, u64 gpu_addr) +{ + struct kbase_ctx_ext_res_meta *walker; + + lockdep_assert_held(&kctx->reg_lock); + + /* + * Walk the per context external resource metadata list for the + * metadata which matches the region which is being released. + */ + list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) + if (walker->gpu_addr == gpu_addr) + return walker; + + return NULL; +} + +static void release_sticky_resource_meta(struct kbase_context *kctx, + struct kbase_ctx_ext_res_meta *meta) +{ + struct kbase_va_region *reg; + + /* Drop the physical memory reference and free the metadata. */ + reg = kbase_region_tracker_find_region_enclosing_address( + kctx, + meta->gpu_addr); + + kbase_unmap_external_resource(kctx, reg, meta->alloc); + list_del(&meta->ext_res_node); + kfree(meta); +} + +bool kbase_sticky_resource_release(struct kbase_context *kctx, + struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr) +{ + lockdep_assert_held(&kctx->reg_lock); + + /* Search of the metadata if one isn't provided. */ + if (!meta) + meta = find_sticky_resource_meta(kctx, gpu_addr); + + /* No metadata so just return. */ + if (!meta) + return false; + + if (--meta->ref != 0) + return true; + + release_sticky_resource_meta(kctx, meta); + + return true; +} + +bool kbase_sticky_resource_release_force(struct kbase_context *kctx, + struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr) +{ + lockdep_assert_held(&kctx->reg_lock); + + /* Search of the metadata if one isn't provided. */ + if (!meta) + meta = find_sticky_resource_meta(kctx, gpu_addr); + + /* No metadata so just return. 
*/ + if (!meta) + return false; + + release_sticky_resource_meta(kctx, meta); + + return true; +} + +int kbase_sticky_resource_init(struct kbase_context *kctx) +{ + INIT_LIST_HEAD(&kctx->ext_res_meta_head); + + return 0; +} + +void kbase_sticky_resource_term(struct kbase_context *kctx) +{ + struct kbase_ctx_ext_res_meta *walker; + + lockdep_assert_held(&kctx->reg_lock); + + /* + * Free any sticky resources which haven't been unmapped. + * + * Note: + * We don't care about refcounts at this point as no future + * references to the meta data will be made. + * Region termination would find these if we didn't free them + * here, but it's more efficient if we do the clean up here. + */ + while (!list_empty(&kctx->ext_res_meta_head)) { + walker = list_first_entry(&kctx->ext_res_meta_head, + struct kbase_ctx_ext_res_meta, ext_res_node); + + kbase_sticky_resource_release_force(kctx, walker, 0); + } +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h new file mode 100644 index 0000000..6e921ec --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h @@ -0,0 +1,1771 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file mali_kbase_mem.h + * Base kernel memory APIs + */ + +#ifndef _KBASE_MEM_H_ +#define _KBASE_MEM_H_ + +#ifndef _KBASE_H_ +#error "Don't include this file directly, use mali_kbase.h instead" +#endif + +#include +#include "mali_base_kernel.h" +#include +#include "mali_kbase_pm.h" +#include "mali_kbase_defs.h" +/* Required for kbase_mem_evictable_unmake */ +#include "mali_kbase_mem_linux.h" + +static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, + int pages); + +/* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */ +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2) /* round to 4 pages */ + +/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by 8 pages. +The MMU reads in 8 page table entries from memory at a time, if we have more than one page fault within the same 8 pages and +page tables are updated accordingly, the MMU does not re-read the page table entries from memory for the subsequent page table +updates and generates duplicate page faults as the page table information used by the MMU is not valid. 
*/ +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3) /* round to 8 pages */ + +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0) /* round to 1 page */ + +/* This must always be a power of 2 */ +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2) +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316) +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630) +/** + * A CPU mapping + */ +struct kbase_cpu_mapping { + struct list_head mappings_list; + struct kbase_mem_phy_alloc *alloc; + struct kbase_context *kctx; + struct kbase_va_region *region; + int count; + int free_on_close; +}; + +enum kbase_memory_type { + KBASE_MEM_TYPE_NATIVE, + KBASE_MEM_TYPE_IMPORTED_UMM, + KBASE_MEM_TYPE_IMPORTED_USER_BUF, + KBASE_MEM_TYPE_ALIAS, + KBASE_MEM_TYPE_RAW +}; + +/* internal structure, mirroring base_mem_aliasing_info, + * but with alloc instead of a gpu va (handle) */ +struct kbase_aliased { + struct kbase_mem_phy_alloc *alloc; /* NULL for special, non-NULL for native */ + u64 offset; /* in pages */ + u64 length; /* in pages */ +}; + +/** + * @brief Physical pages tracking object properties + */ +#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED (1u << 0) +#define KBASE_MEM_PHY_ALLOC_LARGE (1u << 1) + +/* struct kbase_mem_phy_alloc - Physical pages tracking object. + * + * Set up to track N pages. + * N not stored here, the creator holds that info. + * This object only tracks how many elements are actually valid (present). + * Changing of nents or *pages should only happen if the kbase_mem_phy_alloc + * is not shared with another region or client. CPU mappings are OK to + * exist when changing, as long as the tracked mappings objects are + * updated as part of the change. 
+ * + * @kref: number of users of this alloc + * @gpu_mappings: count number of times mapped on the GPU + * @nents: 0..N + * @pages: N elements, only 0..nents are valid + * @mappings: List of CPU mappings of this physical memory allocation. + * @evict_node: Node used to store this allocation on the eviction list + * @evicted: Physical backing size when the pages where evicted + * @reg: Back reference to the region structure which created this + * allocation, or NULL if it has been freed. + * @type: type of buffer + * @permanent_map: Kernel side mapping of the alloc, shall never be + * referred directly. kbase_phy_alloc_mapping_get() & + * kbase_phy_alloc_mapping_put() pair should be used + * around access to the kernel-side CPU mapping so that + * mapping doesn't disappear whilst it is being accessed. + * @properties: Bitmask of properties, e.g. KBASE_MEM_PHY_ALLOC_LARGE. + * @group_id: A memory group ID to be passed to a platform-specific + * memory group manager, if present. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * @imported: member in union valid based on @a type + */ +struct kbase_mem_phy_alloc { + struct kref kref; + atomic_t gpu_mappings; + size_t nents; + struct tagged_addr *pages; + struct list_head mappings; + struct list_head evict_node; + size_t evicted; + struct kbase_va_region *reg; + enum kbase_memory_type type; + struct kbase_vmap_struct *permanent_map; + u8 properties; + u8 group_id; + + union { + struct { + struct dma_buf *dma_buf; + struct dma_buf_attachment *dma_attachment; + unsigned int current_mapping_usage_count; + struct sg_table *sgt; + bool need_sync; + } umm; + struct { + u64 stride; + size_t nents; + struct kbase_aliased *aliased; + } alias; + struct { + struct kbase_context *kctx; + /* Number of pages in this structure, including *pages. + * Used for kernel memory tracking. 
+ */ + size_t nr_struct_pages; + } native; + struct kbase_alloc_import_user_buf { + unsigned long address; + unsigned long size; + unsigned long nr_pages; + struct page **pages; + /* top bit (1<<31) of current_mapping_usage_count + * specifies that this import was pinned on import + * See PINNED_ON_IMPORT + */ + u32 current_mapping_usage_count; + struct mm_struct *mm; + dma_addr_t *dma_addrs; + } user_buf; + } imported; +}; + +/* The top bit of kbase_alloc_import_user_buf::current_mapping_usage_count is + * used to signify that a buffer was pinned when it was imported. Since the + * reference count is limited by the number of atoms that can be submitted at + * once there should be no danger of overflowing into this bit. + * Stealing the top bit also has the benefit that + * current_mapping_usage_count != 0 if and only if the buffer is mapped. + */ +#define PINNED_ON_IMPORT (1<<31) + +/** + * enum kbase_jit_report_flags - Flags for just-in-time memory allocation + * pressure limit functions + * @KBASE_JIT_REPORT_ON_ALLOC_OR_FREE: Notifying about an update happening due + * to a just-in-time memory allocation or free + * + * Used to control flow within pressure limit related functions, or to provide + * extra debugging information + */ +enum kbase_jit_report_flags { + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE = (1u << 0) +}; + +static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc) +{ + KBASE_DEBUG_ASSERT(alloc); + /* we only track mappings of NATIVE buffers */ + if (alloc->type == KBASE_MEM_TYPE_NATIVE) + atomic_inc(&alloc->gpu_mappings); +} + +static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc *alloc) +{ + KBASE_DEBUG_ASSERT(alloc); + /* we only track mappings of NATIVE buffers */ + if (alloc->type == KBASE_MEM_TYPE_NATIVE) + if (0 > atomic_dec_return(&alloc->gpu_mappings)) { + pr_err("Mismatched %s:\n", __func__); + dump_stack(); + } +} + +/** + * kbase_mem_is_imported - Indicate whether a memory type is imported + 
* + * @type: the memory type + * + * Return: true if the memory type is imported, false otherwise + */ +static inline bool kbase_mem_is_imported(enum kbase_memory_type type) +{ + return (type == KBASE_MEM_TYPE_IMPORTED_UMM) || + (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); +} + +void kbase_mem_kref_free(struct kref *kref); + +int kbase_mem_init(struct kbase_device *kbdev); +void kbase_mem_halt(struct kbase_device *kbdev); +void kbase_mem_term(struct kbase_device *kbdev); + +static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_get(struct kbase_mem_phy_alloc *alloc) +{ + kref_get(&alloc->kref); + return alloc; +} + +static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_mem_phy_alloc *alloc) +{ + kref_put(&alloc->kref, kbase_mem_kref_free); + return NULL; +} + +/** + * A GPU memory region, and attributes for CPU mappings. + * + * @rblink: Node in a red-black tree of memory regions within the same zone of + * the GPU's virtual address space. + * @link: Links to neighboring items in a list of growable memory regions + * that triggered incremental rendering by growing too much. + * @rbtree: Backlink to the red-black tree of memory regions. + * @start_pfn: The Page Frame Number in GPU virtual address space. + * @nr_pages: The size of the region in pages. + * @initial_commit: Initial commit, for aligning the start address and + * correctly growing KBASE_REG_TILER_ALIGN_TOP regions. + * @threshold_pages: If non-zero and the amount of memory committed to a region + * that can grow on page fault exceeds this number of pages + * then the driver switches to incremental rendering. + * @extent: Number of pages allocated on page fault. + * @cpu_alloc: The physical memory we mmap to the CPU when mapping this region. + * @gpu_alloc: The physical memory we mmap to the GPU when mapping this region. + * @jit_node: Links to neighboring regions in the just-in-time memory pool. + * @jit_usage_id: The last just-in-time memory usage ID for this region. 
+ * @jit_bin_id: The just-in-time memory bin this region came from. + * @va_refcnt: Number of users of this region. Protected by reg_lock. + */ +struct kbase_va_region { + struct rb_node rblink; + struct list_head link; + struct rb_root *rbtree; + u64 start_pfn; + size_t nr_pages; + size_t initial_commit; + size_t threshold_pages; + +/* Free region */ +#define KBASE_REG_FREE (1ul << 0) +/* CPU write access */ +#define KBASE_REG_CPU_WR (1ul << 1) +/* GPU write access */ +#define KBASE_REG_GPU_WR (1ul << 2) +/* No eXecute flag */ +#define KBASE_REG_GPU_NX (1ul << 3) +/* Is CPU cached? */ +#define KBASE_REG_CPU_CACHED (1ul << 4) +/* Is GPU cached? + * Some components within the GPU might only be able to access memory that is + * GPU cacheable. Refer to the specific GPU implementation for more details. + */ +#define KBASE_REG_GPU_CACHED (1ul << 5) + +#define KBASE_REG_GROWABLE (1ul << 6) +/* Can grow on pf? */ +#define KBASE_REG_PF_GROW (1ul << 7) + +/* Allocation doesn't straddle the 4GB boundary in GPU virtual space */ +#define KBASE_REG_GPU_VA_SAME_4GB_PAGE (1ul << 8) + +/* inner shareable coherency */ +#define KBASE_REG_SHARE_IN (1ul << 9) +/* inner & outer shareable coherency */ +#define KBASE_REG_SHARE_BOTH (1ul << 10) + +/* Space for 4 different zones */ +#define KBASE_REG_ZONE_MASK (3ul << 11) +#define KBASE_REG_ZONE(x) (((x) & 3) << 11) + +/* GPU read access */ +#define KBASE_REG_GPU_RD (1ul<<13) +/* CPU read access */ +#define KBASE_REG_CPU_RD (1ul<<14) + +/* Index of chosen MEMATTR for this region (0..7) */ +#define KBASE_REG_MEMATTR_MASK (7ul << 16) +#define KBASE_REG_MEMATTR_INDEX(x) (((x) & 7) << 16) +#define KBASE_REG_MEMATTR_VALUE(x) (((x) & KBASE_REG_MEMATTR_MASK) >> 16) + +#define KBASE_REG_PROTECTED (1ul << 19) + +#define KBASE_REG_DONT_NEED (1ul << 20) + +/* Imported buffer is padded? */ +#define KBASE_REG_IMPORT_PAD (1ul << 21) + +/* Bit 22 is reserved. 
+ * + * Do not remove, use the next unreserved bit for new flags */ +#define KBASE_REG_RESERVED_BIT_22 (1ul << 22) + +/* The top of the initial commit is aligned to extent pages. + * Extent must be a power of 2 */ +#define KBASE_REG_TILER_ALIGN_TOP (1ul << 23) + +/* Whilst this flag is set the GPU allocation is not supposed to be freed by + * user space. The flag will remain set for the lifetime of JIT allocations. + */ +#define KBASE_REG_NO_USER_FREE (1ul << 24) + +/* Memory has permanent kernel side mapping */ +#define KBASE_REG_PERMANENT_KERNEL_MAPPING (1ul << 25) + +/* GPU VA region has been freed by the userspace, but still remains allocated + * due to the reference held by CPU mappings created on the GPU VA region. + * + * A region with this flag set has had kbase_gpu_munmap() called on it, but can + * still be looked-up in the region tracker as a non-free region. Hence callers must + * not create or update any more GPU mappings on such regions because they will + * not be unmapped when the region is finally destroyed. + * + * Since such regions are still present in the region tracker, new allocations + * attempted with BASE_MEM_SAME_VA might fail if their address intersects with + * a region with this flag set. + * + * In addition, this flag indicates the gpu_alloc member might no longer be valid + * e.g. in infinite cache simulation. + */ +#define KBASE_REG_VA_FREED (1ul << 26) + +/* If set, the heap info address points to a u32 holding the used size in bytes; + * otherwise it points to a u64 holding the lowest address of unused memory. + */ +#define KBASE_REG_HEAP_INFO_IS_SIZE (1ul << 27) + +#define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0) + +/* only used with 32-bit clients */ +/* + * On a 32bit platform, custom VA should be wired from 4GB + * to the VA limit of the GPU. Unfortunately, the Linux mmap() interface + * limits us to 2^32 pages (2^44 bytes, see mmap64 man page for reference).
+ * So we put the default limit to the maximum possible on Linux and shrink + * it down, if required by the GPU, during initialization. + */ + +#define KBASE_REG_ZONE_CUSTOM_VA KBASE_REG_ZONE(1) +#define KBASE_REG_ZONE_CUSTOM_VA_BASE (0x100000000ULL >> PAGE_SHIFT) +#define KBASE_REG_ZONE_CUSTOM_VA_SIZE (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE) +/* end 32-bit clients only */ + +/* The starting address and size of the GPU-executable zone are dynamic + * and depend on the platform and the number of pages requested by the + * user process, with an upper limit of 4 GB. + */ +#define KBASE_REG_ZONE_EXEC_VA KBASE_REG_ZONE(2) +#define KBASE_REG_ZONE_EXEC_VA_MAX_PAGES ((1ULL << 32) >> PAGE_SHIFT) /* 4 GB */ + + + unsigned long flags; + size_t extent; + struct kbase_mem_phy_alloc *cpu_alloc; + struct kbase_mem_phy_alloc *gpu_alloc; + struct list_head jit_node; + u16 jit_usage_id; + u8 jit_bin_id; +#if MALI_JIT_PRESSURE_LIMIT + /* Pointer to an object in GPU memory defining an end of an allocated + * region + * + * The object can be one of: + * - u32 value defining the size of the region + * - u64 pointer first unused byte in the region + * + * The interpretation of the object depends on + * BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE flag in jit_info_flags - if it is + * set, the heap info object should be interpreted as size. + */ + u64 heap_info_gpu_addr; + + /* The current estimate of the number of pages used, which in normal + * use is either: + * - the initial estimate == va_pages + * - the actual pages used, as found by a JIT usage report + * + * Note that since the value is calculated from GPU memory after a JIT + * usage report, at any point in time it is allowed to take a random + * value that is no greater than va_pages (e.g. 
it may be greater than + * gpu_alloc->nents) + */ + size_t used_pages; +#endif /* MALI_JIT_PRESSURE_LIMIT */ + + int va_refcnt; +}; + +/* Special marker for failed JIT allocations that still must be marked as + * in-use + */ +#define KBASE_RESERVED_REG_JIT_ALLOC ((struct kbase_va_region *)-1) + +static inline bool kbase_is_region_free(struct kbase_va_region *reg) +{ + return (!reg || reg->flags & KBASE_REG_FREE); +} + +static inline bool kbase_is_region_invalid(struct kbase_va_region *reg) +{ + return (!reg || reg->flags & KBASE_REG_VA_FREED); +} + +static inline bool kbase_is_region_invalid_or_free(struct kbase_va_region *reg) +{ + /* Possibly not all functions that find regions would be using this + * helper, so they need to be checked when maintaining this function. + */ + return (kbase_is_region_invalid(reg) || kbase_is_region_free(reg)); +} + +int kbase_remove_va_region(struct kbase_va_region *reg); +static inline void kbase_region_refcnt_free(struct kbase_va_region *reg) +{ + /* If region was mapped then remove va region*/ + if (reg->start_pfn) + kbase_remove_va_region(reg); + + /* To detect use-after-free in debug builds */ + KBASE_DEBUG_CODE(reg->flags |= KBASE_REG_FREE); + kfree(reg); +} + +static inline struct kbase_va_region *kbase_va_region_alloc_get( + struct kbase_context *kctx, struct kbase_va_region *region) +{ + lockdep_assert_held(&kctx->reg_lock); + + WARN_ON(!region->va_refcnt); + + /* non-atomic as kctx->reg_lock is held */ + dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %p\n", + region->va_refcnt, (void *)region); + region->va_refcnt++; + + return region; +} + +static inline struct kbase_va_region *kbase_va_region_alloc_put( + struct kbase_context *kctx, struct kbase_va_region *region) +{ + lockdep_assert_held(&kctx->reg_lock); + + WARN_ON(region->va_refcnt <= 0); + WARN_ON(region->flags & KBASE_REG_FREE); + + /* non-atomic as kctx->reg_lock is held */ + region->va_refcnt--; + dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %p\n", + 
region->va_refcnt, (void *)region); + if (!region->va_refcnt) + kbase_region_refcnt_free(region); + + return NULL; +} + +/* Common functions */ +static inline struct tagged_addr *kbase_get_cpu_phy_pages( + struct kbase_va_region *reg) +{ + KBASE_DEBUG_ASSERT(reg); + KBASE_DEBUG_ASSERT(reg->cpu_alloc); + KBASE_DEBUG_ASSERT(reg->gpu_alloc); + KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents); + + return reg->cpu_alloc->pages; +} + +static inline struct tagged_addr *kbase_get_gpu_phy_pages( + struct kbase_va_region *reg) +{ + KBASE_DEBUG_ASSERT(reg); + KBASE_DEBUG_ASSERT(reg->cpu_alloc); + KBASE_DEBUG_ASSERT(reg->gpu_alloc); + KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents); + + return reg->gpu_alloc->pages; +} + +static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg) +{ + KBASE_DEBUG_ASSERT(reg); + /* if no alloc object the backed size naturally is 0 */ + if (!reg->cpu_alloc) + return 0; + + KBASE_DEBUG_ASSERT(reg->cpu_alloc); + KBASE_DEBUG_ASSERT(reg->gpu_alloc); + KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents); + + return reg->cpu_alloc->nents; +} + +#define KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD ((size_t)(4*1024)) /* size above which vmalloc is used over kmalloc */ + +static inline struct kbase_mem_phy_alloc *kbase_alloc_create( + struct kbase_context *kctx, size_t nr_pages, + enum kbase_memory_type type, int group_id) +{ + struct kbase_mem_phy_alloc *alloc; + size_t alloc_size = sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages; + size_t per_page_size = sizeof(*alloc->pages); + + /* Imported pages may have page private data already in use */ + if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) { + alloc_size += nr_pages * + sizeof(*alloc->imported.user_buf.dma_addrs); + per_page_size += sizeof(*alloc->imported.user_buf.dma_addrs); + } + + /* + * Prevent nr_pages*per_page_size + sizeof(*alloc) from + * wrapping around. 
+ */ + if (nr_pages > ((((size_t) -1) - sizeof(*alloc)) + / per_page_size)) + return ERR_PTR(-ENOMEM); + + /* Allocate based on the size to reduce internal fragmentation of vmem */ + if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD) + alloc = vzalloc(alloc_size); + else + alloc = kzalloc(alloc_size, GFP_KERNEL); + + if (!alloc) + return ERR_PTR(-ENOMEM); + + if (type == KBASE_MEM_TYPE_NATIVE) { + alloc->imported.native.nr_struct_pages = + (alloc_size + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + kbase_process_page_usage_inc(kctx, + alloc->imported.native.nr_struct_pages); + } + + /* Store allocation method */ + if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD) + alloc->properties |= KBASE_MEM_PHY_ALLOC_LARGE; + + kref_init(&alloc->kref); + atomic_set(&alloc->gpu_mappings, 0); + alloc->nents = 0; + alloc->pages = (void *)(alloc + 1); + INIT_LIST_HEAD(&alloc->mappings); + alloc->type = type; + alloc->group_id = group_id; + + if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) + alloc->imported.user_buf.dma_addrs = + (void *) (alloc->pages + nr_pages); + + return alloc; +} + +static inline int kbase_reg_prepare_native(struct kbase_va_region *reg, + struct kbase_context *kctx, int group_id) +{ + KBASE_DEBUG_ASSERT(reg); + KBASE_DEBUG_ASSERT(!reg->cpu_alloc); + KBASE_DEBUG_ASSERT(!reg->gpu_alloc); + KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE); + + reg->cpu_alloc = kbase_alloc_create(kctx, reg->nr_pages, + KBASE_MEM_TYPE_NATIVE, group_id); + if (IS_ERR(reg->cpu_alloc)) + return PTR_ERR(reg->cpu_alloc); + else if (!reg->cpu_alloc) + return -ENOMEM; + + reg->cpu_alloc->imported.native.kctx = kctx; + if (kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE) + && (reg->flags & KBASE_REG_CPU_CACHED)) { + reg->gpu_alloc = kbase_alloc_create(kctx, reg->nr_pages, + KBASE_MEM_TYPE_NATIVE, group_id); + if (IS_ERR_OR_NULL(reg->gpu_alloc)) { + kbase_mem_phy_alloc_put(reg->cpu_alloc); + return -ENOMEM; + } + reg->gpu_alloc->imported.native.kctx = kctx; + } else { + reg->gpu_alloc = 
kbase_mem_phy_alloc_get(reg->cpu_alloc); + } + + mutex_lock(&kctx->jit_evict_lock); + INIT_LIST_HEAD(&reg->cpu_alloc->evict_node); + INIT_LIST_HEAD(&reg->gpu_alloc->evict_node); + mutex_unlock(&kctx->jit_evict_lock); + + reg->flags &= ~KBASE_REG_FREE; + + return 0; +} + +/* + * Max size for kbdev memory pool (in pages) + */ +#define KBASE_MEM_POOL_MAX_SIZE_KBDEV (SZ_64M >> PAGE_SHIFT) + +/* + * Max size for kctx memory pool (in pages) + */ +#define KBASE_MEM_POOL_MAX_SIZE_KCTX (SZ_64M >> PAGE_SHIFT) + +/* + * The order required for a 2MB page allocation (2^order * 4KB = 2MB) + */ +#define KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER 9 + +/* + * The order required for a 4KB page allocation + */ +#define KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER 0 + +/** + * kbase_mem_pool_config_set_max_size - Set maximum number of free pages in + * initial configuration of a memory pool + * + * @config: Initial configuration for a physical memory pool + * @max_size: Maximum number of free pages that a pool created from + * @config can hold + */ +static inline void kbase_mem_pool_config_set_max_size( + struct kbase_mem_pool_config *const config, size_t const max_size) +{ + WRITE_ONCE(config->max_size, max_size); +} + +/** + * kbase_mem_pool_config_get_max_size - Get maximum number of free pages from + * initial configuration of a memory pool + * + * @config: Initial configuration for a physical memory pool + * + * Return: Maximum number of free pages that a pool created from @config + * can hold + */ +static inline size_t kbase_mem_pool_config_get_max_size( + const struct kbase_mem_pool_config *const config) +{ + return READ_ONCE(config->max_size); +} + +/** + * kbase_mem_pool_init - Create a memory pool for a kbase device + * @pool: Memory pool to initialize + * @config: Initial configuration for the memory pool + * @order: Page order for physical page size (order=0=>4kB, order=9=>2MB) + * @group_id: A memory group ID to be passed to a platform-specific + * memory group manager, if present.
+ * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * @kbdev: Kbase device where memory is used + * @next_pool: Pointer to the next pool or NULL. + * + * Allocations from @pool are in whole pages. Each @pool has a free list where + * pages can be quickly allocated from. The free list is initially empty and + * filled whenever pages are freed back to the pool. The number of free pages + * in the pool will in general not exceed @max_size, but the pool may in + * certain corner cases grow above @max_size. + * + * If @next_pool is not NULL, we will allocate from @next_pool before going to + * the memory group manager. Similarly pages can spill over to @next_pool when + * @pool is full. Pages are zeroed before they spill over to another pool, to + * prevent leaking information between applications. + * + * A shrinker is registered so that Linux mm can reclaim pages from the pool as + * needed. + * + * Return: 0 on success, negative -errno on error + */ +int kbase_mem_pool_init(struct kbase_mem_pool *pool, + const struct kbase_mem_pool_config *config, + unsigned int order, + int group_id, + struct kbase_device *kbdev, + struct kbase_mem_pool *next_pool); + +/** + * kbase_mem_pool_term - Destroy a memory pool + * @pool: Memory pool to destroy + * + * Pages in the pool will spill over to @next_pool (if available) or freed to + * the kernel. + */ +void kbase_mem_pool_term(struct kbase_mem_pool *pool); + +/** + * kbase_mem_pool_alloc - Allocate a page from memory pool + * @pool: Memory pool to allocate from + * + * Allocations from the pool are made as follows: + * 1. If there are free pages in the pool, allocate a page from @pool. + * 2. Otherwise, if @next_pool is not NULL and has free pages, allocate a page + * from @next_pool. + * 3. Return NULL if no memory in the pool + * + * Return: Pointer to allocated page, or NULL if allocation failed. + * + * Note : This function should not be used if the pool lock is held. Use + * kbase_mem_pool_alloc_locked() instead. 
+ */ +struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool); + +/** + * kbase_mem_pool_alloc_locked - Allocate a page from memory pool + * @pool: Memory pool to allocate from + * + * If there are free pages in the pool, this function allocates a page from + * @pool. This function does not use @next_pool. + * + * Return: Pointer to allocated page, or NULL if allocation failed. + * + * Note : Caller must hold the pool lock. + */ +struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool); + +/** + * kbase_mem_pool_free - Free a page to memory pool + * @pool: Memory pool where page should be freed + * @page: Page to free to the pool + * @dirty: Whether some of the page may be dirty in the cache. + * + * Pages are freed to the pool as follows: + * 1. If @pool is not full, add @page to @pool. + * 2. Otherwise, if @next_pool is not NULL and not full, add @page to + * @next_pool. + * 3. Finally, free @page to the kernel. + * + * Note : This function should not be used if the pool lock is held. Use + * kbase_mem_pool_free_locked() instead. + */ +void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *page, + bool dirty); + +/** + * kbase_mem_pool_free_locked - Free a page to memory pool + * @pool: Memory pool where page should be freed + * @p: Page to free to the pool + * @dirty: Whether some of the page may be dirty in the cache. + * + * If @pool is not full, this function adds @p to @pool. Otherwise, @p is + * freed to the kernel. This function does not use @next_pool. + * + * Note : Caller must hold the pool lock. + */ +void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, + bool dirty); + +/** + * kbase_mem_pool_alloc_pages - Allocate pages from memory pool + * @pool: Memory pool to allocate from + * @nr_4k_pages: Number of pages to allocate + * @pages: Pointer to array where the physical address of the allocated + * pages will be stored.
+ * @partial_allowed: Whether allocating fewer pages than requested is allowed + * + * Like kbase_mem_pool_alloc() but optimized for allocating many pages. + * + * Return: + * On success number of pages allocated (could be less than @nr_4k_pages if + * @partial_allowed). + * On error an error code. + * + * Note : This function should not be used if the pool lock is held. Use + * kbase_mem_pool_alloc_pages_locked() instead. + * + * The caller must not hold vm_lock, as this could cause a deadlock if + * the kernel OoM killer runs. If the caller must allocate pages while holding + * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead. + */ +int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, + struct tagged_addr *pages, bool partial_allowed); + +/** + * kbase_mem_pool_alloc_pages_locked - Allocate pages from memory pool + * @pool: Memory pool to allocate from + * @nr_4k_pages: Number of pages to allocate + * @pages: Pointer to array where the physical address of the allocated + * pages will be stored. + * + * Like kbase_mem_pool_alloc() but optimized for allocating many pages. This + * version does not allocate new pages from the kernel, and therefore will never + * trigger the OoM killer. Therefore, it can be run while the vm_lock is held. + * + * As new pages can not be allocated, the caller must ensure there are + * sufficient pages in the pool. Usage of this function should look like : + * + * kbase_gpu_vm_lock(kctx); + * kbase_mem_pool_lock(pool) + * while (kbase_mem_pool_size(pool) < pages_required) { + * kbase_mem_pool_unlock(pool) + * kbase_gpu_vm_unlock(kctx); + * kbase_mem_pool_grow(pool) + * kbase_gpu_vm_lock(kctx); + * kbase_mem_pool_lock(pool) + * } + * kbase_mem_pool_alloc_pages_locked(pool) + * kbase_mem_pool_unlock(pool) + * Perform other processing that requires vm_lock...
+ * kbase_gpu_vm_unlock(kctx); + * + * This ensures that the pool can be grown to the required size and that the + * allocation can complete without another thread using the newly grown pages. + * + * Return: + * On success number of pages allocated. + * On error an error code. + * + * Note : Caller must hold the pool lock. + */ +int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool, + size_t nr_4k_pages, struct tagged_addr *pages); + +/** + * kbase_mem_pool_free_pages - Free pages to memory pool + * @pool: Memory pool where pages should be freed + * @nr_pages: Number of pages to free + * @pages: Pointer to array holding the physical addresses of the pages to + * free. + * @dirty: Whether any pages may be dirty in the cache. + * @reclaimed: Whether the pages were reclaimable and thus should bypass + * the pool and go straight to the kernel. + * + * Like kbase_mem_pool_free() but optimized for freeing many pages. + */ +void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, + struct tagged_addr *pages, bool dirty, bool reclaimed); + +/** + * kbase_mem_pool_free_pages_locked - Free pages to memory pool + * @pool: Memory pool where pages should be freed + * @nr_pages: Number of pages to free + * @pages: Pointer to array holding the physical addresses of the pages to + * free. + * @dirty: Whether any pages may be dirty in the cache. + * @reclaimed: Whether the pages were reclaimable and thus should bypass + * the pool and go straight to the kernel. + * + * Like kbase_mem_pool_free() but optimized for freeing many pages. + */ +void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool, + size_t nr_pages, struct tagged_addr *pages, bool dirty, + bool reclaimed); + +/** + * kbase_mem_pool_size - Get number of free pages in memory pool + * @pool: Memory pool to inspect + * + * Note: the size of the pool may in certain corner cases exceed @max_size!
+ * + * Return: Number of free pages in the pool + */ +static inline size_t kbase_mem_pool_size(struct kbase_mem_pool *pool) +{ + return READ_ONCE(pool->cur_size); +} + +/** + * kbase_mem_pool_max_size - Get maximum number of free pages in memory pool + * @pool: Memory pool to inspect + * + * Return: Maximum number of free pages in the pool + */ +static inline size_t kbase_mem_pool_max_size(struct kbase_mem_pool *pool) +{ + return pool->max_size; +} + + +/** + * kbase_mem_pool_set_max_size - Set maximum number of free pages in memory pool + * @pool: Memory pool to update + * @max_size: Maximum number of free pages the pool can hold + * + * If @max_size is reduced, the pool will be shrunk to adhere to the new limit. + * For details see kbase_mem_pool_shrink(). + */ +void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size); + +/** + * kbase_mem_pool_grow - Grow the pool + * @pool: Memory pool to grow + * @nr_to_grow: Number of pages to add to the pool + * + * Adds @nr_to_grow pages to the pool. Note that this may cause the pool to + * become larger than the maximum size specified. + * + * Return: 0 on success, -ENOMEM if unable to allocate sufficient pages + */ +int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow); + +/** + * kbase_mem_pool_trim - Grow or shrink the pool to a new size + * @pool: Memory pool to trim + * @new_size: New number of pages in the pool + * + * If @new_size > @cur_size, fill the pool with new pages from the kernel, but + * not above the max_size for the pool. + * If @new_size < @cur_size, shrink the pool by freeing pages to the kernel. + */ +void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size); + +/** + * kbase_mem_pool_mark_dying - Mark that this pool is dying + * @pool: Memory pool + * + * This will cause any ongoing allocation operations (e.g. growing on page fault) + * to be terminated.
+ */ +void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool); + +/** + * kbase_mem_alloc_page - Allocate a new page for a device + * @pool: Memory pool to allocate a page from + * + * Most uses should use kbase_mem_pool_alloc to allocate a page. However that + * function can fail in the event the pool is empty. + * + * Return: A new page or NULL if no memory + */ +struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool); + +/** + * kbase_region_tracker_init - Initialize the region tracker data structure + * @kctx: kbase context + * + * Return: 0 if success, negative error code otherwise. + */ +int kbase_region_tracker_init(struct kbase_context *kctx); + +/** + * kbase_region_tracker_init_jit - Initialize the just-in-time memory + * allocation region + * @kctx: Kbase context. + * @jit_va_pages: Size of the JIT region in pages. + * @max_allocations: Maximum number of allocations allowed for the JIT region. + * Valid range is 0..%BASE_JIT_ALLOC_COUNT. + * @trim_level: Trim level for the JIT region. + * Valid range is 0..%BASE_JIT_MAX_TRIM_LEVEL. + * @group_id: The physical group ID from which to allocate JIT memory. + * Valid range is 0..(%MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * @phys_pages_limit: Maximum number of physical pages to use to back the JIT + * region. Must not exceed @jit_va_pages. + * + * Return: 0 if success, negative error code otherwise. + */ +int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, + int max_allocations, int trim_level, int group_id, + u64 phys_pages_limit); + +/** + * kbase_region_tracker_init_exec - Initialize the GPU-executable memory region + * @kctx: kbase context + * @exec_va_pages: Size of the GPU-executable region in pages. + * It must not be greater than 4 GB. + * + * Return: 0 if success, negative error code otherwise.
+ */ +int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages); + +/** + * kbase_region_tracker_term - Terminate the JIT region + * @kctx: kbase context + */ +void kbase_region_tracker_term(struct kbase_context *kctx); + +/** + * kbase_region_tracker_term_rbtree - Free memory for a region tracker + * + * This will free all the regions within the region tracker + * + * @rbtree: Region tracker tree root + */ +void kbase_region_tracker_term_rbtree(struct rb_root *rbtree); + +struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address( + struct kbase_context *kctx, u64 gpu_addr); +struct kbase_va_region *kbase_find_region_enclosing_address( + struct rb_root *rbtree, u64 gpu_addr); + +/** + * @brief Check that a pointer is actually a valid region. + * + * Must be called with context lock held. + */ +struct kbase_va_region *kbase_region_tracker_find_region_base_address( + struct kbase_context *kctx, u64 gpu_addr); +struct kbase_va_region *kbase_find_region_base_address(struct rb_root *rbtree, + u64 gpu_addr); + +struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree, + u64 start_pfn, size_t nr_pages, int zone); +void kbase_free_alloced_region(struct kbase_va_region *reg); +int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, + u64 addr, size_t nr_pages, size_t align); +int kbase_add_va_region_rbtree(struct kbase_device *kbdev, + struct kbase_va_region *reg, u64 addr, size_t nr_pages, + size_t align); + +bool kbase_check_alloc_flags(unsigned long flags); +bool kbase_check_import_flags(unsigned long flags); + +/** + * kbase_check_alloc_sizes - check user space sizes parameters for an + * allocation + * + * @kctx: kbase context + * @flags: The flags passed from user space + * @va_pages: The size of the requested region, in pages. + * @commit_pages: Number of pages to commit initially. 
+ * @extent: Number of pages to grow by on GPU page fault and/or alignment + * (depending on flags) + * + * Makes checks on the size parameters passed in from user space for a memory + * allocation call, with respect to the flags requested. + * + * Return: 0 if sizes are valid for these flags, negative error code otherwise + */ +int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, + u64 va_pages, u64 commit_pages, u64 extent); + +/** + * kbase_update_region_flags - Convert user space flags to kernel region flags + * + * @kctx: kbase context + * @reg: The region to update the flags on + * @flags: The flags passed from user space + * + * The user space flag BASE_MEM_COHERENT_SYSTEM_REQUIRED will be rejected and + * this function will fail if the system does not support system coherency. + * + * Return: 0 if successful, -EINVAL if the flags are not supported + */ +int kbase_update_region_flags(struct kbase_context *kctx, + struct kbase_va_region *reg, unsigned long flags); + +void kbase_gpu_vm_lock(struct kbase_context *kctx); +void kbase_gpu_vm_unlock(struct kbase_context *kctx); + +int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size); + +/** + * @brief Register region and map it on the GPU. + * + * Call kbase_add_va_region() and map the region on the GPU. + */ +int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align); + +/** + * @brief Remove the region from the GPU and unregister it. + * + * Must be called with context lock held. 
+ */ +int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg); + +/** + * kbase_mmu_update - Configure an address space on the GPU to the specified + * MMU tables + * + * The caller has the following locking conditions: + * - It must hold kbase_device->mmu_hw_mutex + * - It must hold the hwaccess_lock + * + * @kbdev: Kbase device structure + * @mmut: The set of MMU tables to be configured on the address space + * @as_nr: The address space to be configured + */ +void kbase_mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + int as_nr); + +/** + * kbase_mmu_disable() - Disable the MMU for a previously active kbase context. + * @kctx: Kbase context + * + * Disable and perform the required cache maintenance to remove all + * data of the provided kbase context from the GPU caches. + * + * The caller has the following locking conditions: + * - It must hold kbase_device->mmu_hw_mutex + * - It must hold the hwaccess_lock + */ +void kbase_mmu_disable(struct kbase_context *kctx); + +/** + * kbase_mmu_disable_as() - Set the MMU to unmapped mode for the specified + * address space. + * @kbdev: Kbase device + * @as_nr: The address space number to set to unmapped. + * + * This function must only be called during reset/power-up and it is used to + * ensure the registers are in a known state. + * + * The caller must hold kbdev->mmu_hw_mutex. + */ +void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr); + +void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); + +/** Dump the MMU tables to a buffer + * + * This function allocates a buffer (of @c nr_pages pages) to hold a dump of the MMU tables and fills it. If the + * buffer is too small then the return value will be NULL. + * + * The GPU vm lock must be held when calling this function. + * + * The buffer returned should be freed with @ref vfree when it is no longer required.
+ * + * @param[in] kctx The kbase context to dump + * @param[in] nr_pages The number of pages to allocate for the buffer. + * + * @return The address of the buffer containing the MMU dump or NULL on error (including if the @c nr_pages is too + * small) + */ +void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages); + +/** + * kbase_sync_now - Perform cache maintenance on a memory region + * + * @kctx: The kbase context of the region + * @sset: A syncset structure describing the region and direction of the + * synchronisation required + * + * Return: 0 on success or error code + */ +int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset); +void kbase_sync_single(struct kbase_context *kctx, struct tagged_addr cpu_pa, + struct tagged_addr gpu_pa, off_t offset, size_t size, + enum kbase_sync_type sync_fn); + +/* OS specific functions */ +int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr); +int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg); +void kbase_os_mem_map_lock(struct kbase_context *kctx); +void kbase_os_mem_map_unlock(struct kbase_context *kctx); + +/** + * @brief Update the memory allocation counters for the current process + * + * OS specific call to update the current memory allocation counters for the current process with + * the supplied delta. + * + * @param[in] kctx The kbase context + * @param[in] pages The desired delta to apply to the memory usage counters. + */ + +void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages); + +/** + * @brief Add to the memory allocation counters for the current process + * + * OS specific call to add to the current memory allocation counters for the current process by + * the supplied amount. + * + * @param[in] kctx The kernel base context used for the allocation. + * @param[in] pages The desired delta to apply to the memory usage counters. 
+ */ + +static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages) +{ + kbasep_os_process_page_usage_update(kctx, pages); +} + +/** + * @brief Subtract from the memory allocation counters for the current process + * + * OS specific call to subtract from the current memory allocation counters for the current process by + * the supplied amount. + * + * @param[in] kctx The kernel base context used for the allocation. + * @param[in] pages The desired delta to apply to the memory usage counters. + */ + +static inline void kbase_process_page_usage_dec(struct kbase_context *kctx, int pages) +{ + kbasep_os_process_page_usage_update(kctx, 0 - pages); +} + +/** + * kbasep_find_enclosing_cpu_mapping_offset() - Find the offset of the CPU + * mapping of a memory allocation containing a given address range + * + * Searches for a CPU mapping of any part of any region that fully encloses the + * CPU virtual address range specified by @uaddr and @size. Returns a failure + * indication if only part of the address range lies within a CPU mapping. + * + * @kctx: The kernel base context used for the allocation. + * @uaddr: Start of the CPU virtual address range. + * @size: Size of the CPU virtual address range (in bytes). + * @offset: The offset from the start of the allocation to the specified CPU + * virtual address. + * + * Return: 0 if offset was obtained successfully. Error code otherwise. + */ +int kbasep_find_enclosing_cpu_mapping_offset( + struct kbase_context *kctx, + unsigned long uaddr, size_t size, u64 *offset); + +/** + * kbasep_find_enclosing_gpu_mapping_start_and_offset() - Find the address of + * the start of GPU virtual memory region which encloses @gpu_addr for the + * @size length in bytes + * + * Searches for the memory region in GPU virtual memory space which contains + * the region defined by the @gpu_addr and @size, where @gpu_addr is the + * beginning and @size the length in bytes of the provided region. 
If found, + * the location of the start address of the GPU virtual memory region is + * passed in @start pointer and the location of the offset of the region into + * the GPU virtual memory region is passed in @offset pointer. + * + * @kctx: The kernel base context within which the memory is searched. + * @gpu_addr: GPU virtual address for which the region is sought; defines + * the beginning of the provided region. + * @size: The length (in bytes) of the provided region for which the + * GPU virtual memory region is sought. + * @start: Pointer to the location where the address of the start of + * the found GPU virtual memory region is. + * @offset: Pointer to the location where the offset of @gpu_addr into + * the found GPU virtual memory region is. + */ +int kbasep_find_enclosing_gpu_mapping_start_and_offset( + struct kbase_context *kctx, + u64 gpu_addr, size_t size, u64 *start, u64 *offset); + +/** + * kbase_alloc_phy_pages_helper - Allocates physical pages. + * @alloc: allocation object to add pages to + * @nr_pages_requested: number of physical pages to allocate + * + * Allocates \a nr_pages_requested and updates the alloc object. + * + * Return: 0 if all pages have been successfully allocated. Error code otherwise + * + * Note : The caller must not hold vm_lock, as this could cause a deadlock if + * the kernel OoM killer runs. If the caller must allocate pages while holding + * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead. + * + * This function cannot be used from interrupt context + */ +int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, + size_t nr_pages_requested); + +/** + * kbase_alloc_phy_pages_helper_locked - Allocates physical pages. + * @alloc: allocation object to add pages to + * @pool: Memory pool to allocate from + * @nr_pages_requested: number of physical pages to allocate + * @prealloc_sa: Information about the partial allocation if the amount + * of memory requested is not a multiple of 2MB. 
One + * instance of struct kbase_sub_alloc must be allocated by + * the caller iff CONFIG_MALI_2MB_ALLOC is enabled. + * + * Allocates \a nr_pages_requested and updates the alloc object. This function + * does not allocate new pages from the kernel, and therefore will never trigger + * the OoM killer. Therefore, it can be run while the vm_lock is held. + * + * As new pages can not be allocated, the caller must ensure there are + * sufficient pages in the pool. Usage of this function should look like : + * + * kbase_gpu_vm_lock(kctx); + * kbase_mem_pool_lock(pool) + * while (kbase_mem_pool_size(pool) < pages_required) { + * kbase_mem_pool_unlock(pool) + * kbase_gpu_vm_unlock(kctx); + * kbase_mem_pool_grow(pool) + * kbase_gpu_vm_lock(kctx); + * kbase_mem_pool_lock(pool) + * } + * kbase_alloc_phy_pages_helper_locked(pool) + * kbase_mem_pool_unlock(pool) + * Perform other processing that requires vm_lock... + * kbase_gpu_vm_unlock(kctx); + * + * This ensures that the pool can be grown to the required size and that the + * allocation can complete without another thread using the newly grown pages. + * + * If CONFIG_MALI_2MB_ALLOC is defined and the allocation is >= 2MB, then + * @pool must be alloc->imported.native.kctx->lp_mem_pool. Otherwise it must be + * alloc->imported.native.kctx->mem_pool. + * @prealloc_sa is used to manage the non-2MB sub-allocation. It has to be + * pre-allocated because we must not sleep (due to the usage of kmalloc()) + * whilst holding pool->pool_lock. + * @prealloc_sa shall be set to NULL if it has been consumed by this function + * to indicate that the caller must not free it. + * + * Return: Pointer to array of allocated pages. NULL on failure. + * + * Note : Caller must hold pool->pool_lock + */ +struct tagged_addr *kbase_alloc_phy_pages_helper_locked( + struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool, + size_t nr_pages_requested, + struct kbase_sub_alloc **prealloc_sa); + +/** +* @brief Free physical pages. 
+* +* Frees \a nr_pages and updates the alloc object. +* +* @param[in] alloc allocation object to free pages from +* @param[in] nr_pages_to_free number of physical pages to free +* +* Return: 0 on success, otherwise a negative error code +*/ +int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free); + +/** + * kbase_free_phy_pages_helper_locked - Free pages allocated with + * kbase_alloc_phy_pages_helper_locked() + * @alloc: Allocation object to free pages from + * @pool: Memory pool to return freed pages to + * @pages: Pages allocated by kbase_alloc_phy_pages_helper_locked() + * @nr_pages_to_free: Number of physical pages to free + * + * This function atomically frees pages allocated with + * kbase_alloc_phy_pages_helper_locked(). @pages is the pointer to the page + * array that is returned by that function. @pool must be the pool that the + * pages were originally allocated from. + * + * If the mem_pool has been unlocked since the allocation then + * kbase_free_phy_pages_helper() should be used instead. + */ +void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, + struct kbase_mem_pool *pool, struct tagged_addr *pages, + size_t nr_pages_to_free); + +static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr) +{ + SetPagePrivate(p); + if (sizeof(dma_addr_t) > sizeof(p->private)) { + /* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the + * private field stays the same. 
So we have to be clever and + * use the fact that we only store DMA addresses of whole pages, + * so the low bits should be zero */ + KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1))); + set_page_private(p, dma_addr >> PAGE_SHIFT); + } else { + set_page_private(p, dma_addr); + } +} + +static inline dma_addr_t kbase_dma_addr(struct page *p) +{ + if (sizeof(dma_addr_t) > sizeof(p->private)) + return ((dma_addr_t)page_private(p)) << PAGE_SHIFT; + + return (dma_addr_t)page_private(p); +} + +static inline void kbase_clear_dma_addr(struct page *p) +{ + ClearPagePrivate(p); +} + +/** + * @brief Process a page fault. + * + * @param[in] data work_struct passed by queue_work() + */ +void page_fault_worker(struct work_struct *data); + +/** + * @brief Process a bus fault. + * + * @param[in] data work_struct passed by queue_work() + */ +void bus_fault_worker(struct work_struct *data); + +/** + * @brief Flush MMU workqueues. + * + * This function will cause any outstanding page or bus faults to be processed. + * It should be called prior to powering off the GPU. + * + * @param[in] kbdev Device pointer + */ +void kbase_flush_mmu_wqs(struct kbase_device *kbdev); + +/** + * kbase_sync_single_for_device - update physical memory and give GPU ownership + * @kbdev: Device pointer + * @handle: DMA address of region + * @size: Size of region to sync + * @dir: DMA data direction + */ + +void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, + size_t size, enum dma_data_direction dir); + +/** + * kbase_sync_single_for_cpu - update physical memory and give CPU ownership + * @kbdev: Device pointer + * @handle: DMA address of region + * @size: Size of region to sync + * @dir: DMA data direction + */ + +void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, + size_t size, enum dma_data_direction dir); + +#ifdef CONFIG_DEBUG_FS +/** + * kbase_jit_debugfs_init - Add per context debugfs entry for JIT. 
+ * @kctx: kbase context + */ +void kbase_jit_debugfs_init(struct kbase_context *kctx); +#endif /* CONFIG_DEBUG_FS */ + +/** + * kbase_jit_init - Initialize the JIT memory pool management + * @kctx: kbase context + * + * Returns zero on success or negative error number on failure. + */ +int kbase_jit_init(struct kbase_context *kctx); + +/** + * kbase_jit_allocate - Allocate JIT memory + * @kctx: kbase context + * @info: JIT allocation information + * @ignore_pressure_limit: Whether the JIT memory pressure limit is ignored + * + * Return: JIT allocation on success or NULL on failure. + */ +struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, + const struct base_jit_alloc_info *info, + bool ignore_pressure_limit); + +/** + * kbase_jit_free - Free a JIT allocation + * @kctx: kbase context + * @reg: JIT allocation + * + * Frees a JIT allocation and places it into the free pool for later reuse. + */ +void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg); + +/** + * kbase_jit_backing_lost - Inform JIT that an allocation has lost backing + * @reg: JIT allocation + */ +void kbase_jit_backing_lost(struct kbase_va_region *reg); + +/** + * kbase_jit_evict - Evict a JIT allocation from the pool + * @kctx: kbase context + * + * Evict the least recently used JIT allocation from the pool. This can be + * required if normal VA allocations are failing due to VA exhaustion. + * + * Return: True if a JIT allocation was freed, false otherwise. 
+ */ +bool kbase_jit_evict(struct kbase_context *kctx); + +/** + * kbase_jit_term - Terminate the JIT memory pool management + * @kctx: kbase context + */ +void kbase_jit_term(struct kbase_context *kctx); + +#if MALI_JIT_PRESSURE_LIMIT +/** + * kbase_trace_jit_report_gpu_mem_trace_enabled - variant of + * kbase_trace_jit_report_gpu_mem() that should only be called once the + * corresponding tracepoint is verified to be enabled + * @kctx: kbase context + * @reg: Just-in-time memory region to trace + * @flags: combination of values from enum kbase_jit_report_flags + */ +void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, + struct kbase_va_region *reg, unsigned int flags); +#endif /* MALI_JIT_PRESSURE_LIMIT */ + +/** + * kbase_trace_jit_report_gpu_mem - Trace information about the GPU memory used + * to make a JIT report + * @kctx: kbase context + * @reg: Just-in-time memory region to trace + * @flags: combination of values from enum kbase_jit_report_flags + * + * Information is traced using the trace_mali_jit_report_gpu_mem() tracepoint. + * + * In case that tracepoint is not enabled, this function should have the same + * low overheads as a tracepoint itself (i.e. use of 'jump labels' to avoid + * conditional branches) + * + * This can take the reg_lock on @kctx, do not use in places where this lock is + * already held. + * + * Note: this has to be a macro because at this stage the tracepoints have not + * been included. Also gives no opportunity for the compiler to mess up + * inlining it. 
+ */ +#if MALI_JIT_PRESSURE_LIMIT +#define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) \ + do { \ + if (trace_mali_jit_report_gpu_mem_enabled()) \ + kbase_trace_jit_report_gpu_mem_trace_enabled( \ + (kctx), (reg), (flags)); \ + } while (0) +#else +#define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) \ + CSTD_NOP(kctx, reg, flags) +#endif /* MALI_JIT_PRESSURE_LIMIT */ + +#if MALI_JIT_PRESSURE_LIMIT +/** + * kbase_jit_report_update_pressure - safely update the JIT physical page + * pressure and JIT region's estimate of used_pages + * @kctx: kbase context, to update the current physical pressure + * @reg: Just-in-time memory region to update with @new_used_pages + * @new_used_pages: new value of number of pages used in the JIT region + * @flags: combination of values from enum kbase_jit_report_flags + * + * Takes care of: + * - correctly updating the pressure given the current reg->used_pages and + * new_used_pages + * - then updating the %kbase_va_region used_pages member + * + * Precondition: + * - new_used_pages <= reg->nr_pages + */ +void kbase_jit_report_update_pressure(struct kbase_context *kctx, + struct kbase_va_region *reg, u64 new_used_pages, + unsigned int flags); +#endif /* MALI_JIT_PRESSURE_LIMIT */ + +/** + * kbase_has_exec_va_zone - EXEC_VA zone predicate + * + * Determine whether an EXEC_VA zone has been created for the GPU address space + * of the given kbase context. + * + * @kctx: kbase context + * + * Return: True if the kbase context has an EXEC_VA zone. + */ +bool kbase_has_exec_va_zone(struct kbase_context *kctx); + +/** + * kbase_map_external_resource - Map an external resource to the GPU. + * @kctx: kbase context. + * @reg: The region to map. + * @locked_mm: The mm_struct which has been locked for this operation. + * + * Return: The physical allocation which backs the region on success or NULL + * on failure. 
+ */ +struct kbase_mem_phy_alloc *kbase_map_external_resource( + struct kbase_context *kctx, struct kbase_va_region *reg, + struct mm_struct *locked_mm); + +/** + * kbase_unmap_external_resource - Unmap an external resource from the GPU. + * @kctx: kbase context. + * @reg: The region to unmap or NULL if it has already been released. + * @alloc: The physical allocation being unmapped. + */ +void kbase_unmap_external_resource(struct kbase_context *kctx, + struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc); + + +/** + * kbase_jd_user_buf_pin_pages - Pin the pages of a user buffer. + * @kctx: kbase context. + * @reg: The region associated with the imported user buffer. + * + * To successfully pin the pages for a user buffer the current mm_struct must + * be the same as the mm_struct of the user buffer. After successfully pinning + * the pages further calls to this function succeed without doing work. + * + * Return: zero on success or negative number on failure. + */ +int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx, + struct kbase_va_region *reg); + +/** + * kbase_sticky_resource_init - Initialize sticky resource management. + * @kctx: kbase context + * + * Returns zero on success or negative error number on failure. + */ +int kbase_sticky_resource_init(struct kbase_context *kctx); + +/** + * kbase_sticky_resource_acquire - Acquire a reference on a sticky resource. + * @kctx: kbase context. + * @gpu_addr: The GPU address of the external resource. + * + * Return: The metadata object which represents the binding between the + * external resource and the kbase context on success or NULL on failure. + */ +struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( + struct kbase_context *kctx, u64 gpu_addr); + +/** + * kbase_sticky_resource_release - Release a reference on a sticky resource. + * @kctx: kbase context. + * @meta: Binding metadata. + * @gpu_addr: GPU address of the external resource. 
+ * + * If meta is NULL then gpu_addr will be used to scan the metadata list and + * find the matching metadata (if any), otherwise the provided meta will be + * used and gpu_addr will be ignored. + * + * Return: True if the release found the metadata and the reference was dropped. + */ +bool kbase_sticky_resource_release(struct kbase_context *kctx, + struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr); + +/** + * kbase_sticky_resource_release_force - Release a sticky resource. + * @kctx: kbase context. + * @meta: Binding metadata. + * @gpu_addr: GPU address of the external resource. + * + * If meta is NULL then gpu_addr will be used to scan the metadata list and + * find the matching metadata (if any), otherwise the provided meta will be + * used and gpu_addr will be ignored. + * + * Return: True if the release found the metadata and the resource was + * released. + */ +bool kbase_sticky_resource_release_force(struct kbase_context *kctx, + struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr); + +/** + * kbase_sticky_resource_term - Terminate sticky resource management. + * @kctx: kbase context + */ +void kbase_sticky_resource_term(struct kbase_context *kctx); + +/** + * kbase_mem_pool_lock - Lock a memory pool + * @pool: Memory pool to lock + */ +static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool) +{ + spin_lock(&pool->pool_lock); +} + +/** + * kbase_mem_pool_unlock - Unlock a memory pool + * @pool: Memory pool to unlock + */ +static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool) +{ + spin_unlock(&pool->pool_lock); +} + +/** + * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable. + * @alloc: The physical allocation + */ +void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc); + + +/** + * kbase_mem_umm_map - Map dma-buf + * @kctx: Pointer to the kbase context + * @reg: Pointer to the region of the imported dma-buf to map + * + * Map a dma-buf on the GPU. The mappings are reference counted. 
+ * + * Returns 0 on success, or a negative error code. + */ +int kbase_mem_umm_map(struct kbase_context *kctx, + struct kbase_va_region *reg); + +/** + * kbase_mem_umm_unmap - Unmap dma-buf + * @kctx: Pointer to the kbase context + * @reg: Pointer to the region of the imported dma-buf to unmap + * @alloc: Pointer to the alloc to release + * + * Unmap a dma-buf from the GPU. The mappings are reference counted. + * + * @reg must be the original region with GPU mapping of @alloc; or NULL. If + * @reg is NULL, or doesn't match @alloc, the GPU page table entries matching + * @reg will not be updated. + * + * @alloc must be a valid physical allocation of type + * KBASE_MEM_TYPE_IMPORTED_UMM that was previously mapped by + * kbase_mem_umm_map(). The dma-buf attachment referenced by @alloc will + * release its mapping reference, and if the refcount reaches 0, also be + * unmapped, regardless of the value of @reg. + */ +void kbase_mem_umm_unmap(struct kbase_context *kctx, + struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc); + +/** + * kbase_mem_do_sync_imported - Sync caches for imported memory + * @kctx: Pointer to the kbase context + * @reg: Pointer to the region with imported memory to sync + * @sync_fn: The type of sync operation to perform + * + * Sync CPU caches for supported (currently only dma-buf (UMM)) memory. + * Attempting to sync unsupported imported memory types will result in an error + * code, -EINVAL. + * + * Return: 0 on success, or a negative error code. + */ +int kbase_mem_do_sync_imported(struct kbase_context *kctx, + struct kbase_va_region *reg, enum kbase_sync_type sync_fn); + + +/** + * kbase_mem_copy_to_pinned_user_pages - Memcpy from source input page to + * an unaligned address at a given offset from the start of a target page. + * + * @dest_pages: Pointer to the array of pages to which the content is + * to be copied from the provided @src_page. 
+ * @src_page: Pointer to the page which correspond to the source page + * from which the copying will take place. + * @to_copy: Total number of bytes pending to be copied from + * @src_page to @target_page_nr within @dest_pages. + * This will get decremented by number of bytes we + * managed to copy from source page to target pages. + * @nr_pages: Total number of pages present in @dest_pages. + * @target_page_nr: Target page number to which @src_page needs to be + * copied. This will get incremented by one if + * we are successful in copying from source page. + * @offset: Offset in bytes into the target pages from which the + * copying is to be performed. + * + * Return: 0 on success, or a negative error code. + */ +int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, + void *src_page, size_t *to_copy, unsigned int nr_pages, + unsigned int *target_page_nr, size_t offset); + +#endif /* _KBASE_MEM_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c new file mode 100644 index 0000000..b669f2a --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c @@ -0,0 +1,3030 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file mali_kbase_mem_linux.c + * Base kernel memory APIs, Linux implementation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \ + (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) +#include +#endif /* LINUX_VERSION_CODE >= 3.5.0 && < 4.8.0 */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#if ((KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) || \ + (KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE)) +/* Enable workaround for ion for kernels prior to v5.0.0 and from v5.3.0 + * onwards. + * + * For kernels prior to v4.12, workaround is needed as ion lacks the cache + * maintenance in begin_cpu_access and end_cpu_access methods. + * + * For kernels prior to v4.17.2, workaround is needed to avoid the potentially + * disruptive warnings which can come if begin_cpu_access and end_cpu_access + * methods are not called in pairs. + * Note that some long term maintenance kernel versions (e.g. 4.9.x, 4.14.x) + * only require this workaround on their earlier releases. However it is still + * safe to use it on such releases, and it simplifies the version check. + * + * For kernels later than v4.17.2, workaround is needed as ion can potentially + * end up calling dma_sync_sg_for_* for a dma-buf importer that hasn't mapped + * the attachment. This would result in a kernel panic as ion populates the + * dma_address when the attachment is mapped and kernel derives the physical + * address for cache maintenance from the dma_address. + * With some multi-threaded tests it has been seen that the same dma-buf memory + * gets imported twice on Mali DDK side and so the problem of sync happening + * with an importer having an unmapped attachment comes at the time of 2nd + * import. The same problem can if there is another importer of dma-buf + * memory. 
+ * + * Workaround can be safely disabled for kernels between v5.0.0 and v5.2.2, + * as all the above stated issues are not there. + * + * dma_sync_sg_for_* calls will be made directly as a workaround using the + * Kbase's attachment to dma-buf that was previously mapped. + */ +#define KBASE_MEM_ION_SYNC_WORKAROUND +#endif + +#define IR_THRESHOLD_STEPS (256u) + + +static int kbase_vmap_phy_pages(struct kbase_context *kctx, + struct kbase_va_region *reg, u64 offset_bytes, size_t size, + struct kbase_vmap_struct *map); +static void kbase_vunmap_phy_pages(struct kbase_context *kctx, + struct kbase_vmap_struct *map); + +static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma); + +static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages); + +/* Retrieve the associated region pointer if the GPU address corresponds to + * one of the event memory pages. The enclosing region, if found, shouldn't + * have been marked as free. + */ +static struct kbase_va_region *kbase_find_event_mem_region( + struct kbase_context *kctx, u64 gpu_addr) +{ + + return NULL; +} + +/** + * kbase_phy_alloc_mapping_init - Initialize the kernel side permanent mapping + * of the physical allocation belonging to a + * region + * @kctx: The kernel base context @reg belongs to. + * @reg: The region whose physical allocation is to be mapped + * @vsize: The size of the requested region, in pages + * @size: The size in pages initially committed to the region + * + * Return: 0 on success, otherwise an error code indicating failure + * + * Maps the physical allocation backing a non-free @reg, so it may be + * accessed directly from the kernel. This is only supported for physical + * allocations of type KBASE_MEM_TYPE_NATIVE, and will fail for other types of + * physical allocation. + * + * The mapping is stored directly in the allocation that backs @reg. 
The + * refcount is not incremented at this point. Instead, use of the mapping should + * be surrounded by kbase_phy_alloc_mapping_get() and + * kbase_phy_alloc_mapping_put() to ensure it does not disappear whilst the + * client is accessing it. + * + * Both cached and uncached regions are allowed, but any sync operations are the + * responsibility of the client using the permanent mapping. + * + * A number of checks are made to ensure that a region that needs a permanent + * mapping can actually be supported: + * - The region must be created as fully backed + * - The region must not be growable + * + * This function will fail if those checks are not satisfied. + * + * On success, the region will also be forced into a certain kind: + * - It will no longer be growable + */ +static int kbase_phy_alloc_mapping_init(struct kbase_context *kctx, + struct kbase_va_region *reg, size_t vsize, size_t size) +{ + size_t size_bytes = (size << PAGE_SHIFT); + struct kbase_vmap_struct *kern_mapping; + int err = 0; + + /* Can only map in regions that are always fully committed + * Don't setup the mapping twice + * Only support KBASE_MEM_TYPE_NATIVE allocations + */ + if (vsize != size || reg->cpu_alloc->permanent_map != NULL || + reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) + return -EINVAL; + + if (size > (KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES - + atomic_read(&kctx->permanent_mapped_pages))) { + dev_warn(kctx->kbdev->dev, "Request for %llu more pages mem needing a permanent mapping would breach limit %lu, currently at %d pages", + (u64)size, + KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES, + atomic_read(&kctx->permanent_mapped_pages)); + return -ENOMEM; + } + + kern_mapping = kzalloc(sizeof(*kern_mapping), GFP_KERNEL); + if (!kern_mapping) + return -ENOMEM; + + err = kbase_vmap_phy_pages(kctx, reg, 0u, size_bytes, kern_mapping); + if (err < 0) + goto vmap_fail; + + /* No support for growing or shrinking mapped regions */ + reg->flags &= ~KBASE_REG_GROWABLE; + + 
reg->cpu_alloc->permanent_map = kern_mapping; + atomic_add(size, &kctx->permanent_mapped_pages); + + return 0; +vmap_fail: + kfree(kern_mapping); + return err; +} + +void kbase_phy_alloc_mapping_term(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc) +{ + WARN_ON(!alloc->permanent_map); + kbase_vunmap_phy_pages(kctx, alloc->permanent_map); + kfree(alloc->permanent_map); + + alloc->permanent_map = NULL; + + /* Mappings are only done on cpu_alloc, so don't need to worry about + * this being reduced a second time if a separate gpu_alloc is + * freed + */ + WARN_ON(alloc->nents > atomic_read(&kctx->permanent_mapped_pages)); + atomic_sub(alloc->nents, &kctx->permanent_mapped_pages); +} + +void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx, + u64 gpu_addr, + struct kbase_vmap_struct **out_kern_mapping) +{ + struct kbase_va_region *reg; + void *kern_mem_ptr = NULL; + struct kbase_vmap_struct *kern_mapping; + u64 mapping_offset; + + WARN_ON(!kctx); + WARN_ON(!out_kern_mapping); + + kbase_gpu_vm_lock(kctx); + + /* First do a quick lookup in the list of event memory regions */ + reg = kbase_find_event_mem_region(kctx, gpu_addr); + + if (!reg) { + reg = kbase_region_tracker_find_region_enclosing_address( + kctx, gpu_addr); + } + + if (kbase_is_region_invalid_or_free(reg)) + goto out_unlock; + + kern_mapping = reg->cpu_alloc->permanent_map; + if (kern_mapping == NULL) + goto out_unlock; + + mapping_offset = gpu_addr - (reg->start_pfn << PAGE_SHIFT); + + /* Refcount the allocations to prevent them disappearing */ + WARN_ON(reg->cpu_alloc != kern_mapping->cpu_alloc); + WARN_ON(reg->gpu_alloc != kern_mapping->gpu_alloc); + (void)kbase_mem_phy_alloc_get(kern_mapping->cpu_alloc); + (void)kbase_mem_phy_alloc_get(kern_mapping->gpu_alloc); + + kern_mem_ptr = (void *)(uintptr_t)((uintptr_t)kern_mapping->addr + mapping_offset); + *out_kern_mapping = kern_mapping; +out_unlock: + kbase_gpu_vm_unlock(kctx); + return kern_mem_ptr; +} + +void 
kbase_phy_alloc_mapping_put(struct kbase_context *kctx, + struct kbase_vmap_struct *kern_mapping) +{ + WARN_ON(!kctx); + WARN_ON(!kern_mapping); + + WARN_ON(kctx != kern_mapping->cpu_alloc->imported.native.kctx); + WARN_ON(kern_mapping != kern_mapping->cpu_alloc->permanent_map); + + kbase_mem_phy_alloc_put(kern_mapping->cpu_alloc); + kbase_mem_phy_alloc_put(kern_mapping->gpu_alloc); + + /* kern_mapping and the gpu/cpu phy allocs backing it must not be used + * from now on + */ +} + +struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, + u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, + u64 *gpu_va) +{ + int zone; + struct kbase_va_region *reg; + struct rb_root *rbtree; + struct device *dev; + + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(flags); + KBASE_DEBUG_ASSERT(gpu_va); + + dev = kctx->kbdev->dev; + dev_dbg(dev, "Allocating %lld va_pages, %lld commit_pages, %lld extent, 0x%llX flags\n", + va_pages, commit_pages, extent, *flags); + + if (!(*flags & BASE_MEM_FLAG_MAP_FIXED)) + *gpu_va = 0; /* return 0 on failure */ + else + dev_err(dev, + "Keeping requested GPU VA of 0x%llx\n", + (unsigned long long)*gpu_va); + + if (!kbase_check_alloc_flags(*flags)) { + dev_warn(dev, + "kbase_mem_alloc called with bad flags (%llx)", + (unsigned long long)*flags); + goto bad_flags; + } + +#ifdef CONFIG_DEBUG_FS + if (unlikely(kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE))) { + /* Mask coherency flags if infinite cache is enabled to prevent + * the skipping of syncs from BASE side. 
+ */ + *flags &= ~(BASE_MEM_COHERENT_SYSTEM_REQUIRED | + BASE_MEM_COHERENT_SYSTEM); + } +#endif + + if ((*flags & BASE_MEM_UNCACHED_GPU) != 0 && + (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0) { + /* Remove COHERENT_SYSTEM_REQUIRED flag if uncached GPU mapping is requested */ + *flags &= ~BASE_MEM_COHERENT_SYSTEM_REQUIRED; + } + if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 && + !kbase_device_is_cpu_coherent(kctx->kbdev)) { + dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable"); + goto bad_flags; + } + if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 && + !kbase_device_is_cpu_coherent(kctx->kbdev)) { + /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */ + *flags &= ~BASE_MEM_COHERENT_SYSTEM; + } + + if (kbase_check_alloc_sizes(kctx, *flags, va_pages, commit_pages, extent)) + goto bad_sizes; + +#ifdef CONFIG_MALI_MEMORY_FULLY_BACKED + /* Ensure that memory is fully physically-backed. */ + if (*flags & BASE_MEM_GROW_ON_GPF) + commit_pages = va_pages; +#endif + + /* find out which VA zone to use */ + if (*flags & BASE_MEM_SAME_VA) { + rbtree = &kctx->reg_rbtree_same; + zone = KBASE_REG_ZONE_SAME_VA; + } else if ((*flags & BASE_MEM_PROT_GPU_EX) && kbase_has_exec_va_zone(kctx)) { + rbtree = &kctx->reg_rbtree_exec; + zone = KBASE_REG_ZONE_EXEC_VA; + } else { + rbtree = &kctx->reg_rbtree_custom; + zone = KBASE_REG_ZONE_CUSTOM_VA; + } + + reg = kbase_alloc_free_region(rbtree, PFN_DOWN(*gpu_va), + va_pages, zone); + + if (!reg) { + dev_err(dev, "Failed to allocate free region"); + goto no_region; + } + + if (kbase_update_region_flags(kctx, reg, *flags) != 0) + goto invalid_flags; + + if (kbase_reg_prepare_native(reg, kctx, + base_mem_group_id_get(*flags)) != 0) { + dev_err(dev, "Failed to prepare region"); + goto prepare_failed; + } + + if (*flags & BASE_MEM_GROW_ON_GPF) { + unsigned int const ir_threshold = atomic_read( + &kctx->kbdev->memdev.ir_threshold); + + reg->threshold_pages = ((va_pages * ir_threshold) + + 
(IR_THRESHOLD_STEPS / 2)) / IR_THRESHOLD_STEPS; + } else + reg->threshold_pages = 0; + + if (*flags & (BASE_MEM_GROW_ON_GPF|BASE_MEM_TILER_ALIGN_TOP)) { + /* kbase_check_alloc_sizes() already checks extent is valid for + * assigning to reg->extent */ + reg->extent = extent; + } else { + reg->extent = 0; + } + + if (kbase_alloc_phy_pages(reg, va_pages, commit_pages) != 0) { + dev_warn(dev, "Failed to allocate %lld pages (va_pages=%lld)", + (unsigned long long)commit_pages, + (unsigned long long)va_pages); + goto no_mem; + } + reg->initial_commit = commit_pages; + + kbase_gpu_vm_lock(kctx); + + if (reg->flags & KBASE_REG_PERMANENT_KERNEL_MAPPING) { + /* Permanent kernel mappings must happen as soon as + * reg->cpu_alloc->pages is ready. Currently this happens after + * kbase_alloc_phy_pages(). If we move that to setup pages + * earlier, also move this call too + */ + int err = kbase_phy_alloc_mapping_init(kctx, reg, va_pages, + commit_pages); + if (err < 0) { + kbase_gpu_vm_unlock(kctx); + goto no_kern_mapping; + } + } + + + /* mmap needed to setup VA? 
*/ + if (*flags & BASE_MEM_SAME_VA) { + unsigned long cookie, cookie_nr; + + /* Bind to a cookie */ + if (bitmap_empty(kctx->cookies, BITS_PER_LONG)) { + dev_err(dev, "No cookies available for allocation!"); + kbase_gpu_vm_unlock(kctx); + goto no_cookie; + } + /* return a cookie */ + cookie_nr = find_first_bit(kctx->cookies, BITS_PER_LONG); + bitmap_clear(kctx->cookies, cookie_nr, 1); + BUG_ON(kctx->pending_regions[cookie_nr]); + kctx->pending_regions[cookie_nr] = reg; + + /* relocate to correct base */ + cookie = cookie_nr + PFN_DOWN(BASE_MEM_COOKIE_BASE); + cookie <<= PAGE_SHIFT; + + *gpu_va = (u64) cookie; + } else /* we control the VA */ { + if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, 1) != 0) { + dev_warn(dev, "Failed to map memory on GPU"); + kbase_gpu_vm_unlock(kctx); + goto no_mmap; + } + /* return real GPU VA */ + *gpu_va = reg->start_pfn << PAGE_SHIFT; + } + + kbase_gpu_vm_unlock(kctx); + return reg; + +no_mmap: +no_cookie: +no_kern_mapping: +no_mem: + kbase_mem_phy_alloc_put(reg->cpu_alloc); + kbase_mem_phy_alloc_put(reg->gpu_alloc); +invalid_flags: +prepare_failed: + kfree(reg); +no_region: +bad_sizes: +bad_flags: + return NULL; +} +KBASE_EXPORT_TEST_API(kbase_mem_alloc); + +int kbase_mem_query(struct kbase_context *kctx, + u64 gpu_addr, u64 query, u64 * const out) +{ + struct kbase_va_region *reg; + int ret = -EINVAL; + + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(out); + + if (gpu_addr & ~PAGE_MASK) { + dev_warn(kctx->kbdev->dev, "mem_query: gpu_addr: passed parameter is invalid"); + return -EINVAL; + } + + kbase_gpu_vm_lock(kctx); + + /* Validate the region */ + reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); + if (kbase_is_region_invalid_or_free(reg)) + goto out_unlock; + + switch (query) { + case KBASE_MEM_QUERY_COMMIT_SIZE: + if (reg->cpu_alloc->type != KBASE_MEM_TYPE_ALIAS) { + *out = kbase_reg_current_backed_size(reg); + } else { + size_t i; + struct kbase_aliased *aliased; + *out = 0; + aliased = 
reg->cpu_alloc->imported.alias.aliased; + for (i = 0; i < reg->cpu_alloc->imported.alias.nents; i++) + *out += aliased[i].length; + } + break; + case KBASE_MEM_QUERY_VA_SIZE: + *out = reg->nr_pages; + break; + case KBASE_MEM_QUERY_FLAGS: + { + *out = 0; + if (KBASE_REG_CPU_WR & reg->flags) + *out |= BASE_MEM_PROT_CPU_WR; + if (KBASE_REG_CPU_RD & reg->flags) + *out |= BASE_MEM_PROT_CPU_RD; + if (KBASE_REG_CPU_CACHED & reg->flags) + *out |= BASE_MEM_CACHED_CPU; + if (KBASE_REG_GPU_WR & reg->flags) + *out |= BASE_MEM_PROT_GPU_WR; + if (KBASE_REG_GPU_RD & reg->flags) + *out |= BASE_MEM_PROT_GPU_RD; + if (!(KBASE_REG_GPU_NX & reg->flags)) + *out |= BASE_MEM_PROT_GPU_EX; + if (KBASE_REG_SHARE_BOTH & reg->flags) + *out |= BASE_MEM_COHERENT_SYSTEM; + if (KBASE_REG_SHARE_IN & reg->flags) + *out |= BASE_MEM_COHERENT_LOCAL; + if (kctx->api_version >= KBASE_API_VERSION(11, 2)) { + /* Prior to 11.2, these were known about by user-side + * but we did not return them. Returning some of these + * caused certain clients that were not expecting them + * to fail, so we omit all of them as a special-case + * for compatibility reasons */ + if (KBASE_REG_PF_GROW & reg->flags) + *out |= BASE_MEM_GROW_ON_GPF; + if (KBASE_REG_PROTECTED & reg->flags) + *out |= BASE_MEM_PROTECTED; + } + if (KBASE_REG_TILER_ALIGN_TOP & reg->flags) + *out |= BASE_MEM_TILER_ALIGN_TOP; + if (!(KBASE_REG_GPU_CACHED & reg->flags)) + *out |= BASE_MEM_UNCACHED_GPU; + if (KBASE_REG_GPU_VA_SAME_4GB_PAGE & reg->flags) + *out |= BASE_MEM_GPU_VA_SAME_4GB_PAGE; + + *out |= base_mem_group_id_set(reg->cpu_alloc->group_id); + + WARN(*out & ~BASE_MEM_FLAGS_QUERYABLE, + "BASE_MEM_FLAGS_QUERYABLE needs updating\n"); + *out &= BASE_MEM_FLAGS_QUERYABLE; + break; + } + default: + *out = 0; + goto out_unlock; + } + + ret = 0; + +out_unlock: + kbase_gpu_vm_unlock(kctx); + return ret; +} + +/** + * kbase_mem_evictable_reclaim_count_objects - Count number of pages in the + * Ephemeral memory eviction list. 
+ * @s: Shrinker + * @sc: Shrinker control + * + * Return: Number of pages which can be freed. + */ +static +unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s, + struct shrink_control *sc) +{ + struct kbase_context *kctx; + struct kbase_mem_phy_alloc *alloc; + unsigned long pages = 0; + + kctx = container_of(s, struct kbase_context, reclaim); + + mutex_lock(&kctx->jit_evict_lock); + + list_for_each_entry(alloc, &kctx->evict_list, evict_node) + pages += alloc->nents; + + mutex_unlock(&kctx->jit_evict_lock); + return pages; +} + +/** + * kbase_mem_evictable_reclaim_scan_objects - Scan the Ephemeral memory eviction + * list for pages and try to reclaim them. + * @s: Shrinker + * @sc: Shrinker control + * + * Return: Number of pages freed (can be less then requested) or -1 if the + * shrinker failed to free pages in its pool. + * + * Note: + * This function accesses region structures without taking the region lock, + * this is required as the OOM killer can call the shrinker after the region + * lock has already been held. + * This is safe as we can guarantee that a region on the eviction list will + * not be freed (kbase_mem_free_region removes the allocation from the list + * before destroying it), or modified by other parts of the driver. + * The eviction list itself is guarded by the eviction lock and the MMU updates + * are protected by their own lock. 
+ */ +static +unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s, + struct shrink_control *sc) +{ + struct kbase_context *kctx; + struct kbase_mem_phy_alloc *alloc; + struct kbase_mem_phy_alloc *tmp; + unsigned long freed = 0; + + kctx = container_of(s, struct kbase_context, reclaim); + mutex_lock(&kctx->jit_evict_lock); + + list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) { + int err; + + err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg, + 0, alloc->nents); + if (err != 0) { + /* + * Failed to remove GPU mapping, tell the shrinker + * to stop trying to shrink our slab even though we + * have pages in it. + */ + freed = -1; + goto out_unlock; + } + + /* + * Update alloc->evicted before freeing the backing so the + * helper can determine that it needs to bypass the accounting + * and memory pool. + */ + alloc->evicted = alloc->nents; + + kbase_free_phy_pages_helper(alloc, alloc->evicted); + freed += alloc->evicted; + list_del_init(&alloc->evict_node); + + /* + * Inform the JIT allocator this region has lost backing + * as it might need to free the allocation. 
+ */ + kbase_jit_backing_lost(alloc->reg); + + /* Enough pages have been freed so stop now */ + if (freed > sc->nr_to_scan) + break; + } +out_unlock: + mutex_unlock(&kctx->jit_evict_lock); + + return freed; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) +static int kbase_mem_evictable_reclaim_shrink(struct shrinker *s, + struct shrink_control *sc) +{ + if (sc->nr_to_scan == 0) + return kbase_mem_evictable_reclaim_count_objects(s, sc); + + return kbase_mem_evictable_reclaim_scan_objects(s, sc); +} +#endif + +int kbase_mem_evictable_init(struct kbase_context *kctx) +{ + INIT_LIST_HEAD(&kctx->evict_list); + mutex_init(&kctx->jit_evict_lock); + + /* Register shrinker */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) + kctx->reclaim.shrink = kbase_mem_evictable_reclaim_shrink; +#else + kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects; + kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects; +#endif + kctx->reclaim.seeks = DEFAULT_SEEKS; + /* Kernel versions prior to 3.1 : + * struct shrinker does not define batch */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) + kctx->reclaim.batch = 0; +#endif + register_shrinker(&kctx->reclaim); + return 0; +} + +void kbase_mem_evictable_deinit(struct kbase_context *kctx) +{ + unregister_shrinker(&kctx->reclaim); +} + +/** + * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable. 
+ * @alloc: The physical allocation + */ +void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc) +{ + struct kbase_context *kctx = alloc->imported.native.kctx; + struct kbase_device *kbdev = kctx->kbdev; + int __maybe_unused new_page_count; + + kbase_process_page_usage_dec(kctx, alloc->nents); + new_page_count = atomic_sub_return(alloc->nents, + &kctx->used_pages); + atomic_sub(alloc->nents, &kctx->kbdev->memdev.used_pages); + + KBASE_TLSTREAM_AUX_PAGESALLOC( + kbdev, + kctx->id, + (u64)new_page_count); +} + +/** + * kbase_mem_evictable_unmark_reclaim - Mark the pages as no longer reclaimable. + * @alloc: The physical allocation + */ +static +void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc) +{ + struct kbase_context *kctx = alloc->imported.native.kctx; + struct kbase_device *kbdev = kctx->kbdev; + int __maybe_unused new_page_count; + + new_page_count = atomic_add_return(alloc->nents, + &kctx->used_pages); + atomic_add(alloc->nents, &kctx->kbdev->memdev.used_pages); + + /* Increase mm counters so that the allocation is accounted for + * against the process and thus is visible to the OOM killer, + */ + kbase_process_page_usage_inc(kctx, alloc->nents); + + KBASE_TLSTREAM_AUX_PAGESALLOC( + kbdev, + kctx->id, + (u64)new_page_count); +} + +int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) +{ + struct kbase_context *kctx = gpu_alloc->imported.native.kctx; + + lockdep_assert_held(&kctx->reg_lock); + + kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg, + 0, gpu_alloc->nents); + + mutex_lock(&kctx->jit_evict_lock); + /* This allocation can't already be on a list. */ + WARN_ON(!list_empty(&gpu_alloc->evict_node)); + + /* + * Add the allocation to the eviction list, after this point the shrink + * can reclaim it. 
+ */ + list_add(&gpu_alloc->evict_node, &kctx->evict_list); + mutex_unlock(&kctx->jit_evict_lock); + kbase_mem_evictable_mark_reclaim(gpu_alloc); + + gpu_alloc->reg->flags |= KBASE_REG_DONT_NEED; + return 0; +} + +bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) +{ + struct kbase_context *kctx = gpu_alloc->imported.native.kctx; + int err = 0; + + lockdep_assert_held(&kctx->reg_lock); + + mutex_lock(&kctx->jit_evict_lock); + /* + * First remove the allocation from the eviction list as it's no + * longer eligible for eviction. + */ + list_del_init(&gpu_alloc->evict_node); + mutex_unlock(&kctx->jit_evict_lock); + + if (gpu_alloc->evicted == 0) { + /* + * The backing is still present, update the VM stats as it's + * in use again. + */ + kbase_mem_evictable_unmark_reclaim(gpu_alloc); + } else { + /* If the region is still alive ... */ + if (gpu_alloc->reg) { + /* ... allocate replacement backing ... */ + err = kbase_alloc_phy_pages_helper(gpu_alloc, + gpu_alloc->evicted); + + /* + * ... and grow the mapping back to its + * pre-eviction size. + */ + if (!err) + err = kbase_mem_grow_gpu_mapping(kctx, + gpu_alloc->reg, + gpu_alloc->evicted, 0); + + gpu_alloc->evicted = 0; + } + } + + /* If the region is still alive remove the DONT_NEED attribute. */ + if (gpu_alloc->reg) + gpu_alloc->reg->flags &= ~KBASE_REG_DONT_NEED; + + return (err == 0); +} + +int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask) +{ + struct kbase_va_region *reg; + int ret = -EINVAL; + unsigned int real_flags = 0; + unsigned int new_flags = 0; + bool prev_needed, new_needed; + + KBASE_DEBUG_ASSERT(kctx); + + if (!gpu_addr) + return -EINVAL; + + if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) + return -EINVAL; + + /* nuke other bits */ + flags &= mask; + + /* check for only supported flags */ + if (flags & ~(BASE_MEM_FLAGS_MODIFIABLE)) + goto out; + + /* mask covers bits we don't support? 
*/ + if (mask & ~(BASE_MEM_FLAGS_MODIFIABLE)) + goto out; + + /* convert flags */ + if (BASE_MEM_COHERENT_SYSTEM & flags) + real_flags |= KBASE_REG_SHARE_BOTH; + else if (BASE_MEM_COHERENT_LOCAL & flags) + real_flags |= KBASE_REG_SHARE_IN; + + /* now we can lock down the context, and find the region */ + down_write(&current->mm->mmap_sem); + kbase_gpu_vm_lock(kctx); + + /* Validate the region */ + reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); + if (kbase_is_region_invalid_or_free(reg)) + goto out_unlock; + + /* Is the region being transitioning between not needed and needed? */ + prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED; + new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED; + if (prev_needed != new_needed) { + /* Aliased allocations can't be made ephemeral */ + if (atomic_read(&reg->cpu_alloc->gpu_mappings) > 1) + goto out_unlock; + + if (new_needed) { + /* Only native allocations can be marked not needed */ + if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) { + ret = -EINVAL; + goto out_unlock; + } + ret = kbase_mem_evictable_make(reg->gpu_alloc); + if (ret) + goto out_unlock; + } else { + kbase_mem_evictable_unmake(reg->gpu_alloc); + } + } + + /* limit to imported memory */ + if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM) + goto out_unlock; + + /* shareability flags are ignored for GPU uncached memory */ + if (!(reg->flags & KBASE_REG_GPU_CACHED)) { + ret = 0; + goto out_unlock; + } + + /* no change?
*/ + if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) { + ret = 0; + goto out_unlock; + } + + new_flags = reg->flags & ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH); + new_flags |= real_flags; + + /* Currently supporting only imported memory */ + if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM) { + ret = -EINVAL; + goto out_unlock; + } + + if (IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) { + /* Future use will use the new flags, existing mapping + * will NOT be updated as memory should not be in use + * by the GPU when updating the flags. + */ + WARN_ON(reg->gpu_alloc->imported.umm.current_mapping_usage_count); + ret = 0; + } else if (reg->gpu_alloc->imported.umm.current_mapping_usage_count) { + /* + * When CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND is not enabled the + * dma-buf GPU mapping should always be present, check that + * this is the case and warn and skip the page table update if + * not. + * + * Then update dma-buf GPU mapping with the new flags. + * + * Note: The buffer must not be in use on the GPU when + * changing flags. If the buffer is in active use on + * the GPU, there is a risk that the GPU may trigger a + * shareability fault, as it will see the same + * addresses from buffer with different shareability + * properties. + */ + dev_dbg(kctx->kbdev->dev, + "Updating page tables on mem flag change\n"); + ret = kbase_mmu_update_pages(kctx, reg->start_pfn, + kbase_get_gpu_phy_pages(reg), + kbase_reg_current_backed_size(reg), + new_flags, + reg->gpu_alloc->group_id); + if (ret) + dev_warn(kctx->kbdev->dev, + "Failed to update GPU page tables on flag change: %d\n", + ret); + } else + WARN_ON(!reg->gpu_alloc->imported.umm.current_mapping_usage_count); + + /* If everything is good, then set the new flags on the region. 
*/ + if (!ret) + reg->flags = new_flags; + +out_unlock: + kbase_gpu_vm_unlock(kctx); + up_write(&current->mm->mmap_sem); +out: + return ret; +} + +#define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_BITS) + +int kbase_mem_do_sync_imported(struct kbase_context *kctx, + struct kbase_va_region *reg, enum kbase_sync_type sync_fn) +{ + int ret = -EINVAL; + struct dma_buf *dma_buf; + enum dma_data_direction dir = DMA_BIDIRECTIONAL; + + lockdep_assert_held(&kctx->reg_lock); + + /* We assume that the same physical allocation object is used for both + * GPU and CPU for imported buffers. + */ + WARN_ON(reg->cpu_alloc != reg->gpu_alloc); + + /* Currently only handle dma-bufs */ + if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM) + return ret; + /* + * Attempting to sync with CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND + * enabled can expose us to a Linux Kernel issue between v4.6 and + * v4.19. We will not attempt to support cache syncs on dma-bufs that + * are mapped on demand (i.e. not on import), even on pre-4.6, neither + * on 4.20 or newer kernels, because this makes it difficult for + * userspace to know when they can rely on the cache sync. + * Instead, only support syncing when we always map dma-bufs on import, + * or if the particular buffer is mapped right now.
+ */ + if (IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND) && + !reg->gpu_alloc->imported.umm.current_mapping_usage_count) + return ret; + + dma_buf = reg->gpu_alloc->imported.umm.dma_buf; + + switch (sync_fn) { + case KBASE_SYNC_TO_DEVICE: + dev_dbg(kctx->kbdev->dev, + "Syncing imported buffer at GPU VA %llx to GPU\n", + reg->start_pfn); +#ifdef KBASE_MEM_ION_SYNC_WORKAROUND + if (!WARN_ON(!reg->gpu_alloc->imported.umm.dma_attachment)) { + struct dma_buf_attachment *attachment = reg->gpu_alloc->imported.umm.dma_attachment; + struct sg_table *sgt = reg->gpu_alloc->imported.umm.sgt; + + dma_sync_sg_for_device(attachment->dev, sgt->sgl, + sgt->nents, dir); + ret = 0; + } +#else + /* Though the below version check could be superfluous depending upon the version condition + * used for enabling KBASE_MEM_ION_SYNC_WORKAROUND, we still keep this check here to allow + * ease of modification for non-ION systems or systems where ION has been patched. + */ +#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS) + dma_buf_end_cpu_access(dma_buf, + 0, dma_buf->size, + dir); + ret = 0; +#else + ret = dma_buf_end_cpu_access(dma_buf, + dir); +#endif +#endif /* KBASE_MEM_ION_SYNC_WORKAROUND */ + break; + case KBASE_SYNC_TO_CPU: + dev_dbg(kctx->kbdev->dev, + "Syncing imported buffer at GPU VA %llx to CPU\n", + reg->start_pfn); +#ifdef KBASE_MEM_ION_SYNC_WORKAROUND + if (!WARN_ON(!reg->gpu_alloc->imported.umm.dma_attachment)) { + struct dma_buf_attachment *attachment = reg->gpu_alloc->imported.umm.dma_attachment; + struct sg_table *sgt = reg->gpu_alloc->imported.umm.sgt; + + dma_sync_sg_for_cpu(attachment->dev, sgt->sgl, + sgt->nents, dir); + ret = 0; + } +#else + ret = dma_buf_begin_cpu_access(dma_buf, +#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS) + 0, dma_buf->size, +#endif + dir); +#endif /* KBASE_MEM_ION_SYNC_WORKAROUND */ + break; + }; + + if (unlikely(ret)) + dev_warn(kctx->kbdev->dev, + "Failed to sync mem region %pK at GPU 
VA %llx: %d\n", + reg, reg->start_pfn, ret); + + return ret; +} + +/** + * kbase_mem_umm_unmap_attachment - Unmap dma-buf attachment + * @kctx: Pointer to kbase context + * @alloc: Pointer to allocation with imported dma-buf memory to unmap + * + * This will unmap a dma-buf. Must be called after the GPU page tables for the + * region have been torn down. + */ +static void kbase_mem_umm_unmap_attachment(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc) +{ + struct tagged_addr *pa = alloc->pages; + + dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, + alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); + alloc->imported.umm.sgt = NULL; + + memset(pa, 0xff, sizeof(*pa) * alloc->nents); + alloc->nents = 0; +} + +/** + * kbase_mem_umm_map_attachment - Prepare attached dma-buf for GPU mapping + * @kctx: Pointer to kbase context + * @reg: Pointer to region with imported dma-buf memory to map + * + * Map the dma-buf and prepare the page array with the tagged Mali physical + * addresses for GPU mapping. 
+ * + * Return: 0 on success, or negative error code + */ +static int kbase_mem_umm_map_attachment(struct kbase_context *kctx, + struct kbase_va_region *reg) +{ + struct sg_table *sgt; + struct scatterlist *s; + int i; + struct tagged_addr *pa; + int err; + size_t count = 0; + struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; + + WARN_ON_ONCE(alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM); + WARN_ON_ONCE(alloc->imported.umm.sgt); + + sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment, + DMA_BIDIRECTIONAL); + if (IS_ERR_OR_NULL(sgt)) + return -EINVAL; + + /* save for later */ + alloc->imported.umm.sgt = sgt; + + pa = kbase_get_gpu_phy_pages(reg); + + for_each_sg(sgt->sgl, s, sgt->nents, i) { + size_t j, pages = PFN_UP(sg_dma_len(s)); + + WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1), + "sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n", + sg_dma_len(s)); + + WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1), + "sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n", + (unsigned long long) sg_dma_address(s)); + + for (j = 0; (j < pages) && (count < reg->nr_pages); j++, count++) + *pa++ = as_tagged(sg_dma_address(s) + + (j << PAGE_SHIFT)); + WARN_ONCE(j < pages, + "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n", + alloc->imported.umm.dma_buf->size); + } + + if (!(reg->flags & KBASE_REG_IMPORT_PAD) && + WARN_ONCE(count < reg->nr_pages, + "sg list from dma_buf_map_attachment < dma_buf->size=%zu\n", + alloc->imported.umm.dma_buf->size)) { + err = -EINVAL; + goto err_unmap_attachment; + } + + /* Update nents as we now have pages to map */ + alloc->nents = count; + + return 0; + +err_unmap_attachment: + kbase_mem_umm_unmap_attachment(kctx, alloc); + + return err; +} + +int kbase_mem_umm_map(struct kbase_context *kctx, + struct kbase_va_region *reg) +{ + int err; + struct kbase_mem_phy_alloc *alloc; + unsigned long gwt_mask = ~0; + + lockdep_assert_held(&kctx->reg_lock); + + alloc = reg->gpu_alloc; + + alloc->imported.umm.current_mapping_usage_count++; + if 
(alloc->imported.umm.current_mapping_usage_count != 1) { + if (IS_ENABLED(CONFIG_MALI_DMA_BUF_LEGACY_COMPAT) || + alloc->imported.umm.need_sync) { + if (!kbase_is_region_invalid_or_free(reg)) { + err = kbase_mem_do_sync_imported(kctx, reg, + KBASE_SYNC_TO_DEVICE); + WARN_ON_ONCE(err); + } + } + return 0; + } + + err = kbase_mem_umm_map_attachment(kctx, reg); + if (err) + goto bad_map_attachment; + +#ifdef CONFIG_MALI_CINSTR_GWT + if (kctx->gwt_enabled) + gwt_mask = ~KBASE_REG_GPU_WR; +#endif + + err = kbase_mmu_insert_pages(kctx->kbdev, + &kctx->mmu, + reg->start_pfn, + kbase_get_gpu_phy_pages(reg), + kbase_reg_current_backed_size(reg), + reg->flags & gwt_mask, + kctx->as_nr, + alloc->group_id); + if (err) + goto bad_insert; + + if (reg->flags & KBASE_REG_IMPORT_PAD && + !WARN_ON(reg->nr_pages < alloc->nents)) { + /* For padded imported dma-buf memory, map the dummy aliasing + * page from the end of the dma-buf pages, to the end of the + * region using a read only mapping. + * + * Assume alloc->nents is the number of actual pages in the + * dma-buf memory. 
+ */ + err = kbase_mmu_insert_single_page(kctx, + reg->start_pfn + alloc->nents, + kctx->aliasing_sink_page, + reg->nr_pages - alloc->nents, + (reg->flags | KBASE_REG_GPU_RD) & + ~KBASE_REG_GPU_WR, + KBASE_MEM_GROUP_SINK); + if (err) + goto bad_pad_insert; + } + + return 0; + +bad_pad_insert: + kbase_mmu_teardown_pages(kctx->kbdev, + &kctx->mmu, + reg->start_pfn, + alloc->nents, + kctx->as_nr); +bad_insert: + kbase_mem_umm_unmap_attachment(kctx, alloc); +bad_map_attachment: + alloc->imported.umm.current_mapping_usage_count--; + + return err; +} + +void kbase_mem_umm_unmap(struct kbase_context *kctx, + struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc) +{ + alloc->imported.umm.current_mapping_usage_count--; + if (alloc->imported.umm.current_mapping_usage_count) { + if (IS_ENABLED(CONFIG_MALI_DMA_BUF_LEGACY_COMPAT) || + alloc->imported.umm.need_sync) { + if (!kbase_is_region_invalid_or_free(reg)) { + int err = kbase_mem_do_sync_imported(kctx, reg, + KBASE_SYNC_TO_CPU); + WARN_ON_ONCE(err); + } + } + return; + } + + if (!kbase_is_region_invalid_or_free(reg) && reg->gpu_alloc == alloc) { + int err; + + err = kbase_mmu_teardown_pages(kctx->kbdev, + &kctx->mmu, + reg->start_pfn, + reg->nr_pages, + kctx->as_nr); + WARN_ON(err); + } + + kbase_mem_umm_unmap_attachment(kctx, alloc); +} + +static int get_umm_memory_group_id(struct kbase_context *kctx, + struct dma_buf *dma_buf) +{ + int group_id = BASE_MEM_GROUP_DEFAULT; + + if (kctx->kbdev->mgm_dev->ops.mgm_get_import_memory_id) { + struct memory_group_manager_import_data mgm_import_data; + + mgm_import_data.type = + MEMORY_GROUP_MANAGER_IMPORT_TYPE_DMA_BUF; + mgm_import_data.u.dma_buf = dma_buf; + + group_id = kctx->kbdev->mgm_dev->ops.mgm_get_import_memory_id( + kctx->kbdev->mgm_dev, &mgm_import_data); + } + + return group_id; +} + +/** + * kbase_mem_from_umm - Import dma-buf memory into kctx + * @kctx: Pointer to kbase context to import memory into + * @fd: File descriptor of dma-buf to import + * @va_pages: 
Pointer where virtual size of the region will be output + * @flags: Pointer to memory flags + * @padding: Number of read only padding pages to be inserted at the end of the + * GPU mapping of the dma-buf + * + * Return: Pointer to new kbase_va_region object of the imported dma-buf, or + * NULL on error. + * + * This function imports a dma-buf into kctx, and created a kbase_va_region + * object that wraps the dma-buf. + */ +static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, + int fd, u64 *va_pages, u64 *flags, u32 padding) +{ + struct kbase_va_region *reg; + struct dma_buf *dma_buf; + struct dma_buf_attachment *dma_attachment; + bool shared_zone = false; + bool need_sync = false; + int group_id; + + /* 64-bit address range is the max */ + if (*va_pages > (U64_MAX / PAGE_SIZE)) + return NULL; + + dma_buf = dma_buf_get(fd); + if (IS_ERR_OR_NULL(dma_buf)) + return NULL; + + dma_attachment = dma_buf_attach(dma_buf, kctx->kbdev->dev); + if (IS_ERR_OR_NULL(dma_attachment)) { + dma_buf_put(dma_buf); + return NULL; + } + + *va_pages = (PAGE_ALIGN(dma_buf->size) >> PAGE_SHIFT) + padding; + if (!*va_pages) { + dma_buf_detach(dma_buf, dma_attachment); + dma_buf_put(dma_buf); + return NULL; + } + + /* ignore SAME_VA */ + *flags &= ~BASE_MEM_SAME_VA; + + /* + * Force CPU cached flag. + * + * We can't query the dma-buf exporter to get details about the CPU + * cache attributes of CPU mappings, so we have to assume that the + * buffer may be cached, and call into the exporter for cache + * maintenance, and rely on the exporter to do the right thing when + * handling our calls. + */ + *flags |= BASE_MEM_CACHED_CPU; + + if (*flags & BASE_MEM_IMPORT_SHARED) + shared_zone = true; + + if (*flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) + need_sync = true; + +#ifdef CONFIG_64BIT + if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + /* + * 64-bit tasks require us to reserve VA on the CPU that we use + * on the GPU. 
+ */ + shared_zone = true; + } +#endif + + if (shared_zone) { + *flags |= BASE_MEM_NEED_MMAP; + reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, + 0, *va_pages, KBASE_REG_ZONE_SAME_VA); + } else { + reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom, + 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA); + } + + if (!reg) { + dma_buf_detach(dma_buf, dma_attachment); + dma_buf_put(dma_buf); + return NULL; + } + + group_id = get_umm_memory_group_id(kctx, dma_buf); + + reg->gpu_alloc = kbase_alloc_create(kctx, *va_pages, + KBASE_MEM_TYPE_IMPORTED_UMM, group_id); + if (IS_ERR_OR_NULL(reg->gpu_alloc)) + goto no_alloc; + + reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + + if (kbase_update_region_flags(kctx, reg, *flags) != 0) + goto error_out; + + /* No pages to map yet */ + reg->gpu_alloc->nents = 0; + + reg->flags &= ~KBASE_REG_FREE; + reg->flags |= KBASE_REG_GPU_NX; /* UMM is always No eXecute */ + reg->flags &= ~KBASE_REG_GROWABLE; /* UMM cannot be grown */ + + if (*flags & BASE_MEM_PROTECTED) + reg->flags |= KBASE_REG_PROTECTED; + + if (padding) + reg->flags |= KBASE_REG_IMPORT_PAD; + + reg->gpu_alloc->type = KBASE_MEM_TYPE_IMPORTED_UMM; + reg->gpu_alloc->imported.umm.sgt = NULL; + reg->gpu_alloc->imported.umm.dma_buf = dma_buf; + reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment; + reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0; + reg->gpu_alloc->imported.umm.need_sync = need_sync; + reg->extent = 0; + + if (!IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) { + int err; + + reg->gpu_alloc->imported.umm.current_mapping_usage_count = 1; + + err = kbase_mem_umm_map_attachment(kctx, reg); + if (err) { + dev_warn(kctx->kbdev->dev, + "Failed to map dma-buf %pK on GPU: %d\n", + dma_buf, err); + goto error_out; + } + + *flags |= KBASE_MEM_IMPORT_HAVE_PAGES; + } + + return reg; + +error_out: + kbase_mem_phy_alloc_put(reg->gpu_alloc); + kbase_mem_phy_alloc_put(reg->cpu_alloc); +no_alloc: + kfree(reg); + + return NULL; +} + +u32 
kbase_get_cache_line_alignment(struct kbase_device *kbdev) +{ + u32 cpu_cache_line_size = cache_line_size(); + u32 gpu_cache_line_size = + (1UL << kbdev->gpu_props.props.l2_props.log2_line_size); + + return ((cpu_cache_line_size > gpu_cache_line_size) ? + cpu_cache_line_size : + gpu_cache_line_size); +} + +static struct kbase_va_region *kbase_mem_from_user_buffer( + struct kbase_context *kctx, unsigned long address, + unsigned long size, u64 *va_pages, u64 *flags) +{ + long i; + struct kbase_va_region *reg; + struct rb_root *rbtree; + long faulted_pages; + int zone = KBASE_REG_ZONE_CUSTOM_VA; + bool shared_zone = false; + u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev); + struct kbase_alloc_import_user_buf *user_buf; + struct page **pages = NULL; + + /* Flag supported only for dma-buf imported memory */ + if (*flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) + return NULL; + + if ((address & (cache_line_alignment - 1)) != 0 || + (size & (cache_line_alignment - 1)) != 0) { + if (*flags & BASE_MEM_UNCACHED_GPU) { + dev_warn(kctx->kbdev->dev, + "User buffer is not cache line aligned and marked as GPU uncached\n"); + goto bad_size; + } + + /* Coherency must be enabled to handle partial cache lines */ + if (*flags & (BASE_MEM_COHERENT_SYSTEM | + BASE_MEM_COHERENT_SYSTEM_REQUIRED)) { + /* Force coherent system required flag, import will + * then fail if coherency isn't available + */ + *flags |= BASE_MEM_COHERENT_SYSTEM_REQUIRED; + } else { + dev_warn(kctx->kbdev->dev, + "User buffer is not cache line aligned and no coherency enabled\n"); + goto bad_size; + } + } + + *va_pages = (PAGE_ALIGN(address + size) >> PAGE_SHIFT) - + PFN_DOWN(address); + if (!*va_pages) + goto bad_size; + + if (*va_pages > (UINT64_MAX / PAGE_SIZE)) + /* 64-bit address range is the max */ + goto bad_size; + + /* SAME_VA generally not supported with imported memory (no known use cases) */ + *flags &= ~BASE_MEM_SAME_VA; + + if (*flags & BASE_MEM_IMPORT_SHARED) + shared_zone = true; 
+ +#ifdef CONFIG_64BIT + if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + /* + * 64-bit tasks require us to reserve VA on the CPU that we use + * on the GPU. + */ + shared_zone = true; + } +#endif + + if (shared_zone) { + *flags |= BASE_MEM_NEED_MMAP; + zone = KBASE_REG_ZONE_SAME_VA; + rbtree = &kctx->reg_rbtree_same; + } else + rbtree = &kctx->reg_rbtree_custom; + + reg = kbase_alloc_free_region(rbtree, 0, *va_pages, zone); + + if (!reg) + goto no_region; + + reg->gpu_alloc = kbase_alloc_create( + kctx, *va_pages, KBASE_MEM_TYPE_IMPORTED_USER_BUF, + BASE_MEM_GROUP_DEFAULT); + if (IS_ERR_OR_NULL(reg->gpu_alloc)) + goto no_alloc_obj; + + reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + + if (kbase_update_region_flags(kctx, reg, *flags) != 0) + goto invalid_flags; + + reg->flags &= ~KBASE_REG_FREE; + reg->flags |= KBASE_REG_GPU_NX; /* User-buffers are always No eXecute */ + reg->flags &= ~KBASE_REG_GROWABLE; /* Cannot be grown */ + + user_buf = ®->gpu_alloc->imported.user_buf; + + user_buf->size = size; + user_buf->address = address; + user_buf->nr_pages = *va_pages; + user_buf->mm = current->mm; +#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE + atomic_inc(¤t->mm->mm_count); +#else + mmgrab(current->mm); +#endif + if (reg->gpu_alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE) + user_buf->pages = vmalloc(*va_pages * sizeof(struct page *)); + else + user_buf->pages = kmalloc_array(*va_pages, + sizeof(struct page *), GFP_KERNEL); + + if (!user_buf->pages) + goto no_page_array; + + /* If the region is coherent with the CPU then the memory is imported + * and mapped onto the GPU immediately. + * Otherwise get_user_pages is called as a sanity check, but with + * NULL as the pages argument which will fault the pages, but not + * pin them. The memory will then be pinned only around the jobs that + * specify the region as an external resource. 
+ */ + if (reg->flags & KBASE_REG_SHARE_BOTH) { + pages = user_buf->pages; + *flags |= KBASE_MEM_IMPORT_HAVE_PAGES; + } + + down_read(¤t->mm->mmap_sem); + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) + faulted_pages = get_user_pages(current, current->mm, address, *va_pages, +#if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \ +KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE + reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, + pages, NULL); +#else + reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL); +#endif +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) + faulted_pages = get_user_pages(address, *va_pages, + reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL); +#else + faulted_pages = get_user_pages(address, *va_pages, + reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, + pages, NULL); +#endif + + up_read(¤t->mm->mmap_sem); + + if (faulted_pages != *va_pages) + goto fault_mismatch; + + reg->gpu_alloc->nents = 0; + reg->extent = 0; + + if (pages) { + struct device *dev = kctx->kbdev->dev; + unsigned long local_size = user_buf->size; + unsigned long offset = user_buf->address & ~PAGE_MASK; + struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg); + + /* Top bit signifies that this was pinned on import */ + user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT; + + for (i = 0; i < faulted_pages; i++) { + dma_addr_t dma_addr; + unsigned long min; + + min = MIN(PAGE_SIZE - offset, local_size); + dma_addr = dma_map_page(dev, pages[i], + offset, min, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, dma_addr)) + goto unwind_dma_map; + + user_buf->dma_addrs[i] = dma_addr; + pa[i] = as_tagged(page_to_phys(pages[i])); + + local_size -= min; + offset = 0; + } + + reg->gpu_alloc->nents = faulted_pages; + } + + return reg; + +unwind_dma_map: + while (i--) { + dma_unmap_page(kctx->kbdev->dev, + user_buf->dma_addrs[i], + PAGE_SIZE, DMA_BIDIRECTIONAL); + } +fault_mismatch: + if (pages) { + for (i = 0; i < faulted_pages; i++) + put_page(pages[i]); + } +no_page_array: 
+invalid_flags: + kbase_mem_phy_alloc_put(reg->cpu_alloc); + kbase_mem_phy_alloc_put(reg->gpu_alloc); +no_alloc_obj: + kfree(reg); +no_region: +bad_size: + return NULL; + +} + + +u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, + u64 nents, struct base_mem_aliasing_info *ai, + u64 *num_pages) +{ + struct kbase_va_region *reg; + u64 gpu_va; + size_t i; + bool coherent; + + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(flags); + KBASE_DEBUG_ASSERT(ai); + KBASE_DEBUG_ASSERT(num_pages); + + /* mask to only allowed flags */ + *flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | + BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL | + BASE_MEM_PROT_CPU_RD | BASE_MEM_COHERENT_SYSTEM_REQUIRED); + + if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) { + dev_warn(kctx->kbdev->dev, + "kbase_mem_alias called with bad flags (%llx)", + (unsigned long long)*flags); + goto bad_flags; + } + coherent = (*flags & BASE_MEM_COHERENT_SYSTEM) != 0 || + (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0; + + if (!stride) + goto bad_stride; + + if (!nents) + goto bad_nents; + + if ((nents * stride) > (U64_MAX / PAGE_SIZE)) + /* 64-bit address range is the max */ + goto bad_size; + + /* calculate the number of pages this alias will cover */ + *num_pages = nents * stride; + +#ifdef CONFIG_64BIT + if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + /* 64-bit tasks must MMAP anyway, but not expose this address to + * clients */ + *flags |= BASE_MEM_NEED_MMAP; + reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, + *num_pages, + KBASE_REG_ZONE_SAME_VA); + } else { +#else + if (1) { +#endif + reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom, + 0, *num_pages, + KBASE_REG_ZONE_CUSTOM_VA); + } + + if (!reg) + goto no_reg; + + /* zero-sized page array, as we don't need one/can support one */ + reg->gpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_ALIAS, + BASE_MEM_GROUP_DEFAULT); + if (IS_ERR_OR_NULL(reg->gpu_alloc)) + goto no_alloc_obj; + + 
reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + + if (kbase_update_region_flags(kctx, reg, *flags) != 0) + goto invalid_flags; + + reg->gpu_alloc->imported.alias.nents = nents; + reg->gpu_alloc->imported.alias.stride = stride; + reg->gpu_alloc->imported.alias.aliased = vzalloc(sizeof(*reg->gpu_alloc->imported.alias.aliased) * nents); + if (!reg->gpu_alloc->imported.alias.aliased) + goto no_aliased_array; + + kbase_gpu_vm_lock(kctx); + + /* validate and add src handles */ + for (i = 0; i < nents; i++) { + if (ai[i].handle.basep.handle < BASE_MEM_FIRST_FREE_ADDRESS) { + if (ai[i].handle.basep.handle != + BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE) + goto bad_handle; /* unsupported magic handle */ + if (!ai[i].length) + goto bad_handle; /* must be > 0 */ + if (ai[i].length > stride) + goto bad_handle; /* can't be larger than the + stride */ + reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length; + } else { + struct kbase_va_region *aliasing_reg; + struct kbase_mem_phy_alloc *alloc; + + aliasing_reg = kbase_region_tracker_find_region_base_address( + kctx, + (ai[i].handle.basep.handle >> PAGE_SHIFT) << PAGE_SHIFT); + + /* validate found region */ + if (kbase_is_region_invalid_or_free(aliasing_reg)) + goto bad_handle; /* Not found/already free */ + if (aliasing_reg->flags & KBASE_REG_DONT_NEED) + goto bad_handle; /* Ephemeral region */ + if (!(aliasing_reg->flags & KBASE_REG_GPU_CACHED)) + goto bad_handle; /* GPU uncached memory */ + if (!aliasing_reg->gpu_alloc) + goto bad_handle; /* No alloc */ + if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) + goto bad_handle; /* Not a native alloc */ + if (coherent != ((aliasing_reg->flags & KBASE_REG_SHARE_BOTH) != 0)) + goto bad_handle; + /* Non-coherent memory cannot alias + coherent memory, and vice versa.*/ + + /* check size against stride */ + if (!ai[i].length) + goto bad_handle; /* must be > 0 */ + if (ai[i].length > stride) + goto bad_handle; /* can't be larger than the + stride */ + + alloc = 
aliasing_reg->gpu_alloc; + + /* check against the alloc's size */ + if (ai[i].offset > alloc->nents) + goto bad_handle; /* beyond end */ + if (ai[i].offset + ai[i].length > alloc->nents) + goto bad_handle; /* beyond end */ + + reg->gpu_alloc->imported.alias.aliased[i].alloc = kbase_mem_phy_alloc_get(alloc); + reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length; + reg->gpu_alloc->imported.alias.aliased[i].offset = ai[i].offset; + } + } + +#ifdef CONFIG_64BIT + if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + /* Bind to a cookie */ + if (bitmap_empty(kctx->cookies, BITS_PER_LONG)) { + dev_err(kctx->kbdev->dev, "No cookies available for allocation!"); + goto no_cookie; + } + /* return a cookie */ + gpu_va = find_first_bit(kctx->cookies, BITS_PER_LONG); + bitmap_clear(kctx->cookies, gpu_va, 1); + BUG_ON(kctx->pending_regions[gpu_va]); + kctx->pending_regions[gpu_va] = reg; + + /* relocate to correct base */ + gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE); + gpu_va <<= PAGE_SHIFT; + } else /* we control the VA */ { +#else + if (1) { +#endif + if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1) != 0) { + dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU"); + goto no_mmap; + } + /* return real GPU VA */ + gpu_va = reg->start_pfn << PAGE_SHIFT; + } + + reg->flags &= ~KBASE_REG_FREE; + reg->flags &= ~KBASE_REG_GROWABLE; + + kbase_gpu_vm_unlock(kctx); + + return gpu_va; + +#ifdef CONFIG_64BIT +no_cookie: +#endif +no_mmap: +bad_handle: + kbase_gpu_vm_unlock(kctx); +no_aliased_array: +invalid_flags: + kbase_mem_phy_alloc_put(reg->cpu_alloc); + kbase_mem_phy_alloc_put(reg->gpu_alloc); +no_alloc_obj: + kfree(reg); +no_reg: +bad_size: +bad_nents: +bad_stride: +bad_flags: + return 0; +} + +int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, + void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages, + u64 *flags) +{ + struct kbase_va_region *reg; + + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(gpu_va); + KBASE_DEBUG_ASSERT(va_pages); + 
KBASE_DEBUG_ASSERT(flags); + + if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) && + kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA)) + *flags |= BASE_MEM_SAME_VA; + + if (!kbase_check_import_flags(*flags)) { + dev_warn(kctx->kbdev->dev, + "kbase_mem_import called with bad flags (%llx)", + (unsigned long long)*flags); + goto bad_flags; + } + + if ((*flags & BASE_MEM_UNCACHED_GPU) != 0 && + (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0) { + /* Remove COHERENT_SYSTEM_REQUIRED flag if uncached GPU mapping is requested */ + *flags &= ~BASE_MEM_COHERENT_SYSTEM_REQUIRED; + } + if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 && + !kbase_device_is_cpu_coherent(kctx->kbdev)) { + dev_warn(kctx->kbdev->dev, + "kbase_mem_import call required coherent mem when unavailable"); + goto bad_flags; + } + if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 && + !kbase_device_is_cpu_coherent(kctx->kbdev)) { + /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */ + *flags &= ~BASE_MEM_COHERENT_SYSTEM; + } + + if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) { + dev_warn(kctx->kbdev->dev, + "padding is only supported for UMM"); + goto bad_flags; + } + + switch (type) { + case BASE_MEM_IMPORT_TYPE_UMM: { + int fd; + + if (get_user(fd, (int __user *)phandle)) + reg = NULL; + else + reg = kbase_mem_from_umm(kctx, fd, va_pages, flags, + padding); + } + break; + case BASE_MEM_IMPORT_TYPE_USER_BUFFER: { + struct base_mem_import_user_buffer user_buffer; + void __user *uptr; + + if (copy_from_user(&user_buffer, phandle, + sizeof(user_buffer))) { + reg = NULL; + } else { +#ifdef CONFIG_COMPAT + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) + uptr = compat_ptr(user_buffer.ptr); + else +#endif + uptr = u64_to_user_ptr(user_buffer.ptr); + + reg = kbase_mem_from_user_buffer(kctx, + (unsigned long)uptr, user_buffer.length, + va_pages, flags); + } + break; + } + default: { + reg = NULL; + break; + } + } + + if (!reg) + goto no_reg; + + kbase_gpu_vm_lock(kctx); + + /* mmap needed to setup VA? 
*/ + if (*flags & (BASE_MEM_SAME_VA | BASE_MEM_NEED_MMAP)) { + /* Bind to a cookie */ + if (bitmap_empty(kctx->cookies, BITS_PER_LONG)) + goto no_cookie; + /* return a cookie */ + *gpu_va = find_first_bit(kctx->cookies, BITS_PER_LONG); + bitmap_clear(kctx->cookies, *gpu_va, 1); + BUG_ON(kctx->pending_regions[*gpu_va]); + kctx->pending_regions[*gpu_va] = reg; + + /* relocate to correct base */ + *gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE); + *gpu_va <<= PAGE_SHIFT; + + } else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES) { + /* we control the VA, mmap now to the GPU */ + if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1) != 0) + goto no_gpu_va; + /* return real GPU VA */ + *gpu_va = reg->start_pfn << PAGE_SHIFT; + } else { + /* we control the VA, but nothing to mmap yet */ + if (kbase_add_va_region(kctx, reg, 0, *va_pages, 1) != 0) + goto no_gpu_va; + /* return real GPU VA */ + *gpu_va = reg->start_pfn << PAGE_SHIFT; + } + + /* clear out private flags */ + *flags &= ((1UL << BASE_MEM_FLAGS_NR_BITS) - 1); + + kbase_gpu_vm_unlock(kctx); + + return 0; + +no_gpu_va: +no_cookie: + kbase_gpu_vm_unlock(kctx); + kbase_mem_phy_alloc_put(reg->cpu_alloc); + kbase_mem_phy_alloc_put(reg->gpu_alloc); + kfree(reg); +no_reg: +bad_flags: + *gpu_va = 0; + *va_pages = 0; + *flags = 0; + return -ENOMEM; +} + +int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages) +{ + struct tagged_addr *phy_pages; + u64 delta = new_pages - old_pages; + int ret = 0; + + lockdep_assert_held(&kctx->reg_lock); + + /* Map the new pages into the GPU */ + phy_pages = kbase_get_gpu_phy_pages(reg); + ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, + reg->start_pfn + old_pages, phy_pages + old_pages, delta, + reg->flags, kctx->as_nr, reg->gpu_alloc->group_id); + + return ret; +} + +void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages) +{ + u64 gpu_va_start = reg->start_pfn; + + 
if (new_pages == old_pages) + /* Nothing to do */ + return; + + unmap_mapping_range(kctx->filp->f_inode->i_mapping, + (gpu_va_start + new_pages)<kbdev, &kctx->mmu, + reg->start_pfn + new_pages, delta, kctx->as_nr); + + return ret; +} + +int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) +{ + u64 old_pages; + u64 delta; + int res = -EINVAL; + struct kbase_va_region *reg; + bool read_locked = false; + + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(gpu_addr != 0); + + if (gpu_addr & ~PAGE_MASK) { + dev_warn(kctx->kbdev->dev, "kbase:mem_commit: gpu_addr: passed parameter is invalid"); + return -EINVAL; + } + + down_write(¤t->mm->mmap_sem); + kbase_gpu_vm_lock(kctx); + + /* Validate the region */ + reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); + if (kbase_is_region_invalid_or_free(reg)) + goto out_unlock; + + KBASE_DEBUG_ASSERT(reg->cpu_alloc); + KBASE_DEBUG_ASSERT(reg->gpu_alloc); + + if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) + goto out_unlock; + + if (0 == (reg->flags & KBASE_REG_GROWABLE)) + goto out_unlock; + + /* Would overflow the VA region */ + if (new_pages > reg->nr_pages) + goto out_unlock; + + /* can't be mapped more than once on the GPU */ + if (atomic_read(®->gpu_alloc->gpu_mappings) > 1) + goto out_unlock; + /* can't grow regions which are ephemeral */ + if (reg->flags & KBASE_REG_DONT_NEED) + goto out_unlock; + +#ifdef CONFIG_MALI_MEMORY_FULLY_BACKED + /* Reject resizing commit size */ + if (reg->flags & KBASE_REG_PF_GROW) + new_pages = reg->nr_pages; +#endif + + if (new_pages == reg->gpu_alloc->nents) { + /* no change */ + res = 0; + goto out_unlock; + } + + old_pages = kbase_reg_current_backed_size(reg); + if (new_pages > old_pages) { + delta = new_pages - old_pages; + + /* + * No update to the mm so downgrade the writer lock to a read + * lock so other readers aren't blocked after this point. 
+ */ + downgrade_write(¤t->mm->mmap_sem); + read_locked = true; + + /* Allocate some more pages */ + if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) != 0) { + res = -ENOMEM; + goto out_unlock; + } + if (reg->cpu_alloc != reg->gpu_alloc) { + if (kbase_alloc_phy_pages_helper( + reg->gpu_alloc, delta) != 0) { + res = -ENOMEM; + kbase_free_phy_pages_helper(reg->cpu_alloc, + delta); + goto out_unlock; + } + } + + /* No update required for CPU mappings, that's done on fault. */ + + /* Update GPU mapping. */ + res = kbase_mem_grow_gpu_mapping(kctx, reg, + new_pages, old_pages); + + /* On error free the new pages */ + if (res) { + kbase_free_phy_pages_helper(reg->cpu_alloc, delta); + if (reg->cpu_alloc != reg->gpu_alloc) + kbase_free_phy_pages_helper(reg->gpu_alloc, + delta); + res = -ENOMEM; + goto out_unlock; + } + } else { + res = kbase_mem_shrink(kctx, reg, new_pages); + if (res) + res = -ENOMEM; + } + +out_unlock: + kbase_gpu_vm_unlock(kctx); + if (read_locked) + up_read(¤t->mm->mmap_sem); + else + up_write(¤t->mm->mmap_sem); + + return res; +} + +int kbase_mem_shrink(struct kbase_context *const kctx, + struct kbase_va_region *const reg, u64 const new_pages) +{ + u64 delta, old_pages; + int err; + + lockdep_assert_held(&kctx->reg_lock); + + if (WARN_ON(!kctx)) + return -EINVAL; + + if (WARN_ON(!reg)) + return -EINVAL; + + old_pages = kbase_reg_current_backed_size(reg); + if (WARN_ON(old_pages < new_pages)) + return -EINVAL; + + delta = old_pages - new_pages; + + /* Update the GPU mapping */ + err = kbase_mem_shrink_gpu_mapping(kctx, reg, + new_pages, old_pages); + if (err >= 0) { + /* Update all CPU mapping(s) */ + kbase_mem_shrink_cpu_mapping(kctx, reg, + new_pages, old_pages); + + kbase_free_phy_pages_helper(reg->cpu_alloc, delta); + if (reg->cpu_alloc != reg->gpu_alloc) + kbase_free_phy_pages_helper(reg->gpu_alloc, delta); + } + + return err; +} + + +static void kbase_cpu_vm_open(struct vm_area_struct *vma) +{ + struct kbase_cpu_mapping *map = 
vma->vm_private_data; + + KBASE_DEBUG_ASSERT(map); + KBASE_DEBUG_ASSERT(map->count > 0); + /* non-atomic as we're under Linux' mm lock */ + map->count++; +} + +static void kbase_cpu_vm_close(struct vm_area_struct *vma) +{ + struct kbase_cpu_mapping *map = vma->vm_private_data; + + KBASE_DEBUG_ASSERT(map); + KBASE_DEBUG_ASSERT(map->count > 0); + + /* non-atomic as we're under Linux' mm lock */ + if (--map->count) + return; + + KBASE_DEBUG_ASSERT(map->kctx); + KBASE_DEBUG_ASSERT(map->alloc); + + kbase_gpu_vm_lock(map->kctx); + + if (map->free_on_close) { + KBASE_DEBUG_ASSERT((map->region->flags & KBASE_REG_ZONE_MASK) == + KBASE_REG_ZONE_SAME_VA); + /* Avoid freeing memory on the process death which results in + * GPU Page Fault. Memory will be freed in kbase_destroy_context + */ + if (!(current->flags & PF_EXITING)) + kbase_mem_free_region(map->kctx, map->region); + } + + list_del(&map->mappings_list); + + kbase_va_region_alloc_put(map->kctx, map->region); + kbase_gpu_vm_unlock(map->kctx); + + kbase_mem_phy_alloc_put(map->alloc); + kfree(map); +} + +static struct kbase_aliased *get_aliased_alloc(struct vm_area_struct *vma, + struct kbase_va_region *reg, + pgoff_t *start_off, + size_t nr_pages) +{ + struct kbase_aliased *aliased = + reg->cpu_alloc->imported.alias.aliased; + + if (!reg->cpu_alloc->imported.alias.stride || + reg->nr_pages < (*start_off + nr_pages)) { + return NULL; + } + + while (*start_off >= reg->cpu_alloc->imported.alias.stride) { + aliased++; + *start_off -= reg->cpu_alloc->imported.alias.stride; + } + + if (!aliased->alloc) { + /* sink page not available for dumping map */ + return NULL; + } + + if ((*start_off + nr_pages) > aliased->length) { + /* not fully backed by physical pages */ + return NULL; + } + + return aliased; +} + +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) +static vm_fault_t kbase_cpu_vm_fault(struct vm_area_struct *vma, + struct vm_fault *vmf) +{ +#else +static vm_fault_t kbase_cpu_vm_fault(struct vm_fault *vmf) +{ + 
struct vm_area_struct *vma = vmf->vma; +#endif + struct kbase_cpu_mapping *map = vma->vm_private_data; + pgoff_t map_start_pgoff; + pgoff_t fault_pgoff; + size_t i; + pgoff_t addr; + size_t nents; + struct tagged_addr *pages; + vm_fault_t ret = VM_FAULT_SIGBUS; + struct memory_group_manager_device *mgm_dev; + + KBASE_DEBUG_ASSERT(map); + KBASE_DEBUG_ASSERT(map->count > 0); + KBASE_DEBUG_ASSERT(map->kctx); + KBASE_DEBUG_ASSERT(map->alloc); + + map_start_pgoff = vma->vm_pgoff - map->region->start_pfn; + + kbase_gpu_vm_lock(map->kctx); + if (unlikely(map->region->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS)) { + struct kbase_aliased *aliased = + get_aliased_alloc(vma, map->region, &map_start_pgoff, 1); + + if (!aliased) + goto exit; + + nents = aliased->length; + pages = aliased->alloc->pages + aliased->offset; + } else { + nents = map->alloc->nents; + pages = map->alloc->pages; + } + + fault_pgoff = map_start_pgoff + (vmf->pgoff - vma->vm_pgoff); + + if (fault_pgoff >= nents) + goto exit; + + /* Fault on access to DONT_NEED regions */ + if (map->alloc->reg && (map->alloc->reg->flags & KBASE_REG_DONT_NEED)) + goto exit; + + /* We are inserting all valid pages from the start of CPU mapping and + * not from the fault location (the mmap handler was previously doing + * the same). 
+ */ + i = map_start_pgoff; + addr = (pgoff_t)(vma->vm_start >> PAGE_SHIFT); + mgm_dev = map->kctx->kbdev->mgm_dev; + while (i < nents && (addr < vma->vm_end >> PAGE_SHIFT)) { + + ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, + map->alloc->group_id, vma, addr << PAGE_SHIFT, + PFN_DOWN(as_phys_addr_t(pages[i])), vma->vm_page_prot); + + if (ret != VM_FAULT_NOPAGE) + goto exit; + + i++; addr++; + } + +exit: + kbase_gpu_vm_unlock(map->kctx); + return ret; +} + +const struct vm_operations_struct kbase_vm_ops = { + .open = kbase_cpu_vm_open, + .close = kbase_cpu_vm_close, + .fault = kbase_cpu_vm_fault +}; + +static int kbase_cpu_mmap(struct kbase_context *kctx, + struct kbase_va_region *reg, + struct vm_area_struct *vma, + void *kaddr, + size_t nr_pages, + unsigned long aligned_offset, + int free_on_close) +{ + struct kbase_cpu_mapping *map; + int err = 0; + + map = kzalloc(sizeof(*map), GFP_KERNEL); + + if (!map) { + WARN_ON(1); + err = -ENOMEM; + goto out; + } + + /* + * VM_DONTCOPY - don't make this mapping available in fork'ed processes + * VM_DONTEXPAND - disable mremap on this region + * VM_IO - disables paging + * VM_DONTDUMP - Don't include in core dumps (3.7 only) + * VM_MIXEDMAP - Support mixing struct page*s and raw pfns. + * This is needed to support using the dedicated and + * the OS based memory backends together. 
+ */ + /* + * This will need updating to propagate coherency flags + * See MIDBASE-1057 + */ + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) + vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO; +#else + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO; +#endif + vma->vm_ops = &kbase_vm_ops; + vma->vm_private_data = map; + + if (reg->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS && nr_pages) { + pgoff_t rel_pgoff = vma->vm_pgoff - reg->start_pfn + + (aligned_offset >> PAGE_SHIFT); + struct kbase_aliased *aliased = + get_aliased_alloc(vma, reg, &rel_pgoff, nr_pages); + + if (!aliased) { + err = -EINVAL; + kfree(map); + goto out; + } + } + + if (!(reg->flags & KBASE_REG_CPU_CACHED) && + (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) { + /* We can't map vmalloc'd memory uncached. + * Other memory will have been returned from + * kbase_mem_pool which would be + * suitable for mapping uncached. + */ + BUG_ON(kaddr); + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + } + + if (!kaddr) { + vma->vm_flags |= VM_PFNMAP; + } else { + WARN_ON(aligned_offset); + /* MIXEDMAP so we can vfree the kaddr early and not track it after map time */ + vma->vm_flags |= VM_MIXEDMAP; + /* vmalloc remaping is easy... */ + err = remap_vmalloc_range(vma, kaddr, 0); + WARN_ON(err); + } + + if (err) { + kfree(map); + goto out; + } + + map->region = kbase_va_region_alloc_get(kctx, reg); + map->free_on_close = free_on_close; + map->kctx = kctx; + map->alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); + map->count = 1; /* start with one ref */ + + if (reg->flags & KBASE_REG_CPU_CACHED) + map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; + + list_add(&map->mappings_list, &map->alloc->mappings); + + out: + return err; +} + +#ifdef CONFIG_MALI_VECTOR_DUMP +static void kbase_free_unused_jit_allocations(struct kbase_context *kctx) +{ + /* Free all cached/unused JIT allocations as their contents are not + * really needed for the replay. 
The GPU writes to them would already + * have been captured through the GWT mechanism. + * This considerably reduces the size of mmu-snapshot-file and it also + * helps avoid segmentation fault issue during vector dumping of + * complex contents when the unused JIT allocations are accessed to + * dump their contents (as they appear in the page tables snapshot) + * but they got freed by the shrinker under low memory scenarios + * (which do occur with complex contents). + */ + while (kbase_jit_evict(kctx)) + ; +} +#endif + +static int kbase_mmu_dump_mmap(struct kbase_context *kctx, + struct vm_area_struct *vma, + struct kbase_va_region **const reg, + void **const kmap_addr) +{ + struct kbase_va_region *new_reg; + void *kaddr; + u32 nr_pages; + size_t size; + int err = 0; + + dev_dbg(kctx->kbdev->dev, "in kbase_mmu_dump_mmap\n"); + size = (vma->vm_end - vma->vm_start); + nr_pages = size >> PAGE_SHIFT; + +#ifdef CONFIG_MALI_VECTOR_DUMP + kbase_free_unused_jit_allocations(kctx); +#endif + + kaddr = kbase_mmu_dump(kctx, nr_pages); + + if (!kaddr) { + err = -ENOMEM; + goto out; + } + + new_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, nr_pages, + KBASE_REG_ZONE_SAME_VA); + if (!new_reg) { + err = -ENOMEM; + WARN_ON(1); + goto out; + } + + new_reg->cpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_RAW, + BASE_MEM_GROUP_DEFAULT); + if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) { + err = -ENOMEM; + new_reg->cpu_alloc = NULL; + WARN_ON(1); + goto out_no_alloc; + } + + new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc); + + new_reg->flags &= ~KBASE_REG_FREE; + new_reg->flags |= KBASE_REG_CPU_CACHED; + if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) { + err = -ENOMEM; + WARN_ON(1); + goto out_va_region; + } + + *kmap_addr = kaddr; + *reg = new_reg; + + dev_dbg(kctx->kbdev->dev, "kbase_mmu_dump_mmap done\n"); + return 0; + +out_no_alloc: +out_va_region: + kbase_free_alloced_region(new_reg); +out: + return err; +} + + +void 
kbase_os_mem_map_lock(struct kbase_context *kctx) +{ + struct mm_struct *mm = current->mm; + (void)kctx; + down_read(&mm->mmap_sem); +} + +void kbase_os_mem_map_unlock(struct kbase_context *kctx) +{ + struct mm_struct *mm = current->mm; + (void)kctx; + up_read(&mm->mmap_sem); +} + +static int kbasep_reg_mmap(struct kbase_context *kctx, + struct vm_area_struct *vma, + struct kbase_va_region **regm, + size_t *nr_pages, size_t *aligned_offset) + +{ + int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE); + struct kbase_va_region *reg; + int err = 0; + + *aligned_offset = 0; + + dev_dbg(kctx->kbdev->dev, "in kbasep_reg_mmap\n"); + + /* SAME_VA stuff, fetch the right region */ + reg = kctx->pending_regions[cookie]; + if (!reg) { + err = -ENOMEM; + goto out; + } + + if ((reg->flags & KBASE_REG_GPU_NX) && (reg->nr_pages != *nr_pages)) { + /* incorrect mmap size */ + /* leave the cookie for a potential later + * mapping, or to be reclaimed later when the + * context is freed */ + err = -ENOMEM; + goto out; + } + + if ((vma->vm_flags & VM_READ && !(reg->flags & KBASE_REG_CPU_RD)) || + (vma->vm_flags & VM_WRITE && !(reg->flags & KBASE_REG_CPU_WR))) { + /* VM flags inconsistent with region flags */ + err = -EPERM; + dev_err(kctx->kbdev->dev, "%s:%d inconsistent VM flags\n", + __FILE__, __LINE__); + goto out; + } + + /* adjust down nr_pages to what we have physically */ + *nr_pages = kbase_reg_current_backed_size(reg); + + if (kbase_gpu_mmap(kctx, reg, vma->vm_start + *aligned_offset, + reg->nr_pages, 1) != 0) { + dev_err(kctx->kbdev->dev, "%s:%d\n", __FILE__, __LINE__); + /* Unable to map in GPU space. */ + WARN_ON(1); + err = -ENOMEM; + goto out; + } + /* no need for the cookie anymore */ + kctx->pending_regions[cookie] = NULL; + bitmap_set(kctx->cookies, cookie, 1); + + /* + * Overwrite the offset with the region start_pfn, so we effectively + * map from offset 0 in the region. 
However subtract the aligned + * offset so that when user space trims the mapping the beginning of + * the trimmed VMA has the correct vm_pgoff; + */ + vma->vm_pgoff = reg->start_pfn - ((*aligned_offset)>>PAGE_SHIFT); +out: + *regm = reg; + dev_dbg(kctx->kbdev->dev, "kbasep_reg_mmap done\n"); + + return err; +} + +int kbase_context_mmap(struct kbase_context *const kctx, + struct vm_area_struct *const vma) +{ + struct kbase_va_region *reg = NULL; + void *kaddr = NULL; + size_t nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + int err = 0; + int free_on_close = 0; + struct device *dev = kctx->kbdev->dev; + size_t aligned_offset = 0; + + dev_dbg(dev, "kbase_mmap\n"); + + if (!(vma->vm_flags & VM_READ)) + vma->vm_flags &= ~VM_MAYREAD; + if (!(vma->vm_flags & VM_WRITE)) + vma->vm_flags &= ~VM_MAYWRITE; + + if (0 == nr_pages) { + err = -EINVAL; + goto out; + } + + if (!(vma->vm_flags & VM_SHARED)) { + err = -EINVAL; + goto out; + } + + kbase_gpu_vm_lock(kctx); + + if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MAP_TRACKING_HANDLE)) { + /* The non-mapped tracking helper page */ + err = kbase_tracking_page_setup(kctx, vma); + goto out_unlock; + } + + /* if not the MTP, verify that the MTP has been mapped */ + rcu_read_lock(); + /* catches both when the special page isn't present or + * when we've forked */ + if (rcu_dereference(kctx->process_mm) != current->mm) { + err = -EINVAL; + rcu_read_unlock(); + goto out_unlock; + } + rcu_read_unlock(); + + switch (vma->vm_pgoff) { + case PFN_DOWN(BASEP_MEM_INVALID_HANDLE): + case PFN_DOWN(BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE): + /* Illegal handle for direct map */ + err = -EINVAL; + goto out_unlock; + case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE): + /* MMU dump */ + err = kbase_mmu_dump_mmap(kctx, vma, ®, &kaddr); + if (0 != err) + goto out_unlock; + /* free the region on munmap */ + free_on_close = 1; + break; + case PFN_DOWN(BASE_MEM_COOKIE_BASE) ... 
+ PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: { + err = kbasep_reg_mmap(kctx, vma, ®, &nr_pages, + &aligned_offset); + if (0 != err) + goto out_unlock; + /* free the region on munmap */ + free_on_close = 1; + break; + } + default: { + reg = kbase_region_tracker_find_region_enclosing_address(kctx, + (u64)vma->vm_pgoff << PAGE_SHIFT); + + if (!kbase_is_region_invalid_or_free(reg)) { + /* will this mapping overflow the size of the region? */ + if (nr_pages > (reg->nr_pages - + (vma->vm_pgoff - reg->start_pfn))) { + err = -ENOMEM; + goto out_unlock; + } + + if ((vma->vm_flags & VM_READ && + !(reg->flags & KBASE_REG_CPU_RD)) || + (vma->vm_flags & VM_WRITE && + !(reg->flags & KBASE_REG_CPU_WR))) { + /* VM flags inconsistent with region flags */ + err = -EPERM; + dev_err(dev, "%s:%d inconsistent VM flags\n", + __FILE__, __LINE__); + goto out_unlock; + } + + if (KBASE_MEM_TYPE_IMPORTED_UMM == + reg->cpu_alloc->type) { + if (0 != (vma->vm_pgoff - reg->start_pfn)) { + err = -EINVAL; + dev_warn(dev, "%s:%d attempt to do a partial map in a dma_buf: non-zero offset to dma_buf mapping!\n", + __FILE__, __LINE__); + goto out_unlock; + } + err = dma_buf_mmap( + reg->cpu_alloc->imported.umm.dma_buf, + vma, vma->vm_pgoff - reg->start_pfn); + goto out_unlock; + } + + if (reg->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS) { + /* initial params check for aliased dumping map */ + if (nr_pages > reg->gpu_alloc->imported.alias.stride || + !reg->gpu_alloc->imported.alias.stride || + !nr_pages) { + err = -EINVAL; + dev_warn(dev, "mmap aliased: invalid params!\n"); + goto out_unlock; + } + } + else if (reg->cpu_alloc->nents < + (vma->vm_pgoff - reg->start_pfn + nr_pages)) { + /* limit what we map to the amount currently backed */ + if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents) + nr_pages = 0; + else + nr_pages = reg->cpu_alloc->nents - (vma->vm_pgoff - reg->start_pfn); + } + } else { + err = -ENOMEM; + goto out_unlock; + } + } /* default */ + } /* switch */ + + err = 
kbase_cpu_mmap(kctx, reg, vma, kaddr, nr_pages, aligned_offset, + free_on_close); + + if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) { + /* MMU dump - userspace should now have a reference on + * the pages, so we can now free the kernel mapping */ + vfree(kaddr); + } + +out_unlock: + kbase_gpu_vm_unlock(kctx); +out: + if (err) + dev_err(dev, "mmap failed %d\n", err); + + return err; +} + +KBASE_EXPORT_TEST_API(kbase_context_mmap); + +void kbase_sync_mem_regions(struct kbase_context *kctx, + struct kbase_vmap_struct *map, enum kbase_sync_type dest) +{ + size_t i; + off_t const offset = map->offset_in_page; + size_t const page_count = PFN_UP(offset + map->size); + + /* Sync first page */ + size_t sz = MIN(((size_t) PAGE_SIZE - offset), map->size); + struct tagged_addr cpu_pa = map->cpu_pages[0]; + struct tagged_addr gpu_pa = map->gpu_pages[0]; + + kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz, dest); + + /* Sync middle pages (if any) */ + for (i = 1; page_count > 2 && i < page_count - 1; i++) { + cpu_pa = map->cpu_pages[i]; + gpu_pa = map->gpu_pages[i]; + kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE, dest); + } + + /* Sync last page (if any) */ + if (page_count > 1) { + cpu_pa = map->cpu_pages[page_count - 1]; + gpu_pa = map->gpu_pages[page_count - 1]; + sz = ((offset + map->size - 1) & ~PAGE_MASK) + 1; + kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz, dest); + } +} + +static int kbase_vmap_phy_pages(struct kbase_context *kctx, + struct kbase_va_region *reg, u64 offset_bytes, size_t size, + struct kbase_vmap_struct *map) +{ + unsigned long page_index; + unsigned int offset_in_page = offset_bytes & ~PAGE_MASK; + size_t page_count = PFN_UP(offset_in_page + size); + struct tagged_addr *page_array; + struct page **pages; + void *cpu_addr = NULL; + pgprot_t prot; + size_t i; + + if (!size || !map || !reg->cpu_alloc || !reg->gpu_alloc) + return -EINVAL; + + /* check if page_count calculation will wrap */ + if (size > ((size_t)-1 / PAGE_SIZE)) + return 
-EINVAL; + + page_index = offset_bytes >> PAGE_SHIFT; + + /* check if page_index + page_count will wrap */ + if (-1UL - page_count < page_index) + return -EINVAL; + + if (page_index + page_count > kbase_reg_current_backed_size(reg)) + return -ENOMEM; + + if (reg->flags & KBASE_REG_DONT_NEED) + return -EINVAL; + + prot = PAGE_KERNEL; + if (!(reg->flags & KBASE_REG_CPU_CACHED)) { + /* Map uncached */ + prot = pgprot_writecombine(prot); + } + + page_array = kbase_get_cpu_phy_pages(reg); + if (!page_array) + return -ENOMEM; + + pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL); + if (!pages) + return -ENOMEM; + + for (i = 0; i < page_count; i++) + pages[i] = as_page(page_array[page_index + i]); + + /* Note: enforcing a RO prot_request onto prot is not done, since: + * - CPU-arch-specific integration required + * - kbase_vmap() requires no access checks to be made/enforced */ + + cpu_addr = vmap(pages, page_count, VM_MAP, prot); + + kfree(pages); + + if (!cpu_addr) + return -ENOMEM; + + map->offset_in_page = offset_in_page; + map->cpu_alloc = reg->cpu_alloc; + map->cpu_pages = &kbase_get_cpu_phy_pages(reg)[page_index]; + map->gpu_alloc = reg->gpu_alloc; + map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index]; + map->addr = (void *)((uintptr_t)cpu_addr + offset_in_page); + map->size = size; + map->sync_needed = ((reg->flags & KBASE_REG_CPU_CACHED) != 0) && + !kbase_mem_is_imported(map->gpu_alloc->type); + + if (map->sync_needed) + kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_CPU); + + return 0; +} + +void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, + unsigned long prot_request, struct kbase_vmap_struct *map) +{ + struct kbase_va_region *reg; + void *addr = NULL; + u64 offset_bytes; + struct kbase_mem_phy_alloc *cpu_alloc; + struct kbase_mem_phy_alloc *gpu_alloc; + int err; + + kbase_gpu_vm_lock(kctx); + + reg = kbase_region_tracker_find_region_enclosing_address(kctx, + gpu_addr); + if 
(kbase_is_region_invalid_or_free(reg)) + goto out_unlock; + + /* check access permissions can be satisfied + * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} + */ + if ((reg->flags & prot_request) != prot_request) + goto out_unlock; + + offset_bytes = gpu_addr - (reg->start_pfn << PAGE_SHIFT); + cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); + gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + + err = kbase_vmap_phy_pages(kctx, reg, offset_bytes, size, map); + if (err < 0) + goto fail_vmap_phy_pages; + + addr = map->addr; + +out_unlock: + kbase_gpu_vm_unlock(kctx); + return addr; + +fail_vmap_phy_pages: + kbase_gpu_vm_unlock(kctx); + kbase_mem_phy_alloc_put(cpu_alloc); + kbase_mem_phy_alloc_put(gpu_alloc); + + return NULL; +} + +void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, + struct kbase_vmap_struct *map) +{ + /* 0 is specified for prot_request to indicate no access checks should + * be made. + * + * As mentioned in kbase_vmap_prot() this means that a kernel-side + * CPU-RO mapping is not enforced to allow this to work */ + return kbase_vmap_prot(kctx, gpu_addr, size, 0u, map); +} +KBASE_EXPORT_TEST_API(kbase_vmap); + +static void kbase_vunmap_phy_pages(struct kbase_context *kctx, + struct kbase_vmap_struct *map) +{ + void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK); + vunmap(addr); + + if (map->sync_needed) + kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE); + + map->offset_in_page = 0; + map->cpu_pages = NULL; + map->gpu_pages = NULL; + map->addr = NULL; + map->size = 0; + map->sync_needed = false; +} + +void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map) +{ + kbase_vunmap_phy_pages(kctx, map); + map->cpu_alloc = kbase_mem_phy_alloc_put(map->cpu_alloc); + map->gpu_alloc = kbase_mem_phy_alloc_put(map->gpu_alloc); +} +KBASE_EXPORT_TEST_API(kbase_vunmap); + +static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value) +{ +#if (LINUX_VERSION_CODE >= 
KERNEL_VERSION(4, 19, 0)) + /* To avoid the build breakage due to an unexported kernel symbol + * 'mm_trace_rss_stat' from later kernels, i.e. from V4.19.0 onwards, + * we inline here the equivalent of 'add_mm_counter()' from linux + * kernel V5.4.0~8. + */ + atomic_long_add(value, &mm->rss_stat.count[member]); +#else + add_mm_counter(mm, member, value); +#endif +} + +void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages) +{ + struct mm_struct *mm; + + rcu_read_lock(); + mm = rcu_dereference(kctx->process_mm); + if (mm) { + atomic_add(pages, &kctx->nonmapped_pages); +#ifdef SPLIT_RSS_COUNTING + kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); +#else + spin_lock(&mm->page_table_lock); + kbasep_add_mm_counter(mm, MM_FILEPAGES, pages); + spin_unlock(&mm->page_table_lock); +#endif + } + rcu_read_unlock(); +} + +static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx) +{ + int pages; + struct mm_struct *mm; + + spin_lock(&kctx->mm_update_lock); + mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock)); + if (!mm) { + spin_unlock(&kctx->mm_update_lock); + return; + } + + rcu_assign_pointer(kctx->process_mm, NULL); + spin_unlock(&kctx->mm_update_lock); + synchronize_rcu(); + + pages = atomic_xchg(&kctx->nonmapped_pages, 0); +#ifdef SPLIT_RSS_COUNTING + kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages); +#else + spin_lock(&mm->page_table_lock); + kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages); + spin_unlock(&mm->page_table_lock); +#endif +} + +static void kbase_special_vm_close(struct vm_area_struct *vma) +{ + struct kbase_context *kctx; + + kctx = vma->vm_private_data; + kbasep_os_process_page_usage_drain(kctx); +} + +static const struct vm_operations_struct kbase_vm_special_ops = { + .close = kbase_special_vm_close, +}; + +static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma) +{ + /* check that this is the only tracking page */ + 
spin_lock(&kctx->mm_update_lock); + if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) { + spin_unlock(&kctx->mm_update_lock); + return -EFAULT; + } + + rcu_assign_pointer(kctx->process_mm, current->mm); + + spin_unlock(&kctx->mm_update_lock); + + /* no real access */ + vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC); +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO; +#else + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO; +#endif + vma->vm_ops = &kbase_vm_special_ops; + vma->vm_private_data = kctx; + + return 0; +} + diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h new file mode 100644 index 0000000..cd094b3 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h @@ -0,0 +1,464 @@ +/* + * + * (C) COPYRIGHT 2010, 2012-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file mali_kbase_mem_linux.h + * Base kernel memory APIs, Linux implementation. 
+ */ + +#ifndef _KBASE_MEM_LINUX_H_ +#define _KBASE_MEM_LINUX_H_ + +/** A HWC dump mapping */ +struct kbase_hwc_dma_mapping { + void *cpu_va; + dma_addr_t dma_pa; + size_t size; +}; + +/** + * kbase_mem_alloc - Create a new allocation for GPU + * + * @kctx: The kernel context + * @va_pages: The number of pages of virtual address space to reserve + * @commit_pages: The number of physical pages to allocate upfront + * @extent: The number of extra pages to allocate on each GPU fault which + * grows the region. + * @flags: bitmask of BASE_MEM_* flags to convey special requirements & + * properties for the new allocation. + * @gpu_va: Start address of the memory region which was allocated from GPU + * virtual address space. + * + * Return: 0 on success or error code + */ +struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, + u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, + u64 *gpu_va); + +/** + * kbase_mem_query - Query properties of a GPU memory region + * + * @kctx: The kernel context + * @gpu_addr: A GPU address contained within the memory region + * @query: The type of query, from KBASE_MEM_QUERY_* flags, which could be + * regarding the amount of backing physical memory allocated so far + * for the region or the size of the region or the flags associated + * with the region. + * @out: Pointer to the location to store the result of query. + * + * Return: 0 on success or error code + */ +int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query, + u64 *const out); + +/** + * kbase_mem_import - Import the external memory for use by the GPU + * + * @kctx: The kernel context + * @type: Type of external memory + * @phandle: Handle to the external memory interpreted as per the type. 
+ * @padding: Amount of extra VA pages to append to the imported buffer + * @gpu_va: GPU address assigned to the imported external memory + * @va_pages: Size of the memory region reserved from the GPU address space + * @flags: bitmask of BASE_MEM_* flags to convey special requirements & + * properties for the new allocation representing the external + * memory. + * Return: 0 on success or error code + */ +int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, + void __user *phandle, u32 padding, u64 *gpu_va, u64 *va_pages, + u64 *flags); + +/** + * kbase_mem_alias - Create a new allocation for GPU, aliasing one or more + * memory regions + * + * @kctx: The kernel context + * @flags: bitmask of BASE_MEM_* flags. + * @stride: Bytes between start of each memory region + * @nents: The number of regions to pack together into the alias + * @ai: Pointer to the struct containing the memory aliasing info + * @num_pages: Number of pages the alias will cover + * + * Return: 0 on failure or otherwise the GPU VA for the alias + */ +u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages); + +/** + * kbase_mem_flags_change - Change the flags for a memory region + * + * @kctx: The kernel context + * @gpu_addr: A GPU address contained within the memory region to modify. + * @flags: The new flags to set + * @mask: Mask of the flags, from BASE_MEM_*, to modify. 
+ * + * Return: 0 on success or error code + */ +int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask); + +/** + * kbase_mem_commit - Change the physical backing size of a region + * + * @kctx: The kernel context + * @gpu_addr: Handle to the memory region + * @new_pages: Number of physical pages to back the region with + * + * Return: 0 on success or error code + */ +int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages); + +/** + * kbase_mem_shrink - Shrink the physical backing size of a region + * + * @kctx: The kernel context + * @reg: The GPU region + * @new_pages: Number of physical pages to back the region with + * + * Return: 0 on success or error code + */ +int kbase_mem_shrink(struct kbase_context *kctx, + struct kbase_va_region *reg, u64 new_pages); + +/** + * kbase_context_mmap - Memory map method, gets invoked when mmap system call is + * issued on device file /dev/malixx. + * @kctx: The kernel context + * @vma: Pointer to the struct containing the info where the GPU allocation + * will be mapped in virtual address space of CPU. + * + * Return: 0 on success or error code + */ +int kbase_context_mmap(struct kbase_context *kctx, struct vm_area_struct *vma); + +/** + * kbase_mem_evictable_init - Initialize the Ephemeral memory eviction + * mechanism. + * @kctx: The kbase context to initialize. + * + * Return: Zero on success or -errno on failure. + */ +int kbase_mem_evictable_init(struct kbase_context *kctx); + +/** + * kbase_mem_evictable_deinit - De-initialize the Ephemeral memory eviction + * mechanism. + * @kctx: The kbase context to de-initialize. 
+ */ +void kbase_mem_evictable_deinit(struct kbase_context *kctx); + +/** + * kbase_mem_grow_gpu_mapping - Grow the GPU mapping of an allocation + * @kctx: Context the region belongs to + * @reg: The GPU region + * @new_pages: The number of pages after the grow + * @old_pages: The number of pages before the grow + * + * Return: 0 on success, -errno on error. + * + * Expand the GPU mapping to encompass the new physical pages which have + * been added to the allocation. + * + * Note: Caller must be holding the region lock. + */ +int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages); + +/** + * kbase_mem_evictable_make - Make a physical allocation eligible for eviction + * @gpu_alloc: The physical allocation to make evictable + * + * Return: 0 on success, -errno on error. + * + * Take the provided region and make all the physical pages within it + * reclaimable by the kernel, updating the per-process VM stats as well. + * Remove any CPU mappings (as these can't be removed in the shrinker callback + * as mmap_sem might already be taken) but leave the GPU mapping intact as + * and until the shrinker reclaims the allocation. + * + * Note: Must be called with the region lock of the containing context. + */ +int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc); + +/** + * kbase_mem_evictable_unmake - Remove a physical allocation's eligibility for + * eviction. + * @alloc: The physical allocation to remove eviction eligibility from. + * + * Return: True if the allocation had its backing restored and false if + * it hasn't. + * + * Make the physical pages in the region no longer reclaimable and update the + * per-process stats, if the shrinker has already evicted the memory then + * re-allocate it if the region is still alive. + * + * Note: Must be called with the region lock of the containing context. 
+ */ +bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc); + +struct kbase_vmap_struct { + off_t offset_in_page; + struct kbase_mem_phy_alloc *cpu_alloc; + struct kbase_mem_phy_alloc *gpu_alloc; + struct tagged_addr *cpu_pages; + struct tagged_addr *gpu_pages; + void *addr; + size_t size; + bool sync_needed; +}; + + +/** + * kbase_vmap_prot - Map a GPU VA range into the kernel safely, only if the + * requested access permissions are supported + * @kctx: Context the VA range belongs to + * @gpu_addr: Start address of VA range + * @size: Size of VA range + * @prot_request: Flags indicating how the caller will then access the memory + * @map: Structure to be given to kbase_vunmap() on freeing + * + * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error + * + * Map a GPU VA Range into the kernel. The VA range must be contained within a + * GPU memory region. Appropriate CPU cache-flushing operations are made as + * required, dependent on the CPU mapping for the memory region. + * + * This is safer than using kmap() on the pages directly, + * because the pages here are refcounted to prevent freeing (and hence reuse + * elsewhere in the system) until a kbase_vunmap() + * + * The flags in @prot_request should use KBASE_REG_{CPU,GPU}_{RD,WR}, to check + * whether the region should allow the intended access, and return an error if + * disallowed. This is essential for security of imported memory, particularly + * a user buf from SHM mapped into the process as RO. In that case, write + * access must be checked if the intention is for kernel to write to the + * memory. + * + * The checks are also there to help catch access errors on memory where + * security is not a concern: imported memory that is always RW, and memory + * that was allocated and owned by the process attached to @kctx. In this case, + * it helps to identify memory that was mapped with the wrong access type. 
+ * + * Note: KBASE_REG_GPU_{RD,WR} flags are currently supported for legacy cases + * where either the security of memory is solely dependent on those flags, or + * when userspace code was expecting only the GPU to access the memory (e.g. HW + * workarounds). + * + * All cache maintenance operations shall be ignored if the + * memory region has been imported. + * + */ +void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, + unsigned long prot_request, struct kbase_vmap_struct *map); + +/** + * kbase_vmap - Map a GPU VA range into the kernel safely + * @kctx: Context the VA range belongs to + * @gpu_addr: Start address of VA range + * @size: Size of VA range + * @map: Structure to be given to kbase_vunmap() on freeing + * + * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error + * + * Map a GPU VA Range into the kernel. The VA range must be contained within a + * GPU memory region. Appropriate CPU cache-flushing operations are made as + * required, dependent on the CPU mapping for the memory region. + * + * This is safer than using kmap() on the pages directly, + * because the pages here are refcounted to prevent freeing (and hence reuse + * elsewhere in the system) until an kbase_vunmap() + * + * kbase_vmap_prot() should be used in preference, since kbase_vmap() makes no + * checks to ensure the security of e.g. imported user bufs from RO SHM. + * + * Note: All cache maintenance operations shall be ignored if the memory region + * has been imported. + */ +void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, + struct kbase_vmap_struct *map); + +/** + * kbase_vunmap - Unmap a GPU VA range from the kernel + * @kctx: Context the VA range belongs to + * @map: Structure describing the mapping from the corresponding kbase_vmap() + * call + * + * Unmaps a GPU VA range from the kernel, given its @map structure obtained + * from kbase_vmap(). 
Appropriate CPU cache-flushing operations are made as + * required, dependent on the CPU mapping for the memory region. + * + * The reference taken on pages during kbase_vmap() is released. + * + * Note: All cache maintenance operations shall be ignored if the memory region + * has been imported. + */ +void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map); + +extern const struct vm_operations_struct kbase_vm_ops; + +/** + * kbase_sync_mem_regions - Perform the cache maintenance for the kernel mode + * CPU mapping. + * @kctx: Context the CPU mapping belongs to. + * @map: Structure describing the CPU mapping, setup previously by the + * kbase_vmap() call. + * @dest: Indicates the type of maintenance required (i.e. flush or invalidate) + * + * Note: The caller shall ensure that CPU mapping is not revoked & remains + * active whilst the maintenance is in progress. + */ +void kbase_sync_mem_regions(struct kbase_context *kctx, + struct kbase_vmap_struct *map, enum kbase_sync_type dest); + +/** + * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation + * @kctx: Context the region belongs to + * @reg: The GPU region + * @new_pages: The number of pages after the shrink + * @old_pages: The number of pages before the shrink + * + * Shrink (or completely remove) all CPU mappings which reference the shrunk + * part of the allocation. + */ +void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, + struct kbase_va_region *reg, + u64 new_pages, u64 old_pages); + +/** + * kbase_phy_alloc_mapping_term - Terminate the kernel side mapping of a + * physical allocation + * @kctx: The kernel base context associated with the mapping + * @alloc: Pointer to the allocation to terminate + * + * This function will unmap the kernel mapping, and free any structures used to + * track it. 
+ */ +void kbase_phy_alloc_mapping_term(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc); + +/** + * kbase_phy_alloc_mapping_get - Get a kernel-side CPU pointer to the permanent + * mapping of a physical allocation + * @kctx: The kernel base context @gpu_addr will be looked up in + * @gpu_addr: The gpu address to lookup for the kernel-side CPU mapping + * @out_kern_mapping: Pointer to storage for a struct kbase_vmap_struct pointer + * which will be used for a call to + * kbase_phy_alloc_mapping_put() + * + * Return: Pointer to a kernel-side accessible location that directly + * corresponds to @gpu_addr, or NULL on failure + * + * Looks up @gpu_addr to retrieve the CPU pointer that can be used to access + * that location kernel-side. Only certain kinds of memory have a permanent + * kernel mapping, refer to the internal functions + * kbase_reg_needs_kernel_mapping() and kbase_phy_alloc_mapping_init() for more + * information. + * + * If this function succeeds, a CPU access to the returned pointer will access + * the actual location represented by @gpu_addr. That is, the return value does + * not require any offset added to it to access the location specified in + * @gpu_addr + * + * The client must take care to either apply any necessary sync operations when + * accessing the data, or ensure that the enclosing region was coherent with + * the GPU, or uncached in the CPU. + * + * The refcount on the physical allocations backing the region are taken, so + * that they do not disappear whilst the client is accessing it. Once the + * client has finished accessing the memory, it must be released with a call to + * kbase_phy_alloc_mapping_put() + * + * Whilst this is expected to execute quickly (the mapping was already setup + * when the physical allocation was created), the call is not IRQ-safe due to + * the region lookup involved. 
+ * + * An error code may indicate that: + * - a userside process has freed the allocation, and so @gpu_addr is no longer + * valid + * - the region containing @gpu_addr does not support a permanent kernel mapping + */ +void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx, u64 gpu_addr, + struct kbase_vmap_struct **out_kern_mapping); + +/** + * kbase_phy_alloc_mapping_put - Put a reference to the kernel-side mapping of a + * physical allocation + * @kctx: The kernel base context associated with the mapping + * @kern_mapping: Pointer to a struct kbase_phy_alloc_mapping pointer obtained + * from a call to kbase_phy_alloc_mapping_get() + * + * Releases the reference to the allocations backing @kern_mapping that was + * obtained through a call to kbase_phy_alloc_mapping_get(). This must be used + * when the client no longer needs to access the kernel-side CPU pointer. + * + * If this was the last reference on the underlying physical allocations, they + * will go through the normal allocation free steps, which also includes an + * unmap of the permanent kernel mapping for those allocations. + * + * Due to these operations, the function is not IRQ-safe. However it is + * expected to execute quickly in the normal case, i.e. when the region holding + * the physical allocation is still present. + */ +void kbase_phy_alloc_mapping_put(struct kbase_context *kctx, + struct kbase_vmap_struct *kern_mapping); + +/** + * kbase_get_cache_line_alignment - Return cache line alignment + * + * Helper function to return the maximum cache line alignment considering + * both CPU and GPU cache sizes. + * + * Return: CPU and GPU cache line alignment, in bytes. + * + * @kbdev: Device pointer. 
+ */ +u32 kbase_get_cache_line_alignment(struct kbase_device *kbdev); + +#if (KERNEL_VERSION(4, 20, 0) > LINUX_VERSION_CODE) +static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn, pgprot_t pgprot) +{ + int err; + +#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \ + ((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \ + (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE))) + if (pgprot_val(pgprot) != pgprot_val(vma->vm_page_prot)) + return VM_FAULT_SIGBUS; + + err = vm_insert_pfn(vma, addr, pfn); +#else + err = vm_insert_pfn_prot(vma, addr, pfn, pgprot); +#endif + + if (unlikely(err == -ENOMEM)) + return VM_FAULT_OOM; + if (unlikely(err < 0 && err != -EBUSY)) + return VM_FAULT_SIGBUS; + + return VM_FAULT_NOPAGE; +} +#endif + +#endif /* _KBASE_MEM_LINUX_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h new file mode 100644 index 0000000..7011603 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h @@ -0,0 +1,166 @@ +/* + * + * (C) COPYRIGHT 2012-2014,2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +#ifndef _KBASE_MEM_LOWLEVEL_H +#define _KBASE_MEM_LOWLEVEL_H + +#ifndef _KBASE_H_ +#error "Don't include this file directly, use mali_kbase.h instead" +#endif + +#include + +/** + * @brief Flags for kbase_phy_allocator_pages_alloc + */ +#define KBASE_PHY_PAGES_FLAG_DEFAULT (0) /** Default allocation flag */ +#define KBASE_PHY_PAGES_FLAG_CLEAR (1 << 0) /** Clear the pages after allocation */ +#define KBASE_PHY_PAGES_FLAG_POISON (1 << 1) /** Fill the memory with a poison value */ + +#define KBASE_PHY_PAGES_SUPPORTED_FLAGS (KBASE_PHY_PAGES_FLAG_DEFAULT|KBASE_PHY_PAGES_FLAG_CLEAR|KBASE_PHY_PAGES_FLAG_POISON) + +#define KBASE_PHY_PAGES_POISON_VALUE 0xFD /** Value to fill the memory with when KBASE_PHY_PAGES_FLAG_POISON is set */ + +enum kbase_sync_type { + KBASE_SYNC_TO_CPU, + KBASE_SYNC_TO_DEVICE +}; + +struct tagged_addr { phys_addr_t tagged_addr; }; + +#define HUGE_PAGE (1u << 0) +#define HUGE_HEAD (1u << 1) +#define FROM_PARTIAL (1u << 2) + +/* + * Note: if macro for converting physical address to page is not defined + * in the kernel itself, it is defined hereby. This is to avoid build errors + * which are reported during builds for some architectures. + */ +#ifndef phys_to_page +#define phys_to_page(phys) (pfn_to_page((phys) >> PAGE_SHIFT)) +#endif + +/** + * as_phys_addr_t - Retrieve the physical address from tagged address by + * masking the lower order 12 bits. + * @t: tagged address to be translated. + * + * Return: physical address corresponding to tagged address. + */ +static inline phys_addr_t as_phys_addr_t(struct tagged_addr t) +{ + return t.tagged_addr & PAGE_MASK; +} + +/** + * as_page - Retrieve the struct page from a tagged address + * @t: tagged address to be translated. + * + * Return: pointer to struct page corresponding to tagged address. 
+ */ +static inline struct page *as_page(struct tagged_addr t) +{ + return phys_to_page(as_phys_addr_t(t)); +} + +/** + * as_tagged - Convert the physical address to tagged address type though + * there is no tag info present, the lower order 12 bits will be 0 + * @phys: physical address to be converted to tagged type + * + * This is used for 4KB physical pages allocated by the Driver or imported pages + * and is needed as physical pages tracking object stores the reference for + * physical pages using tagged address type in lieu of the type generally used + * for physical addresses. + * + * Return: address of tagged address type. + */ +static inline struct tagged_addr as_tagged(phys_addr_t phys) +{ + struct tagged_addr t; + + t.tagged_addr = phys & PAGE_MASK; + return t; +} + +/** + * as_tagged_tag - Form the tagged address by storing the tag or metadata in the + * lower order 12 bits of physial address + * @phys: physical address to be converted to tagged address + * @tag: tag to be stored along with the physical address. + * + * The tag info is used while freeing up the pages + * + * Return: tagged address storing physical address & tag. + */ +static inline struct tagged_addr as_tagged_tag(phys_addr_t phys, int tag) +{ + struct tagged_addr t; + + t.tagged_addr = (phys & PAGE_MASK) | (tag & ~PAGE_MASK); + return t; +} + +/** + * is_huge - Check if the physical page is one of the 512 4KB pages of the + * large page which was not split to be used partially + * @t: tagged address storing the tag in the lower order bits. + * + * Return: true if page belongs to large page, or false + */ +static inline bool is_huge(struct tagged_addr t) +{ + return t.tagged_addr & HUGE_PAGE; +} + +/** + * is_huge_head - Check if the physical page is the first 4KB page of the + * 512 4KB pages within a large page which was not split + * to be used partially + * @t: tagged address storing the tag in the lower order bits. 
+ * + * Return: true if page is the first page of a large page, or false + */ +static inline bool is_huge_head(struct tagged_addr t) +{ + int mask = HUGE_HEAD | HUGE_PAGE; + + return mask == (t.tagged_addr & mask); +} + +/** + * is_partial - Check if the physical page is one of the 512 pages of the + * large page which was split in 4KB pages to be used + * partially for allocations >= 2 MB in size. + * @t: tagged address storing the tag in the lower order bits. + * + * Return: true if page was taken from large page used partially, or false + */ +static inline bool is_partial(struct tagged_addr t) +{ + return t.tagged_addr & FROM_PARTIAL; +} + +#endif /* _KBASE_LOWLEVEL_H */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c new file mode 100644 index 0000000..0723e32 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c @@ -0,0 +1,856 @@ +/* + * + * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define pool_dbg(pool, format, ...) 
\ + dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format, \ + (pool->next_pool) ? "kctx" : "kbdev", \ + kbase_mem_pool_size(pool), \ + kbase_mem_pool_max_size(pool), \ + ##__VA_ARGS__) + +#define NOT_DIRTY false +#define NOT_RECLAIMED false + +static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool) +{ + ssize_t max_size = kbase_mem_pool_max_size(pool); + ssize_t cur_size = kbase_mem_pool_size(pool); + + return max(max_size - cur_size, (ssize_t)0); +} + +static bool kbase_mem_pool_is_full(struct kbase_mem_pool *pool) +{ + return kbase_mem_pool_size(pool) >= kbase_mem_pool_max_size(pool); +} + +static bool kbase_mem_pool_is_empty(struct kbase_mem_pool *pool) +{ + return kbase_mem_pool_size(pool) == 0; +} + +static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool, + struct page *p) +{ + lockdep_assert_held(&pool->pool_lock); + + list_add(&p->lru, &pool->page_list); + pool->cur_size++; + + pool_dbg(pool, "added page\n"); +} + +static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p) +{ + kbase_mem_pool_lock(pool); + kbase_mem_pool_add_locked(pool, p); + kbase_mem_pool_unlock(pool); +} + +static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool, + struct list_head *page_list, size_t nr_pages) +{ + lockdep_assert_held(&pool->pool_lock); + + list_splice(page_list, &pool->page_list); + pool->cur_size += nr_pages; + + pool_dbg(pool, "added %zu pages\n", nr_pages); +} + +static void kbase_mem_pool_add_list(struct kbase_mem_pool *pool, + struct list_head *page_list, size_t nr_pages) +{ + kbase_mem_pool_lock(pool); + kbase_mem_pool_add_list_locked(pool, page_list, nr_pages); + kbase_mem_pool_unlock(pool); +} + +static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool) +{ + struct page *p; + + lockdep_assert_held(&pool->pool_lock); + + if (kbase_mem_pool_is_empty(pool)) + return NULL; + + p = list_first_entry(&pool->page_list, struct page, lru); + list_del_init(&p->lru); + pool->cur_size--; + + 
pool_dbg(pool, "removed page\n"); + + return p; +} + +static struct page *kbase_mem_pool_remove(struct kbase_mem_pool *pool) +{ + struct page *p; + + kbase_mem_pool_lock(pool); + p = kbase_mem_pool_remove_locked(pool); + kbase_mem_pool_unlock(pool); + + return p; +} + +static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool, + struct page *p) +{ + struct device *dev = pool->kbdev->dev; + dma_sync_single_for_device(dev, kbase_dma_addr(p), + (PAGE_SIZE << pool->order), DMA_BIDIRECTIONAL); +} + +static void kbase_mem_pool_zero_page(struct kbase_mem_pool *pool, + struct page *p) +{ + int i; + + for (i = 0; i < (1U << pool->order); i++) + clear_highpage(p+i); + + kbase_mem_pool_sync_page(pool, p); +} + +static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool, + struct page *p) +{ + /* Zero page before spilling */ + kbase_mem_pool_zero_page(next_pool, p); + + kbase_mem_pool_add(next_pool, p); +} + +struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool) +{ + struct page *p; + gfp_t gfp; + struct kbase_device *const kbdev = pool->kbdev; + struct device *const dev = kbdev->dev; + dma_addr_t dma_addr; + int i; + +#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && \ + LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0) + /* DMA cache sync fails for HIGHMEM before 3.5 on ARM */ + gfp = GFP_USER | __GFP_ZERO; +#else + gfp = GFP_HIGHUSER | __GFP_ZERO; +#endif + + /* don't warn on higher order failures */ + if (pool->order) + gfp |= __GFP_NOWARN; + + p = kbdev->mgm_dev->ops.mgm_alloc_page(kbdev->mgm_dev, + pool->group_id, gfp, pool->order); + if (!p) + return NULL; + + dma_addr = dma_map_page(dev, p, 0, (PAGE_SIZE << pool->order), + DMA_BIDIRECTIONAL); + + if (dma_mapping_error(dev, dma_addr)) { + kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, + pool->group_id, p, pool->order); + return NULL; + } + + WARN_ON(dma_addr != page_to_phys(p)); + for (i = 0; i < (1u << pool->order); i++) + kbase_set_dma_addr(p+i, dma_addr + PAGE_SIZE * i); + + return p; 
+} + +static void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, + struct page *p) +{ + struct kbase_device *const kbdev = pool->kbdev; + struct device *const dev = kbdev->dev; + dma_addr_t dma_addr = kbase_dma_addr(p); + int i; + + dma_unmap_page(dev, dma_addr, (PAGE_SIZE << pool->order), + DMA_BIDIRECTIONAL); + for (i = 0; i < (1u << pool->order); i++) + kbase_clear_dma_addr(p+i); + + kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, + pool->group_id, p, pool->order); + + pool_dbg(pool, "freed page to kernel\n"); +} + +static size_t kbase_mem_pool_shrink_locked(struct kbase_mem_pool *pool, + size_t nr_to_shrink) +{ + struct page *p; + size_t i; + + lockdep_assert_held(&pool->pool_lock); + + for (i = 0; i < nr_to_shrink && !kbase_mem_pool_is_empty(pool); i++) { + p = kbase_mem_pool_remove_locked(pool); + kbase_mem_pool_free_page(pool, p); + } + + return i; +} + +static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool, + size_t nr_to_shrink) +{ + size_t nr_freed; + + kbase_mem_pool_lock(pool); + nr_freed = kbase_mem_pool_shrink_locked(pool, nr_to_shrink); + kbase_mem_pool_unlock(pool); + + return nr_freed; +} + +int kbase_mem_pool_grow(struct kbase_mem_pool *pool, + size_t nr_to_grow) +{ + struct page *p; + size_t i; + + kbase_mem_pool_lock(pool); + + pool->dont_reclaim = true; + for (i = 0; i < nr_to_grow; i++) { + if (pool->dying) { + pool->dont_reclaim = false; + kbase_mem_pool_shrink_locked(pool, nr_to_grow); + kbase_mem_pool_unlock(pool); + + return -ENOMEM; + } + kbase_mem_pool_unlock(pool); + + p = kbase_mem_alloc_page(pool); + if (!p) { + kbase_mem_pool_lock(pool); + pool->dont_reclaim = false; + kbase_mem_pool_unlock(pool); + + return -ENOMEM; + } + + kbase_mem_pool_lock(pool); + kbase_mem_pool_add_locked(pool, p); + } + pool->dont_reclaim = false; + kbase_mem_pool_unlock(pool); + + return 0; +} + +void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size) +{ + size_t cur_size; + int err = 0; + + cur_size = 
kbase_mem_pool_size(pool); + + if (new_size > pool->max_size) + new_size = pool->max_size; + + if (new_size < cur_size) + kbase_mem_pool_shrink(pool, cur_size - new_size); + else if (new_size > cur_size) + err = kbase_mem_pool_grow(pool, new_size - cur_size); + + if (err) { + size_t grown_size = kbase_mem_pool_size(pool); + + dev_warn(pool->kbdev->dev, + "Mem pool not grown to the required size of %zu bytes, grown for additional %zu bytes instead!\n", + (new_size - cur_size), (grown_size - cur_size)); + } +} + +void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size) +{ + size_t cur_size; + size_t nr_to_shrink; + + kbase_mem_pool_lock(pool); + + pool->max_size = max_size; + + cur_size = kbase_mem_pool_size(pool); + if (max_size < cur_size) { + nr_to_shrink = cur_size - max_size; + kbase_mem_pool_shrink_locked(pool, nr_to_shrink); + } + + kbase_mem_pool_unlock(pool); +} + + +static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s, + struct shrink_control *sc) +{ + struct kbase_mem_pool *pool; + size_t pool_size; + + pool = container_of(s, struct kbase_mem_pool, reclaim); + + kbase_mem_pool_lock(pool); + if (pool->dont_reclaim && !pool->dying) { + kbase_mem_pool_unlock(pool); + return 0; + } + pool_size = kbase_mem_pool_size(pool); + kbase_mem_pool_unlock(pool); + + return pool_size; +} + +static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s, + struct shrink_control *sc) +{ + struct kbase_mem_pool *pool; + unsigned long freed; + + pool = container_of(s, struct kbase_mem_pool, reclaim); + + kbase_mem_pool_lock(pool); + if (pool->dont_reclaim && !pool->dying) { + kbase_mem_pool_unlock(pool); + return 0; + } + + pool_dbg(pool, "reclaim scan %ld:\n", sc->nr_to_scan); + + freed = kbase_mem_pool_shrink_locked(pool, sc->nr_to_scan); + + kbase_mem_pool_unlock(pool); + + pool_dbg(pool, "reclaim freed %ld pages\n", freed); + + return freed; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) +static int 
kbase_mem_pool_reclaim_shrink(struct shrinker *s, + struct shrink_control *sc) +{ + if (sc->nr_to_scan == 0) + return kbase_mem_pool_reclaim_count_objects(s, sc); + + return kbase_mem_pool_reclaim_scan_objects(s, sc); +} +#endif + +int kbase_mem_pool_init(struct kbase_mem_pool *pool, + const struct kbase_mem_pool_config *config, + unsigned int order, + int group_id, + struct kbase_device *kbdev, + struct kbase_mem_pool *next_pool) +{ + if (WARN_ON(group_id < 0) || + WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) { + return -EINVAL; + } + + pool->cur_size = 0; + pool->max_size = kbase_mem_pool_config_get_max_size(config); + pool->order = order; + pool->group_id = group_id; + pool->kbdev = kbdev; + pool->next_pool = next_pool; + pool->dying = false; + + spin_lock_init(&pool->pool_lock); + INIT_LIST_HEAD(&pool->page_list); + + /* Register shrinker */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) + pool->reclaim.shrink = kbase_mem_pool_reclaim_shrink; +#else + pool->reclaim.count_objects = kbase_mem_pool_reclaim_count_objects; + pool->reclaim.scan_objects = kbase_mem_pool_reclaim_scan_objects; +#endif + pool->reclaim.seeks = DEFAULT_SEEKS; + /* Kernel versions prior to 3.1 : + * struct shrinker does not define batch */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) + pool->reclaim.batch = 0; +#endif + register_shrinker(&pool->reclaim); + + pool_dbg(pool, "initialized\n"); + + return 0; +} + +void kbase_mem_pool_mark_dying(struct kbase_mem_pool *pool) +{ + kbase_mem_pool_lock(pool); + pool->dying = true; + kbase_mem_pool_unlock(pool); +} + +void kbase_mem_pool_term(struct kbase_mem_pool *pool) +{ + struct kbase_mem_pool *next_pool = pool->next_pool; + struct page *p, *tmp; + size_t nr_to_spill = 0; + LIST_HEAD(spill_list); + LIST_HEAD(free_list); + int i; + + pool_dbg(pool, "terminate()\n"); + + unregister_shrinker(&pool->reclaim); + + kbase_mem_pool_lock(pool); + pool->max_size = 0; + + if (next_pool && !kbase_mem_pool_is_full(next_pool)) { + /* Spill to 
next pool (may overspill) */ + nr_to_spill = kbase_mem_pool_capacity(next_pool); + nr_to_spill = min(kbase_mem_pool_size(pool), nr_to_spill); + + /* Zero pages first without holding the next_pool lock */ + for (i = 0; i < nr_to_spill; i++) { + p = kbase_mem_pool_remove_locked(pool); + list_add(&p->lru, &spill_list); + } + } + + while (!kbase_mem_pool_is_empty(pool)) { + /* Free remaining pages to kernel */ + p = kbase_mem_pool_remove_locked(pool); + list_add(&p->lru, &free_list); + } + + kbase_mem_pool_unlock(pool); + + if (next_pool && nr_to_spill) { + list_for_each_entry(p, &spill_list, lru) + kbase_mem_pool_zero_page(pool, p); + + /* Add new page list to next_pool */ + kbase_mem_pool_add_list(next_pool, &spill_list, nr_to_spill); + + pool_dbg(pool, "terminate() spilled %zu pages\n", nr_to_spill); + } + + list_for_each_entry_safe(p, tmp, &free_list, lru) { + list_del_init(&p->lru); + kbase_mem_pool_free_page(pool, p); + } + + pool_dbg(pool, "terminated\n"); +} + +struct page *kbase_mem_pool_alloc(struct kbase_mem_pool *pool) +{ + struct page *p; + + do { + pool_dbg(pool, "alloc()\n"); + p = kbase_mem_pool_remove(pool); + + if (p) + return p; + + pool = pool->next_pool; + } while (pool); + + return NULL; +} + +struct page *kbase_mem_pool_alloc_locked(struct kbase_mem_pool *pool) +{ + struct page *p; + + lockdep_assert_held(&pool->pool_lock); + + pool_dbg(pool, "alloc_locked()\n"); + p = kbase_mem_pool_remove_locked(pool); + + if (p) + return p; + + return NULL; +} + +void kbase_mem_pool_free(struct kbase_mem_pool *pool, struct page *p, + bool dirty) +{ + struct kbase_mem_pool *next_pool = pool->next_pool; + + pool_dbg(pool, "free()\n"); + + if (!kbase_mem_pool_is_full(pool)) { + /* Add to our own pool */ + if (dirty) + kbase_mem_pool_sync_page(pool, p); + + kbase_mem_pool_add(pool, p); + } else if (next_pool && !kbase_mem_pool_is_full(next_pool)) { + /* Spill to next pool */ + kbase_mem_pool_spill(next_pool, p); + } else { + /* Free page */ + 
kbase_mem_pool_free_page(pool, p); + } +} + +void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p, + bool dirty) +{ + pool_dbg(pool, "free_locked()\n"); + + lockdep_assert_held(&pool->pool_lock); + + if (!kbase_mem_pool_is_full(pool)) { + /* Add to our own pool */ + if (dirty) + kbase_mem_pool_sync_page(pool, p); + + kbase_mem_pool_add_locked(pool, p); + } else { + /* Free page */ + kbase_mem_pool_free_page(pool, p); + } +} + +int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, + struct tagged_addr *pages, bool partial_allowed) +{ + struct page *p; + size_t nr_from_pool; + size_t i = 0; + int err = -ENOMEM; + size_t nr_pages_internal; + + nr_pages_internal = nr_4k_pages / (1u << (pool->order)); + + if (nr_pages_internal * (1u << pool->order) != nr_4k_pages) + return -EINVAL; + + pool_dbg(pool, "alloc_pages(4k=%zu):\n", nr_4k_pages); + pool_dbg(pool, "alloc_pages(internal=%zu):\n", nr_pages_internal); + + /* Get pages from this pool */ + kbase_mem_pool_lock(pool); + nr_from_pool = min(nr_pages_internal, kbase_mem_pool_size(pool)); + while (nr_from_pool--) { + int j; + p = kbase_mem_pool_remove_locked(pool); + if (pool->order) { + pages[i++] = as_tagged_tag(page_to_phys(p), + HUGE_HEAD | HUGE_PAGE); + for (j = 1; j < (1u << pool->order); j++) + pages[i++] = as_tagged_tag(page_to_phys(p) + + PAGE_SIZE * j, + HUGE_PAGE); + } else { + pages[i++] = as_tagged(page_to_phys(p)); + } + } + kbase_mem_pool_unlock(pool); + + if (i != nr_4k_pages && pool->next_pool) { + /* Allocate via next pool */ + err = kbase_mem_pool_alloc_pages(pool->next_pool, + nr_4k_pages - i, pages + i, partial_allowed); + + if (err < 0) + goto err_rollback; + + i += err; + } else { + /* Get any remaining pages from kernel */ + while (i != nr_4k_pages) { + p = kbase_mem_alloc_page(pool); + if (!p) { + if (partial_allowed) + goto done; + else + goto err_rollback; + } + + if (pool->order) { + int j; + + pages[i++] = as_tagged_tag(page_to_phys(p), + 
HUGE_PAGE | + HUGE_HEAD); + for (j = 1; j < (1u << pool->order); j++) { + phys_addr_t phys; + + phys = page_to_phys(p) + PAGE_SIZE * j; + pages[i++] = as_tagged_tag(phys, + HUGE_PAGE); + } + } else { + pages[i++] = as_tagged(page_to_phys(p)); + } + } + } + +done: + pool_dbg(pool, "alloc_pages(%zu) done\n", i); + return i; + +err_rollback: + kbase_mem_pool_free_pages(pool, i, pages, NOT_DIRTY, NOT_RECLAIMED); + return err; +} + +int kbase_mem_pool_alloc_pages_locked(struct kbase_mem_pool *pool, + size_t nr_4k_pages, struct tagged_addr *pages) +{ + struct page *p; + size_t i; + size_t nr_pages_internal; + + lockdep_assert_held(&pool->pool_lock); + + nr_pages_internal = nr_4k_pages / (1u << (pool->order)); + + if (nr_pages_internal * (1u << pool->order) != nr_4k_pages) + return -EINVAL; + + pool_dbg(pool, "alloc_pages_locked(4k=%zu):\n", nr_4k_pages); + pool_dbg(pool, "alloc_pages_locked(internal=%zu):\n", + nr_pages_internal); + + if (kbase_mem_pool_size(pool) < nr_pages_internal) { + pool_dbg(pool, "Failed alloc\n"); + return -ENOMEM; + } + + for (i = 0; i < nr_pages_internal; i++) { + int j; + + p = kbase_mem_pool_remove_locked(pool); + if (pool->order) { + *pages++ = as_tagged_tag(page_to_phys(p), + HUGE_HEAD | HUGE_PAGE); + for (j = 1; j < (1u << pool->order); j++) { + *pages++ = as_tagged_tag(page_to_phys(p) + + PAGE_SIZE * j, + HUGE_PAGE); + } + } else { + *pages++ = as_tagged(page_to_phys(p)); + } + } + + return nr_4k_pages; +} + +static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool, + size_t nr_pages, struct tagged_addr *pages, + bool zero, bool sync) +{ + struct page *p; + size_t nr_to_pool = 0; + LIST_HEAD(new_page_list); + size_t i; + + if (!nr_pages) + return; + + pool_dbg(pool, "add_array(%zu, zero=%d, sync=%d):\n", + nr_pages, zero, sync); + + /* Zero/sync pages first without holding the pool lock */ + for (i = 0; i < nr_pages; i++) { + if (unlikely(!as_phys_addr_t(pages[i]))) + continue; + + if (is_huge_head(pages[i]) || !is_huge(pages[i])) 
{ + p = as_page(pages[i]); + if (zero) + kbase_mem_pool_zero_page(pool, p); + else if (sync) + kbase_mem_pool_sync_page(pool, p); + + list_add(&p->lru, &new_page_list); + nr_to_pool++; + } + pages[i] = as_tagged(0); + } + + /* Add new page list to pool */ + kbase_mem_pool_add_list(pool, &new_page_list, nr_to_pool); + + pool_dbg(pool, "add_array(%zu) added %zu pages\n", + nr_pages, nr_to_pool); +} + +static void kbase_mem_pool_add_array_locked(struct kbase_mem_pool *pool, + size_t nr_pages, struct tagged_addr *pages, + bool zero, bool sync) +{ + struct page *p; + size_t nr_to_pool = 0; + LIST_HEAD(new_page_list); + size_t i; + + lockdep_assert_held(&pool->pool_lock); + + if (!nr_pages) + return; + + pool_dbg(pool, "add_array_locked(%zu, zero=%d, sync=%d):\n", + nr_pages, zero, sync); + + /* Zero/sync pages first */ + for (i = 0; i < nr_pages; i++) { + if (unlikely(!as_phys_addr_t(pages[i]))) + continue; + + if (is_huge_head(pages[i]) || !is_huge(pages[i])) { + p = as_page(pages[i]); + if (zero) + kbase_mem_pool_zero_page(pool, p); + else if (sync) + kbase_mem_pool_sync_page(pool, p); + + list_add(&p->lru, &new_page_list); + nr_to_pool++; + } + pages[i] = as_tagged(0); + } + + /* Add new page list to pool */ + kbase_mem_pool_add_list_locked(pool, &new_page_list, nr_to_pool); + + pool_dbg(pool, "add_array_locked(%zu) added %zu pages\n", + nr_pages, nr_to_pool); +} + +void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, + struct tagged_addr *pages, bool dirty, bool reclaimed) +{ + struct kbase_mem_pool *next_pool = pool->next_pool; + struct page *p; + size_t nr_to_pool; + LIST_HEAD(to_pool_list); + size_t i = 0; + + pool_dbg(pool, "free_pages(%zu):\n", nr_pages); + + if (!reclaimed) { + /* Add to this pool */ + nr_to_pool = kbase_mem_pool_capacity(pool); + nr_to_pool = min(nr_pages, nr_to_pool); + + kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty); + + i += nr_to_pool; + + if (i != nr_pages && next_pool) { + /* Spill to next pool 
(may overspill) */ + nr_to_pool = kbase_mem_pool_capacity(next_pool); + nr_to_pool = min(nr_pages - i, nr_to_pool); + + kbase_mem_pool_add_array(next_pool, nr_to_pool, + pages + i, true, dirty); + i += nr_to_pool; + } + } + + /* Free any remaining pages to kernel */ + for (; i < nr_pages; i++) { + if (unlikely(!as_phys_addr_t(pages[i]))) + continue; + + if (is_huge(pages[i]) && !is_huge_head(pages[i])) { + pages[i] = as_tagged(0); + continue; + } + + p = as_page(pages[i]); + + kbase_mem_pool_free_page(pool, p); + pages[i] = as_tagged(0); + } + + pool_dbg(pool, "free_pages(%zu) done\n", nr_pages); +} + + +void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool, + size_t nr_pages, struct tagged_addr *pages, bool dirty, + bool reclaimed) +{ + struct page *p; + size_t nr_to_pool; + LIST_HEAD(to_pool_list); + size_t i = 0; + + lockdep_assert_held(&pool->pool_lock); + + pool_dbg(pool, "free_pages_locked(%zu):\n", nr_pages); + + if (!reclaimed) { + /* Add to this pool */ + nr_to_pool = kbase_mem_pool_capacity(pool); + nr_to_pool = min(nr_pages, nr_to_pool); + + kbase_mem_pool_add_array_locked(pool, nr_pages, pages, false, + dirty); + + i += nr_to_pool; + } + + /* Free any remaining pages to kernel */ + for (; i < nr_pages; i++) { + if (unlikely(!as_phys_addr_t(pages[i]))) + continue; + + if (is_huge(pages[i]) && !is_huge_head(pages[i])) { + pages[i] = as_tagged(0); + continue; + } + + p = as_page(pages[i]); + + kbase_mem_pool_free_page(pool, p); + pages[i] = as_tagged(0); + } + + pool_dbg(pool, "free_pages_locked(%zu) done\n", nr_pages); +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c new file mode 100644 index 0000000..5879fdf --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c @@ -0,0 +1,191 @@ +/* + * + * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include + +#include "mali_kbase_mem_pool_debugfs.h" +#include "mali_kbase_debugfs_helper.h" + +void kbase_mem_pool_debugfs_trim(void *const array, size_t const index, + size_t const value) +{ + struct kbase_mem_pool *const mem_pools = array; + + if (WARN_ON(!mem_pools) || + WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + return; + + kbase_mem_pool_trim(&mem_pools[index], value); +} + +void kbase_mem_pool_debugfs_set_max_size(void *const array, + size_t const index, size_t const value) +{ + struct kbase_mem_pool *const mem_pools = array; + + if (WARN_ON(!mem_pools) || + WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + return; + + kbase_mem_pool_set_max_size(&mem_pools[index], value); +} + +size_t kbase_mem_pool_debugfs_size(void *const array, size_t const index) +{ + struct kbase_mem_pool *const mem_pools = array; + + if (WARN_ON(!mem_pools) || + WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + return 0; + + return kbase_mem_pool_size(&mem_pools[index]); +} + +size_t kbase_mem_pool_debugfs_max_size(void *const array, size_t const index) +{ + struct kbase_mem_pool *const mem_pools = array; + + if (WARN_ON(!mem_pools) || + WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + return 0; + + return 
kbase_mem_pool_max_size(&mem_pools[index]); +} + +void kbase_mem_pool_config_debugfs_set_max_size(void *const array, + size_t const index, size_t const value) +{ + struct kbase_mem_pool_config *const configs = array; + + if (WARN_ON(!configs) || + WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + return; + + kbase_mem_pool_config_set_max_size(&configs[index], value); +} + +size_t kbase_mem_pool_config_debugfs_max_size(void *const array, + size_t const index) +{ + struct kbase_mem_pool_config *const configs = array; + + if (WARN_ON(!configs) || + WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + return 0; + + return kbase_mem_pool_config_get_max_size(&configs[index]); +} + +static int kbase_mem_pool_debugfs_size_show(struct seq_file *sfile, void *data) +{ + CSTD_UNUSED(data); + return kbase_debugfs_helper_seq_read(sfile, + MEMORY_GROUP_MANAGER_NR_GROUPS, kbase_mem_pool_debugfs_size); +} + +static ssize_t kbase_mem_pool_debugfs_write(struct file *file, + const char __user *ubuf, size_t count, loff_t *ppos) +{ + int err; + + CSTD_UNUSED(ppos); + err = kbase_debugfs_helper_seq_write(file, ubuf, count, + MEMORY_GROUP_MANAGER_NR_GROUPS, kbase_mem_pool_debugfs_trim); + return err ? 
err : count; +} + +static int kbase_mem_pool_debugfs_open(struct inode *in, struct file *file) +{ + return single_open(file, kbase_mem_pool_debugfs_size_show, + in->i_private); +} + +static const struct file_operations kbase_mem_pool_debugfs_fops = { + .owner = THIS_MODULE, + .open = kbase_mem_pool_debugfs_open, + .read = seq_read, + .write = kbase_mem_pool_debugfs_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static int kbase_mem_pool_debugfs_max_size_show(struct seq_file *sfile, + void *data) +{ + CSTD_UNUSED(data); + return kbase_debugfs_helper_seq_read(sfile, + MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_max_size); +} + +static ssize_t kbase_mem_pool_debugfs_max_size_write(struct file *file, + const char __user *ubuf, size_t count, loff_t *ppos) +{ + int err; + + CSTD_UNUSED(ppos); + err = kbase_debugfs_helper_seq_write(file, ubuf, count, + MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_set_max_size); + return err ? err : count; +} + +static int kbase_mem_pool_debugfs_max_size_open(struct inode *in, + struct file *file) +{ + return single_open(file, kbase_mem_pool_debugfs_max_size_show, + in->i_private); +} + +static const struct file_operations kbase_mem_pool_debugfs_max_size_fops = { + .owner = THIS_MODULE, + .open = kbase_mem_pool_debugfs_max_size_open, + .read = seq_read, + .write = kbase_mem_pool_debugfs_max_size_write, + .llseek = seq_lseek, + .release = single_release, +}; + +void kbase_mem_pool_debugfs_init(struct dentry *parent, + struct kbase_context *kctx) +{ + /* prevent unprivileged use of debug file in old kernel version */ +#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) + /* only for newer kernel version debug file system is safe */ + const mode_t mode = 0644; +#else + const mode_t mode = 0600; +#endif + + debugfs_create_file("mem_pool_size", mode, parent, + &kctx->mem_pools.small, &kbase_mem_pool_debugfs_fops); + + debugfs_create_file("mem_pool_max_size", mode, parent, + &kctx->mem_pools.small, 
&kbase_mem_pool_debugfs_max_size_fops); + + debugfs_create_file("lp_mem_pool_size", mode, parent, + &kctx->mem_pools.large, &kbase_mem_pool_debugfs_fops); + + debugfs_create_file("lp_mem_pool_max_size", mode, parent, + &kctx->mem_pools.large, &kbase_mem_pool_debugfs_max_size_fops); +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h new file mode 100644 index 0000000..2932945 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h @@ -0,0 +1,123 @@ +/* + * + * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_MEM_POOL_DEBUGFS_H_ +#define _KBASE_MEM_POOL_DEBUGFS_H_ + +#include + +/** + * kbase_mem_pool_debugfs_init - add debugfs knobs for @kctx's memory pools + * @parent: Parent debugfs dentry + * @kctx: The kbase context + * + * Adds four debugfs files under @parent: + * - mem_pool_size: get/set the current sizes of @kctx's small-page pools + * - mem_pool_max_size: get/set the max sizes of @kctx's small-page pools + * - lp_mem_pool_size: get/set the current sizes of @kctx's large-page pools + * - lp_mem_pool_max_size: get/set the max sizes of @kctx's large-page pools + */ +void kbase_mem_pool_debugfs_init(struct dentry *parent, + struct kbase_context *kctx); + +/** + * kbase_mem_pool_debugfs_trim - Grow or shrink a memory pool to a new size + * + * @array: Address of the first in an array of physical memory pools. + * @index: A memory group ID to be used as an index into the array of memory + * pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * @value: New number of pages in the pool. + * + * If @value > current size, fill the pool with new pages from the kernel, but + * not above the max_size for the pool. + * If @value < current size, shrink the pool by freeing pages to the kernel. + */ +void kbase_mem_pool_debugfs_trim(void *array, size_t index, size_t value); + +/** + * kbase_mem_pool_debugfs_set_max_size - Set maximum number of free pages in + * memory pool + * + * @array: Address of the first in an array of physical memory pools. + * @index: A memory group ID to be used as an index into the array of memory + * pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * @value: Maximum number of free pages the pool can hold. + * + * If the maximum size is reduced, the pool will be shrunk to adhere to the + * new limit. For details see kbase_mem_pool_shrink().
+ */ +void kbase_mem_pool_debugfs_set_max_size(void *array, size_t index, + size_t value); + +/** + * kbase_mem_pool_debugfs_size - Get number of free pages in a memory pool + * + * @array: Address of the first in an array of physical memory pools. + * @index: A memory group ID to be used as an index into the array of memory + * pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * + * Note: the size of the pool may in certain corner cases exceed @max_size! + * + * Return: Number of free pages in the pool + */ +size_t kbase_mem_pool_debugfs_size(void *array, size_t index); + +/** + * kbase_mem_pool_debugfs_max_size - Get maximum number of free pages in a + * memory pool + * + * @array: Address of the first in an array of physical memory pools. + * @index: A memory group ID to be used as an index into the array of memory + * pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * + * Return: Maximum number of free pages in the pool + */ +size_t kbase_mem_pool_debugfs_max_size(void *array, size_t index); + +/** + * kbase_mem_pool_config_debugfs_set_max_size - Set maximum number of free pages + * in initial configuration of pool + * + * @array: Array of initial configurations for a set of physical memory pools. + * @index: A memory group ID to be used as an index into the array. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * @value : Maximum number of free pages that a memory pool created from the + * selected configuration can hold. + */ +void kbase_mem_pool_config_debugfs_set_max_size(void *array, size_t index, + size_t value); + +/** + * kbase_mem_pool_config_debugfs_max_size - Get maximum number of free pages + * from initial configuration of pool + * + * @array: Array of initial configurations for a set of physical memory pools. + * @index: A memory group ID to be used as an index into the array. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). 
+ * + * Return: Maximum number of free pages that a memory pool created from the + * selected configuration can hold. + */ +size_t kbase_mem_pool_config_debugfs_max_size(void *array, size_t index); + +#endif /*_KBASE_MEM_POOL_DEBUGFS_H_ */ + diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.c new file mode 100644 index 0000000..aa25548 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.c @@ -0,0 +1,115 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include + +#include + +void kbase_mem_pool_group_config_set_max_size( + struct kbase_mem_pool_group_config *const configs, + size_t const max_size) +{ + size_t const large_max_size = max_size >> + (KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER - + KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER); /* max_size (4 KiB pages) scaled down by the 2 MiB / 4 KiB order difference */ + int gid; + + for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) { + kbase_mem_pool_config_set_max_size(&configs->small[gid], + max_size); + + kbase_mem_pool_config_set_max_size(&configs->large[gid], + large_max_size); + } +} + +int kbase_mem_pool_group_init( + struct kbase_mem_pool_group *const mem_pools, + struct kbase_device *const kbdev, + const struct kbase_mem_pool_group_config *const configs, + struct kbase_mem_pool_group *next_pools) +{ + int gid, err = 0; + + for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) { + err = kbase_mem_pool_init(&mem_pools->small[gid], + &configs->small[gid], + KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER, + gid, + kbdev, + next_pools ? &next_pools->small[gid] : NULL); + + if (!err) { + err = kbase_mem_pool_init(&mem_pools->large[gid], + &configs->large[gid], + KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER, + gid, + kbdev, + next_pools ? &next_pools->large[gid] : NULL); + if (err) + kbase_mem_pool_term(&mem_pools->small[gid]); + } + + /* Break out of the loop early to avoid incrementing the count + * of memory pool pairs successfully initialized. A pair whose + * large pool failed has already had its small pool terminated + * above, so the failed pair itself needs no further cleanup. + */ + if (err) + break; + } + + if (err) { + /* gid gives the number of memory pool pairs successfully + * initialized, which is one greater than the array index of the + * last group.
+ */ + while (gid-- > 0) { + kbase_mem_pool_term(&mem_pools->small[gid]); + kbase_mem_pool_term(&mem_pools->large[gid]); + } + } + + return err; +} + +void kbase_mem_pool_group_mark_dying( + struct kbase_mem_pool_group *const mem_pools) +{ + int gid; + + for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) { + kbase_mem_pool_mark_dying(&mem_pools->small[gid]); + kbase_mem_pool_mark_dying(&mem_pools->large[gid]); + } +} + +void kbase_mem_pool_group_term( + struct kbase_mem_pool_group *const mem_pools) +{ + int gid; + + for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) { + kbase_mem_pool_term(&mem_pools->small[gid]); + kbase_mem_pool_term(&mem_pools->large[gid]); + } +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.h new file mode 100644 index 0000000..0484f59 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.h @@ -0,0 +1,92 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html.
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_MEM_POOL_GROUP_H_ +#define _KBASE_MEM_POOL_GROUP_H_ + +#include + +/** + * kbase_mem_pool_group_config_set_max_size - Set the initial configuration for + * a set of memory pools + * + * This function sets the initial configuration for every memory pool so that + * the maximum amount of free memory that each pool can hold is identical. + * The equivalent number of 2 MiB pages is calculated automatically for the + * purpose of configuring the large page pools. + * + * @configs: Initial configuration for the set of memory pools + * @max_size: Maximum number of free 4 KiB pages each pool can hold + */ +void kbase_mem_pool_group_config_set_max_size( + struct kbase_mem_pool_group_config *configs, size_t max_size); + +/** + * kbase_mem_pool_group_init - Initialize a set of memory pools + * + * Initializes a complete set of physical memory pools. Memory pools are used to + * allow efficient reallocation of previously-freed physical pages. A pair of + * memory pools is initialized for each physical memory group: one for 4 KiB + * pages and one for 2 MiB pages. + * + * If @next_pools is not NULL then a request to allocate memory from an + * empty pool in @mem_pools will attempt to allocate from the equivalent pool + * in @next_pools before going to the memory group manager. Similarly + * pages can spill over to the equivalent pool in @next_pools when a pool + * is full in @mem_pools. Pages are zeroed before they spill over to another + * pool, to prevent leaking information between applications.
+ * + * @mem_pools: Set of memory pools to initialize + * @kbdev: Kbase device where memory is used + * @configs: Initial configuration for the set of memory pools + * @next_pools: Set of memory pools from which to allocate memory if there + * is no free memory in one of the @mem_pools + * + * Return: 0 on success, otherwise a negative error code + */ +int kbase_mem_pool_group_init(struct kbase_mem_pool_group *mem_pools, + struct kbase_device *kbdev, + const struct kbase_mem_pool_group_config *configs, + struct kbase_mem_pool_group *next_pools); + +/** + * kbase_mem_pool_group_mark_dying - Mark a set of memory pools as dying + * + * Marks a complete set of physical memory pools previously initialized by + * @kbase_mem_pool_group_init as dying. This will cause any ongoing allocation + * operations (eg growing on page fault) to be terminated. + * + * @mem_pools: Set of memory pools to mark + */ +void kbase_mem_pool_group_mark_dying(struct kbase_mem_pool_group *mem_pools); + +/** + * kbase_mem_pool_group_term - Terminate a set of memory pools + * + * Terminates a complete set of physical memory pools previously initialized by + * @kbase_mem_pool_group_init. + * + * @mem_pools: Set of memory pools to terminate + */ +void kbase_mem_pool_group_term(struct kbase_mem_pool_group *mem_pools); + +#endif /* _KBASE_MEM_POOL_GROUP_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c new file mode 100644 index 0000000..5752d4a --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c @@ -0,0 +1,138 @@ +/* + * + * (C) COPYRIGHT 2012-2017, 2019 ARM Limited. All rights reserved.
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include + +#ifdef CONFIG_DEBUG_FS + +/** Show callback for the @c mem_profile debugfs file. + * + * This function is called to get the contents of the @c mem_profile debugfs + * file. This is a report of current memory usage and distribution in userspace. 
+ * + * @param sfile The debugfs entry + * @param data Data associated with the entry + * + * @return 0 if it successfully prints data in debugfs entry file, non-zero otherwise + */ +static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data) +{ + struct kbase_context *kctx = sfile->private; + + if (IS_ERR_OR_NULL(kctx->kctx_dentry)) /* not initialized */ + return -EINVAL; + + mutex_lock(&kctx->mem_profile_lock); + + seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size); + + seq_putc(sfile, '\n'); + + mutex_unlock(&kctx->mem_profile_lock); + + return 0; +} + +/* + * File operations related to debugfs entry for mem_profile + */ +static int kbasep_mem_profile_debugfs_open(struct inode *in, struct file *file) +{ + return single_open(file, kbasep_mem_profile_seq_show, in->i_private); +} + +static const struct file_operations kbasep_mem_profile_debugfs_fops = { + .owner = THIS_MODULE, + .open = kbasep_mem_profile_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/** Store a new memory profile report for a context. + * + * Takes ownership of @c data on every path: it either becomes the context's + * current report (the previous one is freed) or it is freed before returning. + * The @c mem_profile debugfs file is created the first time this succeeds. + * + * @param kctx The context the report belongs to + * @param data Report text allocated by the caller; released with kfree() + * @param size Length of @c data in bytes + * + * @return 0 on success, -EINVAL or -ENOMEM if the context has no debugfs + * directory, -EAGAIN if the debugfs file could not be created + */ +int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, + size_t size) +{ + int err = 0; + + if (IS_ERR_OR_NULL(kctx->kctx_dentry)) { /* not initialized */ + /* Ownership of data was passed to this function, so it has to + * be released on this early-exit path as well. + */ + kfree(data); + return -EINVAL; + } + + mutex_lock(&kctx->mem_profile_lock); + + dev_dbg(kctx->kbdev->dev, "initialised: %d", + kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); + + if (!kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) { + if (IS_ERR_OR_NULL(kctx->kctx_dentry)) { + err = -ENOMEM; + } else if (IS_ERR_OR_NULL(debugfs_create_file("mem_profile", 0444, + kctx->kctx_dentry, kctx, + &kbasep_mem_profile_debugfs_fops))) { + /* Failure may be reported as NULL or as an ERR_PTR value + * depending on the kernel version, so test for both. + */ + err = -EAGAIN; + } else { + kbase_ctx_flag_set(kctx, + KCTX_MEM_PROFILE_INITIALIZED); + } + } + + if (kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) { + kfree(kctx->mem_profile_data); + kctx->mem_profile_data = data; + kctx->mem_profile_size = size; + } else { + kfree(data); + } + + dev_dbg(kctx->kbdev->dev, "returning: %d, initialised: %d", + err,
kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); + + mutex_unlock(&kctx->mem_profile_lock); + + return err; +} + +/** Release the memory profile report held by a context. + * + * Frees the stored report and resets its size to zero. Does nothing when the + * debugfs directory of the context is not initialized. + * + * @param kctx The context whose report is released + */ +void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx) +{ + if (IS_ERR_OR_NULL(kctx->kctx_dentry)) /* not initialized */ + return; + + mutex_lock(&kctx->mem_profile_lock); + + dev_dbg(kctx->kbdev->dev, "initialised: %d", + kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)); + + kfree(kctx->mem_profile_data); + kctx->mem_profile_data = NULL; + kctx->mem_profile_size = 0; + + mutex_unlock(&kctx->mem_profile_lock); +} + +#else /* CONFIG_DEBUG_FS */ + +int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, + size_t size) +{ + kfree(data); + return 0; +} +#endif /* CONFIG_DEBUG_FS */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h new file mode 100644 index 0000000..1462247 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h @@ -0,0 +1,64 @@ +/* + * + * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html.
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file mali_kbase_mem_profile_debugfs.h + * Header file for mem profiles entries in debugfs + * + */ + +#ifndef _KBASE_MEM_PROFILE_DEBUGFS_H +#define _KBASE_MEM_PROFILE_DEBUGFS_H + +#include +#include + +/** + * @brief Remove entry from Mali memory profile debugfs + * + * Frees the profile data currently stored for @p kctx and resets its size to + * zero. Does nothing if the debugfs directory of @p kctx is not initialized. + */ +void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx); + +/** + * @brief Insert @p data to the debugfs file so it can be read by userspace + * + * The function takes ownership of @p data and frees it later when new data + * is inserted. + * + * If the debugfs entry corresponding to the @p kctx doesn't exist, + * an attempt will be made to create it. + * + * @param kctx The context whose debugfs file @p data should be inserted to + * @param data A NULL-terminated string to be inserted to the debugfs file, + * without the trailing new line character + * @param size The length of the @p data string + * @return 0 if @p data inserted correctly + * -EINVAL if the debugfs directory of @p kctx is not initialized + * -EAGAIN if the debugfs file could not be created + * @post The KCTX_MEM_PROFILE_INITIALIZED flag of @p kctx will be set + * the first time this function succeeds. + */ +int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, + size_t size); + +#endif /*_KBASE_MEM_PROFILE_DEBUGFS_H*/ + diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h new file mode 100644 index 0000000..d55cc85 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h @@ -0,0 +1,39 @@ +/* + * + * (C) COPYRIGHT 2014, 2018-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * @file mali_kbase_mem_profile_debugfs_buf_size.h + * Header file for the size of the buffer to accumulate the histogram report text in + */ + +#ifndef _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_ +#define _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_ + +/** + * The size of the buffer to accumulate the histogram report text in + * @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT + */ +#define KBASE_MEM_PROFILE_MAX_BUF_SIZE \ + ((size_t) (64 + ((80 + (56 * 64)) * 53) + 56)) + +#endif /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/ + diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_gen_header.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_gen_header.h new file mode 100644 index 0000000..ec52122 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_gen_header.h @@ -0,0 +1,217 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* THIS FILE IS AUTOGENERATED BY mali_trace_generator.py. + * DO NOT EDIT. + */ + +/* clang-format off */ + +#include "mali_kbase_mipe_proto.h" + +/** + * This header generates MIPE tracepoint declaration BLOB at + * compile time. + * + * It is intentional that there is no header guard. + * The header could be included multiple times for + * different blobs compilation. + * + * Before including this header MIPE_HEADER_* parameters must be + * defined. See documentation below: + */ + +/** + * The name of the variable where the result BLOB will be stored. + */ +#if !defined(MIPE_HEADER_BLOB_VAR_NAME) +#error "MIPE_HEADER_BLOB_VAR_NAME must be defined!" +#endif + +/** + * A compiler attribute for the BLOB variable. + * + * e.g. __attribute__((section("my_section"))) + * + * Default value is no attribute. + */ +#if !defined(MIPE_HEADER_BLOB_VAR_ATTRIBUTE) +#define MIPE_HEADER_BLOB_VAR_ATTRIBUTE +#endif + +/** + * MIPE stream id. + * + * See enum tl_stream_id. + */ +#if !defined(MIPE_HEADER_STREAM_ID) +#error "MIPE_HEADER_STREAM_ID must be defined!" +#endif + +/** + * MIPE packet class. + * + * See enum tl_packet_class. + */ +#if !defined(MIPE_HEADER_PKT_CLASS) +#error "MIPE_HEADER_PKT_CLASS must be defined!" +#endif + +/** + * The list of tracepoints to process. + * + * It should be defined as follows: + * #define MIPE_HEADER_TRACEPOINT_LIST \ + * TRACEPOINT_DESC(FIRST_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \ + * TRACEPOINT_DESC(SECOND_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \ + * etc. 
+ * + * Where the first argument is tracepoints name, the second + * argument is a short tracepoint description, the third argument + * argument types (see MIPE documentation), and the fourth argument + * is comma separated argument names. + */ +#if !defined(MIPE_HEADER_TRACEPOINT_LIST) +#error "MIPE_HEADER_TRACEPOINT_LIST must be defined!" +#endif + +/** + * The number of entries in MIPE_HEADER_TRACEPOINT_LIST. + */ +#if !defined(MIPE_HEADER_TRACEPOINT_LIST_SIZE) +#error "MIPE_HEADER_TRACEPOINT_LIST_SIZE must be defined!" +#endif + +/** + * The list of enums to process. + * + * It should be defined as follows: + * #define MIPE_HEADER_ENUM_LIST \ + * ENUM_DESC(enum_arg_name, enum_value) \ + * ENUM_DESC(enum_arg_name, enum_value) \ + * etc. + * + * Where enum_arg_name is the name of a tracepoint argument being used with + * this enum. enum_value is a valid C enum value. + * + * Default value is an empty list. + */ +#if defined(MIPE_HEADER_ENUM_LIST) + +/** + * Tracepoint message ID used for enums declaration. + */ +#if !defined(MIPE_HEADER_ENUM_MSG_ID) +#error "MIPE_HEADER_ENUM_MSG_ID must be defined!" +#endif + +#else +#define MIPE_HEADER_ENUM_LIST +#endif + +/** + * The MIPE tracepoint declaration BLOB. 
+ */ +const struct +{ + u32 _mipe_w0; + u32 _mipe_w1; + u8 _protocol_version; + u8 _pointer_size; + u32 _tp_count; +#define TRACEPOINT_DESC(name, desc, arg_types, arg_names) \ + struct { \ + u32 _name; \ + u32 _size_string_name; \ + char _string_name[sizeof(#name)]; \ + u32 _size_desc; \ + char _desc[sizeof(desc)]; \ + u32 _size_arg_types; \ + char _arg_types[sizeof(arg_types)]; \ + u32 _size_arg_names; \ + char _arg_names[sizeof(arg_names)]; \ + } __attribute__ ((__packed__)) __ ## name; + +#define ENUM_DESC(arg_name, value) \ + struct { \ + u32 _msg_id; \ + u32 _arg_name_len; \ + char _arg_name[sizeof(#arg_name)]; \ + u32 _value; \ + u32 _value_str_len; \ + char _value_str[sizeof(#value)]; \ + } __attribute__ ((__packed__)) __ ## arg_name ## _ ## value; + + MIPE_HEADER_TRACEPOINT_LIST + MIPE_HEADER_ENUM_LIST +#undef TRACEPOINT_DESC +#undef ENUM_DESC +} __attribute__((packed)) MIPE_HEADER_BLOB_VAR_NAME MIPE_HEADER_BLOB_VAR_ATTRIBUTE = { + ._mipe_w0 = MIPE_PACKET_HEADER_W0( + TL_PACKET_FAMILY_TL, + MIPE_HEADER_PKT_CLASS, + TL_PACKET_TYPE_HEADER, + MIPE_HEADER_STREAM_ID), + ._mipe_w1 = MIPE_PACKET_HEADER_W1( + sizeof(MIPE_HEADER_BLOB_VAR_NAME) - PACKET_HEADER_SIZE, + 0), + ._protocol_version = SWTRACE_VERSION, + ._pointer_size = sizeof(void *), + ._tp_count = MIPE_HEADER_TRACEPOINT_LIST_SIZE, +#define TRACEPOINT_DESC(name, desc, arg_types, arg_names) \ + .__ ## name = { \ + ._name = name, \ + ._size_string_name = sizeof(#name), \ + ._string_name = #name, \ + ._size_desc = sizeof(desc), \ + ._desc = desc, \ + ._size_arg_types = sizeof(arg_types), \ + ._arg_types = arg_types, \ + ._size_arg_names = sizeof(arg_names), \ + ._arg_names = arg_names \ + }, +#define ENUM_DESC(arg_name, value) \ + .__ ## arg_name ## _ ## value = { \ + ._msg_id = MIPE_HEADER_ENUM_MSG_ID, \ + ._arg_name_len = sizeof(#arg_name), \ + ._arg_name = #arg_name, \ + ._value = value, \ + ._value_str_len = sizeof(#value), \ + ._value_str = #value \ + }, + + MIPE_HEADER_TRACEPOINT_LIST + 
MIPE_HEADER_ENUM_LIST +#undef TRACEPOINT_DESC +#undef ENUM_DESC +}; + +#undef MIPE_HEADER_BLOB_VAR_NAME +#undef MIPE_HEADER_BLOB_VAR_ATTRIBUTE +#undef MIPE_HEADER_STREAM_ID +#undef MIPE_HEADER_PKT_CLASS +#undef MIPE_HEADER_TRACEPOINT_LIST +#undef MIPE_HEADER_TRACEPOINT_LIST_SIZE +#undef MIPE_HEADER_ENUM_LIST +#undef MIPE_HEADER_ENUM_MSG_ID + +/* clang-format on */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_proto.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_proto.h new file mode 100644 index 0000000..54667cf --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_proto.h @@ -0,0 +1,127 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* THIS FILE IS AUTOGENERATED BY mali_trace_generator.py. + * DO NOT EDIT. 
+ */ + +/* clang-format off */ + +#if !defined(_KBASE_MIPE_PROTO_H) +#define _KBASE_MIPE_PROTO_H + +#define _BITFIELD_MASK_FIELD(pos, len) \ + (((1u << len) - 1) << pos) + +#define _BITFIELD_SET_FIELD(pos, len, value) \ + (_BITFIELD_MASK_FIELD(pos, len) & (((u32) value) << pos)) + +#define BITFIELD_SET(field_name, value) \ + _BITFIELD_SET_FIELD(field_name ## _POS, field_name ## _LEN, value) + +/* The version of swtrace protocol used in timeline stream. */ +#define SWTRACE_VERSION 3 + +/* Packet header - first word. + * These values must be defined according to MIPE documentation. + */ +#define PACKET_STREAMID_POS 0 +#define PACKET_STREAMID_LEN 8 +#define PACKET_RSVD1_POS (PACKET_STREAMID_POS + PACKET_STREAMID_LEN) +#define PACKET_RSVD1_LEN 8 +#define PACKET_TYPE_POS (PACKET_RSVD1_POS + PACKET_RSVD1_LEN) +#define PACKET_TYPE_LEN 3 +#define PACKET_CLASS_POS (PACKET_TYPE_POS + PACKET_TYPE_LEN) +#define PACKET_CLASS_LEN 7 +#define PACKET_FAMILY_POS (PACKET_CLASS_POS + PACKET_CLASS_LEN) +#define PACKET_FAMILY_LEN 6 + +/* Packet header - second word + * These values must be defined according to MIPE documentation. + */ +#define PACKET_LENGTH_POS 0 +#define PACKET_LENGTH_LEN 24 +#define PACKET_SEQBIT_POS (PACKET_LENGTH_POS + PACKET_LENGTH_LEN) +#define PACKET_SEQBIT_LEN 1 +#define PACKET_RSVD2_POS (PACKET_SEQBIT_POS + PACKET_SEQBIT_LEN) +#define PACKET_RSVD2_LEN 7 + +/* First word of a MIPE packet */ +#define MIPE_PACKET_HEADER_W0(pkt_family, pkt_class, pkt_type, stream_id) \ + (0 \ + | BITFIELD_SET(PACKET_FAMILY, pkt_family) \ + | BITFIELD_SET(PACKET_CLASS, pkt_class) \ + | BITFIELD_SET(PACKET_TYPE, pkt_type) \ + | BITFIELD_SET(PACKET_STREAMID, stream_id)) + +/* Second word of a MIPE packet */ +#define MIPE_PACKET_HEADER_W1(packet_length, seqbit) \ + (0 \ + | BITFIELD_SET(PACKET_LENGTH, packet_length) \ + | BITFIELD_SET(PACKET_SEQBIT, seqbit)) + +/* The number of bytes reserved for packet header. + * These value must be defined according to MIPE documentation. 
+ */ +#define PACKET_HEADER_SIZE 8 /* bytes */ + +/* The number of bytes reserved for packet sequence number. + * These value must be defined according to MIPE documentation. + */ +#define PACKET_NUMBER_SIZE 4 /* bytes */ + +/* Timeline packet family ids. + * Values are significant! Check MIPE documentation. + */ +enum tl_packet_family { + TL_PACKET_FAMILY_CTRL = 0, /* control packets */ + TL_PACKET_FAMILY_TL = 1, /* timeline packets */ + TL_PACKET_FAMILY_COUNT +}; + +/* Packet classes used in timeline streams. + * Values are significant! Check MIPE documentation. + */ +enum tl_packet_class { + TL_PACKET_CLASS_OBJ = 0, /* timeline objects packet */ + TL_PACKET_CLASS_AUX = 1, /* auxiliary events packet */ +}; + +/* Packet types used in timeline streams. + * Values are significant! Check MIPE documentation. + */ +enum tl_packet_type { + TL_PACKET_TYPE_HEADER = 0, /* stream's header/directory */ + TL_PACKET_TYPE_BODY = 1, /* stream's body */ + TL_PACKET_TYPE_SUMMARY = 2, /* stream's summary */ +}; + +/* Stream ID types (timeline family). */ +enum tl_stream_id { + TL_STREAM_ID_USER = 0, /* User-space driver Timeline stream. */ + TL_STREAM_ID_KERNEL = 1, /* Kernel-space driver Timeline stream. */ + TL_STREAM_ID_CSFFW = 2, /* CSF firmware driver Timeline stream. */ +}; + +#endif /* _KBASE_MIPE_PROTO_H */ + +/* clang-format on */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.c new file mode 100644 index 0000000..38ae46e --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.c @@ -0,0 +1,153 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include + +#include +#include + +/** + * kbase_native_mgm_alloc - Native physical memory allocation method + * + * @mgm_dev: The memory group manager the request is being made through. + * @group_id: A physical memory group ID, which must be valid but is not used. + * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. + * @gfp_mask: Bitmask of Get Free Page flags affecting allocator behavior. + * @order: Page order for physical page size (order=0 means 4 KiB, + * order=9 means 2 MiB). + * + * Delegates all memory allocation requests to the kernel's alloc_pages + * function. + * + * Return: Pointer to allocated page, or NULL if allocation failed. + */ +static struct page *kbase_native_mgm_alloc( + struct memory_group_manager_device *mgm_dev, int group_id, + gfp_t gfp_mask, unsigned int order) +{ + /* + * Check that the base and the mgm defines, from separate header files, + * for the max number of memory groups are compatible. + */ + BUILD_BUG_ON(BASE_MEM_GROUP_COUNT != MEMORY_GROUP_MANAGER_NR_GROUPS); + /* + * Check that the mask used for storing the memory group ID is big + * enough for the largest possible memory group ID. 
+ */ + BUILD_BUG_ON((BASEP_CONTEXT_MMU_GROUP_ID_MASK + >> BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) + < (BASE_MEM_GROUP_COUNT - 1)); + + CSTD_UNUSED(mgm_dev); + CSTD_UNUSED(group_id); + + return alloc_pages(gfp_mask, order); +} + +/** + * kbase_native_mgm_free - Native physical memory freeing method + * + * @mgm_dev: The memory group manager the request is being made through. + * @group_id: A physical memory group ID, which must be valid but is not used. + * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. + * @page: Address of the struct associated with a page of physical + * memory that was allocated by calling kbase_native_mgm_alloc + * with the same argument values. + * @order: Page order for physical page size (order=0 means 4 KiB, + * order=9 means 2 MiB). + * + * Delegates all memory freeing requests to the kernel's __free_pages function. + */ +static void kbase_native_mgm_free(struct memory_group_manager_device *mgm_dev, + int group_id, struct page *page, unsigned int order) +{ + CSTD_UNUSED(mgm_dev); + CSTD_UNUSED(group_id); + + __free_pages(page, order); +} + +/** + * kbase_native_mgm_vmf_insert_pfn_prot - Native method to map a page on the CPU + * + * @mgm_dev: The memory group manager the request is being made through. + * @group_id: A physical memory group ID, which must be valid but is not used. + * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. + * @vma: The virtual memory area to insert the page into. + * @addr: An address contained in @vma to assign to the inserted page. + * @pfn: The kernel Page Frame Number to insert at @addr in @vma. + * @pgprot: Protection flags for the inserted page. + * + * Called from a CPU virtual memory page fault handler. Delegates all memory + * mapping requests to the kernel's vmf_insert_pfn_prot function. + * + * Return: Type of fault that occurred or VM_FAULT_NOPAGE if the page table + * entry was successfully installed. 
+ */ +static vm_fault_t kbase_native_mgm_vmf_insert_pfn_prot( + struct memory_group_manager_device *mgm_dev, int group_id, + struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, pgprot_t pgprot) +{ + CSTD_UNUSED(mgm_dev); + CSTD_UNUSED(group_id); + + return vmf_insert_pfn_prot(vma, addr, pfn, pgprot); +} + +/** + * kbase_native_mgm_update_gpu_pte - Native method to modify a GPU page table + * entry + * + * @mgm_dev: The memory group manager the request is being made through. + * @group_id: A physical memory group ID, which must be valid but is not used. + * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. + * @mmu_level: The level of the MMU page table where the page is getting mapped. + * @pte: The prepared page table entry. + * + * This function simply returns the @pte without modification. + * + * Return: A GPU page table entry to be stored in a page table. + */ +static u64 +kbase_native_mgm_update_gpu_pte(struct memory_group_manager_device *mgm_dev, + int group_id, int mmu_level, u64 pte) +{ + CSTD_UNUSED(mgm_dev); + CSTD_UNUSED(group_id); + CSTD_UNUSED(mmu_level); + + return pte; +} + +struct memory_group_manager_device kbase_native_mgm_dev = { + .ops = { + .mgm_alloc_page = kbase_native_mgm_alloc, + .mgm_free_page = kbase_native_mgm_free, + .mgm_get_import_memory_id = NULL, + .mgm_vmf_insert_pfn_prot = kbase_native_mgm_vmf_insert_pfn_prot, + .mgm_update_gpu_pte = kbase_native_mgm_update_gpu_pte, + }, + .data = NULL +}; diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.h new file mode 100644 index 0000000..431b1f4 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.h @@ -0,0 +1,39 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_NATIVE_MGM_H_ +#define _KBASE_NATIVE_MGM_H_ + +#include + +/** + * kbase_native_mgm_dev - Native memory group manager device + * + * An implementation of the memory group manager interface that is intended for + * internal use when no platform-specific memory group manager is available. + * + * It ignores the specified group ID and delegates to the kernel's physical + * memory allocation and freeing functions. + */ +extern struct memory_group_manager_device kbase_native_mgm_dev; + +#endif /* _KBASE_NATIVE_MGM_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c new file mode 100644 index 0000000..fbb090e --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c @@ -0,0 +1,124 @@ +/* + * + * (C) COPYRIGHT 2011-2014, 2016-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include +#include +#include + + +/* + * This file is included only for type definitions and functions belonging to + * specific platform folders. Do not add dependencies with symbols that are + * defined somewhere else. + */ +#include + +#define PLATFORM_CONFIG_RESOURCE_COUNT 4 +#define PLATFORM_CONFIG_IRQ_RES_COUNT 3 + +static struct platform_device *mali_device; + +#ifndef CONFIG_OF +/** + * @brief Convert data in struct kbase_io_resources struct to Linux-specific resources + * + * Function converts data in struct kbase_io_resources struct to an array of Linux resource structures. Note that function + * assumes that size of linux_resource array is at least PLATFORM_CONFIG_RESOURCE_COUNT. + * Resources are put in fixed order: I/O memory region, job IRQ, MMU IRQ, GPU IRQ. 
+ * + * @param[in] io_resources Input IO resource data + * @param[out] linux_resources Pointer to output array of Linux resource structures + */ +static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io_resources, struct resource *const linux_resources) +{ + if (!io_resources || !linux_resources) { + pr_err("%s: couldn't find proper resources\n", __func__); + return; + } + + memset(linux_resources, 0, PLATFORM_CONFIG_RESOURCE_COUNT * sizeof(struct resource)); + + linux_resources[0].start = io_resources->io_memory_region.start; + linux_resources[0].end = io_resources->io_memory_region.end; + linux_resources[0].flags = IORESOURCE_MEM; + + linux_resources[1].start = io_resources->job_irq_number; + linux_resources[1].end = io_resources->job_irq_number; + linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; + + linux_resources[2].start = io_resources->mmu_irq_number; + linux_resources[2].end = io_resources->mmu_irq_number; + linux_resources[2].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; + + linux_resources[3].start = io_resources->gpu_irq_number; + linux_resources[3].end = io_resources->gpu_irq_number; + linux_resources[3].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; +} +#endif /* CONFIG_OF */ + +int kbase_platform_register(void) +{ + struct kbase_platform_config *config; +#ifndef CONFIG_OF + struct resource resources[PLATFORM_CONFIG_RESOURCE_COUNT]; +#endif + int err; + + config = kbase_get_platform_config(); /* declared in midgard/mali_kbase_config.h but defined in platform folder */ + if (config == NULL) { + pr_err("%s: couldn't get platform config\n", __func__); + return -ENODEV; + } + + mali_device = platform_device_alloc("mali", 0); + if (mali_device == NULL) + return -ENOMEM; + +#ifndef CONFIG_OF + kbasep_config_parse_io_resources(config->io_resources, resources); + err = platform_device_add_resources(mali_device, resources, PLATFORM_CONFIG_RESOURCE_COUNT); + if (err) { + platform_device_put(mali_device); + 
mali_device = NULL; + return err; + } +#endif /* CONFIG_OF */ + + err = platform_device_add(mali_device); + if (err) { + platform_device_put(mali_device); /* add failed: device was never registered, only drop the alloc reference */ + mali_device = NULL; + return err; + } + + return 0; +} +EXPORT_SYMBOL(kbase_platform_register); + +void kbase_platform_unregister(void) +{ + if (mali_device) + platform_device_unregister(mali_device); +} +EXPORT_SYMBOL(kbase_platform_unregister); diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c new file mode 100644 index 0000000..b9ed8c3 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c @@ -0,0 +1,278 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html.
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file mali_kbase_pm.c + * Base kernel power management APIs + */ + +#include +#include +#include +#include + +#include +#include + +#ifdef CONFIG_MALI_ARBITER_SUPPORT +#include +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + +int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags) +{ + return kbase_hwaccess_pm_powerup(kbdev, flags); +} + +void kbase_pm_halt(struct kbase_device *kbdev) +{ + kbase_hwaccess_pm_halt(kbdev); +} + +void kbase_pm_context_active(struct kbase_device *kbdev) +{ + (void)kbase_pm_context_active_handle_suspend(kbdev, + KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE); +} + +int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, + enum kbase_pm_suspend_handler suspend_handler) +{ + int c; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + dev_dbg(kbdev->dev, "%s - reason = %d, pid = %d\n", __func__, + suspend_handler, current->pid); + kbase_pm_lock(kbdev); + +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbase_arbiter_pm_ctx_active_handle_suspend(kbdev, suspend_handler)) + return 1; + + if (kbase_pm_is_suspending(kbdev) || + kbase_pm_is_gpu_lost(kbdev)) { +#else + if (kbase_pm_is_suspending(kbdev)) { +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + switch (suspend_handler) { + case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE: + if (kbdev->pm.active_count != 0) + break; + /* FALLTHROUGH */ + case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE: + kbase_pm_unlock(kbdev); + return 1; + + case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE: + /* FALLTHROUGH */ + default: + KBASE_DEBUG_ASSERT_MSG(false, "unreachable"); + break; + } + } + c = ++kbdev->pm.active_count; + KBASE_KTRACE_ADD(kbdev, PM_CONTEXT_ACTIVE, NULL, c); + + if (c == 1) { + /* First context active: Power on the GPU and + * any cores requested by the policy + */ + kbase_hwaccess_pm_gpu_active(kbdev); +#ifdef CONFIG_MALI_ARBITER_SUPPORT + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_REF_EVENT); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + } + + 
kbase_pm_unlock(kbdev); + dev_dbg(kbdev->dev, "%s %d\n", __func__, c); /* value sampled under pm.lock */ + + return 0; +} + +KBASE_EXPORT_TEST_API(kbase_pm_context_active); + +void kbase_pm_context_idle(struct kbase_device *kbdev) +{ + int c; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + + kbase_pm_lock(kbdev); + + c = --kbdev->pm.active_count; + KBASE_KTRACE_ADD(kbdev, PM_CONTEXT_IDLE, NULL, c); + + KBASE_DEBUG_ASSERT(c >= 0); + + if (c == 0) { + /* Last context has gone idle */ + kbase_hwaccess_pm_gpu_idle(kbdev); + + /* Wake up anyone waiting for this to become 0 (e.g. suspend). + * The waiters must synchronize with us by locking the pm.lock + * after waiting. + */ + wake_up(&kbdev->pm.zero_active_count_wait); + } + + kbase_pm_unlock(kbdev); + dev_dbg(kbdev->dev, "%s %d (pid = %d)\n", __func__, + c, current->pid); /* value sampled under pm.lock */ +} + +KBASE_EXPORT_TEST_API(kbase_pm_context_idle); + +void kbase_pm_driver_suspend(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev); + + /* Suspend vinstr. This blocks until the vinstr worker and timer are + * no longer running. + */ + kbase_vinstr_suspend(kbdev->vinstr_ctx); + + /* Disable GPU hardware counters. + * This call will block until counters are disabled. + */ + kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); + + mutex_lock(&kbdev->pm.lock); + if (WARN_ON(kbase_pm_is_suspending(kbdev))) { + mutex_unlock(&kbdev->pm.lock); + return; + } + kbdev->pm.suspending = true; + mutex_unlock(&kbdev->pm.lock); + +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbdev->arb.arb_if) { + int i; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_disjoint_state_up(kbdev); + for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) + kbase_job_slot_softstop(kbdev, i, NULL); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + + /* From now on, the active count will drop towards zero. Sometimes, + * it'll go up briefly before going down again.
However, once + * it reaches zero it will stay there - guaranteeing that we've idled + * all pm references + */ + + /* Suspend job scheduler and associated components, so that it releases all + * the PM active count references */ + kbasep_js_suspend(kbdev); + + /* Wait for the active count to reach zero. This is not the same as + * waiting for a power down, since not all policies power down when this + * reaches zero. + */ + dev_dbg(kbdev->dev, ">wait_event - waiting for active_count == 0 (pid = %d)\n", + current->pid); + wait_event(kbdev->pm.zero_active_count_wait, + kbdev->pm.active_count == 0); + dev_dbg(kbdev->dev, ">wait_event - waiting done\n"); + + /* NOTE: We synchronize with anything that was just finishing a + * kbase_pm_context_idle() call by locking the pm.lock below + */ + kbase_hwaccess_pm_suspend(kbdev); + +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbdev->arb.arb_if) { + mutex_lock(&kbdev->pm.arb_vm_state->vm_state_lock); + kbase_arbiter_pm_vm_stopped(kbdev); + mutex_unlock(&kbdev->pm.arb_vm_state->vm_state_lock); + } +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ +} + +void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start) +{ + unsigned long flags; + + /* MUST happen before any pm_context_active calls occur */ + kbase_hwaccess_pm_resume(kbdev); + + /* Initial active call, to power on the GPU/cores if needed */ +#ifdef CONFIG_MALI_ARBITER_SUPPORT + (void)kbase_pm_context_active_handle_suspend(kbdev, + (arb_gpu_start ? 
+ KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED : + KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE)); +#else + kbase_pm_context_active(kbdev); +#endif + + /* Resume any blocked atoms (which may cause contexts to be scheduled in + * and dependent atoms to run) + */ + kbase_resume_suspended_soft_jobs(kbdev); + + /* Resume the Job Scheduler and associated components, and start running + * atoms + */ + kbasep_js_resume(kbdev); + + /* Matching idle call, to power off the GPU/cores if we didn't actually + * need it and the policy doesn't want it on + */ + kbase_pm_context_idle(kbdev); + + /* Re-enable GPU hardware counters */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* Resume vinstr */ + kbase_vinstr_resume(kbdev->vinstr_ctx); +} + +void kbase_pm_suspend(struct kbase_device *kbdev) +{ +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbdev->arb.arb_if) + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_OS_SUSPEND_EVENT); + else + kbase_pm_driver_suspend(kbdev); +#else + kbase_pm_driver_suspend(kbdev); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ +} + +void kbase_pm_resume(struct kbase_device *kbdev) +{ +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbdev->arb.arb_if) + kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_OS_RESUME_EVENT); + else + kbase_pm_driver_resume(kbdev, false); +#else + kbase_pm_driver_resume(kbdev, false); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h new file mode 100644 index 0000000..257f959 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h @@ -0,0 +1,244 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +/** + * @file mali_kbase_pm.h + * Power management API definitions + */ + +#ifndef _KBASE_PM_H_ +#define _KBASE_PM_H_ + +#include "mali_kbase_hwaccess_pm.h" + +#define PM_ENABLE_IRQS 0x01 +#define PM_HW_ISSUES_DETECT 0x02 + + +/** Initialize the power management framework. + * + * Must be called before any other power management function + * + * @param kbdev The kbase device structure for the device + * (must be a valid pointer) + * + * @return 0 if the power management framework was successfully initialized. + */ +int kbase_pm_init(struct kbase_device *kbdev); + +/** Power up GPU after all modules have been initialized and interrupt handlers installed. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * + * @param flags Flags to pass on to kbase_pm_init_hw + * + * @return 0 if powerup was successful. + */ +int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags); + +/** + * Halt the power management framework. + * Should ensure that no new interrupts are generated, + * but allow any currently running interrupt handlers to complete successfully. 
+ * The GPU is forced off by the time this function returns, regardless of + * whether or not the active power policy asks for the GPU to be powered off. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_halt(struct kbase_device *kbdev); + +/** Terminate the power management framework. + * + * No power management functions may be called after this + * (except @ref kbase_pm_init) + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_term(struct kbase_device *kbdev); + +/** Increment the count of active contexts. + * + * This function should be called when a context is about to submit a job. + * It informs the active power policy that the GPU is going to be in use shortly + * and the policy is expected to start turning on the GPU. + * + * This function will block until the GPU is available. + * + * This function ASSERTS if a suspend is occurring/has occurred whilst this is + * in use. Use kbase_pm_context_active_handle_suspend() instead. + * + * @note a Suspend is only visible to Kernel threads; user-space threads in a + * syscall cannot witness a suspend, because they are frozen before the suspend + * begins. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_context_active(struct kbase_device *kbdev); + + +/** Handler codes for doing kbase_pm_context_active_handle_suspend() */ +enum kbase_pm_suspend_handler { + /** A suspend is not expected/not possible - this is the same as + * kbase_pm_context_active() + */ + KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE, + /** If we're suspending, fail and don't increase the active count */ + KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE, + /** If we're suspending, succeed and allow the active count to increase + * if it didn't go from 0->1 (i.e., we didn't re-activate the GPU). + * + * This should only be used when there is a bounded time on the activation + * (e.g.
guarantee it's going to be idled very soon after) + */ + KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE, +#ifdef CONFIG_MALI_ARBITER_SUPPORT + /** Special case when Arbiter has notified we can use GPU. + * Active count should always start at 0 in this case. + */ + KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED, +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ +}; + +/** Suspend 'safe' variant of kbase_pm_context_active() + * + * If a suspend is in progress, this allows for various different ways of + * handling the suspend. Refer to @ref enum kbase_pm_suspend_handler for details. + * + * We return a status code indicating whether we're allowed to keep the GPU + * active during the suspend, depending on the handler code. If the status code + * indicates a failure, the caller must abort whatever operation it was + * attempting, and potentially queue it up for after the OS has resumed. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + * @param suspend_handler The handler code for how to handle a suspend that might occur + * @return zero Indicates success + * @return non-zero Indicates failure due to the system being suspending/suspended. + */ +int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler); + +/** Decrement the reference count of active contexts. + * + * This function should be called when a context becomes idle. + * After this call the GPU may be turned off by the power policy so the calling + * code should ensure that it does not access the GPU's registers. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_context_idle(struct kbase_device *kbdev); + +/* NOTE: kbase_pm_is_active() is in mali_kbase.h, because it is an inline + * function + */ + +/** + * Suspend the GPU and prevent any further register accesses to it from Kernel + * threads.
+ * + * This is called in response to an OS suspend event, and calls into the various + * kbase components to complete the suspend. + * + * @note the mechanisms used here rely on all user-space threads being frozen + * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up + * the GPU e.g. via atom submission. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_suspend(struct kbase_device *kbdev); + +/** + * Resume the GPU, allow register accesses to it, and resume running atoms on + * the GPU. + * + * This is called in response to an OS resume event, and calls into the various + * kbase components to complete the resume. + * + * Also called when using VM arbiter, when GPU access has been granted. + * + * @param kbdev The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_resume(struct kbase_device *kbdev); + +/** + * kbase_pm_vsync_callback - vsync callback + * + * @buffer_updated: 1 if a new frame was displayed, 0 otherwise + * @data: Pointer to the kbase device as returned by kbase_find_device() + * + * Callback function used to notify the power management code that a vsync has + * occurred on the display. + */ +void kbase_pm_vsync_callback(int buffer_updated, void *data); + +/** + * kbase_pm_driver_suspend() - Put GPU and driver in suspend state + * @param kbdev The kbase device structure for the device + * (must be a valid pointer) + * + * Suspend the GPU and prevent any further register accesses to it from Kernel + * threads. + * + * This is called in response to an OS suspend event, and calls into the various + * kbase components to complete the suspend. + * + * Unlike kbase_pm_suspend(), it does not update the Arbiter + * status if MALI_ARBITER_SUPPORT is enabled. + * + * @note the mechanisms used here rely on all user-space threads being frozen + * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up + * the GPU e.g.
via atom submission. + */ +void kbase_pm_driver_suspend(struct kbase_device *kbdev); + +/** + * kbase_pm_driver_resume() - Put GPU and driver in resume + * @param kbdev The kbase device structure for the device + * (must be a valid pointer) + * + * Resume the GPU, allow register accesses to it, and resume running atoms on + * the GPU. + * + * This is called in response to an OS resume event, and calls into the various + * kbase components to complete the resume. + * + * Also called when using VM arbiter, when GPU access has been granted. + * + * Unlike kbase_pm_resume(), it does not update the Arbiter + * status if MALI_ARBITER_SUPPORT is enabled. + */ +void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start); + +#ifdef CONFIG_MALI_ARBITER_SUPPORT +/** + * kbase_pm_handle_gpu_lost() - Handle GPU Lost for the VM + * @kbdev: Device pointer + * + * Handles the case that the Arbiter has forced the GPU away from the VM, + * so that interrupts will not be received and registers are no longer + * accessible because they are replaced by dummy RAM. + * Kill any running tasks and put the driver into a GPU powered-off state. + */ +void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + +#endif /* _KBASE_PM_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c new file mode 100644 index 0000000..1d114a6 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c @@ -0,0 +1,136 @@ +/* + * + * (C) COPYRIGHT 2016, 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase.h" + +#include "mali_kbase_regs_history_debugfs.h" + +#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) + +#include + + +static int regs_history_size_get(void *data, u64 *val) +{ + struct kbase_io_history *const h = data; + + *val = h->size; + + return 0; +} + +static int regs_history_size_set(void *data, u64 val) +{ + struct kbase_io_history *const h = data; + + return kbase_io_history_resize(h, (u16)val); +} + + +DEFINE_SIMPLE_ATTRIBUTE(regs_history_size_fops, + regs_history_size_get, + regs_history_size_set, + "%llu\n"); + + +/** + * regs_history_show - show callback for the register access history file. + * + * @sfile: The debugfs entry + * @data: Data associated with the entry + * + * This function is called to dump all recent accesses to the GPU registers. + * + * @return 0 if successfully prints data in debugfs entry file, failure + * otherwise + */ +static int regs_history_show(struct seq_file *sfile, void *data) +{ + struct kbase_io_history *const h = sfile->private; + u16 i; + size_t iters; + unsigned long flags; + + if (!h->enabled) { + seq_puts(sfile, "The register access history is disabled\n"); + goto out; + } + + spin_lock_irqsave(&h->lock, flags); + + iters = (h->size > h->count) ? h->count : h->size; + seq_printf(sfile, "Last %zu register accesses of %zu total:\n", iters, + h->count); + for (i = 0; i < iters; ++i) { + struct kbase_io_access *io = + &h->buf[(h->count - iters + i) % h->size]; + char const access = (io->addr & 1) ? 
'w' : 'r'; + + seq_printf(sfile, "%6i: %c: reg 0x%016lx val %08x\n", i, access, + (unsigned long)(io->addr & ~0x1), io->value); + } + + spin_unlock_irqrestore(&h->lock, flags); + +out: + return 0; +} + + +/** + * regs_history_open - open operation for regs_history debugfs file + * + * @in: &struct inode pointer + * @file: &struct file pointer + * + * @return file descriptor + */ +static int regs_history_open(struct inode *in, struct file *file) +{ + return single_open(file, ®s_history_show, in->i_private); +} + + +static const struct file_operations regs_history_fops = { + .owner = THIS_MODULE, + .open = ®s_history_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + + +void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev) +{ + debugfs_create_bool("regs_history_enabled", S_IRUGO | S_IWUSR, + kbdev->mali_debugfs_directory, + &kbdev->io_history.enabled); + debugfs_create_file("regs_history_size", S_IRUGO | S_IWUSR, + kbdev->mali_debugfs_directory, + &kbdev->io_history, ®s_history_size_fops); + debugfs_create_file("regs_history", S_IRUGO, + kbdev->mali_debugfs_directory, &kbdev->io_history, + ®s_history_fops); +} + + +#endif /* CONFIG_DEBUG_FS */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h new file mode 100644 index 0000000..a0078cb --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h @@ -0,0 +1,55 @@ +/* + * + * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Header file for register access history support via debugfs + * + * This interface is made available via /sys/kernel/debug/mali#/regs_history*. + * + * Usage: + * - regs_history_enabled: whether recording of register accesses is enabled. + * Write 'y' to enable, 'n' to disable. + * - regs_history_size: size of the register history buffer, must be > 0 + * - regs_history: return the information about last accesses to the registers. + */ + +#ifndef _KBASE_REGS_HISTORY_DEBUGFS_H +#define _KBASE_REGS_HISTORY_DEBUGFS_H + +struct kbase_device; + +#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) + +/** + * kbasep_regs_history_debugfs_init - add debugfs entries for register history + * + * @kbdev: Pointer to kbase_device containing the register history + */ +void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev); + +#else /* CONFIG_DEBUG_FS */ + +#define kbasep_regs_history_debugfs_init CSTD_NOP + +#endif /* CONFIG_DEBUG_FS */ + +#endif /*_KBASE_REGS_HISTORY_DEBUGFS_H*/ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_reset_gpu.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_reset_gpu.h new file mode 100644 index 0000000..df72eec --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_reset_gpu.h @@ -0,0 +1,139 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_RESET_GPU_H_ +#define _KBASE_RESET_GPU_H_ + +/** + * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU. + * @kbdev: Device pointer + * + * Caller is expected to hold the kbdev->hwaccess_lock. + * + * Return: a boolean which should be interpreted as follows: + * - true - Prepared for reset, kbase_reset_gpu should be called. + * - false - Another thread is performing a reset, kbase_reset_gpu should + * not be called. + */ +bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev); + +/** + * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU. + * @kbdev: Device pointer + * + * Return: a boolean which should be interpreted as follows: + * - true - Prepared for reset, kbase_reset_gpu should be called. + * - false - Another thread is performing a reset, kbase_reset_gpu should + * not be called. + */ +bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev); + +/** + * kbase_reset_gpu - Reset the GPU + * @kbdev: Device pointer + * + * This function should be called after kbase_prepare_to_reset_gpu if it returns + * true. It should never be called without a corresponding call to + * kbase_prepare_to_reset_gpu (only on Job Manager GPUs). 
+ * + * After this function is called the caller should call kbase_reset_gpu_wait() + * to know when the reset has completed. + */ +void kbase_reset_gpu(struct kbase_device *kbdev); + +/** + * kbase_reset_gpu_locked - Reset the GPU + * @kbdev: Device pointer + * + * This function should be called after kbase_prepare_to_reset_gpu_locked if it + * returns true. It should never be called without a corresponding call to + * kbase_prepare_to_reset_gpu (only on Job Manager GPUs). + * Caller is expected to hold the kbdev->hwaccess_lock. + * + * After this function is called, the caller should call kbase_reset_gpu_wait() + * to know when the reset has completed. + */ +void kbase_reset_gpu_locked(struct kbase_device *kbdev); + +/** + * kbase_reset_gpu_silent - Reset the GPU silently + * @kbdev: Device pointer + * + * Reset the GPU without trying to cancel jobs (applicable to Job Manager GPUs) + * and don't emit messages into the kernel log while doing the reset. + * + * This function should be used in cases where we are doing a controlled reset + * of the GPU as part of normal processing (e.g. exiting protected mode) where + * the driver will have ensured the scheduler has been idled and all other + * users of the GPU (e.g. instrumentation) have been suspended. + * + * Return: 0 if the reset was started successfully + * -EAGAIN if another reset is currently in progress + */ +int kbase_reset_gpu_silent(struct kbase_device *kbdev); + +/** + * kbase_reset_gpu_is_active - Reports if the GPU is being reset + * @kbdev: Device pointer + * + * Return: True if the GPU is in the process of being reset (or if the reset of + * GPU failed, not applicable to Job Manager GPUs). + */ +bool kbase_reset_gpu_is_active(struct kbase_device *kbdev); + +/** + * kbase_reset_gpu_wait - Wait for a GPU reset to complete + * @kbdev: Device pointer + * + * This function may wait indefinitely. + * + * Return: 0 if successful or a negative error code on failure. 
+ */ +int kbase_reset_gpu_wait(struct kbase_device *kbdev); + +/** + * kbase_reset_gpu_init - Initialize the GPU reset handling mechanism. + * + * @kbdev: Device pointer + * + * Return: 0 if successful or a negative error code on failure. + */ +int kbase_reset_gpu_init(struct kbase_device *kbdev); + +/** + * kbase_reset_gpu_term - Terminate the GPU reset handling mechanism. + * + * @kbdev: Device pointer + */ +void kbase_reset_gpu_term(struct kbase_device *kbdev); + +/** + * kbase_reset_gpu_register_complete_cb - Register the callback function to be + * invoked on completion of GPU reset. + * + * @kbdev: Device pointer + * @complete_callback: Pointer to the callback function + */ +void kbase_reset_gpu_register_complete_cb(struct kbase_device *kbdev, + int (*complete_callback)(struct kbase_device *kbdev)); + +#endif diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c new file mode 100644 index 0000000..b5c7b12 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c @@ -0,0 +1,91 @@ +/* + * + * (C) COPYRIGHT 2015, 2018, 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifdef CONFIG_ARM64 + +#include +#include + +#include + +/* __asmeq is not available on Kernel versions >= 4.20 */ +#ifndef __asmeq +/* + * This is used to ensure the compiler did actually allocate the register we + * asked it for some inline assembly sequences. Apparently we can't trust the + * compiler from one version to another so a bit of paranoia won't hurt. This + * string is meant to be concatenated with the inline asm string and will + * cause compilation to stop on mismatch. (for details, see gcc PR 15089) + */ +#define __asmeq(x, y) ".ifnc " x "," y " ; .err ; .endif\n\t" +#endif + +static noinline u64 invoke_smc_fid(u64 function_id, + u64 arg0, u64 arg1, u64 arg2) +{ + register u64 x0 asm("x0") = function_id; + register u64 x1 asm("x1") = arg0; + register u64 x2 asm("x2") = arg1; + register u64 x3 asm("x3") = arg2; + + asm volatile( + __asmeq("%0", "x0") + __asmeq("%1", "x1") + __asmeq("%2", "x2") + __asmeq("%3", "x3") + "smc #0\n" + : "+r" (x0) + : "r" (x1), "r" (x2), "r" (x3)); + + return x0; +} + +u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2) +{ + /* Must be a fast call (bit 31 set) */ + KBASE_DEBUG_ASSERT(fid & SMC_FAST_CALL); + /* bits 16-23 must be zero for fast calls */ + KBASE_DEBUG_ASSERT((fid & (0xFF << 16)) == 0); + + return invoke_smc_fid(fid, arg0, arg1, arg2); +} + +u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64, + u64 arg0, u64 arg1, u64 arg2) +{ + u32 fid = 0; + + /* Only the six bits allowed should be used.
*/ + KBASE_DEBUG_ASSERT((oen & ~SMC_OEN_MASK) == 0); + + fid |= SMC_FAST_CALL; /* Bit 31: Fast call */ + if (smc64) + fid |= SMC_64; /* Bit 30: 1=SMC64, 0=SMC32 */ + fid |= oen; /* Bit 29:24: OEN */ + /* Bit 23:16: Must be zero for fast calls */ + fid |= (function_number); /* Bit 15:0: function number */ + + return kbase_invoke_smc_fid(fid, arg0, arg1, arg2); +} + +#endif /* CONFIG_ARM64 */ + diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h new file mode 100644 index 0000000..221eb21 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h @@ -0,0 +1,72 @@ +/* + * + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +#ifndef _KBASE_SMC_H_ +#define _KBASE_SMC_H_ + +#ifdef CONFIG_ARM64 + +#include + +#define SMC_FAST_CALL (1U << 31) +#define SMC_64 (1U << 30) + +#define SMC_OEN_OFFSET 24 +#define SMC_OEN_MASK (0x3F << SMC_OEN_OFFSET) /* 6 bits */ +#define SMC_OEN_SIP (2 << SMC_OEN_OFFSET) +#define SMC_OEN_STD (4 << SMC_OEN_OFFSET) + + +/** + * kbase_invoke_smc_fid - Perform a secure monitor call + * @fid: The SMC function to call, see SMC Calling convention. + * @arg0: First argument to the SMC.
+ * @arg1: Second argument to the SMC. + * @arg2: Third argument to the SMC. + * + * See SMC Calling Convention for details. + * + * Return: the return value from the SMC. + */ +u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2); + +/** + * kbase_invoke_smc - Perform a secure monitor call + * @oen: Owning Entity number (SIP, STD etc). + * @function_number: The function number within the OEN. + * @smc64: use SMC64 calling convention instead of SMC32. + * @arg0: First argument to the SMC. + * @arg1: Second argument to the SMC. + * @arg2: Third argument to the SMC. + * + * See SMC Calling Convention for details. + * + * Return: the return value from the SMC call. + */ +u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64, + u64 arg0, u64 arg1, u64 arg2); + +#endif /* CONFIG_ARM64 */ + +#endif /* _KBASE_SMC_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c new file mode 100644 index 0000000..cbb0c76 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c @@ -0,0 +1,1785 @@ +/* + * + * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#include
+
+#include
+#include
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+#include
+#endif
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/**
+ * @file mali_kbase_softjobs.c
+ *
+ * This file implements the logic behind software only jobs that are
+ * executed within the driver rather than being handed over to the GPU.
+ */
+
+/* Append @katom to its context's waiting_soft_jobs list, taking
+ * waiting_soft_jobs_lock around the list update.
+ */
+static void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_add_tail(&katom->queue, &kctx->waiting_soft_jobs);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+/* Unlink @katom's queue entry from whichever list it is on, taking
+ * waiting_soft_jobs_lock around the list update.
+ */
+void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_del(&katom->queue);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+/* Timestamp @katom, queue it as a waiting soft job and arm the context's
+ * soft_job_timeout timer unless that timer is already pending.
+ */
+static void kbasep_add_waiting_with_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Record the start time of this atom so we could cancel it at
+	 * the right time.
+	 */
+	katom->start_timestamp = ktime_get();
+
+	/* Add the atom to the waiting list before the timer is
+	 * (re)started to make sure that it gets processed.
+	 */
+	kbasep_add_waiting_soft_job(katom);
+
+	/* Schedule timeout of this atom after a period if it is not active */
+	if (!timer_pending(&kctx->soft_job_timeout)) {
+		int timeout_ms = atomic_read(
+				&kctx->kbdev->js_data.soft_job_timeout_ms);
+		mod_timer(&kctx->soft_job_timeout,
+			  jiffies + msecs_to_jiffies(timeout_ms));
+	}
+}
+
+/* Read the one-byte soft event status at address @evt into *@status.
+ *
+ * Return: 0 on success, -EFAULT if the address could not be mapped.
+ */
+static int kbasep_read_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char *status)
+{
+	unsigned char *mapped_evt;
+	struct kbase_vmap_struct map;
+
+	mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map);
+	if (!mapped_evt)
+		return -EFAULT;
+
+	*status = *mapped_evt;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+/* Write @new_status to the one-byte soft event status at address @evt.
+ *
+ * Return: 0 on success, -EINVAL if @new_status is neither
+ * BASE_JD_SOFT_EVENT_SET nor BASE_JD_SOFT_EVENT_RESET, -EFAULT if the
+ * address could not be mapped.
+ */
+static int kbasep_write_soft_event_status(
+		struct kbase_context *kctx, u64 evt, unsigned char new_status)
+{
+	unsigned char *mapped_evt;
+	struct kbase_vmap_struct map;
+
+	if ((new_status != BASE_JD_SOFT_EVENT_SET) &&
+	    (new_status != BASE_JD_SOFT_EVENT_RESET))
+		return -EINVAL;
+
+	mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map);
+	if (!mapped_evt)
+		return -EFAULT;
+
+	*mapped_evt = new_status;
+
+	kbase_vunmap(kctx, &map);
+
+	return 0;
+}
+
+/* Sample the GPU cycle counter, system time and CPU timestamp and copy them
+ * to the buffer addressed by the atom's jc.
+ *
+ * Return: 0 once the atom has been processed; katom->event_code is
+ * BASE_JD_EVENT_DONE if the result was written and
+ * BASE_JD_EVENT_JOB_CANCELLED if the destination could not be mapped.
+ * If the PM active reference cannot be taken, the atom is queued on
+ * suspended_soft_jobs_list and the waiting list, and the non-zero value
+ * from kbase_pm_context_active_handle_suspend() is returned.
+ */
+static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
+{
+	struct kbase_vmap_struct map;
+	void *user_result;
+	struct timespec64 ts;
+	struct base_dump_cpu_gpu_counters data;
+	u64 system_time;
+	u64 cycle_counter;
+	u64 jc = katom->jc;
+	struct kbase_context *kctx = katom->kctx;
+	int pm_active_err;
+
+	memset(&data, 0, sizeof(data));
+
+	/* Take the PM active reference as late as possible - otherwise, it could
+	 * delay suspend until we process the atom (which may be at the end of a
+	 * long chain of dependencies */
+	pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
+	if (pm_active_err) {
+		struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data;
+
+		/* We're suspended - queue this on the list of suspended jobs
+		 * Use dep_item[1], because dep_item[0] was previously in use
+		 * for 'waiting_soft_jobs'.
+		 */
+		mutex_lock(&js_devdata->runpool_mutex);
+		list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list);
+		mutex_unlock(&js_devdata->runpool_mutex);
+
+		/* Also adding this to the list of waiting soft job */
+		kbasep_add_waiting_soft_job(katom);
+
+		return pm_active_err;
+	}
+
+	kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time,
+			&ts);
+
+	kbase_pm_context_idle(kctx->kbdev);
+
+	data.sec = ts.tv_sec;
+	data.usec = ts.tv_nsec / 1000;
+	data.system_time = system_time;
+	data.cycle_counter = cycle_counter;
+
+	/* Assume this atom will be cancelled until we know otherwise */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	/* GPU_WR access is checked on the range for returning the result to
+	 * userspace for the following reasons:
+	 * - security, this is currently how imported user bufs are checked.
+	 * - userspace ddk guaranteed to assume region was mapped as GPU_WR */
+	user_result = kbase_vmap_prot(kctx, jc, sizeof(data), KBASE_REG_GPU_WR, &map);
+	if (!user_result)
+		return 0;
+
+	memcpy(user_result, &data, sizeof(data));
+
+	kbase_vunmap(kctx, &map);
+
+	/* Atom was fine - mark it as done */
+	katom->event_code = BASE_JD_EVENT_DONE;
+
+	return 0;
+}
+
+#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
+/* Called by the explicit fence mechanism when a fence wait has completed.
+ * Runs the whole completion sequence under jctx.lock.
+ */
+void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+
+	mutex_lock(&kctx->jctx.lock);
+	kbasep_remove_waiting_soft_job(katom);
+	kbase_finish_soft_job(katom);
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(kctx->kbdev);
+	mutex_unlock(&kctx->jctx.lock);
+}
+#endif
+
+/* Work item that completes the atom embedding @work: jd_done_nolock() is
+ * called under jctx.lock and, if it asks for it, the scheduler is kicked
+ * after the lock has been dropped.
+ */
+static void kbasep_soft_event_complete_job(struct work_struct *work)
+{
+	struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom,
+			work);
+	struct kbase_context *kctx = katom->kctx;
+	int resched;
+
+	mutex_lock(&kctx->jctx.lock);
+	resched = jd_done_nolock(katom, NULL);
+	mutex_unlock(&kctx->jctx.lock);
+
+	if (resched)
+		kbase_js_sched_all(kctx->kbdev);
+}
+
+/* Complete every waiting SOFT_EVENT_WAIT atom of @kctx whose jc equals @evt.
+ *
+ * Matching atoms are unlinked, marked BASE_JD_EVENT_DONE and handed to
+ * job_done_wq. The soft job timeout timer is deleted only when no other
+ * event-wait atom (or, with CONFIG_MALI_FENCE_DEBUG, fence-wait atom)
+ * remains on the list.
+ */
+void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt)
+{
+	int cancel_timer = 1;
+	struct list_head *entry, *tmp;
+	unsigned long lflags;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		struct kbase_jd_atom *katom = list_entry(
+				entry, struct kbase_jd_atom, queue);
+
+		switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+		case BASE_JD_REQ_SOFT_EVENT_WAIT:
+			if (katom->jc == evt) {
+				list_del(&katom->queue);
+
+				katom->event_code = BASE_JD_EVENT_DONE;
+				INIT_WORK(&katom->work,
+					  kbasep_soft_event_complete_job);
+				queue_work(kctx->jctx.job_done_wq,
+					   &katom->work);
+			} else {
+				/* There are still other waiting jobs, we cannot
+				 * cancel the timer yet.
+				 */
+				cancel_timer = 0;
+			}
+			break;
+#ifdef CONFIG_MALI_FENCE_DEBUG
+		case BASE_JD_REQ_SOFT_FENCE_WAIT:
+			/* Keep the timer running if fence debug is enabled and
+			 * there are waiting fence jobs.
+			 */
+			cancel_timer = 0;
+			break;
+#endif
+		}
+	}
+
+	if (cancel_timer)
+		del_timer(&kctx->soft_job_timeout);
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+/* Recursively walk the atoms on both dep_head lists of @katom and warn about
+ * every unfinished SOFT_FENCE_TRIGGER atom whose fence info can be fetched.
+ */
+static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = kctx->kbdev->dev;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct kbase_jd_atom *dep;
+
+		list_for_each_entry(dep, &katom->dep_head[i], dep_item[i]) {
+			if (dep->status == KBASE_JD_ATOM_STATE_UNUSED ||
+			    dep->status == KBASE_JD_ATOM_STATE_COMPLETED)
+				continue;
+
+			if ((dep->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
+					== BASE_JD_REQ_SOFT_FENCE_TRIGGER) {
+				/* Found blocked trigger fence. */
+				struct kbase_sync_fence_info info;
+
+				if (!kbase_sync_fence_in_info_get(dep, &info)) {
+					dev_warn(dev,
+						 "\tVictim trigger atom %d fence [%p] %s: %s\n",
+						 kbase_jd_atom_id(kctx, dep),
+						 info.fence,
+						 info.name,
+						 kbase_sync_status_string(info.status));
+				}
+			}
+
+			kbase_fence_debug_check_atom(dep);
+		}
+	}
+}
+
+/* Log which fence @katom is still waiting on and which trigger atoms depend
+ * on it. Returns silently when the fence info can no longer be obtained.
+ */
+static void kbase_fence_debug_wait_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct device *dev = katom->kctx->kbdev->dev;
+	int timeout_ms = atomic_read(&kctx->kbdev->js_data.soft_job_timeout_ms);
+	unsigned long lflags;
+	struct kbase_sync_fence_info info;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+
+	if (kbase_sync_fence_in_info_get(katom, &info)) {
+		/* Fence must have signaled just after timeout. */
+		spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+		return;
+	}
+
+	dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%p] after %dms\n",
+		 kctx->tgid, kctx->id,
+		 kbase_jd_atom_id(kctx, katom),
+		 info.fence, timeout_ms);
+	dev_warn(dev, "\tGuilty fence [%p] %s: %s\n",
+		 info.fence, info.name,
+		 kbase_sync_status_string(info.status));
+
+	/* Search for blocked trigger atoms */
+	kbase_fence_debug_check_atom(katom);
+
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+
+	kbase_sync_fence_in_dump(katom);
+}
+
+/* Heap-allocated carrier that lets a fence debug report run from a workqueue;
+ * freed by the worker once the report has been printed.
+ */
+struct kbase_fence_debug_work {
+	struct kbase_jd_atom *katom;
+	struct work_struct work;
+};
+
+/* Workqueue entry point: print the fence debug report under jctx.lock, then
+ * free the carrier.
+ */
+static void kbase_fence_debug_wait_timeout_worker(struct work_struct *work)
+{
+	struct kbase_fence_debug_work *w = container_of(work,
+			struct kbase_fence_debug_work, work);
+	struct kbase_jd_atom *katom = w->katom;
+	struct kbase_context *kctx = katom->kctx;
+
+	mutex_lock(&kctx->jctx.lock);
+	kbase_fence_debug_wait_timeout(katom);
+	mutex_unlock(&kctx->jctx.lock);
+
+	kfree(w);
+}
+
+/* Queue a fence debug report for @katom. The allocation uses GFP_ATOMIC and
+ * the report is simply skipped if it fails.
+ */
+static void kbase_fence_debug_timeout(struct kbase_jd_atom *katom)
+{
+	struct kbase_fence_debug_work *work;
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Enqueue fence debug worker. Use job_done_wq to get
+	 * debug print ordered with job completion.
+	 */
+	work = kzalloc(sizeof(struct kbase_fence_debug_work), GFP_ATOMIC);
+	/* Ignore allocation failure. */
+	if (work) {
+		work->katom = katom;
+		INIT_WORK(&work->work, kbase_fence_debug_wait_timeout_worker);
+		queue_work(kctx->jctx.job_done_wq, &work->work);
+	}
+}
+#endif /* CONFIG_MALI_FENCE_DEBUG */
+
+/* Timer callback for kctx->soft_job_timeout.
+ *
+ * Atoms that have waited at least soft_job_timeout_ms are handled by type:
+ * event waits are unlinked and completed as BASE_JD_EVENT_JOB_CANCELLED via
+ * job_done_wq; fence waits only get a debug report (CONFIG_MALI_FENCE_DEBUG).
+ * The timer is re-armed if any atom on the list has not yet timed out.
+ */
+void kbasep_soft_job_timeout_worker(struct timer_list *timer)
+{
+	struct kbase_context *kctx = container_of(timer, struct kbase_context,
+			soft_job_timeout);
+	u32 timeout_ms = (u32)atomic_read(
+			&kctx->kbdev->js_data.soft_job_timeout_ms);
+	ktime_t cur_time = ktime_get();
+	bool restarting = false;
+	unsigned long lflags;
+	struct list_head *entry, *tmp;
+
+	spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags);
+	list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+		struct kbase_jd_atom *katom = list_entry(entry,
+				struct kbase_jd_atom, queue);
+		s64 elapsed_time = ktime_to_ms(ktime_sub(cur_time,
+					katom->start_timestamp));
+
+		if (elapsed_time < (s64)timeout_ms) {
+			restarting = true;
+			continue;
+		}
+
+		switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
+		case BASE_JD_REQ_SOFT_EVENT_WAIT:
+			/* Take it out of the list to ensure that it
+			 * will be cancelled in all cases
+			 */
+			list_del(&katom->queue);
+
+			katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+			INIT_WORK(&katom->work, kbasep_soft_event_complete_job);
+			queue_work(kctx->jctx.job_done_wq, &katom->work);
+			break;
+#ifdef CONFIG_MALI_FENCE_DEBUG
+		case BASE_JD_REQ_SOFT_FENCE_WAIT:
+			kbase_fence_debug_timeout(katom);
+			break;
+#endif
+		}
+	}
+
+	if (restarting)
+		mod_timer(timer, jiffies + msecs_to_jiffies(timeout_ms));
+	spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags);
+}
+
+/* Start an event-wait soft job.
+ *
+ * Return: 0 if the atom needs no waiting (the event is already set, or its
+ * status could not be read and the atom was marked cancelled); 1 if the atom
+ * was queued to wait with a timeout.
+ */
+static int kbasep_soft_event_wait(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	unsigned char status;
+
+	/* The status of this soft-job is stored in jc */
+	if (kbasep_read_soft_event_status(kctx, katom->jc, &status)) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		return 0;
+	}
+
+	if (status == BASE_JD_SOFT_EVENT_SET)
+		return 0; /* Event already set, nothing to do */
+
+	kbasep_add_waiting_with_timeout(katom);
+
+	return 1;
+}
+
+/* Write @new_status to the event addressed by the atom's jc and, when the
+ * event is being set, complete the atoms waiting on it. The atom is marked
+ * cancelled if the status cannot be written.
+ */
+static void kbasep_soft_event_update_locked(struct kbase_jd_atom *katom,
+					     unsigned char new_status)
+{
+	/* Complete jobs waiting on the same event */
+	struct kbase_context *kctx = katom->kctx;
+
+	if (kbasep_write_soft_event_status(kctx, katom->jc, new_status) != 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		return;
+	}
+
+	if (new_status == BASE_JD_SOFT_EVENT_SET)
+		kbasep_complete_triggered_soft_events(kctx, katom->jc);
+}
+
+/**
+ * kbase_soft_event_update() - Update soft event state
+ * @kctx: Pointer to context
+ * @event: Event to update
+ * @new_status: New status value of event
+ *
+ * Update the event, and wake up any atoms waiting for the event.
+ *
+ * Return: 0 on success, a negative error code on failure.
+ */
+int kbase_soft_event_update(struct kbase_context *kctx,
+			     u64 event,
+			     unsigned char new_status)
+{
+	int err = 0;
+
+	mutex_lock(&kctx->jctx.lock);
+
+	if (kbasep_write_soft_event_status(kctx, event, new_status)) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	if (new_status == BASE_JD_SOFT_EVENT_SET)
+		kbasep_complete_triggered_soft_events(kctx, event);
+
+out:
+	mutex_unlock(&kctx->jctx.lock);
+
+	return err;
+}
+
+/* Mark @katom as cancelled and complete it, kicking the scheduler if
+ * jd_done_nolock() requests it.
+ */
+static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom)
+{
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+
+/* Release everything kbase_debug_copy_prepare() acquired for @katom: pinned
+ * destination pages, the page arrays, the external resource references and
+ * the buffer array itself. katom->softjob_data is cleared afterwards.
+ */
+static void kbase_debug_copy_finish(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers = katom->softjob_data;
+	unsigned int i;
+	unsigned int nr = katom->nr_extres;
+
+	if (!buffers)
+		return;
+
+	kbase_gpu_vm_lock(katom->kctx);
+	for (i = 0; i < nr; i++) {
+		int p;
+		struct kbase_mem_phy_alloc *gpu_alloc = buffers[i].gpu_alloc;
+
+		/* Entries without a page array hold nothing to release.
+		 * kbase_debug_copy_prepare() skips zero-address entries, so
+		 * such an entry can sit in front of populated ones: keep
+		 * scanning instead of stopping, otherwise the later entries'
+		 * pinned pages and allocation references would leak.
+		 */
+		if (!buffers[i].pages)
+			continue;
+		for (p = 0; p < buffers[i].nr_pages; p++) {
+			struct page *pg = buffers[i].pages[p];
+
+			if (pg)
+				put_page(pg);
+		}
+		if (buffers[i].is_vmalloc)
+			vfree(buffers[i].pages);
+		else
+			kfree(buffers[i].pages);
+		if (gpu_alloc) {
+			switch (gpu_alloc->type) {
+			case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+			{
+				kbase_free_user_buffer(&buffers[i]);
+				break;
+			}
+			default:
+				/* Nothing to be done. */
+				break;
+			}
+			kbase_mem_phy_alloc_put(gpu_alloc);
+		}
+	}
+	kbase_gpu_vm_unlock(katom->kctx);
+	kfree(buffers);
+
+	katom->softjob_data = NULL;
+}
+
+/* Build the per-buffer state for a debug copy soft job.
+ *
+ * Copies katom->nr_extres base_jd_debug_copy_buffer descriptors from the user
+ * address in katom->jc, pins the destination pages of every entry with a
+ * non-zero address and takes a reference on the external resource each entry
+ * names. On any failure everything acquired so far is released again.
+ *
+ * Return: 0 on success, a negative error code on failure.
+ */
+static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers;
+	struct base_jd_debug_copy_buffer *user_buffers = NULL;
+	unsigned int i;
+	unsigned int nr = katom->nr_extres;
+	int ret = 0;
+	void __user *user_structs = (void __user *)(uintptr_t)katom->jc;
+
+	if (!user_structs)
+		return -EINVAL;
+
+	buffers = kcalloc(nr, sizeof(*buffers), GFP_KERNEL);
+	if (!buffers) {
+		ret = -ENOMEM;
+		goto out_cleanup;
+	}
+	katom->softjob_data = buffers;
+
+	user_buffers = kmalloc_array(nr, sizeof(*user_buffers), GFP_KERNEL);
+
+	if (!user_buffers) {
+		ret = -ENOMEM;
+		goto out_cleanup;
+	}
+
+	ret = copy_from_user(user_buffers, user_structs,
+			sizeof(*user_buffers)*nr);
+	if (ret) {
+		ret = -EFAULT;
+		goto out_cleanup;
+	}
+
+	for (i = 0; i < nr; i++) {
+		u64 addr = user_buffers[i].address;
+		u64 page_addr = addr & PAGE_MASK;
+		u64 end_page_addr = addr + user_buffers[i].size - 1;
+		u64 last_page_addr = end_page_addr & PAGE_MASK;
+		int nr_pages = (last_page_addr-page_addr)/PAGE_SIZE+1;
+		int pinned_pages;
+		struct kbase_va_region *reg;
+		struct base_external_resource user_extres;
+
+		if (!addr)
+			continue;
+
+		if (last_page_addr < page_addr) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+
+		buffers[i].nr_pages = nr_pages;
+		buffers[i].offset = addr & ~PAGE_MASK;
+		if (buffers[i].offset >= PAGE_SIZE) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+		buffers[i].size = user_buffers[i].size;
+
+		if (nr_pages > (KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD /
+				sizeof(struct page *))) {
+			buffers[i].is_vmalloc = true;
+			buffers[i].pages = vzalloc(nr_pages *
+						sizeof(struct page *));
+		} else {
+			buffers[i].is_vmalloc = false;
+			buffers[i].pages = kcalloc(nr_pages,
+						sizeof(struct page *), GFP_KERNEL);
+		}
+
+		if (!buffers[i].pages) {
+			ret = -ENOMEM;
+			goto out_cleanup;
+		}
+
+		pinned_pages = get_user_pages_fast(page_addr,
+					nr_pages,
+					1, /* Write */
+					buffers[i].pages);
+		if (pinned_pages < 0) {
+			/* get_user_pages_fast has failed - page array is not
+			 * valid. Don't try to release any pages.
+			 */
+			buffers[i].nr_pages = 0;
+
+			ret = pinned_pages;
+			goto out_cleanup;
+		}
+		if (pinned_pages != nr_pages) {
+			/* Adjust number of pages, so that we only attempt to
+			 * release pages in the array that we know are valid.
+			 */
+			buffers[i].nr_pages = pinned_pages;
+
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+
+		user_extres = user_buffers[i].extres;
+		if (user_extres.ext_resource == 0ULL) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+
+		kbase_gpu_vm_lock(katom->kctx);
+		reg = kbase_region_tracker_find_region_enclosing_address(
+				katom->kctx, user_extres.ext_resource &
+				~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+		if (kbase_is_region_invalid_or_free(reg) ||
+		    reg->gpu_alloc == NULL) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+
+		buffers[i].gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+		buffers[i].nr_extres_pages = reg->nr_pages;
+
+		if (reg->nr_pages*PAGE_SIZE != buffers[i].size)
+			dev_warn(katom->kctx->kbdev->dev, "Copy buffer is not of same size as the external resource to copy.\n");
+
+		switch (reg->gpu_alloc->type) {
+		case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+		{
+			struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
+			unsigned long nr_pages =
+				alloc->imported.user_buf.nr_pages;
+
+			if (alloc->imported.user_buf.mm != current->mm) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			buffers[i].extres_pages = kcalloc(nr_pages,
+					sizeof(struct page *), GFP_KERNEL);
+			if (!buffers[i].extres_pages) {
+				ret = -ENOMEM;
+				goto out_unlock;
+			}
+
+			ret = get_user_pages_fast(
+					alloc->imported.user_buf.address,
+					nr_pages, 0,
+					buffers[i].extres_pages);
+			if (ret != nr_pages) {
+				/* Adjust number of pages, so that we only
+				 * attempt to release pages in the array that we
+				 * know are valid.
+				 */
+				if (ret < 0) {
+					buffers[i].nr_extres_pages = 0;
+				} else {
+					buffers[i].nr_extres_pages = ret;
+					/* A partial pin is a failure too:
+					 * report it as an error code rather
+					 * than leaking the page count.
+					 */
+					ret = -EINVAL;
+				}
+
+				goto out_unlock;
+			}
+			ret = 0;
+			break;
+		}
+		default:
+			/* Nothing to be done. */
+			break;
+		}
+		kbase_gpu_vm_unlock(katom->kctx);
+	}
+	kfree(user_buffers);
+
+	return ret;
+
+out_unlock:
+	kbase_gpu_vm_unlock(katom->kctx);
+
+out_cleanup:
+	/* Frees allocated memory for kbase_debug_copy_job struct, including
+	 * members, and clears katom->softjob_data */
+	kbase_debug_copy_finish(katom);
+	kfree(user_buffers);
+
+	return ret;
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0)
+/* Map page number @page_num of an imported dma-buf by walking its sg table.
+ *
+ * On success *@page is set to the page that was mapped so the caller can
+ * kunmap() it. *@page is left untouched when NULL is returned.
+ *
+ * Return: the kmap() address of the page, or NULL if @gpu_alloc is not a UMM
+ * import, has no sg table, or @page_num is out of range.
+ */
+static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc,
+	unsigned long page_num, struct page **page)
+{
+	struct sg_table *sgt = gpu_alloc->imported.umm.sgt;
+	struct sg_page_iter sg_iter;
+	unsigned long page_index = 0;
+
+	if (WARN_ON(gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM))
+		return NULL;
+
+	if (!sgt)
+		return NULL;
+
+	if (WARN_ON(page_num >= gpu_alloc->nents))
+		return NULL;
+
+	for_each_sg_page(sgt->sgl, &sg_iter, sgt->nents, 0) {
+		if (page_index == page_num) {
+			*page = sg_page_iter_page(&sg_iter);
+
+			return kmap(*page);
+		}
+		page_index++;
+	}
+
+	return NULL;
+}
+#endif
+
+/* Copy the contents of the external resource described by @buf_data into its
+ * pinned destination pages. At most min(resource size, buf_data->size) bytes
+ * are copied. Runs under the context's GPU VM lock.
+ *
+ * Return: 0 on success; -EINVAL if there is no external resource or its type
+ * is not supported; otherwise the error reported by the copy helper or by
+ * dma_buf_begin_cpu_access().
+ */
+int kbase_mem_copy_from_extres(struct kbase_context *kctx,
+		struct kbase_debug_copy_buffer *buf_data)
+{
+	unsigned int i;
+	unsigned int target_page_nr = 0;
+	struct page **pages = buf_data->pages;
+	u64 offset = buf_data->offset;
+	size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE;
+	size_t to_copy = min(extres_size, buf_data->size);
+	struct kbase_mem_phy_alloc *gpu_alloc = buf_data->gpu_alloc;
+	int ret = 0;
+	size_t dma_to_copy;
+
+	KBASE_DEBUG_ASSERT(pages != NULL);
+
+	kbase_gpu_vm_lock(kctx);
+	if (!gpu_alloc) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	switch (gpu_alloc->type) {
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
+	{
+		for (i = 0; i < buf_data->nr_extres_pages &&
+				target_page_nr < buf_data->nr_pages; i++) {
+			struct page *pg = buf_data->extres_pages[i];
+			void *extres_page = kmap(pg);
+
+			if (extres_page) {
+				ret = kbase_mem_copy_to_pinned_user_pages(
+						pages, extres_page, &to_copy,
+						buf_data->nr_pages,
+						&target_page_nr, offset);
+				kunmap(pg);
+				if (ret)
+					goto out_unlock;
+			}
+		}
+	}
+	break;
+	case KBASE_MEM_TYPE_IMPORTED_UMM: {
+		struct dma_buf *dma_buf = gpu_alloc->imported.umm.dma_buf;
+
+		KBASE_DEBUG_ASSERT(dma_buf != NULL);
+		if (dma_buf->size > buf_data->nr_extres_pages * PAGE_SIZE)
+			dev_warn(kctx->kbdev->dev, "External resources buffer size mismatch");
+
+		dma_to_copy = min(dma_buf->size,
+			(size_t)(buf_data->nr_extres_pages * PAGE_SIZE));
+		ret = dma_buf_begin_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS)
+				0, dma_to_copy,
+#endif
+				DMA_FROM_DEVICE);
+		if (ret)
+			goto out_unlock;
+
+		for (i = 0; i < dma_to_copy/PAGE_SIZE &&
+				target_page_nr < buf_data->nr_pages; i++) {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0)
+			struct page *pg;
+			void *extres_page = dma_buf_kmap_page(gpu_alloc, i, &pg);
+#else
+			void *extres_page = dma_buf_kmap(dma_buf, i);
+#endif
+			if (extres_page) {
+				ret = kbase_mem_copy_to_pinned_user_pages(
+						pages, extres_page, &to_copy,
+						buf_data->nr_pages,
+						&target_page_nr, offset);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0)
+				kunmap(pg);
+#else
+				dma_buf_kunmap(dma_buf, i, extres_page);
+#endif
+				/* Leave the loop rather than the function so
+				 * that the CPU access begun above is always
+				 * ended, even when the copy failed.
+				 */
+				if (ret)
+					break;
+			}
+		}
+		dma_buf_end_cpu_access(dma_buf,
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS)
+				0, dma_to_copy,
+#endif
+				DMA_FROM_DEVICE);
+		break;
+	}
+	default:
+		ret = -EINVAL;
+	}
+out_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return ret;
+}
+
+/* Run a prepared debug copy job: copy every buffer in order and stop at the
+ * first failure.
+ *
+ * Return: 0 on success, -EINVAL if the job was never prepared, otherwise the
+ * error from kbase_mem_copy_from_extres().
+ */
+static int kbase_debug_copy(struct kbase_jd_atom *katom)
+{
+	struct kbase_debug_copy_buffer *buffers = katom->softjob_data;
+	unsigned int i;
+
+	if (WARN_ON(!buffers))
+		return -EINVAL;
+
+	for (i = 0; i < katom->nr_extres; i++) {
+		int res = kbase_mem_copy_from_extres(katom->kctx, &buffers[i]);
+
+		if (res)
+			return res;
+	}
+
+	return 0;
+}
+
+#define KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT ((u32)0x7)
+
+/* Validate a single JIT allocation request.
+ *
+ * Return: 0 if @info is acceptable, -EINVAL if the ID is zero, commit_pages
+ * exceeds va_pages, gpu_alloc_addr is not 8-byte aligned, the padding is not
+ * zeroed, unknown flags are set, or heap_info_gpu_addr is inconsistent with
+ * the flags / build configuration.
+ */
+int kbasep_jit_alloc_validate(struct kbase_context *kctx,
+					struct base_jit_alloc_info *info)
+{
+	size_t j;
+	/* If the ID is zero, then fail the job */
+	if (info->id == 0)
+		return -EINVAL;
+
+	/* Sanity check that the PA fits within the VA */
+	if (info->va_pages < info->commit_pages)
+		return -EINVAL;
+
+	/* Ensure the GPU address is correctly aligned */
+	if ((info->gpu_alloc_addr & KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT) != 0)
+		return -EINVAL;
+
+	/* Interface version 2 (introduced with kernel driver version 11.5)
+	 * onward has padding and a flags member to validate.
+	 *
+	 * Note: To support earlier versions the extra bytes will have been set
+	 * to 0 by the caller.
+	 */
+
+	/* Check padding is all zeroed */
+	for (j = 0; j < sizeof(info->padding); j++) {
+		if (info->padding[j] != 0)
+			return -EINVAL;
+	}
+
+	/* Only valid flags shall be set */
+	if (info->flags & ~(BASE_JIT_ALLOC_VALID_FLAGS))
+		return -EINVAL;
+
+#if !MALI_JIT_PRESSURE_LIMIT
+	/* If just-in-time memory allocation pressure limit feature is disabled,
+	 * heap_info_gpu_addr must be zeroed-out
+	 */
+	if (info->heap_info_gpu_addr)
+		return -EINVAL;
+#endif
+
+	/* If BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE is set, heap_info_gpu_addr
+	 * cannot be 0
+	 */
+	if ((info->flags & BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) &&
+			!info->heap_info_gpu_addr)
+		return -EINVAL;
+
+	return 0;
+}
+
+
+#if (KERNEL_VERSION(3, 18, 63) > LINUX_VERSION_CODE)
+#define offsetofend(TYPE, MEMBER) \
+	(offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))
+#endif
+
+/*
+ * Sizes of user data to copy for each just-in-time memory interface version
+ *
+ * In interface version 2 onwards this is the same as the struct size, allowing
+ * copying of arrays of structures from userspace.
+ *
+ * In interface version 1 the structure size was variable, and hence arrays of
+ * structures cannot be supported easily, and were not a feature present in
+ * version 1 anyway.
+ */
+static const size_t jit_info_copy_size_for_jit_version[] = {
+	/* in jit_version 1, the structure did not have any end padding, hence
+	 * it could be a different size on 32 and 64-bit clients. We therefore
+	 * do not copy past the last member
+	 */
+	[1] = offsetofend(struct base_jit_alloc_info_10_2, id),
+	[2] = sizeof(struct base_jit_alloc_info_11_5),
+	[3] = sizeof(struct base_jit_alloc_info)
+};
+
+/* Copy and validate the JIT allocation requests of a JIT_ALLOC soft job.
+ *
+ * Reads katom->nr_extres request structures (forced to 1 when nr_extres is 0
+ * or the context uses jit_version 1) from the user address in katom->jc into
+ * a kernel array stored in katom->softjob_data, zero-fills whatever the
+ * user's interface version does not supply, validates each request and adds
+ * the atom to the context's jit_atoms_head list.
+ *
+ * Return: 0 on success, -EINVAL for an unknown jit_version, a bad request
+ * count, a failed user copy or an invalid request, -ENOMEM if the array
+ * cannot be allocated. On failure katom->softjob_data is left NULL or
+ * untouched.
+ */
+static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
+{
+	__user u8 *data = (__user u8 *)(uintptr_t) katom->jc;
+	struct base_jit_alloc_info *info;
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	u32 count;
+	int ret;
+	u32 i;
+	size_t jit_info_user_copy_size;
+
+	/* An out-of-range version must not be used to index the table */
+	if (WARN_ON(kctx->jit_version >=
+		    ARRAY_SIZE(jit_info_copy_size_for_jit_version))) {
+		ret = -EINVAL;
+		goto fail;
+	}
+	jit_info_user_copy_size =
+			jit_info_copy_size_for_jit_version[kctx->jit_version];
+	/* A copy size above the kernel struct size would overflow *info and
+	 * make the memset() length below wrap around.
+	 */
+	if (WARN_ON(jit_info_user_copy_size > sizeof(*info))) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* For backwards compatibility, and to prevent reading more than 1 jit
+	 * info struct on jit version 1
+	 */
+	if (katom->nr_extres == 0 || kctx->jit_version == 1)
+		katom->nr_extres = 1;
+	count = katom->nr_extres;
+
+	/* Sanity checks */
+	if (!data || count > kctx->jit_max_allocations ||
+			count > ARRAY_SIZE(kctx->jit_alloc)) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Copy the information for safe access and future storage */
+	info = kmalloc_array(count, sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	katom->softjob_data = info;
+
+	for (i = 0; i < count; i++, info++, data += jit_info_user_copy_size) {
+		if (copy_from_user(info, data, jit_info_user_copy_size) != 0) {
+			ret = -EINVAL;
+			goto free_info;
+		}
+		/* Clear any remaining bytes when user struct is smaller than
+		 * kernel struct. For jit version 1, this also clears the
+		 * padding bytes
+		 */
+		memset(((u8 *)info) + jit_info_user_copy_size, 0,
+				sizeof(*info) - jit_info_user_copy_size);
+
+		ret = kbasep_jit_alloc_validate(kctx, info);
+		if (ret)
+			goto free_info;
+		KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO(kbdev, katom,
+			info->va_pages, info->commit_pages, info->extent,
+			info->id, info->bin_id, info->max_allocations,
+			info->flags, info->usage_id);
+	}
+
+	katom->jit_blocked = false;
+
+	lockdep_assert_held(&kctx->jctx.lock);
+	list_add_tail(&katom->jit_node, &kctx->jctx.jit_atoms_head);
+
+	/*
+	 * Note:
+	 * The provided info->gpu_alloc_addr isn't validated here as
+	 * userland can cache allocations which means that even
+	 * though the region is valid it doesn't represent the
+	 * same thing it used to.
+	 *
+	 * Complete validation of va_pages, commit_pages and extent
+	 * isn't done here as it will be done during the call to
+	 * kbase_mem_alloc.
+	 */
+	return 0;
+
+free_info:
+	kfree(katom->softjob_data);
+	katom->softjob_data = NULL;
+fail:
+	return ret;
+}
+
+/* Return the softjob_data of a JIT_FREE atom as a byte array of IDs, or NULL
+ * (with a warning) if @katom is not a JIT_FREE soft job.
+ */
+static u8 *kbase_jit_free_get_ids(struct kbase_jd_atom *katom)
+{
+	if (WARN_ON((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) !=
+				BASE_JD_REQ_SOFT_JIT_FREE))
+		return NULL;
+
+	return (u8 *) katom->softjob_data;
+}
+
+/* Insert @katom into the context's jit_pending_alloc list in front of the
+ * first entry with a greater age, or at the tail if there is none, keeping
+ * the list ordered by ascending age.
+ */
+static void kbase_jit_add_to_pending_alloc_list(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct list_head *target_list_head = NULL;
+	struct kbase_jd_atom *entry;
+
+	list_for_each_entry(entry, &kctx->jctx.jit_pending_alloc, queue) {
+		if (katom->age < entry->age) {
+			target_list_head = &entry->queue;
+			break;
+		}
+	}
+
+	if (target_list_head == NULL)
+		target_list_head = &kctx->jctx.jit_pending_alloc;
+
+	list_add_tail(&katom->queue, target_list_head);
+}
+
+static int kbase_jit_allocate_process(struct kbase_jd_atom *katom)
+{
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct base_jit_alloc_info *info;
+	struct kbase_va_region *reg;
+
struct kbase_vmap_struct mapping; + u64 *ptr, new_addr; + u32 count = katom->nr_extres; + u32 i; + bool ignore_pressure_limit = false; + + trace_sysgraph(SGR_SUBMIT, kctx->id, + kbase_jd_atom_id(kctx, katom)); + + if (katom->jit_blocked) { + list_del(&katom->queue); + katom->jit_blocked = false; + } + + info = katom->softjob_data; + if (WARN_ON(!info)) { + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return 0; + } + + for (i = 0; i < count; i++, info++) { + /* The JIT ID is still in use so fail the allocation */ + if (kctx->jit_alloc[info->id]) { + katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; + return 0; + } + } + +#if MALI_JIT_PRESSURE_LIMIT + /** + * If this is the only JIT_ALLOC atom in-flight then allow it to exceed + * the defined pressure limit. + */ + if (kctx->jit_current_allocations == 0) + ignore_pressure_limit = true; +#endif /* MALI_JIT_PRESSURE_LIMIT */ + + for (i = 0, info = katom->softjob_data; i < count; i++, info++) { + if (kctx->jit_alloc[info->id]) { + /* The JIT ID is duplicated in this atom. Roll back + * previous allocations and fail. 
+ */ + u32 j; + + info = katom->softjob_data; + for (j = 0; j < i; j++, info++) { + kbase_jit_free(kctx, kctx->jit_alloc[info->id]); + kctx->jit_alloc[info->id] = + KBASE_RESERVED_REG_JIT_ALLOC; + } + + katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; + return 0; + } + + /* Create a JIT allocation */ + reg = kbase_jit_allocate(kctx, info, ignore_pressure_limit); + if (!reg) { + struct kbase_jd_atom *jit_atom; + bool can_block = false; + + lockdep_assert_held(&kctx->jctx.lock); + + list_for_each_entry(jit_atom, &kctx->jctx.jit_atoms_head, jit_node) { + if (jit_atom == katom) + break; + + if ((jit_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == + BASE_JD_REQ_SOFT_JIT_FREE) { + u8 *free_ids = kbase_jit_free_get_ids(jit_atom); + + if (free_ids && *free_ids && + kctx->jit_alloc[*free_ids]) { + /* A JIT free which is active and + * submitted before this atom + */ + can_block = true; + break; + } + } + } + + if (!can_block) { + /* Mark the failed allocation as well as the + * other un-attempted allocations in the set, + * so we know they are in use even if the + * allocation itself failed. + */ + for (; i < count; i++, info++) { + kctx->jit_alloc[info->id] = + KBASE_RESERVED_REG_JIT_ALLOC; + } + + katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; + dev_warn_ratelimited(kbdev->dev, "JIT alloc softjob failed: atom id %d\n", + kbase_jd_atom_id(kctx, katom)); + return 0; + } + + /* There are pending frees for an active allocation + * so we should wait to see whether they free the + * memory. Add to the list of atoms for which JIT + * allocation is pending. + */ + kbase_jit_add_to_pending_alloc_list(katom); + katom->jit_blocked = true; + + /* Rollback, the whole set will be re-attempted */ + while (i-- > 0) { + info--; + kbase_jit_free(kctx, kctx->jit_alloc[info->id]); + kctx->jit_alloc[info->id] = NULL; + } + + return 1; + } + + /* Bind it to the user provided ID. 
*/ + kctx->jit_alloc[info->id] = reg; + } + + for (i = 0, info = katom->softjob_data; i < count; i++, info++) { + u64 entry_mmu_flags = 0; + /* + * Write the address of the JIT allocation to the user provided + * GPU allocation. + */ + ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr), + &mapping); + if (!ptr) { + /* + * Leave the allocations "live" as the JIT free atom + * will be submitted anyway. + */ + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return 0; + } + + reg = kctx->jit_alloc[info->id]; + new_addr = reg->start_pfn << PAGE_SHIFT; + *ptr = new_addr; + +#if defined(CONFIG_MALI_VECTOR_DUMP) + /* + * Retrieve the mmu flags for JIT allocation + * only if dumping is enabled + */ + entry_mmu_flags = kbase_mmu_create_ate(kbdev, + (struct tagged_addr){ 0 }, reg->flags, + MIDGARD_MMU_BOTTOMLEVEL, kctx->jit_group_id); +#endif + + KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(kbdev, katom, + info->gpu_alloc_addr, new_addr, info->flags, + entry_mmu_flags, info->id, info->commit_pages, + info->extent, info->va_pages); + kbase_vunmap(kctx, &mapping); + + kbase_trace_jit_report_gpu_mem(kctx, reg, + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); + } + + katom->event_code = BASE_JD_EVENT_DONE; + + return 0; +} + +static void kbase_jit_allocate_finish(struct kbase_jd_atom *katom) +{ + struct base_jit_alloc_info *info; + + lockdep_assert_held(&katom->kctx->jctx.lock); + + if (WARN_ON(!katom->softjob_data)) + return; + + /* Remove atom from jit_atoms_head list */ + list_del(&katom->jit_node); + + if (katom->jit_blocked) { + list_del(&katom->queue); + katom->jit_blocked = false; + } + + info = katom->softjob_data; + /* Free the info structure */ + kfree(info); +} + +static int kbase_jit_free_prepare(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + struct kbase_device *kbdev = kctx->kbdev; + __user void *data = (__user void *)(uintptr_t) katom->jc; + u8 *ids; + u32 count = MAX(katom->nr_extres, 1); + u32 i; + int ret; + + /* Sanity checks */ + if (count > 
ARRAY_SIZE(kctx->jit_alloc)) { + ret = -EINVAL; + goto fail; + } + + /* Copy the information for safe access and future storage */ + ids = kmalloc_array(count, sizeof(*ids), GFP_KERNEL); + if (!ids) { + ret = -ENOMEM; + goto fail; + } + + lockdep_assert_held(&kctx->jctx.lock); + katom->softjob_data = ids; + + /* For backwards compatibility */ + if (katom->nr_extres) { + /* Fail the job if there is no list of ids */ + if (!data) { + ret = -EINVAL; + goto free_info; + } + + if (copy_from_user(ids, data, sizeof(*ids)*count) != 0) { + ret = -EINVAL; + goto free_info; + } + } else { + katom->nr_extres = 1; + *ids = (u8)katom->jc; + } + for (i = 0; i < count; i++) + KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO(kbdev, katom, ids[i]); + + list_add_tail(&katom->jit_node, &kctx->jctx.jit_atoms_head); + + return 0; + +free_info: + kfree(katom->softjob_data); + katom->softjob_data = NULL; +fail: + return ret; +} + +static void kbase_jit_free_process(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + u8 *ids = kbase_jit_free_get_ids(katom); + u32 count = katom->nr_extres; + u32 i; + + if (ids == NULL) { + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return; + } + + for (i = 0; i < count; i++, ids++) { + /* + * If the ID is zero or it is not in use yet then fail the job. 
+ */ + if ((*ids == 0) || (kctx->jit_alloc[*ids] == NULL)) { + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + return; + } + } +} + +static void kbasep_jit_finish_worker(struct work_struct *work) +{ + struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom, + work); + struct kbase_context *kctx = katom->kctx; + int resched; + + mutex_lock(&kctx->jctx.lock); + kbase_finish_soft_job(katom); + resched = jd_done_nolock(katom, NULL); + mutex_unlock(&kctx->jctx.lock); + + if (resched) + kbase_js_sched_all(kctx->kbdev); +} + +void kbase_jit_retry_pending_alloc(struct kbase_context *kctx) +{ + LIST_HEAD(jit_pending_alloc_list); + struct list_head *i, *tmp; + + list_splice_tail_init(&kctx->jctx.jit_pending_alloc, + &jit_pending_alloc_list); + + list_for_each_safe(i, tmp, &jit_pending_alloc_list) { + struct kbase_jd_atom *pending_atom = list_entry(i, + struct kbase_jd_atom, queue); + if (kbase_jit_allocate_process(pending_atom) == 0) { + /* Atom has completed */ + INIT_WORK(&pending_atom->work, + kbasep_jit_finish_worker); + queue_work(kctx->jctx.job_done_wq, &pending_atom->work); + } + } +} + +static void kbase_jit_free_finish(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + u8 *ids; + size_t j; + + lockdep_assert_held(&kctx->jctx.lock); + + ids = kbase_jit_free_get_ids(katom); + if (WARN_ON(ids == NULL)) { + return; + } + + /* Remove this atom from the jit_atoms_head list */ + list_del(&katom->jit_node); + + for (j = 0; j != katom->nr_extres; ++j) { + if ((ids[j] != 0) && (kctx->jit_alloc[ids[j]] != NULL)) { + /* + * If the ID is valid but the allocation request failed + * still succeed this soft job but don't try and free + * the allocation. 
+ */ + if (kctx->jit_alloc[ids[j]] != + KBASE_RESERVED_REG_JIT_ALLOC) { + KBASE_TLSTREAM_TL_JIT_USEDPAGES(kctx->kbdev, + kctx->jit_alloc[ids[j]]-> + gpu_alloc->nents, ids[j]); + kbase_jit_free(kctx, kctx->jit_alloc[ids[j]]); + } + kctx->jit_alloc[ids[j]] = NULL; + } + } + /* Free the list of ids */ + kfree(ids); + + kbase_jit_retry_pending_alloc(kctx); +} + +static int kbase_ext_res_prepare(struct kbase_jd_atom *katom) +{ + __user struct base_external_resource_list *user_ext_res; + struct base_external_resource_list *ext_res; + u64 count = 0; + size_t copy_size; + int ret; + + user_ext_res = (__user struct base_external_resource_list *) + (uintptr_t) katom->jc; + + /* Fail the job if there is no info structure */ + if (!user_ext_res) { + ret = -EINVAL; + goto fail; + } + + if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) { + ret = -EINVAL; + goto fail; + } + + /* Is the number of external resources in range? */ + if (!count || count > BASE_EXT_RES_COUNT_MAX) { + ret = -EINVAL; + goto fail; + } + + /* Copy the information for safe access and future storage */ + copy_size = sizeof(*ext_res); + copy_size += sizeof(struct base_external_resource) * (count - 1); + ext_res = kzalloc(copy_size, GFP_KERNEL); + if (!ext_res) { + ret = -ENOMEM; + goto fail; + } + + if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) { + ret = -EINVAL; + goto free_info; + } + + /* + * Overwrite the count with the first value incase it was changed + * after the fact. 
+ */ + ext_res->count = count; + + katom->softjob_data = ext_res; + + return 0; + +free_info: + kfree(ext_res); +fail: + return ret; +} + +static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map) +{ + struct base_external_resource_list *ext_res; + int i; + bool failed = false; + + ext_res = katom->softjob_data; + if (!ext_res) + goto failed_jc; + + kbase_gpu_vm_lock(katom->kctx); + + for (i = 0; i < ext_res->count; i++) { + u64 gpu_addr; + + gpu_addr = ext_res->ext_res[i].ext_resource & + ~BASE_EXT_RES_ACCESS_EXCLUSIVE; + if (map) { + if (!kbase_sticky_resource_acquire(katom->kctx, + gpu_addr)) + goto failed_loop; + } else + if (!kbase_sticky_resource_release_force(katom->kctx, NULL, + gpu_addr)) + failed = true; + } + + /* + * In the case of unmap we continue unmapping other resources in the + * case of failure but will always report failure if _any_ unmap + * request fails. + */ + if (failed) + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + else + katom->event_code = BASE_JD_EVENT_DONE; + + kbase_gpu_vm_unlock(katom->kctx); + + return; + +failed_loop: + while (i > 0) { + u64 const gpu_addr = ext_res->ext_res[i - 1].ext_resource & + ~BASE_EXT_RES_ACCESS_EXCLUSIVE; + + kbase_sticky_resource_release_force(katom->kctx, NULL, gpu_addr); + + --i; + } + + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + kbase_gpu_vm_unlock(katom->kctx); + +failed_jc: + return; +} + +static void kbase_ext_res_finish(struct kbase_jd_atom *katom) +{ + struct base_external_resource_list *ext_res; + + ext_res = katom->softjob_data; + /* Free the info structure */ + kfree(ext_res); +} + +int kbase_process_soft_job(struct kbase_jd_atom *katom) +{ + int ret = 0; + struct kbase_context *kctx = katom->kctx; + struct kbase_device *kbdev = kctx->kbdev; + + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(kbdev, katom); + + trace_sysgraph(SGR_SUBMIT, kctx->id, + kbase_jd_atom_id(kctx, katom)); + + switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { + case 
BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: + ret = kbase_dump_cpu_gpu_time(katom); + break; + +#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) + case BASE_JD_REQ_SOFT_FENCE_TRIGGER: + katom->event_code = kbase_sync_fence_out_trigger(katom, + katom->event_code == BASE_JD_EVENT_DONE ? + 0 : -EFAULT); + break; + case BASE_JD_REQ_SOFT_FENCE_WAIT: + { + ret = kbase_sync_fence_in_wait(katom); + + if (ret == 1) { +#ifdef CONFIG_MALI_FENCE_DEBUG + kbasep_add_waiting_with_timeout(katom); +#else + kbasep_add_waiting_soft_job(katom); +#endif + } + break; + } +#endif + case BASE_JD_REQ_SOFT_EVENT_WAIT: + ret = kbasep_soft_event_wait(katom); + break; + case BASE_JD_REQ_SOFT_EVENT_SET: + kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_SET); + break; + case BASE_JD_REQ_SOFT_EVENT_RESET: + kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET); + break; + case BASE_JD_REQ_SOFT_DEBUG_COPY: + { + int res = kbase_debug_copy(katom); + + if (res) + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + break; + } + case BASE_JD_REQ_SOFT_JIT_ALLOC: + ret = kbase_jit_allocate_process(katom); + break; + case BASE_JD_REQ_SOFT_JIT_FREE: + kbase_jit_free_process(katom); + break; + case BASE_JD_REQ_SOFT_EXT_RES_MAP: + kbase_ext_res_process(katom, true); + break; + case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: + kbase_ext_res_process(katom, false); + break; + } + + /* Atom is complete */ + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(kbdev, katom); + return ret; +} + +void kbase_cancel_soft_job(struct kbase_jd_atom *katom) +{ + switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { +#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) + case BASE_JD_REQ_SOFT_FENCE_WAIT: + kbase_sync_fence_in_cancel_wait(katom); + break; +#endif + case BASE_JD_REQ_SOFT_EVENT_WAIT: + kbasep_soft_event_cancel_job(katom); + break; + default: + /* This soft-job doesn't support cancellation! 
*/ + KBASE_DEBUG_ASSERT(0); + } +} + +int kbase_prepare_soft_job(struct kbase_jd_atom *katom) +{ + switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { + case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: + { + if (!IS_ALIGNED(katom->jc, cache_line_size())) + return -EINVAL; + } + break; +#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) + case BASE_JD_REQ_SOFT_FENCE_TRIGGER: + { + struct base_fence fence; + int fd; + + if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence))) + return -EINVAL; + + fd = kbase_sync_fence_out_create(katom, + fence.basep.stream_fd); + if (fd < 0) + return -EINVAL; + + fence.basep.fd = fd; + if (0 != copy_to_user((__user void *)(uintptr_t) katom->jc, &fence, sizeof(fence))) { + kbase_sync_fence_out_remove(katom); + kbase_sync_fence_close_fd(fd); + fence.basep.fd = -EINVAL; + return -EINVAL; + } + } + break; + case BASE_JD_REQ_SOFT_FENCE_WAIT: + { + struct base_fence fence; + int ret; + + if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence))) + return -EINVAL; + + /* Get a reference to the fence object */ + ret = kbase_sync_fence_in_from_fd(katom, + fence.basep.fd); + if (ret < 0) + return ret; + +#ifdef CONFIG_MALI_DMA_FENCE + /* + * Set KCTX_NO_IMPLICIT_FENCE in the context the first + * time a soft fence wait job is observed. This will + * prevent the implicit dma-buf fence to conflict with + * the Android native sync fences. 
+ */ + if (!kbase_ctx_flag(katom->kctx, KCTX_NO_IMPLICIT_SYNC)) + kbase_ctx_flag_set(katom->kctx, KCTX_NO_IMPLICIT_SYNC); +#endif /* CONFIG_MALI_DMA_FENCE */ + } + break; +#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ + case BASE_JD_REQ_SOFT_JIT_ALLOC: + return kbase_jit_allocate_prepare(katom); + case BASE_JD_REQ_SOFT_JIT_FREE: + return kbase_jit_free_prepare(katom); + case BASE_JD_REQ_SOFT_EVENT_WAIT: + case BASE_JD_REQ_SOFT_EVENT_SET: + case BASE_JD_REQ_SOFT_EVENT_RESET: + if (katom->jc == 0) + return -EINVAL; + break; + case BASE_JD_REQ_SOFT_DEBUG_COPY: + return kbase_debug_copy_prepare(katom); + case BASE_JD_REQ_SOFT_EXT_RES_MAP: + return kbase_ext_res_prepare(katom); + case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: + return kbase_ext_res_prepare(katom); + default: + /* Unsupported soft-job */ + return -EINVAL; + } + return 0; +} + +void kbase_finish_soft_job(struct kbase_jd_atom *katom) +{ + trace_sysgraph(SGR_COMPLETE, katom->kctx->id, + kbase_jd_atom_id(katom->kctx, katom)); + + switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { + case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: + /* Nothing to do */ + break; +#if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) + case BASE_JD_REQ_SOFT_FENCE_TRIGGER: + /* If fence has not yet been signaled, do it now */ + kbase_sync_fence_out_trigger(katom, katom->event_code == + BASE_JD_EVENT_DONE ? 
0 : -EFAULT); + break; + case BASE_JD_REQ_SOFT_FENCE_WAIT: + /* Release katom's reference to fence object */ + kbase_sync_fence_in_remove(katom); + break; +#endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */ + case BASE_JD_REQ_SOFT_DEBUG_COPY: + kbase_debug_copy_finish(katom); + break; + case BASE_JD_REQ_SOFT_JIT_ALLOC: + kbase_jit_allocate_finish(katom); + break; + case BASE_JD_REQ_SOFT_EXT_RES_MAP: + kbase_ext_res_finish(katom); + break; + case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: + kbase_ext_res_finish(katom); + break; + case BASE_JD_REQ_SOFT_JIT_FREE: + kbase_jit_free_finish(katom); + break; + } +} + +void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev) +{ + LIST_HEAD(local_suspended_soft_jobs); + struct kbase_jd_atom *tmp_iter; + struct kbase_jd_atom *katom_iter; + struct kbasep_js_device_data *js_devdata; + bool resched = false; + + KBASE_DEBUG_ASSERT(kbdev); + + js_devdata = &kbdev->js_data; + + /* Move out the entire list */ + mutex_lock(&js_devdata->runpool_mutex); + list_splice_init(&js_devdata->suspended_soft_jobs_list, + &local_suspended_soft_jobs); + mutex_unlock(&js_devdata->runpool_mutex); + + /* + * Each atom must be detached from the list and ran separately - + * it could be re-added to the old list, but this is unlikely + */ + list_for_each_entry_safe(katom_iter, tmp_iter, + &local_suspended_soft_jobs, dep_item[1]) { + struct kbase_context *kctx = katom_iter->kctx; + + mutex_lock(&kctx->jctx.lock); + + /* Remove from the global list */ + list_del(&katom_iter->dep_item[1]); + /* Remove from the context's list of waiting soft jobs */ + kbasep_remove_waiting_soft_job(katom_iter); + + if (kbase_process_soft_job(katom_iter) == 0) { + kbase_finish_soft_job(katom_iter); + resched |= jd_done_nolock(katom_iter, NULL); + } + mutex_unlock(&kctx->jctx.lock); + } + + if (resched) + kbase_js_sched_all(kbdev); +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c new 
file mode 100644 index 0000000..22caa4a --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c @@ -0,0 +1,28 @@ + /* + * + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ +#include "mali_kbase_strings.h" + +#define KBASE_DRV_NAME "mali" +#define KBASE_TIMELINE_NAME KBASE_DRV_NAME ".timeline" + +const char kbase_drv_name[] = KBASE_DRV_NAME; +const char kbase_timeline_name[] = KBASE_TIMELINE_NAME; diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h new file mode 100644 index 0000000..d2f1825 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h @@ -0,0 +1,24 @@ +/* + * + * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +extern const char kbase_drv_name[]; +extern const char kbase_timeline_name[]; diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h new file mode 100644 index 0000000..80b54d0 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h @@ -0,0 +1,223 @@ +/* + * + * (C) COPYRIGHT 2012-2016, 2018-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * @file mali_kbase_sync.h + * + * This file contains our internal "API" for explicit fences. + * It hides the implementation details of the actual explicit fence mechanism + * used (Android fences or sync file with DMA fences). 
+ */ + +#ifndef MALI_KBASE_SYNC_H +#define MALI_KBASE_SYNC_H + +#include +#ifdef CONFIG_SYNC +#include +#endif +#ifdef CONFIG_SYNC_FILE +#include "mali_kbase_fence_defs.h" +#include +#endif + +#include "mali_kbase.h" + +/** + * struct kbase_sync_fence_info - Information about a fence + * @fence: Pointer to fence (type is void*, as underlying struct can differ) + * @name: The name given to this fence when it was created + * @status: < 0 means error, 0 means active, 1 means signaled + * + * Use kbase_sync_fence_in_info_get() or kbase_sync_fence_out_info_get() + * to get the information. + */ +struct kbase_sync_fence_info { + void *fence; + char name[32]; + int status; +}; + +/** + * kbase_sync_fence_stream_create() - Create a stream object + * @name: Name of stream (only used to ease debugging/visualization) + * @out_fd: A file descriptor representing the created stream object + * + * Can map down to a timeline implementation in some implementations. + * Exposed as a file descriptor. + * Life-time controlled via the file descriptor: + * - dup to add a ref + * - close to remove a ref + * + * return: 0 on success, < 0 on error + */ +int kbase_sync_fence_stream_create(const char *name, int *const out_fd); + +/** + * kbase_sync_fence_out_create() - Create an explicit output fence to specified atom + * @katom: Atom to assign the new explicit fence to + * @stream_fd: File descriptor for stream object to create fence on + * + * return: Valid file descriptor to fence or < 0 on error + */ +int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd); + +/** + * kbase_sync_fence_in_from_fd() - Assigns an existing fence to specified atom + * @katom: Atom to assign the existing explicit fence to + * @fd: File descriptor to an existing fence + * + * Assigns an explicit input fence to atom.
+ * This can later be waited for by calling kbase_sync_fence_in_wait() + * + * return: 0 on success, < 0 on error + */ +int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd); + +/** + * kbase_sync_fence_validate() - Validate a fd to be a valid fence + * @fd: File descriptor to check + * + * This function is only usable to catch unintentional user errors early, + * it does not stop malicious code changing the fd after this function returns. + * + * return: 0 if fd is for a valid fence, < 0 if invalid + */ +int kbase_sync_fence_validate(int fd); + +/** + * kbase_sync_fence_out_trigger() - Signal explicit output fence attached on katom + * @katom: Atom with an explicit fence to signal + * @result: < 0 means signal with error, >= 0 indicates success + * + * Signal output fence attached on katom and remove the fence from the atom. + * + * return: The "next" event code for atom, typically JOB_CANCELLED or EVENT_DONE + */ +enum base_jd_event_code +kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result); + +/** + * kbase_sync_fence_in_wait() - Wait for explicit input fence to be signaled + * @katom: Atom with explicit fence to wait for + * + * If the fence is already signaled, then 0 is returned, and the caller must + * continue processing of the katom. + * + * If the fence isn't already signaled, then this kbase_sync framework will + * take responsibility to continue the processing once the fence is signaled.
+ * + * return: 0 if already signaled, otherwise 1 + */ +int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom); + +/** + * kbase_sync_fence_in_cancel_wait() - Cancel explicit input fence waits + * @katom: Atom to cancel wait for + * + * This function is fully responsible for continuing processing of this atom + * (remove_waiting_soft_job + finish_soft_job + jd_done + js_sched_all) + */ +void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom); + +/** + * kbase_sync_fence_in_remove() - Remove the input fence from the katom + * @katom: Atom to remove explicit input fence for + * + * This will also release the corresponding reference. + */ +void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom); + +/** + * kbase_sync_fence_out_remove() - Remove the output fence from the katom + * @katom: Atom to remove explicit output fence for + * + * This will also release the corresponding reference. + */ +void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom); + +/** + * kbase_sync_fence_close_fd() - Close a file descriptor representing a fence + * @fd: File descriptor to close + */ +static inline void kbase_sync_fence_close_fd(int fd) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + ksys_close(fd); +#else + sys_close(fd); +#endif +} + +/** + * kbase_sync_fence_in_info_get() - Retrieves information about input fence + * @katom: Atom to get fence information from + * @info: Struct to be filled with fence information + * + * return: 0 on success, < 0 on error + */ +int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, + struct kbase_sync_fence_info *info); + +/** + * kbase_sync_fence_out_info_get() - Retrieves information about output fence + * @katom: Atom to get fence information from + * @info: Struct to be filled with fence information + * + * return: 0 on success, < 0 on error + */ +int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, + struct kbase_sync_fence_info *info); + +#if defined(CONFIG_SYNC_FILE) +#if 
(KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +void kbase_sync_fence_info_get(struct fence *fence, + struct kbase_sync_fence_info *info); +#else +void kbase_sync_fence_info_get(struct dma_fence *fence, + struct kbase_sync_fence_info *info); +#endif +#endif + +/** + * kbase_sync_status_string() - Get string matching @status + * @status: Value of fence status. + * + * return: Pointer to string describing @status. + */ +const char *kbase_sync_status_string(int status); + + +/* + * Internal worker used to continue processing of atom. + */ +void kbase_sync_fence_wait_worker(struct work_struct *data); + +#ifdef CONFIG_MALI_FENCE_DEBUG +/** + * kbase_sync_fence_in_dump() Trigger a debug dump of atoms input fence state + * @katom: Atom to trigger fence debug dump for + */ +void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom); +#endif + +#endif /* MALI_KBASE_SYNC_H */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_android.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_android.c new file mode 100644 index 0000000..75940fb --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_android.c @@ -0,0 +1,542 @@ +/* + * + * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * Code for supporting explicit Android fences (CONFIG_SYNC) + * Known to be good for kernels 4.5 and earlier. + * Replaced with CONFIG_SYNC_FILE for 4.9 and later kernels + * (see mali_kbase_sync_file.c) + */ + +#include +#include +#include +#include +#include +#include +#include +#include "sync.h" +#include +#include + +struct mali_sync_timeline { + struct sync_timeline timeline; + atomic_t counter; + atomic_t signaled; +}; + +struct mali_sync_pt { + struct sync_pt pt; + int order; + int result; +}; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) +/* For backwards compatibility with kernels before 3.17. After 3.17 + * sync_pt_parent is included in the kernel. */ +static inline struct sync_timeline *sync_pt_parent(struct sync_pt *pt) +{ + return pt->parent; +} +#endif + +static struct mali_sync_timeline *to_mali_sync_timeline( + struct sync_timeline *timeline) +{ + return container_of(timeline, struct mali_sync_timeline, timeline); +} + +static struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt) +{ + return container_of(pt, struct mali_sync_pt, pt); +} + +static struct sync_pt *timeline_dup(struct sync_pt *pt) +{ + struct mali_sync_pt *mpt = to_mali_sync_pt(pt); + struct mali_sync_pt *new_mpt; + struct sync_pt *new_pt = sync_pt_create(sync_pt_parent(pt), + sizeof(struct mali_sync_pt)); + + if (!new_pt) + return NULL; + + new_mpt = to_mali_sync_pt(new_pt); + new_mpt->order = mpt->order; + new_mpt->result = mpt->result; + + return new_pt; +} + +static int timeline_has_signaled(struct sync_pt *pt) +{ + struct mali_sync_pt *mpt = to_mali_sync_pt(pt); + struct mali_sync_timeline *mtl = to_mali_sync_timeline( + sync_pt_parent(pt)); + int result = mpt->result; + + int diff = atomic_read(&mtl->signaled) - mpt->order; + + if (diff >= 0) + return (result < 0) ? 
result : 1; + + return 0; +} + +static int timeline_compare(struct sync_pt *a, struct sync_pt *b) +{ + struct mali_sync_pt *ma = container_of(a, struct mali_sync_pt, pt); + struct mali_sync_pt *mb = container_of(b, struct mali_sync_pt, pt); + + int diff = ma->order - mb->order; + + if (diff == 0) + return 0; + + return (diff < 0) ? -1 : 1; +} + +static void timeline_value_str(struct sync_timeline *timeline, char *str, + int size) +{ + struct mali_sync_timeline *mtl = to_mali_sync_timeline(timeline); + + snprintf(str, size, "%d", atomic_read(&mtl->signaled)); +} + +static void pt_value_str(struct sync_pt *pt, char *str, int size) +{ + struct mali_sync_pt *mpt = to_mali_sync_pt(pt); + + snprintf(str, size, "%d(%d)", mpt->order, mpt->result); +} + +static struct sync_timeline_ops mali_timeline_ops = { + .driver_name = "Mali", + .dup = timeline_dup, + .has_signaled = timeline_has_signaled, + .compare = timeline_compare, + .timeline_value_str = timeline_value_str, + .pt_value_str = pt_value_str, +}; + +/* Allocates a timeline for Mali + * + * One timeline should be allocated per API context. 
+ */
+static struct sync_timeline *mali_sync_timeline_alloc(const char *name)
+{
+	struct sync_timeline *tl;
+	struct mali_sync_timeline *mtl;
+
+	tl = sync_timeline_create(&mali_timeline_ops,
+				  sizeof(struct mali_sync_timeline), name);
+	if (!tl)
+		return NULL;
+
+	/* Zero both counters (counter and signaled) in our private struct */
+	mtl = to_mali_sync_timeline(tl);
+	atomic_set(&mtl->counter, 0);
+	atomic_set(&mtl->signaled, 0);
+
+	return tl;
+}
+/* .release hook for stream fds: destroys the timeline kept in private_data */
+static int kbase_stream_close(struct inode *inode, struct file *file)
+{
+	struct sync_timeline *tl;
+
+	tl = (struct sync_timeline *)file->private_data;
+	sync_timeline_destroy(tl);
+	return 0;
+}
+/* File operations of the anonymous inode that represents a timeline */
+static const struct file_operations stream_fops = {
+	.owner = THIS_MODULE,
+	.release = kbase_stream_close,
+};
+/* Create a Mali timeline and return an fd for it in *out_fd; 0 or -EINVAL */
+int kbase_sync_fence_stream_create(const char *name, int *const out_fd)
+{
+	struct sync_timeline *tl;
+
+	if (!out_fd)
+		return -EINVAL;
+
+	tl = mali_sync_timeline_alloc(name);
+	if (!tl)
+		return -EINVAL;
+
+	*out_fd = anon_inode_getfd(name, &stream_fops, tl, O_RDONLY|O_CLOEXEC);
+
+	if (*out_fd < 0) {
+		/* no fd owns the timeline, so destroy it here */
+		sync_timeline_destroy(tl);
+		return -EINVAL;	/* the anon_inode_getfd() error is not passed on */
+	}
+
+	return 0;
+}
+
+/* Allocates a sync point within the timeline.
+ *
+ * The timeline must be the one allocated by mali_sync_timeline_alloc()
+ *
+ * Sync points must be triggered in *exactly* the same order as they are
+ * allocated.
+ */
+static struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent)
+{
+	struct sync_pt *pt = sync_pt_create(parent,
+					    sizeof(struct mali_sync_pt));
+	struct mali_sync_timeline *mtl = to_mali_sync_timeline(parent);
+	struct mali_sync_pt *mpt;
+
+	if (!pt)
+		return NULL;
+
+	mpt = to_mali_sync_pt(pt);
+	mpt->order = atomic_inc_return(&mtl->counter);	/* next slot in line */
+	mpt->result = 0;
+
+	return pt;
+}
+/* Create a fence on the timeline behind tl_fd; returns its new fd or -errno */
+int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int tl_fd)
+{
+	struct sync_timeline *tl;
+	struct sync_pt *pt;
+	struct sync_fence *fence;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0)
+	struct files_struct *files;
+	struct fdtable *fdt;
+#endif
+	int fd;
+	struct file *tl_file;
+
+	tl_file = fget(tl_fd);
+	if (tl_file == NULL)
+		return -EBADF;
+
+	if (tl_file->f_op != &stream_fops) {	/* not one of our stream fds */
+		fd = -EBADF;
+		goto out;
+	}
+
+	tl = tl_file->private_data;
+
+	pt = kbase_sync_pt_alloc(tl);
+	if (!pt) {
+		fd = -EFAULT;
+		goto out;
+	}
+
+	fence = sync_fence_create("mali_fence", pt);
+	if (!fence) {
+		sync_pt_free(pt);
+		fd = -EFAULT;
+		goto out;
+	}
+
+	/* from here the fence owns the sync_pt */
+
+	/* create a fd representing the fence */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+	fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+	if (fd < 0) {
+		sync_fence_put(fence);
+		goto out;
+	}
+#else
+	fd = get_unused_fd();
+	if (fd < 0) {
+		sync_fence_put(fence);
+		goto out;
+	}
+
+	files = current->files;	/* mark the new fd close-on-exec by hand */
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+	__set_close_on_exec(fd, fdt);
+#else
+	FD_SET(fd, fdt->close_on_exec);
+#endif
+	spin_unlock(&files->file_lock);
+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */
+
+	/* bind fence to the new fd */
+	sync_fence_install(fence, fd);
+
+	katom->fence = sync_fence_fdget(fd);
+	if (katom->fence == NULL) {
+		/* The only way the fence can be NULL is if userspace closed it
+		 * for us, so we don't need to clear it up */
+		fd = -EINVAL;
+		goto out;
+ } + +out: + fput(tl_file); + + return fd; +} + +int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd) +{ + katom->fence = sync_fence_fdget(fd); + return katom->fence ? 0 : -ENOENT; +} + +int kbase_sync_fence_validate(int fd) +{ + struct sync_fence *fence; + + fence = sync_fence_fdget(fd); + if (!fence) + return -EINVAL; + + sync_fence_put(fence); + return 0; +} + +/* Returns true if the specified timeline is allocated by Mali */ +static int kbase_sync_timeline_is_ours(struct sync_timeline *timeline) +{ + return timeline->ops == &mali_timeline_ops; +} + +/* Signals a particular sync point + * + * Sync points must be triggered in *exactly* the same order as they are + * allocated. + * + * If they are signaled in the wrong order then a message will be printed in + * debug builds and otherwise attempts to signal order sync_pts will be ignored. + * + * result can be negative to indicate error, any other value is interpreted as + * success. + */ +static void kbase_sync_signal_pt(struct sync_pt *pt, int result) +{ + struct mali_sync_pt *mpt = to_mali_sync_pt(pt); + struct mali_sync_timeline *mtl = to_mali_sync_timeline( + sync_pt_parent(pt)); + int signaled; + int diff; + + mpt->result = result; + + do { + signaled = atomic_read(&mtl->signaled); + + diff = signaled - mpt->order; + + if (diff > 0) { + /* The timeline is already at or ahead of this point. + * This should not happen unless userspace has been + * signaling fences out of order, so warn but don't + * violate the sync_pt API. + * The warning is only in debug builds to prevent + * a malicious user being able to spam dmesg. 
+ */ +#ifdef CONFIG_MALI_DEBUG + pr_err("Fences were triggered in a different order to allocation!"); +#endif /* CONFIG_MALI_DEBUG */ + return; + } + } while (atomic_cmpxchg(&mtl->signaled, + signaled, mpt->order) != signaled); +} + +enum base_jd_event_code +kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result) +{ + struct sync_pt *pt; + struct sync_timeline *timeline; + + if (!katom->fence) + return BASE_JD_EVENT_JOB_CANCELLED; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) + if (!list_is_singular(&katom->fence->pt_list_head)) { +#else + if (katom->fence->num_fences != 1) { +#endif + /* Not exactly one item in the list - so it didn't (directly) + * come from us */ + return BASE_JD_EVENT_JOB_CANCELLED; + } + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) + pt = list_first_entry(&katom->fence->pt_list_head, + struct sync_pt, pt_list); +#else + pt = container_of(katom->fence->cbs[0].sync_pt, struct sync_pt, base); +#endif + timeline = sync_pt_parent(pt); + + if (!kbase_sync_timeline_is_ours(timeline)) { + /* Fence has a sync_pt which isn't ours! */ + return BASE_JD_EVENT_JOB_CANCELLED; + } + + kbase_sync_signal_pt(pt, result); + + sync_timeline_signal(timeline); + + kbase_sync_fence_out_remove(katom); + + return (result < 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE; +} + +static inline int kbase_fence_get_status(struct sync_fence *fence) +{ + if (!fence) + return -ENOENT; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) + return fence->status; +#else + return atomic_read(&fence->status); +#endif +} + +static void kbase_fence_wait_callback(struct sync_fence *fence, + struct sync_fence_waiter *waiter) +{ + struct kbase_jd_atom *katom = container_of(waiter, + struct kbase_jd_atom, sync_waiter); + struct kbase_context *kctx = katom->kctx; + + /* Propagate the fence status to the atom. + * If negative then cancel this atom and its dependencies. 
+	 */
+	if (kbase_fence_get_status(fence) < 0)
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	/* To prevent a potential deadlock we schedule the work onto the
+	 * job_done_wq workqueue
+	 *
+	 * The issue is that we may signal the timeline while holding
+	 * kctx->jctx.lock and the callbacks are run synchronously from
+	 * sync_timeline_signal. So we simply defer the work.
+	 */
+
+	INIT_WORK(&katom->work, kbase_sync_fence_wait_worker);
+	queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+/* Wait asynchronously on katom->fence; 0 = already signaled, 1 = deferred */
+int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom)
+{
+	int ret;
+
+	sync_fence_waiter_init(&katom->sync_waiter, kbase_fence_wait_callback);
+
+	ret = sync_fence_wait_async(katom->fence, &katom->sync_waiter);
+
+	if (ret == 1) {
+		/* Already signaled */
+		return 0;
+	}
+
+	if (ret < 0) {
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+		/* We should cause the dependent jobs in the bag to be failed,
+		 * to do this we schedule the work queue to complete this job */
+		INIT_WORK(&katom->work, kbase_sync_fence_wait_worker);
+		queue_work(katom->kctx->jctx.job_done_wq, &katom->work);
+	}
+
+	return 1;	/* completion happens later, from the callback or worker */
+}
+/* Cancel a pending wait on katom->fence and finish the atom as cancelled */
+void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom)
+{
+	if (sync_fence_cancel_async(katom->fence, &katom->sync_waiter) != 0) {
+		/* The wait wasn't cancelled - leave the cleanup for
+		 * kbase_fence_wait_callback */
+		return;
+	}
+
+	/* Wait was cancelled - zap the atoms */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	kbasep_remove_waiting_soft_job(katom);
+	kbase_finish_soft_job(katom);
+
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+/* Release katom->fence, if set, and clear the pointer */
+void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom)
+{
+	if (katom->fence) {
+		sync_fence_put(katom->fence);
+		katom->fence = NULL;
+	}
+}
+/* Same as kbase_sync_fence_out_remove(): both directions use katom->fence */
+void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom)
+{
+	if (katom->fence) {
+		sync_fence_put(katom->fence);
+		katom->fence = NULL;
+	}
+}
+/* Fill info from katom->fence (pointer, status, name); -ENOENT if unset */
+int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom,
+				 struct kbase_sync_fence_info *info)
+{
+	if (!katom->fence)
+		return -ENOENT;
+
+	info->fence = katom->fence;
+	info->status = kbase_fence_get_status(katom->fence);
+	strlcpy(info->name, katom->fence->name, sizeof(info->name));
+
+	return 0;
+}
+/* Output-fence variant; the body matches kbase_sync_fence_in_info_get() */
+int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom,
+				 struct kbase_sync_fence_info *info)
+{
+	if (!katom->fence)
+		return -ENOENT;
+
+	info->fence = katom->fence;
+	info->status = kbase_fence_get_status(katom->fence);
+	strlcpy(info->name, katom->fence->name, sizeof(info->name));
+
+	return 0;
+}
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom)
+{
+	/* Dump out the full state of all the Android sync fences.
+	 * The function sync_dump() isn't exported to modules, so force
+	 * sync_fence_wait() to time out to trigger sync_dump().
+	 */
+	if (katom->fence)
+		sync_fence_wait(katom->fence, 1);
+}
+#endif
diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_common.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_common.c
new file mode 100644
index 0000000..2e1ede5
--- /dev/null
+++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_common.c
@@ -0,0 +1,49 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2016, 2018-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * @file mali_kbase_sync_common.c
+ *
+ * Common code for our explicit fence functionality
+ */
+
+#include
+#include "mali_kbase.h"
+#include "mali_kbase_sync.h"
+/* Work handler: passes the owning atom to kbase_soft_event_wait_callback() */
+void kbase_sync_fence_wait_worker(struct work_struct *data)
+{
+	struct kbase_jd_atom *katom;
+
+	katom = container_of(data, struct kbase_jd_atom, work);
+	kbase_soft_event_wait_callback(katom);
+}
+/* Name a fence status: 0 is "active", positive "signaled", negative "error" */
+const char *kbase_sync_status_string(int status)
+{
+	if (status == 0)
+		return "active";
+	else if (status > 0)
+		return "signaled";
+	else
+		return "error";
+}
diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_file.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_file.c
new file mode 100644
index 0000000..0679c48
--- /dev/null
+++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_file.c
@@ -0,0 +1,366 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Code for supporting explicit Linux fences (CONFIG_SYNC_FILE)
+ * Introduced in kernel 4.9.
+ * Android explicit fences (CONFIG_SYNC) can be used for older kernels
+ * (see mali_kbase_sync_android.c)
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "mali_kbase_fence_defs.h"
+#include "mali_kbase_sync.h"
+#include "mali_kbase_fence.h"
+#include "mali_kbase.h"
+/* File operations for stream fds; they carry no private data or hooks */
+static const struct file_operations stream_fops = {
+	.owner = THIS_MODULE
+};
+/* Return a new anonymous-inode stream fd in *out_fd; 0 or -EINVAL */
+int kbase_sync_fence_stream_create(const char *name, int *const out_fd)
+{
+	if (!out_fd)
+		return -EINVAL;
+
+	*out_fd = anon_inode_getfd(name, &stream_fops, NULL,
+				   O_RDONLY | O_CLOEXEC);
+	if (*out_fd < 0)
+		return -EINVAL;	/* the anon_inode_getfd() error is not passed on */
+
+	return 0;
+}
+/* Wrap a new output fence for katom in a sync_file; returns fd or -errno */
+int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+	struct sync_file *sync_file;
+	int fd;
+
+	fence = kbase_fence_out_new(katom);
+	if (!fence)
+		return -ENOMEM;
+
+#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE)
+	/* Take an extra reference to the fence on behalf of the sync_file.
+	 * This is only needed on older kernels where sync_file_create()
+	 * does not take its own reference. This was changed in v4.9.68,
+	 * where sync_file_create() now takes its own reference.
+	 */
+	dma_fence_get(fence);
+#endif
+
+	/* create a sync_file fd representing the fence */
+	sync_file = sync_file_create(fence);
+	if (!sync_file) {
+#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE)
+		dma_fence_put(fence);	/* undo the extra reference taken above */
+#endif
+		kbase_fence_out_remove(katom);
+		return -ENOMEM;
+	}
+
+	fd = get_unused_fd_flags(O_CLOEXEC);
+	if (fd < 0) {
+		fput(sync_file->file);
+		kbase_fence_out_remove(katom);
+		return fd;
+	}
+
+	fd_install(fd, sync_file->file);
+
+	return fd;
+}
+/* Look up the fence behind fd and set it as katom's input fence; 0/-ENOENT */
+int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence = sync_file_get_fence(fd);
+#else
+	struct dma_fence *fence = sync_file_get_fence(fd);
+#endif
+
+	if (!fence)
+		return -ENOENT;
+
+	kbase_fence_fence_in_set(katom, fence);
+
+	return 0;
+}
+/* Return 0 if fd resolves to a sync_file fence, -EINVAL otherwise */
+int kbase_sync_fence_validate(int fd)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence = sync_file_get_fence(fd);
+#else
+	struct dma_fence *fence = sync_file_get_fence(fd);
+#endif
+
+	if (!fence)
+		return -EINVAL;
+
+	dma_fence_put(fence);
+
+	return 0; /* valid */
+}
+/* Signal katom's output fence with result, then remove it from the atom */
+enum base_jd_event_code
+kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result)
+{
+	int res;
+
+	if (!kbase_fence_out_is_ours(katom)) {
+		/* Not our fence */
+		return BASE_JD_EVENT_JOB_CANCELLED;
+	}
+
+	res = kbase_fence_out_signal(katom, result);
+	if (unlikely(res < 0)) {
+		/* a failed signal is only logged; the fence is still removed */
+		dev_warn(katom->kctx->kbdev->dev,
+				"fence_signal() failed with %d\n", res);
+	}
+
+	kbase_sync_fence_out_remove(katom);
+
+	return (result != 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE;
+}
+/* Fence callback: record a fence error on the atom, then defer completion */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+static void kbase_fence_wait_callback(struct fence *fence,
+				      struct fence_cb *cb)
+#else
+static void kbase_fence_wait_callback(struct dma_fence *fence,
+				      struct dma_fence_cb *cb)
+#endif
+{
+	struct kbase_fence_cb *kcb = container_of(cb,
+				struct kbase_fence_cb,
+				fence_cb);
+	struct kbase_jd_atom *katom = kcb->katom;
+	struct kbase_context *kctx = katom->kctx;
+
+	/* Cancel atom if fence is erroneous */
+#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \
+	(KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \
+	 KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE))
+	if (dma_fence_is_signaled(kcb->fence) && kcb->fence->error)
+#else
+	if (dma_fence_is_signaled(kcb->fence) && kcb->fence->status < 0)
+#endif
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	if (kbase_fence_dep_count_dec_and_test(katom)) {
+		/* We take responsibility of handling this */
+		kbase_fence_dep_count_set(katom, -1);
+
+		/* To prevent a potential deadlock we schedule the work onto the
+		 * job_done_wq workqueue
+		 *
+		 * The issue is that we may signal the timeline while holding
+		 * kctx->jctx.lock and the callbacks are run synchronously from
+		 * sync_timeline_signal. So we simply defer the work.
+		 */
+		INIT_WORK(&katom->work, kbase_sync_fence_wait_worker);
+		queue_work(kctx->jctx.job_done_wq, &katom->work);
+	}
+}
+/* Arm a callback on katom's input fence; 0 = proceed now, 1 = completes later */
+int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom)
+{
+	int err;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+
+	fence = kbase_fence_in_get(katom);
+	if (!fence)
+		return 0; /* no input fence to wait for, good to go!
*/
+
+	kbase_fence_dep_count_set(katom, 1);
+
+	err = kbase_fence_add_callback(katom, fence, kbase_fence_wait_callback);
+
+	kbase_fence_put(fence);
+
+	if (likely(!err)) {
+		/* Test if the callbacks are already triggered */
+		if (kbase_fence_dep_count_dec_and_test(katom)) {
+			kbase_fence_free_callbacks(katom);
+			kbase_fence_dep_count_set(katom, -1);
+			return 0; /* Already signaled, good to go right now */
+		}
+
+		/* Callback installed, so we just need to wait for it... */
+	} else {
+		/* Failure */
+		kbase_fence_free_callbacks(katom);
+		kbase_fence_dep_count_set(katom, -1);
+
+		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+		/* We should cause the dependent jobs in the bag to be failed,
+		 * to do this we schedule the work queue to complete this job */
+
+		INIT_WORK(&katom->work, kbase_sync_fence_wait_worker);
+		queue_work(katom->kctx->jctx.job_done_wq, &katom->work);
+	}
+
+	return 1; /* completion to be done later by callback/worker */
+}
+/* Cancel a pending input-fence wait and finish the atom as cancelled */
+void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom)
+{
+	if (!kbase_fence_free_callbacks(katom)) {
+		/* The wait wasn't cancelled -
+		 * leave the cleanup for kbase_fence_wait_callback */
+		return;
+	}
+
+	/* Take responsibility of completion */
+	kbase_fence_dep_count_set(katom, -1);
+
+	/* Wait was cancelled - zap the atoms */
+	katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+	kbasep_remove_waiting_soft_job(katom);
+	kbase_finish_soft_job(katom);
+
+	if (jd_done_nolock(katom, NULL))
+		kbase_js_sched_all(katom->kctx->kbdev);
+}
+/* Thin wrapper around kbase_fence_out_remove() */
+void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom)
+{
+	kbase_fence_out_remove(katom);
+}
+/* Free katom's fence callbacks first, then remove its input fence */
+void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom)
+{
+	kbase_fence_free_callbacks(katom);
+	kbase_fence_in_remove(katom);
+}
+/* Fill info with the fence pointer, a CONFIG_SYNC-style status and a name */
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+void kbase_sync_fence_info_get(struct fence *fence,
+			       struct kbase_sync_fence_info *info)
+#else
+void kbase_sync_fence_info_get(struct dma_fence *fence,
+			       struct kbase_sync_fence_info *info)
+#endif
+{
+	info->fence = fence;
+
+	/* translate into CONFIG_SYNC status:
+	 * < 0 : error
+	 * 0 : active
+	 * 1 : signaled
+	 */
+	if (dma_fence_is_signaled(fence)) {
+#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \
+	(KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \
+	 KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE))
+		int status = fence->error;
+#else
+		int status = fence->status;
+#endif
+		if (status < 0)
+			info->status = status; /* signaled with error */
+		else
+			info->status = 1; /* signaled with success */
+	} else {
+		info->status = 0; /* still active (unsignaled) */
+	}
+
+	/* name is "context#seqno"; the format widths follow the kernel version */
+#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE)
+	scnprintf(info->name, sizeof(info->name), "%u#%u",
+		  fence->context, fence->seqno);
+#elif (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE)
+	scnprintf(info->name, sizeof(info->name), "%llu#%u",
+		  fence->context, fence->seqno);
+#else
+	scnprintf(info->name, sizeof(info->name), "%llu#%llu",
+		  fence->context, fence->seqno);
+#endif
+}
+/* Describe katom's input fence in info; -ENOENT if the atom has none */
+int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom,
+				 struct kbase_sync_fence_info *info)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+
+	fence = kbase_fence_in_get(katom);
+	if (!fence)
+		return -ENOENT;
+
+	kbase_sync_fence_info_get(fence, info);
+
+	kbase_fence_put(fence);
+
+	return 0;
+}
+/* Describe katom's output fence in info; -ENOENT if the atom has none */
+int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom,
+				 struct kbase_sync_fence_info *info)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
+	struct fence *fence;
+#else
+	struct dma_fence *fence;
+#endif
+
+	fence = kbase_fence_out_get(katom);
+	if (!fence)
+		return -ENOENT;
+
+	kbase_sync_fence_info_get(fence, info);
+
+	kbase_fence_put(fence);
+
+	return 0;
+}
+
+/* Debug-only dump hook; this CONFIG_SYNC_FILE variant is an empty stub */
+#ifdef CONFIG_MALI_FENCE_DEBUG
+void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom)
+{
+	/* Not implemented */
+}
+#endif
diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h
b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h
new file mode 100644
index 0000000..8d4f044
--- /dev/null
+++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h
@@ -0,0 +1,55 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015, 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+
+#ifndef _KBASE_UTILITY_H
+#define _KBASE_UTILITY_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+/* Timer init shim: setup_timer() before 4.14, timer_setup() from 4.14 on */
+static inline void kbase_timer_setup(struct timer_list *timer,
+				     void (*callback)(struct timer_list *timer))
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0)
+	/* the callback is cast to the old prototype; its argument is the timer */
+	setup_timer(timer, (void (*)(unsigned long)) callback,
+			(unsigned long) timer);
+#else
+	timer_setup(timer, callback, 0);
+#endif
+}
+/* Fallback definitions, used only when WRITE_ONCE/READ_ONCE are not defined */
+#ifndef WRITE_ONCE
+	#ifdef ASSIGN_ONCE
+		#define WRITE_ONCE(x, val) ASSIGN_ONCE(val, x)
+	#else
+		#define WRITE_ONCE(x, val) (ACCESS_ONCE(x) = (val))
+	#endif
+#endif
+
+#ifndef READ_ONCE
+	#define READ_ONCE(x) ACCESS_ONCE(x)
+#endif
+
+#endif /* _KBASE_UTILITY_H */
diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
new file mode 100644
index 0000000..d96b565
--- /dev/null
+++
b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
@@ -0,0 +1,996 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_vinstr.h"
+#include "mali_kbase_hwcnt_virtualizer.h"
+#include "mali_kbase_hwcnt_types.h"
+#include "mali_kbase_hwcnt_reader.h"
+#include "mali_kbase_hwcnt_gpu.h"
+#include "mali_kbase_ioctl.h"
+#include "mali_malisw.h"
+#include "mali_kbase_debug.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* Hwcnt reader API version */
+#define HWCNT_READER_API 1
+
+/* The minimum allowed interval between dumps: 100 us (equivalent to 10 kHz) */
+#define DUMP_INTERVAL_MIN_NS (100 * NSEC_PER_USEC)
+
+/* The maximum allowed buffers per client */
+#define MAX_BUFFER_COUNT 32
+
+/**
+ * struct kbase_vinstr_context - IOCTL interface for userspace hardware
+ *                               counters.
+ * @hvirt:         Hardware counter virtualizer used by vinstr.
+ * @metadata:      Hardware counter metadata provided by virtualizer.
+ * @lock:          Lock protecting all vinstr state.
+ * @suspend_count: Suspend reference count. If non-zero, timer and worker are
+ *                 prevented from being re-scheduled.
+ * @client_count:  Number of vinstr clients.
+ * @clients:       List of vinstr clients.
+ * @dump_timer:    Timer that enqueues dump_work to a workqueue.
+ * @dump_work:     Worker for performing periodic counter dumps.
+ */
+struct kbase_vinstr_context {
+	struct kbase_hwcnt_virtualizer *hvirt;
+	const struct kbase_hwcnt_metadata *metadata;
+	struct mutex lock;	/* held across kbasep_vinstr_dump_worker() */
+	size_t suspend_count;	/* non-zero keeps dump_timer from being armed */
+	size_t client_count;
+	struct list_head clients;	/* kbase_vinstr_client, linked by node */
+	struct hrtimer dump_timer;	/* armed by kbasep_vinstr_reschedule_worker() */
+	struct work_struct dump_work;	/* queued by kbasep_vinstr_dump_timer() */
+};
+
+/**
+ * struct kbase_vinstr_client - A vinstr client attached to a vinstr context.
+ * @vctx:              Vinstr context client is attached to.
+ * @hvcli:             Hardware counter virtualizer client.
+ * @node:              Node used to attach this client to list in vinstr
+ *                     context.
+ * @dump_interval_ns:  Interval between periodic dumps. If 0, not a periodic
+ *                     client.
+ * @next_dump_time_ns: Time in ns when this client's next periodic dump must
+ *                     occur. If 0, not a periodic client.
+ * @enable_map:        Counters enable map.
+ * @dump_bufs:         Array of dump buffers allocated by this client.
+ * @dump_bufs_meta:    Metadata of dump buffers.
+ * @meta_idx:          Index of metadata being accessed by userspace.
+ * @read_idx:          Index of buffer read by userspace.
+ * @write_idx:         Index of buffer being written by dump worker.
+ * @waitq:             Client's notification queue.
+ */
+struct kbase_vinstr_client {
+	struct kbase_vinstr_context *vctx;
+	struct kbase_hwcnt_virtualizer_client *hvcli;
+	struct list_head node;
+	u64 next_dump_time_ns;	/* set by kbasep_vinstr_reschedule_worker() */
+	u32 dump_interval_ns;
+	struct kbase_hwcnt_enable_map enable_map;
+	struct kbase_hwcnt_dump_buffer_array dump_bufs;
+	struct kbase_hwcnt_reader_metadata *dump_bufs_meta;
+	atomic_t meta_idx;
+	atomic_t read_idx;
+	atomic_t write_idx;	/* incremented after each completed dump */
+	wait_queue_head_t waitq;	/* woken after each completed dump */
+};
+/* Forward declarations of the handlers referenced by vinstr_client_fops */
+static unsigned int kbasep_vinstr_hwcnt_reader_poll(
+	struct file *filp,
+	poll_table *wait);
+
+static long kbasep_vinstr_hwcnt_reader_ioctl(
+	struct file *filp,
+	unsigned int cmd,
+	unsigned long arg);
+
+static int kbasep_vinstr_hwcnt_reader_mmap(
+	struct file *filp,
+	struct vm_area_struct *vma);
+
+static int kbasep_vinstr_hwcnt_reader_release(
+	struct inode *inode,
+	struct file *filp);
+
+/* Vinstr client file operations */
+static const struct file_operations vinstr_client_fops = {
+	.owner = THIS_MODULE,
+	.poll = kbasep_vinstr_hwcnt_reader_poll,
+	.unlocked_ioctl = kbasep_vinstr_hwcnt_reader_ioctl,
+	.compat_ioctl = kbasep_vinstr_hwcnt_reader_ioctl,	/* same handler */
+	.mmap = kbasep_vinstr_hwcnt_reader_mmap,
+	.release = kbasep_vinstr_hwcnt_reader_release,
+};
+
+/**
+ * kbasep_vinstr_timestamp_ns() - Get the current time in nanoseconds.
+ *
+ * Return: Current time in nanoseconds, as returned by ktime_get_raw_ns().
+ */
+static u64 kbasep_vinstr_timestamp_ns(void)
+{
+	return ktime_get_raw_ns();
+}
+
+/**
+ * kbasep_vinstr_next_dump_time_ns() - Calculate the next periodic dump time.
+ * @cur_ts_ns: Current time in nanoseconds.
+ * @interval:  Interval between dumps in nanoseconds.
+ *
+ * Return: 0 if interval is 0 (i.e. a non-periodic client), or the next dump
+ *         time that occurs after cur_ts_ns.
+ */
+static u64 kbasep_vinstr_next_dump_time_ns(u64 cur_ts_ns, u32 interval)
+{
+	/* Non-periodic client */
+	if (interval == 0)
+		return 0;
+
+	/*
+	 * Return the next interval after the current time relative to t=0.
+	 * This means multiple clients with the same period will synchronise,
+	 * regardless of when they were started, allowing the worker to be
+	 * scheduled less frequently.
+	 */
+	do_div(cur_ts_ns, interval);	/* cur_ts_ns becomes the quotient */
+	return (cur_ts_ns + 1) * interval;
+}
+
+/**
+ * kbasep_vinstr_client_dump() - Perform a dump for a client.
+ * @vcli:     Non-NULL pointer to a vinstr client.
+ * @event_id: Event type that triggered the dump.
+ *
+ * Return: 0 on success, -EBUSY if no dump buffer is free, else error code.
+ */
+static int kbasep_vinstr_client_dump(
+	struct kbase_vinstr_client *vcli,
+	enum base_hwcnt_reader_event event_id)
+{
+	int errcode;
+	u64 ts_start_ns;
+	u64 ts_end_ns;
+	unsigned int write_idx;
+	unsigned int read_idx;
+	struct kbase_hwcnt_dump_buffer *dump_buf;
+	struct kbase_hwcnt_reader_metadata *meta;
+
+	WARN_ON(!vcli);
+	lockdep_assert_held(&vcli->vctx->lock);
+
+	write_idx = atomic_read(&vcli->write_idx);
+	read_idx = atomic_read(&vcli->read_idx);
+
+	/* Check if there is a place to copy HWC block into. */
+	if (write_idx - read_idx == vcli->dump_bufs.buf_cnt)
+		return -EBUSY;	/* as many unread dumps as there are buffers */
+	write_idx %= vcli->dump_bufs.buf_cnt;	/* running index -> slot index */
+
+	dump_buf = &vcli->dump_bufs.bufs[write_idx];
+	meta = &vcli->dump_bufs_meta[write_idx];
+
+	errcode = kbase_hwcnt_virtualizer_client_dump(
+		vcli->hvcli, &ts_start_ns, &ts_end_ns, dump_buf);
+	if (errcode)
+		return errcode;
+
+	/* Patch the dump buf headers, to hide the counters that other hwcnt
+	 * clients are using.
+	 */
+	kbase_hwcnt_gpu_patch_dump_headers(dump_buf, &vcli->enable_map);
+
+	/* Zero all non-enabled counters (current values are undefined) */
+	kbase_hwcnt_dump_buffer_zero_non_enabled(dump_buf, &vcli->enable_map);
+
+	meta->timestamp = ts_end_ns;	/* ts_start_ns is not recorded */
+	meta->event_id = event_id;
+	meta->buffer_idx = write_idx;
+
+	/* Notify client. Make sure all changes to memory are visible. */
+	wmb();
+	atomic_inc(&vcli->write_idx);
+	wake_up_interruptible(&vcli->waitq);
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_client_clear() - Reset all the client's counters to zero.
+ * @vcli: Non-NULL pointer to a vinstr client.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_vinstr_client_clear(struct kbase_vinstr_client *vcli)
+{
+	u64 ts_start_ns;	/* filled in by the dump call, not used here */
+	u64 ts_end_ns;		/* filled in by the dump call, not used here */
+
+	WARN_ON(!vcli);
+	lockdep_assert_held(&vcli->vctx->lock);
+
+	/* A virtualizer dump with a NULL buffer will just clear the virtualizer
+	 * client's buffer.
+	 */
+	return kbase_hwcnt_virtualizer_client_dump(
+		vcli->hvcli, &ts_start_ns, &ts_end_ns, NULL);
+}
+
+/**
+ * kbasep_vinstr_reschedule_worker() - Update next dump times for all periodic
+ *                                     vinstr clients, then reschedule the dump
+ *                                     worker appropriately.
+ * @vctx: Non-NULL pointer to the vinstr context.
+ *
+ * If there are no periodic clients, then the dump worker will not be
+ * rescheduled. Else, the dump worker will be rescheduled for the next periodic
+ * client dump.
+ */
+static void kbasep_vinstr_reschedule_worker(struct kbase_vinstr_context *vctx)
+{
+	u64 cur_ts_ns;
+	u64 earliest_next_ns = U64_MAX;	/* stays U64_MAX if nobody is periodic */
+	struct kbase_vinstr_client *pos;
+
+	WARN_ON(!vctx);
+	lockdep_assert_held(&vctx->lock);
+
+	cur_ts_ns = kbasep_vinstr_timestamp_ns();
+
+	/*
+	 * Update each client's next dump time, and find the earliest next
+	 * dump time if any of the clients have a non-zero interval.
+	 */
+	list_for_each_entry(pos, &vctx->clients, node) {
+		const u64 cli_next_ns =
+			kbasep_vinstr_next_dump_time_ns(
+				cur_ts_ns, pos->dump_interval_ns);
+
+		/* Non-zero next dump time implies a periodic client */
+		if ((cli_next_ns != 0) && (cli_next_ns < earliest_next_ns))
+			earliest_next_ns = cli_next_ns;
+
+		pos->next_dump_time_ns = cli_next_ns;
+	}
+
+	/* Cancel the timer if it is already pending */
+	hrtimer_cancel(&vctx->dump_timer);
+
+	/* Start the timer if there are periodic clients and vinstr is not
+	 * suspended.
+ */ + if ((earliest_next_ns != U64_MAX) && + (vctx->suspend_count == 0) && + !WARN_ON(earliest_next_ns < cur_ts_ns)) + hrtimer_start( + &vctx->dump_timer, + ns_to_ktime(earliest_next_ns - cur_ts_ns), + HRTIMER_MODE_REL); +} + +/** + * kbasep_vinstr_dump_worker()- Dump worker, that dumps all periodic clients + * that need to be dumped, then reschedules itself. + * @work: Work structure. + */ +static void kbasep_vinstr_dump_worker(struct work_struct *work) +{ + struct kbase_vinstr_context *vctx = + container_of(work, struct kbase_vinstr_context, dump_work); + struct kbase_vinstr_client *pos; + u64 cur_time_ns; + + mutex_lock(&vctx->lock); + + cur_time_ns = kbasep_vinstr_timestamp_ns(); + + /* Dump all periodic clients whose next dump time is before the current + * time. + */ + list_for_each_entry(pos, &vctx->clients, node) { + if ((pos->next_dump_time_ns != 0) && + (pos->next_dump_time_ns < cur_time_ns)) + kbasep_vinstr_client_dump( + pos, BASE_HWCNT_READER_EVENT_PERIODIC); + } + + /* Update the next dump times of all periodic clients, then reschedule + * this worker at the earliest next dump time. + */ + kbasep_vinstr_reschedule_worker(vctx); + + mutex_unlock(&vctx->lock); +} + +/** + * kbasep_vinstr_dump_timer() - Dump timer that schedules the dump worker for + * execution as soon as possible. + * @timer: Timer structure. + */ +static enum hrtimer_restart kbasep_vinstr_dump_timer(struct hrtimer *timer) +{ + struct kbase_vinstr_context *vctx = + container_of(timer, struct kbase_vinstr_context, dump_timer); + + /* We don't need to check vctx->suspend_count here, as the suspend + * function will ensure that any worker enqueued here is immediately + * cancelled, and the worker itself won't reschedule this timer if + * suspend_count != 0. 
+	 */
+#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE
+	queue_work(system_wq, &vctx->dump_work);
+#else
+	queue_work(system_highpri_wq, &vctx->dump_work);
+#endif
+	return HRTIMER_NORESTART; /* re-arming is left to the dump worker */
+}
+
+/**
+ * kbasep_vinstr_client_destroy() - Destroy a vinstr client.
+ * @vcli: vinstr client. Must not be attached to a vinstr context.
+ *
+ * A NULL @vcli is ignored. Otherwise the virtualizer client, the metadata
+ * array, the dump buffer array and the enable map are released, followed by
+ * the client structure itself.
+ */
+static void kbasep_vinstr_client_destroy(struct kbase_vinstr_client *vcli)
+{
+	if (!vcli)
+		return;
+
+	kbase_hwcnt_virtualizer_client_destroy(vcli->hvcli);
+	kfree(vcli->dump_bufs_meta);
+	kbase_hwcnt_dump_buffer_array_free(&vcli->dump_bufs);
+	kbase_hwcnt_enable_map_free(&vcli->enable_map);
+	kfree(vcli);
+}
+
+/**
+ * kbasep_vinstr_client_create() - Create a vinstr client. Does not attach to
+ *                                 the vinstr context.
+ * @vctx:     Non-NULL pointer to vinstr context.
+ * @setup:    Non-NULL pointer to hardware counter ioctl setup structure.
+ *            setup->buffer_count must not be 0.
+ * @out_vcli: Non-NULL pointer to where created client will be stored on
+ *            success.
+ *
+ * Return: 0 on success, else error code.
+ */ +static int kbasep_vinstr_client_create( + struct kbase_vinstr_context *vctx, + struct kbase_ioctl_hwcnt_reader_setup *setup, + struct kbase_vinstr_client **out_vcli) +{ + int errcode; + struct kbase_vinstr_client *vcli; + struct kbase_hwcnt_physical_enable_map phys_em; + + WARN_ON(!vctx); + WARN_ON(!setup); + WARN_ON(setup->buffer_count == 0); + + vcli = kzalloc(sizeof(*vcli), GFP_KERNEL); + if (!vcli) + return -ENOMEM; + + vcli->vctx = vctx; + + errcode = kbase_hwcnt_enable_map_alloc( + vctx->metadata, &vcli->enable_map); + if (errcode) + goto error; + + phys_em.jm_bm = setup->jm_bm; + phys_em.shader_bm = setup->shader_bm; + phys_em.tiler_bm = setup->tiler_bm; + phys_em.mmu_l2_bm = setup->mmu_l2_bm; + kbase_hwcnt_gpu_enable_map_from_physical(&vcli->enable_map, &phys_em); + + errcode = kbase_hwcnt_dump_buffer_array_alloc( + vctx->metadata, setup->buffer_count, &vcli->dump_bufs); + if (errcode) + goto error; + + errcode = -ENOMEM; + vcli->dump_bufs_meta = kmalloc_array( + setup->buffer_count, sizeof(*vcli->dump_bufs_meta), GFP_KERNEL); + if (!vcli->dump_bufs_meta) + goto error; + + errcode = kbase_hwcnt_virtualizer_client_create( + vctx->hvirt, &vcli->enable_map, &vcli->hvcli); + if (errcode) + goto error; + + init_waitqueue_head(&vcli->waitq); + + *out_vcli = vcli; + return 0; +error: + kbasep_vinstr_client_destroy(vcli); + return errcode; +} + +int kbase_vinstr_init( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_vinstr_context **out_vctx) +{ + struct kbase_vinstr_context *vctx; + const struct kbase_hwcnt_metadata *metadata; + + if (!hvirt || !out_vctx) + return -EINVAL; + + metadata = kbase_hwcnt_virtualizer_metadata(hvirt); + if (!metadata) + return -EINVAL; + + vctx = kzalloc(sizeof(*vctx), GFP_KERNEL); + if (!vctx) + return -ENOMEM; + + vctx->hvirt = hvirt; + vctx->metadata = metadata; + + mutex_init(&vctx->lock); + INIT_LIST_HEAD(&vctx->clients); + hrtimer_init(&vctx->dump_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + vctx->dump_timer.function = 
kbasep_vinstr_dump_timer; + INIT_WORK(&vctx->dump_work, kbasep_vinstr_dump_worker); + + *out_vctx = vctx; + return 0; +} + +void kbase_vinstr_term(struct kbase_vinstr_context *vctx) +{ + if (!vctx) + return; + + cancel_work_sync(&vctx->dump_work); + + /* Non-zero client count implies client leak */ + if (WARN_ON(vctx->client_count != 0)) { + struct kbase_vinstr_client *pos, *n; + + list_for_each_entry_safe(pos, n, &vctx->clients, node) { + list_del(&pos->node); + vctx->client_count--; + kbasep_vinstr_client_destroy(pos); + } + } + + WARN_ON(vctx->client_count != 0); + kfree(vctx); +} + +void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx) +{ + if (WARN_ON(!vctx)) + return; + + mutex_lock(&vctx->lock); + + if (!WARN_ON(vctx->suspend_count == SIZE_MAX)) + vctx->suspend_count++; + + mutex_unlock(&vctx->lock); + + /* Always sync cancel the timer and then the worker, regardless of the + * new suspend count. + * + * This ensures concurrent calls to kbase_vinstr_suspend() always block + * until vinstr is fully suspended. + * + * The timer is cancelled before the worker, as the timer + * unconditionally re-enqueues the worker, but the worker checks the + * suspend_count that we just incremented before rescheduling the timer. + * + * Therefore if we cancel the worker first, the timer might re-enqueue + * the worker before we cancel the timer, but the opposite is not + * possible. + */ + hrtimer_cancel(&vctx->dump_timer); + cancel_work_sync(&vctx->dump_work); +} + +void kbase_vinstr_resume(struct kbase_vinstr_context *vctx) +{ + if (WARN_ON(!vctx)) + return; + + mutex_lock(&vctx->lock); + + if (!WARN_ON(vctx->suspend_count == 0)) { + vctx->suspend_count--; + + /* Last resume, so re-enqueue the worker if we have any periodic + * clients. 
+		 */
+		if (vctx->suspend_count == 0) {
+			struct kbase_vinstr_client *pos;
+			bool has_periodic_clients = false;
+
+			list_for_each_entry(pos, &vctx->clients, node) {
+				if (pos->dump_interval_ns != 0) {
+					has_periodic_clients = true;
+					break;
+				}
+			}
+
+			if (has_periodic_clients)
+#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE
+				queue_work(system_wq, &vctx->dump_work);
+#else
+				queue_work(system_highpri_wq, &vctx->dump_work);
+#endif
+		}
+	}
+
+	mutex_unlock(&vctx->lock);
+}
+
+int kbase_vinstr_hwcnt_reader_setup(
+	struct kbase_vinstr_context *vctx,
+	struct kbase_ioctl_hwcnt_reader_setup *setup)
+{
+	int errcode;
+	int fd;
+	struct kbase_vinstr_client *vcli = NULL;
+
+	if (!vctx || !setup ||
+	    (setup->buffer_count == 0) ||
+	    (setup->buffer_count > MAX_BUFFER_COUNT))
+		return -EINVAL;
+
+	errcode = kbasep_vinstr_client_create(vctx, setup, &vcli);
+	if (errcode)
+		goto error;
+
+	/* Add the new client. No need to reschedule worker, as not periodic
+	 * (a newly created client has a zero dump interval until the
+	 * set-interval ioctl changes it).
+	 */
+	mutex_lock(&vctx->lock);
+
+	vctx->client_count++;
+	list_add(&vcli->node, &vctx->clients);
+
+	mutex_unlock(&vctx->lock);
+
+	/* Expose to user-space only once the client is fully initialized. If
+	 * no descriptor can be created, the client is unlinked from the
+	 * context again and destroyed below.
+	 */
+	errcode = anon_inode_getfd(
+		"[mali_vinstr_desc]",
+		&vinstr_client_fops,
+		vcli,
+		O_RDONLY | O_CLOEXEC);
+	if (errcode < 0)
+		goto client_installed_error;
+
+	fd = errcode; /* non-negative result is the new file descriptor */
+
+	return fd;
+
+client_installed_error:
+	mutex_lock(&vctx->lock);
+
+	vctx->client_count--;
+	list_del(&vcli->node);
+
+	mutex_unlock(&vctx->lock);
+error:
+	kbasep_vinstr_client_destroy(vcli); /* vcli is still NULL if creation failed */
+	return errcode;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_buffer_ready() - Check if client has ready
+ *                                             buffers.
+ * @cli: Non-NULL pointer to vinstr client.
+ *
+ * Return: Non-zero if client has at least one dumping buffer filled that was
+ *         not notified to user yet.
+ */ +static int kbasep_vinstr_hwcnt_reader_buffer_ready( + struct kbase_vinstr_client *cli) +{ + WARN_ON(!cli); + return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx); +} + +/** + * kbasep_vinstr_hwcnt_reader_ioctl_dump() - Dump ioctl command. + * @cli: Non-NULL pointer to vinstr client. + * + * Return: 0 on success, else error code. + */ +static long kbasep_vinstr_hwcnt_reader_ioctl_dump( + struct kbase_vinstr_client *cli) +{ + int errcode; + + mutex_lock(&cli->vctx->lock); + + errcode = kbasep_vinstr_client_dump( + cli, BASE_HWCNT_READER_EVENT_MANUAL); + + mutex_unlock(&cli->vctx->lock); + return errcode; +} + +/** + * kbasep_vinstr_hwcnt_reader_ioctl_clear() - Clear ioctl command. + * @cli: Non-NULL pointer to vinstr client. + * + * Return: 0 on success, else error code. + */ +static long kbasep_vinstr_hwcnt_reader_ioctl_clear( + struct kbase_vinstr_client *cli) +{ + int errcode; + + mutex_lock(&cli->vctx->lock); + + errcode = kbasep_vinstr_client_clear(cli); + + mutex_unlock(&cli->vctx->lock); + return errcode; +} + +/** + * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer() - Get buffer ioctl command. + * @cli: Non-NULL pointer to vinstr client. + * @buffer: Non-NULL pointer to userspace buffer. + * @size: Size of buffer. + * + * Return: 0 on success, else error code. + */ +static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( + struct kbase_vinstr_client *cli, + void __user *buffer, + size_t size) +{ + unsigned int meta_idx = atomic_read(&cli->meta_idx); + unsigned int idx = meta_idx % cli->dump_bufs.buf_cnt; + + struct kbase_hwcnt_reader_metadata *meta = &cli->dump_bufs_meta[idx]; + + /* Metadata sanity check. */ + WARN_ON(idx != meta->buffer_idx); + + if (sizeof(struct kbase_hwcnt_reader_metadata) != size) + return -EINVAL; + + /* Check if there is any buffer available. */ + if (atomic_read(&cli->write_idx) == meta_idx) + return -EAGAIN; + + /* Check if previously taken buffer was put back. 
*/ + if (atomic_read(&cli->read_idx) != meta_idx) + return -EBUSY; + + /* Copy next available buffer's metadata to user. */ + if (copy_to_user(buffer, meta, size)) + return -EFAULT; + + atomic_inc(&cli->meta_idx); + + return 0; +} + +/** + * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer() - Put buffer ioctl command. + * @cli: Non-NULL pointer to vinstr client. + * @buffer: Non-NULL pointer to userspace buffer. + * @size: Size of buffer. + * + * Return: 0 on success, else error code. + */ +static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( + struct kbase_vinstr_client *cli, + void __user *buffer, + size_t size) +{ + unsigned int read_idx = atomic_read(&cli->read_idx); + unsigned int idx = read_idx % cli->dump_bufs.buf_cnt; + + struct kbase_hwcnt_reader_metadata meta; + + if (sizeof(struct kbase_hwcnt_reader_metadata) != size) + return -EINVAL; + + /* Check if any buffer was taken. */ + if (atomic_read(&cli->meta_idx) == read_idx) + return -EPERM; + + /* Check if correct buffer is put back. */ + if (copy_from_user(&meta, buffer, size)) + return -EFAULT; + if (idx != meta.buffer_idx) + return -EINVAL; + + atomic_inc(&cli->read_idx); + + return 0; +} + +/** + * kbasep_vinstr_hwcnt_reader_ioctl_set_interval() - Set interval ioctl command. + * @cli: Non-NULL pointer to vinstr client. + * @interval: Periodic dumping interval (disable periodic dumping if 0). + * + * Return: 0 always. + */ +static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval( + struct kbase_vinstr_client *cli, + u32 interval) +{ + mutex_lock(&cli->vctx->lock); + + if ((interval != 0) && (interval < DUMP_INTERVAL_MIN_NS)) + interval = DUMP_INTERVAL_MIN_NS; + /* Update the interval, and put in a dummy next dump time */ + cli->dump_interval_ns = interval; + cli->next_dump_time_ns = 0; + + /* + * If it's a periodic client, kick off the worker early to do a proper + * timer reschedule. Return value is ignored, as we don't care if the + * worker is already queued. 
+	 */
+	if ((interval != 0) && (cli->vctx->suspend_count == 0))
+#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE
+		queue_work(system_wq, &cli->vctx->dump_work);
+#else
+		queue_work(system_highpri_wq, &cli->vctx->dump_work);
+#endif
+
+	mutex_unlock(&cli->vctx->lock);
+
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_enable_event() - Enable event ioctl command.
+ * @cli:      Non-NULL pointer to vinstr client.
+ * @event_id: ID of event to enable.
+ *
+ * Both arguments are ignored.
+ *
+ * Return: 0 always.
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
+		struct kbase_vinstr_client *cli,
+		enum base_hwcnt_reader_event event_id)
+{
+	/* No-op, as events aren't supported */
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_disable_event() - Disable event ioctl
+ *                                                    command.
+ * @cli:      Non-NULL pointer to vinstr client.
+ * @event_id: ID of event to disable.
+ *
+ * Both arguments are ignored.
+ *
+ * Return: 0 always.
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
+	struct kbase_vinstr_client *cli,
+	enum base_hwcnt_reader_event event_id)
+{
+	/* No-op, as events aren't supported */
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver() - Get HW version ioctl command.
+ * @cli:   Non-NULL pointer to vinstr client.
+ * @hwver: Non-NULL pointer to user buffer where HW version will be stored.
+ *
+ * The version reported is 4 or 5, derived from the type of counter group 0 in
+ * the context's metadata.
+ *
+ * Return: 0 on success, -EINVAL (after a warning) if the group type is neither
+ *         V4 nor V5, else the error code from writing to @hwver.
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
+	struct kbase_vinstr_client *cli,
+	u32 __user *hwver)
+{
+	u32 ver = 0;
+	const enum kbase_hwcnt_gpu_group_type type =
+		kbase_hwcnt_metadata_group_type(cli->vctx->metadata, 0);
+
+	switch (type) {
+	case KBASE_HWCNT_GPU_GROUP_TYPE_V4:
+		ver = 4;
+		break;
+	case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
+		ver = 5;
+		break;
+	default:
+		WARN_ON(true);
+	}
+
+	if (ver != 0) {
+		return put_user(ver, hwver);
+	} else {
+		return -EINVAL;
+	}
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl() - hwcnt reader's ioctl.
+ * @filp:   Non-NULL pointer to file structure.
+ * @cmd:    User command.
+ * @arg: Command's argument. + * + * Return: 0 on success, else error code. + */ +static long kbasep_vinstr_hwcnt_reader_ioctl( + struct file *filp, + unsigned int cmd, + unsigned long arg) +{ + long rcode; + struct kbase_vinstr_client *cli; + + if (!filp || (_IOC_TYPE(cmd) != KBASE_HWCNT_READER)) + return -EINVAL; + + cli = filp->private_data; + if (!cli) + return -EINVAL; + + switch (cmd) { + case KBASE_HWCNT_READER_GET_API_VERSION: + rcode = put_user(HWCNT_READER_API, (u32 __user *)arg); + break; + case KBASE_HWCNT_READER_GET_HWVER: + rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( + cli, (u32 __user *)arg); + break; + case KBASE_HWCNT_READER_GET_BUFFER_SIZE: + rcode = put_user( + (u32)cli->vctx->metadata->dump_buf_bytes, + (u32 __user *)arg); + break; + case KBASE_HWCNT_READER_DUMP: + rcode = kbasep_vinstr_hwcnt_reader_ioctl_dump(cli); + break; + case KBASE_HWCNT_READER_CLEAR: + rcode = kbasep_vinstr_hwcnt_reader_ioctl_clear(cli); + break; + case KBASE_HWCNT_READER_GET_BUFFER: + rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( + cli, (void __user *)arg, _IOC_SIZE(cmd)); + break; + case KBASE_HWCNT_READER_PUT_BUFFER: + rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( + cli, (void __user *)arg, _IOC_SIZE(cmd)); + break; + case KBASE_HWCNT_READER_SET_INTERVAL: + rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval( + cli, (u32)arg); + break; + case KBASE_HWCNT_READER_ENABLE_EVENT: + rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event( + cli, (enum base_hwcnt_reader_event)arg); + break; + case KBASE_HWCNT_READER_DISABLE_EVENT: + rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event( + cli, (enum base_hwcnt_reader_event)arg); + break; + default: + pr_warn("Unknown HWCNT ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd)); + rcode = -EINVAL; + break; + } + + return rcode; +} + +/** + * kbasep_vinstr_hwcnt_reader_poll() - hwcnt reader's poll. + * @filp: Non-NULL pointer to file structure. + * @wait: Non-NULL pointer to poll table. 
+ *
+ * Return: POLLIN if data can be read without blocking, 0 if data can not be
+ *         read without blocking, POLLERR if the file or its client is
+ *         invalid. The result is an event mask, never a negative errno.
+ */
+static unsigned int kbasep_vinstr_hwcnt_reader_poll(
+	struct file *filp,
+	poll_table *wait)
+{
+	struct kbase_vinstr_client *cli;
+
+	/* A negative errno converted to the unsigned return type would be
+	 * read by the caller as an arbitrary set of event bits, so failures
+	 * are reported through POLLERR instead.
+	 */
+	if (!filp || !wait)
+		return POLLERR;
+
+	cli = filp->private_data;
+	if (!cli)
+		return POLLERR;
+
+	poll_wait(filp, &cli->waitq, wait);
+	if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli))
+		return POLLIN;
+	return 0;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_mmap() - hwcnt reader's mmap.
+ * @filp: Non-NULL pointer to file structure.
+ * @vma:  Non-NULL pointer to vma structure.
+ *
+ * Maps the client's dump buffers into user space. The requested window,
+ * given by the page offset and length of @vma, must lie inside the
+ * buf_cnt * dump_buf_bytes region.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_vinstr_hwcnt_reader_mmap(
+	struct file *filp,
+	struct vm_area_struct *vma)
+{
+	struct kbase_vinstr_client *cli;
+	unsigned long vm_size, size, addr, pfn, offset;
+
+	if (!filp || !vma)
+		return -EINVAL;
+
+	cli = filp->private_data;
+	if (!cli)
+		return -EINVAL;
+
+	vm_size = vma->vm_end - vma->vm_start;
+	size = cli->dump_bufs.buf_cnt * cli->vctx->metadata->dump_buf_bytes;
+
+	/* Reject offsets beyond the buffers first, so that the subtraction
+	 * below cannot wrap around.
+	 */
+	if (vma->vm_pgoff > (size >> PAGE_SHIFT))
+		return -EINVAL;
+
+	offset = vma->vm_pgoff << PAGE_SHIFT;
+	if (vm_size > size - offset)
+		return -EINVAL;
+
+	addr = __pa(cli->dump_bufs.page_addr + offset);
+	pfn = addr >> PAGE_SHIFT;
+
+	return remap_pfn_range(
+		vma, vma->vm_start, pfn, vm_size, vma->vm_page_prot);
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_release() - hwcnt reader's release.
+ * @inode: Non-NULL pointer to inode structure.
+ * @filp:  Non-NULL pointer to file structure.
+ *
+ * Return: 0 always.
+ */
+static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode,
+	struct file *filp)
+{
+	struct kbase_vinstr_client *vcli = filp->private_data;
+
+	mutex_lock(&vcli->vctx->lock);
+
+	vcli->vctx->client_count--;
+	list_del(&vcli->node);
+
+	mutex_unlock(&vcli->vctx->lock);
+
+	kbasep_vinstr_client_destroy(vcli); /* client is detached from the context by now */
+
+	return 0;
+}
diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
new file mode 100644
index 0000000..81d315f
--- /dev/null
+++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
@@ -0,0 +1,91 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Vinstr, used to provide an ioctl for userspace access to periodic hardware
+ * counters.
+ */
+
+#ifndef _KBASE_VINSTR_H_
+#define _KBASE_VINSTR_H_
+
+struct kbase_vinstr_context;
+struct kbase_hwcnt_virtualizer;
+struct kbase_ioctl_hwcnt_reader_setup;
+
+/**
+ * kbase_vinstr_init() - Initialise a vinstr context.
+ * @hvirt:    Non-NULL pointer to the hardware counter virtualizer.
+ * @out_vctx: Non-NULL pointer to where the pointer to the created vinstr
+ *            context will be stored on success.
+ * + * On creation, the suspend count of the context will be 0. + * + * Return: 0 on success, else error code. + */ +int kbase_vinstr_init( + struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_vinstr_context **out_vctx); + +/** + * kbase_vinstr_term() - Terminate a vinstr context. + * @vctx: Pointer to the vinstr context to be terminated. + */ +void kbase_vinstr_term(struct kbase_vinstr_context *vctx); + +/** + * kbase_vinstr_suspend() - Increment the suspend count of the context. + * @vctx: Non-NULL pointer to the vinstr context to be suspended. + * + * After this function call returns, it is guaranteed that all timers and + * workers in vinstr will be cancelled, and will not be re-triggered until + * after the context has been resumed. In effect, this means no new counter + * dumps will occur for any existing or subsequently added periodic clients. + */ +void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx); + +/** + * kbase_vinstr_resume() - Decrement the suspend count of the context. + * @vctx: Non-NULL pointer to the vinstr context to be resumed. + * + * If a call to this function decrements the suspend count from 1 to 0, then + * normal operation of vinstr will be resumed (i.e. counter dumps will once + * again be automatically triggered for all periodic clients). + * + * It is only valid to call this function one time for each prior returned call + * to kbase_vinstr_suspend. + */ +void kbase_vinstr_resume(struct kbase_vinstr_context *vctx); + +/** + * kbase_vinstr_hwcnt_reader_setup() - Set up a new hardware counter reader + * client. + * @vinstr_ctx: Non-NULL pointer to the vinstr context. + * @setup: Non-NULL pointer to the hwcnt reader configuration. + * + * Return: file descriptor on success, else a (negative) error code. 
+ */ +int kbase_vinstr_hwcnt_reader_setup( + struct kbase_vinstr_context *vinstr_ctx, + struct kbase_ioctl_hwcnt_reader_setup *setup); + +#endif /* _KBASE_VINSTR_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h new file mode 100644 index 0000000..f618755 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h @@ -0,0 +1,532 @@ +/* + * + * (C) COPYRIGHT 2011-2016, 2018-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mali + +#if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MALI_H + +#include + +#if defined(CONFIG_MALI_GATOR_SUPPORT) +#define MALI_JOB_SLOTS_EVENT_CHANGED + +/** + * mali_job_slots_event - Reports change of job slot status. + * @gpu_id: Kbase device id + * @event_id: ORed together bitfields representing a type of event, + * made with the GATOR_MAKE_EVENT() macro. 
+ */ +TRACE_EVENT(mali_job_slots_event, + TP_PROTO(u32 gpu_id, u32 event_id, u32 tgid, u32 pid, + u8 job_id), + TP_ARGS(gpu_id, event_id, tgid, pid, job_id), + TP_STRUCT__entry( + __field(u32, gpu_id) + __field(u32, event_id) + __field(u32, tgid) + __field(u32, pid) + __field(u8, job_id) + ), + TP_fast_assign( + __entry->gpu_id = gpu_id; + __entry->event_id = event_id; + __entry->tgid = tgid; + __entry->pid = pid; + __entry->job_id = job_id; + ), + TP_printk("gpu=%u event=%u tgid=%u pid=%u job_id=%u", + __entry->gpu_id, __entry->event_id, + __entry->tgid, __entry->pid, __entry->job_id) +); + +/** + * mali_pm_status - Reports change of power management status. + * @gpu_id: Kbase device id + * @event_id: Core type (shader, tiler, L2 cache) + * @value: 64bits bitmask reporting either power status of + * the cores (1-ON, 0-OFF) + */ +TRACE_EVENT(mali_pm_status, + TP_PROTO(u32 gpu_id, u32 event_id, u64 value), + TP_ARGS(gpu_id, event_id, value), + TP_STRUCT__entry( + __field(u32, gpu_id) + __field(u32, event_id) + __field(u64, value) + ), + TP_fast_assign( + __entry->gpu_id = gpu_id; + __entry->event_id = event_id; + __entry->value = value; + ), + TP_printk("gpu=%u event %u = %llu", + __entry->gpu_id, __entry->event_id, __entry->value) +); + +/** + * mali_page_fault_insert_pages - Reports an MMU page fault + * resulting in new pages being mapped. 
+ * @gpu_id: Kbase device id + * @event_id: MMU address space number + * @value: Number of newly allocated pages + */ +TRACE_EVENT(mali_page_fault_insert_pages, + TP_PROTO(u32 gpu_id, s32 event_id, u64 value), + TP_ARGS(gpu_id, event_id, value), + TP_STRUCT__entry( + __field(u32, gpu_id) + __field(s32, event_id) + __field(u64, value) + ), + TP_fast_assign( + __entry->gpu_id = gpu_id; + __entry->event_id = event_id; + __entry->value = value; + ), + TP_printk("gpu=%u event %d = %llu", + __entry->gpu_id, __entry->event_id, __entry->value) +); + +/** + * mali_total_alloc_pages_change - Reports that the total number of + * allocated pages has changed. + * @gpu_id: Kbase device id + * @event_id: Total number of pages allocated + */ +TRACE_EVENT(mali_total_alloc_pages_change, + TP_PROTO(u32 gpu_id, s64 event_id), + TP_ARGS(gpu_id, event_id), + TP_STRUCT__entry( + __field(u32, gpu_id) + __field(s64, event_id) + ), + TP_fast_assign( + __entry->gpu_id = gpu_id; + __entry->event_id = event_id; + ), + TP_printk("gpu=%u event=%lld", __entry->gpu_id, __entry->event_id) +); +#endif /* CONFIG_MALI_GATOR_SUPPORT */ + +/* + * MMU subsystem tracepoints + */ + +/* Fault status and exception code helpers + * + * Must be macros to allow use by user-side tracepoint tools + * + * bits 0:1 masked off code, and used for the level + * + * Tracepoint files get included more than once - protect against multiple + * definition + */ +#ifndef __TRACE_MALI_MMU_HELPERS +#define __TRACE_MALI_MMU_HELPERS +/* Complex macros should be enclosed in parenthesis. + * + * We need to have those parentheses removed for our arrays of symbolic look-ups + * for __print_symbolic() whilst also being able to use them outside trace code + */ +#define _ENSURE_PARENTHESIS(args...) args + +#define KBASE_MMU_FAULT_CODE_EXCEPTION_NAME_PRINT(code) \ + (!KBASE_MMU_FAULT_CODE_VALID(code) ? 
"UNKNOWN,level=" : \ + __print_symbolic(((code) & ~3u), \ + KBASE_MMU_FAULT_CODE_SYMBOLIC_STRINGS)) +#define KBASE_MMU_FAULT_CODE_LEVEL(code) \ + (((((code) & ~0x3u) == 0xC4) ? 4 : 0) + ((code) & 0x3u)) + +#define KBASE_MMU_FAULT_STATUS_CODE(status) \ + ((status) & 0xFFu) +#define KBASE_MMU_FAULT_STATUS_DECODED_STRING(status) \ + (((status) & (1u << 10)) ? "DECODER_FAULT" : "SLAVE_FAULT") + +#define KBASE_MMU_FAULT_STATUS_EXCEPTION_NAME_PRINT(status) \ + KBASE_MMU_FAULT_CODE_EXCEPTION_NAME_PRINT( \ + KBASE_MMU_FAULT_STATUS_CODE(status)) + +#define KBASE_MMU_FAULT_STATUS_LEVEL(status) \ + KBASE_MMU_FAULT_CODE_LEVEL(KBASE_MMU_FAULT_STATUS_CODE(status)) + +#define KBASE_MMU_FAULT_STATUS_ACCESS(status) \ + ((status) & AS_FAULTSTATUS_ACCESS_TYPE_MASK) +#define KBASE_MMU_FAULT_ACCESS_SYMBOLIC_STRINGS _ENSURE_PARENTHESIS(\ + {AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC, "ATOMIC" }, \ + {AS_FAULTSTATUS_ACCESS_TYPE_EX, "EXECUTE"}, \ + {AS_FAULTSTATUS_ACCESS_TYPE_READ, "READ" }, \ + {AS_FAULTSTATUS_ACCESS_TYPE_WRITE, "WRITE" }) +#define KBASE_MMU_FAULT_STATUS_ACCESS_PRINT(status) \ + __print_symbolic(KBASE_MMU_FAULT_STATUS_ACCESS(status), \ + KBASE_MMU_FAULT_ACCESS_SYMBOLIC_STRINGS) + +#define KBASE_MMU_FAULT_CODE_VALID(code) \ + ((code >= 0xC0 && code <= 0xEF) && \ + (!(code >= 0xC5 && code <= 0xC6)) && \ + (!(code >= 0xCC && code <= 0xCF)) && \ + (!(code >= 0xD4 && code <= 0xD7)) && \ + (!(code >= 0xDC && code <= 0xDF))) +#define KBASE_MMU_FAULT_CODE_SYMBOLIC_STRINGS _ENSURE_PARENTHESIS(\ + {0xC0, "TRANSLATION_FAULT_" }, \ + {0xC4, "TRANSLATION_FAULT(_7==_IDENTITY)_" }, \ + {0xC8, "PERMISSION_FAULT_" }, \ + {0xD0, "TRANSTAB_BUS_FAULT_" }, \ + {0xD8, "ACCESS_FLAG_" }, \ + {0xE0, "ADDRESS_SIZE_FAULT_IN" }, \ + {0xE4, "ADDRESS_SIZE_FAULT_OUT" }, \ + {0xE8, "MEMORY_ATTRIBUTES_FAULT_" }, \ + {0xEC, "MEMORY_ATTRIBUTES_NONCACHEABLE_" }) +#endif /* __TRACE_MALI_MMU_HELPERS */ + +/* trace_mali_mmu_page_fault_grow + * + * Tracepoint about a successful grow of a region due to a GPU page fault 
+ */ +TRACE_EVENT(mali_mmu_page_fault_grow, + TP_PROTO(struct kbase_va_region *reg, struct kbase_fault *fault, + size_t new_pages), + TP_ARGS(reg, fault, new_pages), + TP_STRUCT__entry( + __field(u64, start_addr) + __field(u64, fault_addr) + __field(u64, fault_extra_addr) + __field(size_t, new_pages) + __field(u32, status) + ), + TP_fast_assign( + __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; + __entry->fault_addr = fault->addr; + __entry->fault_extra_addr = fault->extra_addr; + __entry->new_pages = new_pages; + __entry->status = fault->status; + ), + TP_printk("start=0x%llx fault_addr=0x%llx fault_extra_addr=0x%llx new_pages=%zu raw_fault_status=0x%x decoded_faultstatus=%s exception_type=0x%x,%s%u access_type=0x%x,%s source_id=0x%x", + __entry->start_addr, __entry->fault_addr, + __entry->fault_extra_addr, __entry->new_pages, + __entry->status, + KBASE_MMU_FAULT_STATUS_DECODED_STRING(__entry->status), + KBASE_MMU_FAULT_STATUS_CODE(__entry->status), + KBASE_MMU_FAULT_STATUS_EXCEPTION_NAME_PRINT(__entry->status), + KBASE_MMU_FAULT_STATUS_LEVEL(__entry->status), + KBASE_MMU_FAULT_STATUS_ACCESS(__entry->status) >> 8, + KBASE_MMU_FAULT_STATUS_ACCESS_PRINT(__entry->status), + __entry->status >> 16) +); + + + + +/* + * Just-in-time memory allocation subsystem tracepoints + */ + +/* Just-in-time memory allocation soft-job template. Override the TP_printk + * further if need be. jit_id can be 0. 
+ */ +DECLARE_EVENT_CLASS(mali_jit_softjob_template, + TP_PROTO(struct kbase_va_region *reg, u8 jit_id), + TP_ARGS(reg, jit_id), + TP_STRUCT__entry( + __field(u64, start_addr) + __field(size_t, nr_pages) + __field(size_t, backed_pages) + __field(u8, jit_id) + ), + TP_fast_assign( + __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; + __entry->nr_pages = reg->nr_pages; + __entry->backed_pages = kbase_reg_current_backed_size(reg); + __entry->jit_id = jit_id; + ), + TP_printk("jit_id=%u start=0x%llx va_pages=0x%zx backed_size=0x%zx", + __entry->jit_id, __entry->start_addr, __entry->nr_pages, + __entry->backed_pages) +); + +/* trace_mali_jit_alloc() + * + * Tracepoint about a just-in-time memory allocation soft-job successfully + * allocating memory + */ +DEFINE_EVENT(mali_jit_softjob_template, mali_jit_alloc, + TP_PROTO(struct kbase_va_region *reg, u8 jit_id), + TP_ARGS(reg, jit_id)); + +/* trace_mali_jit_free() + * + * Tracepoint about memory that was allocated just-in-time being freed + * (which may happen either on free soft-job, or during rollback error + * paths of an allocation soft-job, etc) + * + * Free doesn't immediately have the just-in-time memory allocation ID so + * it's currently suppressed from the output - set jit_id to 0 + */ +DEFINE_EVENT_PRINT(mali_jit_softjob_template, mali_jit_free, + TP_PROTO(struct kbase_va_region *reg, u8 jit_id), + TP_ARGS(reg, jit_id), + TP_printk("start=0x%llx va_pages=0x%zx backed_size=0x%zx", + __entry->start_addr, __entry->nr_pages, __entry->backed_pages)); + +#if MALI_JIT_PRESSURE_LIMIT +/* trace_mali_jit_report + * + * Tracepoint about the GPU data structure read to form a just-in-time memory + * allocation report, and its calculated physical page usage + */ +TRACE_EVENT(mali_jit_report, + TP_PROTO(struct kbase_jd_atom *katom, struct kbase_va_region *reg, + unsigned int id_idx, u64 read_val, u64 used_pages), + TP_ARGS(katom, reg, id_idx, read_val, used_pages), + TP_STRUCT__entry( + __field(u64, start_addr) + 
__field(u64, read_val) + __field(u64, used_pages) + __field(unsigned long, flags) + __field(u8, id_idx) + __field(u8, jit_id) + ), + TP_fast_assign( + __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; + __entry->read_val = read_val; + __entry->used_pages = used_pages; + __entry->flags = reg->flags; + __entry->id_idx = id_idx; + __entry->jit_id = katom->jit_ids[id_idx]; + ), + TP_printk("start=0x%llx jit_ids[%u]=%u read_type='%s' read_val=0x%llx used_pages=%llu", + __entry->start_addr, __entry->id_idx, __entry->jit_id, + __print_symbolic(__entry->flags, + { 0, "address"}, + { KBASE_REG_TILER_ALIGN_TOP, "address with align" }, + { KBASE_REG_HEAP_INFO_IS_SIZE, "size" }, + { KBASE_REG_HEAP_INFO_IS_SIZE | + KBASE_REG_TILER_ALIGN_TOP, + "size with align (invalid)" } + ), + __entry->read_val, __entry->used_pages) +); +#endif /* MALI_JIT_PRESSURE_LIMIT */ + +#if (KERNEL_VERSION(4, 1, 0) <= LINUX_VERSION_CODE) +TRACE_DEFINE_ENUM(KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); +#endif + +#if MALI_JIT_PRESSURE_LIMIT +/* trace_mali_jit_report_pressure + * + * Tracepoint about change in physical memory pressure, due to the information + * about a region changing. 
Examples include: + * - a report on a region that was allocated just-in-time + * - just-in-time allocation of a region + * - free of a region that was allocated just-in-time + */ +TRACE_EVENT(mali_jit_report_pressure, + TP_PROTO(struct kbase_va_region *reg, u64 new_used_pages, + u64 new_pressure, unsigned int flags), + TP_ARGS(reg, new_used_pages, new_pressure, flags), + TP_STRUCT__entry( + __field(u64, start_addr) + __field(u64, used_pages) + __field(u64, new_used_pages) + __field(u64, new_pressure) + __field(unsigned int, flags) + ), + TP_fast_assign( + __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; + __entry->used_pages = reg->used_pages; + __entry->new_used_pages = new_used_pages; + __entry->new_pressure = new_pressure; + __entry->flags = flags; + ), + TP_printk("start=0x%llx old_used_pages=%llu new_used_pages=%llu new_pressure=%llu report_flags=%s", + __entry->start_addr, __entry->used_pages, + __entry->new_used_pages, __entry->new_pressure, + __print_flags(__entry->flags, "|", + { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE, + "HAPPENED_ON_ALLOC_OR_FREE" })) +); +#endif /* MALI_JIT_PRESSURE_LIMIT */ + +#ifndef __TRACE_SYSGRAPH_ENUM +#define __TRACE_SYSGRAPH_ENUM +/* Enum of sysgraph message IDs */ +enum sysgraph_msg { + SGR_ARRIVE, + SGR_DEP_RES, + SGR_SUBMIT, + SGR_COMPLETE, + SGR_POST, + SGR_ACTIVE, + SGR_INACTIVE +}; +#endif /* __TRACE_SYSGRAPH_ENUM */ + +/* A template for SYSGRAPH events + * + * Most of the sysgraph events contain only one input argument + * which is atom_id therefore they will be using a common template + */ +TRACE_EVENT(sysgraph, + TP_PROTO(enum sysgraph_msg message, unsigned int proc_id, + unsigned int atom_id), + TP_ARGS(message, proc_id, atom_id), + TP_STRUCT__entry( + __field(unsigned int, proc_id) + __field(enum sysgraph_msg, message) + __field(unsigned int, atom_id) + ), + TP_fast_assign( + __entry->proc_id = proc_id; + __entry->message = message; + __entry->atom_id = atom_id; + ), + TP_printk("msg=%u proc_id=%u, param1=%d\n", 
__entry->message, + __entry->proc_id, __entry->atom_id) +); + +/* A template for SYSGRAPH GPU events + * + * Sysgraph events that record start/complete events + * on GPU also record a js value in addition to the + * atom id. + */ +TRACE_EVENT(sysgraph_gpu, + TP_PROTO(enum sysgraph_msg message, unsigned int proc_id, + unsigned int atom_id, unsigned int js), + TP_ARGS(message, proc_id, atom_id, js), + TP_STRUCT__entry( + __field(unsigned int, proc_id) + __field(enum sysgraph_msg, message) + __field(unsigned int, atom_id) + __field(unsigned int, js) + ), + TP_fast_assign( + __entry->proc_id = proc_id; + __entry->message = message; + __entry->atom_id = atom_id; + __entry->js = js; + ), + TP_printk("msg=%u proc_id=%u, param1=%d, param2=%d\n", + __entry->message, __entry->proc_id, + __entry->atom_id, __entry->js) +); + +/* Tracepoint files get included more than once - protect against multiple + * definition + */ +#undef KBASE_JIT_REPORT_GPU_MEM_SIZE + +/* Size in bytes of the memory surrounding the location used for a just-in-time + * memory allocation report + */ +#define KBASE_JIT_REPORT_GPU_MEM_SIZE (4 * sizeof(u64)) + +/* trace_mali_jit_report_gpu_mem + * + * Tracepoint about the GPU memory nearby the location used for a just-in-time + * memory allocation report + */ +TRACE_EVENT(mali_jit_report_gpu_mem, + TP_PROTO(u64 base_addr, u64 reg_addr, u64 *gpu_mem, unsigned int flags), + TP_ARGS(base_addr, reg_addr, gpu_mem, flags), + TP_STRUCT__entry( + __field(u64, base_addr) + __field(u64, reg_addr) + __array(u64, mem_values, + KBASE_JIT_REPORT_GPU_MEM_SIZE / sizeof(u64)) + __field(unsigned int, flags) + ), + TP_fast_assign( + __entry->base_addr = base_addr; + __entry->reg_addr = reg_addr; + memcpy(__entry->mem_values, gpu_mem, + sizeof(__entry->mem_values)); + __entry->flags = flags; + ), + TP_printk("start=0x%llx read GPU memory base=0x%llx values=%s report_flags=%s", + __entry->reg_addr, __entry->base_addr, + __print_array(__entry->mem_values, + 
ARRAY_SIZE(__entry->mem_values), sizeof(u64)), + __print_flags(__entry->flags, "|", + { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE, + "HAPPENED_ON_ALLOC_OR_FREE" })) +); + +/* trace_mali_jit_trim_from_region + * + * Tracepoint about trimming physical pages from a region + */ +TRACE_EVENT(mali_jit_trim_from_region, + TP_PROTO(struct kbase_va_region *reg, size_t freed_pages, + size_t old_pages, size_t available_pages, size_t new_pages), + TP_ARGS(reg, freed_pages, old_pages, available_pages, new_pages), + TP_STRUCT__entry( + __field(u64, start_addr) + __field(size_t, freed_pages) + __field(size_t, old_pages) + __field(size_t, available_pages) + __field(size_t, new_pages) + ), + TP_fast_assign( + __entry->start_addr = ((u64)reg->start_pfn) << PAGE_SHIFT; + __entry->freed_pages = freed_pages; + __entry->old_pages = old_pages; + __entry->available_pages = available_pages; + __entry->new_pages = new_pages; + ), + TP_printk("start=0x%llx freed_pages=%zu old_pages=%zu available_pages=%zu new_pages=%zu", + __entry->start_addr, __entry->freed_pages, __entry->old_pages, + __entry->available_pages, __entry->new_pages) +); + +/* trace_mali_jit_trim + * + * Tracepoint about total trimmed physical pages + */ +TRACE_EVENT(mali_jit_trim, + TP_PROTO(size_t freed_pages), + TP_ARGS(freed_pages), + TP_STRUCT__entry( + __field(size_t, freed_pages) + ), + TP_fast_assign( + __entry->freed_pages = freed_pages; + ), + TP_printk("freed_pages=%zu", __entry->freed_pages) +); + +#include "mali_kbase_debug_linux_ktrace.h" + +#endif /* _TRACE_MALI_H */ + +#undef TRACE_INCLUDE_PATH +/* lwn.net/Articles/383362 suggests this should remain as '.', and instead + * extend CFLAGS + */ +#define TRACE_INCLUDE_PATH . 
+#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE mali_linux_trace + +/* This part must be outside protection */ +#include diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h new file mode 100644 index 0000000..3a4db10 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_malisw.h @@ -0,0 +1,109 @@ +/* + * + * (C) COPYRIGHT 2014-2015, 2018 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Kernel-wide include for common macros and types. 
+ */ + +#ifndef _MALISW_H_ +#define _MALISW_H_ + +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) +#define U8_MAX ((u8)~0U) +#define S8_MAX ((s8)(U8_MAX>>1)) +#define S8_MIN ((s8)(-S8_MAX - 1)) +#define U16_MAX ((u16)~0U) +#define S16_MAX ((s16)(U16_MAX>>1)) +#define S16_MIN ((s16)(-S16_MAX - 1)) +#define U32_MAX ((u32)~0U) +#define S32_MAX ((s32)(U32_MAX>>1)) +#define S32_MIN ((s32)(-S32_MAX - 1)) +#define U64_MAX ((u64)~0ULL) +#define S64_MAX ((s64)(U64_MAX>>1)) +#define S64_MIN ((s64)(-S64_MAX - 1)) +#endif /* LINUX_VERSION_CODE */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0) +#define SIZE_MAX (~(size_t)0) +#endif /* LINUX_VERSION_CODE */ + +/** + * MIN - Return the lesser of two values. + * + * As a macro it may evaluate its arguments more than once. + * Refer to MAX macro for more details + */ +#define MIN(x, y) ((x) < (y) ? (x) : (y)) + +/** + * MAX - Return the greater of two values. + * + * As a macro it may evaluate its arguments more than once. + * If called on the same two arguments as MIN it is guaranteed to return + * the one that MIN didn't return. This is significant for types where not + * all values are comparable e.g. NaNs in floating-point types. But if you want + * to retrieve the min and max of two values, consider using a conditional swap + * instead. + */ +#define MAX(x, y) ((x) < (y) ? (y) : (x)) + +/** + * @hideinitializer + * Function-like macro for suppressing unused variable warnings. Where possible + * such variables should be removed; this macro is present for cases where we + * must support API backwards compatibility. + */ +#define CSTD_UNUSED(x) ((void)(x)) + +/** + * @hideinitializer + * Function-like macro for use where "no behavior" is desired. This is useful + * when compile time macros turn a function-like macro into a no-op, but + * where having no statement is otherwise invalid. + */ +#define CSTD_NOP(...) 
((void)#__VA_ARGS__) + +/** + * @hideinitializer + * Function-like macro for stringizing a single level macro. + * @code + * #define MY_MACRO 32 + * CSTD_STR1( MY_MACRO ) + * > "MY_MACRO" + * @endcode + */ +#define CSTD_STR1(x) #x + +/** + * @hideinitializer + * Function-like macro for stringizing a macro's value. This should not be used + * if the macro is defined in a way which may have no value; the + * alternative @c CSTD_STR2N macro should be used instead. + * @code + * #define MY_MACRO 32 + * CSTD_STR2( MY_MACRO ) + * > "32" + * @endcode + */ +#define CSTD_STR2(x) CSTD_STR1(x) + +#endif /* _MALISW_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_uk.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_uk.h similarity index 100% rename from dvalin/kernel/drivers/gpu/arm/midgard/mali_uk.h rename to bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_uk.h diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/backend/mali_kbase_mmu_jm.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/backend/mali_kbase_mmu_jm.c new file mode 100644 index 0000000..2d8fb51 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/backend/mali_kbase_mmu_jm.c @@ -0,0 +1,424 @@ +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Base kernel MMU management specific for Job Manager GPU. + */ + +#include +#include +#include +#include +#include +#include "../mali_kbase_mmu_internal.h" +#include "mali_kbase_device_internal.h" + +void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, + struct kbase_mmu_setup * const setup) +{ + /* Set up the required caching policies at the correct indices + * in the memattr register. + */ + setup->memattr = + (AS_MEMATTR_IMPL_DEF_CACHE_POLICY << + (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | + (AS_MEMATTR_FORCE_TO_CACHE_ALL << + (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | + (AS_MEMATTR_WRITE_ALLOC << + (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | + (AS_MEMATTR_AARCH64_OUTER_IMPL_DEF << + (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | + (AS_MEMATTR_AARCH64_OUTER_WA << + (AS_MEMATTR_INDEX_OUTER_WA * 8)) | + (AS_MEMATTR_AARCH64_NON_CACHEABLE << + (AS_MEMATTR_INDEX_NON_CACHEABLE * 8)); + + setup->transtab = (u64)mmut->pgd & AS_TRANSTAB_BASE_MASK; + setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K; +} + +void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, + struct kbase_as *as, struct kbase_fault *fault) +{ + struct kbase_device *const kbdev = kctx->kbdev; + u32 const status = fault->status; + u32 const exception_type = (status & 0xFF); + u32 const exception_data = (status >> 8) & 0xFFFFFF; + int const as_no = as->number; + unsigned long flags; + + /* terminal fault, print info about the fault */ + dev_err(kbdev->dev, + "GPU bus fault in AS%d at VA 0x%016llX\n" + "raw fault status: 0x%X\n" + "exception type 0x%X: %s\n" + "exception data 0x%X\n" + "pid: %d\n", + as_no, fault->addr, + status, + exception_type, kbase_gpu_exception_name(exception_type), + exception_data, + kctx->pid); + + /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter + * dumping AS transaction begin + */ + mutex_lock(&kbdev->mmu_hw_mutex); + + /* Set the MMU into unmapped mode */ + 
spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_mmu_disable(kctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + mutex_unlock(&kbdev->mmu_hw_mutex); + /* AS transaction end */ + + kbase_mmu_hw_clear_fault(kbdev, as, + KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); + kbase_mmu_hw_enable_fault(kbdev, as, + KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); +} + +/** + * The caller must ensure it's retained the ctx to prevent it from being + * scheduled out whilst it's being worked on. + */ +void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, + struct kbase_as *as, const char *reason_str, + struct kbase_fault *fault) +{ + unsigned long flags; + u32 exception_type; + u32 access_type; + u32 source_id; + int as_no; + struct kbase_device *kbdev; + struct kbasep_js_device_data *js_devdata; + + as_no = as->number; + kbdev = kctx->kbdev; + js_devdata = &kbdev->js_data; + + /* Make sure the context was active */ + if (WARN_ON(atomic_read(&kctx->refcount) <= 0)) + return; + + /* decode the fault status */ + exception_type = fault->status & 0xFF; + access_type = (fault->status >> 8) & 0x3; + source_id = (fault->status >> 16); + + /* terminal fault, print info about the fault */ + dev_err(kbdev->dev, + "Unhandled Page fault in AS%d at VA 0x%016llX\n" + "Reason: %s\n" + "raw fault status: 0x%X\n" + "exception type 0x%X: %s\n" + "access type 0x%X: %s\n" + "source id 0x%X\n" + "pid: %d\n", + as_no, fault->addr, + reason_str, + fault->status, + exception_type, kbase_gpu_exception_name(exception_type), + access_type, kbase_gpu_access_type_name(fault->status), + source_id, + kctx->pid); + + /* hardware counters dump fault handling */ + if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) && + (kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_DUMPING)) { + if ((fault->addr >= kbdev->hwcnt.addr) && + (fault->addr < (kbdev->hwcnt.addr + + kbdev->hwcnt.addr_bytes))) + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT; + } + + /* Stop the kctx from submitting 
more jobs and cause it to be scheduled + * out/rescheduled - this will occur on releasing the context's refcount + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbasep_js_clear_submit_allowed(js_devdata, kctx); + + /* Kill any running jobs from the context. Submit is disallowed, so no + * more jobs from this context can appear in the job slots from this + * point on + */ + kbase_backend_jm_kill_running_jobs_from_kctx(kctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* AS transaction begin */ + mutex_lock(&kbdev->mmu_hw_mutex); + + /* switch to UNMAPPED mode, will abort all jobs and stop + * any hw counter dumping + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_mmu_disable(kctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + mutex_unlock(&kbdev->mmu_hw_mutex); + + /* AS transaction end */ + /* Clear down the fault */ + kbase_mmu_hw_clear_fault(kbdev, as, + KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); + kbase_mmu_hw_enable_fault(kbdev, as, + KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); +} + +void kbase_mmu_interrupt_process(struct kbase_device *kbdev, + struct kbase_context *kctx, struct kbase_as *as, + struct kbase_fault *fault) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + dev_dbg(kbdev->dev, + "Entering %s kctx %p, as %p\n", + __func__, (void *)kctx, (void *)as); + + if (!kctx) { + dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Spurious IRQ or SW Design Error?\n", + kbase_as_has_bus_fault(as, fault) ? + "Bus error" : "Page fault", + as->number, fault->addr); + + /* Since no ctx was found, the MMU must be disabled. 
*/ + WARN_ON(as->current_setup.transtab); + + if (kbase_as_has_bus_fault(as, fault)) { + kbase_mmu_hw_clear_fault(kbdev, as, + KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); + kbase_mmu_hw_enable_fault(kbdev, as, + KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); + } else if (kbase_as_has_page_fault(as, fault)) { + kbase_mmu_hw_clear_fault(kbdev, as, + KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); + kbase_mmu_hw_enable_fault(kbdev, as, + KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); + } + + return; + } + + if (kbase_as_has_bus_fault(as, fault)) { + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + + /* + * hw counters dumping in progress, signal the + * other thread that it failed + */ + if ((kbdev->hwcnt.kctx == kctx) && + (kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_DUMPING)) + kbdev->hwcnt.backend.state = + KBASE_INSTR_STATE_FAULT; + + /* + * Stop the kctx from submitting more jobs and cause it + * to be scheduled out/rescheduled when all references + * to it are released + */ + kbasep_js_clear_submit_allowed(js_devdata, kctx); + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) + dev_warn(kbdev->dev, + "Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n", + as->number, fault->addr, + fault->extra_addr); + else + dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n", + as->number, fault->addr); + + /* + * We need to switch to UNMAPPED mode - but we do this in a + * worker so that we can sleep + */ + WARN_ON(!queue_work(as->pf_wq, &as->work_busfault)); + atomic_inc(&kbdev->faults_pending); + } else { + WARN_ON(!queue_work(as->pf_wq, &as->work_pagefault)); + atomic_inc(&kbdev->faults_pending); + } + + dev_dbg(kbdev->dev, + "Leaving %s kctx %p, as %p\n", + __func__, (void *)kctx, (void *)as); +} + +static void validate_protected_page_fault(struct kbase_device *kbdev) +{ + /* GPUs which support (native) protected mode shall not report page + * fault addresses unless it has protected debug mode and protected + * debug mode is turned on + */ + u32 protected_debug_mode 
= 0; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) { + protected_debug_mode = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_STATUS)) & GPU_DBGEN; + } + + if (!protected_debug_mode) { + /* fault_addr should never be reported in protected mode. + * However, we just continue by printing an error message + */ + dev_err(kbdev->dev, "Fault address reported in protected mode\n"); + } +} + +void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) +{ + const int num_as = 16; + const int busfault_shift = MMU_PAGE_FAULT_FLAGS; + const int pf_shift = 0; + const unsigned long as_bit_mask = (1UL << num_as) - 1; + unsigned long flags; + u32 new_mask; + u32 tmp, bf_bits, pf_bits; + bool gpu_lost = false; + + dev_dbg(kbdev->dev, "Entering %s irq_stat %u\n", + __func__, irq_stat); + /* bus faults */ + bf_bits = (irq_stat >> busfault_shift) & as_bit_mask; + /* page faults (note: Ignore ASes with both pf and bf) */ + pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits; + + if (WARN_ON(kbdev == NULL)) + return; + + /* remember current mask */ + spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); + /* mask interrupts for now */ + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); + spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); + + while (bf_bits | pf_bits) { + struct kbase_as *as; + int as_no; + struct kbase_context *kctx; + struct kbase_fault *fault; + + /* + * the while logic ensures we have a bit set, no need to check + * for not-found here + */ + as_no = ffs(bf_bits | pf_bits) - 1; + as = &kbdev->as[as_no]; + + /* find the fault type */ + if (bf_bits & (1 << as_no)) + fault = &as->bf_data; + else + fault = &as->pf_data; + + /* + * Refcount the kctx ASAP - it shouldn't disappear anyway, since + * Bus/Page faults _should_ only occur whilst jobs are running, + * and a job causing the Bus/Page fault shouldn't complete until + * the MMU is updated + */ + kctx = 
kbase_ctx_sched_as_to_ctx_refcount(kbdev, as_no); + + /* find faulting address */ + fault->addr = kbase_reg_read(kbdev, MMU_AS_REG(as_no, + AS_FAULTADDRESS_HI)); + fault->addr <<= 32; + fault->addr |= kbase_reg_read(kbdev, MMU_AS_REG(as_no, + AS_FAULTADDRESS_LO)); + /* Mark the fault protected or not */ + fault->protected_mode = kbdev->protected_mode; + + if (kbdev->protected_mode && fault->addr) { + /* check if address reporting is allowed */ + validate_protected_page_fault(kbdev); + } + + /* report the fault to debugfs */ + kbase_as_fault_debugfs_new(kbdev, as_no); + + /* record the fault status */ + fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, + AS_FAULTSTATUS)); + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) { + fault->extra_addr = kbase_reg_read(kbdev, + MMU_AS_REG(as_no, AS_FAULTEXTRA_HI)); + fault->extra_addr <<= 32; + fault->extra_addr |= kbase_reg_read(kbdev, + MMU_AS_REG(as_no, AS_FAULTEXTRA_LO)); + } + + /* check if we still have GPU */ + gpu_lost = kbase_is_gpu_lost(kbdev); + if (gpu_lost) { + if (kctx) + kbasep_js_runpool_release_ctx(kbdev, kctx); + return; + } + + if (kbase_as_has_bus_fault(as, fault)) { + /* Mark bus fault as handled. + * Note that a bus fault is processed first in case + * where both a bus fault and page fault occur. 
+ */ + bf_bits &= ~(1UL << as_no); + + /* remove the queued BF (and PF) from the mask */ + new_mask &= ~(MMU_BUS_ERROR(as_no) | + MMU_PAGE_FAULT(as_no)); + } else { + /* Mark page fault as handled */ + pf_bits &= ~(1UL << as_no); + + /* remove the queued PF from the mask */ + new_mask &= ~MMU_PAGE_FAULT(as_no); + } + + /* Process the interrupt for this address space */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_mmu_interrupt_process(kbdev, kctx, as, fault); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + + /* reenable interrupts */ + spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); + new_mask |= tmp; + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask); + spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); + + dev_dbg(kbdev->dev, "Leaving %s irq_stat %u\n", + __func__, irq_stat); +} + +int kbase_mmu_switch_to_ir(struct kbase_context *const kctx, + struct kbase_va_region *const reg) +{ + dev_dbg(kctx->kbdev->dev, + "Switching to incremental rendering for region %p\n", + (void *)reg); + return kbase_job_slot_softstop_start_rp(kctx, reg); +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.c new file mode 100644 index 0000000..c2c7c4b --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.c @@ -0,0 +1,2205 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * @file mali_kbase_mmu.c + * Base kernel MMU management. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define KBASE_MMU_PAGE_ENTRIES 512 + +/** + * kbase_mmu_flush_invalidate() - Flush and invalidate the GPU caches. + * @kctx: The KBase context. + * @vpfn: The virtual page frame number to start the flush on. + * @nr: The number of pages to flush. + * @sync: Set if the operation should be synchronous or not. + * + * Issue a cache flush + invalidate to the GPU caches and invalidate the TLBs. + * + * If sync is not set then transactions still in flight when the flush is issued + * may use the old page tables and the data they write will not be written out + * to memory, this function returns after the flush has been issued but + * before all accesses which might affect the flushed region have completed. + * + * If sync is set then accesses in the flushed region will be drained + * before data is flushed and invalidated through L1, L2 and into memory, + * after which point this function will return. + */ +static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, + u64 vpfn, size_t nr, bool sync); + +/** + * kbase_mmu_flush_invalidate_no_ctx() - Flush and invalidate the GPU caches. + * @kbdev: Device pointer. + * @vpfn: The virtual page frame number to start the flush on. + * @nr: The number of pages to flush. + * @sync: Set if the operation should be synchronous or not. + * @as_nr: GPU address space number for which flush + invalidate is required. 
+ * + * This is used for MMU tables which do not belong to a user space context. + */ +static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev, + u64 vpfn, size_t nr, bool sync, int as_nr); + +/** + * kbase_mmu_sync_pgd - sync page directory to memory + * @kbdev: Device pointer. + * @handle: Address of DMA region. + * @size: Size of the region to sync. + * + * This should be called after each page directory update. + */ + +static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, + dma_addr_t handle, size_t size) +{ + /* If page table is not coherent then ensure the gpu can read + * the pages from memory + */ + if (kbdev->system_coherency != COHERENCY_ACE) + dma_sync_single_for_device(kbdev->dev, handle, size, + DMA_TO_DEVICE); +} + +/* + * Definitions: + * - PGD: Page Directory. + * - PTE: Page Table Entry. A 64bit value pointing to the next + * level of translation + * - ATE: Address Translation Entry. A 64bit value pointing to + * a 4kB physical page. + */ + +static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags, int group_id); + +/** + * reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to + * a region on a GPU page fault + * + * @reg: The region that will be backed with more pages + * @fault_rel_pfn: PFN of the fault relative to the start of the region + * + * This calculates how much to increase the backing of a region by, based on + * where a GPU page fault occurred and the flags in the region. + * + * This can be more than the minimum number of pages that would reach + * @fault_rel_pfn, for example to reduce the overall rate of page fault + * interrupts on a region, or to ensure that the end address is aligned. 
+ * + * Return: the number of backed pages to increase by + */ +static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev, + struct kbase_va_region *reg, size_t fault_rel_pfn) +{ + size_t multiple = reg->extent; + size_t reg_current_size = kbase_reg_current_backed_size(reg); + size_t minimum_extra = fault_rel_pfn - reg_current_size + 1; + size_t remainder; + + if (!multiple) { + dev_warn(kbdev->dev, + "VA Region 0x%llx extent was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n", + ((unsigned long long)reg->start_pfn) << PAGE_SHIFT); + return minimum_extra; + } + + /* Calculate the remainder to subtract from minimum_extra to make it + * the desired (rounded down) multiple of the extent. + * Depending on reg's flags, the base used for calculating multiples is + * different + */ + if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { + /* multiple is based from the top of the initial commit, which + * has been allocated in such a way that (start_pfn + + * initial_commit) is already aligned to multiple. 
Hence the + * pfn for the end of committed memory will also be aligned to + * multiple + */ + size_t initial_commit = reg->initial_commit; + + if (fault_rel_pfn < initial_commit) { + /* this case is just to catch in case it's been + * recommitted by userspace to be smaller than the + * initial commit + */ + minimum_extra = initial_commit - reg_current_size; + remainder = 0; + } else { + /* same as calculating + * (fault_rel_pfn - initial_commit + 1) + */ + size_t pages_after_initial = minimum_extra + + reg_current_size - initial_commit; + + remainder = pages_after_initial % multiple; + } + } else { + /* multiple is based from the current backed size, even if the + * current backed size/pfn for end of committed memory are not + * themselves aligned to multiple + */ + remainder = minimum_extra % multiple; + } + + if (remainder == 0) + return minimum_extra; + + return minimum_extra + multiple - remainder; +} + +#ifdef CONFIG_MALI_CINSTR_GWT +static void kbase_gpu_mmu_handle_write_faulting_as( + struct kbase_device *kbdev, + struct kbase_as *faulting_as, + u64 start_pfn, size_t nr, u32 op) +{ + mutex_lock(&kbdev->mmu_hw_mutex); + + kbase_mmu_hw_clear_fault(kbdev, faulting_as, + KBASE_MMU_FAULT_TYPE_PAGE); + kbase_mmu_hw_do_operation(kbdev, faulting_as, start_pfn, + nr, op, 1); + + mutex_unlock(&kbdev->mmu_hw_mutex); + + kbase_mmu_hw_enable_fault(kbdev, faulting_as, + KBASE_MMU_FAULT_TYPE_PAGE); +} + +static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx, + struct kbase_as *faulting_as) +{ + struct kbasep_gwt_list_element *pos; + struct kbase_va_region *region; + struct kbase_device *kbdev; + struct kbase_fault *fault; + u64 fault_pfn, pfn_offset; + u32 op; + int ret; + int as_no; + + as_no = faulting_as->number; + kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); + fault = &faulting_as->pf_data; + fault_pfn = fault->addr >> PAGE_SHIFT; + + kbase_gpu_vm_lock(kctx); + + /* Find region and check if it should be writable. 
*/ + region = kbase_region_tracker_find_region_enclosing_address(kctx, + fault->addr); + if (kbase_is_region_invalid_or_free(region)) { + kbase_gpu_vm_unlock(kctx); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Memory is not mapped on the GPU", + &faulting_as->pf_data); + return; + } + + if (!(region->flags & KBASE_REG_GPU_WR)) { + kbase_gpu_vm_unlock(kctx); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Region does not have write permissions", + &faulting_as->pf_data); + return; + } + + /* Capture addresses of faulting write location + * for job dumping if write tracking is enabled. + */ + if (kctx->gwt_enabled) { + u64 page_addr = fault->addr & PAGE_MASK; + bool found = false; + /* Check if this write was already handled. */ + list_for_each_entry(pos, &kctx->gwt_current_list, link) { + if (page_addr == pos->page_addr) { + found = true; + break; + } + } + + if (!found) { + pos = kmalloc(sizeof(*pos), GFP_KERNEL); + if (pos) { + pos->region = region; + pos->page_addr = page_addr; + pos->num_pages = 1; + list_add(&pos->link, &kctx->gwt_current_list); + } else { + dev_warn(kbdev->dev, "kmalloc failure"); + } + } + } + + pfn_offset = fault_pfn - region->start_pfn; + /* Now make this faulting page writable to GPU. 
*/ + ret = kbase_mmu_update_pages_no_flush(kctx, fault_pfn, + &kbase_get_gpu_phy_pages(region)[pfn_offset], + 1, region->flags, region->gpu_alloc->group_id); + + /* flush L2 and unlock the VA (resumes the MMU) */ + op = AS_COMMAND_FLUSH_PT; + + kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as, + fault_pfn, 1, op); + + kbase_gpu_vm_unlock(kctx); +} + +static void kbase_gpu_mmu_handle_permission_fault(struct kbase_context *kctx, + struct kbase_as *faulting_as) +{ + struct kbase_fault *fault = &faulting_as->pf_data; + + switch (AS_FAULTSTATUS_ACCESS_TYPE_GET(fault->status)) { + case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: + case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: + kbase_gpu_mmu_handle_write_fault(kctx, faulting_as); + break; + case AS_FAULTSTATUS_ACCESS_TYPE_EX: + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Execute Permission fault", fault); + break; + case AS_FAULTSTATUS_ACCESS_TYPE_READ: + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Read Permission fault", fault); + break; + default: + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Unknown Permission fault", fault); + break; + } +} +#endif + +#define MAX_POOL_LEVEL 2 + +/** + * page_fault_try_alloc - Try to allocate memory from a context pool + * @kctx: Context pointer + * @region: Region to grow + * @new_pages: Number of 4 kB pages to allocate + * @pages_to_grow: Pointer to variable to store number of outstanding pages on + * failure. This can be either 4 kB or 2 MB pages, depending on + * the number of pages requested. + * @grow_2mb_pool: Pointer to variable to store which pool needs to grow - true + * for 2 MB, false for 4 kB. + * @prealloc_sas: Pointer to kbase_sub_alloc structures + * + * This function will try to allocate as many pages as possible from the context + * pool, then if required will try to allocate the remaining pages from the + * device pool. 
+ * + * This function will not allocate any new memory beyond that that is already + * present in the context or device pools. This is because it is intended to be + * called with the vm_lock held, which could cause recursive locking if the + * allocation caused the out-of-memory killer to run. + * + * If 2 MB pages are enabled and new_pages is >= 2 MB then pages_to_grow will be + * a count of 2 MB pages, otherwise it will be a count of 4 kB pages. + * + * Return: true if successful, false on failure + */ +static bool page_fault_try_alloc(struct kbase_context *kctx, + struct kbase_va_region *region, size_t new_pages, + int *pages_to_grow, bool *grow_2mb_pool, + struct kbase_sub_alloc **prealloc_sas) +{ + struct tagged_addr *gpu_pages[MAX_POOL_LEVEL] = {NULL}; + struct tagged_addr *cpu_pages[MAX_POOL_LEVEL] = {NULL}; + size_t pages_alloced[MAX_POOL_LEVEL] = {0}; + struct kbase_mem_pool *pool, *root_pool; + int pool_level = 0; + bool alloc_failed = false; + size_t pages_still_required; + + if (WARN_ON(region->gpu_alloc->group_id >= + MEMORY_GROUP_MANAGER_NR_GROUPS)) { + /* Do not try to grow the memory pool */ + *pages_to_grow = 0; + return false; + } + +#ifdef CONFIG_MALI_2MB_ALLOC + if (new_pages >= (SZ_2M / SZ_4K)) { + root_pool = &kctx->mem_pools.large[region->gpu_alloc->group_id]; + *grow_2mb_pool = true; + } else { +#endif + root_pool = &kctx->mem_pools.small[region->gpu_alloc->group_id]; + *grow_2mb_pool = false; +#ifdef CONFIG_MALI_2MB_ALLOC + } +#endif + + if (region->gpu_alloc != region->cpu_alloc) + new_pages *= 2; + + pages_still_required = new_pages; + + /* Determine how many pages are in the pools before trying to allocate. + * Don't attempt to allocate & free if the allocation can't succeed. 
+ */ + for (pool = root_pool; pool != NULL; pool = pool->next_pool) { + size_t pool_size_4k; + + kbase_mem_pool_lock(pool); + + pool_size_4k = kbase_mem_pool_size(pool) << pool->order; + if (pool_size_4k >= pages_still_required) + pages_still_required = 0; + else + pages_still_required -= pool_size_4k; + + kbase_mem_pool_unlock(pool); + + if (!pages_still_required) + break; + } + + if (pages_still_required) { + /* Insufficient pages in pools. Don't try to allocate - just + * request a grow. + */ + *pages_to_grow = pages_still_required; + + return false; + } + + /* Since we've dropped the pool locks, the amount of memory in the pools + * may change between the above check and the actual allocation. + */ + pool = root_pool; + for (pool_level = 0; pool_level < MAX_POOL_LEVEL; pool_level++) { + size_t pool_size_4k; + size_t pages_to_alloc_4k; + size_t pages_to_alloc_4k_per_alloc; + + kbase_mem_pool_lock(pool); + + /* Allocate as much as possible from this pool*/ + pool_size_4k = kbase_mem_pool_size(pool) << pool->order; + pages_to_alloc_4k = MIN(new_pages, pool_size_4k); + if (region->gpu_alloc == region->cpu_alloc) + pages_to_alloc_4k_per_alloc = pages_to_alloc_4k; + else + pages_to_alloc_4k_per_alloc = pages_to_alloc_4k >> 1; + + pages_alloced[pool_level] = pages_to_alloc_4k; + if (pages_to_alloc_4k) { + gpu_pages[pool_level] = + kbase_alloc_phy_pages_helper_locked( + region->gpu_alloc, pool, + pages_to_alloc_4k_per_alloc, + &prealloc_sas[0]); + + if (!gpu_pages[pool_level]) { + alloc_failed = true; + } else if (region->gpu_alloc != region->cpu_alloc) { + cpu_pages[pool_level] = + kbase_alloc_phy_pages_helper_locked( + region->cpu_alloc, pool, + pages_to_alloc_4k_per_alloc, + &prealloc_sas[1]); + + if (!cpu_pages[pool_level]) + alloc_failed = true; + } + } + + kbase_mem_pool_unlock(pool); + + if (alloc_failed) { + WARN_ON(!new_pages); + WARN_ON(pages_to_alloc_4k >= new_pages); + WARN_ON(pages_to_alloc_4k_per_alloc >= new_pages); + break; + } + + new_pages -= 
pages_to_alloc_4k; + + if (!new_pages) + break; + + pool = pool->next_pool; + if (!pool) + break; + } + + if (new_pages) { + /* Allocation was unsuccessful */ + int max_pool_level = pool_level; + + pool = root_pool; + + /* Free memory allocated so far */ + for (pool_level = 0; pool_level <= max_pool_level; + pool_level++) { + kbase_mem_pool_lock(pool); + + if (region->gpu_alloc != region->cpu_alloc) { + if (pages_alloced[pool_level] && + cpu_pages[pool_level]) + kbase_free_phy_pages_helper_locked( + region->cpu_alloc, + pool, cpu_pages[pool_level], + pages_alloced[pool_level]); + } + + if (pages_alloced[pool_level] && gpu_pages[pool_level]) + kbase_free_phy_pages_helper_locked( + region->gpu_alloc, + pool, gpu_pages[pool_level], + pages_alloced[pool_level]); + + kbase_mem_pool_unlock(pool); + + pool = pool->next_pool; + } + + /* + * If the allocation failed despite there being enough memory in + * the pool, then just fail. Otherwise, try to grow the memory + * pool. + */ + if (alloc_failed) + *pages_to_grow = 0; + else + *pages_to_grow = new_pages; + + return false; + } + + /* Allocation was successful. No pages to grow, return success. 
*/ + *pages_to_grow = 0; + + return true; +} + +/* Small wrapper function to factor out GPU-dependent context releasing */ +static void release_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + kbasep_js_runpool_release_ctx(kbdev, kctx); +} + +void page_fault_worker(struct work_struct *data) +{ + u64 fault_pfn; + u32 fault_status; + size_t new_pages; + size_t fault_rel_pfn; + struct kbase_as *faulting_as; + int as_no; + struct kbase_context *kctx; + struct kbase_device *kbdev; + struct kbase_va_region *region; + struct kbase_fault *fault; + int err; + bool grown = false; + int pages_to_grow; + bool grow_2mb_pool; + struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; + int i; + size_t current_backed_size; + + + faulting_as = container_of(data, struct kbase_as, work_pagefault); + fault = &faulting_as->pf_data; + fault_pfn = fault->addr >> PAGE_SHIFT; + as_no = faulting_as->number; + + kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); + dev_dbg(kbdev->dev, + "Entering %s %p, fault_pfn %lld, as_no %d\n", + __func__, (void *)data, fault_pfn, as_no); + + /* Grab the context that was already refcounted in kbase_mmu_interrupt() + * Therefore, it cannot be scheduled out of this AS until we explicitly + * release it + */ + kctx = kbase_ctx_sched_as_to_ctx(kbdev, as_no); + if (!kctx) { + atomic_dec(&kbdev->faults_pending); + return; + } + + KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev); + + if (unlikely(fault->protected_mode)) { + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Protected mode fault", fault); + kbase_mmu_hw_clear_fault(kbdev, faulting_as, + KBASE_MMU_FAULT_TYPE_PAGE); + + goto fault_done; + } + + fault_status = fault->status; + switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) { + + case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT: + /* need to check against the region to handle this one */ + break; + + case AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT: +#ifdef CONFIG_MALI_CINSTR_GWT + /* If GWT was ever 
enabled then we need to handle + * write fault pages even if the feature was disabled later. + */ + if (kctx->gwt_was_enabled) { + kbase_gpu_mmu_handle_permission_fault(kctx, + faulting_as); + goto fault_done; + } +#endif + + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Permission failure", fault); + goto fault_done; + + case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT: + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Translation table bus fault", fault); + goto fault_done; + + case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG: + /* nothing to do, but we don't expect this fault currently */ + dev_warn(kbdev->dev, "Access flag unexpectedly set"); + goto fault_done; + + case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT: + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Address size fault", fault); + else + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Unknown fault code", fault); + goto fault_done; + + case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT: + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Memory attributes fault", fault); + else + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Unknown fault code", fault); + goto fault_done; + + default: + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Unknown fault code", fault); + goto fault_done; + } + +#ifdef CONFIG_MALI_2MB_ALLOC + /* Preallocate memory for the sub-allocation structs if necessary */ + for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { + prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); + if (!prealloc_sas[i]) { + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Failed pre-allocating memory for sub-allocations' metadata", + fault); + goto fault_done; + } + } +#endif /* CONFIG_MALI_2MB_ALLOC */ + +page_fault_retry: + /* so we have a translation fault, + * let's see if it is for growable memory 
+ */ + kbase_gpu_vm_lock(kctx); + + region = kbase_region_tracker_find_region_enclosing_address(kctx, + fault->addr); + if (kbase_is_region_invalid_or_free(region)) { + kbase_gpu_vm_unlock(kctx); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Memory is not mapped on the GPU", fault); + goto fault_done; + } + + if (region->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { + kbase_gpu_vm_unlock(kctx); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "DMA-BUF is not mapped on the GPU", fault); + goto fault_done; + } + + if (region->gpu_alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) { + kbase_gpu_vm_unlock(kctx); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Bad physical memory group ID", fault); + goto fault_done; + } + + if ((region->flags & GROWABLE_FLAGS_REQUIRED) + != GROWABLE_FLAGS_REQUIRED) { + kbase_gpu_vm_unlock(kctx); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Memory is not growable", fault); + goto fault_done; + } + + if ((region->flags & KBASE_REG_DONT_NEED)) { + kbase_gpu_vm_unlock(kctx); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Don't need memory can't be grown", fault); + goto fault_done; + } + + /* find the size we need to grow it by + * we know the result fit in a size_t due to + * kbase_region_tracker_find_region_enclosing_address + * validating the fault_address to be within a size_t from the start_pfn + */ + fault_rel_pfn = fault_pfn - region->start_pfn; + + current_backed_size = kbase_reg_current_backed_size(region); + + if (fault_rel_pfn < current_backed_size) { + dev_dbg(kbdev->dev, + "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring", + fault->addr, region->start_pfn, + region->start_pfn + + current_backed_size); + + mutex_lock(&kbdev->mmu_hw_mutex); + + kbase_mmu_hw_clear_fault(kbdev, faulting_as, + KBASE_MMU_FAULT_TYPE_PAGE); + /* [1] in case another page fault occurred while we were + * handling the (duplicate) page fault we need to ensure we + * don't 
loose the other page fault as result of us clearing + * the MMU IRQ. Therefore, after we clear the MMU IRQ we send + * an UNLOCK command that will retry any stalled memory + * transaction (which should cause the other page fault to be + * raised again). + */ + kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0, + AS_COMMAND_UNLOCK, 1); + + mutex_unlock(&kbdev->mmu_hw_mutex); + + kbase_mmu_hw_enable_fault(kbdev, faulting_as, + KBASE_MMU_FAULT_TYPE_PAGE); + kbase_gpu_vm_unlock(kctx); + + goto fault_done; + } + + new_pages = reg_grow_calc_extra_pages(kbdev, region, fault_rel_pfn); + + /* cap to max vsize */ + new_pages = min(new_pages, region->nr_pages - current_backed_size); + dev_dbg(kctx->kbdev->dev, "Allocate %zu pages on page fault\n", + new_pages); + + if (new_pages == 0) { + mutex_lock(&kbdev->mmu_hw_mutex); + + /* Duplicate of a fault we've already handled, nothing to do */ + kbase_mmu_hw_clear_fault(kbdev, faulting_as, + KBASE_MMU_FAULT_TYPE_PAGE); + /* See comment [1] about UNLOCK usage */ + kbase_mmu_hw_do_operation(kbdev, faulting_as, 0, 0, + AS_COMMAND_UNLOCK, 1); + + mutex_unlock(&kbdev->mmu_hw_mutex); + + kbase_mmu_hw_enable_fault(kbdev, faulting_as, + KBASE_MMU_FAULT_TYPE_PAGE); + kbase_gpu_vm_unlock(kctx); + goto fault_done; + } + + pages_to_grow = 0; + + spin_lock(&kctx->mem_partials_lock); + grown = page_fault_try_alloc(kctx, region, new_pages, &pages_to_grow, + &grow_2mb_pool, prealloc_sas); + spin_unlock(&kctx->mem_partials_lock); + + if (grown) { + u64 pfn_offset; + u32 op; + + /* alloc success */ + WARN_ON(kbase_reg_current_backed_size(region) > + region->nr_pages); + + /* set up the new pages */ + pfn_offset = kbase_reg_current_backed_size(region) - new_pages; + /* + * Note: + * Issuing an MMU operation will unlock the MMU and cause the + * translation to be replayed. 
If the page insertion fails then + * rather then trying to continue the context should be killed + * so the no_flush version of insert_pages is used which allows + * us to unlock the MMU as we see fit. + */ + err = kbase_mmu_insert_pages_no_flush(kbdev, &kctx->mmu, + region->start_pfn + pfn_offset, + &kbase_get_gpu_phy_pages(region)[pfn_offset], + new_pages, region->flags, region->gpu_alloc->group_id); + if (err) { + kbase_free_phy_pages_helper(region->gpu_alloc, + new_pages); + if (region->gpu_alloc != region->cpu_alloc) + kbase_free_phy_pages_helper(region->cpu_alloc, + new_pages); + kbase_gpu_vm_unlock(kctx); + /* The locked VA region will be unlocked and the cache + * invalidated in here + */ + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Page table update failure", fault); + goto fault_done; + } + KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, kctx->id, as_no, + (u64)new_pages); + trace_mali_mmu_page_fault_grow(region, fault, new_pages); + +#if MALI_INCREMENTAL_RENDERING + /* Switch to incremental rendering if we have nearly run out of + * memory in a JIT memory allocation. + */ + if (region->threshold_pages && + kbase_reg_current_backed_size(region) > + region->threshold_pages) { + + dev_dbg(kctx->kbdev->dev, + "%zu pages exceeded IR threshold %zu\n", + new_pages + current_backed_size, + region->threshold_pages); + + if (kbase_mmu_switch_to_ir(kctx, region) >= 0) { + dev_dbg(kctx->kbdev->dev, + "Get region %p for IR\n", + (void *)region); + kbase_va_region_alloc_get(kctx, region); + } + } +#endif + + /* AS transaction begin */ + mutex_lock(&kbdev->mmu_hw_mutex); + + /* flush L2 and unlock the VA (resumes the MMU) */ + op = AS_COMMAND_FLUSH_PT; + + /* clear MMU interrupt - this needs to be done after updating + * the page tables but before issuing a FLUSH command. The + * FLUSH cmd has a side effect that it restarts stalled memory + * transactions in other address spaces which may cause + * another fault to occur. 
If we didn't clear the interrupt at + * this stage a new IRQ might not be raised when the GPU finds + * a MMU IRQ is already pending. + */ + kbase_mmu_hw_clear_fault(kbdev, faulting_as, + KBASE_MMU_FAULT_TYPE_PAGE); + + kbase_mmu_hw_do_operation(kbdev, faulting_as, + fault->addr >> PAGE_SHIFT, + new_pages, op, 1); + + mutex_unlock(&kbdev->mmu_hw_mutex); + /* AS transaction end */ + + /* reenable this in the mask */ + kbase_mmu_hw_enable_fault(kbdev, faulting_as, + KBASE_MMU_FAULT_TYPE_PAGE); + +#ifdef CONFIG_MALI_CINSTR_GWT + if (kctx->gwt_enabled) { + /* GWT also tracks growable regions. */ + struct kbasep_gwt_list_element *pos; + + pos = kmalloc(sizeof(*pos), GFP_KERNEL); + if (pos) { + pos->region = region; + pos->page_addr = (region->start_pfn + + pfn_offset) << + PAGE_SHIFT; + pos->num_pages = new_pages; + list_add(&pos->link, + &kctx->gwt_current_list); + } else { + dev_warn(kbdev->dev, "kmalloc failure"); + } + } +#endif + kbase_gpu_vm_unlock(kctx); + } else { + int ret = -ENOMEM; + + kbase_gpu_vm_unlock(kctx); + + /* If the memory pool was insufficient then grow it and retry. + * Otherwise fail the allocation. 
+ */ + if (pages_to_grow > 0) { +#ifdef CONFIG_MALI_2MB_ALLOC + if (grow_2mb_pool) { + /* Round page requirement up to nearest 2 MB */ + struct kbase_mem_pool *const lp_mem_pool = + &kctx->mem_pools.large[ + region->gpu_alloc->group_id]; + + pages_to_grow = (pages_to_grow + + ((1 << lp_mem_pool->order) - 1)) + >> lp_mem_pool->order; + + ret = kbase_mem_pool_grow(lp_mem_pool, + pages_to_grow); + } else { +#endif + struct kbase_mem_pool *const mem_pool = + &kctx->mem_pools.small[ + region->gpu_alloc->group_id]; + + ret = kbase_mem_pool_grow(mem_pool, + pages_to_grow); +#ifdef CONFIG_MALI_2MB_ALLOC + } +#endif + } + if (ret < 0) { + /* failed to extend, handle as a normal PF */ + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Page allocation failure", fault); + } else { + dev_dbg(kbdev->dev, "Try again after pool_grow\n"); + goto page_fault_retry; + } + } + +fault_done: + for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) + kfree(prealloc_sas[i]); + + /* + * By this point, the fault was handled in some way, + * so release the ctx refcount + */ + release_ctx(kbdev, kctx); + + atomic_dec(&kbdev->faults_pending); + dev_dbg(kbdev->dev, "Leaving page_fault_worker %p\n", (void *)data); +} + +static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut) +{ + u64 *page; + int i; + struct page *p; + + p = kbase_mem_pool_alloc(&kbdev->mem_pools.small[mmut->group_id]); + if (!p) + return 0; + + page = kmap(p); + if (page == NULL) + goto alloc_free; + + /* If the MMU tables belong to a context then account the memory usage + * to that context, otherwise the MMU tables are device wide and are + * only accounted to the device. 
+ */ + if (mmut->kctx) { + int new_page_count; + + new_page_count = atomic_add_return(1, + &mmut->kctx->used_pages); + KBASE_TLSTREAM_AUX_PAGESALLOC( + kbdev, + mmut->kctx->id, + (u64)new_page_count); + kbase_process_page_usage_inc(mmut->kctx, 1); + } + + atomic_add(1, &kbdev->memdev.used_pages); + + for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) + kbdev->mmu_mode->entry_invalidate(&page[i]); + + kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE); + + kunmap(p); + return page_to_phys(p); + +alloc_free: + kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, + false); + + return 0; +} + +/* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the + * new table from the pool if needed and possible + */ +static int mmu_get_next_pgd(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + phys_addr_t *pgd, u64 vpfn, int level) +{ + u64 *page; + phys_addr_t target_pgd; + struct page *p; + + KBASE_DEBUG_ASSERT(*pgd); + + lockdep_assert_held(&mmut->mmu_lock); + + /* + * Architecture spec defines level-0 as being the top-most. + * This is a bit unfortunate here, but we keep the same convention. + */ + vpfn >>= (3 - level) * 9; + vpfn &= 0x1FF; + + p = pfn_to_page(PFN_DOWN(*pgd)); + page = kmap(p); + if (page == NULL) { + dev_warn(kbdev->dev, "%s: kmap failure\n", __func__); + return -EINVAL; + } + + target_pgd = kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]); + + if (!target_pgd) { + target_pgd = kbase_mmu_alloc_pgd(kbdev, mmut); + if (!target_pgd) { + dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure\n", + __func__); + kunmap(p); + return -ENOMEM; + } + + kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd); + + kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE); + /* Rely on the caller to update the address space flags. 
*/ + } + + kunmap(p); + *pgd = target_pgd; + + return 0; +} + +/* + * Returns the PGD for the specified level of translation + */ +static int mmu_get_pgd_at_level(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + u64 vpfn, + int level, + phys_addr_t *out_pgd) +{ + phys_addr_t pgd; + int l; + + lockdep_assert_held(&mmut->mmu_lock); + pgd = mmut->pgd; + + for (l = MIDGARD_MMU_TOPLEVEL; l < level; l++) { + int err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l); + /* Handle failure condition */ + if (err) { + dev_dbg(kbdev->dev, + "%s: mmu_get_next_pgd failure at level %d\n", + __func__, l); + return err; + } + } + + *out_pgd = pgd; + + return 0; +} + +static int mmu_get_bottom_pgd(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + u64 vpfn, + phys_addr_t *out_pgd) +{ + return mmu_get_pgd_at_level(kbdev, mmut, vpfn, MIDGARD_MMU_BOTTOMLEVEL, + out_pgd); +} + +static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + u64 from_vpfn, u64 to_vpfn) +{ + phys_addr_t pgd; + u64 vpfn = from_vpfn; + struct kbase_mmu_mode const *mmu_mode; + + /* 64-bit address range is the max */ + KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); + KBASE_DEBUG_ASSERT(from_vpfn <= to_vpfn); + + lockdep_assert_held(&mmut->mmu_lock); + + mmu_mode = kbdev->mmu_mode; + + while (vpfn < to_vpfn) { + unsigned int i; + unsigned int idx = vpfn & 0x1FF; + unsigned int count = KBASE_MMU_PAGE_ENTRIES - idx; + unsigned int pcount = 0; + unsigned int left = to_vpfn - vpfn; + int level; + u64 *page; + + if (count > left) + count = left; + + /* need to check if this is a 2MB page or a 4kB */ + pgd = mmut->pgd; + + for (level = MIDGARD_MMU_TOPLEVEL; + level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { + idx = (vpfn >> ((3 - level) * 9)) & 0x1FF; + page = kmap(phys_to_page(pgd)); + if (mmu_mode->ate_is_valid(page[idx], level)) + break; /* keep the mapping */ + kunmap(phys_to_page(pgd)); + pgd = mmu_mode->pte_to_phy_addr(page[idx]); + } + + switch 
(level) { + case MIDGARD_MMU_LEVEL(2): + /* remap to single entry to update */ + pcount = 1; + break; + case MIDGARD_MMU_BOTTOMLEVEL: + /* page count is the same as the logical count */ + pcount = count; + break; + default: + dev_warn(kbdev->dev, "%sNo support for ATEs at level %d\n", + __func__, level); + goto next; + } + + /* Invalidate the entries we added */ + for (i = 0; i < pcount; i++) + mmu_mode->entry_invalidate(&page[idx + i]); + + kbase_mmu_sync_pgd(kbdev, + kbase_dma_addr(phys_to_page(pgd)) + 8 * idx, + 8 * pcount); + kunmap(phys_to_page(pgd)); + +next: + vpfn += count; + } +} + +/* + * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn' + */ +int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, + struct tagged_addr phys, size_t nr, + unsigned long flags, int const group_id) +{ + phys_addr_t pgd; + u64 *pgd_page; + /* In case the insert_single_page only partially completes + * we need to be able to recover + */ + bool recover_required = false; + u64 start_vpfn = vpfn; + size_t recover_count = 0; + size_t remain = nr; + int err; + struct kbase_device *kbdev; + + if (WARN_ON(kctx == NULL)) + return -EINVAL; + + /* 64-bit address range is the max */ + KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); + + kbdev = kctx->kbdev; + + /* Early out if there is nothing to do */ + if (nr == 0) + return 0; + + mutex_lock(&kctx->mmu.mmu_lock); + + while (remain) { + unsigned int i; + unsigned int index = vpfn & 0x1FF; + unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; + struct page *p; + + if (count > remain) + count = remain; + + /* + * Repeatedly calling mmu_get_bottom_pte() is clearly + * suboptimal. We don't have to re-parse the whole tree + * each time (just cache the l0-l2 sequence). + * On the other hand, it's only a gain when we map more than + * 256 pages at once (on average). Do we really care? 
+ */ + do { + err = mmu_get_bottom_pgd(kbdev, &kctx->mmu, + vpfn, &pgd); + if (err != -ENOMEM) + break; + /* Fill the memory pool with enough pages for + * the page walk to succeed + */ + mutex_unlock(&kctx->mmu.mmu_lock); + err = kbase_mem_pool_grow( + &kbdev->mem_pools.small[ + kctx->mmu.group_id], + MIDGARD_MMU_BOTTOMLEVEL); + mutex_lock(&kctx->mmu.mmu_lock); + } while (!err); + if (err) { + dev_warn(kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n"); + if (recover_required) { + /* Invalidate the pages we have partially + * completed + */ + mmu_insert_pages_failure_recovery(kbdev, + &kctx->mmu, + start_vpfn, + start_vpfn + recover_count); + } + goto fail_unlock; + } + + p = pfn_to_page(PFN_DOWN(pgd)); + pgd_page = kmap(p); + if (!pgd_page) { + dev_warn(kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n"); + if (recover_required) { + /* Invalidate the pages we have partially + * completed + */ + mmu_insert_pages_failure_recovery(kbdev, + &kctx->mmu, + start_vpfn, + start_vpfn + recover_count); + } + err = -ENOMEM; + goto fail_unlock; + } + + for (i = 0; i < count; i++) { + unsigned int ofs = index + i; + + /* Fail if the current page is a valid ATE entry */ + KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL)); + + pgd_page[ofs] = kbase_mmu_create_ate(kbdev, + phys, flags, MIDGARD_MMU_BOTTOMLEVEL, group_id); + } + + vpfn += count; + remain -= count; + + kbase_mmu_sync_pgd(kbdev, + kbase_dma_addr(p) + (index * sizeof(u64)), + count * sizeof(u64)); + + kunmap(p); + /* We have started modifying the page table. 
+ * If further pages need inserting and fail we need to undo what + * has already taken place + */ + recover_required = true; + recover_count += count; + } + mutex_unlock(&kctx->mmu.mmu_lock); + kbase_mmu_flush_invalidate(kctx, start_vpfn, nr, false); + return 0; + +fail_unlock: + mutex_unlock(&kctx->mmu.mmu_lock); + kbase_mmu_flush_invalidate(kctx, start_vpfn, nr, false); + return err; +} + +static inline void cleanup_empty_pte(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, u64 *pte) +{ + phys_addr_t tmp_pgd; + struct page *tmp_p; + + tmp_pgd = kbdev->mmu_mode->pte_to_phy_addr(*pte); + tmp_p = phys_to_page(tmp_pgd); + kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], + tmp_p, false); + + /* If the MMU tables belong to a context then we accounted the memory + * usage to that context, so decrement here. + */ + if (mmut->kctx) { + kbase_process_page_usage_dec(mmut->kctx, 1); + atomic_sub(1, &mmut->kctx->used_pages); + } + atomic_sub(1, &kbdev->memdev.used_pages); +} + +u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, + struct tagged_addr const phy, unsigned long const flags, + int const level, int const group_id) +{ + u64 entry; + + kbdev->mmu_mode->entry_set_ate(&entry, phy, flags, level); + return kbdev->mgm_dev->ops.mgm_update_gpu_pte(kbdev->mgm_dev, + group_id, level, entry); +} + +int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + const u64 start_vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags, + int const group_id) +{ + phys_addr_t pgd; + u64 *pgd_page; + u64 insert_vpfn = start_vpfn; + size_t remain = nr; + int err; + struct kbase_mmu_mode const *mmu_mode; + + /* Note that 0 is a valid start_vpfn */ + /* 64-bit address range is the max */ + KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE)); + + mmu_mode = kbdev->mmu_mode; + + /* Early out if there is nothing to do */ + if (nr == 0) + return 0; + + mutex_lock(&mmut->mmu_lock); + + while (remain) { + 
unsigned int i; + unsigned int vindex = insert_vpfn & 0x1FF; + unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex; + struct page *p; + int cur_level; + + if (count > remain) + count = remain; + + if (!vindex && is_huge_head(*phys)) + cur_level = MIDGARD_MMU_LEVEL(2); + else + cur_level = MIDGARD_MMU_BOTTOMLEVEL; + + /* + * Repeatedly calling mmu_get_pgd_at_level() is clearly + * suboptimal. We don't have to re-parse the whole tree + * each time (just cache the l0-l2 sequence). + * On the other hand, it's only a gain when we map more than + * 256 pages at once (on average). Do we really care? + */ + do { + err = mmu_get_pgd_at_level(kbdev, mmut, insert_vpfn, + cur_level, &pgd); + if (err != -ENOMEM) + break; + /* Fill the memory pool with enough pages for + * the page walk to succeed + */ + mutex_unlock(&mmut->mmu_lock); + err = kbase_mem_pool_grow( + &kbdev->mem_pools.small[mmut->group_id], + cur_level); + mutex_lock(&mmut->mmu_lock); + } while (!err); + + if (err) { + dev_warn(kbdev->dev, + "%s: mmu_get_bottom_pgd failure\n", __func__); + if (insert_vpfn != start_vpfn) { + /* Invalidate the pages we have partially + * completed + */ + mmu_insert_pages_failure_recovery(kbdev, + mmut, start_vpfn, insert_vpfn); + } + goto fail_unlock; + } + + p = pfn_to_page(PFN_DOWN(pgd)); + pgd_page = kmap(p); + if (!pgd_page) { + dev_warn(kbdev->dev, "%s: kmap failure\n", + __func__); + if (insert_vpfn != start_vpfn) { + /* Invalidate the pages we have partially + * completed + */ + mmu_insert_pages_failure_recovery(kbdev, + mmut, start_vpfn, insert_vpfn); + } + err = -ENOMEM; + goto fail_unlock; + } + + if (cur_level == MIDGARD_MMU_LEVEL(2)) { + int level_index = (insert_vpfn >> 9) & 0x1FF; + u64 *target = &pgd_page[level_index]; + + if (mmu_mode->pte_is_valid(*target, cur_level)) + cleanup_empty_pte(kbdev, mmut, target); + *target = kbase_mmu_create_ate(kbdev, *phys, flags, + cur_level, group_id); + } else { + for (i = 0; i < count; i++) { + unsigned int ofs = vindex + i; + 
u64 *target = &pgd_page[ofs]; + + /* Warn if the current page is a valid ATE + * entry. The page table shouldn't have anything + * in the place where we are trying to put a + * new entry. Modification to page table entries + * should be performed with + * kbase_mmu_update_pages() + */ + WARN_ONCE((*target & 1UL) != 0, + "vindex=0x%x, count=%d, pgd=0x%pa, " + "insert_vpfn=0x%llx, start_vpfn=0x%llx, " + "nr=%d, remain=%d, group_id=%d, " + "is_huge_head(*phys)=%d\n", + vindex, count, &pgd, + insert_vpfn, start_vpfn, + (u32)nr, (u32)remain, group_id, + is_huge_head(*phys)); + + *target = kbase_mmu_create_ate(kbdev, + phys[i], flags, cur_level, group_id); + } + } + + phys += count; + insert_vpfn += count; + remain -= count; + + kbase_mmu_sync_pgd(kbdev, + kbase_dma_addr(p) + (vindex * sizeof(u64)), + count * sizeof(u64)); + + kunmap(p); + } + + err = 0; + +fail_unlock: + mutex_unlock(&mmut->mmu_lock); + return err; +} + +/* + * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' for GPU address space + * number 'as_nr'. + */ +int kbase_mmu_insert_pages(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags, int as_nr, int const group_id) +{ + int err; + + err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, + phys, nr, flags, group_id); + + if (mmut->kctx) + kbase_mmu_flush_invalidate(mmut->kctx, vpfn, nr, false); + else + kbase_mmu_flush_invalidate_no_ctx(kbdev, vpfn, nr, false, + as_nr); + + return err; +} + +KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); + +/** + * kbase_mmu_flush_invalidate_noretain() - Flush and invalidate the GPU caches + * without retaining the kbase context. + * @kctx: The KBase context. + * @vpfn: The virtual page frame number to start the flush on. + * @nr: The number of pages to flush. + * @sync: Set if the operation should be synchronous or not. + * + * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any + * other locking. 
+ */ +static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx, + u64 vpfn, size_t nr, bool sync) +{ + struct kbase_device *kbdev = kctx->kbdev; + int err; + u32 op; + + /* Early out if there is nothing to do */ + if (nr == 0) + return; + + if (sync) + op = AS_COMMAND_FLUSH_MEM; + else + op = AS_COMMAND_FLUSH_PT; + + err = kbase_mmu_hw_do_operation(kbdev, + &kbdev->as[kctx->as_nr], + vpfn, nr, op, 0); + if (err) { + /* Flush failed to complete, assume the + * GPU has hung and perform a reset to recover + */ + dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); + + if (kbase_prepare_to_reset_gpu_locked(kbdev)) + kbase_reset_gpu_locked(kbdev); + } +} + +/* Perform a flush/invalidate on a particular address space + */ +static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev, + struct kbase_as *as, + u64 vpfn, size_t nr, bool sync) +{ + int err; + u32 op; + + if (kbase_pm_context_active_handle_suspend(kbdev, + KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { + /* GPU is off so there's no need to perform flush/invalidate */ + return; + } + + /* AS transaction begin */ + mutex_lock(&kbdev->mmu_hw_mutex); + + if (sync) + op = AS_COMMAND_FLUSH_MEM; + else + op = AS_COMMAND_FLUSH_PT; + + err = kbase_mmu_hw_do_operation(kbdev, + as, vpfn, nr, op, 0); + + if (err) { + /* Flush failed to complete, assume the GPU has hung and + * perform a reset to recover + */ + dev_err(kbdev->dev, "Flush for GPU page table update did not complete. 
Issueing GPU soft-reset to recover\n"); + + if (kbase_prepare_to_reset_gpu(kbdev)) + kbase_reset_gpu(kbdev); + } + + mutex_unlock(&kbdev->mmu_hw_mutex); + /* AS transaction end */ + + kbase_pm_context_idle(kbdev); +} + +static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev, + u64 vpfn, size_t nr, bool sync, int as_nr) +{ + /* Skip if there is nothing to do */ + if (nr) { + kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[as_nr], vpfn, + nr, sync); + } +} + +static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, + u64 vpfn, size_t nr, bool sync) +{ + struct kbase_device *kbdev; + bool ctx_is_in_runpool; + + /* Early out if there is nothing to do */ + if (nr == 0) + return; + + kbdev = kctx->kbdev; + mutex_lock(&kbdev->js_data.queue_mutex); + ctx_is_in_runpool = kbase_ctx_sched_inc_refcount(kctx); + mutex_unlock(&kbdev->js_data.queue_mutex); + + if (ctx_is_in_runpool) { + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + + kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr], + vpfn, nr, sync); + + release_ctx(kbdev, kctx); + } +} + +void kbase_mmu_update(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + int as_nr) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->mmu_hw_mutex); + KBASE_DEBUG_ASSERT(as_nr != KBASEP_AS_NR_INVALID); + + kbdev->mmu_mode->update(kbdev, mmut, as_nr); +} +KBASE_EXPORT_TEST_API(kbase_mmu_update); + +void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->mmu_hw_mutex); + + kbdev->mmu_mode->disable_as(kbdev, as_nr); +} + +void kbase_mmu_disable(struct kbase_context *kctx) +{ + /* ASSERT that the context has a valid as_nr, which is only the case + * when it's scheduled in. 
+ * + * as_nr won't change because the caller has the hwaccess_lock + */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + /* + * The address space is being disabled, drain all knowledge of it out + * from the caches as pages and page tables might be freed after this. + * + * The job scheduler code will already be holding the locks and context + * so just do the flush. + */ + kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, true); + + kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr); +} +KBASE_EXPORT_TEST_API(kbase_mmu_disable); + +/* + * We actually only discard the ATE, and not the page table + * pages. There is a potential DoS here, as we'll leak memory by + * having PTEs that are potentially unused. Will require physical + * page accounting, so MMU pages are part of the process allocation. + * + * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is + * currently scheduled into the runpool, and so potentially uses a lot of locks. + * These locks must be taken in the correct order with respect to others + * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more + * information. 
+ */
+/**
+ * kbase_mmu_teardown_pages() - Invalidate the ATEs that map a GPU VA range
+ * @kbdev: The kbase device; supplies the MMU mode operations.
+ * @mmut:  The page tables to modify. mmut->mmu_lock is taken internally.
+ * @vpfn:  First GPU virtual page frame number to tear down.
+ * @nr:    Number of pages to tear down.
+ * @as_nr: Address space number used for the final flush when @mmut has no
+ *         owning context.
+ *
+ * Walks the page tables from the top level for each chunk of the range.
+ * Ranges with no valid PTE are skipped, bottom-level ATEs are invalidated
+ * individually and a level 2 ATE is invalidated only when the whole 512-page
+ * entry is covered by the request. The requested range is flushed afterwards
+ * whether or not the walk succeeded.
+ *
+ * Return: 0 on success or if @nr is 0, -EFAULT if a valid ATE is found at
+ * level 0 or 1.
+ */
+int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
+	struct kbase_mmu_table *mmut, u64 vpfn, size_t nr, int as_nr)
+{
+	phys_addr_t pgd;
+	u64 start_vpfn = vpfn;
+	size_t requested_nr = nr;
+	struct kbase_mmu_mode const *mmu_mode;
+	int err = -EFAULT;
+
+	if (nr == 0) {
+		/* early out if nothing to do */
+		return 0;
+	}
+
+	mutex_lock(&mmut->mmu_lock);
+
+	mmu_mode = kbdev->mmu_mode;
+
+	while (nr) {
+		unsigned int i;
+		unsigned int index = vpfn & 0x1FF;
+		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+		unsigned int pcount;
+		int level;
+		u64 *page;
+
+		if (count > nr)
+			count = nr;
+
+		/* need to check if this is a 2MB or a 4kB page */
+		pgd = mmut->pgd;
+
+		for (level = MIDGARD_MMU_TOPLEVEL;
+				level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
+			phys_addr_t next_pgd;
+
+			index = (vpfn >> ((3 - level) * 9)) & 0x1FF;
+			page = kmap(phys_to_page(pgd));
+			if (mmu_mode->ate_is_valid(page[index], level))
+				break; /* keep the mapping */
+			else if (!mmu_mode->pte_is_valid(page[index], level)) {
+				/* Nothing here, advance to the next entry of
+				 * the current level. One entry spans
+				 * 1 << ((3 - level) * 9) pages (134217728,
+				 * 262144, 512 or 1 for levels 0..3), but vpfn
+				 * may sit part-way through it, so only skip
+				 * what is left of this entry. Skipping the
+				 * full span from an unaligned vpfn would step
+				 * over pages that belong to the next entry.
+				 */
+				count = 1U << ((3 - level) * 9);
+				count -= vpfn & (count - 1);
+				if (count > nr)
+					count = nr;
+				goto next;
+			}
+			next_pgd = mmu_mode->pte_to_phy_addr(page[index]);
+			kunmap(phys_to_page(pgd));
+			pgd = next_pgd;
+		}
+
+		/* On a break above, 'page' is still mapped and 'index' is the
+		 * entry at 'level'. If the loop ran to completion every level
+		 * has already been unmapped and 'level' is past the bottom.
+		 */
+		switch (level) {
+		case MIDGARD_MMU_LEVEL(0):
+		case MIDGARD_MMU_LEVEL(1):
+			dev_warn(kbdev->dev,
+				 "%s: No support for ATEs at level %d\n",
+				 __func__, level);
+			kunmap(phys_to_page(pgd));
+			goto out;
+		case MIDGARD_MMU_LEVEL(2):
+			/* can only teardown if count >= 512 */
+			if (count >= 512) {
+				pcount = 1;
+			} else {
+				dev_warn(kbdev->dev,
+					 "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down\n",
+					 __func__, count);
+				pcount = 0;
+			}
+			break;
+		case MIDGARD_MMU_BOTTOMLEVEL:
+			/* page count is the same as the logical count */
+			pcount = count;
+			break;
+		default:
+			dev_err(kbdev->dev,
+				"%s: found non-mapped memory, early out\n",
+				__func__);
+			vpfn += count;
+			nr -= count;
+			continue;
+		}
+
+		/* Invalidate the entries we added */
+		for (i = 0; i < pcount; i++)
+			mmu_mode->entry_invalidate(&page[index + i]);
+
+		kbase_mmu_sync_pgd(kbdev,
+				kbase_dma_addr(phys_to_page(pgd)) +
+				8 * index, 8*pcount);
+
+next:
+		kunmap(phys_to_page(pgd));
+		vpfn += count;
+		nr -= count;
+	}
+	err = 0;
+out:
+	mutex_unlock(&mmut->mmu_lock);
+
+	/* Flush the whole requested range, even on the error path */
+	if (mmut->kctx)
+		kbase_mmu_flush_invalidate(mmut->kctx, start_vpfn, requested_nr,
+				true);
+	else
+		kbase_mmu_flush_invalidate_no_ctx(kbdev, start_vpfn, requested_nr,
+				true, as_nr);
+
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
+
+/**
+ * kbase_mmu_update_pages_no_flush() - Update page table entries on the GPU
+ *
+ * This will update page table entries that already exist on the GPU based on
+ * the new flags that are passed. It is used as a response to the changes of
+ * the memory attributes
+ *
+ * The caller is responsible for validating the memory attributes
+ *
+ * @kctx:  Kbase context
+ * @vpfn:  Virtual PFN (Page Frame Number) of the first page to update
+ * @phys:  Tagged physical addresses of the physical pages to replace the
+ *         current mappings
+ * @nr:    Number of pages to update
+ * @flags: Flags
+ * @group_id: The physical memory group in which the page was allocated.
+ *            Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ */ +static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags, int const group_id) +{ + phys_addr_t pgd; + u64 *pgd_page; + int err; + struct kbase_device *kbdev; + + if (WARN_ON(kctx == NULL)) + return -EINVAL; + + KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); + + /* Early out if there is nothing to do */ + if (nr == 0) + return 0; + + mutex_lock(&kctx->mmu.mmu_lock); + + kbdev = kctx->kbdev; + + while (nr) { + unsigned int i; + unsigned int index = vpfn & 0x1FF; + size_t count = KBASE_MMU_PAGE_ENTRIES - index; + struct page *p; + + if (count > nr) + count = nr; + + do { + err = mmu_get_bottom_pgd(kbdev, &kctx->mmu, + vpfn, &pgd); + if (err != -ENOMEM) + break; + /* Fill the memory pool with enough pages for + * the page walk to succeed + */ + mutex_unlock(&kctx->mmu.mmu_lock); + err = kbase_mem_pool_grow( + &kbdev->mem_pools.small[ + kctx->mmu.group_id], + MIDGARD_MMU_BOTTOMLEVEL); + mutex_lock(&kctx->mmu.mmu_lock); + } while (!err); + if (err) { + dev_warn(kbdev->dev, + "mmu_get_bottom_pgd failure\n"); + goto fail_unlock; + } + + p = pfn_to_page(PFN_DOWN(pgd)); + pgd_page = kmap(p); + if (!pgd_page) { + dev_warn(kbdev->dev, "kmap failure\n"); + err = -ENOMEM; + goto fail_unlock; + } + + for (i = 0; i < count; i++) + pgd_page[index + i] = kbase_mmu_create_ate(kbdev, + phys[i], flags, MIDGARD_MMU_BOTTOMLEVEL, + group_id); + + phys += count; + vpfn += count; + nr -= count; + + kbase_mmu_sync_pgd(kbdev, + kbase_dma_addr(p) + (index * sizeof(u64)), + count * sizeof(u64)); + + kunmap(pfn_to_page(PFN_DOWN(pgd))); + } + + mutex_unlock(&kctx->mmu.mmu_lock); + return 0; + +fail_unlock: + mutex_unlock(&kctx->mmu.mmu_lock); + return err; +} + +int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags, int const group_id) +{ + int err; + + err = kbase_mmu_update_pages_no_flush(kctx, vpfn, phys, nr, flags, + group_id); 
+ kbase_mmu_flush_invalidate(kctx, vpfn, nr, true); + return err; +} + +static void mmu_teardown_level(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, phys_addr_t pgd, + int level, u64 *pgd_page_buffer) +{ + phys_addr_t target_pgd; + struct page *p; + u64 *pgd_page; + int i; + struct kbase_mmu_mode const *mmu_mode; + + lockdep_assert_held(&mmut->mmu_lock); + + pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); + /* kmap_atomic should NEVER fail. */ + if (WARN_ON(pgd_page == NULL)) + return; + /* Copy the page to our preallocated buffer so that we can minimize + * kmap_atomic usage + */ + memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE); + kunmap_atomic(pgd_page); + pgd_page = pgd_page_buffer; + + mmu_mode = kbdev->mmu_mode; + + for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { + target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]); + + if (target_pgd) { + if (mmu_mode->pte_is_valid(pgd_page[i], level)) { + mmu_teardown_level(kbdev, mmut, + target_pgd, + level + 1, + pgd_page_buffer + + (PAGE_SIZE / sizeof(u64))); + } + } + } + + p = pfn_to_page(PFN_DOWN(pgd)); + + kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], + p, true); + + atomic_sub(1, &kbdev->memdev.used_pages); + + /* If MMU tables belong to a context then pages will have been accounted + * against it, so we must decrement the usage counts here. 
+ */ + if (mmut->kctx) { + kbase_process_page_usage_dec(mmut->kctx, 1); + atomic_sub(1, &mmut->kctx->used_pages); + } +} + +int kbase_mmu_init(struct kbase_device *const kbdev, + struct kbase_mmu_table *const mmut, struct kbase_context *const kctx, + int const group_id) +{ + if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) || + WARN_ON(group_id < 0)) + return -EINVAL; + + mmut->group_id = group_id; + mutex_init(&mmut->mmu_lock); + mmut->kctx = kctx; + + /* Preallocate MMU depth of four pages for mmu_teardown_level to use */ + mmut->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL); + + if (mmut->mmu_teardown_pages == NULL) + return -ENOMEM; + + mmut->pgd = 0; + /* We allocate pages into the kbdev memory pool, then + * kbase_mmu_alloc_pgd will allocate out of that pool. This is done to + * avoid allocations from the kernel happening with the lock held. + */ + while (!mmut->pgd) { + int err; + + err = kbase_mem_pool_grow( + &kbdev->mem_pools.small[mmut->group_id], + MIDGARD_MMU_BOTTOMLEVEL); + if (err) { + kbase_mmu_term(kbdev, mmut); + return -ENOMEM; + } + + mutex_lock(&mmut->mmu_lock); + mmut->pgd = kbase_mmu_alloc_pgd(kbdev, mmut); + mutex_unlock(&mmut->mmu_lock); + } + + return 0; +} + +void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) +{ + if (mmut->pgd) { + mutex_lock(&mmut->mmu_lock); + mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL, + mmut->mmu_teardown_pages); + mutex_unlock(&mmut->mmu_lock); + + if (mmut->kctx) + KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, mmut->kctx->id, 0); + } + + kfree(mmut->mmu_teardown_pages); + mutex_destroy(&mmut->mmu_lock); +} + +static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, + int level, char ** const buffer, size_t *size_left) +{ + phys_addr_t target_pgd; + u64 *pgd_page; + int i; + size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64); + size_t dump_size; + struct kbase_device *kbdev; + struct kbase_mmu_mode const *mmu_mode; + + if 
(WARN_ON(kctx == NULL)) + return 0; + lockdep_assert_held(&kctx->mmu.mmu_lock); + + kbdev = kctx->kbdev; + mmu_mode = kbdev->mmu_mode; + + pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd))); + if (!pgd_page) { + dev_warn(kbdev->dev, "%s: kmap failure\n", __func__); + return 0; + } + + if (*size_left >= size) { + /* A modified physical address that contains + * the page table level + */ + u64 m_pgd = pgd | level; + + /* Put the modified physical address in the output buffer */ + memcpy(*buffer, &m_pgd, sizeof(m_pgd)); + *buffer += sizeof(m_pgd); + + /* Followed by the page table itself */ + memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES); + *buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES; + + *size_left -= size; + } + + if (level < MIDGARD_MMU_BOTTOMLEVEL) { + for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { + if (mmu_mode->pte_is_valid(pgd_page[i], level)) { + target_pgd = mmu_mode->pte_to_phy_addr( + pgd_page[i]); + + dump_size = kbasep_mmu_dump_level(kctx, + target_pgd, level + 1, + buffer, size_left); + if (!dump_size) { + kunmap(pfn_to_page(PFN_DOWN(pgd))); + return 0; + } + size += dump_size; + } + } + } + + kunmap(pfn_to_page(PFN_DOWN(pgd))); + + return size; +} + +void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) +{ + void *kaddr; + size_t size_left; + + KBASE_DEBUG_ASSERT(kctx); + + if (nr_pages == 0) { + /* can't dump in a 0 sized buffer, early out */ + return NULL; + } + + size_left = nr_pages * PAGE_SIZE; + + if (WARN_ON(size_left == 0)) + return NULL; + kaddr = vmalloc_user(size_left); + + mutex_lock(&kctx->mmu.mmu_lock); + + if (kaddr) { + u64 end_marker = 0xFFULL; + char *buffer; + char *mmu_dump_buffer; + u64 config[3]; + size_t dump_size, size = 0; + struct kbase_mmu_setup as_setup; + + buffer = (char *)kaddr; + mmu_dump_buffer = buffer; + + kctx->kbdev->mmu_mode->get_as_setup(&kctx->mmu, + &as_setup); + config[0] = as_setup.transtab; + config[1] = as_setup.memattr; + config[2] = as_setup.transcfg; + memcpy(buffer, &config, 
sizeof(config)); + mmu_dump_buffer += sizeof(config); + size_left -= sizeof(config); + size += sizeof(config); + + dump_size = kbasep_mmu_dump_level(kctx, + kctx->mmu.pgd, + MIDGARD_MMU_TOPLEVEL, + &mmu_dump_buffer, + &size_left); + + if (!dump_size) + goto fail_free; + + size += dump_size; + + /* Add on the size for the end marker */ + size += sizeof(u64); + + if (size > (nr_pages * PAGE_SIZE)) { + /* The buffer isn't big enough - free the memory and + * return failure + */ + goto fail_free; + } + + /* Add the end marker */ + memcpy(mmu_dump_buffer, &end_marker, sizeof(u64)); + } + + mutex_unlock(&kctx->mmu.mmu_lock); + return kaddr; + +fail_free: + vfree(kaddr); + mutex_unlock(&kctx->mmu.mmu_lock); + return NULL; +} +KBASE_EXPORT_TEST_API(kbase_mmu_dump); + +void bus_fault_worker(struct work_struct *data) +{ + struct kbase_as *faulting_as; + int as_no; + struct kbase_context *kctx; + struct kbase_device *kbdev; + struct kbase_fault *fault; + + faulting_as = container_of(data, struct kbase_as, work_busfault); + fault = &faulting_as->bf_data; + + /* Ensure that any pending page fault worker has completed */ + flush_work(&faulting_as->work_pagefault); + + as_no = faulting_as->number; + + kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); + + /* Grab the context, already refcounted in kbase_mmu_interrupt() on + * flagging of the bus-fault. 
Therefore, it cannot be scheduled out of + * this AS until we explicitly release it + */ + kctx = kbase_ctx_sched_as_to_ctx(kbdev, as_no); + if (!kctx) { + atomic_dec(&kbdev->faults_pending); + return; + } + + if (unlikely(fault->protected_mode)) { + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Permission failure", fault); + kbase_mmu_hw_clear_fault(kbdev, faulting_as, + KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); + release_ctx(kbdev, kctx); + atomic_dec(&kbdev->faults_pending); + return; + + } + + /* NOTE: If GPU already powered off for suspend, + * we don't need to switch to unmapped + */ + if (!kbase_pm_context_active_handle_suspend(kbdev, + KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { + kbase_gpu_report_bus_fault_and_kill(kctx, faulting_as, fault); + kbase_pm_context_idle(kbdev); + } + + release_ctx(kbdev, kctx); + + atomic_dec(&kbdev->faults_pending); +} + +void kbase_flush_mmu_wqs(struct kbase_device *kbdev) +{ + int i; + + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { + struct kbase_as *as = &kbdev->as[i]; + + flush_workqueue(as->pf_wq); + } +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.h new file mode 100644 index 0000000..c9e27b1 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.h @@ -0,0 +1,118 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_MMU_H_ +#define _KBASE_MMU_H_ + +/** + * kbase_mmu_init - Initialise an object representing GPU page tables + * + * The structure should be terminated using kbase_mmu_term() + * + * @kbdev: Instance of GPU platform device, allocated from the probe method. + * @mmut: GPU page tables to be initialized. + * @kctx: Optional kbase context, may be NULL if this set of MMU tables + * is not associated with a context. + * @group_id: The physical group ID from which to allocate GPU page tables. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * + * Return: 0 if successful, otherwise a negative error code. + */ +int kbase_mmu_init(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + struct kbase_context *kctx, int group_id); + +/** + * kbase_mmu_interrupt - Process an MMU interrupt. + * + * Process the MMU interrupt that was reported by the &kbase_device. + * + * @kbdev: Pointer to the kbase device for which the interrupt happened. + * @irq_stat: Value of the MMU_IRQ_STATUS register. + */ +void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); + +/** + * kbase_mmu_term - Terminate an object representing GPU page tables + * + * This will free any page tables that have been allocated + * + * @kbdev: Instance of GPU platform device, allocated from the probe method. + * @mmut: GPU page tables to be destroyed. + */ +void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut); + +/** + * kbase_mmu_create_ate - Create an address translation entry + * + * @kbdev: Instance of GPU platform device, allocated from the probe method. + * @phy: Physical address of the page to be mapped for GPU access. + * @flags: Bitmask of attributes of the GPU memory region being mapped. 
+ * @level: Page table level for which to build an address translation entry. + * @group_id: The physical memory group in which the page was allocated. + * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1). + * + * This function creates an address translation entry to encode the physical + * address of a page to be mapped for access by the GPU, along with any extra + * attributes required for the GPU memory region. + * + * Return: An address translation entry, either in LPAE or AArch64 format + * (depending on the driver's configuration). + */ +u64 kbase_mmu_create_ate(struct kbase_device *kbdev, + struct tagged_addr phy, unsigned long flags, int level, int group_id); + +int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + const u64 start_vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags, int group_id); +int kbase_mmu_insert_pages(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags, int as_nr, int group_id); +int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, + struct tagged_addr phys, size_t nr, + unsigned long flags, int group_id); + +int kbase_mmu_teardown_pages(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, u64 vpfn, + size_t nr, int as_nr); +int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags, int const group_id); + +/** + * kbase_mmu_bus_fault_interrupt - Process a bus fault interrupt. + * + * Process the bus fault interrupt that was reported for a particular GPU + * address space. + * + * @kbdev: Pointer to the kbase device for which bus fault was reported. + * @status: Value of the GPU_FAULTSTATUS register. + * @as_nr: GPU address space for which the bus fault occurred. + * + * Return: zero if the operation was successful, non-zero otherwise. 
+ */ +int kbase_mmu_bus_fault_interrupt(struct kbase_device *kbdev, u32 status, + u32 as_nr); + +#endif /* _KBASE_MMU_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_hw.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_hw.h new file mode 100644 index 0000000..e6eef86 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_hw.h @@ -0,0 +1,107 @@ +/* + * + * (C) COPYRIGHT 2014-2015, 2018-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * DOC: Interface file for accessing MMU hardware functionality + * + * This module provides an abstraction for accessing the functionality provided + * by the midgard MMU and thus allows all MMU HW access to be contained within + * one common place and allows for different backends (implementations) to + * be provided. + */ + +#ifndef _KBASE_MMU_HW_H_ +#define _KBASE_MMU_HW_H_ + +/* Forward declarations */ +struct kbase_device; +struct kbase_as; +struct kbase_context; + +/** + * enum kbase_mmu_fault_type - MMU fault type descriptor. 
+ */ +enum kbase_mmu_fault_type { + KBASE_MMU_FAULT_TYPE_UNKNOWN = 0, + KBASE_MMU_FAULT_TYPE_PAGE, + KBASE_MMU_FAULT_TYPE_BUS, + KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED, + KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED +}; + +/** + * kbase_mmu_hw_configure - Configure an address space for use. + * @kbdev: kbase device to configure. + * @as: address space to configure. + * + * Configure the MMU using the address space details set up in the + * current_setup member of @as. + */ +void kbase_mmu_hw_configure(struct kbase_device *kbdev, + struct kbase_as *as); + +/** + * kbase_mmu_hw_do_operation - Issue an operation to the MMU. + * @kbdev: kbase device to issue the MMU operation on. + * @as: address space to issue the MMU operation on. + * @vpfn: MMU Virtual Page Frame Number to start the operation on. + * @nr: Number of pages to work on. + * @type: Operation type (written to ASn_COMMAND). + * @handling_irq: Is this operation being called during the handling + * of an interrupt? + * + * Issue an operation (MMU invalidate, MMU flush, etc) on the address space + * @as over the range of @nr pages starting at @vpfn. + * + * Return: Zero if the operation was successful, non-zero otherwise. + */ +int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, + u64 vpfn, u32 nr, u32 type, + unsigned int handling_irq); + +/** + * kbase_mmu_hw_clear_fault - Clear a fault that has been previously reported by + * the MMU. + * @kbdev: kbase device to clear the fault from. + * @as: address space to clear the fault from. + * @type: The type of fault that needs to be cleared. + * + * Clear a bus error or page fault that has been reported by the MMU. + */ +void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, + enum kbase_mmu_fault_type type); + +/** + * kbase_mmu_hw_enable_fault - Enable fault that has been previously reported by + * the MMU. + * @kbdev: kbase device to again enable the fault from.
+ * @as: address space to again enable the fault from. + * @type: The type of fault that needs to be enabled again. + * + * After a page fault or bus error has been reported by the MMU these + * will be disabled. After these are handled this function needs to be + * called to enable the page fault or bus error fault again. + */ +void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, + enum kbase_mmu_fault_type type); + +#endif /* _KBASE_MMU_HW_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_hw_direct.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_hw_direct.c new file mode 100755 index 0000000..f22e73e --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_hw_direct.c @@ -0,0 +1,272 @@ +/* + * + * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include +#include +#include +#include +#include + +/** + * lock_region() - Generate lockaddr to lock memory region in MMU + * @pfn: Starting page frame number of the region to lock + * @num_pages: Number of pages to lock. It must be greater than 0. 
+ * @lockaddr: Address and size of memory region to lock + * + * The lockaddr value is a combination of the starting address and + * the size of the region that encompasses all the memory pages to lock. + * + * The size is expressed as a logarithm: it is represented in a way + * that is compatible with the HW specification and it also determines + * how many of the lowest bits of the address are cleared. + * + * Return: 0 on success, or -EINVAL if @num_pages is 0 or if the region would + * need a size above KBASE_LOCK_REGION_MAX_SIZE_LOG2. + */ +static int lock_region(u64 pfn, u32 num_pages, u64 *lockaddr) +{ + const u64 lockaddr_base = pfn << PAGE_SHIFT; + u64 lockaddr_size_log2, region_frame_number_start, + region_frame_number_end; + + if (num_pages == 0) + return -EINVAL; + + /* The size is expressed as a logarithm and should take into account + * the possibility that some pages might spill into the next region. + */ + lockaddr_size_log2 = fls(num_pages) + PAGE_SHIFT - 1; + + /* Round up if the number of pages is not a power of 2. */ + if (num_pages != ((u32)1 << (lockaddr_size_log2 - PAGE_SHIFT))) + lockaddr_size_log2 += 1; + + /* Round up if some memory pages spill into the next region. */ + region_frame_number_start = pfn >> (lockaddr_size_log2 - PAGE_SHIFT); + region_frame_number_end = + (pfn + num_pages - 1) >> (lockaddr_size_log2 - PAGE_SHIFT); + + if (region_frame_number_start < region_frame_number_end) + lockaddr_size_log2 += 1; + + /* Represent the size according to the HW specification: never below + * KBASE_LOCK_REGION_MIN_SIZE_LOG2, and reject anything above + * KBASE_LOCK_REGION_MAX_SIZE_LOG2. + */ + lockaddr_size_log2 = MAX(lockaddr_size_log2, + KBASE_LOCK_REGION_MIN_SIZE_LOG2); + + if (lockaddr_size_log2 > KBASE_LOCK_REGION_MAX_SIZE_LOG2) + return -EINVAL; + + /* The lowest bits are cleared and then set to size - 1 to represent + * the size in a way that is compatible with the HW specification. 
+ */ + *lockaddr = lockaddr_base & ~((1ull << lockaddr_size_log2) - 1); + *lockaddr |= lockaddr_size_log2 - 1; + + return 0; +} +/** + * wait_ready() - Poll AS_STATUS until the address space has no active command + * @kbdev: kbase device that owns the address space. + * @as_nr: Number of the address space to poll. + * + * Return: 0 once AS_STATUS_AS_ACTIVE reads as clear, or -1 when the + * KBASE_AS_INACTIVE_MAX_LOOPS polling budget runs out first. + */ +static int wait_ready(struct kbase_device *kbdev, + unsigned int as_nr) +{ + unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; + u32 val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)); + + /* Wait for the MMU status to indicate there is no active command, in + * case one is pending. Do not log remaining register accesses. + */ + while (--max_loops && (val & AS_STATUS_AS_ACTIVE)) + val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)); + + if (max_loops == 0) { + dev_err(kbdev->dev, "AS_ACTIVE bit stuck, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n"); + return -1; + } + + /* If waiting in loop was performed, log last read value. */ + if (KBASE_AS_INACTIVE_MAX_LOOPS - 1 > max_loops) + kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)); + + return 0; +} +/** + * write_cmd() - Write a command to AS_COMMAND once the address space is idle + * @kbdev: kbase device that owns the address space. + * @as_nr: Number of the address space to command. + * @cmd: Value written to the AS_COMMAND register. + * + * The register is left untouched when wait_ready() fails. + * + * Return: The status returned by wait_ready(). 
+ */ +static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd) +{ + int status; + + /* write AS_COMMAND when MMU is ready to accept another command */ + status = wait_ready(kbdev, as_nr); + if (status == 0) + kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd); + + return status; +} + +void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as) +{ + struct kbase_mmu_setup *current_setup = &as->current_setup; + u64 transcfg = 0; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) { + transcfg = current_setup->transcfg; + + /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK + * Clear PTW_MEMATTR bits + */ + transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK; + /* Enable correct PTW_MEMATTR bits */ + transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK; + /* Ensure page-tables reads use read-allocate cache-policy in + * the L2 + */ + transcfg |= AS_TRANSCFG_R_ALLOCATE; + + if (kbdev->system_coherency == COHERENCY_ACE) { + /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) + * Clear PTW_SH bits + */ + transcfg = 
(transcfg & ~AS_TRANSCFG_PTW_SH_MASK); + /* Enable correct PTW_SH bits */ + transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS); + } + + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO), + transcfg); + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI), + (transcfg >> 32) & 0xFFFFFFFFUL); + } else { + if (kbdev->system_coherency == COHERENCY_ACE) + current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER; + } + + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO), + current_setup->transtab & 0xFFFFFFFFUL); + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI), + (current_setup->transtab >> 32) & 0xFFFFFFFFUL); + + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO), + current_setup->memattr & 0xFFFFFFFFUL); + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI), + (current_setup->memattr >> 32) & 0xFFFFFFFFUL); + + KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(kbdev, as, + current_setup->transtab, + current_setup->memattr, + transcfg); + /* Issue AS_COMMAND_UPDATE after programming the registers; the + * write_cmd() status is not checked here. + */ + write_cmd(kbdev, as->number, AS_COMMAND_UPDATE); +} + +int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, + u64 vpfn, u32 nr, u32 op, + unsigned int handling_irq) +{ + int ret; + /* The caller must hold kbdev->mmu_hw_mutex. Note that handling_irq is + * not referenced anywhere in this function. 
+ */ + lockdep_assert_held(&kbdev->mmu_hw_mutex); + + if (op == AS_COMMAND_UNLOCK) { + /* Unlock doesn't require a lock first */ + ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK); + } else { + u64 lock_addr; + + ret = lock_region(vpfn, nr, &lock_addr); + + if (!ret) { + /* Lock the region that needs to be updated */ + kbase_reg_write(kbdev, + MMU_AS_REG(as->number, AS_LOCKADDR_LO), + lock_addr & 0xFFFFFFFFUL); + kbase_reg_write(kbdev, + MMU_AS_REG(as->number, AS_LOCKADDR_HI), + (lock_addr >> 32) & 0xFFFFFFFFUL); + write_cmd(kbdev, as->number, AS_COMMAND_LOCK); + + /* Run the MMU operation. The write_cmd() results for the lock + * and for the operation itself are not checked; only the final + * wait below determines the return value. + */ + write_cmd(kbdev, as->number, op); + + /* Wait for the flush to complete */ + ret = wait_ready(kbdev, as->number); + } + } + + return ret; +} + +void kbase_mmu_hw_clear_fault(struct 
kbase_device *kbdev, struct kbase_as *as, + enum kbase_mmu_fault_type type) +{ + unsigned long flags; + u32 pf_bf_mask; + + spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + + /* + * A reset is in-flight and we're flushing the IRQ + bottom half + * so don't update anything as it could race with the reset code. + */ + if (kbdev->irq_reset_flush) + goto unlock; + + /* Clear the page (and bus fault IRQ as well in case one occurred) */ + pf_bf_mask = MMU_PAGE_FAULT(as->number); + if (type == KBASE_MMU_FAULT_TYPE_BUS || + type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) + pf_bf_mask |= MMU_BUS_ERROR(as->number); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask); + +unlock: + spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); +} + +void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, + enum kbase_mmu_fault_type type) +{ + unsigned long flags; + u32 irq_mask; + + /* Enable the page fault IRQ + * (and bus fault IRQ as well in case one occurred) + */ + spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + + /* + * A reset is in-flight and we're flushing the IRQ + bottom half + * so don't update anything as it could race with the reset code. + */ + if (kbdev->irq_reset_flush) + goto unlock; + + irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)) | + MMU_PAGE_FAULT(as->number); + + if (type == KBASE_MMU_FAULT_TYPE_BUS || + type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) + irq_mask |= MMU_BUS_ERROR(as->number); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask); + +unlock: + spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_internal.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_internal.h new file mode 100644 index 0000000..28bd341 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_internal.h @@ -0,0 +1,63 @@ +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_MMU_INTERNAL_H_ +#define _KBASE_MMU_INTERNAL_H_ + +void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, + struct kbase_mmu_setup * const setup); + +void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, + struct kbase_as *as, struct kbase_fault *fault); + +void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, + struct kbase_as *as, const char *reason_str, + struct kbase_fault *fault); + +/** + * kbase_mmu_interrupt_process() - Process a bus or page fault. + * @kbdev: The kbase_device the fault happened on. + * @kctx: The kbase_context for the faulting address space if one was found. + * @as: The address space that has the fault. + * @fault: Data relating to the fault. + * + * This function will process a fault on a specific address space. + */ +void kbase_mmu_interrupt_process(struct kbase_device *kbdev, + struct kbase_context *kctx, struct kbase_as *as, + struct kbase_fault *fault); + +/** + * kbase_mmu_switch_to_ir() - Switch to incremental rendering if possible + * @kctx: The kbase_context for the faulting address space. + * @reg: Reference of a growable GPU memory region in the same context. + * Takes ownership of the reference if successful. 
+ * + * Used to switch to incremental rendering if we have nearly run out of + * virtual address space in a growable memory region. + * + * Return 0 if successful, otherwise a negative error code. + */ +int kbase_mmu_switch_to_ir(struct kbase_context *kctx, + struct kbase_va_region *reg); + +#endif /* _KBASE_MMU_INTERNAL_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_mode_aarch64.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_mode_aarch64.c new file mode 100644 index 0000000..02493e9 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_mode_aarch64.c @@ -0,0 +1,200 @@ +/* + * + * (C) COPYRIGHT 2010-2014, 2016-2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase.h" +#include +#include "mali_kbase_defs.h" +#include +#include + +#define ENTRY_TYPE_MASK 3ULL +/* For valid ATEs bit 1 = ((level == 3) ? 1 : 0). + * Valid ATE entries at level 3 are flagged with the value 3. + * Valid ATE entries at level 0-2 are flagged with the value 1. 
+ */ +#define ENTRY_IS_ATE_L3 3ULL +#define ENTRY_IS_ATE_L02 1ULL +#define ENTRY_IS_INVAL 2ULL +#define ENTRY_IS_PTE 3ULL + +#define ENTRY_ATTR_BITS (7ULL << 2) /* bits 4:2 */ +#define ENTRY_ACCESS_RW (1ULL << 6) /* bits 7:6 */ +#define ENTRY_ACCESS_RO (3ULL << 6) +#define ENTRY_SHARE_BITS (3ULL << 8) /* bits 9:8 */ +#define ENTRY_ACCESS_BIT (1ULL << 10) +#define ENTRY_NX_BIT (1ULL << 54) + +/* Helper function to perform assignment of page table entries, to + * ensure the use of strd, which is required on LPAE systems. + * + * Kernels from 3.18.13 on use WRITE_ONCE(); older kernels use a plain store + * between barriers on 64-bit builds, or an ldrd/strd pair on 32-bit ARM. + */ +static inline void page_table_entry_set(u64 *pte, u64 phy) +{ +#if KERNEL_VERSION(3, 18, 13) <= LINUX_VERSION_CODE + WRITE_ONCE(*pte, phy); +#else +#ifdef CONFIG_64BIT + barrier(); + *pte = phy; + barrier(); +#elif defined(CONFIG_ARM) + barrier(); + asm volatile("ldrd r0, [%1]\n\t" + "strd r0, %0\n\t" + : "=m" (*pte) + : "r" (&phy) + : "r0", "r1"); + barrier(); +#else +#error "64-bit atomic write must be implemented for your architecture" +#endif +#endif +} +/* Refresh the cached setup of address space @as_nr from @mmut and program it + * into the hardware. Warns and does nothing for KBASEP_AS_NR_INVALID. + */ +static void mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + int as_nr) +{ + struct kbase_as *as; + struct kbase_mmu_setup *current_setup; + + if (WARN_ON(as_nr == KBASEP_AS_NR_INVALID)) + return; + + as = &kbdev->as[as_nr]; + current_setup = &as->current_setup; + + kbase_mmu_get_as_setup(mmut, current_setup); + + /* Apply the address space setting */ + kbase_mmu_hw_configure(kbdev, as); +} +/* Program address space @as_nr with a zero translation table and + * AS_TRANSCFG_ADRMODE_UNMAPPED. 
+ */ +static void mmu_disable_as(struct kbase_device *kbdev, int as_nr) +{ + struct kbase_as * const as = &kbdev->as[as_nr]; + struct kbase_mmu_setup * const current_setup = &as->current_setup; + + current_setup->transtab = 0ULL; + current_setup->transcfg = AS_TRANSCFG_ADRMODE_UNMAPPED; + + /* Apply the address space setting */ + kbase_mmu_hw_configure(kbdev, as); +} +/* Return @entry with its low 12 bits cleared, or 0 when bit 0 of @entry is + * not set. + */ +static phys_addr_t pte_to_phy_addr(u64 entry) +{ + if (!(entry & 1)) + return 0; + + return entry & ~0xFFF; +} + +static int ate_is_valid(u64 ate, int const level) +{ + if (level == MIDGARD_MMU_BOTTOMLEVEL) + return 
((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE_L3); + else + return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE_L02); +} + +static int pte_is_valid(u64 pte, int const level) +{ + /* PTEs cannot exist at the bottom level */ + if (level == MIDGARD_MMU_BOTTOMLEVEL) + return false; + return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE); +} + +/* + * Map KBASE_REG flags to MMU flags + */ +static u64 get_mmu_flags(unsigned long flags) +{ + u64 mmu_flags; + + /* store mem_attr index as 4:2 (macro called ensures 3 bits already) */ + mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2; + + /* Set access flags - note that AArch64 stage 1 does not support + * write-only access, so we use read/write instead + */ + if (flags & KBASE_REG_GPU_WR) + mmu_flags |= ENTRY_ACCESS_RW; + else if (flags & KBASE_REG_GPU_RD) + mmu_flags |= ENTRY_ACCESS_RO; + + /* nx if requested */ + mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0; + + if (flags & KBASE_REG_SHARE_BOTH) { + /* inner and outer shareable */ + mmu_flags |= SHARE_BOTH_BITS; + } else if (flags & KBASE_REG_SHARE_IN) { + /* inner shareable coherency */ + mmu_flags |= SHARE_INNER_BITS; + } + + return mmu_flags; +} + +static void entry_set_ate(u64 *entry, + struct tagged_addr phy, + unsigned long flags, + int const level) +{ + if (level == MIDGARD_MMU_BOTTOMLEVEL) + page_table_entry_set(entry, as_phys_addr_t(phy) | + get_mmu_flags(flags) | + ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L3); + else + page_table_entry_set(entry, as_phys_addr_t(phy) | + get_mmu_flags(flags) | + ENTRY_ACCESS_BIT | ENTRY_IS_ATE_L02); +} + +static void entry_set_pte(u64 *entry, phys_addr_t phy) +{ + page_table_entry_set(entry, (phy & PAGE_MASK) | + ENTRY_ACCESS_BIT | ENTRY_IS_PTE); +} + +static void entry_invalidate(u64 *entry) +{ + page_table_entry_set(entry, ENTRY_IS_INVAL); +} + +static struct kbase_mmu_mode const aarch64_mode = { + .update = mmu_update, + .get_as_setup = kbase_mmu_get_as_setup, + .disable_as = mmu_disable_as, + .pte_to_phy_addr = pte_to_phy_addr, + 
.ate_is_valid = ate_is_valid, + .pte_is_valid = pte_is_valid, + .entry_set_ate = entry_set_ate, + .entry_set_pte = entry_set_pte, + .entry_invalidate = entry_invalidate, + .flags = KBASE_MMU_MODE_HAS_NON_CACHEABLE +}; + +struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void) +{ + return &aarch64_mode; +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_mode_lpae.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_mode_lpae.c similarity index 100% rename from dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_mode_lpae.c rename to bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_mode_lpae.c diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig new file mode 100644 index 0000000..ef9fb96 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/Kconfig @@ -0,0 +1,30 @@ +# +# (C) COPYRIGHT 2012-2013, 2017 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. 
+# +# SPDX-License-Identifier: GPL-2.0 +# +# + + + +# Add your platform-specific Kconfig file here +# +# "drivers/gpu/arm/midgard/platform/xxx/Kconfig" +# +# Where xxx is the platform name set in MALI_PLATFORM_NAME +# + diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild new file mode 100644 index 0000000..0a82eaf --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild @@ -0,0 +1,39 @@ +# +# (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. 
+# +# SPDX-License-Identifier: GPL-2.0 +# +# + + +USE_GPPLL?=0 +ifdef CONFIG_AM_VIDEO + USE_GPPLL:=1 +endif + +ccflags-y += -DAMLOGIC_GPU_USE_GPPLL=$(USE_GPPLL) + +mali_kbase-y += \ + $(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o \ + $(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o \ + $(MALI_PLATFORM_DIR)/mali_clock.o \ + $(MALI_PLATFORM_DIR)/mpgpu.o \ + $(MALI_PLATFORM_DIR)/meson_main2.o \ + $(MALI_PLATFORM_DIR)/platform_gx.o \ + $(MALI_PLATFORM_DIR)/scaling.o \ + $(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o \ + $(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c new file mode 100644 index 0000000..ea788a4 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.c @@ -0,0 +1,741 @@ +/* + * mali_clock.c + * + * Copyright (C) 2017 Amlogic, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include "mali_scaling.h" +#include "mali_clock.h" + +#ifndef AML_CLK_LOCK_ERROR +#define AML_CLK_LOCK_ERROR 1 +#endif + +static unsigned gpu_dbg_level = 0; +module_param(gpu_dbg_level, uint, 0644); +MODULE_PARM_DESC(gpu_dbg_level, "gpu debug level"); + +#define gpu_dbg(level, fmt, arg...) \ + do { \ + if (gpu_dbg_level >= (level)) \ + printk("gpu_debug"fmt , ## arg); \ + } while (0) + +#define GPU_CLK_DBG(fmt, arg...) 
+ +//disable print +//#define _dev_info(...) + +//static DEFINE_SPINLOCK(lock); +static mali_plat_info_t* pmali_plat = NULL; +//static u32 mali_extr_backup = 0; +//static u32 mali_extr_sample_backup = 0; +int mali_pm_statue = 0; + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16)) +int mali_clock_init_clk_tree(struct platform_device* pdev) +{ + mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[pmali_plat->def_clock]; + struct clk *clk_mali_0_parent = dvfs_tbl->clkp_handle; + struct clk *clk_mali_0 = pmali_plat->clk_mali_0; +#ifdef AML_CLK_LOCK_ERROR + struct clk *clk_mali_1 = pmali_plat->clk_mali_1; +#endif + struct clk *clk_mali = pmali_plat->clk_mali; + /* Route the default DVFS entry's parent clock into clk_mali_0, enable + * it, then select clk_mali_0 as the parent of clk_mali. + */ + clk_set_parent(clk_mali_0, clk_mali_0_parent); + + clk_prepare_enable(clk_mali_0); + + clk_set_parent(clk_mali, clk_mali_0); + /* Also parent and enable clk_mali_1 from the same source. */ +#ifdef AML_CLK_LOCK_ERROR + clk_set_parent(clk_mali_1, clk_mali_0_parent); + clk_prepare_enable(clk_mali_1); +#endif + + GPU_CLK_DBG("%s:enable(%d), %s:enable(%d)\n", + clk_mali_0->name, clk_mali_0->enable_count, + clk_mali_0_parent->name, clk_mali_0_parent->enable_count); + + return 0; +} +/* No-op: the self-assignment below has no effect; always returns 0. */ +int mali_clock_init(mali_plat_info_t *pdev) +{ + *pdev = *pdev; + return 0; +} +/* Run @critical with @param and return its result. 
*/ +int mali_clock_critical(critical_t critical, size_t param) +{ + int ret = 0; + + ret = critical(param); + + return ret; +} + +static int critical_clock_set(size_t param) +{ + int ret = 0; + unsigned int idx = param; + mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[idx]; + + struct clk *clk_mali_0 = pmali_plat->clk_mali_0; + struct clk *clk_mali_1 = pmali_plat->clk_mali_1; + struct clk *clk_mali_x = NULL; + struct clk *clk_mali_x_parent = NULL; + struct clk *clk_mali_x_old = NULL; + struct clk *clk_mali = pmali_plat->clk_mali; + + clk_mali_x_old = clk_get_parent(clk_mali); + /* Pick whichever of clk_mali_0/clk_mali_1 is not the current parent. */ + if (!clk_mali_x_old) { + printk("gpu: could not get clk_mali_x_old or clk_mali_x_old\n"); + return 0; + } + if (clk_mali_x_old == clk_mali_0) { + clk_mali_x = clk_mali_1; + } else if (clk_mali_x_old == clk_mali_1) { + 
clk_mali_x = clk_mali_0; + } else { + printk("gpu: unmatched clk_mali_x_old\n"); + return 0; + } + /* Program the idle branch first: parent rate, parent selection, then its own rate. */ + GPU_CLK_DBG("idx=%d, clk_freq=%d\n", idx, dvfs_tbl->clk_freq); + clk_mali_x_parent = dvfs_tbl->clkp_handle; + if (!clk_mali_x_parent) { + printk("gpu: could not get clk_mali_x_parent\n"); + return 0; + } + + GPU_CLK_DBG(); + ret = clk_set_rate(clk_mali_x_parent, dvfs_tbl->clkp_freq); + GPU_CLK_DBG(); + ret = clk_set_parent(clk_mali_x, clk_mali_x_parent); + GPU_CLK_DBG(); + ret = clk_set_rate(clk_mali_x, dvfs_tbl->clk_freq); + GPU_CLK_DBG(); +#ifndef AML_CLK_LOCK_ERROR + ret = clk_prepare_enable(clk_mali_x); +#endif + GPU_CLK_DBG("new %s:enable(%d)\n", clk_mali_x->name, clk_mali_x->enable_count); + udelay(1);// delay 1us (udelay() takes microseconds) + ret = clk_set_parent(clk_mali, clk_mali_x); + GPU_CLK_DBG(); + +#ifndef AML_CLK_LOCK_ERROR + clk_disable_unprepare(clk_mali_x_old); +#endif + /* Errors collected in ret are not propagated; 0 is returned regardless. */ + return 0; +} + +int mali_clock_set(unsigned int clock) +{ + return mali_clock_critical(critical_clock_set, (size_t)clock); +} +/* With AML_CLK_LOCK_ERROR defined (this file defines it when it is not + * already set) the clk_disable_unprepare() call below is compiled out. + */ +void disable_clock(void) +{ + struct clk *clk_mali = pmali_plat->clk_mali; + struct clk *clk_mali_x = NULL; + + clk_mali_x = clk_get_parent(clk_mali); + GPU_CLK_DBG(); +#ifndef AML_CLK_LOCK_ERROR + clk_disable_unprepare(clk_mali_x); +#endif + GPU_CLK_DBG(); +} +/* Counterpart of disable_clock(); the clk_prepare_enable() call is compiled + * out in the same way when AML_CLK_LOCK_ERROR is defined. 
+ */ +void enable_clock(void) +{ + struct clk *clk_mali = pmali_plat->clk_mali; + struct clk *clk_mali_x = NULL; + + clk_mali_x = clk_get_parent(clk_mali); + GPU_CLK_DBG(); +#ifndef AML_CLK_LOCK_ERROR + clk_prepare_enable(clk_mali_x); +#endif + GPU_CLK_DBG(); +} +/* Returns clk_sample[idx], or 0 while mali_pm_statue is non-zero; idx is not + * range-checked here. + */ +u32 get_mali_freq(u32 idx) +{ + if (!mali_pm_statue) { + return pmali_plat->clk_sample[idx]; + } else { + return 0; + } +} + +void set_str_src(u32 data) +{ + printk("gpu: %s, %s, %d\n", __FILE__, __func__, __LINE__); +} + +int mali_dt_info(struct platform_device *pdev, struct mali_plat_info_t *mpdata) +{ + struct device_node *gpu_dn = pdev->dev.of_node; + struct device_node *gpu_clk_dn; + phandle dvfs_clk_hdl; + mali_dvfs_threshold_table *dvfs_tbl = NULL; + uint32_t 
*clk_sample = NULL; + + struct property *prop; + const __be32 *p; + int length = 0, i = 0; + u32 u; + int ret = 0; + if (!gpu_dn) { + dev_notice(&pdev->dev, "gpu device node not right\n"); + return -ENODEV; + } + + ret = of_property_read_u32(gpu_dn,"num_of_pp", + &mpdata->cfg_pp); + if (ret) { + dev_notice(&pdev->dev, "set max pp to default 6\n"); + mpdata->cfg_pp = 6; + } + mpdata->scale_info.maxpp = mpdata->cfg_pp; + mpdata->maxpp_sysfs = mpdata->cfg_pp; + _dev_info(&pdev->dev, "max pp is %d\n", mpdata->scale_info.maxpp); + + ret = of_property_read_u32(gpu_dn,"min_pp", + &mpdata->cfg_min_pp); + if (ret) { + dev_notice(&pdev->dev, "set min pp to default 1\n"); + mpdata->cfg_min_pp = 1; + } + mpdata->scale_info.minpp = mpdata->cfg_min_pp; + _dev_info(&pdev->dev, "min pp is %d\n", mpdata->scale_info.minpp); + + ret = of_property_read_u32(gpu_dn,"min_clk", + &mpdata->cfg_min_clock); + if (ret) { + dev_notice(&pdev->dev, "set min clk default to 0\n"); + mpdata->cfg_min_clock = 0; + } + mpdata->scale_info.minclk = mpdata->cfg_min_clock; + _dev_info(&pdev->dev, "min clk is %d\n", mpdata->scale_info.minclk); + + mpdata->reg_base_hiubus = of_iomap(gpu_dn, 3); + _dev_info(&pdev->dev, "hiu io source 0x%p\n", mpdata->reg_base_hiubus); + + mpdata->reg_base_aobus = of_iomap(gpu_dn, 2); + _dev_info(&pdev->dev, "ao io source 0x%p\n", mpdata->reg_base_aobus); + + ret = of_property_read_u32(gpu_dn,"sc_mpp", + &mpdata->sc_mpp); + if (ret) { + dev_notice(&pdev->dev, "set pp used most of time default to %d\n", mpdata->cfg_pp); + mpdata->sc_mpp = mpdata->cfg_pp; + } + _dev_info(&pdev->dev, "num of pp used most of time %d\n", mpdata->sc_mpp); + + of_get_property(gpu_dn, "tbl", &length); + + length = length /sizeof(u32); + _dev_info(&pdev->dev, "clock dvfs cfg table size is %d\n", length); + + mpdata->dvfs_table = devm_kzalloc(&pdev->dev, + sizeof(struct mali_dvfs_threshold_table)*length, + GFP_KERNEL); + dvfs_tbl = mpdata->dvfs_table; + if (mpdata->dvfs_table == NULL) { + 
dev_err(&pdev->dev, "failed to alloc dvfs table\n"); + return -ENOMEM; + } + mpdata->clk_sample = devm_kzalloc(&pdev->dev, sizeof(u32)*length, GFP_KERNEL); + if (mpdata->clk_sample == NULL) { + dev_err(&pdev->dev, "failed to alloc clk_sample table\n"); + return -ENOMEM; + } + clk_sample = mpdata->clk_sample; + mpdata->dvfs_table_size = 0; + /* Each "tbl" cell is used as a phandle to a node describing one DVFS + * level; a failed property read is only logged and the loop carries on. + * NOTE(review): gpu_clk_dn is not checked for NULL here. + */ + of_property_for_each_u32(gpu_dn, "tbl", prop, p, u) { + dvfs_clk_hdl = (phandle) u; + gpu_clk_dn = of_find_node_by_phandle(dvfs_clk_hdl); + ret = of_property_read_u32(gpu_clk_dn,"clk_freq", &dvfs_tbl->clk_freq); + if (ret) { + dev_notice(&pdev->dev, "read clk_freq failed\n"); + } + ret = of_property_read_string(gpu_clk_dn,"clk_parent", + &dvfs_tbl->clk_parent); + if (ret) { + dev_notice(&pdev->dev, "read clk_parent failed\n"); + } + dvfs_tbl->clkp_handle = devm_clk_get(&pdev->dev, dvfs_tbl->clk_parent); + if (IS_ERR(dvfs_tbl->clkp_handle)) { + dev_notice(&pdev->dev, "failed to get %s's clock pointer\n", dvfs_tbl->clk_parent); + } + ret = of_property_read_u32(gpu_clk_dn,"clkp_freq", &dvfs_tbl->clkp_freq); + if (ret) { + dev_notice(&pdev->dev, "read clk_parent freq failed\n"); + } + ret = of_property_read_u32(gpu_clk_dn,"voltage", &dvfs_tbl->voltage); + if (ret) { + dev_notice(&pdev->dev, "read voltage failed\n"); + } + ret = of_property_read_u32(gpu_clk_dn,"keep_count", &dvfs_tbl->keep_count); + if (ret) { + dev_notice(&pdev->dev, "read keep_count failed\n"); + } + //two u32 cells are stored starting at downthreshold; presumably upthreshold is the next u32 field (confirm struct layout) + ret = of_property_read_u32_array(gpu_clk_dn,"threshold", + &dvfs_tbl->downthreshold, 2); + if (ret) { + dev_notice(&pdev->dev, "read threshold failed\n"); + } + dvfs_tbl->freq_index = i; + /* clk_freq scaled down by 1000000 (MHz if clk_freq is in Hz 
-- TODO confirm). */ + *clk_sample = dvfs_tbl->clk_freq / 1000000; + + dvfs_tbl ++; + clk_sample ++; + i++; + mpdata->dvfs_table_size ++; + } + dev_notice(&pdev->dev, "dvfs table is %d\n", mpdata->dvfs_table_size); + dev_notice(&pdev->dev, "dvfs table addr %p, ele size=%zd\n", + mpdata->dvfs_table, + sizeof(mpdata->dvfs_table[0])); + + ret = 
of_property_read_u32(gpu_dn,"max_clk", + &mpdata->cfg_clock); + if (ret) { + dev_notice(&pdev->dev, "max clk set %d\n", mpdata->dvfs_table_size-2); + mpdata->cfg_clock = mpdata->dvfs_table_size-2; + } + + mpdata->cfg_clock_bkup = mpdata->cfg_clock; + mpdata->maxclk_sysfs = mpdata->cfg_clock; + mpdata->scale_info.maxclk = mpdata->cfg_clock; + _dev_info(&pdev->dev, "max clk is %d\n", mpdata->scale_info.maxclk); + + ret = of_property_read_u32(gpu_dn,"turbo_clk", + &mpdata->turbo_clock); + if (ret) { + dev_notice(&pdev->dev, "turbo clk set to %d\n", mpdata->dvfs_table_size-1); + mpdata->turbo_clock = mpdata->dvfs_table_size-1; + } + _dev_info(&pdev->dev, "turbo clk is %d\n", mpdata->turbo_clock); + + ret = of_property_read_u32(gpu_dn,"def_clk", + &mpdata->def_clock); + if (ret) { + mpdata->def_clock = mpdata->scale_info.maxclk; + dev_notice(&pdev->dev, "default clk set to %d\n", mpdata->def_clock); + } + if (mpdata->def_clock > mpdata->scale_info.maxclk) + mpdata->def_clock = mpdata->scale_info.maxclk; + + _dev_info(&pdev->dev, "default clk is %d\n", mpdata->def_clock); + + dvfs_tbl = mpdata->dvfs_table; + clk_sample = mpdata->clk_sample; + for (i = 0; i< mpdata->dvfs_table_size; i++) { + _dev_info(&pdev->dev, "====================%d====================\n" + "clk_freq=%10d, clk_parent=%9s, voltage=%d, keep_count=%d, threshod=<%d %d>, clk_sample=%d\n", + i, + dvfs_tbl->clk_freq, dvfs_tbl->clk_parent, + dvfs_tbl->voltage, dvfs_tbl->keep_count, + dvfs_tbl->downthreshold, dvfs_tbl->upthreshold, *clk_sample); + dvfs_tbl ++; + clk_sample ++; + } + + mpdata->clk_mali = devm_clk_get(&pdev->dev, "clk_gpu"); + mpdata->clk_mali_0 = devm_clk_get(&pdev->dev, "clk_gpu_0"); + mpdata->clk_mali_1 = devm_clk_get(&pdev->dev, "clk_gpu_1"); + if (IS_ERR(mpdata->clk_mali) || IS_ERR(mpdata->clk_mali_0) || IS_ERR(mpdata->clk_mali_1)) { + dev_err(&pdev->dev, "failed to get clock pointer\n"); + return -EFAULT; + } + + pmali_plat = mpdata; + mpdata->pdev = pdev; + return 0; +} +#else +int 
mali_clock_init_clk_tree(struct platform_device* pdev) +{ + mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[pmali_plat->def_clock]; + struct clk *clk_mali = pmali_plat->clk_mali; + /* Before 5.4 the clock is enabled first and then rated (with gp0_pll as + * parent, the parent is enabled and rated beforehand); from 5.4 on the + * rate is set before the clock is enabled. + */ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 0)) + if ((0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) && + !IS_ERR(dvfs_tbl->clkp_handle) && + (0 != dvfs_tbl->clkp_freq)) { + clk_prepare_enable(dvfs_tbl->clkp_handle); + clk_set_rate(dvfs_tbl->clkp_handle, dvfs_tbl->clkp_freq); + } + clk_prepare_enable(clk_mali); + clk_set_rate(clk_mali, dvfs_tbl->clk_freq); +#else + pr_info("kernel version >= 5.4\n"); + clk_set_rate(clk_mali, dvfs_tbl->clk_freq); + clk_prepare_enable(clk_mali); +#endif + + return 0; +} +/* No-op: the self-assignment below has no effect; always returns 0. */ +int mali_clock_init(mali_plat_info_t *pdev) +{ + *pdev = *pdev; + return 0; +} +/* Run @critical with @param and return its result. */ +int mali_clock_critical(critical_t critical, size_t param) +{ + int ret = 0; + + ret = critical(param); + + return ret; +} + +static int critical_clock_set(size_t param) +{ + int ret = 0; + unsigned int idx = param; + mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[idx]; + + struct clk *clk_mali = pmali_plat->clk_mali; + + GPU_CLK_DBG(); + ret = clk_set_rate(clk_mali, dvfs_tbl->clk_freq); + GPU_CLK_DBG(); + /* NOTE(review): clk_mali_x_old is not declared in this function; the + * line below only builds because AML_CLK_LOCK_ERROR is defined and the + * block is compiled out. + */ +#ifndef AML_CLK_LOCK_ERROR + clk_disable_unprepare(clk_mali_x_old); +#endif + /* The clk_set_rate() result held in ret is not returned. 
*/ + return 0; +} + +int mali_clock_set(unsigned int clock) +{ + return mali_clock_critical(critical_clock_set, (size_t)clock); +} + +void disable_clock(void) +{ +#ifndef AML_CLK_LOCK_ERROR + struct clk *clk_mali = pmali_plat->clk_mali; + + GPU_CLK_DBG(); + clk_disable_unprepare(clk_mali); +#endif + GPU_CLK_DBG(); +} + +void enable_clock(void) +{ +#ifndef AML_CLK_LOCK_ERROR + struct clk *clk_mali = pmali_plat->clk_mali; + + clk_prepare_enable(clk_mali); +#endif + GPU_CLK_DBG(); +} +/* Returns clk_sample[idx], or 0 while mali_pm_statue is non-zero; idx is not + * range-checked here. + */ +u32 get_mali_freq(u32 idx) +{ + if (!mali_pm_statue) { + return pmali_plat->clk_sample[idx]; + } else { + return 0; + } +} + +void set_str_src(u32 data) +{ + printk("gpu: %s, %s, %d\n", __FILE__, __func__, __LINE__); 
+}
+
+/*
+ * Read the reg_level/reg_mask/reg_bit triple of one reset child node @dn
+ * into @info. @label only selects the wording of the error message.
+ *
+ * Returns 0 on success or -ENOMEM when a property is missing (error code
+ * kept from the original implementation).
+ */
+static int mali_parse_reset_node(struct platform_device *pdev,
+		struct device_node *dn, mali_reset_info_t *info,
+		const char *label)
+{
+	if (of_property_read_u32(dn, "reg_level", &info->reg_level)) {
+		dev_err(&pdev->dev, "no reg_level for %s\n", label);
+		return -ENOMEM;
+	}
+	if (of_property_read_u32(dn, "reg_mask", &info->reg_mask)) {
+		dev_err(&pdev->dev, "no reg_mask for %s\n", label);
+		return -ENOMEM;
+	}
+	if (of_property_read_u32(dn, "reg_bit", &info->reg_bit)) {
+		dev_err(&pdev->dev, "no reg_bit for %s\n", label);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+/*
+ * Parse the optional "reset_cfg" and "capb_reset" children of @gpu_dn into
+ * mpdata->module_reset and mpdata->apb_reset.
+ *
+ * mpdata->reset_flag is set to 1 only when both children were found and
+ * parsed; otherwise it is 0, which makes mali_reset() use its register
+ * fallback path.
+ *
+ * Returns 0 on success (including "child not present") or -ENOMEM when a
+ * present child lacks one of its properties.
+ */
+int mali_reset_info(struct platform_device *pdev, struct device_node *gpu_dn,
+		struct mali_plat_info_t *mpdata)
+{
+	struct device_node *reset_dn, *apb_reset_dn;
+	int have_reset = 0, have_apb_reset = 0;
+	int ret = 0;
+
+	mpdata->reset_flag = 0;
+
+	reset_dn = of_get_child_by_name(gpu_dn, "reset_cfg");
+	if (!reset_dn) {
+		dev_info(&pdev->dev, "no reset_cfg\n");
+	} else {
+		ret = mali_parse_reset_node(pdev, reset_dn,
+				&mpdata->module_reset, "reset");
+		/* of_get_child_by_name() took a reference; drop it again. */
+		of_node_put(reset_dn);
+		if (ret)
+			return ret;
+		have_reset = 1;
+	}
+
+	apb_reset_dn = of_get_child_by_name(gpu_dn, "capb_reset");
+	if (!apb_reset_dn) {
+		dev_info(&pdev->dev, "no apb_reset\n");
+	} else {
+		ret = mali_parse_reset_node(pdev, apb_reset_dn,
+				&mpdata->apb_reset, "apb_reset");
+		of_node_put(apb_reset_dn);
+		if (ret)
+			return ret;
+		have_apb_reset = 1;
+	}
+
+	/* The DT-driven reset sequence touches both register sets. */
+	if (have_reset && have_apb_reset)
+		mpdata->reset_flag = 1;
+
+	return 0;
+}
+
+/*
+ * Fill @mpdata from the GPU device-tree node of @pdev: core-count and clock
+ * limits, register mappings, reset description and the DVFS table.
+ * Returns 0 on success or a negative errno.
+ */
+int mali_dt_info(struct platform_device *pdev, struct mali_plat_info_t *mpdata)
+{
+	struct device_node *gpu_dn = pdev->dev.of_node;
+	struct device_node *gpu_clk_dn;
+	phandle dvfs_clk_hdl;
+	mali_dvfs_threshold_table *dvfs_tbl = NULL;
+	uint32_t *clk_sample = NULL;
+
+	struct property *prop;
+	const __be32 *p;
+	int length = 0, i = 0;
+	u32 u;
+
+	int ret = 0;
+	if (!gpu_dn) {
+		dev_notice(&pdev->dev, "gpu device node not right\n");
+		return -ENODEV;
+	}
+
+	/* Number of cores: "num_of_pp", default 6. */
+	ret = of_property_read_u32(gpu_dn,"num_of_pp",
+			&mpdata->cfg_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set max pp to default 6\n");
+		mpdata->cfg_pp = 6;
+	}
+	mpdata->scale_info.maxpp = mpdata->cfg_pp;
+	mpdata->maxpp_sysfs = mpdata->cfg_pp;
+	_dev_info(&pdev->dev, "max pp is %d\n", mpdata->scale_info.maxpp);
+
+	/* Minimum number of cores: "min_pp", default 1. */
+	ret = of_property_read_u32(gpu_dn,"min_pp",
+			&mpdata->cfg_min_pp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min pp to default 1\n");
+		mpdata->cfg_min_pp = 1;
+	}
+	mpdata->scale_info.minpp = mpdata->cfg_min_pp;
+	_dev_info(&pdev->dev, "min pp is %d\n", mpdata->scale_info.minpp);
+
+	/* Lowest DVFS table index: "min_clk", default 0. */
+	ret = of_property_read_u32(gpu_dn,"min_clk",
+			&mpdata->cfg_min_clock);
+	if (ret) {
+		dev_notice(&pdev->dev, "set min clk default to 0\n");
+		mpdata->cfg_min_clock = 0;
+	}
+	mpdata->scale_info.minclk = mpdata->cfg_min_clock;
+	_dev_info(&pdev->dev, "min clk is %d\n", mpdata->scale_info.minclk);
+
+	/* Register windows are taken from "reg" entries 3, 2 and 1. */
+	mpdata->reg_base_hiubus = of_iomap(gpu_dn, 3);
+	_dev_info(&pdev->dev, "hiu io source 0x%p\n", mpdata->reg_base_hiubus);
+
+	mpdata->reg_base_aobus = of_iomap(gpu_dn, 2);
+	_dev_info(&pdev->dev, "aobus base 0x%p\n", mpdata->reg_base_aobus);
+
+	mpdata->reg_base_reset = of_iomap(gpu_dn, 1);
+	_dev_info(&pdev->dev, "reset bus 0x%p\n", mpdata->reg_base_reset);
+
+	/*
+	 * "clk_cntl_reg" must land in clk_cntl_reg (it used to be written to
+	 * sc_mpp by mistake). Without the property fall back to 0x6c and
+	 * select the g12a reset sequence.
+	 */
+	mpdata->reset_g12a = 0;
+	ret = of_property_read_u32(gpu_dn,"clk_cntl_reg",
+			&mpdata->clk_cntl_reg);
+	if (ret) {
+		mpdata->clk_cntl_reg = 0x6c;
+		mpdata->reset_g12a = 1;
+	}
+	_dev_info(&pdev->dev, "clk cntl reg = 0x%x\n", mpdata->clk_cntl_reg);
+
+	/* "sc_mpp" defaults to the configured number of cores. */
+	ret = of_property_read_u32(gpu_dn,"sc_mpp",
+			&mpdata->sc_mpp);
+	if (ret) {
+		dev_notice(&pdev->dev, "set pp used most of time default to %d\n", mpdata->cfg_pp);
+		mpdata->sc_mpp = mpdata->cfg_pp;
+	}
+	_dev_info(&pdev->dev, "num of pp used most of time %d\n", mpdata->sc_mpp);
+
+	/*
+	 * "tbl" lists one phandle per DVFS entry. Refuse to continue without
+	 * it: an empty table would make the later dvfs_table_size-2 /
+	 * dvfs_table_size-1 defaults wrap around.
+	 */
+	if (!of_get_property(gpu_dn, "tbl", &length) ||
+			length < (int)sizeof(u32)) {
+		dev_err(&pdev->dev, "no usable dvfs table (tbl) in device tree\n");
+		return -EINVAL;
+	}
+
+	length = length /sizeof(u32);
+	_dev_info(&pdev->dev, "clock dvfs cfg
table size is %d\n", length); + + mpdata->dvfs_table = devm_kzalloc(&pdev->dev, + sizeof(struct mali_dvfs_threshold_table)*length, + GFP_KERNEL); + dvfs_tbl = mpdata->dvfs_table; + if (mpdata->dvfs_table == NULL) { + dev_err(&pdev->dev, "failed to alloc dvfs table\n"); + return -ENOMEM; + } + mpdata->clk_sample = devm_kzalloc(&pdev->dev, sizeof(u32)*length, GFP_KERNEL); + if (mpdata->clk_sample == NULL) { + dev_err(&pdev->dev, "failed to alloc clk_sample table\n"); + return -ENOMEM; + } + clk_sample = mpdata->clk_sample; + /* mali external reset reg */ + ret = mali_reset_info(pdev, gpu_dn, mpdata); + if (ret) + return ret; + /* dvfs clk table */ + of_property_for_each_u32(gpu_dn, "tbl", prop, p, u) { + dvfs_clk_hdl = (phandle) u; + gpu_clk_dn = of_find_node_by_phandle(dvfs_clk_hdl); + ret = of_property_read_u32(gpu_clk_dn,"clk_freq", &dvfs_tbl->clk_freq); + if (ret) { + dev_notice(&pdev->dev, "read clk_freq failed\n"); + } + + ret = of_property_read_string(gpu_clk_dn,"clk_parent", + &dvfs_tbl->clk_parent); + if (ret) { + dev_notice(&pdev->dev, "read clk_parent failed\n"); +#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 0)) + } else if (0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) { + dvfs_tbl->clkp_handle = devm_clk_get(&pdev->dev, dvfs_tbl->clk_parent); + if (IS_ERR(dvfs_tbl->clkp_handle)) { + dev_notice(&pdev->dev, "failed to get %s's clock pointer\n", dvfs_tbl->clk_parent); + } + ret = of_property_read_u32(gpu_clk_dn,"clkp_freq", &dvfs_tbl->clkp_freq); + if (ret) { + dev_notice(&pdev->dev, "read clk_parent freq failed\n"); + } +#endif + } + + ret = of_property_read_u32(gpu_clk_dn,"voltage", &dvfs_tbl->voltage); + if (ret) { + dev_notice(&pdev->dev, "read voltage failed\n"); + } + ret = of_property_read_u32(gpu_clk_dn,"keep_count", &dvfs_tbl->keep_count); + if (ret) { + dev_notice(&pdev->dev, "read keep_count failed\n"); + } + //downthreshold and upthreshold shall be u32 + ret = of_property_read_u32_array(gpu_clk_dn,"threshold", + &dvfs_tbl->downthreshold, 2); + 
if (ret) { + dev_notice(&pdev->dev, "read threshold failed\n"); + } + dvfs_tbl->freq_index = i; + + *clk_sample = dvfs_tbl->clk_freq / 1000000; + + dvfs_tbl ++; + clk_sample ++; + i++; + mpdata->dvfs_table_size ++; + } + + ret = of_property_read_u32(gpu_dn,"max_clk", + &mpdata->cfg_clock); + if (ret) { + dev_notice(&pdev->dev, "max clk set %d\n", mpdata->dvfs_table_size-2); + mpdata->cfg_clock = mpdata->dvfs_table_size-2; + } + + mpdata->cfg_clock_bkup = mpdata->cfg_clock; + mpdata->maxclk_sysfs = mpdata->cfg_clock; + mpdata->scale_info.maxclk = mpdata->cfg_clock; + _dev_info(&pdev->dev, "max clk is %d\n", mpdata->scale_info.maxclk); + + ret = of_property_read_u32(gpu_dn,"turbo_clk", + &mpdata->turbo_clock); + if (ret) { + dev_notice(&pdev->dev, "turbo clk set to %d\n", mpdata->dvfs_table_size-1); + mpdata->turbo_clock = mpdata->dvfs_table_size-1; + } + _dev_info(&pdev->dev, "turbo clk is %d\n", mpdata->turbo_clock); + + ret = of_property_read_u32(gpu_dn,"def_clk", + &mpdata->def_clock); + if (ret) { + mpdata->def_clock = mpdata->scale_info.maxclk; + dev_notice(&pdev->dev, "default clk set to %d\n", mpdata->def_clock); + } + if (mpdata->def_clock > mpdata->scale_info.maxclk) + mpdata->def_clock = mpdata->scale_info.maxclk; + _dev_info(&pdev->dev, "default clk is %d\n", mpdata->def_clock); + + dvfs_tbl = mpdata->dvfs_table; + clk_sample = mpdata->clk_sample; + for (i = 0; i< mpdata->dvfs_table_size; i++) { + _dev_info(&pdev->dev, "====================%d====================\n" + "clk_freq=%10d, clk_parent=%9s, voltage=%d, keep_count=%d, threshod=<%d %d>, clk_sample=%d\n", + i, + dvfs_tbl->clk_freq, dvfs_tbl->clk_parent, + dvfs_tbl->voltage, dvfs_tbl->keep_count, + dvfs_tbl->downthreshold, dvfs_tbl->upthreshold, *clk_sample); + dvfs_tbl ++; + clk_sample ++; + } + _dev_info(&pdev->dev, "clock dvfs table size is %d\n", mpdata->dvfs_table_size); + + mpdata->clk_mali = devm_clk_get(&pdev->dev, "gpu_mux"); + if (IS_ERR(mpdata->clk_mali)) { + dev_err(&pdev->dev, "failed to 
get clock pointer\n"); + return -EFAULT; + } + + pmali_plat = mpdata; + mpdata->pdev = pdev; + return 0; +} + +#endif diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h new file mode 100644 index 0000000..1ddc48b --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_clock.h @@ -0,0 +1,54 @@ +/* + * mali_clock.h + * + * Copyright (C) 2017 Amlogic, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#ifndef __MALI_CLOCK_H__ +#define __MALI_CLOCK_H__ +#include +#include +#include +#include +#include + +#include +#include +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 29)) && (LINUX_VERSION_CODE <= KERNEL_VERSION(4, 10, 0)) +#include +#endif + +#ifndef HHI_MALI_CLK_CNTL +#define HHI_MALI_CLK_CNTL 0x6C +#define mplt_read(r) readl((pmali_plat->reg_base_hiubus) + ((r)<<2)) +#define mplt_write(r, v) writel((v), ((pmali_plat->reg_base_hiubus) + ((r)<<2))) +#define mplt_setbits(r, m) mplt_write((r), (mplt_read(r) | (m))); +#define mplt_clrbits(r, m) mplt_write((r), (mplt_read(r) & (~(m)))); +#endif + +//extern int mali_clock_init(struct platform_device *dev); +int mali_clock_init_clk_tree(struct platform_device *pdev); + +typedef int (*critical_t)(size_t param); +int mali_clock_critical(critical_t critical, size_t param); + +int mali_clock_init(mali_plat_info_t*); +int mali_clock_set(unsigned int index); +void disable_clock(void); +void enable_clock(void); +u32 get_mali_freq(u32 idx); +void set_str_src(u32 data); +int mali_dt_info(struct platform_device *pdev, + struct mali_plat_info_t *mpdata); +#endif diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c new file mode 100644 index 0000000..55a50c4 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c @@ -0,0 +1,84 @@ +/* + * mali_kbase_config_devicetree.c + * + * Copyright (C) 2017 Amlogic, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifdef CONFIG_DEVFREQ_THERMAL +#include +#include +#include + +#define FALLBACK_STATIC_TEMPERATURE 55000 + +#ifdef CONFIG_MALI_DEVFREQ +static unsigned long t83x_static_power(unsigned long voltage) +{ + return 0; +} + +static unsigned long t83x_dynamic_power(unsigned long freq, + unsigned long voltage) +{ + /* The inputs: freq (f) is in Hz, and voltage (v) in mV. + * The coefficient (c) is in mW/(MHz mV mV). + * + * This function calculates the dynamic power after this formula: + * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz) + */ + const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */ + const unsigned long f_mhz = freq / 1000000; /* MHz */ + const unsigned long coefficient = 3600; /* mW/(MHz*mV*mV) */ + + return (coefficient * v2 * f_mhz) / 1000000; /* mW */ +} +#endif +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16)) +struct devfreq_cooling_ops t83x_model_ops = { +#else +struct devfreq_cooling_power t83x_model_ops = { +#endif +#ifdef CONFIG_MALI_DEVFREQ + .get_static_power = t83x_static_power, + .get_dynamic_power = t83x_dynamic_power, +#endif +}; + +#endif + +#include + +int kbase_platform_early_init(void) +{ + /* Nothing needed at this stage */ + return 0; +} + +static struct kbase_platform_config dummy_platform_config; + +struct kbase_platform_config *kbase_get_platform_config(void) +{ + return &dummy_platform_config; +} + +#ifndef CONFIG_OF +int kbase_platform_register(void) +{ + return 0; +} + +void kbase_platform_unregister(void) +{ +} +#endif diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h new file mode 100644 
index 0000000..233a18e --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h @@ -0,0 +1,94 @@ +/* + * mali_kbase_config_platform.h + * + * Copyright (C) 2017 Amlogic, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +/** + * Maximum frequency GPU will be clocked at. Given in kHz. + * This must be specified as there is no default value. + * + * Attached value: number in kHz + * Default value: NA + */ +#define GPU_FREQ_KHZ_MAX (750000) +/** + * Minimum frequency GPU will be clocked at. Given in kHz. + * This must be specified as there is no default value. + * + * Attached value: number in kHz + * Default value: NA + */ +#define GPU_FREQ_KHZ_MIN (100000) + +/** + * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock + * + * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func + * for the function prototype. + * + * Attached value: A kbase_cpu_clk_speed_func. + * Default Value: NA + */ +#define CPU_SPEED_FUNC (NULL) + +/** + * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock + * + * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func + * for the function prototype. + * + * Attached value: A kbase_gpu_clk_speed_func. 
+ * Default Value: NA + */ +#define GPU_SPEED_FUNC (NULL) + +/** + * Power management configuration + * + * Attached value: pointer to @ref kbase_pm_callback_conf + * Default value: See @ref kbase_pm_callback_conf + */ +#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) + +/** + * Platform specific configuration functions + * + * Attached value: pointer to @ref kbase_platform_funcs_conf + * Default value: See @ref kbase_platform_funcs_conf + */ +extern struct kbase_platform_funcs_conf dt_funcs_conf; +#define PLATFORM_FUNCS (&dt_funcs_conf) + +/** Power model for IPA + * + * Attached value: pointer to @ref mali_pa_model_ops + */ +#ifdef CONFIG_DEVFREQ_THERMAL +#define POWER_MODEL_CALLBACKS (&t83x_model_ops) +extern struct devfreq_cooling_ops t83x_model_ops; +#else +#define POWER_MODEL_CALLBACKS (NULL) +#endif +extern struct kbase_pm_callback_conf pm_callbacks; + +void mali_dev_freeze(void); +void mali_dev_restore(void); + +/** + * Autosuspend delay + * + * The delay time (in milliseconds) to be used for autosuspend + */ +#define AUTO_SUSPEND_DELAY (100) diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c new file mode 100644 index 0000000..ee18718 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c @@ -0,0 +1,313 @@ +/* + * mali_kbase_runtime_pm.c + * + * Copyright (C) 2017 Amlogic, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + */ + +//#define DEBUG +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mali_kbase_config_platform.h" +#include "mali_scaling.h" + +void *reg_base_reset = NULL; +static int first = 1; + +//remove this if sc2 worked fine. +//g12a +#define RESET0_MASK 0x10 +#define RESET1_MASK 0x11 +#define RESET2_MASK 0x12 +#define RESET0_LEVEL 0x20 +#define RESET1_LEVEL 0x21 +#define RESET2_LEVEL 0x22 +//sc2 +#define RESETCTRL_RESET1_LEVEL 0x11 +#define RESETCTRL_RESET1_MASK 0x21 + +#define Rd(r) readl((reg_base_reset) + ((r)<<2)) +#define Wr(r, v) writel((v), ((reg_base_reset) + ((r)<<2))) +#define Mali_WrReg(regnum, value) writel((value), kbdev->reg + (regnum)) +#define Mali_RdReg(regnum) readl(kbdev->reg + (regnum)) + +extern u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, enum kbase_pm_core_type type); + +//[0]:CG [1]:SC0 [2]:SC2 +static void Mali_pwr_on_with_kdev( struct kbase_device *kbdev, uint32_t mask) +{ + uint32_t part1_done; + uint32_t shader_present; + uint32_t tiler_present; + uint32_t l2_present; + + part1_done = 0; + Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts + + shader_present = Mali_RdReg(0x100); + tiler_present = Mali_RdReg(0x110); + l2_present = Mali_RdReg(0x120); + dev_info(kbdev->dev, "shader_present=%d, tiler_present=%d, l2_present=%d\n", + shader_present, tiler_present, l2_present); + + if ( mask == 0 ) { + Mali_WrReg(0x00000180, 0xffffffff); // Power on all cores (shader low) + Mali_WrReg(0x00000184, 0xffffffff); // Power on all cores (shader high) + Mali_WrReg(0x00000190, 0xffffffff); // Power on all cores (tiler low) + Mali_WrReg(0x00000194, 0xffffffff); // Power on all cores (tiler high) + Mali_WrReg(0x000001a0, 0xffffffff); // Power on all cores (l2 low) + Mali_WrReg(0x000001a4, 0xffffffff); // Power on all cores (l2 high) + } else { + Mali_WrReg(0x00000180, mask); // Power on all cores (shader low) + 
Mali_WrReg(0x00000184, 0); // Power on all cores (shader high) + Mali_WrReg(0x00000190, mask); // Power on all cores (tiler low) + Mali_WrReg(0x00000194, 0); // Power on all cores (tiler high) + Mali_WrReg(0x000001a0, mask); // Power on all cores (l2 low) + Mali_WrReg(0x000001a4, 0); // Power on all cores (l2 high) + } + + part1_done = Mali_RdReg(0x0000020); + while (0 == part1_done) { part1_done = Mali_RdReg(0x00000020); } + Mali_WrReg(0x0000024, 0xffffffff); // clear interrupts +} + +/*reset function before t7 by register*/ +static void mali_reset_v0(int reset_g12a) +{ + u32 value; + + if (reset_g12a) { + value = Rd(RESET0_MASK); + value = value & (~(0x1<<20)); + Wr(RESET0_MASK, value); + + value = Rd(RESET0_LEVEL); + value = value & (~(0x1<<20)); + Wr(RESET0_LEVEL, value); + + value = Rd(RESET2_MASK); + value = value & (~(0x1<<14)); + Wr(RESET2_MASK, value); + + value = Rd(RESET2_LEVEL); + value = value & (~(0x1<<14)); + Wr(RESET2_LEVEL, value); + + value = Rd(RESET0_LEVEL); + value = value | ((0x1<<20)); + Wr(RESET0_LEVEL, value); + + value = Rd(RESET2_LEVEL); + value = value | ((0x1<<14)); + Wr(RESET2_LEVEL, value); + } else { + /* JOHNT: remove this if sc2 worked fine. */ + value = ~(1 << 2); + Wr(RESETCTRL_RESET1_MASK, value); + Wr(RESETCTRL_RESET1_LEVEL, value); + Wr(RESETCTRL_RESET1_LEVEL, 0xFFFFFFFF); + } +} + +/* + * reset note: + * there have mali reset and mali_capb bus reset. + * reset step:[vlsi suggestion] + * step1. mali-reset and mali-capb-reset level and mask config to 0 + * step2. 
mali-reset and mali-capb-reset level config to 1
+ * Todo: sync old ic similar with t7,reset reg config by dts
+ */
+static void mali_reset(void)
+{
+	mali_plat_info_t* pmali_plat = get_mali_plat_data();
+	u32 value = 0;
+	int reset_g12a = pmali_plat->reset_g12a;
+
+	/* No reset description from the device tree: use the fixed registers. */
+	if (!pmali_plat->reset_flag) {
+		mali_reset_v0(reset_g12a);
+		return;
+	}
+
+	/*
+	 * for t7 , if old ic also sync with the follow method,
+	 * need sync the dts similar with T7
+	 */
+	/* step 1: clear the module and apb bits in both mask and level */
+	value = Rd(pmali_plat->module_reset.reg_mask);
+	value = value & (~(0x1 << pmali_plat->module_reset.reg_bit));
+	Wr(pmali_plat->module_reset.reg_mask, value);
+
+	value = Rd(pmali_plat->module_reset.reg_level);
+	value = value & (~(0x1 << pmali_plat->module_reset.reg_bit));
+	Wr(pmali_plat->module_reset.reg_level, value);
+
+	value = Rd(pmali_plat->apb_reset.reg_mask);
+	value = value & (~(0x1 << pmali_plat->apb_reset.reg_bit));
+	Wr(pmali_plat->apb_reset.reg_mask, value);
+
+	value = Rd(pmali_plat->apb_reset.reg_level);
+	value = value & (~(0x1 << pmali_plat->apb_reset.reg_bit));
+	Wr(pmali_plat->apb_reset.reg_level, value);
+
+	/* step 2: set the level bits again, apb first, then the module */
+	value = Rd(pmali_plat->apb_reset.reg_level);
+	value = value | ((0x1 << pmali_plat->apb_reset.reg_bit));
+	Wr(pmali_plat->apb_reset.reg_level, value);
+
+	value = Rd(pmali_plat->module_reset.reg_level);
+	value = value | ((0x1 << pmali_plat->module_reset.reg_bit));
+	Wr(pmali_plat->module_reset.reg_level, value);
+}
+
+/*
+ * 1. mali reset; 2. init pwr_override1; 3. power on one core manually
+ */
+static void mali_hw_init(struct kbase_device *kbdev)
+{
+	mali_reset();
+	udelay(10); // OR POLL for reset done
+	Mali_WrReg(GPU_CONTROL_REG(PWR_KEY), 0x2968A819);
+	Mali_WrReg(GPU_CONTROL_REG(PWR_OVERRIDE1), 0xfff | (0x20<<16));
+	Mali_pwr_on_with_kdev(kbdev, 0x1);
+}
+
+/*
+ * kbase power-on callback. Only the first call (tracked by the file-level
+ * "first" flag) goes past the early exit; later calls return 0 at once.
+ */
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+	int ret = 0;
+
+	struct mali_plat_info_t *mpdata = (struct mali_plat_info_t *) kbdev->platform_context;
+	/* Rd()/Wr() go through this file-level base pointer. */
+	reg_base_reset = mpdata->reg_base_reset;
+
+
+	if (first == 0) goto out;
+	if (!pm_runtime_enabled(kbdev->dev)) {
+		pm_runtime_enable(kbdev->dev);
+
dev_info(kbdev->dev, "pm_runtime not enabled, enable it here\n"); + ret = pm_runtime_get_sync(kbdev->dev); + udelay(100); + dev_info(kbdev->dev, "pm_runtime_get_sync returned %d\n", ret); + } else { + dev_info(kbdev->dev, "pm_runtime enabled\n"); + } + + first = 0; + mali_hw_init(kbdev); +out: + return ret; +} + +/* + * pm_callback_power_off will be called when gpu have no job todo. + * the pm_runtime_put_autosuspend will trigger gpu power reset + * which will affect gpu work on T7,so we can't do put here + * the out power of gpu on t7 will be power off by platform when suspend + */ +static void pm_callback_power_off(struct kbase_device *kbdev) +{ + dev_dbg(kbdev->dev, "pm_callback_power_off\n"); +} + +#ifdef KBASE_PM_RUNTIME +static int kbase_device_runtime_init(struct kbase_device *kbdev) +{ + int ret = 0; + + dev_info(kbdev->dev, "kbase_device_runtime_init\n"); + pm_runtime_enable(kbdev->dev); + ret = pm_runtime_get_sync(kbdev->dev); + dev_info(kbdev->dev, "pm_runtime_get_sync ret=%d\n", ret); + + return ret; +} + +static void kbase_device_runtime_disable(struct kbase_device *kbdev) +{ + dev_info(kbdev->dev, "kbase_device_runtime_disable\n"); + pm_runtime_disable(kbdev->dev); +} +#endif + +static int pm_callback_runtime_on(struct kbase_device *kbdev) +{ + dev_info(kbdev->dev, "pm_callback_runtime_on\n"); + + return 0; +} + +static void pm_callback_runtime_off(struct kbase_device *kbdev) +{ + dev_info(kbdev->dev, "pm_callback_runtime_off\n"); +} + +/* + * the out power of gpu on t7[vdd_gpu] will be power off by platform when suspend, + * which will result in the gpu internal register reset to default. + * all of the ic before t7 use the vdd_ee,which will never power down even suspend, + * so we need do once init and check by pwr_override1. 
+ */ +static void pm_callback_resume(struct kbase_device *kbdev) +{ + int ret; + u32 pwr_override1; + + dev_info(kbdev->dev, "pm_callback_resume in\n"); + if (!pm_runtime_enabled(kbdev->dev)) { + pm_runtime_enable(kbdev->dev); + dev_info(kbdev->dev, "pm_runtime not enable, enable here\n"); + } else { + dev_info(kbdev->dev, "pm_runtime enabled already\n"); + } + ret = pm_runtime_get_sync(kbdev->dev); + dev_info(kbdev->dev, "pm_runtime_get_sync ret=%d\n", ret); + Mali_WrReg(GPU_CONTROL_REG(PWR_KEY), 0x2968A819); + pwr_override1 = Mali_RdReg(GPU_CONTROL_REG(PWR_OVERRIDE1)); + if (!pwr_override1) { + dev_info(kbdev->dev, "pwr_override1=0,need do once init\n"); + mali_hw_init(kbdev); + } + ret = pm_callback_runtime_on(kbdev); + dev_info(kbdev->dev, "pm_callback_resume out\n"); +} + +/* the out power of gpu on t7 will be power off by platform when suspend */ +static void pm_callback_suspend(struct kbase_device *kbdev) +{ + dev_info(kbdev->dev, "pm_callback_suspend in\n"); + pm_callback_runtime_off(kbdev); + pm_runtime_put_sync(kbdev->dev); + pm_runtime_disable(kbdev->dev); + dev_info(kbdev->dev, "pm_callback_suspend out\n"); +} + +struct kbase_pm_callback_conf pm_callbacks = { + .power_on_callback = pm_callback_power_on, + .power_off_callback = pm_callback_power_off, + .power_suspend_callback = pm_callback_suspend, + .power_resume_callback = pm_callback_resume, +#ifdef KBASE_PM_RUNTIME + .power_runtime_init_callback = kbase_device_runtime_init, + .power_runtime_term_callback = kbase_device_runtime_disable, + .power_runtime_on_callback = pm_callback_runtime_on, + .power_runtime_off_callback = pm_callback_runtime_off, +#else /* KBASE_PM_RUNTIME */ + .power_runtime_init_callback = NULL, + .power_runtime_term_callback = NULL, + .power_runtime_on_callback = NULL, + .power_runtime_off_callback = NULL, +#endif /* KBASE_PM_RUNTIME */ +}; diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h 
b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h new file mode 100644 index 0000000..724112f --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_platform.h @@ -0,0 +1,25 @@ +/* + * mali_platform.h + * + * Copyright (C) 2017 Amlogic, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include +#ifndef MALI_PLATFORM_H_ +#define MALI_PLATFORM_H_ + +extern u32 mali_gp_reset_fail; +extern u32 mali_core_timeout; + +#endif /* MALI_PLATFORM_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h new file mode 100644 index 0000000..a55ae00 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_scaling.h @@ -0,0 +1,154 @@ +/* + * mali_scaling.h + * + * Copyright (C) 2017 Amlogic, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +/** + * @file arm_core_scaling.h + * Example core scaling policy. + */ + +#ifndef __ARM_CORE_SCALING_H__ +#define __ARM_CORE_SCALING_H__ + +#include +#include +#include +#include + +enum mali_scale_mode_t { + MALI_PP_SCALING = 0, + MALI_PP_FS_SCALING, + MALI_SCALING_DISABLE, + MALI_TURBO_MODE, + MALI_SCALING_MODE_MAX +}; + +typedef struct mali_dvfs_threshold_table { + uint32_t freq_index; + uint32_t voltage; + uint32_t keep_count; + uint32_t downthreshold; + uint32_t upthreshold; + uint32_t clk_freq; + const char *clk_parent; + struct clk *clkp_handle; + uint32_t clkp_freq; +} mali_dvfs_threshold_table; + +/** + * restrictions on frequency and number of pp. + */ +typedef struct mali_scale_info_t { + u32 minpp; + u32 maxpp; + u32 minclk; + u32 maxclk; +} mali_scale_info_t; + +typedef struct mali_reset_info_t { + u32 reg_level; + u32 reg_mask; + u32 reg_bit; +} mali_reset_info_t; + +/** + * Platform spesific data for meson chips. + */ +typedef struct mali_plat_info_t { + u32 cfg_pp; /* number of pp. */ + u32 cfg_min_pp; + u32 turbo_clock; /* reserved clock src. */ + u32 def_clock; /* gpu clock used most of time.*/ + u32 cfg_clock; /* max clock could be used.*/ + u32 cfg_clock_bkup; /* same as cfg_clock, for backup. */ + u32 cfg_min_clock; + + u32 sc_mpp; /* number of pp used most of time.*/ + u32 bst_gpu; /* threshold for boosting gpu. */ + u32 bst_pp; /* threshold for boosting PP. */ + + u32 *clk; + u32 *clk_sample; + u32 clk_len; + u32 have_switch; /* have clock gate switch or not. */ + + mali_dvfs_threshold_table *dvfs_table; + u32 dvfs_table_size; + + mali_scale_info_t scale_info; + u32 maxclk_sysfs; + u32 maxpp_sysfs; + + /* mali external reset reg info */ + u32 reset_flag; + mali_reset_info_t module_reset; + mali_reset_info_t apb_reset; + + /* set upper limit of pp or frequency, for THERMAL thermal or band width saving.*/ + u32 limit_on; + + /* for boost up gpu by user. 
*/ + void (*plat_preheat)(void); + + struct platform_device *pdev; + void __iomem *reg_base_hiubus; + void __iomem *reg_base_aobus; + struct work_struct wq_work; + struct clk *clk_mali; + struct clk *clk_mali_0; + struct clk *clk_mali_1; + void __iomem *reg_base_reset; + u32 clk_cntl_reg; + s32 reset_g12a; +} mali_plat_info_t; +mali_plat_info_t* get_mali_plat_data(void); + +/** + * Initialize core scaling policy. + * + * @note The core scaling policy will assume that all PP cores are on initially. + * + * @param num_pp_cores Total number of PP cores. + */ +int mali_core_scaling_init(mali_plat_info_t*); + +/** + * Terminate core scaling policy. + */ +void mali_core_scaling_term(void); + +/** + * cancel and flush scaling job queue. + */ +void flush_scaling_job(void); + +/* get current state(pp, clk). */ +void get_mali_rt_clkpp(u32* clk, u32* pp); +u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush); +void revise_mali_rt(void); +/* get max gpu clk level of this chip*/ +int get_gpu_max_clk_level(void); + +/* get or set the scale mode. */ +u32 get_mali_schel_mode(void); +void set_mali_schel_mode(u32 mode); + +/* for frequency reporter in DS-5 streamline. */ +u32 get_current_frequency(void); +void mali_dev_freeze(void); +void mali_dev_restore(void); + +#endif /* __ARM_CORE_SCALING_H__ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c new file mode 100644 index 0000000..09feaaf --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.c @@ -0,0 +1,126 @@ +/* + * meson_main2.c + * + * Copyright (C) 2017 Amlogic, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +/** + * @file mali_platform.c + * Platform specific Mali driver functions for: + * meson8m2 and the newer chip + */ +#include +#include +#include +#include +#include +#ifdef CONFIG_PM_RUNTIME +#include +#endif +#include +#include +#include + +#include +#include +#include +#include + +#include "mali_scaling.h" +#include "mali_clock.h" +#include "meson_main2.h" + +extern void mali_post_init(void); +struct kbase_device; +//static int gpu_dvfs_probe(struct platform_device *pdev) +int platform_dt_init_func(struct kbase_device *kbdev) +{ + struct device *dev = kbdev->dev; + struct platform_device *pdev = to_platform_device(dev); + + int err = -1; + + err = mali_meson_init_start(pdev); + mali_meson_init_finish(pdev); + mpgpu_class_init(); + mali_post_init(); + return err; +} + +//static int gpu_dvfs_remove(struct platform_device *pdev) +void platform_dt_term_func(struct kbase_device *kbdev) +{ + struct device *dev = kbdev->dev; + struct platform_device *pdev = to_platform_device(dev); + + printk("%s, %d\n", __FILE__, __LINE__); + + mpgpu_class_exit(); + mali_meson_uninit(pdev); + +} + +static u32 last_utilisation, last_util_gl_share, last_util_cl_share[2]; +inline int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, + u32 util_gl_share, u32 util_cl_share[2]) +{ + last_utilisation = utilisation; + last_util_gl_share = util_gl_share; + last_util_cl_share[0] = util_cl_share[0]; + last_util_cl_share[1] = util_cl_share[1]; + mali_gpu_utilization_callback(utilisation*255/100); + return 1; +} + +u32 mpgpu_get_utilization(void) +{ + return last_utilisation; +} +u32 mpgpu_get_util_gl_share(void) +{ + return last_util_gl_share; +} +u32 mpgpu_get_util_cl_share(u32 *util) +{ + util[0] = 
last_util_cl_share[0]; + util[1] = last_util_cl_share[1]; + return 0; +} + +struct kbase_platform_funcs_conf dt_funcs_conf = { + .platform_init_func = platform_dt_init_func, + .platform_term_func = platform_dt_term_func, +}; +#if 0 +static const struct of_device_id gpu_dvfs_ids[] = { + { .compatible = "meson, gpu-dvfs-1.00.a" }, + { }, +}; +MODULE_DEVICE_TABLE(of, gpu_dvfs_ids); + +static struct platform_driver gpu_dvfs_driver = { + .driver = { + .name = "meson-gpu-dvfs", + .owner = THIS_MODULE, + .of_match_table = gpu_dvfs_ids, + }, + .probe = gpu_dvfs_probe, + .remove = gpu_dvfs_remove, +}; +module_platform_driver(gpu_dvfs_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Amlogic SH, MM"); +MODULE_DESCRIPTION("Driver for the Meson GPU dvfs"); +#endif diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h new file mode 100644 index 0000000..a7b4769 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/meson_main2.h @@ -0,0 +1,48 @@ +/* + * meson_main2.h + * + * Copyright (C) 2017 Amlogic, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#ifndef MESON_MAIN_H_ +#define MESON_MAIN_H_ +#include +#include +#include +#ifdef CONFIG_PM_RUNTIME +#include +#endif +#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29)) +#include +#endif + +#include "mali_scaling.h" +#include "mali_clock.h" + +u32 set_max_mali_freq(u32 idx); +u32 get_max_mali_freq(void); + +int mali_meson_init_start(struct platform_device* ptr_plt_dev); +int mali_meson_init_finish(struct platform_device* ptr_plt_dev); +int mali_meson_uninit(struct platform_device* ptr_plt_dev); +int mpgpu_class_init(void); +void mpgpu_class_exit(void); +void mali_gpu_utilization_callback(int utilization_pp); + +u32 mpgpu_get_utilization(void); +u32 mpgpu_get_util_gl_share(void); +u32 mpgpu_get_util_cl_share(u32 *util); +u32 mpgpu_get_gpu_err_count(void); + +#endif /* MESON_MAIN_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c new file mode 100644 index 0000000..ee7c1da --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/mpgpu.c @@ -0,0 +1,326 @@ +/* + * mpgpu.c + * + * Copyright (C) 2017 Amlogic, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +/* Standard Linux headers */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29)) +#include +#include +#include +#endif + +#include +#include + +#include "meson_main2.h" + +int meson_gpu_data_invalid_count = 0; +int meson_gpu_fault = 0; + +extern u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, enum kbase_pm_core_type type); +static ssize_t domain_stat_read(struct class *class, + struct class_attribute *attr, char *buf) +{ + unsigned int val; + u64 core_ready; + mali_plat_info_t* pmali_plat = get_mali_plat_data(); + struct platform_device* ptr_plt_dev = pmali_plat->pdev; + struct kbase_device *kbdev = dev_get_drvdata(&ptr_plt_dev->dev); + + core_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); + val = core_ready; + return sprintf(buf, "%x\n", val); +} + +#define PREHEAT_CMD "preheat" +#define PLL2_CMD "mpl2" /* mpl2 [11] or [0xxxxxxx] */ +#define SCMPP_CMD "scmpp" /* scmpp [number of pp your want in most of time]. 
*/ +#define BSTGPU_CMD "bstgpu" /* bstgpu [0-256] */ +#define BSTPP_CMD "bstpp" /* bstpp [0-256] */ +#define LIMIT_CMD "lmt" /* lmt [0 or 1] */ +#define MAX_TOKEN 20 +#define FULL_UTILIZATION 256 + +static ssize_t mpgpu_write(struct class *class, + struct class_attribute *attr, const char *buf, size_t count) +{ + char *pstart, *cprt = NULL; + u32 val = 0; + mali_plat_info_t* pmali_plat = get_mali_plat_data(); + + cprt = skip_spaces(buf); + pstart = strsep(&cprt," "); + if (strlen(pstart) < 1) + goto quit; + + if (!strncmp(pstart, PREHEAT_CMD, MAX_TOKEN)) { + if (pmali_plat->plat_preheat) { + pmali_plat->plat_preheat(); + } + } else if (!strncmp(pstart, PLL2_CMD, MAX_TOKEN)) { + int base = 10; + if ((strlen(cprt) > 2) && (cprt[0] == '0') && + (cprt[1] == 'x' || cprt[1] == 'X')) + base = 16; + if (kstrtouint(cprt, base, &val) <0) + goto quit; + if (val < 11) + pmali_plat->cfg_clock = pmali_plat->cfg_clock_bkup; + else + pmali_plat->cfg_clock = pmali_plat->turbo_clock; + pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock; + set_str_src(val); + } else if (!strncmp(pstart, SCMPP_CMD, MAX_TOKEN)) { + if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL) + goto quit; + if ((val > 0) && (val < pmali_plat->cfg_pp)) { + pmali_plat->sc_mpp = val; + } + } else if (!strncmp(pstart, BSTGPU_CMD, MAX_TOKEN)) { + if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL) + goto quit; + if ((val > 0) && (val < FULL_UTILIZATION)) { + pmali_plat->bst_gpu = val; + } + } else if (!strncmp(pstart, BSTPP_CMD, MAX_TOKEN)) { + if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL) + goto quit; + if ((val > 0) && (val < FULL_UTILIZATION)) { + pmali_plat->bst_pp = val; + } + } else if (!strncmp(pstart, LIMIT_CMD, MAX_TOKEN)) { + if ((kstrtouint(cprt, 10, &val) <0) || pmali_plat == NULL) + goto quit; + + if (val < 2) { + pmali_plat->limit_on = val; + if (val == 0) { + pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock; + pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp; + 
revise_mali_rt(); + } + } + } +quit: + return count; +} + +static ssize_t scale_mode_read(struct class *class, + struct class_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", get_mali_schel_mode()); +} + +static ssize_t scale_mode_write(struct class *class, + struct class_attribute *attr, const char *buf, size_t count) +{ + int ret; + unsigned int val; + + ret = kstrtouint(buf, 10, &val); + if (0 != ret) + { + return -EINVAL; + } + + set_mali_schel_mode(val); + + return count; +} + +static ssize_t max_freq_read(struct class *class, + struct class_attribute *attr, char *buf) +{ + mali_plat_info_t* pmali_plat = get_mali_plat_data(); + printk("maxclk:%d, maxclk_sys:%d, max gpu level=%d\n", + pmali_plat->scale_info.maxclk, pmali_plat->maxclk_sysfs, get_gpu_max_clk_level()); + return sprintf(buf, "%d\n", get_gpu_max_clk_level()); +} + +static ssize_t max_freq_write(struct class *class, + struct class_attribute *attr, const char *buf, size_t count) +{ + int ret; + unsigned int val; + mali_plat_info_t* pmali_plat; + mali_scale_info_t* pinfo; + + pmali_plat = get_mali_plat_data(); + pinfo = &pmali_plat->scale_info; + + ret = kstrtouint(buf, 10, &val); + if ((0 != ret) || (val > pmali_plat->cfg_clock) || (val < pinfo->minclk)) + return -EINVAL; + + pmali_plat->maxclk_sysfs = val; + pinfo->maxclk = val; + revise_mali_rt(); + + return count; +} + +static ssize_t min_freq_read(struct class *class, + struct class_attribute *attr, char *buf) +{ + mali_plat_info_t* pmali_plat = get_mali_plat_data(); + return sprintf(buf, "%d\n", pmali_plat->scale_info.minclk); +} + +static ssize_t min_freq_write(struct class *class, + struct class_attribute *attr, const char *buf, size_t count) +{ + int ret; + unsigned int val; + mali_plat_info_t* pmali_plat; + mali_scale_info_t* pinfo; + + pmali_plat = get_mali_plat_data(); + pinfo = &pmali_plat->scale_info; + + ret = kstrtouint(buf, 10, &val); + if ((0 != ret) || (val > pinfo->maxclk)) + return -EINVAL; + + pinfo->minclk = val; + 
revise_mali_rt(); + + return count; +} + +static ssize_t freq_read(struct class *class, + struct class_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", get_current_frequency()); +} + +static ssize_t freq_write(struct class *class, + struct class_attribute *attr, const char *buf, size_t count) +{ + int ret; + unsigned int val; + u32 clk, pp; + get_mali_rt_clkpp(&clk, &pp); + + ret = kstrtouint(buf, 10, &val); + if (0 != ret) + return -EINVAL; + + set_mali_rt_clkpp(val, pp, 1); + + return count; +} + +static ssize_t utilization_read(struct class *class, + struct class_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", mpgpu_get_utilization()); +} + +static ssize_t util_gl_share_read(struct class *class, + struct class_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", mpgpu_get_util_gl_share()); +} + +static ssize_t util_cl_share_read(struct class *class, + struct class_attribute *attr, char *buf) +{ + u32 val[2]; + + mpgpu_get_util_cl_share(val); + + return sprintf(buf, "%d %d\n", val[0], val[1]); +} + +u32 mpgpu_get_gpu_err_count(void) +{ + return (meson_gpu_fault + meson_gpu_data_invalid_count); +} + +static ssize_t meson_gpu_get_err_count(struct class *class, + struct class_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", mpgpu_get_gpu_err_count()); +} + +static ssize_t mpgpu_set_err_count(struct class *class, + struct class_attribute *attr, const char *buf, size_t count) +{ + int ret; + unsigned int val; + + ret = kstrtouint(buf, 10, &val); + if (0 != ret) + return -EINVAL; + + meson_gpu_fault = val; + + return count; +} + +static struct class_attribute mali_class_attrs[] = { + __ATTR(domain_stat, 0644, domain_stat_read, NULL), + __ATTR(mpgpucmd, 0644, NULL, mpgpu_write), + __ATTR(scale_mode, 0644, scale_mode_read, scale_mode_write), + __ATTR(min_freq, 0644, min_freq_read, min_freq_write), + __ATTR(max_freq, 0644, max_freq_read, max_freq_write), + __ATTR(cur_freq, 0644, freq_read, freq_write), + __ATTR(utilization, 
0644, utilization_read, NULL), + __ATTR(util_gl, 0644, util_gl_share_read, NULL), + __ATTR(util_cl, 0644, util_cl_share_read, NULL), + __ATTR(gpu_err, 0644, meson_gpu_get_err_count, mpgpu_set_err_count), +}; + +static struct class mpgpu_class = { + .name = "mpgpu", +}; + +int mpgpu_class_init(void) +{ + int ret = 0; + int i; + int attr_num = ARRAY_SIZE(mali_class_attrs); + + ret = class_register(&mpgpu_class); + if (ret) { + printk(KERN_ERR "%s: class_register failed\n", __func__); + return ret; + } + for (i = 0; i< attr_num; i++) { + ret = class_create_file(&mpgpu_class, &mali_class_attrs[i]); + if (ret) { + printk(KERN_ERR "%d ST: class item failed to register\n", i); + } + } + return ret; +} + +void mpgpu_class_exit(void) +{ + class_unregister(&mpgpu_class); +} + diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c new file mode 100644 index 0000000..c8ff2f5 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/platform_gx.c @@ -0,0 +1,262 @@ +/* + * platform_gx.c + * + * Copyright (C) 2017 Amlogic, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#include +#include +#include +#include +#include /* kernel module definitions */ +#include /* request_mem_region */ +#include +#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 29)) +#include +#include +#include +#endif +#include +#ifdef CONFIG_AMLOGIC_GPU_THERMAL +#include +#include +//#include +#include +#endif + +#include +#include + +#include "mali_scaling.h" +#include "mali_clock.h" +#include "meson_main2.h" + +/* + * For Meson 8 M2. + * + */ +static void mali_plat_preheat(void); +static mali_plat_info_t mali_plat_data = { + .bst_gpu = 210, /* threshold for boosting gpu. */ + .bst_pp = 160, /* threshold for boosting PP. */ + .have_switch = 1, + .limit_on = 1, + .plat_preheat = mali_plat_preheat, +}; + +static void mali_plat_preheat(void) +{ +#ifndef CONFIG_MALI_DVFS + u32 pre_fs; + u32 clk, pp; + + if (get_mali_schel_mode() != MALI_PP_FS_SCALING) + return; + + get_mali_rt_clkpp(&clk, &pp); + pre_fs = mali_plat_data.def_clock + 1; + if (clk < pre_fs) + clk = pre_fs; + if (pp < mali_plat_data.sc_mpp) + pp = mali_plat_data.sc_mpp; + set_mali_rt_clkpp(clk, pp, 1); +#endif +} + +mali_plat_info_t* get_mali_plat_data(void) { + return &mali_plat_data; +} + +int get_mali_freq_level(int freq) +{ + int i = 0, level = -1; + int mali_freq_num; + + if (freq < 0) + return level; + + mali_freq_num = mali_plat_data.dvfs_table_size - 1; + if (freq <= mali_plat_data.clk_sample[0]) + level = mali_freq_num-1; + else if (freq >= mali_plat_data.clk_sample[mali_freq_num - 1]) + level = 0; + else { + for (i=0; i= mali_plat_data.clk_sample[i] && freq <= mali_plat_data.clk_sample[i + 1]) { + level = i; + level = mali_freq_num-level - 1; + } + } + } + return level; +} + +unsigned int get_mali_max_level(void) +{ + return mali_plat_data.dvfs_table_size - 1; +} + +int get_gpu_max_clk_level(void) +{ + return mali_plat_data.cfg_clock; +} + +#ifdef CONFIG_AMLOGIC_GPU_THERMAL +static void set_limit_mali_freq(u32 idx) +{ + if (mali_plat_data.limit_on == 0) + return; + if (idx > 
mali_plat_data.turbo_clock || idx < mali_plat_data.scale_info.minclk) + return; + if (idx > mali_plat_data.maxclk_sysfs) { + printk("idx > max freq\n"); + return; + } + mali_plat_data.scale_info.maxclk= idx; + revise_mali_rt(); +} + +static u32 get_limit_mali_freq(void) +{ + return mali_plat_data.scale_info.maxclk; +} + +#ifdef CONFIG_DEVFREQ_THERMAL +static u32 get_mali_utilization(void) +{ + u32 util= mpgpu_get_utilization(); + return (util * 100) / 256; +} +#endif +#endif + +#ifdef CONFIG_AMLOGIC_GPU_THERMAL +static u32 set_limit_pp_num(u32 num) +{ + u32 ret = -1; + if (mali_plat_data.limit_on == 0) + goto quit; + if (num > mali_plat_data.cfg_pp || + num < mali_plat_data.scale_info.minpp) + goto quit; + + if (num > mali_plat_data.maxpp_sysfs) { + printk("pp > sysfs set pp\n"); + goto quit; + } + + mali_plat_data.scale_info.maxpp = num; + revise_mali_rt(); + ret = 0; +quit: + return ret; +} +#ifdef CONFIG_DEVFREQ_THERMAL +extern u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, enum kbase_pm_core_type type); +static u32 mali_get_online_pp(void) +{ + u64 core_ready; + u64 l2_ready; + u64 tiler_ready; + mali_plat_info_t* pmali_plat = get_mali_plat_data(); + struct platform_device* ptr_plt_dev = pmali_plat->pdev; + struct kbase_device *kbdev = dev_get_drvdata(&ptr_plt_dev->dev); + + core_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); + l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2); + tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER); + + if (!core_ready && !l2_ready && !tiler_ready) { + return 0; + } + + return 2; +} +#endif +#endif + +int mali_meson_init_start(struct platform_device* ptr_plt_dev) +{ + struct kbase_device *kbdev = dev_get_drvdata(&ptr_plt_dev->dev); + + mali_dt_info(ptr_plt_dev, &mali_plat_data); + mali_clock_init_clk_tree(ptr_plt_dev); + + kbdev->platform_context = &mali_plat_data; + return 0; +} + +int mali_meson_init_finish(struct platform_device* ptr_plt_dev) +{ + if 
(mali_core_scaling_init(&mali_plat_data) < 0) + return -1; + return 0; +} + +int mali_meson_uninit(struct platform_device* ptr_plt_dev) +{ + mali_core_scaling_term(); + return 0; +} + +void mali_post_init(void) +{ +#ifdef CONFIG_AMLOGIC_GPU_THERMAL + int err; + struct gpufreq_cooling_device *gcdev = NULL; + struct gpucore_cooling_device *gccdev = NULL; + + gcdev = gpufreq_cooling_alloc(); + register_gpu_freq_info(get_current_frequency); + if (IS_ERR(gcdev)) + printk("malloc gpu cooling buffer error!!\n"); + else if (!gcdev) + printk("system does not enable thermal driver\n"); + else { + gcdev->get_gpu_freq_level = get_mali_freq_level; + gcdev->get_gpu_max_level = get_mali_max_level; + gcdev->set_gpu_freq_idx = set_limit_mali_freq; + gcdev->get_gpu_current_max_level = get_limit_mali_freq; +#ifdef CONFIG_DEVFREQ_THERMAL + gcdev->get_gpu_freq = get_mali_freq; + gcdev->get_gpu_loading = get_mali_utilization; + gcdev->get_online_pp = mali_get_online_pp; +#endif + err = gpufreq_cooling_register(gcdev); +#ifdef CONFIG_DEVFREQ_THERMAL + meson_gcooldev_min_update(gcdev->cool_dev); +#endif + if (err < 0) + printk("register GPU cooling error\n"); + printk("gpu cooling register okay with err=%d\n",err); + } + + gccdev = gpucore_cooling_alloc(); + if (IS_ERR(gccdev)) + printk("malloc gpu core cooling buffer error!!\n"); + else if (!gccdev) + printk("system does not enable thermal driver\n"); + else { + gccdev->max_gpu_core_num=mali_plat_data.cfg_pp; + gccdev->set_max_pp_num=set_limit_pp_num; + err = (int)gpucore_cooling_register(gccdev); +#ifdef CONFIG_DEVFREQ_THERMAL + meson_gcooldev_min_update(gccdev->cool_dev); +#endif + if (err < 0) + printk("register GPU cooling error\n"); + printk("gpu core cooling register okay with err=%d\n",err); + } +#endif +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c new file mode 100644 index 0000000..2caff3c --- /dev/null +++ 
b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/devicetree/scaling.c @@ -0,0 +1,606 @@ +/* + * scaling.c + * + * Copyright (C) 2017 Amlogic, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +/** + * @file arm_core_scaling.c + * Example core scaling policy. + */ + +#include +#include +#include + +#if AMLOGIC_GPU_USE_GPPLL +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 16) +#include +#elif (LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 0)) +#include +#endif +#endif + +#define LOG_MALI_SCALING 1 +#include "meson_main2.h" +#include "mali_clock.h" + +static int currentStep; +#ifndef CONFIG_MALI_DVFS +static int num_cores_enabled; +static int lastStep; +static struct work_struct wq_work; +static mali_plat_info_t* pmali_plat = NULL; +#endif +static int scaling_mode = MALI_PP_FS_SCALING; +extern int mali_pm_statue; +//static int scaling_mode = MALI_SCALING_DISABLE; +//static int scaling_mode = MALI_PP_SCALING; + +#if AMLOGIC_GPU_USE_GPPLL +static struct gp_pll_user_handle_s *gp_pll_user_gpu; +static int is_gp_pll_get; +static int is_gp_pll_put; +#endif +static unsigned scaling_dbg_level = 0; +module_param(scaling_dbg_level, uint, 0644); +MODULE_PARM_DESC(scaling_dbg_level , "scaling debug level"); + +#define scalingdbg(level, fmt, arg...) 
\ + do { \ + if (scaling_dbg_level >= (level)) \ + printk(fmt , ## arg); \ + } while (0) + +#ifndef CONFIG_MALI_DVFS +static int mali_stay_count = 0; +static inline void mali_clk_exected(void) +{ + mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table; + uint32_t execStep = currentStep; +#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 0)) + mali_dvfs_threshold_table *dvfs_tbl = &pmali_plat->dvfs_table[currentStep]; +#endif + + //if (pdvfs[currentStep].freq_index == pdvfs[lastStep].freq_index) return; + if ((pdvfs[execStep].freq_index == pdvfs[lastStep].freq_index) || + (pdvfs[execStep].clk_freq == pdvfs[lastStep].clk_freq)){ + return; + } + +#if AMLOGIC_GPU_USE_GPPLL + if (0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) { + gp_pll_request(gp_pll_user_gpu); + if (!is_gp_pll_get) { + //printk("not get pll\n"); + execStep = currentStep - 1; + } + } else { + //not get the gp pll, do need put + is_gp_pll_get = 0; + is_gp_pll_put = 0; + gp_pll_release(gp_pll_user_gpu); + } +#elif (LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 0)) + if ((0 == strcmp(dvfs_tbl->clk_parent, "gp0_pll")) && + !IS_ERR(dvfs_tbl->clkp_handle) && + (0 != dvfs_tbl->clkp_freq)) { + clk_prepare_enable(dvfs_tbl->clkp_handle); + clk_set_rate(dvfs_tbl->clkp_handle, dvfs_tbl->clkp_freq); + } + +#endif + //mali_dev_pause(); + mali_clock_set(pdvfs[execStep].freq_index); + //mali_dev_resume(); +#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 4, 0)) +#if AMLOGIC_GPU_USE_GPPLL==0 + if ((0 == strcmp(pdvfs[lastStep].clk_parent,"gp0_pll")) && + (0 != strcmp(pdvfs[execStep].clk_parent, "gp0_pll"))) { + clk_disable_unprepare(pdvfs[lastStep].clkp_handle); + } +#endif +#endif + + lastStep = execStep; +#if AMLOGIC_GPU_USE_GPPLL + if (is_gp_pll_put) { + //printk("release gp0 pll\n"); + gp_pll_release(gp_pll_user_gpu); + gp_pll_request(gp_pll_user_gpu); + is_gp_pll_get = 0; + is_gp_pll_put = 0; + } +#endif + +} +#if AMLOGIC_GPU_USE_GPPLL +static int gp_pll_user_cb_gpu(struct gp_pll_user_handle_s *user, + int event) +{ + if 
(event == GP_PLL_USER_EVENT_GRANT) { + //printk("granted\n"); + is_gp_pll_get = 1; + is_gp_pll_put = 0; + schedule_work(&wq_work); + } else if (event == GP_PLL_USER_EVENT_YIELD) { + //printk("ask for yield\n"); + is_gp_pll_get = 0; + is_gp_pll_put = 1; + schedule_work(&wq_work); + } + + return 0; +} +#endif + +int mali_perf_set_num_pp_cores(int cores) +{ + cores = cores; + return 0; +} + +static void do_scaling(struct work_struct *work) +{ + mali_dvfs_threshold_table * pdvfs = pmali_plat->dvfs_table; + int err = mali_perf_set_num_pp_cores(num_cores_enabled); + if (err < 0) scalingdbg(1, "set pp failed"); + + scalingdbg(1, "set pp cores to %d\n", num_cores_enabled); + scalingdbg(1, "pdvfs[%d].freq_index=%d, pdvfs[%d].freq_index=%d\n", + currentStep, pdvfs[currentStep].freq_index, + lastStep, pdvfs[lastStep].freq_index); + mali_clk_exected(); +#ifdef CONFIG_MALI400_PROFILING + _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | + MALI_PROFILING_EVENT_CHANNEL_GPU | + MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE, + get_current_frequency(), + 0, 0, 0, 0); +#endif +} +#endif + +u32 revise_set_clk(u32 val, u32 flush) +{ + u32 ret = 0; +#ifndef CONFIG_MALI_DVFS + mali_scale_info_t* pinfo; + + pinfo = &pmali_plat->scale_info; + + if (val < pinfo->minclk) + val = pinfo->minclk; + else if (val > pinfo->maxclk) + val = pinfo->maxclk; + + if (val != currentStep) { + currentStep = val; + if (flush) + schedule_work(&wq_work); + else + ret = 1; + } +#endif + return ret; +} + +void get_mali_rt_clkpp(u32* clk, u32* pp) +{ +#ifndef CONFIG_MALI_DVFS + *clk = currentStep; + *pp = num_cores_enabled; +#endif +} + +u32 set_mali_rt_clkpp(u32 clk, u32 pp, u32 flush) +{ + u32 ret = 0; +#ifndef CONFIG_MALI_DVFS + mali_scale_info_t* pinfo; + u32 flush_work = 0; + + pinfo = &pmali_plat->scale_info; + if (clk < pinfo->minclk) + clk = pinfo->minclk; + else if (clk > pinfo->maxclk) + clk = pinfo->maxclk; + + if (clk != currentStep) { + currentStep = clk; + if (flush) + 
flush_work++; + else + ret = 1; + } + + if (flush) + mali_stay_count = pmali_plat->dvfs_table[currentStep].keep_count; + + if (pp < pinfo->minpp) + pp = pinfo->minpp; + else if (pp > pinfo->maxpp) + pp = pinfo->maxpp; + + if (pp != num_cores_enabled) { + num_cores_enabled = pp; + if (flush) + flush_work++; + else + ret = 1; + } + + if (flush_work) + schedule_work(&wq_work); +#endif + return ret; +} + +void revise_mali_rt(void) +{ +#ifndef CONFIG_MALI_DVFS + set_mali_rt_clkpp(currentStep, num_cores_enabled, 1); +#endif +} + +void flush_scaling_job(void) +{ +#ifndef CONFIG_MALI_DVFS + cancel_work_sync(&wq_work); +#endif +} + +#ifndef CONFIG_MALI_DVFS +static u32 enable_one_core(void) +{ + scalingdbg(2, "meson: one more pp, curent has %d pp cores\n", num_cores_enabled + 1); + return set_mali_rt_clkpp(currentStep, num_cores_enabled + 1, 0); +} + +static u32 disable_one_core(void) +{ + scalingdbg(2, "meson: disable one pp, current has %d pp cores\n", num_cores_enabled - 1); + return set_mali_rt_clkpp(currentStep, num_cores_enabled - 1, 0); +} + +static u32 enable_max_num_cores(void) +{ + return set_mali_rt_clkpp(currentStep, pmali_plat->scale_info.maxpp, 0); +} + +static u32 enable_pp_cores(u32 val) +{ + scalingdbg(2, "meson: enable %d pp cores\n", val); + return set_mali_rt_clkpp(currentStep, val, 0); +} +#endif + +int mali_core_scaling_init(mali_plat_info_t *mali_plat) +{ +#ifndef CONFIG_MALI_DVFS + if (mali_plat == NULL) { + scalingdbg(2, " Mali platform data is NULL!!!\n"); + return -1; + } + + pmali_plat = mali_plat; + num_cores_enabled = pmali_plat->sc_mpp; +#if AMLOGIC_GPU_USE_GPPLL + gp_pll_user_gpu = gp_pll_user_register("gpu", 1, + gp_pll_user_cb_gpu); + //not get the gp pll, do need put + is_gp_pll_get = 0; + is_gp_pll_put = 0; + if (gp_pll_user_gpu == NULL) printk("register gp pll user for gpu failed\n"); +#endif + + currentStep = pmali_plat->def_clock; + lastStep = currentStep; + INIT_WORK(&wq_work, do_scaling); +#endif + return 0; + /* NOTE: Mali is not 
fully initialized at this point. */ +} + +void mali_core_scaling_term(void) +{ +#ifndef CONFIG_MALI_DVFS + flush_scheduled_work(); +#if AMLOGIC_GPU_USE_GPPLL + gp_pll_user_unregister(gp_pll_user_gpu); +#endif +#endif +} + +#ifndef CONFIG_MALI_DVFS +static u32 mali_threshold [] = { + 40, /* 40% */ + 50, /* 50% */ + 90, /* 90% */ +}; +#endif + +void mali_pp_scaling_update(int utilization_pp) +{ +#ifndef CONFIG_MALI_DVFS + int ret = 0; + + if (mali_threshold[2] < utilization_pp) + ret = enable_max_num_cores(); + else if (mali_threshold[1]< utilization_pp) + ret = enable_one_core(); + else if (0 < utilization_pp) + ret = disable_one_core(); + if (ret == 1) + schedule_work(&wq_work); +#endif +} + +#if LOG_MALI_SCALING +void trace_utilization(int utilization_gpu, u32 current_idx, u32 next, + u32 current_pp, u32 next_pp) +{ + char direction; + if (next > current_idx) + direction = '>'; + else if ((current_idx > pmali_plat->scale_info.minpp) && (next < current_idx)) + direction = '<'; + else + direction = '~'; + + scalingdbg(2, "[SCALING]%c (%3d-->%3d)@%3d{%3d - %3d}. pp:(%d-->%d)\n", + direction, + get_mali_freq(current_idx), + get_mali_freq(next), + utilization_gpu, + pmali_plat->dvfs_table[current_idx].downthreshold, + pmali_plat->dvfs_table[current_idx].upthreshold, + current_pp, next_pp); +} +#endif + +#ifndef CONFIG_MALI_DVFS +static void mali_decide_next_status(int utilization_pp, int* next_fs_idx, + int* pp_change_flag) +{ + u32 mali_up_limit, decided_fs_idx; + u32 ld_left, ld_right; + u32 ld_up, ld_down; + u32 change_mode; + + *pp_change_flag = 0; + change_mode = 0; + + scalingdbg(5, "line(%d), scaling_mode=%d, MALI_TURBO_MODE=%d, turbo=%d, maxclk=%d\n", + __LINE__, scaling_mode, MALI_TURBO_MODE, + pmali_plat->turbo_clock, pmali_plat->scale_info.maxclk); + + mali_up_limit = (scaling_mode == MALI_TURBO_MODE) ? 
+ pmali_plat->turbo_clock : pmali_plat->scale_info.maxclk; + decided_fs_idx = currentStep; + + ld_up = pmali_plat->dvfs_table[currentStep].upthreshold; + ld_down = pmali_plat->dvfs_table[currentStep].downthreshold; + + scalingdbg(2, "utilization=%d, ld_up=%d\n ", utilization_pp, ld_up); + if (utilization_pp >= ld_up) { /* go up */ + + scalingdbg(2, "currentStep=%d, mali_up_limit=%d\n ", currentStep, mali_up_limit); + if (currentStep < mali_up_limit) { + change_mode = 1; + if ((currentStep < pmali_plat->def_clock) && (utilization_pp > pmali_plat->bst_gpu)) + decided_fs_idx = pmali_plat->def_clock; + else + decided_fs_idx++; + } + if ((utilization_pp >= ld_up) && + (num_cores_enabled < pmali_plat->scale_info.maxpp)) { + if ((num_cores_enabled < pmali_plat->sc_mpp) && (utilization_pp >= pmali_plat->bst_pp)) { + *pp_change_flag = 1; + change_mode = 1; + } else if (change_mode == 0) { + *pp_change_flag = 2; + change_mode = 1; + } + } +#if LOG_MALI_SCALING + scalingdbg(2, "[nexting..] [LD:%d]-> FS[CRNT:%d LMT:%d NEXT:%d] PP[NUM:%d LMT:%d MD:%d][F:%d]\n", + utilization_pp, currentStep, mali_up_limit, decided_fs_idx, + num_cores_enabled, pmali_plat->scale_info.maxpp, *pp_change_flag, change_mode); +#endif + } else if (utilization_pp <= ld_down) { /* go down */ + if (mali_stay_count > 0) { + *next_fs_idx = decided_fs_idx; + mali_stay_count--; + return; + } + + if (num_cores_enabled > pmali_plat->sc_mpp) { + change_mode = 1; + if (utilization_pp <= ld_down) { + ld_left = utilization_pp * num_cores_enabled; + ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) * + (num_cores_enabled - 1); + if (ld_left < ld_right) { + change_mode = 2; + } + } + } else if (currentStep > pmali_plat->scale_info.minclk) { + change_mode = 1; + } else if (num_cores_enabled > 1) { /* decrease PPS */ + if (utilization_pp <= ld_down) { + ld_left = utilization_pp * num_cores_enabled; + ld_right = (pmali_plat->dvfs_table[currentStep].upthreshold) * + (num_cores_enabled - 1); + scalingdbg(2, 
"ld_left=%d, ld_right=%d\n", ld_left, ld_right); + if (ld_left < ld_right) { + change_mode = 2; + } + } + } + + if (change_mode == 1) { + decided_fs_idx--; + } else if (change_mode == 2) { /* decrease PPS */ + *pp_change_flag = -1; + } + } else { + mali_stay_count = pmali_plat->dvfs_table[currentStep].keep_count; + scalingdbg(1, "reset to %d, decided_fs_idx=%d, mali_stay_count=%d\n", + currentStep, decided_fs_idx, mali_stay_count); + } + + if (decided_fs_idx < 0 ) { + printk("gpu debug, next index below 0\n"); + decided_fs_idx = 0; + } + if (decided_fs_idx > pmali_plat->scale_info.maxclk) { + decided_fs_idx = pmali_plat->scale_info.maxclk; + printk("gpu debug, next index above max-1, set to %d\n", decided_fs_idx); + } + + if (change_mode) + mali_stay_count = pmali_plat->dvfs_table[decided_fs_idx].keep_count; + + *next_fs_idx = decided_fs_idx; + scalingdbg(1, "mali_stay_count=%d\n", mali_stay_count); +} +#endif + +void mali_pp_fs_scaling_update(int utilization_pp) +{ +#ifndef CONFIG_MALI_DVFS + int ret = 0; + int pp_change_flag = 0; + u32 next_idx = 0; + +#if LOG_MALI_SCALING + u32 last_pp = num_cores_enabled; +#endif + mali_decide_next_status(utilization_pp, &next_idx, &pp_change_flag); + + if (pp_change_flag == 1) + ret = enable_pp_cores(pmali_plat->sc_mpp); + else if (pp_change_flag == 2) + ret = enable_one_core(); + else if (pp_change_flag == -1) { + ret = disable_one_core(); + } + +#if LOG_MALI_SCALING + if (pp_change_flag || (next_idx != currentStep)) + trace_utilization(utilization_pp, currentStep, next_idx, last_pp, num_cores_enabled); +#endif + + if (next_idx != currentStep) { + ret = 1; + currentStep = next_idx; + } + + if (ret == 1) + schedule_work(&wq_work); +#ifdef CONFIG_MALI400_PROFILING + else + _mali_osk_profiling_add_event(MALI_PROFILING_EVENT_TYPE_SINGLE | + MALI_PROFILING_EVENT_CHANNEL_GPU | + MALI_PROFILING_EVENT_REASON_SINGLE_GPU_FREQ_VOLT_CHANGE, + get_current_frequency(), + 0, 0, 0, 0); +#endif +#endif +} + +u32 get_mali_schel_mode(void) +{ + 
return scaling_mode; +} + +void set_mali_schel_mode(u32 mode) +{ +#ifndef CONFIG_MALI_DVFS + if (mode >= MALI_SCALING_MODE_MAX) + return; + scaling_mode = mode; + + //disable thermal in turbo mode + if (scaling_mode == MALI_TURBO_MODE) { + pmali_plat->limit_on = 0; + } else { + pmali_plat->limit_on = 1; + } + /* set default performance range. */ + pmali_plat->scale_info.minclk = pmali_plat->cfg_min_clock; + pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock; + pmali_plat->scale_info.minpp = pmali_plat->cfg_min_pp; + pmali_plat->scale_info.maxpp = pmali_plat->cfg_pp; + + /* set current status and tune max freq */ + if (scaling_mode == MALI_PP_FS_SCALING) { + pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock; + enable_pp_cores(pmali_plat->sc_mpp); + } else if (scaling_mode == MALI_SCALING_DISABLE) { + pmali_plat->scale_info.maxclk = pmali_plat->cfg_clock; + enable_max_num_cores(); + } else if (scaling_mode == MALI_TURBO_MODE) { + pmali_plat->scale_info.maxclk = pmali_plat->turbo_clock; + enable_max_num_cores(); + } + currentStep = pmali_plat->scale_info.maxclk; + schedule_work(&wq_work); +#endif +} + +u32 get_current_frequency(void) +{ + return get_mali_freq(currentStep); +} + +void mali_gpu_utilization_callback(int utilization_pp) +{ +#ifndef CONFIG_MALI_DVFS + if (mali_pm_statue) + return; + + switch (scaling_mode) { + case MALI_PP_FS_SCALING: + mali_pp_fs_scaling_update(utilization_pp); + break; + case MALI_PP_SCALING: + mali_pp_scaling_update(utilization_pp); + break; + default: + break; + } +#endif +} +static u32 clk_cntl_save = 0; +void mali_dev_freeze(void) +{ + clk_cntl_save = mplt_read(HHI_MALI_CLK_CNTL); +} + +void mali_dev_restore(void) +{ + u32 reg = 0; + if (!pmali_plat || !pmali_plat->pdev) { + printk("error: init clock failed, pmali_plat=%p, pmali_plat->pdev=%p\n", + pmali_plat, pmali_plat == NULL ? 
NULL: pmali_plat->pdev); + return ; + } + + reg = pmali_plat->clk_cntl_reg; + + mplt_write(reg, clk_cntl_save); + mali_clock_init_clk_tree(pmali_plat->pdev); +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild new file mode 100644 index 0000000..6780e4c --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild @@ -0,0 +1,24 @@ +# +# (C) COPYRIGHT 2012-2013, 2016-2017 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +mali_kbase-y += \ + $(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \ + mali_kbase_platform_fake.o diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h new file mode 100644 index 0000000..fac3cd5 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h @@ -0,0 +1,39 @@ +/* + * + * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Power management configuration + * + * Attached value: pointer to @ref kbase_pm_callback_conf + * Default value: See @ref kbase_pm_callback_conf + */ +#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) + +/** + * Platform specific configuration functions + * + * Attached value: pointer to @ref kbase_platform_funcs_conf + * Default value: See @ref kbase_platform_funcs_conf + */ +#define PLATFORM_FUNCS (NULL) + +extern struct kbase_pm_callback_conf pm_callbacks; diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c new file mode 100644 index 0000000..d165ce2 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c @@ -0,0 +1,69 @@ +/* + * + * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +#include +#include +#include +#include +#include "mali_kbase_config_platform.h" + +#ifndef CONFIG_OF +static struct kbase_io_resources io_resources = { + .job_irq_number = 68, + .mmu_irq_number = 69, + .gpu_irq_number = 70, + .io_memory_region = { + .start = 0xFC010000, + .end = 0xFC010000 + (4096 * 4) - 1 + } +}; +#endif /* CONFIG_OF */ + +static int pm_callback_power_on(struct kbase_device *kbdev) +{ + /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ + return 1; +} + +static void pm_callback_power_off(struct kbase_device *kbdev) +{ +} + +struct kbase_pm_callback_conf pm_callbacks = { + .power_on_callback = pm_callback_power_on, + .power_off_callback = pm_callback_power_off, + .power_suspend_callback = NULL, + .power_resume_callback = NULL +}; + +static struct kbase_platform_config versatile_platform_config = { +#ifndef CONFIG_OF + .io_resources = &io_resources +#endif +}; + +struct kbase_platform_config *kbase_get_platform_config(void) +{ + return &versatile_platform_config; +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild new file mode 100644 index 0000000..51b408e --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild @@ -0,0 +1,24 @@ +# +# (C) COPYRIGHT 2013-2014, 2016-2017 ARM Limited. All rights reserved. 
+# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +mali_kbase-y += \ + $(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \ + mali_kbase_platform_fake.o diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h new file mode 100644 index 0000000..fac3cd5 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h @@ -0,0 +1,39 @@ +/* + * + * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Power management configuration + * + * Attached value: pointer to @ref kbase_pm_callback_conf + * Default value: See @ref kbase_pm_callback_conf + */ +#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) + +/** + * Platform specific configuration functions + * + * Attached value: pointer to @ref kbase_platform_funcs_conf + * Default value: See @ref kbase_platform_funcs_conf + */ +#define PLATFORM_FUNCS (NULL) + +extern struct kbase_pm_callback_conf pm_callbacks; diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c new file mode 100644 index 0000000..efca0a5 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c @@ -0,0 +1,65 @@ +/* + * + * (C) COPYRIGHT 2011-2014, 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include +#include + +#ifndef CONFIG_OF +static struct kbase_io_resources io_resources = { + .job_irq_number = 68, + .mmu_irq_number = 69, + .gpu_irq_number = 70, + .io_memory_region = { + .start = 0x2f010000, + .end = 0x2f010000 + (4096 * 4) - 1} +}; +#endif + +static int pm_callback_power_on(struct kbase_device *kbdev) +{ + /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ + return 1; +} + +static void pm_callback_power_off(struct kbase_device *kbdev) +{ +} + +struct kbase_pm_callback_conf pm_callbacks = { + .power_on_callback = pm_callback_power_on, + .power_off_callback = pm_callback_power_off, + .power_suspend_callback = NULL, + .power_resume_callback = NULL +}; + +static struct kbase_platform_config versatile_platform_config = { +#ifndef CONFIG_OF + .io_resources = &io_resources +#endif +}; + +struct kbase_platform_config *kbase_get_platform_config(void) +{ + return &versatile_platform_config; +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild new file mode 100644 index 0000000..e07709c --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild @@ -0,0 +1,25 @@ +# +# (C) COPYRIGHT 2012-2013, 2016-2017 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +mali_kbase-y += \ + $(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \ + $(MALI_PLATFORM_DIR)/mali_kbase_cpu_vexpress.o \ + mali_kbase_platform_fake.o diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h new file mode 100644 index 0000000..fac3cd5 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h @@ -0,0 +1,39 @@ +/* + * + * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/** + * Power management configuration + * + * Attached value: pointer to @ref kbase_pm_callback_conf + * Default value: See @ref kbase_pm_callback_conf + */ +#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) + +/** + * Platform specific configuration functions + * + * Attached value: pointer to @ref kbase_platform_funcs_conf + * Default value: See @ref kbase_platform_funcs_conf + */ +#define PLATFORM_FUNCS (NULL) + +extern struct kbase_pm_callback_conf pm_callbacks; diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c new file mode 100644 index 0000000..b6714b9 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c @@ -0,0 +1,67 @@ +/* + * + * (C) COPYRIGHT 2011-2014, 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + + +#include +#include +#include +#include + +#ifndef CONFIG_OF +static struct kbase_io_resources io_resources = { + .job_irq_number = 75, + .mmu_irq_number = 76, + .gpu_irq_number = 77, + .io_memory_region = { + .start = 0x2F000000, + .end = 0x2F000000 + (4096 * 4) - 1} +}; +#endif + +static int pm_callback_power_on(struct kbase_device *kbdev) +{ + /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ + return 1; +} + +static void pm_callback_power_off(struct kbase_device *kbdev) +{ +} + +struct kbase_pm_callback_conf pm_callbacks = { + .power_on_callback = pm_callback_power_on, + .power_off_callback = pm_callback_power_off, + .power_suspend_callback = NULL, + .power_resume_callback = NULL +}; + +static struct kbase_platform_config versatile_platform_config = { +#ifndef CONFIG_OF + .io_resources = &io_resources +#endif +}; + +struct kbase_platform_config *kbase_get_platform_config(void) +{ + return &versatile_platform_config; +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h new file mode 100644 index 0000000..8778d81 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h @@ -0,0 +1,69 @@ +/* + * + * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _PROTECTED_MODE_SWITCH_H_ +#define _PROTECTED_MODE_SWITCH_H_ + +struct protected_mode_device; + +/** + * struct protected_mode_ops - Callbacks for protected mode switch operations + * + * @protected_mode_enable: Callback to enable protected mode for device + * @protected_mode_disable: Callback to disable protected mode for device + */ +struct protected_mode_ops { + /** + * protected_mode_enable() - Enable protected mode on device + * @protected_dev: The protected mode device + * + * Return: 0 on success, non-zero on error + */ + int (*protected_mode_enable)( + struct protected_mode_device *protected_dev); + + /** + * protected_mode_disable() - Disable protected mode on device, and + * reset device + * @protected_dev: The protected mode device + * + * Return: 0 on success, non-zero on error + */ + int (*protected_mode_disable)( + struct protected_mode_device *protected_dev); +}; + +/** + * struct protected_mode_device - Device structure for protected mode devices + * + * @ops: Callbacks associated with this device + * @data: Pointer to device private data + * + * This structure should be registered with the platform device using + * platform_set_drvdata(). + */ +struct protected_mode_device { + struct protected_mode_ops ops; + void *data; +}; + +#endif /* _PROTECTED_MODE_SWITCH_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/Kbuild b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/Kbuild new file mode 100644 index 0000000..df16a77 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/Kbuild @@ -0,0 +1,23 @@ +# +# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. 
+# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +obj-$(CONFIG_MALI_KUTF) += kutf/ +obj-$(CONFIG_MALI_IRQ_LATENCY) += mali_kutf_irq_test/ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/Kconfig b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/Kconfig new file mode 100644 index 0000000..fa91aea --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/Kconfig @@ -0,0 +1,23 @@ +# +# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. 
+# +# SPDX-License-Identifier: GPL-2.0 +# +# + +source "drivers/gpu/arm/midgard/tests/kutf/Kconfig" +source "drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig" diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/Mconfig b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/Mconfig new file mode 100644 index 0000000..be3fedb --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/Mconfig @@ -0,0 +1,38 @@ +# +# (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# + +config UNIT_TEST_KERNEL_MODULES + bool + default y if UNIT_TEST_CODE && BUILD_KERNEL_MODULES + default n + +config BUILD_IPA_TESTS + bool + default y if UNIT_TEST_KERNEL_MODULES && MALI_DEVFREQ + default n + +config BUILD_IPA_UNIT_TESTS + bool + default y if NO_MALI && BUILD_IPA_TESTS + default n + +config BUILD_CSF_TESTS + bool + default y if UNIT_TEST_KERNEL_MODULES && GPU_HAS_CSF + default n + +config BUILD_ARBIF_TESTS + bool + default y if UNIT_TEST_KERNEL_MODULES && MALI_ARBITER_SUPPORT + default n + diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers.h new file mode 100644 index 0000000..15e168c --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers.h @@ -0,0 +1,77 @@ +/* + * + * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KERNEL_UTF_HELPERS_H_ +#define _KERNEL_UTF_HELPERS_H_ + +/* kutf_helpers.h + * Test helper functions for the kernel UTF test infrastructure. + * + * These functions provide methods for enqueuing/dequeuing lines of text sent + * by user space. They are used to implement the transfer of "userdata" from + * user space to kernel. + */ + +#include + +/** + * kutf_helper_input_dequeue() - Dequeue a line sent by user space + * @context: KUTF context + * @str_size: Pointer to an integer to receive the size of the string + * + * If no line is available then this function will wait (interruptibly) until + * a line is available. + * + * Return: The line dequeued, ERR_PTR(-EINTR) if interrupted or NULL on end + * of data. + */ +char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size); + +/** + * kutf_helper_input_enqueue() - Enqueue a line sent by user space + * @context: KUTF context + * @str: The user space address of the line + * @size: The length in bytes of the string + * + * This function will use copy_from_user to copy the string out of user space. + * The string need not be NULL-terminated (@size should not include the NULL + * termination). 
+ * + * As a special case @str==NULL and @size==0 is valid to mark the end of input, + * but callers should use kutf_helper_input_enqueue_end_of_data() instead. + * + * Return: 0 on success, -EFAULT if the line cannot be copied from user space, + * -ENOMEM if out of memory. + */ +int kutf_helper_input_enqueue(struct kutf_context *context, + const char __user *str, size_t size); + +/** + * kutf_helper_input_enqueue_end_of_data() - Signal no more data is to be sent + * @context: KUTF context + * + * After this function has been called, kutf_helper_input_dequeue() will always + * return NULL. + */ +void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context); + +#endif /* _KERNEL_UTF_HELPERS_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers_user.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers_user.h new file mode 100644 index 0000000..3b1300e --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers_user.h @@ -0,0 +1,179 @@ +/* + * + * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KERNEL_UTF_HELPERS_USER_H_ +#define _KERNEL_UTF_HELPERS_USER_H_ + +/* kutf_helpers_user.h + * Test helper functions for the kernel UTF test infrastructure, whose + * implementation mirrors that of similar functions for kutf-userside + */ + +#include +#include + + +#define KUTF_HELPER_MAX_VAL_NAME_LEN 255 + +enum kutf_helper_valtype { + KUTF_HELPER_VALTYPE_INVALID, + KUTF_HELPER_VALTYPE_U64, + KUTF_HELPER_VALTYPE_STR, + + KUTF_HELPER_VALTYPE_COUNT /* Must be last */ +}; + +struct kutf_helper_named_val { + enum kutf_helper_valtype type; + char *val_name; + union { + u64 val_u64; + char *val_str; + } u; +}; + +/* Extra error values for certain helpers when we want to distinguish between + * Linux's own error values too. + * + * These can only be used on certain functions returning an int type that are + * documented as returning one of these potential values, they cannot be used + * from functions returning a ptr type, since we can't decode it with PTR_ERR + * + * No negative values are used - Linux error codes should be used instead, and + * indicate a problem in accessing the data file itself (are generally + * unrecoverable) + * + * Positive values indicate correct access but invalid parsing (can be + * recovered from assuming data in the future is correct) */ +enum kutf_helper_err { + /* No error - must be zero */ + KUTF_HELPER_ERR_NONE = 0, + /* Named value parsing encountered an invalid name */ + KUTF_HELPER_ERR_INVALID_NAME, + /* Named value parsing of string or u64 type encountered extra + * characters after the value (after the last digit for a u64 type or + * after the string end delimiter for string type) */ + KUTF_HELPER_ERR_CHARS_AFTER_VAL, + /* Named value parsing of string type couldn't find the string end + * delimiter. 
+ * + * This cannot be encountered when the NAME="value" message exceeds the + * textbuf's maximum line length, because such messages are not checked + * for an end string delimiter */ + KUTF_HELPER_ERR_NO_END_DELIMITER, + /* Named value didn't parse as any of the known types */ + KUTF_HELPER_ERR_INVALID_VALUE, +}; + + +/* Send named NAME=value pair, u64 value + * + * NAME must match [A-Z0-9_]\+ and can be up to MAX_VAL_NAME_LEN characters long + * + * Any failure will be logged on the suite's current test fixture + * + * Returns 0 on success, non-zero on failure + */ +int kutf_helper_send_named_u64(struct kutf_context *context, + const char *val_name, u64 val); + +/* Get the maximum length of a string that can be represented as a particular + * NAME="value" pair without string-value truncation in the kernel's buffer + * + * Given val_name and the kernel buffer's size, this can be used to determine + * the maximum length of a string that can be sent as val_name="value" pair + * without having the string value truncated. Any string longer than this will + * be truncated at some point during communication to this size. + * + * It is assumed that val_name is a valid name for + * kutf_helper_send_named_str(), and no checking will be made to + * ensure this. + * + * Returns the maximum string length that can be represented, or a negative + * value if the NAME="value" encoding itself wouldn't fit in kern_buf_sz + */ +int kutf_helper_max_str_len_for_kern(const char *val_name, int kern_buf_sz); + +/* Send named NAME="str" pair + * + * no escaping allowed in str. 
Any of the following characters will terminate + * the string: '"' '\\' '\n' + * + * NAME must match [A-Z0-9_]\+ and can be up to MAX_VAL_NAME_LEN characters long + * + * Any failure will be logged on the suite's current test fixture + * + * Returns 0 on success, non-zero on failure */ +int kutf_helper_send_named_str(struct kutf_context *context, + const char *val_name, const char *val_str); + +/* Receive named NAME=value pair + * + * This can receive u64 and string values - check named_val->type + * + * If you are not planning on dynamic handling of the named value's name and + * type, then kutf_helper_receive_check_val() is more useful as a + * convenience function. + * + * String members of named_val will come from memory allocated on the fixture's mempool + * + * Returns 0 on success. Negative value on failure to receive from the 'run' + * file, positive value indicates an enum kutf_helper_err value for correct + * reception of data but invalid parsing */ +int kutf_helper_receive_named_val( + struct kutf_context *context, + struct kutf_helper_named_val *named_val); + +/* Receive and validate NAME=value pair + * + * As with kutf_helper_receive_named_val, but validate that the + * name and type are as expected, as a convenience for a common pattern found + * in tests. + * + * NOTE: this only returns an error value if there was actually a problem + * receiving data. 
+ * + * NOTE: If the underlying data was received correctly, but: + * - isn't of the expected name + * - isn't the expected type + * - isn't correctly parsed for the type + * then the following happens: + * - failure result is recorded + * - named_val->type will be KUTF_HELPER_VALTYPE_INVALID + * - named_val->u will contain some default value that should be relatively + * harmless for the test, including being writable in the case of string + * values + * - return value will be 0 to indicate success + * + * The rationale behind this is that we'd prefer to continue the rest of the + * test with failures propagated, rather than hitting a timeout */ +int kutf_helper_receive_check_val( + struct kutf_helper_named_val *named_val, + struct kutf_context *context, + const char *expect_val_name, + enum kutf_helper_valtype expect_val_type); + +/* Output a named value to kmsg */ +void kutf_helper_output_named_val(struct kutf_helper_named_val *named_val); + + +#endif /* _KERNEL_UTF_HELPERS_USER_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h new file mode 100644 index 0000000..988559d --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h @@ -0,0 +1,73 @@ +/* + * + * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KERNEL_UTF_MEM_H_ +#define _KERNEL_UTF_MEM_H_ + +/* kutf_mem.h + * Functions for management of memory pools in the kernel. + * + * This module implements a memory pool allocator, allowing a test + * implementation to allocate linked allocations which can then be freed by a + * single free which releases all of the resources held by the entire pool. + * + * Note that it is not possible to free single resources within the pool once + * allocated. + */ + +#include +#include + +/** + * struct kutf_mempool - the memory pool context management structure + * @head: list head on which the allocations in this context are added to + * @lock: mutex for concurrent allocation from multiple threads + * + */ +struct kutf_mempool { + struct list_head head; + struct mutex lock; +}; + +/** + * kutf_mempool_init() - Initialize a memory pool. + * @pool: Memory pool structure to initialize, provided by the user + * + * Return: zero on success + */ +int kutf_mempool_init(struct kutf_mempool *pool); + +/** + * kutf_mempool_alloc() - Allocate memory from a pool + * @pool: Memory pool to allocate from + * @size: Size of memory wanted in number of bytes + * + * Return: Pointer to memory on success, NULL on failure. + */ +void *kutf_mempool_alloc(struct kutf_mempool *pool, size_t size); + +/** + * kutf_mempool_destroy() - Destroy a memory pool, freeing all memory within it. 
+ * @pool: The memory pool to free + */ +void kutf_mempool_destroy(struct kutf_mempool *pool); +#endif /* _KERNEL_UTF_MEM_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h new file mode 100644 index 0000000..49ebeb4 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h @@ -0,0 +1,181 @@ +/* + * + * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KERNEL_UTF_RESULTSET_H_ +#define _KERNEL_UTF_RESULTSET_H_ + +/* kutf_resultset.h + * Functions and structures for handling test results and result sets. + * + * This section of the kernel UTF contains structures and functions used for the + * management of Results and Result Sets. + */ + +/** + * enum kutf_result_status - Status values for a single Test error. + * @KUTF_RESULT_BENCHMARK: Result is a meta-result containing benchmark + * results. + * @KUTF_RESULT_SKIP: The test was skipped. + * @KUTF_RESULT_UNKNOWN: The test has an unknown result. + * @KUTF_RESULT_PASS: The test result passed. + * @KUTF_RESULT_DEBUG: The test result passed, but raised a debug + * message. 
+ * @KUTF_RESULT_INFO: The test result passed, but raised + * an informative message. + * @KUTF_RESULT_WARN: The test result passed, but raised a warning + * message. + * @KUTF_RESULT_FAIL: The test result failed with a non-fatal error. + * @KUTF_RESULT_FATAL: The test result failed with a fatal error. + * @KUTF_RESULT_ABORT: The test result failed due to a non-UTF + * assertion failure. + * @KUTF_RESULT_USERDATA: User data is ready to be read, + * this is not seen outside the kernel + * @KUTF_RESULT_USERDATA_WAIT: Waiting for user data to be sent, + * this is not seen outside the kernel + * @KUTF_RESULT_TEST_FINISHED: The test has finished, no more results will + * be produced. This is not seen outside kutf + */ +enum kutf_result_status { + KUTF_RESULT_BENCHMARK = -3, + KUTF_RESULT_SKIP = -2, + KUTF_RESULT_UNKNOWN = -1, + + KUTF_RESULT_PASS = 0, + KUTF_RESULT_DEBUG = 1, + KUTF_RESULT_INFO = 2, + KUTF_RESULT_WARN = 3, + KUTF_RESULT_FAIL = 4, + KUTF_RESULT_FATAL = 5, + KUTF_RESULT_ABORT = 6, + + KUTF_RESULT_USERDATA = 7, + KUTF_RESULT_USERDATA_WAIT = 8, + KUTF_RESULT_TEST_FINISHED = 9 +}; + +/* The maximum size of a kutf_result_status result when + * converted to a string + */ +#define KUTF_ERROR_MAX_NAME_SIZE 21 + +#ifdef __KERNEL__ + +#include +#include + +struct kutf_context; + +/** + * struct kutf_result - Represents a single test result. + * @node: Next result in the list of results. + * @status: The status summary (pass / warn / fail / etc). + * @message: A more verbose status message. + */ +struct kutf_result { + struct list_head node; + enum kutf_result_status status; + const char *message; +}; + +/** + * KUTF_RESULT_SET_WAITING_FOR_INPUT - Test is waiting for user data + * + * This flag is set within a struct kutf_result_set whenever the test is blocked + * waiting for user data. Attempts to dequeue results when this flag is set + * will cause a dummy %KUTF_RESULT_USERDATA_WAIT result to be produced. 
This + * is used to output a warning message and end of file. + */ +#define KUTF_RESULT_SET_WAITING_FOR_INPUT 1 + +/** + * struct kutf_result_set - Represents a set of results. + * @results: List head of a struct kutf_result list for storing the results + * @waitq: Wait queue signalled whenever new results are added. + * @flags: Flags see %KUTF_RESULT_SET_WAITING_FOR_INPUT + */ +struct kutf_result_set { + struct list_head results; + wait_queue_head_t waitq; + int flags; +}; + +/** + * kutf_create_result_set() - Create a new result set + * to which results can be added. + * + * Return: The created result set. + */ +struct kutf_result_set *kutf_create_result_set(void); + +/** + * kutf_add_result() - Add a result to the end of an existing result set. + * + * @context: The kutf context + * @status: The result status to add. + * @message: The result message to add. + * + * Return: 0 if the result is successfully added. -ENOMEM if allocation fails. + */ +int kutf_add_result(struct kutf_context *context, + enum kutf_result_status status, const char *message); + +/** + * kutf_remove_result() - Remove a result from the head of a result set. + * @set: The result set. + * + * This function will block until there is a result to read. The wait is + * interruptible, so this function will return with an ERR_PTR if interrupted. + * + * Return: result or ERR_PTR if interrupted + */ +struct kutf_result *kutf_remove_result( + struct kutf_result_set *set); + +/** + * kutf_destroy_result_set() - Free a previously created result set. + * + * @results: The result set whose resources to free. + */ +void kutf_destroy_result_set(struct kutf_result_set *results); + +/** + * kutf_set_waiting_for_input() - The test is waiting for userdata + * + * @set: The result set to update + * + * Causes the result set to always have results and return a fake + * %KUTF_RESULT_USERDATA_WAIT result. 
+ */ +void kutf_set_waiting_for_input(struct kutf_result_set *set); + +/** + * kutf_clear_waiting_for_input() - The test is no longer waiting for userdata + * + * @set: The result set to update + * + * Cancels the effect of kutf_set_waiting_for_input() + */ +void kutf_clear_waiting_for_input(struct kutf_result_set *set); + +#endif /* __KERNEL__ */ + +#endif /* _KERNEL_UTF_RESULTSET_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h new file mode 100644 index 0000000..8d75f50 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h @@ -0,0 +1,569 @@ +/* + * + * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KERNEL_UTF_SUITE_H_ +#define _KERNEL_UTF_SUITE_H_ + +/* kutf_suite.h + * Functions for management of test suites. + * + * This collection of data structures, macros, and functions are used to + * create Test Suites, Tests within those Test Suites, and Fixture variants + * of each test. 
+ */ + +#include +#include +#include + +#include +#include + +/* Arbitrary maximum size to prevent user space allocating too much kernel + * memory + */ +#define KUTF_MAX_LINE_LENGTH (1024u) + +/** + * Pseudo-flag indicating an absence of any specified test class. Note that + * tests should not be annotated with this constant as it is simply a zero + * value; tests without a more specific class must be marked with the flag + * KUTF_F_TEST_GENERIC. + */ +#define KUTF_F_TEST_NONE ((unsigned int)(0)) + +/** + * Class indicating this test is a smoke test. + * A given set of smoke tests should be quick to run, enabling rapid turn-around + * of "regress-on-commit" test runs. + */ +#define KUTF_F_TEST_SMOKETEST ((unsigned int)(1 << 1)) + +/** + * Class indicating this test is a performance test. + * These tests typically produce a performance metric, such as "time to run" or + * "frames per second", + */ +#define KUTF_F_TEST_PERFORMANCE ((unsigned int)(1 << 2)) + +/** + * Class indicating that this test is a deprecated test. + * These tests have typically been replaced by an alternative test which is + * more efficient, or has better coverage. + */ +#define KUTF_F_TEST_DEPRECATED ((unsigned int)(1 << 3)) + +/** + * Class indicating that this test is a known failure. + * These tests have typically been run and failed, but marking them as a known + * failure means it is easier to triage results. + * + * It is typically more convenient to triage known failures using the + * results database and web UI, as this means there is no need to modify the + * test code. + */ +#define KUTF_F_TEST_EXPECTED_FAILURE ((unsigned int)(1 << 4)) + +/** + * Class indicating that this test is a generic test, which is not a member of + * a more specific test class. Tests which are not created with a specific set + * of filter flags by the user are assigned this test class by default. 
+ */ +#define KUTF_F_TEST_GENERIC ((unsigned int)(1 << 5)) + +/** + * Class indicating this test is a resource allocation failure test. + * A resource allocation failure test will test that an error code is + * correctly propagated when an allocation fails. + */ +#define KUTF_F_TEST_RESFAIL ((unsigned int)(1 << 6)) + +/** + * Additional flag indicating that this test is an expected failure when + * run in resource failure mode. These tests are never run when running + * the low resource mode. + */ +#define KUTF_F_TEST_EXPECTED_FAILURE_RF ((unsigned int)(1 << 7)) + +/** + * Flag reserved for user-defined filter zero. + */ +#define KUTF_F_TEST_USER_0 ((unsigned int)(1 << 24)) + +/** + * Flag reserved for user-defined filter one. + */ +#define KUTF_F_TEST_USER_1 ((unsigned int)(1 << 25)) + +/** + * Flag reserved for user-defined filter two. + */ +#define KUTF_F_TEST_USER_2 ((unsigned int)(1 << 26)) + +/** + * Flag reserved for user-defined filter three. + */ +#define KUTF_F_TEST_USER_3 ((unsigned int)(1 << 27)) + +/** + * Flag reserved for user-defined filter four. + */ +#define KUTF_F_TEST_USER_4 ((unsigned int)(1 << 28)) + +/** + * Flag reserved for user-defined filter five. + */ +#define KUTF_F_TEST_USER_5 ((unsigned int)(1 << 29)) + +/** + * Flag reserved for user-defined filter six. + */ +#define KUTF_F_TEST_USER_6 ((unsigned int)(1 << 30)) + +/** + * Flag reserved for user-defined filter seven. Uses 1U: 1 << 31 overflows int. + */ +#define KUTF_F_TEST_USER_7 ((unsigned int)(1U << 31)) + +/** + * Pseudo-flag indicating that all test classes should be executed.
+ */ +#define KUTF_F_TEST_ALL ((unsigned int)(0xFFFFFFFFU)) + +/** + * union kutf_callback_data - Union used to store test callback data + * @ptr_value: pointer to the location where test callback data + * are stored + * @u32_value: a number which represents test callback data + */ +union kutf_callback_data { + void *ptr_value; + u32 u32_value; +}; + +/** + * struct kutf_userdata_line - A line of user data to be returned to the user + * @node: struct list_head to link this into a list + * @str: The line of user data to return to user space + * @size: The number of bytes within @str + */ +struct kutf_userdata_line { + struct list_head node; + char *str; + size_t size; +}; + +/** + * KUTF_USERDATA_WARNING_OUTPUT - Flag specifying that a warning has been output + * + * If user space reads the "run" file while the test is waiting for user data, + * then the framework will output a warning message and set this flag within + * struct kutf_userdata. A subsequent read will then simply return an end of + * file condition rather than outputting the warning again. The upshot of this + * is that simply running 'cat' on a test which requires user data will produce + * the warning followed by 'cat' exiting due to EOF - which is much more user + * friendly than blocking indefinitely waiting for user data. + */ +#define KUTF_USERDATA_WARNING_OUTPUT 1 + +/** + * struct kutf_userdata - Structure holding user data + * @flags: See %KUTF_USERDATA_WARNING_OUTPUT + * @input_head: List of struct kutf_userdata_line containing user data + * to be read by the kernel space test. + * @input_waitq: Wait queue signalled when there is new user data to be + * read by the kernel space test. 
+ */ +struct kutf_userdata { + unsigned long flags; + struct list_head input_head; + wait_queue_head_t input_waitq; +}; + +/** + * struct kutf_context - Structure representing a kernel test context + * @kref: Refcount for number of users of this context + * @suite: Convenience pointer to the suite this context + * is running + * @test_fix: The fixture that is being run in this context + * @fixture_pool: The memory pool used for the duration of + * the fixture/test context. + * @fixture: The user provided fixture structure. + * @fixture_index: The index (id) of the current fixture. + * @fixture_name: The name of the current fixture (or NULL if unnamed). + * @test_data: Any user private data associated with this test + * @result_set: All the results logged by this test context + * @status: The status of the currently running fixture. + * @expected_status: The expected status on exit of the currently + * running fixture. + * @work: Work item to enqueue onto the work queue to run the test + * @userdata: Structure containing the user data for the test to read + */ +struct kutf_context { + struct kref kref; + struct kutf_suite *suite; + struct kutf_test_fixture *test_fix; + struct kutf_mempool fixture_pool; + void *fixture; + unsigned int fixture_index; + const char *fixture_name; + union kutf_callback_data test_data; + struct kutf_result_set *result_set; + enum kutf_result_status status; + enum kutf_result_status expected_status; + + struct work_struct work; + struct kutf_userdata userdata; +}; + +/** + * struct kutf_suite - Structure representing a kernel test suite + * @app: The application this suite belongs to. + * @name: The name of this suite. + * @suite_data: Any user private data associated with this + * suite. + * @create_fixture: Function used to create a new fixture instance + * @remove_fixture: Function used to destroy a new fixture instance + * @fixture_variants: The number of variants (must be at least 1).
+ * @suite_default_flags: Suite global filter flags which are set on + * all tests. + * @node: List node for suite_list + * @dir: The debugfs directory for this suite + * @test_list: List head to store all the tests which are + * part of this suite + */ +struct kutf_suite { + struct kutf_application *app; + const char *name; + union kutf_callback_data suite_data; + void *(*create_fixture)(struct kutf_context *context); + void (*remove_fixture)(struct kutf_context *context); + unsigned int fixture_variants; + unsigned int suite_default_flags; + struct list_head node; + struct dentry *dir; + struct list_head test_list; +}; + +/* ============================================================================ + Application functions +============================================================================ */ + +/** + * kutf_create_application() - Create an in kernel test application. + * @name: The name of the test application. + * + * Return: pointer to the kutf_application on success or NULL + * on failure + */ +struct kutf_application *kutf_create_application(const char *name); + +/** + * kutf_destroy_application() - Destroy an in kernel test application. + * + * @app: The test application to destroy. + */ +void kutf_destroy_application(struct kutf_application *app); + +/* ============================================================================ + Suite functions +============================================================================ */ + +/** + * kutf_create_suite() - Create a kernel test suite. + * @app: The test application to create the suite in. + * @name: The name of the suite. + * @fixture_count: The number of fixtures to run over the test + * functions in this suite + * @create_fixture: Callback used to create a fixture. The returned value + * is stored in the fixture pointer in the context for + * use in the test functions. + * @remove_fixture: Callback used to remove a previously created fixture. + * + * Suite names must be unique. 
Should two suites with the same name be + * registered with the same application then this function will fail, if they + * are registered with different applications then the function will not detect + * this and the call will succeed. + * + * Return: pointer to the created kutf_suite on success or NULL + * on failure + */ +struct kutf_suite *kutf_create_suite( + struct kutf_application *app, + const char *name, + unsigned int fixture_count, + void *(*create_fixture)(struct kutf_context *context), + void (*remove_fixture)(struct kutf_context *context)); + +/** + * kutf_create_suite_with_filters() - Create a kernel test suite with user + * defined default filters. + * @app: The test application to create the suite in. + * @name: The name of the suite. + * @fixture_count: The number of fixtures to run over the test + * functions in this suite + * @create_fixture: Callback used to create a fixture. The returned value + * is stored in the fixture pointer in the context for + * use in the test functions. + * @remove_fixture: Callback used to remove a previously created fixture. + * @filters: Filters to apply to a test if it doesn't provide its own + * + * Suite names must be unique. Should two suites with the same name be + * registered with the same application then this function will fail, if they + * are registered with different applications then the function will not detect + * this and the call will succeed. + * + * Return: pointer to the created kutf_suite on success or NULL on failure + */ +struct kutf_suite *kutf_create_suite_with_filters( + struct kutf_application *app, + const char *name, + unsigned int fixture_count, + void *(*create_fixture)(struct kutf_context *context), + void (*remove_fixture)(struct kutf_context *context), + unsigned int filters); + +/** + * kutf_create_suite_with_filters_and_data() - Create a kernel test suite with + * user defined default filters. + * @app: The test application to create the suite in. + * @name: The name of the suite. 
+ * @fixture_count: The number of fixtures to run over the test + * functions in this suite + * @create_fixture: Callback used to create a fixture. The returned value + * is stored in the fixture pointer in the context for + * use in the test functions. + * @remove_fixture: Callback used to remove a previously created fixture. + * @filters: Filters to apply to a test if it doesn't provide its own + * @suite_data: Suite specific callback data, provided during the + * running of the test in the kutf_context + * + * Return: pointer to the created kutf_suite on success or NULL + * on failure + */ +struct kutf_suite *kutf_create_suite_with_filters_and_data( + struct kutf_application *app, + const char *name, + unsigned int fixture_count, + void *(*create_fixture)(struct kutf_context *context), + void (*remove_fixture)(struct kutf_context *context), + unsigned int filters, + union kutf_callback_data suite_data); + +/** + * kutf_add_test() - Add a test to a kernel test suite. + * @suite: The suite to add the test to. + * @id: The ID of the test. + * @name: The name of the test. + * @execute: Callback to the test function to run. + * + * Note: As no filters are provided the test will use the suite filters instead + */ +void kutf_add_test(struct kutf_suite *suite, + unsigned int id, + const char *name, + void (*execute)(struct kutf_context *context)); + +/** + * kutf_add_test_with_filters() - Add a test to a kernel test suite with filters + * @suite: The suite to add the test to. + * @id: The ID of the test. + * @name: The name of the test. + * @execute: Callback to the test function to run. + * @filters: A set of filtering flags, assigning test categories. + */ +void kutf_add_test_with_filters(struct kutf_suite *suite, + unsigned int id, + const char *name, + void (*execute)(struct kutf_context *context), + unsigned int filters); + +/** + * kutf_add_test_with_filters_and_data() - Add a test to a kernel test suite + * with filters. + * @suite: The suite to add the test to. 
+ * @id: The ID of the test. + * @name: The name of the test. + * @execute: Callback to the test function to run. + * @filters: A set of filtering flags, assigning test categories. + * @test_data: Test specific callback data, provided during the + * running of the test in the kutf_context + */ +void kutf_add_test_with_filters_and_data( + struct kutf_suite *suite, + unsigned int id, + const char *name, + void (*execute)(struct kutf_context *context), + unsigned int filters, + union kutf_callback_data test_data); + + +/* ============================================================================ + Test functions +============================================================================ */ +/** + * kutf_test_log_result_external() - Log a result which has been created + * externally in a standard form + * recognized by the log parser. + * @context: The test context the test is running in + * @message: The message for this result + * @new_status: The result status of this log message + */ +void kutf_test_log_result_external( + struct kutf_context *context, + const char *message, + enum kutf_result_status new_status); + +/** + * kutf_test_expect_abort() - Tell the kernel that you expect the current + * fixture to produce an abort. + * @context: The test context this test is running in. + */ +void kutf_test_expect_abort(struct kutf_context *context); + +/** + * kutf_test_expect_fatal() - Tell the kernel that you expect the current + * fixture to produce a fatal error. + * @context: The test context this test is running in. + */ +void kutf_test_expect_fatal(struct kutf_context *context); + +/** + * kutf_test_expect_fail() - Tell the kernel that you expect the current + * fixture to fail. + * @context: The test context this test is running in. + */ +void kutf_test_expect_fail(struct kutf_context *context); + +/** + * kutf_test_expect_warn() - Tell the kernel that you expect the current + * fixture to produce a warning.
+ * @context: The test context this test is running in. + */ +void kutf_test_expect_warn(struct kutf_context *context); + +/** + * kutf_test_expect_pass() - Tell the kernel that you expect the current + * fixture to pass. + * @context: The test context this test is running in. + */ +void kutf_test_expect_pass(struct kutf_context *context); + +/** + * kutf_test_skip() - Tell the kernel that the test should be skipped. + * @context: The test context this test is running in. + */ +void kutf_test_skip(struct kutf_context *context); + +/** + * kutf_test_skip_msg() - Tell the kernel that this test has been skipped, + * supplying a reason string. + * @context: The test context this test is running in. + * @message: A message string containing the reason for the skip. + * + * Note: The message must not be freed during the lifetime of the test run. + * This means it should either be a pre-baked string, or if a dynamic string + * is required it must be created with kutf_dsprintf which will store + * the resultant string in a buffer whose lifetime is the same as the test run. + */ +void kutf_test_skip_msg(struct kutf_context *context, const char *message); + +/** + * kutf_test_pass() - Tell the kernel that this test has passed. + * @context: The test context this test is running in. + * @message: A message string containing the reason for the pass. + * + * Note: The message must not be freed during the lifetime of the test run. + * This means it should either be a pre-baked string, or if a dynamic string + * is required it must be created with kutf_dsprintf which will store + * the resultant string in a buffer whose lifetime is the same as the test run. + */ +void kutf_test_pass(struct kutf_context *context, char const *message); + +/** + * kutf_test_debug() - Send a debug message + * @context: The test context this test is running in. + * @message: A message string containing the debug information.
+ * + * Note: The message must not be freed during the lifetime of the test run. + * This means it should either be a pre-baked string, or if a dynamic string + * is required it must be created with kutf_dsprintf which will store + * the resultant string in a buffer whose lifetime is the same as the test run. + */ +void kutf_test_debug(struct kutf_context *context, char const *message); + +/** + * kutf_test_info() - Send an information message + * @context: The test context this test is running in. + * @message: A message string containing the information message. + * + * Note: The message must not be freed during the lifetime of the test run. + * This means it should either be a pre-baked string, or if a dynamic string + * is required it must be created with kutf_dsprintf which will store + * the resultant string in a buffer whose lifetime is the same as the test run. + */ +void kutf_test_info(struct kutf_context *context, char const *message); + +/** + * kutf_test_warn() - Send a warning message + * @context: The test context this test is running in. + * @message: A message string containing the warning message. + * + * Note: The message must not be freed during the lifetime of the test run. + * This means it should either be a pre-baked string, or if a dynamic string + * is required it must be created with kutf_dsprintf which will store + * the resultant string in a buffer whose lifetime is the same as the test run. + */ +void kutf_test_warn(struct kutf_context *context, char const *message); + +/** + * kutf_test_fail() - Tell the kernel that a test has failed + * @context: The test context this test is running in. + * @message: A message string containing the failure message. + * + * Note: The message must not be freed during the lifetime of the test run.
+ * This means it should either be a pre-baked string, or if a dynamic string + * is required it must be created with kutf_dsprintf which will store + * the resultant string in a buffer whose lifetime is the same as the test run. + */ +void kutf_test_fail(struct kutf_context *context, char const *message); + +/** + * kutf_test_fatal() - Tell the kernel that a test has triggered a fatal error + * @context: The test context this test is running in. + * @message: A message string containing the fatal error message. + * + * Note: The message must not be freed during the lifetime of the test run. + * This means it should either be a pre-baked string, or if a dynamic string + * is required it must be created with kutf_dsprintf which will store + * the resultant string in a buffer whose lifetime is the same as the test run. + */ +void kutf_test_fatal(struct kutf_context *context, char const *message); + +/** + * kutf_test_abort() - Tell the kernel that a test triggered an abort in the test + * + * @context: The test context this test is running in. + */ +void kutf_test_abort(struct kutf_context *context); + +#endif /* _KERNEL_UTF_SUITE_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h new file mode 100644 index 0000000..25b8285 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h @@ -0,0 +1,60 @@ +/* + * + * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KERNEL_UTF_UTILS_H_ +#define _KERNEL_UTF_UTILS_H_ + +/* kutf_utils.h + * Utilities for the kernel UTF test infrastructure. + * + * This collection of library functions are provided for use by kernel UTF + * and users of kernel UTF which don't directly fit within the other + * code modules. + */ + +#include + +/** + * Maximum size of the message strings within kernel UTF, messages longer than + * this will be truncated. + */ +#define KUTF_MAX_DSPRINTF_LEN 1024 + +/** + * kutf_dsprintf() - dynamic sprintf + * @pool: memory pool to allocate from + * @fmt: The format string describing the string to document. + * @... The parameters to feed in to the format string. + * + * This function implements sprintf which dynamically allocates memory to store + * the string. The library will free the memory containing the string when the + * result set is cleared or destroyed. + * + * Note: The returned string may be truncated to fit an internal temporary + * buffer, which is KUTF_MAX_DSPRINTF_LEN bytes in length. + * + * Return: Returns pointer to allocated string, or NULL on error.
+ */ +const char *kutf_dsprintf(struct kutf_mempool *pool, + const char *fmt, ...); + +#endif /* _KERNEL_UTF_UTILS_H_ */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Kbuild b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Kbuild new file mode 100644 index 0000000..2531d41 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Kbuild @@ -0,0 +1,26 @@ +# +# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. 
+# +# SPDX-License-Identifier: GPL-2.0 +# +# + +ccflags-y += -I$(src)/../include + +obj-$(CONFIG_MALI_KUTF) += kutf.o + +kutf-y := kutf_mem.o kutf_resultset.o kutf_suite.o kutf_utils.o kutf_helpers.o kutf_helpers_user.o diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/Kconfig b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Kconfig similarity index 100% rename from dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/Kconfig rename to bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Kconfig diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/Makefile b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Makefile similarity index 100% rename from dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/Makefile rename to bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/Makefile diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/build.bp b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/build.bp new file mode 100644 index 0000000..32eab14 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/build.bp @@ -0,0 +1,36 @@ +/* + * + * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + +bob_kernel_module { + name: "kutf", + defaults: [ + "kernel_defaults", + "kutf_includes", + ], + srcs: [ + "Kbuild", + "kutf_helpers.c", + "kutf_helpers_user.c", + "kutf_mem.c", + "kutf_resultset.c", + "kutf_suite.c", + "kutf_utils.c", + ], + kbuild_options: ["CONFIG_MALI_KUTF=m"], + enabled: false, + base_build_kutf: { + enabled: true, + }, +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers.c new file mode 100644 index 0000000..cab5add --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers.c @@ -0,0 +1,129 @@ +/* + * + * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* Kernel UTF test helpers */ +#include + +#include +#include +#include +#include +#include +#include + +static DEFINE_SPINLOCK(kutf_input_lock); + +static bool pending_input(struct kutf_context *context) +{ + bool input_pending; + + spin_lock(&kutf_input_lock); + + input_pending = !list_empty(&context->userdata.input_head); + + spin_unlock(&kutf_input_lock); + + return input_pending; +} + +char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size) +{ + struct kutf_userdata_line *line; + + spin_lock(&kutf_input_lock); + + while (list_empty(&context->userdata.input_head)) { + int err; + + kutf_set_waiting_for_input(context->result_set); + + spin_unlock(&kutf_input_lock); + + err = wait_event_interruptible(context->userdata.input_waitq, + pending_input(context)); + + if (err) + return ERR_PTR(-EINTR); + + spin_lock(&kutf_input_lock); + } + + line = list_first_entry(&context->userdata.input_head, + struct kutf_userdata_line, node); + if (line->str) { + /* + * Unless it is the end-of-input marker, + * remove it from the list + */ + list_del(&line->node); + } + + spin_unlock(&kutf_input_lock); + + if (str_size) + *str_size = line->size; + return line->str; +} + +int kutf_helper_input_enqueue(struct kutf_context *context, + const char __user *str, size_t size) +{ + struct kutf_userdata_line *line; + + line = kutf_mempool_alloc(&context->fixture_pool, + sizeof(*line) + size + 1); + if (!line) + return -ENOMEM; + if (str) { + unsigned long bytes_not_copied; + + line->size = size; + line->str = (void *)(line + 1); + bytes_not_copied = copy_from_user(line->str, str, size); + if (bytes_not_copied != 0) + return -EFAULT; + /* Zero terminate the string */ + line->str[size] = '\0'; + } else { + /* This is used to mark the end of input */ + WARN_ON(size); + line->size = 0; + line->str = NULL; + } + + spin_lock(&kutf_input_lock); + + list_add_tail(&line->node, &context->userdata.input_head); + + 
kutf_clear_waiting_for_input(context->result_set); + + spin_unlock(&kutf_input_lock); + + wake_up(&context->userdata.input_waitq); + + return 0; +} + +void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context) +{ + kutf_helper_input_enqueue(context, NULL, 0); +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers_user.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers_user.c new file mode 100644 index 0000000..108fa82 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers_user.c @@ -0,0 +1,468 @@ +/* + * + * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* Kernel UTF test helpers that mirror those for kutf-userside */ +#include +#include +#include + +#include +#include +#include +/* Printable names of the value types; only read through get_val_type_name(), hence file-local and read-only */ +static const char * const valtype_names[] = { + "INVALID", + "U64", + "STR", +}; +/* Return the printable name for valtype; values at or above KUTF_HELPER_VALTYPE_COUNT are reported using the KUTF_HELPER_VALTYPE_INVALID entry */ +static const char *get_val_type_name(enum kutf_helper_valtype valtype) +{ + /* enums can be signed or unsigned (implementation dependent), so + * enforce it to prevent: + * a) "<0 comparison on unsigned type" warning - if we did both upper + * and lower bound check + * b) incorrect range checking if it was a signed type - if we did + * upper bound check only */ + unsigned int type_idx = (unsigned int)valtype; + + if (type_idx >= (unsigned int)KUTF_HELPER_VALTYPE_COUNT) + type_idx = (unsigned int)KUTF_HELPER_VALTYPE_INVALID; + + return valtype_names[type_idx]; +} + +/* Check up to str_len chars of val_str to see if it's a valid value name: + * + * - Has between 1 and KUTF_HELPER_MAX_VAL_NAME_LEN characters before the \0 terminator + * - And, each char is in the character set [A-Z0-9_]. Returns 0 when the name is valid and 1 otherwise; only the first str_len chars are examined, so the name may be followed by a non-NUL separator */ +static int validate_val_name(const char *val_str, int str_len) +{ + int i = 0; + + for (i = 0; str_len && i <= KUTF_HELPER_MAX_VAL_NAME_LEN && val_str[i] != '\0'; ++i, --str_len) { + char val_chr = val_str[i]; + + if (val_chr >= 'A' && val_chr <= 'Z') + continue; + if (val_chr >= '0' && val_chr <= '9') + continue; + if (val_chr == '_') + continue; + + /* Character not in the set [A-Z0-9_] - report error */ + return 1; + } + + /* Names of 0 length are not valid */ + if (i == 0) + return 1; + /* Length greater than KUTF_HELPER_MAX_VAL_NAME_LEN not allowed. val_str[i] is deliberately not inspected here: once str_len is used up it lies outside the checked range (e.g. the '=' that follows a received name), and a longer name has already pushed i past the limit */ + if (i > KUTF_HELPER_MAX_VAL_NAME_LEN) + return 1; + + return 0; +} + +/* Find the length of the valid part of the string when it will be in quotes + * e.g. "str" + * + * That is, before any '\\', '\n' or '"' characters.
This is so we don't have + * to escape the string */ +static int find_quoted_string_valid_len(const char *str) +{ + char *ptr; + const char *check_chars = "\\\n\""; + + ptr = strpbrk(str, check_chars); + if (ptr) + return (int)(ptr-str); + + return (int)strlen(str); +} + +static int kutf_helper_userdata_enqueue(struct kutf_context *context, + const char *str) +{ + char *str_copy; + size_t len; + int err; + + len = strlen(str)+1; + + str_copy = kutf_mempool_alloc(&context->fixture_pool, len); + if (!str_copy) + return -ENOMEM; + + strcpy(str_copy, str); + + err = kutf_add_result(context, KUTF_RESULT_USERDATA, str_copy); + + return err; +} + +#define MAX_U64_HEX_LEN 16 +/* (Name size) + ("=0x" size) + (64-bit hex value size) + (terminator) */ +#define NAMED_U64_VAL_BUF_SZ (KUTF_HELPER_MAX_VAL_NAME_LEN + 3 + MAX_U64_HEX_LEN + 1) + +int kutf_helper_send_named_u64(struct kutf_context *context, + const char *val_name, u64 val) +{ + int ret = 1; + char msgbuf[NAMED_U64_VAL_BUF_SZ]; + const char *errmsg = NULL; + + if (validate_val_name(val_name, KUTF_HELPER_MAX_VAL_NAME_LEN + 1)) { + errmsg = kutf_dsprintf(&context->fixture_pool, + "Failed to send u64 value named '%s': Invalid value name", val_name); + goto out_err; + } + + ret = snprintf(msgbuf, NAMED_U64_VAL_BUF_SZ, "%s=0x%llx", val_name, val); + if (ret >= NAMED_U64_VAL_BUF_SZ || ret < 0) { + errmsg = kutf_dsprintf(&context->fixture_pool, + "Failed to send u64 value named '%s': snprintf() problem buffer size==%d ret=%d", + val_name, NAMED_U64_VAL_BUF_SZ, ret); + goto out_err; + } + + ret = kutf_helper_userdata_enqueue(context, msgbuf); + if (ret) { + errmsg = kutf_dsprintf(&context->fixture_pool, + "Failed to send u64 value named '%s': send returned %d", + val_name, ret); + goto out_err; + } + + return ret; +out_err: + kutf_test_fail(context, errmsg); + return ret; +} +EXPORT_SYMBOL(kutf_helper_send_named_u64); + +#define NAMED_VALUE_SEP "=" +#define NAMED_STR_START_DELIM NAMED_VALUE_SEP "\"" +#define 
NAMED_STR_END_DELIM "\"" +/* Return the number of value characters left in a kern_buf_sz-sized message after subtracting the length of val_name and of the start/end string delimiters; no range check is done, so the result can be zero or negative */ +int kutf_helper_max_str_len_for_kern(const char *val_name, + int kern_buf_sz) +{ + const int val_name_len = strlen(val_name); + const int start_delim_len = strlen(NAMED_STR_START_DELIM); + const int end_delim_len = strlen(NAMED_STR_END_DELIM); + int max_msg_len = kern_buf_sz; + int max_str_len; + + max_str_len = max_msg_len - val_name_len - start_delim_len - + end_delim_len; + + return max_str_len; +} +EXPORT_SYMBOL(kutf_helper_max_str_len_for_kern); +/* Enqueue val_str as a named, quoted string value (name, start delimiter, value, end delimiter); the value is cut at the first backslash, newline or double quote. Returns 0 on success; on any failure the test is failed through kutf_test_fail() and a non-zero value is returned */ +int kutf_helper_send_named_str(struct kutf_context *context, + const char *val_name, + const char *val_str) +{ + int val_str_len; + int str_buf_sz; + char *str_buf = NULL; + int ret = 1; + char *copy_ptr; + int val_name_len; + int start_delim_len = strlen(NAMED_STR_START_DELIM); + int end_delim_len = strlen(NAMED_STR_END_DELIM); + const char *errmsg = NULL; + + if (validate_val_name(val_name, KUTF_HELPER_MAX_VAL_NAME_LEN + 1)) { + errmsg = kutf_dsprintf(&context->fixture_pool, + "Failed to send str value named '%s': Invalid value name", val_name); + goto out_err; + } + val_name_len = strlen(val_name); + + val_str_len = find_quoted_string_valid_len(val_str); + + /* (name length) + ("=\"" length) + (val_str len) + ("\"" length) + terminator */ + str_buf_sz = val_name_len + start_delim_len + val_str_len + end_delim_len + 1; + + /* Using kmalloc() here instead of mempool since we know we need to free + * before we return */ + str_buf = kmalloc(str_buf_sz, GFP_KERNEL); + if (!str_buf) { + errmsg = kutf_dsprintf(&context->fixture_pool, + "Failed to send str value named '%s': kmalloc failed, str_buf_sz=%d", + val_name, str_buf_sz); + goto out_err; + } + copy_ptr = str_buf; + + /* Manually copy each string component instead of snprintf because + * val_str may need to end early, and less error path handling */ + + /* name */ + memcpy(copy_ptr, val_name, val_name_len); + copy_ptr += val_name_len; + + /* str start delimiter */ + memcpy(copy_ptr, NAMED_STR_START_DELIM, start_delim_len); + copy_ptr += 
start_delim_len; + + /* str value */ + memcpy(copy_ptr, val_str, val_str_len); + copy_ptr += val_str_len; + + /* str end delimiter */ + memcpy(copy_ptr, NAMED_STR_END_DELIM, end_delim_len); + copy_ptr += end_delim_len; + + /* Terminator */ + *copy_ptr = '\0'; + + ret = kutf_helper_userdata_enqueue(context, str_buf); + + if (ret) { + errmsg = kutf_dsprintf(&context->fixture_pool, + "Failed to send str value named '%s': send returned %d", + val_name, ret); + goto out_err; + } + + kfree(str_buf); + return ret; + +out_err: + kutf_test_fail(context, errmsg); + kfree(str_buf); + return ret; +} +EXPORT_SYMBOL(kutf_helper_send_named_str); + +int kutf_helper_receive_named_val( + struct kutf_context *context, + struct kutf_helper_named_val *named_val) +{ + size_t recv_sz; + char *recv_str; + char *search_ptr; + char *name_str = NULL; + int name_len; + int strval_len; + enum kutf_helper_valtype type = KUTF_HELPER_VALTYPE_INVALID; + char *strval = NULL; + u64 u64val = 0; + int err = KUTF_HELPER_ERR_INVALID_VALUE; + + recv_str = kutf_helper_input_dequeue(context, &recv_sz); + if (!recv_str) + return -EBUSY; + else if (IS_ERR(recv_str)) + return PTR_ERR(recv_str); + + /* Find the '=', grab the name and validate it */ + search_ptr = strnchr(recv_str, recv_sz, NAMED_VALUE_SEP[0]); + if (search_ptr) { + name_len = search_ptr - recv_str; + if (!validate_val_name(recv_str, name_len)) { + /* no need to reallocate - just modify string in place */ + name_str = recv_str; + name_str[name_len] = '\0'; + + /* Move until after the '=' */ + recv_str += (name_len + 1); + recv_sz -= (name_len + 1); + } + } + if (!name_str) { + pr_err("Invalid name part for received string '%s'\n", + recv_str); + return KUTF_HELPER_ERR_INVALID_NAME; + } + + /* detect value type */ + if (*recv_str == NAMED_STR_START_DELIM[1]) { + /* string delimiter start*/ + ++recv_str; + --recv_sz; + + /* Find end of string */ + search_ptr = strnchr(recv_str, recv_sz, NAMED_STR_END_DELIM[0]); + if (search_ptr) { + strval_len = 
search_ptr - recv_str; + /* Validate the string to ensure it contains no quotes */ + if (strval_len == find_quoted_string_valid_len(recv_str)) { + /* no need to reallocate - just modify string in place */ + strval = recv_str; + strval[strval_len] = '\0'; + + /* Move until after the end delimiter */ + recv_str += (strval_len + 1); + recv_sz -= (strval_len + 1); + type = KUTF_HELPER_VALTYPE_STR; + } else { + pr_err("String value contains invalid characters in rest of received string '%s'\n", recv_str); + err = KUTF_HELPER_ERR_CHARS_AFTER_VAL; + } + } else { + pr_err("End of string delimiter not found in rest of received string '%s'\n", recv_str); + err = KUTF_HELPER_ERR_NO_END_DELIMITER; + } + } else { + /* possibly a number value - strtoull will parse it */ + err = kstrtoull(recv_str, 0, &u64val); + /* unlike userspace can't get an end ptr, but if kstrtoull() + * reads characters after the number it'll report -EINVAL */ + if (!err) { + int len_remain = strnlen(recv_str, recv_sz); + + type = KUTF_HELPER_VALTYPE_U64; + recv_str += len_remain; + recv_sz -= len_remain; + } else { + /* special case: not a number, report as such */ + pr_err("Rest of received string was not a numeric value or quoted string value: '%s'\n", recv_str); + } + } + + if (type == KUTF_HELPER_VALTYPE_INVALID) + return err; + + /* Any remaining characters - error */ + if (strnlen(recv_str, recv_sz) != 0) { + pr_err("Characters remain after value of type %s: '%s'\n", + get_val_type_name(type), recv_str); + return KUTF_HELPER_ERR_CHARS_AFTER_VAL; + } + + /* Success - write into the output structure */ + switch (type) { + case KUTF_HELPER_VALTYPE_U64: + named_val->u.val_u64 = u64val; + break; + case KUTF_HELPER_VALTYPE_STR: + named_val->u.val_str = strval; + break; + default: + pr_err("Unreachable, fix kutf_helper_receive_named_val\n"); + /* Coding error, report as though 'run' file failed */ + return -EINVAL; + } + + named_val->val_name = name_str; + named_val->type = type; + + return 
KUTF_HELPER_ERR_NONE; +} +EXPORT_SYMBOL(kutf_helper_receive_named_val); + +#define DUMMY_MSG "" +int kutf_helper_receive_check_val( + struct kutf_helper_named_val *named_val, + struct kutf_context *context, + const char *expect_val_name, + enum kutf_helper_valtype expect_val_type) +{ + int err; + + err = kutf_helper_receive_named_val(context, named_val); + if (err < 0) { + const char *msg = kutf_dsprintf(&context->fixture_pool, + "Failed to receive value named '%s'", + expect_val_name); + kutf_test_fail(context, msg); + return err; + } else if (err > 0) { + const char *msg = kutf_dsprintf(&context->fixture_pool, + "Named-value parse error when expecting value named '%s'", + expect_val_name); + kutf_test_fail(context, msg); + goto out_fail_and_fixup; + } + + if (strcmp(named_val->val_name, expect_val_name) != 0) { + const char *msg = kutf_dsprintf(&context->fixture_pool, + "Expecting to receive value named '%s' but got '%s'", + expect_val_name, named_val->val_name); + kutf_test_fail(context, msg); + goto out_fail_and_fixup; + } + + + if (named_val->type != expect_val_type) { + const char *msg = kutf_dsprintf(&context->fixture_pool, + "Expecting value named '%s' to be of type %s but got %s", + expect_val_name, get_val_type_name(expect_val_type), + get_val_type_name(named_val->type)); + kutf_test_fail(context, msg); + goto out_fail_and_fixup; + } + + return err; + +out_fail_and_fixup: + /* Produce a valid but incorrect value */ + switch (expect_val_type) { + case KUTF_HELPER_VALTYPE_U64: + named_val->u.val_u64 = 0ull; + break; + case KUTF_HELPER_VALTYPE_STR: + { + char *str = kutf_mempool_alloc(&context->fixture_pool, sizeof(DUMMY_MSG)); + + if (!str) + return -1; + + strcpy(str, DUMMY_MSG); + named_val->u.val_str = str; + break; + } + default: + break; + } + + /* Indicate that this is invalid */ + named_val->type = KUTF_HELPER_VALTYPE_INVALID; + + /* But at least allow the caller to continue in the test with failures */ + return 0; +} 
+EXPORT_SYMBOL(kutf_helper_receive_check_val); + +void kutf_helper_output_named_val(struct kutf_helper_named_val *named_val) +{ + switch (named_val->type) { + case KUTF_HELPER_VALTYPE_U64: + pr_warn("%s=0x%llx\n", named_val->val_name, named_val->u.val_u64); + break; + case KUTF_HELPER_VALTYPE_STR: + pr_warn("%s=\"%s\"\n", named_val->val_name, named_val->u.val_str); + break; + case KUTF_HELPER_VALTYPE_INVALID: + pr_warn("%s is invalid\n", named_val->val_name); + break; + default: + pr_warn("%s has unknown type %d\n", named_val->val_name, named_val->type); + break; + } +} +EXPORT_SYMBOL(kutf_helper_output_named_val); diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c new file mode 100644 index 0000000..fd98bea --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c @@ -0,0 +1,108 @@ +/* + * + * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* Kernel UTF memory management functions */ + +#include +#include +#include + +#include + + +/** + * struct kutf_alloc_entry - Structure representing an allocation. + * @node: List node for use with kutf_mempool. 
+ * @data: Data area of the allocation (flexible array member, sized by the caller's request) + */ +struct kutf_alloc_entry { + struct list_head node; + u8 data[]; +}; +/* Initialise an empty pool: its allocation list and its lock. Returns 0, or -1 when pool is NULL */ +int kutf_mempool_init(struct kutf_mempool *pool) +{ + if (!pool) { + pr_err("NULL pointer passed to %s\n", __func__); + return -1; + } + + INIT_LIST_HEAD(&pool->head); + mutex_init(&pool->lock); + + return 0; +} +EXPORT_SYMBOL(kutf_mempool_init); +/* Unlink and kfree() every allocation still on the pool's list; the pool structure itself is not freed here */ +void kutf_mempool_destroy(struct kutf_mempool *pool) +{ + struct list_head *remove; + struct list_head *tmp; + + if (!pool) { + pr_err("NULL pointer passed to %s\n", __func__); + return; + } + + mutex_lock(&pool->lock); + list_for_each_safe(remove, tmp, &pool->head) { + struct kutf_alloc_entry *remove_alloc; + + remove_alloc = list_entry(remove, struct kutf_alloc_entry, node); + list_del(&remove_alloc->node); + kfree(remove_alloc); + } + mutex_unlock(&pool->lock); + +} +EXPORT_SYMBOL(kutf_mempool_destroy); +/* Allocate size bytes tracked on the pool's list and return the data area; returns NULL when pool is NULL or kmalloc() fails. Entries are released by kutf_mempool_destroy() */ +void *kutf_mempool_alloc(struct kutf_mempool *pool, size_t size) +{ + struct kutf_alloc_entry *ret; + + if (!pool) { + pr_err("NULL pointer passed to %s\n", __func__); + goto fail_pool; + } + + mutex_lock(&pool->lock); + + ret = kmalloc(sizeof(*ret) + size, GFP_KERNEL); + if (!ret) { + pr_err("Failed to allocate memory\n"); + goto fail_alloc; + } + + INIT_LIST_HEAD(&ret->node); + list_add(&ret->node, &pool->head); + + mutex_unlock(&pool->lock); + + return &ret->data[0]; + +fail_alloc: + mutex_unlock(&pool->lock); +fail_pool: + return NULL; +} +EXPORT_SYMBOL(kutf_mempool_alloc); diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c new file mode 100644 index 0000000..94ecfa4 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c @@ -0,0 +1,164 @@ +/* + * + * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* Kernel UTF result management functions */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +/* Lock to protect all result structures */ +static DEFINE_SPINLOCK(kutf_result_lock); +/* Allocate a result set with an empty result list, an initialised wait queue and cleared flags; returns NULL when the allocation fails */ +struct kutf_result_set *kutf_create_result_set(void) +{ + struct kutf_result_set *set; + + set = kmalloc(sizeof(*set), GFP_KERNEL); + if (!set) { + pr_err("Failed to allocate resultset\n"); + goto fail_alloc; + } + + INIT_LIST_HEAD(&set->results); + init_waitqueue_head(&set->waitq); + set->flags = 0; + + return set; + +fail_alloc: + return NULL; +} +/* Append a result carrying status and message to the context's result set and wake waiters on the set; message is stored by pointer, not copied. Returns 0, or -ENOMEM when the result cannot be allocated from the fixture pool */ +int kutf_add_result(struct kutf_context *context, + enum kutf_result_status status, + const char *message) +{ + struct kutf_mempool *mempool = &context->fixture_pool; + struct kutf_result_set *set = context->result_set; + /* Create the new result */ + struct kutf_result *new_result; + + BUG_ON(set == NULL); + + new_result = kutf_mempool_alloc(mempool, sizeof(*new_result)); + if (!new_result) { + pr_err("Result allocation failed\n"); + return -ENOMEM; + } + + INIT_LIST_HEAD(&new_result->node); + new_result->status = status; + new_result->message = message; + + spin_lock(&kutf_result_lock); + + list_add_tail(&new_result->node, &set->results); + + 
spin_unlock(&kutf_result_lock); + + wake_up(&set->waitq); + + return 0; +} + +void kutf_destroy_result_set(struct kutf_result_set *set) +{ + if (!list_empty(&set->results)) + pr_err("kutf_destroy_result_set: Unread results from test\n"); + + kfree(set); +} + +static bool kutf_has_result(struct kutf_result_set *set) +{ + bool has_result; + + spin_lock(&kutf_result_lock); + if (set->flags & KUTF_RESULT_SET_WAITING_FOR_INPUT) + /* Pretend there are results if waiting for input */ + has_result = true; + else + has_result = !list_empty(&set->results); + spin_unlock(&kutf_result_lock); + + return has_result; +} + +struct kutf_result *kutf_remove_result(struct kutf_result_set *set) +{ + struct kutf_result *result = NULL; + int ret; + + do { + ret = wait_event_interruptible(set->waitq, + kutf_has_result(set)); + + if (ret) + return ERR_PTR(ret); + + spin_lock(&kutf_result_lock); + + if (!list_empty(&set->results)) { + result = list_first_entry(&set->results, + struct kutf_result, + node); + list_del(&result->node); + } else if (set->flags & KUTF_RESULT_SET_WAITING_FOR_INPUT) { + /* Return a fake result */ + static struct kutf_result waiting = { + .status = KUTF_RESULT_USERDATA_WAIT + }; + result = &waiting; + } + /* If result == NULL then there was a race with the event + * being removed between the check in kutf_has_result and + * the lock being obtained. 
In this case we retry + */ + + spin_unlock(&kutf_result_lock); + } while (result == NULL); + + return result; +} + +void kutf_set_waiting_for_input(struct kutf_result_set *set) +{ + spin_lock(&kutf_result_lock); + set->flags |= KUTF_RESULT_SET_WAITING_FOR_INPUT; + spin_unlock(&kutf_result_lock); + + wake_up(&set->waitq); +} + +void kutf_clear_waiting_for_input(struct kutf_result_set *set) +{ + spin_lock(&kutf_result_lock); + set->flags &= ~KUTF_RESULT_SET_WAITING_FOR_INPUT; + spin_unlock(&kutf_result_lock); +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c new file mode 100644 index 0000000..3f15669 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c @@ -0,0 +1,1219 @@ +/* + * + * (C) COPYRIGHT 2014, 2017-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* Kernel UTF suite, test and fixture management including user to kernel + * interaction */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +/** + * struct kutf_application - Structure which represents kutf application + * @name: The name of this test application. + * @dir: The debugfs directory for this test + * @suite_list: List head to store all the suites which are part of this + * application + */ +struct kutf_application { + const char *name; + struct dentry *dir; + struct list_head suite_list; +}; + +/** + * struct kutf_test_function - Structure which represents kutf test function + * @suite: Back reference to the suite this test function + * belongs to + * @filters: Filters that apply to this test function + * @test_id: Test ID + * @execute: Function to run for this test + * @test_data: Static data for this test + * @node: List node for test_list + * @variant_list: List head to store all the variants which can run on + * this function + * @dir: debugfs directory for this test function + */ +struct kutf_test_function { + struct kutf_suite *suite; + unsigned int filters; + unsigned int test_id; + void (*execute)(struct kutf_context *context); + union kutf_callback_data test_data; + struct list_head node; + struct list_head variant_list; + struct dentry *dir; +}; + +/** + * struct kutf_test_fixture - Structure which holds information on the kutf + * test fixture + * @test_func: Test function this fixture belongs to + * @fixture_index: Index of this fixture + * @node: List node for variant_list + * @dir: debugfs directory for this test fixture + */ +struct kutf_test_fixture { + struct kutf_test_function *test_func; + unsigned int fixture_index; + struct list_head node; + struct dentry *dir; +}; + +static struct dentry *base_dir; +static struct workqueue_struct *kutf_workq; + +/** + * struct 
kutf_convert_table - Structure which keeps test results + * @result_name: Status of the test result + * @result: Status value for a single test + */ +struct kutf_convert_table { + char result_name[50]; + enum kutf_result_status result; +}; + +struct kutf_convert_table kutf_convert[] = { +#define ADD_UTF_RESULT(_name) \ +{ \ + #_name, \ + _name, \ +}, +ADD_UTF_RESULT(KUTF_RESULT_BENCHMARK) +ADD_UTF_RESULT(KUTF_RESULT_SKIP) +ADD_UTF_RESULT(KUTF_RESULT_UNKNOWN) +ADD_UTF_RESULT(KUTF_RESULT_PASS) +ADD_UTF_RESULT(KUTF_RESULT_DEBUG) +ADD_UTF_RESULT(KUTF_RESULT_INFO) +ADD_UTF_RESULT(KUTF_RESULT_WARN) +ADD_UTF_RESULT(KUTF_RESULT_FAIL) +ADD_UTF_RESULT(KUTF_RESULT_FATAL) +ADD_UTF_RESULT(KUTF_RESULT_ABORT) +}; + +#define UTF_CONVERT_SIZE (ARRAY_SIZE(kutf_convert)) + +/** + * kutf_create_context() - Create a test context in which a specific fixture + * of an application will be run and its results + * reported back to the user + * @test_fix: Test fixture to be run. + * + * The context's refcount will be initialized to 1. 
+ * + * Return: Returns the created test context on success or NULL on failure + */ +static struct kutf_context *kutf_create_context( + struct kutf_test_fixture *test_fix); + +/** + * kutf_destroy_context() - Destroy a previously created test context, only + * once its refcount has become zero + * @kref: pointer to kref member within the context + * + * This should only be used via a kref_put() call on the context's kref member + */ +static void kutf_destroy_context(struct kref *kref); + +/** + * kutf_context_get() - increment refcount on a context + * @context: the kutf context + * + * This must be used when the lifetime of the context might exceed that of the + * thread creating @context + */ +static void kutf_context_get(struct kutf_context *context); + +/** + * kutf_context_put() - decrement refcount on a context, destroying it when it + * reached zero + * @context: the kutf context + * + * This must be used only after a corresponding kutf_context_get() call on + * @context, and the caller no longer needs access to @context. 
+ */ +static void kutf_context_put(struct kutf_context *context); + +/** + * kutf_set_result() - Set the test result against the specified test context + * @context: Test context + * @status: Result status + */ +static void kutf_set_result(struct kutf_context *context, + enum kutf_result_status status); + +/** + * kutf_set_expected_result() - Set the expected test result for the specified + * test context + * @context: Test context + * @expected_status: Expected result status + */ +static void kutf_set_expected_result(struct kutf_context *context, + enum kutf_result_status expected_status); + +/** + * kutf_result_to_string() - Converts a KUTF result into a string + * @result_str: Output result string + * @result: Result status to convert + * + * Return: 1 if test result was successfully converted to string, 0 otherwise + */ +static int kutf_result_to_string(char **result_str, + enum kutf_result_status result) +{ + int i; + int ret = 0; + + for (i = 0; i < UTF_CONVERT_SIZE; i++) { + if (result == kutf_convert[i].result) { + *result_str = kutf_convert[i].result_name; + ret = 1; + } + } + return ret; +} + +/** + * kutf_debugfs_const_string_read() - Simple debugfs read callback which + * returns a constant string + * @file: Opened file to read from + * @buf: User buffer to write the data into + * @len: Amount of data to read + * @ppos: Offset into file to read from + * + * Return: On success, the number of bytes read and offset @ppos advanced by + * this number; on error, negative value + */ +static ssize_t kutf_debugfs_const_string_read(struct file *file, + char __user *buf, size_t len, loff_t *ppos) +{ + char *str = file->private_data; + + return simple_read_from_buffer(buf, len, ppos, str, strlen(str)); +} + +static const struct file_operations kutf_debugfs_const_string_ops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = kutf_debugfs_const_string_read, + .llseek = default_llseek, +}; + +/** + * kutf_add_explicit_result() - Check if an explicit result 
needs to be added + * @context: KUTF test context + */ +static void kutf_add_explicit_result(struct kutf_context *context) +{ + switch (context->expected_status) { + case KUTF_RESULT_UNKNOWN: + break; + + case KUTF_RESULT_WARN: + if (context->status == KUTF_RESULT_WARN) + kutf_test_pass(context, + "Pass (expected warn occurred)"); + else if (context->status != KUTF_RESULT_SKIP) + kutf_test_fail(context, + "Fail (expected warn missing)"); + break; + + case KUTF_RESULT_FAIL: + if (context->status == KUTF_RESULT_FAIL) + kutf_test_pass(context, + "Pass (expected fail occurred)"); + else if (context->status != KUTF_RESULT_SKIP) { + /* Force the expected status so the fail gets logged */ + context->expected_status = KUTF_RESULT_PASS; + kutf_test_fail(context, + "Fail (expected fail missing)"); + } + break; + + case KUTF_RESULT_FATAL: + if (context->status == KUTF_RESULT_FATAL) + kutf_test_pass(context, + "Pass (expected fatal occurred)"); + else if (context->status != KUTF_RESULT_SKIP) + kutf_test_fail(context, + "Fail (expected fatal missing)"); + break; + + case KUTF_RESULT_ABORT: + if (context->status == KUTF_RESULT_ABORT) + kutf_test_pass(context, + "Pass (expected abort occurred)"); + else if (context->status != KUTF_RESULT_SKIP) + kutf_test_fail(context, + "Fail (expected abort missing)"); + break; + default: + break; + } +} + +static void kutf_run_test(struct work_struct *data) +{ + struct kutf_context *test_context = container_of(data, + struct kutf_context, work); + struct kutf_suite *suite = test_context->suite; + struct kutf_test_function *test_func; + + test_func = test_context->test_fix->test_func; + + /* + * Call the create fixture function if required before the + * fixture is run + */ + if (suite->create_fixture) + test_context->fixture = suite->create_fixture(test_context); + + /* Only run the test if the fixture was created (if required) */ + if ((suite->create_fixture && test_context->fixture) || + (!suite->create_fixture)) { + /* Run this fixture */ + 
test_func->execute(test_context); + + if (suite->remove_fixture) + suite->remove_fixture(test_context); + + kutf_add_explicit_result(test_context); + } + + kutf_add_result(test_context, KUTF_RESULT_TEST_FINISHED, NULL); + + kutf_context_put(test_context); +} + +/** + * kutf_debugfs_run_open() Debugfs open callback for the "run" entry. + * @inode: inode of the opened file + * @file: Opened file to read from + * + * This function creates a KUTF context and queues it onto a workqueue to be + * run asynchronously. The resulting file descriptor can be used to communicate + * userdata to the test and to read back the results of the test execution. + * + * Return: 0 on success + */ +static int kutf_debugfs_run_open(struct inode *inode, struct file *file) +{ + struct kutf_test_fixture *test_fix = inode->i_private; + struct kutf_context *test_context; + int err = 0; + + test_context = kutf_create_context(test_fix); + if (!test_context) { + err = -ENOMEM; + goto finish; + } + + file->private_data = test_context; + + /* This reference is release by the kutf_run_test */ + kutf_context_get(test_context); + + queue_work(kutf_workq, &test_context->work); + +finish: + return err; +} + +#define USERDATA_WARNING_MESSAGE "WARNING: This test requires userdata\n" + +/** + * kutf_debugfs_run_read() - Debugfs read callback for the "run" entry. + * @file: Opened file to read from + * @buf: User buffer to write the data into + * @len: Amount of data to read + * @ppos: Offset into file to read from + * + * This function emits the results of the test, blocking until they are + * available. + * + * If the test involves user data then this will also return user data records + * to user space. If the test is waiting for user data then this function will + * output a message (to make the likes of 'cat' display it), followed by + * returning 0 to mark the end of file. 
+ *
+ * Results will be emitted one at a time, once all the results have been read
+ * 0 will be returned to indicate there is no more data.
+ *
+ * A zero-length read returns 0 immediately and does not consume a result.
+ *
+ * Return: Number of bytes read, 0 when there is no more data, or a negative
+ *         error code on failure.
+ */
+static ssize_t kutf_debugfs_run_read(struct file *file, char __user *buf,
+		size_t len, loff_t *ppos)
+{
+	struct kutf_context *test_context = file->private_data;
+	struct kutf_result *res;
+	unsigned long bytes_not_copied;
+	ssize_t bytes_copied = 0;
+	char *kutf_str_ptr = NULL;
+	size_t kutf_str_len = 0;
+	size_t message_len = 0;
+	char separator = ':';
+	char terminator = '\n';
+
+	/*
+	 * Nothing can be returned through an empty buffer. Bail out before a
+	 * result is dequeued so that it is not lost, and so that the "len-1"
+	 * arithmetic in the user data path below cannot wrap around and copy
+	 * more than the caller asked for.
+	 */
+	if (len == 0)
+		return 0;
+
+	res = kutf_remove_result(test_context->result_set);
+
+	if (IS_ERR(res))
+		return PTR_ERR(res);
+
+	/*
+	 * Handle 'fake' results - these results are converted to another
+	 * form before being returned from the kernel
+	 */
+	switch (res->status) {
+	case KUTF_RESULT_TEST_FINISHED:
+		return 0;
+	case KUTF_RESULT_USERDATA_WAIT:
+		if (test_context->userdata.flags &
+				KUTF_USERDATA_WARNING_OUTPUT) {
+			/*
+			 * Warning message already output,
+			 * signal end-of-file
+			 */
+			return 0;
+		}
+
+		message_len = sizeof(USERDATA_WARNING_MESSAGE)-1;
+		if (message_len > len)
+			message_len = len;
+
+		bytes_not_copied = copy_to_user(buf,
+				USERDATA_WARNING_MESSAGE,
+				message_len);
+		if (bytes_not_copied != 0)
+			return -EFAULT;
+		test_context->userdata.flags |= KUTF_USERDATA_WARNING_OUTPUT;
+		return message_len;
+	case KUTF_RESULT_USERDATA:
+		/* One byte of the buffer is reserved for the terminator */
+		message_len = strlen(res->message);
+		if (message_len > len-1) {
+			message_len = len-1;
+			pr_warn("User data truncated, read not long enough\n");
+		}
+		bytes_not_copied = copy_to_user(buf, res->message,
+				message_len);
+		if (bytes_not_copied != 0) {
+			pr_warn("Failed to copy data to user space buffer\n");
+			return -EFAULT;
+		}
+		/* Finally the terminator */
+		bytes_not_copied = copy_to_user(&buf[message_len],
+				&terminator, 1);
+		if (bytes_not_copied != 0) {
+			pr_warn("Failed to copy data to user space buffer\n");
+			return -EFAULT;
+		}
+		return message_len+1;
+	default:
+		/* Fall through - this is a test result */
+		break;
+	}
+
+	/* Note: This code assumes a result is read completely */
+	kutf_result_to_string(&kutf_str_ptr, res->status);
+	if (kutf_str_ptr)
+		kutf_str_len = strlen(kutf_str_ptr);
+
+	if (res->message)
+		message_len = strlen(res->message);
+
+	/* Output format is "<status>:<message>\n", emitted in a single read */
+	if ((kutf_str_len + 1 + message_len + 1) > len) {
+		pr_err("Not enough space in user buffer for a single result\n");
+		return 0;
+	}
+
+	/* First copy the result string */
+	if (kutf_str_ptr) {
+		bytes_not_copied = copy_to_user(&buf[0], kutf_str_ptr,
+				kutf_str_len);
+		bytes_copied += kutf_str_len - bytes_not_copied;
+		if (bytes_not_copied)
+			goto exit;
+	}
+
+	/* Then the separator */
+	bytes_not_copied = copy_to_user(&buf[bytes_copied],
+			&separator, 1);
+	bytes_copied += 1 - bytes_not_copied;
+	if (bytes_not_copied)
+		goto exit;
+
+	/* Next copy the result message */
+	if (res->message) {
+		bytes_not_copied = copy_to_user(&buf[bytes_copied],
+				res->message, message_len);
+		bytes_copied += message_len - bytes_not_copied;
+		if (bytes_not_copied)
+			goto exit;
+	}
+
+	/* Finally the terminator */
+	bytes_not_copied = copy_to_user(&buf[bytes_copied],
+			&terminator, 1);
+	bytes_copied += 1 - bytes_not_copied;
+
+exit:
+	return bytes_copied;
+}
+
+/**
+ * kutf_debugfs_run_write() Debugfs write callback for the "run" entry.
+ * @file: Opened file to write to
+ * @buf: User buffer to read the data from
+ * @len: Amount of data to write
+ * @ppos: Offset into file to write to
+ *
+ * This function allows user and kernel to exchange extra data necessary for
+ * the test fixture.
+ * + * The data is added to the first struct kutf_context running the fixture + * + * Return: Number of bytes written + */ +static ssize_t kutf_debugfs_run_write(struct file *file, + const char __user *buf, size_t len, loff_t *ppos) +{ + int ret = 0; + struct kutf_context *test_context = file->private_data; + + if (len > KUTF_MAX_LINE_LENGTH) + return -EINVAL; + + ret = kutf_helper_input_enqueue(test_context, buf, len); + if (ret < 0) + return ret; + + return len; +} + +/** + * kutf_debugfs_run_release() - Debugfs release callback for the "run" entry. + * @inode: File entry representation + * @file: A specific opening of the file + * + * Release any resources that were created during the opening of the file + * + * Note that resources may not be released immediately, that might only happen + * later when other users of the kutf_context release their refcount. + * + * Return: 0 on success + */ +static int kutf_debugfs_run_release(struct inode *inode, struct file *file) +{ + struct kutf_context *test_context = file->private_data; + + kutf_helper_input_enqueue_end_of_data(test_context); + + kutf_context_put(test_context); + return 0; +} + +static const struct file_operations kutf_debugfs_run_ops = { + .owner = THIS_MODULE, + .open = kutf_debugfs_run_open, + .read = kutf_debugfs_run_read, + .write = kutf_debugfs_run_write, + .release = kutf_debugfs_run_release, + .llseek = default_llseek, +}; + +/** + * create_fixture_variant() - Creates a fixture variant for the specified + * test function and index and the debugfs entries + * that represent it. 
+ * @test_func: Test function + * @fixture_index: Fixture index + * + * Return: 0 on success, negative value corresponding to error code in failure + */ +static int create_fixture_variant(struct kutf_test_function *test_func, + unsigned int fixture_index) +{ + struct kutf_test_fixture *test_fix; + char name[11]; /* Enough to print the MAX_UINT32 + the null terminator */ + struct dentry *tmp; + int err; + + test_fix = kmalloc(sizeof(*test_fix), GFP_KERNEL); + if (!test_fix) { + pr_err("Failed to create debugfs directory when adding fixture\n"); + err = -ENOMEM; + goto fail_alloc; + } + + test_fix->test_func = test_func; + test_fix->fixture_index = fixture_index; + + snprintf(name, sizeof(name), "%d", fixture_index); + test_fix->dir = debugfs_create_dir(name, test_func->dir); + if (!test_func->dir) { + pr_err("Failed to create debugfs directory when adding fixture\n"); + /* Might not be the right error, we don't get it passed back to us */ + err = -EEXIST; + goto fail_dir; + } + + tmp = debugfs_create_file("type", S_IROTH, test_fix->dir, "fixture\n", + &kutf_debugfs_const_string_ops); + if (!tmp) { + pr_err("Failed to create debugfs file \"type\" when adding fixture\n"); + /* Might not be the right error, we don't get it passed back to us */ + err = -EEXIST; + goto fail_file; + } + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) + tmp = debugfs_create_file_unsafe( +#else + tmp = debugfs_create_file( +#endif + "run", 0600, test_fix->dir, + test_fix, + &kutf_debugfs_run_ops); + if (!tmp) { + pr_err("Failed to create debugfs file \"run\" when adding fixture\n"); + /* Might not be the right error, we don't get it passed back to us */ + err = -EEXIST; + goto fail_file; + } + + list_add(&test_fix->node, &test_func->variant_list); + return 0; + +fail_file: + debugfs_remove_recursive(test_fix->dir); +fail_dir: + kfree(test_fix); +fail_alloc: + return err; +} + +/** + * kutf_remove_test_variant() - Destroy a previously created fixture variant. 
+ * @test_fix: Test fixture + */ +static void kutf_remove_test_variant(struct kutf_test_fixture *test_fix) +{ + debugfs_remove_recursive(test_fix->dir); + kfree(test_fix); +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0) +/* Adapting to the upstream debugfs_create_x32() change */ +static int ktufp_u32_get(void *data, u64 *val) +{ + *val = *(u32 *)data; + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(kutfp_fops_x32_ro, ktufp_u32_get, NULL, "0x%08llx\n"); +#endif + +void kutf_add_test_with_filters_and_data( + struct kutf_suite *suite, + unsigned int id, + const char *name, + void (*execute)(struct kutf_context *context), + unsigned int filters, + union kutf_callback_data test_data) +{ + struct kutf_test_function *test_func; + struct dentry *tmp; + unsigned int i; + + test_func = kmalloc(sizeof(*test_func), GFP_KERNEL); + if (!test_func) { + pr_err("Failed to allocate memory when adding test %s\n", name); + goto fail_alloc; + } + + INIT_LIST_HEAD(&test_func->variant_list); + + test_func->dir = debugfs_create_dir(name, suite->dir); + if (!test_func->dir) { + pr_err("Failed to create debugfs directory when adding test %s\n", name); + goto fail_dir; + } + + tmp = debugfs_create_file("type", S_IROTH, test_func->dir, "test\n", + &kutf_debugfs_const_string_ops); + if (!tmp) { + pr_err("Failed to create debugfs file \"type\" when adding test %s\n", name); + goto fail_file; + } + + test_func->filters = filters; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0) + tmp = debugfs_create_file_unsafe("filters", S_IROTH, test_func->dir, + &test_func->filters, &kutfp_fops_x32_ro); +#else + tmp = debugfs_create_x32("filters", S_IROTH, test_func->dir, + &test_func->filters); +#endif + if (!tmp) { + pr_err("Failed to create debugfs file \"filters\" when adding test %s\n", name); + goto fail_file; + } + + test_func->test_id = id; + tmp = debugfs_create_u32("test_id", S_IROTH, test_func->dir, + &test_func->test_id); + if (!tmp) { + pr_err("Failed to create debugfs file \"test_id\" when 
adding test %s\n", name); + goto fail_file; + } + + for (i = 0; i < suite->fixture_variants; i++) { + if (create_fixture_variant(test_func, i)) { + pr_err("Failed to create fixture %d when adding test %s\n", i, name); + goto fail_file; + } + } + + test_func->suite = suite; + test_func->execute = execute; + test_func->test_data = test_data; + + list_add(&test_func->node, &suite->test_list); + return; + +fail_file: + debugfs_remove_recursive(test_func->dir); +fail_dir: + kfree(test_func); +fail_alloc: + return; +} +EXPORT_SYMBOL(kutf_add_test_with_filters_and_data); + +void kutf_add_test_with_filters( + struct kutf_suite *suite, + unsigned int id, + const char *name, + void (*execute)(struct kutf_context *context), + unsigned int filters) +{ + union kutf_callback_data data; + + data.ptr_value = NULL; + + kutf_add_test_with_filters_and_data(suite, + id, + name, + execute, + suite->suite_default_flags, + data); +} +EXPORT_SYMBOL(kutf_add_test_with_filters); + +void kutf_add_test(struct kutf_suite *suite, + unsigned int id, + const char *name, + void (*execute)(struct kutf_context *context)) +{ + union kutf_callback_data data; + + data.ptr_value = NULL; + + kutf_add_test_with_filters_and_data(suite, + id, + name, + execute, + suite->suite_default_flags, + data); +} +EXPORT_SYMBOL(kutf_add_test); + +/** + * kutf_remove_test(): Remove a previously added test function. 
+ * @test_func: Test function
+ */
+static void kutf_remove_test(struct kutf_test_function *test_func)
+{
+	struct list_head *pos;
+	struct list_head *tmp;
+
+	/* Tear down every fixture variant before the test itself */
+	list_for_each_safe(pos, tmp, &test_func->variant_list) {
+		struct kutf_test_fixture *test_fix;
+
+		test_fix = list_entry(pos, struct kutf_test_fixture, node);
+		kutf_remove_test_variant(test_fix);
+	}
+
+	list_del(&test_func->node);
+	debugfs_remove_recursive(test_func->dir);
+	kfree(test_func);
+}
+
+/*
+ * Allocate a suite, create its debugfs directory and "type" file below the
+ * directory of @app, and link it onto the suite list of @app.
+ *
+ * Return: the new suite on success, NULL on failure.
+ */
+struct kutf_suite *kutf_create_suite_with_filters_and_data(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context),
+		unsigned int filters,
+		union kutf_callback_data suite_data)
+{
+	struct kutf_suite *suite;
+	struct dentry *tmp;
+
+	suite = kmalloc(sizeof(*suite), GFP_KERNEL);
+	if (!suite) {
+		pr_err("Failed to allocate memory when creating suite %s\n", name);
+		goto fail_kmalloc;
+	}
+
+	suite->dir = debugfs_create_dir(name, app->dir);
+	if (!suite->dir) {
+		pr_err("Failed to create debugfs directory when creating suite %s\n", name);
+		goto fail_debugfs;
+	}
+
+	tmp = debugfs_create_file("type", S_IROTH, suite->dir, "suite\n",
+			&kutf_debugfs_const_string_ops);
+	if (!tmp) {
+		pr_err("Failed to create debugfs file \"type\" when creating suite %s\n", name);
+		goto fail_file;
+	}
+
+	INIT_LIST_HEAD(&suite->test_list);
+	suite->app = app;
+	suite->name = name;
+	suite->fixture_variants = fixture_count;
+	suite->create_fixture = create_fixture;
+	suite->remove_fixture = remove_fixture;
+	suite->suite_default_flags = filters;
+	suite->suite_data = suite_data;
+
+	list_add(&suite->node, &app->suite_list);
+
+	return suite;
+
+fail_file:
+	debugfs_remove_recursive(suite->dir);
+fail_debugfs:
+	kfree(suite);
+fail_kmalloc:
+	return NULL;
+}
+EXPORT_SYMBOL(kutf_create_suite_with_filters_and_data);
+
+struct kutf_suite *kutf_create_suite_with_filters(
+		struct kutf_application *app,
+
const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context),
+		unsigned int filters)
+{
+	union kutf_callback_data data;
+
+	/* No suite data is supplied through this entry point */
+	data.ptr_value = NULL;
+	return kutf_create_suite_with_filters_and_data(app,
+			name,
+			fixture_count,
+			create_fixture,
+			remove_fixture,
+			filters,
+			data);
+}
+EXPORT_SYMBOL(kutf_create_suite_with_filters);
+
+/*
+ * Create a suite with the KUTF_F_TEST_GENERIC filter flags and no suite data.
+ *
+ * Return: the new suite on success, NULL on failure.
+ */
+struct kutf_suite *kutf_create_suite(
+		struct kutf_application *app,
+		const char *name,
+		unsigned int fixture_count,
+		void *(*create_fixture)(struct kutf_context *context),
+		void (*remove_fixture)(struct kutf_context *context))
+{
+	union kutf_callback_data data;
+
+	data.ptr_value = NULL;
+	return kutf_create_suite_with_filters_and_data(app,
+			name,
+			fixture_count,
+			create_fixture,
+			remove_fixture,
+			KUTF_F_TEST_GENERIC,
+			data);
+}
+EXPORT_SYMBOL(kutf_create_suite);
+
+/**
+ * kutf_destroy_suite() - Destroy a previously added test suite.
+ * @suite: Test suite
+ */
+static void kutf_destroy_suite(struct kutf_suite *suite)
+{
+	struct list_head *pos;
+	struct list_head *tmp;
+
+	/* Remove every test of the suite before the suite itself */
+	list_for_each_safe(pos, tmp, &suite->test_list) {
+		struct kutf_test_function *test_func;
+
+		test_func = list_entry(pos, struct kutf_test_function, node);
+		kutf_remove_test(test_func);
+	}
+
+	list_del(&suite->node);
+	debugfs_remove_recursive(suite->dir);
+	kfree(suite);
+}
+
+/*
+ * Allocate an application and create its debugfs directory and "type" file
+ * below the KUTF base directory.
+ *
+ * Return: the new application on success, NULL on failure.
+ */
+struct kutf_application *kutf_create_application(const char *name)
+{
+	struct kutf_application *app;
+	struct dentry *tmp;
+
+	app = kmalloc(sizeof(*app), GFP_KERNEL);
+	if (!app) {
+		pr_err("Failed to allocate memory when creating application %s\n", name);
+		goto fail_kmalloc;
+	}
+
+	app->dir = debugfs_create_dir(name, base_dir);
+	if (!app->dir) {
+		pr_err("Failed to create debugfs directory when creating application %s\n", name);
+		goto fail_debugfs;
+	}
+
+	tmp = debugfs_create_file("type", S_IROTH, app->dir, "application\n",
+			&kutf_debugfs_const_string_ops);
+	if
(!tmp) {
+		pr_err("Failed to create debugfs file \"type\" when creating application %s\n", name);
+		goto fail_file;
+	}
+
+	INIT_LIST_HEAD(&app->suite_list);
+	app->name = name;
+
+	return app;
+
+fail_file:
+	debugfs_remove_recursive(app->dir);
+fail_debugfs:
+	kfree(app);
+fail_kmalloc:
+	return NULL;
+}
+EXPORT_SYMBOL(kutf_create_application);
+
+/*
+ * Destroy every suite registered on @app, then remove the application's
+ * debugfs directory and free @app.
+ */
+void kutf_destroy_application(struct kutf_application *app)
+{
+	struct list_head *pos;
+	struct list_head *tmp;
+
+	list_for_each_safe(pos, tmp, &app->suite_list) {
+		struct kutf_suite *suite;
+
+		suite = list_entry(pos, struct kutf_suite, node);
+		kutf_destroy_suite(suite);
+	}
+
+	debugfs_remove_recursive(app->dir);
+	kfree(app);
+}
+EXPORT_SYMBOL(kutf_destroy_application);
+
+/*
+ * Allocate and initialise a test context for @test_fix. The context is
+ * returned with its kref initialised and its work item bound to
+ * kutf_run_test().
+ */
+static struct kutf_context *kutf_create_context(
+		struct kutf_test_fixture *test_fix)
+{
+	struct kutf_context *new_context;
+
+	new_context = kmalloc(sizeof(*new_context), GFP_KERNEL);
+	if (!new_context) {
+		pr_err("Failed to allocate test context\n");
+		goto fail_alloc;
+	}
+
+	new_context->result_set = kutf_create_result_set();
+	if (!new_context->result_set) {
+		pr_err("Failed to create result set\n");
+		goto fail_result_set;
+	}
+
+	new_context->test_fix = test_fix;
+	/* Save the pointer to the suite as the callbacks will require it */
+	new_context->suite = test_fix->test_func->suite;
+	new_context->status = KUTF_RESULT_UNKNOWN;
+	new_context->expected_status = KUTF_RESULT_UNKNOWN;
+
+	kutf_mempool_init(&new_context->fixture_pool);
+	new_context->fixture = NULL;
+	new_context->fixture_index = test_fix->fixture_index;
+	new_context->fixture_name = NULL;
+	new_context->test_data = test_fix->test_func->test_data;
+
+	new_context->userdata.flags = 0;
+	INIT_LIST_HEAD(&new_context->userdata.input_head);
+	init_waitqueue_head(&new_context->userdata.input_waitq);
+
+	INIT_WORK(&new_context->work, kutf_run_test);
+
+	kref_init(&new_context->kref);
+
+	return new_context;
+
+fail_result_set:
+	kfree(new_context);
+fail_alloc:
+	return NULL;
+}
+
+static void kutf_destroy_context(struct kref *kref) +{ + struct kutf_context *context; + + context = container_of(kref, struct kutf_context, kref); + kutf_destroy_result_set(context->result_set); + kutf_mempool_destroy(&context->fixture_pool); + kfree(context); +} + +static void kutf_context_get(struct kutf_context *context) +{ + kref_get(&context->kref); +} + +static void kutf_context_put(struct kutf_context *context) +{ + kref_put(&context->kref, kutf_destroy_context); +} + + +static void kutf_set_result(struct kutf_context *context, + enum kutf_result_status status) +{ + context->status = status; +} + +static void kutf_set_expected_result(struct kutf_context *context, + enum kutf_result_status expected_status) +{ + context->expected_status = expected_status; +} + +/** + * kutf_test_log_result() - Log a result for the specified test context + * @context: Test context + * @message: Result string + * @new_status: Result status + */ +static void kutf_test_log_result( + struct kutf_context *context, + const char *message, + enum kutf_result_status new_status) +{ + if (context->status < new_status) + context->status = new_status; + + if (context->expected_status != new_status) + kutf_add_result(context, new_status, message); +} + +void kutf_test_log_result_external( + struct kutf_context *context, + const char *message, + enum kutf_result_status new_status) +{ + kutf_test_log_result(context, message, new_status); +} +EXPORT_SYMBOL(kutf_test_log_result_external); + +void kutf_test_expect_abort(struct kutf_context *context) +{ + kutf_set_expected_result(context, KUTF_RESULT_ABORT); +} +EXPORT_SYMBOL(kutf_test_expect_abort); + +void kutf_test_expect_fatal(struct kutf_context *context) +{ + kutf_set_expected_result(context, KUTF_RESULT_FATAL); +} +EXPORT_SYMBOL(kutf_test_expect_fatal); + +void kutf_test_expect_fail(struct kutf_context *context) +{ + kutf_set_expected_result(context, KUTF_RESULT_FAIL); +} +EXPORT_SYMBOL(kutf_test_expect_fail); + +void 
kutf_test_expect_warn(struct kutf_context *context) +{ + kutf_set_expected_result(context, KUTF_RESULT_WARN); +} +EXPORT_SYMBOL(kutf_test_expect_warn); + +void kutf_test_expect_pass(struct kutf_context *context) +{ + kutf_set_expected_result(context, KUTF_RESULT_PASS); +} +EXPORT_SYMBOL(kutf_test_expect_pass); + +void kutf_test_skip(struct kutf_context *context) +{ + kutf_set_result(context, KUTF_RESULT_SKIP); + kutf_set_expected_result(context, KUTF_RESULT_UNKNOWN); + + kutf_test_log_result(context, "Test skipped", KUTF_RESULT_SKIP); +} +EXPORT_SYMBOL(kutf_test_skip); + +void kutf_test_skip_msg(struct kutf_context *context, const char *message) +{ + kutf_set_result(context, KUTF_RESULT_SKIP); + kutf_set_expected_result(context, KUTF_RESULT_UNKNOWN); + + kutf_test_log_result(context, kutf_dsprintf(&context->fixture_pool, + "Test skipped: %s", message), KUTF_RESULT_SKIP); + kutf_test_log_result(context, "!!!Test skipped!!!", KUTF_RESULT_SKIP); +} +EXPORT_SYMBOL(kutf_test_skip_msg); + +void kutf_test_debug(struct kutf_context *context, char const *message) +{ + kutf_test_log_result(context, message, KUTF_RESULT_DEBUG); +} +EXPORT_SYMBOL(kutf_test_debug); + +void kutf_test_pass(struct kutf_context *context, char const *message) +{ + static const char explicit_message[] = "(explicit pass)"; + + if (!message) + message = explicit_message; + + kutf_test_log_result(context, message, KUTF_RESULT_PASS); +} +EXPORT_SYMBOL(kutf_test_pass); + +void kutf_test_info(struct kutf_context *context, char const *message) +{ + kutf_test_log_result(context, message, KUTF_RESULT_INFO); +} +EXPORT_SYMBOL(kutf_test_info); + +void kutf_test_warn(struct kutf_context *context, char const *message) +{ + kutf_test_log_result(context, message, KUTF_RESULT_WARN); +} +EXPORT_SYMBOL(kutf_test_warn); + +void kutf_test_fail(struct kutf_context *context, char const *message) +{ + kutf_test_log_result(context, message, KUTF_RESULT_FAIL); +} +EXPORT_SYMBOL(kutf_test_fail); + +void kutf_test_fatal(struct 
kutf_context *context, char const *message) +{ + kutf_test_log_result(context, message, KUTF_RESULT_FATAL); +} +EXPORT_SYMBOL(kutf_test_fatal); + +void kutf_test_abort(struct kutf_context *context) +{ + kutf_test_log_result(context, "", KUTF_RESULT_ABORT); +} +EXPORT_SYMBOL(kutf_test_abort); + +#ifdef CONFIG_DEBUG_FS + +/** + * init_kutf_core() - Module entry point. + * + * Create the base entry point in debugfs. + */ +static int __init init_kutf_core(void) +{ + kutf_workq = alloc_workqueue("kutf workq", WQ_UNBOUND, 1); + if (!kutf_workq) + return -ENOMEM; + + base_dir = debugfs_create_dir("kutf_tests", NULL); + if (!base_dir) { + destroy_workqueue(kutf_workq); + kutf_workq = NULL; + return -ENOMEM; + } + + return 0; +} + +/** + * exit_kutf_core() - Module exit point. + * + * Remove the base entry point in debugfs. + */ +static void __exit exit_kutf_core(void) +{ + debugfs_remove_recursive(base_dir); + + if (kutf_workq) + destroy_workqueue(kutf_workq); +} + +#else /* CONFIG_DEBUG_FS */ + +/** + * init_kutf_core() - Module entry point. + * + * Stub for when build against a kernel without debugfs support + */ +static int __init init_kutf_core(void) +{ + pr_debug("KUTF requires a kernel with debug fs support"); + + return -ENODEV; +} + +/** + * exit_kutf_core() - Module exit point. + * + * Stub for when build against a kernel without debugfs support + */ +static void __exit exit_kutf_core(void) +{ +} +#endif /* CONFIG_DEBUG_FS */ + +MODULE_LICENSE("GPL"); + +module_init(init_kutf_core); +module_exit(exit_kutf_core); diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c new file mode 100644 index 0000000..7f5ac51 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c @@ -0,0 +1,76 @@ +/* + * + * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* Kernel UTF utility functions */ + +#include +#include +#include +#include + +#include +#include + +static char tmp_buffer[KUTF_MAX_DSPRINTF_LEN]; + +DEFINE_MUTEX(buffer_lock); + +const char *kutf_dsprintf(struct kutf_mempool *pool, + const char *fmt, ...) 
+{ + va_list args; + int len; + int size; + void *buffer; + + mutex_lock(&buffer_lock); + va_start(args, fmt); + len = vsnprintf(tmp_buffer, sizeof(tmp_buffer), fmt, args); + va_end(args); + + if (len < 0) { + pr_err("kutf_dsprintf: Bad format dsprintf format %s\n", fmt); + goto fail_format; + } + + if (len >= sizeof(tmp_buffer)) { + pr_warn("kutf_dsprintf: Truncated dsprintf message %s\n", fmt); + size = sizeof(tmp_buffer); + } else { + size = len + 1; + } + + buffer = kutf_mempool_alloc(pool, size); + if (!buffer) + goto fail_alloc; + + memcpy(buffer, tmp_buffer, size); + mutex_unlock(&buffer_lock); + + return buffer; + +fail_alloc: +fail_format: + mutex_unlock(&buffer_lock); + return NULL; +} +EXPORT_SYMBOL(kutf_dsprintf); diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild new file mode 100644 index 0000000..ca8c512 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild @@ -0,0 +1,26 @@ +# +# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. 
+# +# SPDX-License-Identifier: GPL-2.0 +# +# + +ccflags-y += -I$(src)/../include -I$(src)/../../../ -I$(src)/../../ -I$(src)/../../backend/gpu -I$(srctree)/drivers/staging/android + +obj-$(CONFIG_MALI_IRQ_LATENCY) += mali_kutf_irq_test.o + +mali_kutf_irq_test-y := mali_kutf_irq_test_main.o diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig similarity index 100% rename from dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig rename to bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile similarity index 100% rename from dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile rename to bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Makefile diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp new file mode 100644 index 0000000..90efdcf --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp @@ -0,0 +1,35 @@ +/* + * + * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + +bob_kernel_module { + name: "mali_kutf_irq_test", + defaults: [ + "mali_kbase_shared_config_defaults", + "kernel_test_includes", + ], + srcs: [ + "Kbuild", + "mali_kutf_irq_test_main.c", + ], + extra_symbols: [ + "mali_kbase", + "kutf", + ], + enabled: false, + base_build_kutf: { + enabled: true, + kbuild_options: ["CONFIG_MALI_IRQ_LATENCY=m"], + }, +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c new file mode 100644 index 0000000..26b442a --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c @@ -0,0 +1,278 @@ +/* + * + * (C) COPYRIGHT 2016-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include +#include +#include + +#include "mali_kbase.h" +#include +#include + +#include +#include + +/* + * This file contains the code which is used for measuring interrupt latency + * of the Mali GPU IRQ. In particular, function mali_kutf_irq_latency() is + * used with this purpose and it is called within KUTF framework - a kernel + * unit test framework. 
The measured latency provided by this test should + * be representative for the latency of the Mali JOB/MMU IRQs as well. + */ + +/* KUTF test application pointer for this test */ +struct kutf_application *irq_app; + +/** + * struct kutf_irq_fixture data - test fixture used by the test functions. + * @kbdev: kbase device for the GPU. + * + */ +struct kutf_irq_fixture_data { + struct kbase_device *kbdev; +}; + +#define SEC_TO_NANO(s) ((s)*1000000000LL) + +/* ID for the GPU IRQ */ +#define GPU_IRQ_HANDLER 2 + +#define NR_TEST_IRQS ((u32)1000000) + +/* IRQ for the test to trigger. Currently POWER_CHANGED_SINGLE as it is + * otherwise unused in the DDK + */ +#define TEST_IRQ POWER_CHANGED_SINGLE + +#define IRQ_TIMEOUT HZ + +/* Kernel API for setting irq throttle hook callback and irq time in us*/ +extern int kbase_set_custom_irq_handler(struct kbase_device *kbdev, + irq_handler_t custom_handler, + int irq_type); +extern irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val); + +static DECLARE_WAIT_QUEUE_HEAD(wait); +static bool triggered; +static u64 irq_time; + +static void *kbase_untag(void *ptr) +{ + return (void *)(((uintptr_t) ptr) & ~3); +} + +/** + * kbase_gpu_irq_custom_handler - Custom IRQ throttle handler + * @irq: IRQ number + * @data: Data associated with this IRQ + * + * Return: state of the IRQ + */ +static irqreturn_t kbase_gpu_irq_custom_handler(int irq, void *data) +{ + struct kbase_device *kbdev = kbase_untag(data); + u32 val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS)); + irqreturn_t result; + u64 tval; + bool has_test_irq = val & TEST_IRQ; + + if (has_test_irq) { + tval = ktime_get_real_ns(); + /* Clear the test source only here */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), + TEST_IRQ); + /* Remove the test IRQ status bit */ + val = val ^ TEST_IRQ; + } + + result = kbase_gpu_irq_test_handler(irq, data, val); + + if (has_test_irq) { + irq_time = tval; + triggered = true; + wake_up(&wait); + result = 
IRQ_HANDLED; + } + + return result; +} + +/** + * mali_kutf_irq_default_create_fixture() - Creates the fixture data required + * for all the tests in the irq suite. + * @context: KUTF context. + * + * Return: Fixture data created on success or NULL on failure + */ +static void *mali_kutf_irq_default_create_fixture( + struct kutf_context *context) +{ + struct kutf_irq_fixture_data *data; + + data = kutf_mempool_alloc(&context->fixture_pool, + sizeof(struct kutf_irq_fixture_data)); + + if (!data) + goto fail; + + /* Acquire the kbase device */ + data->kbdev = kbase_find_device(-1); + if (data->kbdev == NULL) { + kutf_test_fail(context, "Failed to find kbase device"); + goto fail; + } + + return data; + +fail: + return NULL; +} + +/** + * mali_kutf_irq_default_remove_fixture() - Destroy fixture data previously + * created by mali_kutf_irq_default_create_fixture. + * + * @context: KUTF context. + */ +static void mali_kutf_irq_default_remove_fixture( + struct kutf_context *context) +{ + struct kutf_irq_fixture_data *data = context->fixture; + struct kbase_device *kbdev = data->kbdev; + + kbase_release_device(kbdev); +} + +/** + * mali_kutf_irq_latency() - measure GPU IRQ latency + * @context: kutf context within which to perform the test + * + * The test triggers IRQs manually, and measures the + * time between triggering the IRQ and the IRQ handler being executed. + * + * This is not a traditional test, in that the pass/fail status has little + * meaning (other than indicating that the IRQ handler executed at all). Instead + * the results are in the latencies provided with the test result. There is no + * meaningful pass/fail result that can be obtained here, instead the latencies + * are provided for manual analysis only. 
+ */ +static void mali_kutf_irq_latency(struct kutf_context *context) +{ + struct kutf_irq_fixture_data *data = context->fixture; + struct kbase_device *kbdev = data->kbdev; + u64 min_time = U64_MAX, max_time = 0, average_time = 0; + u32 i; + const char *results; + + /* Force GPU to be powered */ + kbase_pm_context_active(kbdev); + kbase_pm_wait_for_desired_state(kbdev); + + kbase_set_custom_irq_handler(kbdev, kbase_gpu_irq_custom_handler, + GPU_IRQ_HANDLER); + + for (i = 1; i <= NR_TEST_IRQS; i++) { + u64 start_time = ktime_get_real_ns(); + + triggered = false; + + /* Trigger fake IRQ */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), + TEST_IRQ); + + if (wait_event_timeout(wait, triggered, IRQ_TIMEOUT) == 0) { + /* Wait extra time to see if it would come */ + wait_event_timeout(wait, triggered, 10 * IRQ_TIMEOUT); + break; + } + + if ((irq_time - start_time) < min_time) + min_time = irq_time - start_time; + if ((irq_time - start_time) > max_time) + max_time = irq_time - start_time; + average_time += irq_time - start_time; + + udelay(10); + } + + /* Go back to default handler */ + kbase_set_custom_irq_handler(kbdev, NULL, GPU_IRQ_HANDLER); + + kbase_pm_context_idle(kbdev); + + if (i > NR_TEST_IRQS) { + do_div(average_time, NR_TEST_IRQS); + results = kutf_dsprintf(&context->fixture_pool, + "Min latency = %lldns, Max latency = %lldns, Average latency = %lldns\n", + min_time, max_time, average_time); + kutf_test_pass(context, results); + } else { + results = kutf_dsprintf(&context->fixture_pool, + "Timed out for the %u-th IRQ (loop_limit: %u), triggered late: %d\n", + i, NR_TEST_IRQS, triggered); + kutf_test_fail(context, results); + } +} + +/** + * Module entry point for this test. 
+ */ +int mali_kutf_irq_test_main_init(void) +{ + struct kutf_suite *suite; + + irq_app = kutf_create_application("irq"); + + if (NULL == irq_app) { + pr_warn("Creation of test application failed!\n"); + return -ENOMEM; + } + + suite = kutf_create_suite(irq_app, "irq_default", + 1, mali_kutf_irq_default_create_fixture, + mali_kutf_irq_default_remove_fixture); + + if (NULL == suite) { + pr_warn("Creation of test suite failed!\n"); + kutf_destroy_application(irq_app); + return -ENOMEM; + } + + kutf_add_test(suite, 0x0, "irq_latency", + mali_kutf_irq_latency); + return 0; +} + +/** + * Module exit point for this test. + */ +void mali_kutf_irq_test_main_exit(void) +{ + kutf_destroy_application(irq_app); +} + +module_init(mali_kutf_irq_test_main_init); +module_exit(mali_kutf_irq_test_main_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("ARM Ltd."); +MODULE_VERSION("1.0"); diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c new file mode 100644 index 0000000..f266d8e --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c @@ -0,0 +1,366 @@ +/* + * + * (C) COPYRIGHT ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + *//* + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#include "linux/mman.h" +#include "../mali_kbase.h" + +/* mali_kbase_mmap.c + * + * This file contains Linux specific implementation of + * kbase_context_get_unmapped_area() interface. + */ + + +/** + * align_and_check() - Align the specified pointer to the provided alignment and + * check that it is still in range. + * @gap_end: Highest possible start address for allocation (end of gap in + * address space) + * @gap_start: Start address of current memory area / gap in address space + * @info: vm_unmapped_area_info structure passed to caller, containing + * alignment, length and limits for the allocation + * @is_shader_code: True if the allocation is for shader code (which has + * additional alignment requirements) + * @is_same_4gb_page: True if the allocation needs to reside completely within + * a 4GB chunk + * + * Return: true if gap_end is now aligned correctly and is still in range, + * false otherwise + */ +static bool align_and_check(unsigned long *gap_end, unsigned long gap_start, + struct vm_unmapped_area_info *info, bool is_shader_code, + bool is_same_4gb_page) +{ + /* Compute highest gap address at the desired alignment */ + (*gap_end) -= info->length; + (*gap_end) -= (*gap_end - info->align_offset) & info->align_mask; + + if (is_shader_code) { + /* Check for 4GB boundary */ + if (0 == (*gap_end & BASE_MEM_MASK_4GB)) + (*gap_end) -= (info->align_offset ? 
info->align_offset : + info->length); + if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB)) + (*gap_end) -= (info->align_offset ? info->align_offset : + info->length); + + if (!(*gap_end & BASE_MEM_MASK_4GB) || !((*gap_end + + info->length) & BASE_MEM_MASK_4GB)) + return false; + } else if (is_same_4gb_page) { + unsigned long start = *gap_end; + unsigned long end = *gap_end + info->length; + unsigned long mask = ~((unsigned long)U32_MAX); + + /* Check if 4GB boundary is straddled */ + if ((start & mask) != ((end - 1) & mask)) { + unsigned long offset = end - (end & mask); + /* This is to ensure that alignment doesn't get + * disturbed in an attempt to prevent straddling at + * 4GB boundary. The GPU VA is aligned to 2MB when the + * allocation size is > 2MB and there is enough CPU & + * GPU virtual space. + */ + unsigned long rounded_offset = + ALIGN(offset, info->align_mask + 1); + + start -= rounded_offset; + end -= rounded_offset; + + *gap_end = start; + + /* The preceding 4GB boundary shall not get straddled, + * even after accounting for the alignment, as the + * size of allocation is limited to 4GB and the initial + * start location was already aligned. + */ + WARN_ON((start & mask) != ((end - 1) & mask)); + } + } + + + if ((*gap_end < info->low_limit) || (*gap_end < gap_start)) + return false; + + + return true; +} + +/** + * kbase_unmapped_area_topdown() - allocates new areas top-down from + * below the stack limit. + * @info: Information about the memory area to allocate. + * @is_shader_code: Boolean which denotes whether the allocated area is + * intended for the use by shader core in which case a + * special alignment requirements apply. + * @is_same_4gb_page: Boolean which indicates whether the allocated area needs + * to reside completely within a 4GB chunk. + * + * The unmapped_area_topdown() function in the Linux kernel is not exported + * using EXPORT_SYMBOL_GPL macro. 
To allow us to call this function from a + * module and also make use of the fact that some of the requirements for + * the unmapped area are known in advance, we implemented an extended version + * of this function and prefixed it with 'kbase_'. + * + * The difference in the call parameter list comes from the fact that + * kbase_unmapped_area_topdown() is called with additional parameters which + * are provided to indicate whether the allocation is for a shader core memory, + * which has additional alignment requirements, and whether the allocation can + * straddle a 4GB boundary. + * + * The modification of the original Linux function lies in how the computation + * of the highest gap address at the desired alignment is performed once the + * gap with desirable properties is found. For this purpose a special function + * is introduced (@ref align_and_check()) which beside computing the gap end + * at the desired alignment also performs additional alignment checks for the + * case when the memory is executable shader core memory, for which it is + * ensured that the gap does not end on a 4GB boundary, and for the case when + * memory needs to be confined within a 4GB chunk. + * + * Return: address of the found gap end (high limit) if area is found; + * -ENOMEM if search is unsuccessful +*/ + +static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info + *info, bool is_shader_code, bool is_same_4gb_page) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long length, low_limit, high_limit, gap_start, gap_end; + + /* Adjust search length to account for worst case alignment overhead */ + length = info->length + info->align_mask; + if (length < info->length) + return -ENOMEM; + + /* + * Adjust search limits by the desired length. + * See implementation comment at top of unmapped_area(). 
+ */ + gap_end = info->high_limit; + if (gap_end < length) + return -ENOMEM; + high_limit = gap_end - length; + + if (info->low_limit > high_limit) + return -ENOMEM; + low_limit = info->low_limit + length; + + /* Check highest gap, which does not precede any rbtree node */ + gap_start = mm->highest_vm_end; + if (gap_start <= high_limit) { + if (align_and_check(&gap_end, gap_start, info, + is_shader_code, is_same_4gb_page)) + return gap_end; + } + + /* Check if rbtree root looks promising */ + if (RB_EMPTY_ROOT(&mm->mm_rb)) + return -ENOMEM; + vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb); + if (vma->rb_subtree_gap < length) + return -ENOMEM; + + while (true) { + /* Visit right subtree if it looks promising */ + gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0; + if (gap_start <= high_limit && vma->vm_rb.rb_right) { + struct vm_area_struct *right = + rb_entry(vma->vm_rb.rb_right, + struct vm_area_struct, vm_rb); + if (right->rb_subtree_gap >= length) { + vma = right; + continue; + } + } + +check_current: + /* Check if current node has a suitable gap */ + gap_end = vma->vm_start; + if (gap_end < low_limit) + return -ENOMEM; + if (gap_start <= high_limit && gap_end - gap_start >= length) { + /* We found a suitable gap. Clip it with the original + * high_limit. 
*/ + if (gap_end > info->high_limit) + gap_end = info->high_limit; + + if (align_and_check(&gap_end, gap_start, info, + is_shader_code, is_same_4gb_page)) + return gap_end; + } + + /* Visit left subtree if it looks promising */ + if (vma->vm_rb.rb_left) { + struct vm_area_struct *left = + rb_entry(vma->vm_rb.rb_left, + struct vm_area_struct, vm_rb); + if (left->rb_subtree_gap >= length) { + vma = left; + continue; + } + } + + /* Go back up the rbtree to find next candidate node */ + while (true) { + struct rb_node *prev = &vma->vm_rb; + + if (!rb_parent(prev)) + return -ENOMEM; + vma = rb_entry(rb_parent(prev), + struct vm_area_struct, vm_rb); + if (prev == vma->vm_rb.rb_right) { + gap_start = vma->vm_prev ? + vma->vm_prev->vm_end : 0; + goto check_current; + } + } + } + + return -ENOMEM; +} + + +/* This function is based on Linux kernel's arch_get_unmapped_area, but + * simplified slightly. Modifications come from the fact that some values + * about the memory area are known in advance. + */ +unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx, + const unsigned long addr, const unsigned long len, + const unsigned long pgoff, const unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_unmapped_area_info info; + unsigned long align_offset = 0; + unsigned long align_mask = 0; + unsigned long high_limit = mm->mmap_base; + unsigned long low_limit = PAGE_SIZE; + int cpu_va_bits = BITS_PER_LONG; + int gpu_pc_bits = + kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size; + bool is_shader_code = false; + bool is_same_4gb_page = false; + unsigned long ret; + + /* err on fixed address */ + if ((flags & MAP_FIXED) || addr) + return -EINVAL; + +#ifdef CONFIG_64BIT + /* too big? 
*/ + if (len > TASK_SIZE - SZ_2M) + return -ENOMEM; + + if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { + + high_limit = min_t(unsigned long, mm->mmap_base, + (kctx->same_va_end << PAGE_SHIFT)); + + /* If there's enough (> 33 bits) of GPU VA space, align + * to 2MB boundaries. + */ + if (kctx->kbdev->gpu_props.mmu.va_bits > 33) { + if (len >= SZ_2M) { + align_offset = SZ_2M; + align_mask = SZ_2M - 1; + } + } + + low_limit = SZ_2M; + } else { + cpu_va_bits = 32; + } +#endif /* CONFIG_64BIT */ + if ((PFN_DOWN(BASE_MEM_COOKIE_BASE) <= pgoff) && + (PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) > pgoff)) { + int cookie = pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE); + struct kbase_va_region *reg; + + /* Need to hold gpu vm lock when using reg */ + kbase_gpu_vm_lock(kctx); + reg = kctx->pending_regions[cookie]; + if (!reg) { + kbase_gpu_vm_unlock(kctx); + return -EINVAL; + } + if (!(reg->flags & KBASE_REG_GPU_NX)) { + if (cpu_va_bits > gpu_pc_bits) { + align_offset = 1ULL << gpu_pc_bits; + align_mask = align_offset - 1; + is_shader_code = true; + } + } else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { + unsigned long extent_bytes = + (unsigned long)(reg->extent << PAGE_SHIFT); + /* kbase_check_alloc_sizes() already satisfies + * these checks, but they're here to avoid + * maintenance hazards due to the assumptions + * involved */ + WARN_ON(reg->extent > (ULONG_MAX >> PAGE_SHIFT)); + WARN_ON(reg->initial_commit > (ULONG_MAX >> PAGE_SHIFT)); + WARN_ON(!is_power_of_2(extent_bytes)); + align_mask = extent_bytes - 1; + align_offset = + extent_bytes - (reg->initial_commit << PAGE_SHIFT); + } else if (reg->flags & KBASE_REG_GPU_VA_SAME_4GB_PAGE) { + is_same_4gb_page = true; + } + kbase_gpu_vm_unlock(kctx); +#ifndef CONFIG_64BIT + } else { + return current->mm->get_unmapped_area( + kctx->filp, addr, len, pgoff, flags); +#endif + } + + info.flags = 0; + info.length = len; + info.low_limit = low_limit; + info.high_limit = high_limit; + info.align_offset = align_offset; + info.align_mask = 
align_mask; + + ret = kbase_unmapped_area_topdown(&info, is_shader_code, + is_same_4gb_page); + + if (IS_ERR_VALUE(ret) && high_limit == mm->mmap_base && + high_limit < (kctx->same_va_end << PAGE_SHIFT)) { + /* Retry above mmap_base */ + info.low_limit = mm->mmap_base; + info.high_limit = min_t(u64, TASK_SIZE, + (kctx->same_va_end << PAGE_SHIFT)); + + ret = kbase_unmapped_area_topdown(&info, is_shader_code, + is_same_4gb_page); + } + + return ret; +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/backend/mali_kbase_timeline_jm.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/backend/mali_kbase_timeline_jm.c new file mode 100644 index 0000000..c368ac7 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/backend/mali_kbase_timeline_jm.c @@ -0,0 +1,97 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "../mali_kbase_tracepoints.h" +#include "../mali_kbase_timeline.h" +#include "../mali_kbase_timeline_priv.h" + +#include + +void kbase_create_timeline_objects(struct kbase_device *kbdev) +{ + unsigned int lpu_id; + unsigned int as_nr; + struct kbase_context *kctx; + struct kbase_timeline *timeline = kbdev->timeline; + struct kbase_tlstream *summary = + &timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY]; + + /* Summarize the LPU objects. */ + for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) { + u32 *lpu = + &kbdev->gpu_props.props.raw_props.js_features[lpu_id]; + __kbase_tlstream_tl_new_lpu(summary, lpu, lpu_id, *lpu); + } + + /* Summarize the Address Space objects. */ + for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) + __kbase_tlstream_tl_new_as(summary, &kbdev->as[as_nr], as_nr); + + /* Create GPU object and make it retain all LPUs and address spaces. */ + __kbase_tlstream_tl_new_gpu(summary, + kbdev, + kbdev->gpu_props.props.raw_props.gpu_id, + kbdev->gpu_props.num_cores); + + for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) { + void *lpu = + &kbdev->gpu_props.props.raw_props.js_features[lpu_id]; + __kbase_tlstream_tl_lifelink_lpu_gpu(summary, lpu, kbdev); + } + + for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) + __kbase_tlstream_tl_lifelink_as_gpu(summary, + &kbdev->as[as_nr], + kbdev); + + /* Lock the context list, to ensure no changes to the list are made + * while we're summarizing the contexts and their contents. + */ + mutex_lock(&kbdev->kctx_list_lock); + + /* For each context in the device... */ + list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { + /* Summarize the context itself */ + __kbase_tlstream_tl_new_ctx(summary, + kctx, + kctx->id, + (u32)(kctx->tgid)); + }; + + /* Reset body stream buffers while holding the kctx lock. + * This ensures we can't fire both summary and normal tracepoints for + * the same objects. 
+ * If we weren't holding the lock, it's possible that the summarized + * objects could have been created, destroyed, or used after we + * constructed the summary stream tracepoints, but before we reset + * the body stream, resulting in losing those object event tracepoints. + */ + kbase_timeline_streams_body_reset(timeline); + + mutex_unlock(&kbdev->kctx_list_lock); + + /* Static object are placed into summary packet that needs to be + * transmitted first. Flush all streams to make it available to + * user space. + */ + kbase_timeline_streams_flush(timeline); +} \ No newline at end of file diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline.c new file mode 100644 index 0000000..88fba83 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline.c @@ -0,0 +1,274 @@ +/* + * + * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_timeline.h" +#include "mali_kbase_timeline_priv.h" +#include "mali_kbase_tracepoints.h" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* The period of autoflush checker execution in milliseconds. */ +#define AUTOFLUSH_INTERVAL 1000 /* ms */ + +/*****************************************************************************/ + +/* These values are used in mali_kbase_tracepoints.h + * to retrieve the streams from a kbase_timeline instance. + */ +const size_t __obj_stream_offset = + offsetof(struct kbase_timeline, streams) + + sizeof(struct kbase_tlstream) * TL_STREAM_TYPE_OBJ; + +const size_t __aux_stream_offset = + offsetof(struct kbase_timeline, streams) + + sizeof(struct kbase_tlstream) * TL_STREAM_TYPE_AUX; + +/** + * kbasep_timeline_autoflush_timer_callback - autoflush timer callback + * @timer: Timer list + * + * Timer is executed periodically to check if any of the stream contains + * buffer ready to be submitted to user space. + */ +static void kbasep_timeline_autoflush_timer_callback(struct timer_list *timer) +{ + enum tl_stream_type stype; + int rcode; + struct kbase_timeline *timeline = + container_of(timer, struct kbase_timeline, autoflush_timer); + + CSTD_UNUSED(timer); + + for (stype = (enum tl_stream_type)0; stype < TL_STREAM_TYPE_COUNT; + stype++) { + struct kbase_tlstream *stream = &timeline->streams[stype]; + + int af_cnt = atomic_read(&stream->autoflush_counter); + + /* Check if stream contain unflushed data. */ + if (af_cnt < 0) + continue; + + /* Check if stream should be flushed now. */ + if (af_cnt != atomic_cmpxchg( + &stream->autoflush_counter, + af_cnt, + af_cnt + 1)) + continue; + if (!af_cnt) + continue; + + /* Autoflush this stream. 
*/ + kbase_tlstream_flush_stream(stream); + } + + if (atomic_read(&timeline->autoflush_timer_active)) + rcode = mod_timer( + &timeline->autoflush_timer, + jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); + CSTD_UNUSED(rcode); +} + + + +/*****************************************************************************/ + +int kbase_timeline_init(struct kbase_timeline **timeline, + atomic_t *timeline_flags) +{ + enum tl_stream_type i; + struct kbase_timeline *result; + + if (!timeline || !timeline_flags) + return -EINVAL; + + result = kzalloc(sizeof(*result), GFP_KERNEL); + if (!result) + return -ENOMEM; + + mutex_init(&result->reader_lock); + init_waitqueue_head(&result->event_queue); + + /* Prepare stream structures. */ + for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) + kbase_tlstream_init(&result->streams[i], i, + &result->event_queue); + + /* Initialize autoflush timer. */ + atomic_set(&result->autoflush_timer_active, 0); + kbase_timer_setup(&result->autoflush_timer, + kbasep_timeline_autoflush_timer_callback); + result->timeline_flags = timeline_flags; + + + *timeline = result; + return 0; +} + +void kbase_timeline_term(struct kbase_timeline *timeline) +{ + enum tl_stream_type i; + + if (!timeline) + return; + + + for (i = (enum tl_stream_type)0; i < TL_STREAM_TYPE_COUNT; i++) + kbase_tlstream_term(&timeline->streams[i]); + + kfree(timeline); +} + +#ifdef CONFIG_MALI_DEVFREQ +static void kbase_tlstream_current_devfreq_target(struct kbase_device *kbdev) +{ + struct devfreq *devfreq = kbdev->devfreq; + + /* Devfreq initialization failure isn't a fatal error, so devfreq might + * be null. 
+ */ + if (devfreq) { + unsigned long cur_freq = 0; + + mutex_lock(&devfreq->lock); +#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE + cur_freq = kbdev->current_nominal_freq; +#else + cur_freq = devfreq->last_status.current_frequency; +#endif + KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(kbdev, (u64)cur_freq); + mutex_unlock(&devfreq->lock); + } +} +#endif /* CONFIG_MALI_DEVFREQ */ + +int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags) +{ + int ret; + u32 timeline_flags = TLSTREAM_ENABLED | flags; + struct kbase_timeline *timeline = kbdev->timeline; + + if (!atomic_cmpxchg(timeline->timeline_flags, 0, timeline_flags)) { + int rcode; + + ret = anon_inode_getfd( + "[mali_tlstream]", + &kbasep_tlstream_fops, + timeline, + O_RDONLY | O_CLOEXEC); + if (ret < 0) { + atomic_set(timeline->timeline_flags, 0); + return ret; + } + + /* Reset and initialize header streams. */ + kbase_tlstream_reset( + &timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY]); + + timeline->obj_header_btc = obj_desc_header_size; + timeline->aux_header_btc = aux_desc_header_size; + + /* Start autoflush timer. */ + atomic_set(&timeline->autoflush_timer_active, 1); + rcode = mod_timer( + &timeline->autoflush_timer, + jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); + CSTD_UNUSED(rcode); + + /* If job dumping is enabled, readjust the software event's + * timeout as the default value of 3 seconds is often + * insufficient. + */ + if (flags & BASE_TLSTREAM_JOB_DUMPING_ENABLED) { + dev_info(kbdev->dev, + "Job dumping is enabled, readjusting the software event's timeout\n"); + atomic_set(&kbdev->js_data.soft_job_timeout_ms, + 1800000); + } + + /* Summary stream was cleared during acquire. + * Create static timeline objects that will be + * read by client. + */ + kbase_create_timeline_objects(kbdev); + +#ifdef CONFIG_MALI_DEVFREQ + /* Devfreq target tracepoints are only fired when the target + * changes, so we won't know the current target unless we + * send it now. 
+ */ + kbase_tlstream_current_devfreq_target(kbdev); +#endif /* CONFIG_MALI_DEVFREQ */ + + } else { + ret = -EBUSY; + } + + return ret; +} + +void kbase_timeline_streams_flush(struct kbase_timeline *timeline) +{ + enum tl_stream_type stype; + + for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) + kbase_tlstream_flush_stream(&timeline->streams[stype]); +} + +void kbase_timeline_streams_body_reset(struct kbase_timeline *timeline) +{ + kbase_tlstream_reset( + &timeline->streams[TL_STREAM_TYPE_OBJ]); + kbase_tlstream_reset( + &timeline->streams[TL_STREAM_TYPE_AUX]); +} + +#if MALI_UNIT_TEST +void kbase_timeline_stats(struct kbase_timeline *timeline, + u32 *bytes_collected, u32 *bytes_generated) +{ + enum tl_stream_type stype; + + KBASE_DEBUG_ASSERT(bytes_collected); + + /* Accumulate bytes generated per stream */ + *bytes_generated = 0; + for (stype = (enum tl_stream_type)0; stype < TL_STREAM_TYPE_COUNT; + stype++) + *bytes_generated += atomic_read( + &timeline->streams[stype].bytes_generated); + + *bytes_collected = atomic_read(&timeline->bytes_collected); +} +#endif /* MALI_UNIT_TEST */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline.h new file mode 100644 index 0000000..cd48411 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline.h @@ -0,0 +1,121 @@ +/* + * + * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#if !defined(_KBASE_TIMELINE_H) +#define _KBASE_TIMELINE_H + +#include + +/*****************************************************************************/ + +struct kbase_timeline; + +/** + * kbase_timeline_init - initialize timeline infrastructure in kernel + * @timeline: Newly created instance of kbase_timeline will be stored in + * this pointer. + * @timeline_flags: Timeline status will be written to this variable when a + * client is attached/detached. The variable must be valid + * while timeline instance is valid. + * Return: zero on success, negative number on error + */ +int kbase_timeline_init(struct kbase_timeline **timeline, + atomic_t *timeline_flags); + +/** + * kbase_timeline_term - terminate timeline infrastructure in kernel + * + * @timeline: Timeline instance to be terminated. It must be previously created + * with kbase_timeline_init(). + */ +void kbase_timeline_term(struct kbase_timeline *timeline); + +/** + * kbase_timeline_io_acquire - acquire timeline stream file descriptor + * @kbdev: Kbase device + * @flags: Timeline stream flags + * + * This descriptor is meant to be used by userspace timeline to gain access to + * kernel timeline stream. This stream is later broadcasted by user space to the + * timeline client. + * Only one entity can own the descriptor at any given time. Descriptor shall be + * closed if unused. If descriptor cannot be obtained (i.e. when it is already + * being used) return will be a negative value. + * + * Return: file descriptor on success, negative number on error + */ +int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags); + +/** + * kbase_timeline_streams_flush - flush timeline streams. 
+ * @timeline: Timeline instance + * + * Function will flush pending data in all timeline streams. + */ +void kbase_timeline_streams_flush(struct kbase_timeline *timeline); + +/** + * kbase_timeline_streams_body_reset - reset timeline body streams. + * + * Function will discard pending data in all timeline body streams. + * @timeline: Timeline instance + */ +void kbase_timeline_streams_body_reset(struct kbase_timeline *timeline); + +#if MALI_UNIT_TEST +/** + * kbase_timeline_test - start timeline stream data generator + * @kbdev: Kernel common context + * @tpw_count: Number of trace point writers in each context + * @msg_delay: Time delay in milliseconds between trace points written by one + * writer + * @msg_count: Number of trace points written by one writer + * @aux_msg: If non-zero aux messages will be included + * + * This test starts a requested number of asynchronous writers in both IRQ and + * thread context. Each writer will generate required number of test + * tracepoints (tracepoints with embedded information about writer that + * should be verified by user space reader). Tracepoints will be emitted in + * all timeline body streams. If aux_msg is non-zero writer will also + * generate not testable tracepoints (tracepoints without information about + * writer). These tracepoints are used to check correctness of remaining + * timeline message generating functions. Writer will wait requested time + * between generating another set of messages. This call blocks until all + * writers finish. 
+ */ +void kbase_timeline_test( + struct kbase_device *kbdev, + unsigned int tpw_count, + unsigned int msg_delay, + unsigned int msg_count, + int aux_msg); + +/** + * kbase_timeline_stats - read timeline stream statistics + * @timeline: Timeline instance + * @bytes_collected: Will hold number of bytes read by the user + * @bytes_generated: Will hold number of bytes generated by trace points + */ +void kbase_timeline_stats(struct kbase_timeline *timeline, u32 *bytes_collected, u32 *bytes_generated); +#endif /* MALI_UNIT_TEST */ + +#endif /* _KBASE_TIMELINE_H */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline_io.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline_io.c new file mode 100644 index 0000000..cdde928 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline_io.c @@ -0,0 +1,329 @@ +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_timeline_priv.h" +#include "mali_kbase_tlstream.h" +#include "mali_kbase_tracepoints.h" + +#include + +/* The timeline stream file operations functions. 
*/ +static ssize_t kbasep_timeline_io_read( + struct file *filp, + char __user *buffer, + size_t size, + loff_t *f_pos); +static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait); +static int kbasep_timeline_io_release(struct inode *inode, struct file *filp); + +/* The timeline stream file operations structure. */ +const struct file_operations kbasep_tlstream_fops = { + .owner = THIS_MODULE, + .release = kbasep_timeline_io_release, + .read = kbasep_timeline_io_read, + .poll = kbasep_timeline_io_poll, +}; + +/** + * kbasep_timeline_io_packet_pending - check timeline streams for pending packets + * @timeline: Timeline instance + * @ready_stream: Pointer to variable where stream will be placed + * @rb_idx_raw: Pointer to variable where read buffer index will be placed + * + * Function checks all streams for pending packets. It will stop as soon as + * packet ready to be submitted to user space is detected. Variables under + * pointers, passed as the parameters to this function will be updated with + * values pointing to right stream and buffer. + * + * Return: non-zero if any of timeline streams has at least one packet ready + */ +static int kbasep_timeline_io_packet_pending( + struct kbase_timeline *timeline, + struct kbase_tlstream **ready_stream, + unsigned int *rb_idx_raw) +{ + enum tl_stream_type i; + + KBASE_DEBUG_ASSERT(ready_stream); + KBASE_DEBUG_ASSERT(rb_idx_raw); + + for (i = (enum tl_stream_type)0; i < TL_STREAM_TYPE_COUNT; ++i) { + struct kbase_tlstream *stream = &timeline->streams[i]; + *rb_idx_raw = atomic_read(&stream->rbi); + /* Read buffer index may be updated by writer in case of + * overflow. Read and write buffer indexes must be + * loaded in correct order. + */ + smp_rmb(); + if (atomic_read(&stream->wbi) != *rb_idx_raw) { + *ready_stream = stream; + return 1; + } + + } + + return 0; +} + +/** + * copy_stream_header() - copy timeline stream header. + * + * @buffer: Pointer to the buffer provided by user.
+ * @size: Maximum amount of data that can be stored in the buffer. + * @copy_len: Pointer to amount of bytes that has been copied already + * within the read system call. + * @hdr: Pointer to the stream header. + * @hdr_size: Header size. + * @hdr_btc: Pointer to the remaining number of bytes to copy. + * + * Returns: 0 if success, -1 otherwise. + */ +static inline int copy_stream_header( + char __user *buffer, size_t size, ssize_t *copy_len, + const char *hdr, + size_t hdr_size, + size_t *hdr_btc) +{ + const size_t offset = hdr_size - *hdr_btc; + const size_t copy_size = MIN(size - *copy_len, *hdr_btc); + + if (!*hdr_btc) + return 0; + + if (WARN_ON(*hdr_btc > hdr_size)) + return -1; + + if (copy_to_user(&buffer[*copy_len], &hdr[offset], copy_size)) + return -1; + + *hdr_btc -= copy_size; + *copy_len += copy_size; + + return 0; +} + +/** + * kbasep_timeline_copy_headers - copy timeline headers to the user + * @timeline: Timeline instance + * @buffer: Pointer to the buffer provided by user + * @size: Maximum amount of data that can be stored in the buffer + * @copy_len: Pointer to amount of bytes that has been copied already + * within the read system call. + * + * This helper function checks if timeline headers have not been sent + * to the user, and if so, sends them. copy_len is updated + * accordingly. + * + * Returns: 0 if success, -1 if copy_to_user has failed.
+ */ +static inline int kbasep_timeline_copy_headers( + struct kbase_timeline *timeline, + char __user *buffer, + size_t size, + ssize_t *copy_len) +{ + if (copy_stream_header(buffer, size, copy_len, + obj_desc_header, + obj_desc_header_size, + &timeline->obj_header_btc)) + return -1; + + if (copy_stream_header(buffer, size, copy_len, + aux_desc_header, + aux_desc_header_size, + &timeline->aux_header_btc)) + return -1; + return 0; +} + + +/** + * kbasep_timeline_io_read - copy data from streams to buffer provided by user + * @filp: Pointer to file structure + * @buffer: Pointer to the buffer provided by user + * @size: Maximum amount of data that can be stored in the buffer + * @f_pos: Pointer to file offset (unused) + * + * Return: number of bytes stored in the buffer, negative errno on failure + */ +static ssize_t kbasep_timeline_io_read( + struct file *filp, + char __user *buffer, + size_t size, + loff_t *f_pos) +{ + ssize_t copy_len = 0; + struct kbase_timeline *timeline; + + KBASE_DEBUG_ASSERT(filp); + KBASE_DEBUG_ASSERT(f_pos); + + if (WARN_ON(!filp->private_data)) + return -EFAULT; + + timeline = (struct kbase_timeline *) filp->private_data; + + if (!buffer) + return -EINVAL; + + if ((*f_pos < 0) || (size < PACKET_SIZE)) + return -EINVAL; + + mutex_lock(&timeline->reader_lock); + + while (copy_len < size) { + struct kbase_tlstream *stream = NULL; + unsigned int rb_idx_raw = 0; + unsigned int wb_idx_raw; + unsigned int rb_idx; + size_t rb_size; + + if (kbasep_timeline_copy_headers( + timeline, buffer, size, &copy_len)) { + copy_len = -EFAULT; + break; + } + + /* If we already read some packets and there is no + * packet pending then return back to user. + * If we don't have any data yet, wait for packet to be + * submitted.
+ */ + if (copy_len > 0) { + if (!kbasep_timeline_io_packet_pending( + timeline, + &stream, + &rb_idx_raw)) + break; + } else { + if (wait_event_interruptible( + timeline->event_queue, + kbasep_timeline_io_packet_pending( + timeline, + &stream, + &rb_idx_raw))) { + copy_len = -ERESTARTSYS; + break; + } + } + + if (WARN_ON(!stream)) { + copy_len = -EFAULT; + break; + } + + /* Check if this packet fits into the user buffer. + * If so copy its content. + */ + rb_idx = rb_idx_raw % PACKET_COUNT; + rb_size = atomic_read(&stream->buffer[rb_idx].size); + if (rb_size > size - copy_len) + break; + if (copy_to_user( + &buffer[copy_len], + stream->buffer[rb_idx].data, + rb_size)) { + copy_len = -EFAULT; + break; + } + + /* If the distance between read buffer index and write + * buffer index became more than PACKET_COUNT, then overflow + * happened and we need to ignore the last portion of bytes + * that we have just sent to user. + */ + smp_rmb(); + wb_idx_raw = atomic_read(&stream->wbi); + + if (wb_idx_raw - rb_idx_raw < PACKET_COUNT) { + copy_len += rb_size; + atomic_inc(&stream->rbi); +#if MALI_UNIT_TEST + atomic_add(rb_size, &timeline->bytes_collected); +#endif /* MALI_UNIT_TEST */ + + } else { + const unsigned int new_rb_idx_raw = + wb_idx_raw - PACKET_COUNT + 1; + /* Adjust read buffer index to the next valid buffer */ + atomic_set(&stream->rbi, new_rb_idx_raw); + } + } + + mutex_unlock(&timeline->reader_lock); + + return copy_len; +} + +/** + * kbasep_timeline_io_poll - poll timeline stream for packets + * @filp: Pointer to file structure + * @wait: Pointer to poll table + * Return: POLLIN if data is readable, POLLERR | POLLHUP on error, otherwise zero + */ +static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait) +{ + struct kbase_tlstream *stream; + unsigned int rb_idx; + struct kbase_timeline *timeline; + + KBASE_DEBUG_ASSERT(filp); + KBASE_DEBUG_ASSERT(wait); + + if (WARN_ON(!filp->private_data)) + return POLLERR | POLLHUP; + + timeline = (struct kbase_timeline
*) filp->private_data; + + poll_wait(filp, &timeline->event_queue, wait); + if (kbasep_timeline_io_packet_pending(timeline, &stream, &rb_idx)) + return POLLIN; + return 0; +} + +/** + * kbasep_timeline_io_release - release timeline stream descriptor + * @inode: Pointer to inode structure + * @filp: Pointer to file structure + * + * Return always return zero + */ +static int kbasep_timeline_io_release(struct inode *inode, struct file *filp) +{ + struct kbase_timeline *timeline; + + KBASE_DEBUG_ASSERT(inode); + KBASE_DEBUG_ASSERT(filp); + KBASE_DEBUG_ASSERT(filp->private_data); + + CSTD_UNUSED(inode); + + timeline = (struct kbase_timeline *) filp->private_data; + + + /* Stop autoflush timer before releasing access to streams. */ + atomic_set(&timeline->autoflush_timer_active, 0); + del_timer_sync(&timeline->autoflush_timer); + + atomic_set(timeline->timeline_flags, 0); + return 0; +} diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline_priv.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline_priv.h new file mode 100644 index 0000000..3596584 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline_priv.h @@ -0,0 +1,66 @@ +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#if !defined(_KBASE_TIMELINE_PRIV_H) +#define _KBASE_TIMELINE_PRIV_H + +#include +#include "mali_kbase_tlstream.h" + + +#include +#include +#include + +/** + * struct kbase_timeline - timeline state structure + * @streams: The timeline streams generated by kernel + * @autoflush_timer: Autoflush timer + * @autoflush_timer_active: If non-zero autoflush timer is active + * @reader_lock: Reader lock. Only one reader is allowed to + * have access to the timeline streams at any given time. + * @event_queue: Timeline stream event queue + * @bytes_collected: Number of bytes read by user + * @timeline_flags: Zero, if timeline is disabled. Timeline stream flags + * otherwise. See kbase_timeline_io_acquire(). + * @obj_header_btc: Remaining bytes to copy for the object stream header + * @aux_header_btc: Remaining bytes to copy for the aux stream header + */ +struct kbase_timeline { + struct kbase_tlstream streams[TL_STREAM_TYPE_COUNT]; + struct timer_list autoflush_timer; + atomic_t autoflush_timer_active; + struct mutex reader_lock; + wait_queue_head_t event_queue; +#if MALI_UNIT_TEST + atomic_t bytes_collected; +#endif /* MALI_UNIT_TEST */ + atomic_t *timeline_flags; + size_t obj_header_btc; + size_t aux_header_btc; +}; + +extern const struct file_operations kbasep_tlstream_fops; + +void kbase_create_timeline_objects(struct kbase_device *kbdev); + +#endif /* _KBASE_TIMELINE_PRIV_H */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tl_serialize.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tl_serialize.h new file mode 100644 index 0000000..3e37827 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tl_serialize.h @@ -0,0 +1,125 @@ +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#if !defined(_KBASE_TL_SERIALIZE_H) +#define _KBASE_TL_SERIALIZE_H + +#include + +#include + +/* The number of nanoseconds in a second. */ +#define NSECS_IN_SEC 1000000000ull /* ns */ + +/** + * kbasep_serialize_bytes - serialize bytes to the message buffer + * + * Serialize bytes as is using memcpy() + * + * @buffer: Message buffer + * @pos: Message buffer offset + * @bytes: Bytes to serialize + * @len: Length of bytes array + * + * Return: updated position in the buffer + */ +static inline size_t kbasep_serialize_bytes( + char *buffer, + size_t pos, + const void *bytes, + size_t len) +{ + KBASE_DEBUG_ASSERT(buffer); + KBASE_DEBUG_ASSERT(bytes); + + memcpy(&buffer[pos], bytes, len); + + return pos + len; +} + +/** + * kbasep_serialize_string - serialize string to the message buffer + * + * String is serialized as 4 bytes for string size, + * then string content and then null terminator. 
+ * + * @buffer: Message buffer + * @pos: Message buffer offset + * @string: String to serialize + * @max_write_size: Number of bytes that can be stored in buffer + * + * Return: updated position in the buffer + */ +static inline size_t kbasep_serialize_string( + char *buffer, + size_t pos, + const char *string, + size_t max_write_size) +{ + u32 string_len; + + KBASE_DEBUG_ASSERT(buffer); + KBASE_DEBUG_ASSERT(string); + /* Timeline string consists of at least string length and nul + * terminator. + */ + KBASE_DEBUG_ASSERT(max_write_size >= sizeof(string_len) + sizeof(char)); + max_write_size -= sizeof(string_len); + + string_len = strlcpy( + &buffer[pos + sizeof(string_len)], + string, + max_write_size); + /* strlcpy() returns strlen(string); clamp to the bytes actually stored. */ + string_len = (string_len < max_write_size) ? + string_len + sizeof(char) : max_write_size; + KBASE_DEBUG_ASSERT(string_len <= max_write_size); + + /* Update string length. */ + memcpy(&buffer[pos], &string_len, sizeof(string_len)); + + return pos + sizeof(string_len) + string_len; +} + +/** + * kbasep_serialize_timestamp - serialize timestamp to the message buffer + * + * Get current timestamp using ktime_get_raw_ns() + * and serialize it as 64 bit unsigned integer. + * + * @buffer: Message buffer + * @pos: Message buffer offset + * + * Return: updated position in the buffer + */ +static inline size_t kbasep_serialize_timestamp(void *buffer, size_t pos) +{ + u64 timestamp; + + timestamp = ktime_get_raw_ns(); + + return kbasep_serialize_bytes( + buffer, pos, + &timestamp, sizeof(timestamp)); +} +#endif /* _KBASE_TL_SERIALIZE_H */ + diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tlstream.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tlstream.c new file mode 100644 index 0000000..bec4be7 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tlstream.c @@ -0,0 +1,298 @@ +/* + * + * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved.
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include "mali_kbase_tlstream.h" +#include "mali_kbase_tl_serialize.h" +#include "mali_kbase_mipe_proto.h" + +/** + * kbasep_packet_header_setup - setup the packet header + * @buffer: pointer to the buffer + * @pkt_family: packet's family + * @pkt_class: packet's class + * @pkt_type: packet's type + * @stream_id: stream id + * @numbered: non-zero if this stream is numbered + * + * Function sets up immutable part of packet header in the given buffer. + */ +static void kbasep_packet_header_setup( + char *buffer, + enum tl_packet_family pkt_family, + enum tl_packet_class pkt_class, + enum tl_packet_type pkt_type, + unsigned int stream_id, + int numbered) +{ + u32 words[2] = { + MIPE_PACKET_HEADER_W0(pkt_family, pkt_class, pkt_type, stream_id), + MIPE_PACKET_HEADER_W1(0, !!numbered), + }; + memcpy(buffer, words, sizeof(words)); +} + +/** + * kbasep_packet_header_update - update the packet header + * @buffer: pointer to the buffer + * @data_size: amount of data carried in this packet + * @numbered: non-zero if the stream is numbered + * + * Function updates mutable part of packet header in the given buffer. + * Note that value of data_size must not include the size of the header.
+ */ +static void kbasep_packet_header_update( + char *buffer, + size_t data_size, + int numbered) +{ + u32 word0; + u32 word1 = MIPE_PACKET_HEADER_W1((u32)data_size, !!numbered); + + KBASE_DEBUG_ASSERT(buffer); + CSTD_UNUSED(word0); + + memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1)); +} + +/** + * kbasep_packet_number_update - update the packet number + * @buffer: pointer to the buffer + * @counter: value of packet counter for this packet's stream + * + * Function updates packet number embedded within the packet placed in the + * given buffer. + */ +static void kbasep_packet_number_update(char *buffer, u32 counter) +{ + KBASE_DEBUG_ASSERT(buffer); + + memcpy(&buffer[PACKET_HEADER_SIZE], &counter, sizeof(counter)); +} + +void kbase_tlstream_reset(struct kbase_tlstream *stream) +{ + unsigned int i; + + for (i = 0; i < PACKET_COUNT; i++) { + if (stream->numbered) + atomic_set( + &stream->buffer[i].size, + PACKET_HEADER_SIZE + + PACKET_NUMBER_SIZE); + else + atomic_set(&stream->buffer[i].size, PACKET_HEADER_SIZE); + } + + atomic_set(&stream->wbi, 0); + atomic_set(&stream->rbi, 0); +} + +/* Configuration of timeline streams generated by kernel. 
*/ +static const struct { + enum tl_packet_family pkt_family; + enum tl_packet_class pkt_class; + enum tl_packet_type pkt_type; + enum tl_stream_id stream_id; +} tl_stream_cfg[TL_STREAM_TYPE_COUNT] = { + { + TL_PACKET_FAMILY_TL, + TL_PACKET_CLASS_OBJ, + TL_PACKET_TYPE_SUMMARY, + TL_STREAM_ID_KERNEL, + }, + { + TL_PACKET_FAMILY_TL, + TL_PACKET_CLASS_OBJ, + TL_PACKET_TYPE_BODY, + TL_STREAM_ID_KERNEL, + }, + { + TL_PACKET_FAMILY_TL, + TL_PACKET_CLASS_AUX, + TL_PACKET_TYPE_BODY, + TL_STREAM_ID_KERNEL, + }, +}; + +void kbase_tlstream_init( + struct kbase_tlstream *stream, + enum tl_stream_type stream_type, + wait_queue_head_t *ready_read) +{ + unsigned int i; + + KBASE_DEBUG_ASSERT(stream); + KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type); + + spin_lock_init(&stream->lock); + + /* All packets carrying tracepoints shall be numbered. */ + if (TL_PACKET_TYPE_BODY == tl_stream_cfg[stream_type].pkt_type) + stream->numbered = 1; + else + stream->numbered = 0; + + for (i = 0; i < PACKET_COUNT; i++) + kbasep_packet_header_setup( + stream->buffer[i].data, + tl_stream_cfg[stream_type].pkt_family, + tl_stream_cfg[stream_type].pkt_class, + tl_stream_cfg[stream_type].pkt_type, + tl_stream_cfg[stream_type].stream_id, + stream->numbered); + +#if MALI_UNIT_TEST + atomic_set(&stream->bytes_generated, 0); +#endif + stream->ready_read = ready_read; + + kbase_tlstream_reset(stream); +} + +void kbase_tlstream_term(struct kbase_tlstream *stream) +{ + KBASE_DEBUG_ASSERT(stream); +} + +/** + * kbasep_tlstream_msgbuf_submit - submit packet to user space + * @stream: Pointer to the stream structure + * @wb_idx_raw: Write buffer index + * @wb_size: Length of data stored in the current buffer + * + * Updates currently written buffer with the packet header. + * Then write index is incremented and the buffer is handed to user space. + * Parameters of the new buffer are returned using provided arguments.
+ * + * Return: length of data in the new buffer + * + * Warning: the user must update the stream structure with returned value. + */ +static size_t kbasep_tlstream_msgbuf_submit( + struct kbase_tlstream *stream, + unsigned int wb_idx_raw, + unsigned int wb_size) +{ + unsigned int wb_idx = wb_idx_raw % PACKET_COUNT; + + /* Set stream as flushed. */ + atomic_set(&stream->autoflush_counter, -1); + + kbasep_packet_header_update( + stream->buffer[wb_idx].data, + wb_size - PACKET_HEADER_SIZE, + stream->numbered); + + if (stream->numbered) + kbasep_packet_number_update( + stream->buffer[wb_idx].data, + wb_idx_raw); + + /* Increasing write buffer index will expose this packet to the reader. + * As stream->lock is not taken on reader side we must make sure memory + * is updated correctly before this will happen. */ + smp_wmb(); + atomic_inc(&stream->wbi); + + /* Inform user that packets are ready for reading. */ + wake_up_interruptible(stream->ready_read); + + wb_size = PACKET_HEADER_SIZE; + if (stream->numbered) + wb_size += PACKET_NUMBER_SIZE; + + return wb_size; +} + +char *kbase_tlstream_msgbuf_acquire( + struct kbase_tlstream *stream, + size_t msg_size, + unsigned long *flags) __acquires(&stream->lock) +{ + unsigned int wb_idx_raw; + unsigned int wb_idx; + size_t wb_size; + + KBASE_DEBUG_ASSERT( + PACKET_SIZE - PACKET_HEADER_SIZE - PACKET_NUMBER_SIZE >= + msg_size); + + spin_lock_irqsave(&stream->lock, *flags); + + wb_idx_raw = atomic_read(&stream->wbi); + wb_idx = wb_idx_raw % PACKET_COUNT; + wb_size = atomic_read(&stream->buffer[wb_idx].size); + + /* Select next buffer if data will not fit into current one. */ + if (PACKET_SIZE < wb_size + msg_size) { + wb_size = kbasep_tlstream_msgbuf_submit( + stream, wb_idx_raw, wb_size); + wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; + } + + /* Reserve space in selected buffer. 
*/ + atomic_set(&stream->buffer[wb_idx].size, wb_size + msg_size); + +#if MALI_UNIT_TEST + atomic_add(msg_size, &stream->bytes_generated); +#endif /* MALI_UNIT_TEST */ + + return &stream->buffer[wb_idx].data[wb_size]; +} + +void kbase_tlstream_msgbuf_release( + struct kbase_tlstream *stream, + unsigned long flags) __releases(&stream->lock) +{ + /* Mark stream as containing unflushed data. */ + atomic_set(&stream->autoflush_counter, 0); + + spin_unlock_irqrestore(&stream->lock, flags); +} + +void kbase_tlstream_flush_stream( + struct kbase_tlstream *stream) +{ + unsigned long flags; + unsigned int wb_idx_raw; + unsigned int wb_idx; + size_t wb_size; + size_t min_size = PACKET_HEADER_SIZE; + + if (stream->numbered) + min_size += PACKET_NUMBER_SIZE; + + spin_lock_irqsave(&stream->lock, flags); + + wb_idx_raw = atomic_read(&stream->wbi); + wb_idx = wb_idx_raw % PACKET_COUNT; + wb_size = atomic_read(&stream->buffer[wb_idx].size); + + if (wb_size > min_size) { + wb_size = kbasep_tlstream_msgbuf_submit( + stream, wb_idx_raw, wb_size); + wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; + atomic_set(&stream->buffer[wb_idx].size, wb_size); + } + spin_unlock_irqrestore(&stream->lock, flags); +} + diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tlstream.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tlstream.h new file mode 100644 index 0000000..427bb09 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tlstream.h @@ -0,0 +1,166 @@ +/* + * + * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#if !defined(_KBASE_TLSTREAM_H) +#define _KBASE_TLSTREAM_H + +#include +#include +#include + +/* The maximum size of a single packet used by timeline. */ +#define PACKET_SIZE 4096 /* bytes */ + +/* The number of packets used by one timeline stream. */ +#if defined(CONFIG_MALI_JOB_DUMP) || defined(CONFIG_MALI_VECTOR_DUMP) + #define PACKET_COUNT 64 +#else + #define PACKET_COUNT 32 +#endif + +/* The maximum expected length of string in tracepoint descriptor. */ +#define STRLEN_MAX 64 /* bytes */ + +/** + * struct kbase_tlstream - timeline stream structure + * @lock: Message order lock + * @buffer: Array of buffers + * @wbi: Write buffer index + * @rbi: Read buffer index + * @numbered: If non-zero stream's packets are sequentially numbered + * @autoflush_counter: Counter tracking stream's autoflush state + * @ready_read: Pointer to a wait queue, which is signaled when + * timeline messages are ready for collection. + * @bytes_generated: Number of bytes generated by tracepoint messages + * + * This structure holds information needed to construct proper packets in the + * timeline stream. + * + * Each message in the sequence must bear a timestamp that is + * greater than the previous message in the same stream. For this reason + * a lock is held throughout the process of message creation. + * + * Each stream contains a set of buffers. Each buffer will hold one MIPE + * packet. In case there is no free space required to store the incoming + * message the oldest buffer is discarded. 
Each packet in timeline body + * stream has a sequence number embedded, this value must increment + * monotonically and is used by the packets receiver to discover these + * buffer overflows. + * + * The autoflush counter is set to a negative number when there is no data + * pending for flush and it is set to zero on every update of the buffer. The + * autoflush timer will increment the counter by one on every expiry. If there + * is no activity on the buffer for two consecutive timer expiries, the stream + * buffer will be flushed. + */ +struct kbase_tlstream { + spinlock_t lock; + + struct { + atomic_t size; /* number of bytes in buffer */ + char data[PACKET_SIZE]; /* buffer's data */ + } buffer[PACKET_COUNT]; + + atomic_t wbi; + atomic_t rbi; + + int numbered; + atomic_t autoflush_counter; + wait_queue_head_t *ready_read; +#if MALI_UNIT_TEST + atomic_t bytes_generated; +#endif +}; + +/* Types of streams generated by timeline. */ +enum tl_stream_type { + TL_STREAM_TYPE_FIRST, + TL_STREAM_TYPE_OBJ_SUMMARY = TL_STREAM_TYPE_FIRST, + TL_STREAM_TYPE_OBJ, + TL_STREAM_TYPE_AUX, + TL_STREAM_TYPE_COUNT +}; + +/** + * kbase_tlstream_init - initialize timeline stream + * @stream: Pointer to the stream structure + * @stream_type: Stream type + * @ready_read: Pointer to a wait queue to signal when + * timeline messages are ready for collection. + */ +void kbase_tlstream_init(struct kbase_tlstream *stream, + enum tl_stream_type stream_type, + wait_queue_head_t *ready_read); + +/** + * kbase_tlstream_term - terminate timeline stream + * @stream: Pointer to the stream structure + */ +void kbase_tlstream_term(struct kbase_tlstream *stream); + +/** + * kbase_tlstream_reset - reset stream + * @stream: Pointer to the stream structure + * + * Function discards all pending messages and resets packet counters. 
+ */ +void kbase_tlstream_reset(struct kbase_tlstream *stream); + +/** + * kbase_tlstream_msgbuf_acquire - lock selected stream and reserve a buffer + * @stream: Pointer to the stream structure + * @msg_size: Message size + * @flags: Pointer to store flags passed back on stream release + * + * Lock the stream and reserve the number of bytes requested + * in msg_size for the user. + * + * Return: pointer to the buffer where a message can be stored + * + * Warning: The stream must be released with kbase_tlstream_msgbuf_release(). + * Only atomic operations are allowed while the stream is locked + * (i.e. do not use any operation that may sleep). + */ +char *kbase_tlstream_msgbuf_acquire(struct kbase_tlstream *stream, + size_t msg_size, unsigned long *flags) __acquires(&stream->lock); + +/** + * kbase_tlstream_msgbuf_release - unlock selected stream + * @stream: Pointer to the stream structure + * @flags: Value obtained during stream acquire + * + * Release the stream that has been previously + * locked with a call to kbase_tlstream_msgbuf_acquire(). + */ +void kbase_tlstream_msgbuf_release(struct kbase_tlstream *stream, + unsigned long flags) __releases(&stream->lock); + +/** + * kbase_tlstream_flush_stream - flush stream + * @stream: Pointer to the stream structure + * + * Flush pending data in the timeline stream. + */ +void kbase_tlstream_flush_stream(struct kbase_tlstream *stream); + +#endif /* _KBASE_TLSTREAM_H */ + diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tracepoints.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tracepoints.c new file mode 100644 index 0000000..e445a3a --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tracepoints.c @@ -0,0 +1,3194 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * THIS FILE IS AUTOGENERATED BY mali_trace_generator.py. + * DO NOT EDIT. + */ + +#include "mali_kbase_tracepoints.h" +#include "mali_kbase_tlstream.h" +#include "mali_kbase_tl_serialize.h" + +/* clang-format off */ + +/* Message ids of trace events that are recorded in the timeline stream. 
*/ +enum tl_msg_id_obj { + KBASE_TL_NEW_CTX, + KBASE_TL_NEW_GPU, + KBASE_TL_NEW_LPU, + KBASE_TL_NEW_ATOM, + KBASE_TL_NEW_AS, + KBASE_TL_DEL_CTX, + KBASE_TL_DEL_ATOM, + KBASE_TL_LIFELINK_LPU_GPU, + KBASE_TL_LIFELINK_AS_GPU, + KBASE_TL_RET_CTX_LPU, + KBASE_TL_RET_ATOM_CTX, + KBASE_TL_RET_ATOM_LPU, + KBASE_TL_NRET_CTX_LPU, + KBASE_TL_NRET_ATOM_CTX, + KBASE_TL_NRET_ATOM_LPU, + KBASE_TL_RET_AS_CTX, + KBASE_TL_NRET_AS_CTX, + KBASE_TL_RET_ATOM_AS, + KBASE_TL_NRET_ATOM_AS, + KBASE_TL_ATTRIB_ATOM_CONFIG, + KBASE_TL_ATTRIB_ATOM_PRIORITY, + KBASE_TL_ATTRIB_ATOM_STATE, + KBASE_TL_ATTRIB_ATOM_PRIORITIZED, + KBASE_TL_ATTRIB_ATOM_JIT, + KBASE_TL_JIT_USEDPAGES, + KBASE_TL_ATTRIB_ATOM_JITALLOCINFO, + KBASE_TL_ATTRIB_ATOM_JITFREEINFO, + KBASE_TL_ATTRIB_AS_CONFIG, + KBASE_TL_EVENT_LPU_SOFTSTOP, + KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, + KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, + KBASE_TL_EVENT_ATOM_SOFTJOB_START, + KBASE_TL_EVENT_ATOM_SOFTJOB_END, + KBASE_TL_EVENT_ARB_GRANTED, + KBASE_TL_EVENT_ARB_STARTED, + KBASE_TL_EVENT_ARB_STOP_REQUESTED, + KBASE_TL_EVENT_ARB_STOPPED, + KBASE_JD_GPU_SOFT_RESET, + KBASE_TL_KBASE_NEW_DEVICE, + KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, + KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG, + KBASE_TL_KBASE_NEW_CTX, + KBASE_TL_KBASE_DEL_CTX, + KBASE_TL_KBASE_CTX_ASSIGN_AS, + KBASE_TL_KBASE_CTX_UNASSIGN_AS, + KBASE_TL_KBASE_NEW_KCPUQUEUE, + KBASE_TL_KBASE_DEL_KCPUQUEUE, + KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL, + KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT, + KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT, + KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT, + KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT, + KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET, + KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET, + KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET, + KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY, + KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY, + KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY, + 
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT, + KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT, + KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE, + KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC, + KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC, + KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC, + KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE, + KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE, + KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START, + KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, + KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, + KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START, + KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END, + KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END, + KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER, + KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW, + KBASE_OBJ_MSG_COUNT, +}; + +/* Message ids of trace events that are recorded in the auxiliary stream. 
*/ +enum tl_msg_id_aux { + KBASE_AUX_PM_STATE, + KBASE_AUX_PAGEFAULT, + KBASE_AUX_PAGESALLOC, + KBASE_AUX_DEVFREQ_TARGET, + KBASE_AUX_PROTECTED_ENTER_START, + KBASE_AUX_PROTECTED_ENTER_END, + KBASE_AUX_PROTECTED_LEAVE_START, + KBASE_AUX_PROTECTED_LEAVE_END, + KBASE_AUX_JIT_STATS, + KBASE_AUX_EVENT_JOB_SLOT, + KBASE_AUX_MSG_COUNT, +}; + +#define OBJ_TP_LIST \ + TRACEPOINT_DESC(KBASE_TL_NEW_CTX, \ + "object ctx is created", \ + "@pII", \ + "ctx,ctx_nr,tgid") \ + TRACEPOINT_DESC(KBASE_TL_NEW_GPU, \ + "object gpu is created", \ + "@pII", \ + "gpu,gpu_id,core_count") \ + TRACEPOINT_DESC(KBASE_TL_NEW_LPU, \ + "object lpu is created", \ + "@pII", \ + "lpu,lpu_nr,lpu_fn") \ + TRACEPOINT_DESC(KBASE_TL_NEW_ATOM, \ + "object atom is created", \ + "@pI", \ + "atom,atom_nr") \ + TRACEPOINT_DESC(KBASE_TL_NEW_AS, \ + "address space object is created", \ + "@pI", \ + "address_space,as_nr") \ + TRACEPOINT_DESC(KBASE_TL_DEL_CTX, \ + "context is destroyed", \ + "@p", \ + "ctx") \ + TRACEPOINT_DESC(KBASE_TL_DEL_ATOM, \ + "atom is destroyed", \ + "@p", \ + "atom") \ + TRACEPOINT_DESC(KBASE_TL_LIFELINK_LPU_GPU, \ + "lpu is deleted with gpu", \ + "@pp", \ + "lpu,gpu") \ + TRACEPOINT_DESC(KBASE_TL_LIFELINK_AS_GPU, \ + "address space is deleted with gpu", \ + "@pp", \ + "address_space,gpu") \ + TRACEPOINT_DESC(KBASE_TL_RET_CTX_LPU, \ + "context is retained by lpu", \ + "@pp", \ + "ctx,lpu") \ + TRACEPOINT_DESC(KBASE_TL_RET_ATOM_CTX, \ + "atom is retained by context", \ + "@pp", \ + "atom,ctx") \ + TRACEPOINT_DESC(KBASE_TL_RET_ATOM_LPU, \ + "atom is retained by lpu", \ + "@pps", \ + "atom,lpu,attrib_match_list") \ + TRACEPOINT_DESC(KBASE_TL_NRET_CTX_LPU, \ + "context is released by lpu", \ + "@pp", \ + "ctx,lpu") \ + TRACEPOINT_DESC(KBASE_TL_NRET_ATOM_CTX, \ + "atom is released by context", \ + "@pp", \ + "atom,ctx") \ + TRACEPOINT_DESC(KBASE_TL_NRET_ATOM_LPU, \ + "atom is released by lpu", \ + "@pp", \ + "atom,lpu") \ + TRACEPOINT_DESC(KBASE_TL_RET_AS_CTX, \ + "address space is retained by 
context", \ + "@pp", \ + "address_space,ctx") \ + TRACEPOINT_DESC(KBASE_TL_NRET_AS_CTX, \ + "address space is released by context", \ + "@pp", \ + "address_space,ctx") \ + TRACEPOINT_DESC(KBASE_TL_RET_ATOM_AS, \ + "atom is retained by address space", \ + "@pp", \ + "atom,address_space") \ + TRACEPOINT_DESC(KBASE_TL_NRET_ATOM_AS, \ + "atom is released by address space", \ + "@pp", \ + "atom,address_space") \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_CONFIG, \ + "atom job slot attributes", \ + "@pLLI", \ + "atom,descriptor,affinity,config") \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITY, \ + "atom priority", \ + "@pI", \ + "atom,prio") \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_STATE, \ + "atom state", \ + "@pI", \ + "atom,state") \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITIZED, \ + "atom caused priority change", \ + "@p", \ + "atom") \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_JIT, \ + "jit done for atom", \ + "@pLLILILLL", \ + "atom,edit_addr,new_addr,jit_flags,mem_flags,j_id,com_pgs,extent,va_pgs") \ + TRACEPOINT_DESC(KBASE_TL_JIT_USEDPAGES, \ + "used pages for jit", \ + "@LI", \ + "used_pages,j_id") \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_JITALLOCINFO, \ + "Information about JIT allocations", \ + "@pLLLIIIII", \ + "atom,va_pgs,com_pgs,extent,j_id,bin_id,max_allocs,jit_flags,usg_id") \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_JITFREEINFO, \ + "Information about JIT frees", \ + "@pI", \ + "atom,j_id") \ + TRACEPOINT_DESC(KBASE_TL_ATTRIB_AS_CONFIG, \ + "address space attributes", \ + "@pLLL", \ + "address_space,transtab,memattr,transcfg") \ + TRACEPOINT_DESC(KBASE_TL_EVENT_LPU_SOFTSTOP, \ + "softstop event on given lpu", \ + "@p", \ + "lpu") \ + TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, \ + "atom softstopped", \ + "@p", \ + "atom") \ + TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, \ + "atom softstop issued", \ + "@p", \ + "atom") \ + TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTJOB_START, \ + "atom soft job has started", \ + "@p", \ + "atom") \ + 
TRACEPOINT_DESC(KBASE_TL_EVENT_ATOM_SOFTJOB_END, \ + "atom soft job has completed", \ + "@p", \ + "atom") \ + TRACEPOINT_DESC(KBASE_TL_EVENT_ARB_GRANTED, \ + "Arbiter has granted gpu access", \ + "@p", \ + "gpu") \ + TRACEPOINT_DESC(KBASE_TL_EVENT_ARB_STARTED, \ + "Driver is running again and able to process jobs", \ + "@p", \ + "gpu") \ + TRACEPOINT_DESC(KBASE_TL_EVENT_ARB_STOP_REQUESTED, \ + "Arbiter has requested driver to stop using gpu", \ + "@p", \ + "gpu") \ + TRACEPOINT_DESC(KBASE_TL_EVENT_ARB_STOPPED, \ + "Driver has stopped using gpu", \ + "@p", \ + "gpu") \ + TRACEPOINT_DESC(KBASE_JD_GPU_SOFT_RESET, \ + "gpu soft reset", \ + "@p", \ + "gpu") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_DEVICE, \ + "New KBase Device", \ + "@IIII", \ + "kbase_device_id,kbase_device_gpu_core_count,kbase_device_max_num_csgs,kbase_device_as_count") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, \ + "CSG is programmed to a slot", \ + "@III", \ + "kbase_device_id,gpu_cmdq_grp_handle,kbase_device_csg_slot_index") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG, \ + "CSG is deprogrammed from a slot", \ + "@II", \ + "kbase_device_id,kbase_device_csg_slot_index") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_CTX, \ + "New KBase Context", \ + "@II", \ + "kernel_ctx_id,kbase_device_id") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_DEL_CTX, \ + "Delete KBase Context", \ + "@I", \ + "kernel_ctx_id") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_CTX_ASSIGN_AS, \ + "Address Space is assigned to a KBase context", \ + "@II", \ + "kernel_ctx_id,kbase_device_as_index") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_CTX_UNASSIGN_AS, \ + "Address Space is unassigned from a KBase context", \ + "@I", \ + "kernel_ctx_id") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_KCPUQUEUE, \ + "New KCPU Queue", \ + "@pII", \ + "kcpu_queue,kernel_ctx_id,kcpuq_num_pending_cmds") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_DEL_KCPUQUEUE, \ + "Delete KCPU Queue", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL, 
\ + "KCPU Queue enqueues Signal on Fence", \ + "@pp", \ + "kcpu_queue,fence") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT, \ + "KCPU Queue enqueues Wait on Fence", \ + "@pp", \ + "kcpu_queue,fence") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ + "Begin array of KCPU Queue enqueues Wait on Cross Queue Sync Object", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ + "Array item of KCPU Queue enqueues Wait on Cross Queue Sync Object", \ + "@pLI", \ + "kcpu_queue,cqs_obj_gpu_addr,cqs_obj_compare_value") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ + "End array of KCPU Queue enqueues Wait on Cross Queue Sync Object", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET, \ + "Begin array of KCPU Queue enqueues Set on Cross Queue Sync Object", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET, \ + "Array item of KCPU Queue enqueues Set on Cross Queue Sync Object", \ + "@pL", \ + "kcpu_queue,cqs_obj_gpu_addr") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET, \ + "End array of KCPU Queue enqueues Set on Cross Queue Sync Object", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \ + "Begin array of KCPU Queue enqueues Debug Copy", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \ + "Array item of KCPU Queue enqueues Debug Copy", \ + "@pL", \ + "kcpu_queue,debugcopy_dst_size") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \ + "End array of KCPU Queue enqueues Debug Copy", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT, \ + "KCPU Queue enqueues Map Import", \ + "@pL", \ + "kcpu_queue,map_import_buf_gpu_addr") \ + 
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT, \ + "KCPU Queue enqueues Unmap Import", \ + "@pL", \ + "kcpu_queue,map_import_buf_gpu_addr") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE, \ + "KCPU Queue enqueues Unmap Import ignoring reference count", \ + "@pL", \ + "kcpu_queue,map_import_buf_gpu_addr") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \ + "Begin array of KCPU Queue enqueues JIT Alloc", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \ + "Array item of KCPU Queue enqueues JIT Alloc", \ + "@pLLLLIIIII", \ + "kcpu_queue,jit_alloc_gpu_alloc_addr_dest,jit_alloc_va_pages,jit_alloc_commit_pages,jit_alloc_extent,jit_alloc_jit_id,jit_alloc_bin_id,jit_alloc_max_allocations,jit_alloc_flags,jit_alloc_usage_id") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \ + "End array of KCPU Queue enqueues JIT Alloc", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE, \ + "Begin array of KCPU Queue enqueues JIT Free", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE, \ + "Array item of KCPU Queue enqueues JIT Free", \ + "@pI", \ + "kcpu_queue,jit_alloc_jit_id") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE, \ + "End array of KCPU Queue enqueues JIT Free", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START, \ + "KCPU Queue starts a Signal on Fence", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END, \ + "KCPU Queue ends a Signal on Fence", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START, \ + "KCPU Queue starts a Wait on Fence", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END, \ + "KCPU Queue ends a Wait on 
Fence", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START, \ + "KCPU Queue starts a Wait on an array of Cross Queue Sync Objects", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END, \ + "KCPU Queue ends a Wait on an array of Cross Queue Sync Objects", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET, \ + "KCPU Queue executes a Set on an array of Cross Queue Sync Objects", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START, \ + "KCPU Queue starts an array of Debug Copys", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END, \ + "KCPU Queue ends an array of Debug Copys", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, \ + "KCPU Queue starts a Map Import", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END, \ + "KCPU Queue ends a Map Import", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START, \ + "KCPU Queue starts an Unmap Import", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END, \ + "KCPU Queue ends an Unmap Import", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START, \ + "KCPU Queue starts an Unmap Import ignoring reference count", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END, \ + "KCPU Queue ends an Unmap Import ignoring reference count", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START, \ + "KCPU Queue starts an array of JIT Allocs", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ + "Begin array of KCPU Queue ends an array of JIT 
Allocs", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ + "Array item of KCPU Queue ends an array of JIT Allocs", \ + "@pLL", \ + "kcpu_queue,jit_alloc_gpu_alloc_addr,jit_alloc_mmu_flags") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ + "End array of KCPU Queue ends an array of JIT Allocs", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START, \ + "KCPU Queue starts an array of JIT Frees", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ + "Begin array of KCPU Queue ends an array of JIT Frees", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ + "Array item of KCPU Queue ends an array of JIT Frees", \ + "@pL", \ + "kcpu_queue,jit_free_pages_used") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ + "End array of KCPU Queue ends an array of JIT Frees", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER, \ + "KCPU Queue executes an Error Barrier", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW, \ + "An overflow has happened with the CSFFW Timeline stream", \ + "@LL", \ + "csffw_timestamp,csffw_cycle") \ + +#define MIPE_HEADER_BLOB_VAR_NAME __obj_desc_header +#define MIPE_HEADER_STREAM_ID TL_STREAM_ID_KERNEL +#define MIPE_HEADER_PKT_CLASS TL_PACKET_CLASS_OBJ +#define MIPE_HEADER_TRACEPOINT_LIST OBJ_TP_LIST +#define MIPE_HEADER_TRACEPOINT_LIST_SIZE KBASE_OBJ_MSG_COUNT + +#include "mali_kbase_mipe_gen_header.h" + +const char *obj_desc_header = (const char *) &__obj_desc_header; +const size_t obj_desc_header_size = sizeof(__obj_desc_header); + +#define AUX_TP_LIST \ + TRACEPOINT_DESC(KBASE_AUX_PM_STATE, \ + "PM state", \ + "@IL", \ + "core_type,core_state_bitset") \ + TRACEPOINT_DESC(KBASE_AUX_PAGEFAULT, \ + 
"Page fault", \ + "@IIL", \ + "ctx_nr,as_nr,page_cnt_change") \ + TRACEPOINT_DESC(KBASE_AUX_PAGESALLOC, \ + "Total alloc pages change", \ + "@IL", \ + "ctx_nr,page_cnt") \ + TRACEPOINT_DESC(KBASE_AUX_DEVFREQ_TARGET, \ + "New device frequency target", \ + "@L", \ + "target_freq") \ + TRACEPOINT_DESC(KBASE_AUX_PROTECTED_ENTER_START, \ + "enter protected mode start", \ + "@p", \ + "gpu") \ + TRACEPOINT_DESC(KBASE_AUX_PROTECTED_ENTER_END, \ + "enter protected mode end", \ + "@p", \ + "gpu") \ + TRACEPOINT_DESC(KBASE_AUX_PROTECTED_LEAVE_START, \ + "leave protected mode start", \ + "@p", \ + "gpu") \ + TRACEPOINT_DESC(KBASE_AUX_PROTECTED_LEAVE_END, \ + "leave protected mode end", \ + "@p", \ + "gpu") \ + TRACEPOINT_DESC(KBASE_AUX_JIT_STATS, \ + "per-bin JIT statistics", \ + "@IIIIII", \ + "ctx_nr,bid,max_allocs,allocs,va_pages,ph_pages") \ + TRACEPOINT_DESC(KBASE_AUX_EVENT_JOB_SLOT, \ + "event on a given job slot", \ + "@pIII", \ + "ctx,slot_nr,atom_nr,event") \ + +#define MIPE_HEADER_BLOB_VAR_NAME __aux_desc_header +#define MIPE_HEADER_STREAM_ID TL_STREAM_ID_KERNEL +#define MIPE_HEADER_PKT_CLASS TL_PACKET_CLASS_AUX +#define MIPE_HEADER_TRACEPOINT_LIST AUX_TP_LIST +#define MIPE_HEADER_TRACEPOINT_LIST_SIZE KBASE_AUX_MSG_COUNT + +#include "mali_kbase_mipe_gen_header.h" + +const char *aux_desc_header = (const char *) &__aux_desc_header; +const size_t aux_desc_header_size = sizeof(__aux_desc_header); + +void __kbase_tlstream_tl_new_ctx( + struct kbase_tlstream *stream, + const void *ctx, + u32 ctx_nr, + u32 tgid) +{ + const u32 msg_id = KBASE_TL_NEW_CTX; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(ctx) + + sizeof(ctx_nr) + + sizeof(tgid) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &ctx, 
sizeof(ctx)); + pos = kbasep_serialize_bytes(buffer, + pos, &ctx_nr, sizeof(ctx_nr)); + pos = kbasep_serialize_bytes(buffer, + pos, &tgid, sizeof(tgid)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_new_gpu( + struct kbase_tlstream *stream, + const void *gpu, + u32 gpu_id, + u32 core_count) +{ + const u32 msg_id = KBASE_TL_NEW_GPU; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + + sizeof(gpu_id) + + sizeof(core_count) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu_id, sizeof(gpu_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &core_count, sizeof(core_count)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_new_lpu( + struct kbase_tlstream *stream, + const void *lpu, + u32 lpu_nr, + u32 lpu_fn) +{ + const u32 msg_id = KBASE_TL_NEW_LPU; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(lpu) + + sizeof(lpu_nr) + + sizeof(lpu_fn) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &lpu, sizeof(lpu)); + pos = kbasep_serialize_bytes(buffer, + pos, &lpu_nr, sizeof(lpu_nr)); + pos = kbasep_serialize_bytes(buffer, + pos, &lpu_fn, sizeof(lpu_fn)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_new_atom( + struct kbase_tlstream *stream, + const void *atom, + u32 atom_nr) +{ + const u32 msg_id = KBASE_TL_NEW_ATOM; + const size_t msg_size = 
sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + + sizeof(atom_nr) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + pos = kbasep_serialize_bytes(buffer, + pos, &atom_nr, sizeof(atom_nr)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_new_as( + struct kbase_tlstream *stream, + const void *address_space, + u32 as_nr) +{ + const u32 msg_id = KBASE_TL_NEW_AS; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(address_space) + + sizeof(as_nr) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &address_space, sizeof(address_space)); + pos = kbasep_serialize_bytes(buffer, + pos, &as_nr, sizeof(as_nr)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_del_ctx( + struct kbase_tlstream *stream, + const void *ctx) +{ + const u32 msg_id = KBASE_TL_DEL_CTX; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(ctx) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &ctx, sizeof(ctx)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_del_atom( + struct kbase_tlstream *stream, + const void *atom) +{ + const u32 msg_id = KBASE_TL_DEL_ATOM; + const size_t msg_size = 
sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_lifelink_lpu_gpu( + struct kbase_tlstream *stream, + const void *lpu, + const void *gpu) +{ + const u32 msg_id = KBASE_TL_LIFELINK_LPU_GPU; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(lpu) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &lpu, sizeof(lpu)); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_lifelink_as_gpu( + struct kbase_tlstream *stream, + const void *address_space, + const void *gpu) +{ + const u32 msg_id = KBASE_TL_LIFELINK_AS_GPU; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(address_space) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &address_space, sizeof(address_space)); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_ret_ctx_lpu( + struct kbase_tlstream *stream, + const void *ctx, + const void *lpu) +{ + const 
u32 msg_id = KBASE_TL_RET_CTX_LPU; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(ctx) + + sizeof(lpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &ctx, sizeof(ctx)); + pos = kbasep_serialize_bytes(buffer, + pos, &lpu, sizeof(lpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_ret_atom_ctx( + struct kbase_tlstream *stream, + const void *atom, + const void *ctx) +{ + const u32 msg_id = KBASE_TL_RET_ATOM_CTX; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + + sizeof(ctx) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + pos = kbasep_serialize_bytes(buffer, + pos, &ctx, sizeof(ctx)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_ret_atom_lpu( + struct kbase_tlstream *stream, + const void *atom, + const void *lpu, + const char *attrib_match_list) +{ + const u32 msg_id = KBASE_TL_RET_ATOM_LPU; + const size_t s0 = sizeof(u32) + sizeof(char) + + strnlen(attrib_match_list, STRLEN_MAX); + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + + sizeof(lpu) + + s0 + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + pos = 
kbasep_serialize_bytes(buffer, + pos, &lpu, sizeof(lpu)); + pos = kbasep_serialize_string(buffer, + pos, attrib_match_list, s0); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_nret_ctx_lpu( + struct kbase_tlstream *stream, + const void *ctx, + const void *lpu) +{ + const u32 msg_id = KBASE_TL_NRET_CTX_LPU; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(ctx) + + sizeof(lpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &ctx, sizeof(ctx)); + pos = kbasep_serialize_bytes(buffer, + pos, &lpu, sizeof(lpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_nret_atom_ctx( + struct kbase_tlstream *stream, + const void *atom, + const void *ctx) +{ + const u32 msg_id = KBASE_TL_NRET_ATOM_CTX; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + + sizeof(ctx) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + pos = kbasep_serialize_bytes(buffer, + pos, &ctx, sizeof(ctx)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_nret_atom_lpu( + struct kbase_tlstream *stream, + const void *atom, + const void *lpu) +{ + const u32 msg_id = KBASE_TL_NRET_ATOM_LPU; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + + sizeof(lpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = 
kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + pos = kbasep_serialize_bytes(buffer, + pos, &lpu, sizeof(lpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_ret_as_ctx( + struct kbase_tlstream *stream, + const void *address_space, + const void *ctx) +{ + const u32 msg_id = KBASE_TL_RET_AS_CTX; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(address_space) + + sizeof(ctx) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &address_space, sizeof(address_space)); + pos = kbasep_serialize_bytes(buffer, + pos, &ctx, sizeof(ctx)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_nret_as_ctx( + struct kbase_tlstream *stream, + const void *address_space, + const void *ctx) +{ + const u32 msg_id = KBASE_TL_NRET_AS_CTX; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(address_space) + + sizeof(ctx) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &address_space, sizeof(address_space)); + pos = kbasep_serialize_bytes(buffer, + pos, &ctx, sizeof(ctx)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_ret_atom_as( + struct kbase_tlstream *stream, + const void *atom, + const void *address_space) +{ + const u32 msg_id = KBASE_TL_RET_ATOM_AS; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + 
sizeof(atom) + + sizeof(address_space) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + pos = kbasep_serialize_bytes(buffer, + pos, &address_space, sizeof(address_space)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_nret_atom_as( + struct kbase_tlstream *stream, + const void *atom, + const void *address_space) +{ + const u32 msg_id = KBASE_TL_NRET_ATOM_AS; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + + sizeof(address_space) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + pos = kbasep_serialize_bytes(buffer, + pos, &address_space, sizeof(address_space)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_attrib_atom_config( + struct kbase_tlstream *stream, + const void *atom, + u64 descriptor, + u64 affinity, + u32 config) +{ + const u32 msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + + sizeof(descriptor) + + sizeof(affinity) + + sizeof(config) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + pos = kbasep_serialize_bytes(buffer, + pos, &descriptor, sizeof(descriptor)); + 
pos = kbasep_serialize_bytes(buffer, + pos, &affinity, sizeof(affinity)); + pos = kbasep_serialize_bytes(buffer, + pos, &config, sizeof(config)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_attrib_atom_priority( + struct kbase_tlstream *stream, + const void *atom, + u32 prio) +{ + const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + + sizeof(prio) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + pos = kbasep_serialize_bytes(buffer, + pos, &prio, sizeof(prio)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_attrib_atom_state( + struct kbase_tlstream *stream, + const void *atom, + u32 state) +{ + const u32 msg_id = KBASE_TL_ATTRIB_ATOM_STATE; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + + sizeof(state) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + pos = kbasep_serialize_bytes(buffer, + pos, &state, sizeof(state)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_attrib_atom_prioritized( + struct kbase_tlstream *stream, + const void *atom) +{ + const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITIZED; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); 
+ + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_attrib_atom_jit( + struct kbase_tlstream *stream, + const void *atom, + u64 edit_addr, + u64 new_addr, + u32 jit_flags, + u64 mem_flags, + u32 j_id, + u64 com_pgs, + u64 extent, + u64 va_pgs) +{ + const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JIT; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + + sizeof(edit_addr) + + sizeof(new_addr) + + sizeof(jit_flags) + + sizeof(mem_flags) + + sizeof(j_id) + + sizeof(com_pgs) + + sizeof(extent) + + sizeof(va_pgs) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + pos = kbasep_serialize_bytes(buffer, + pos, &edit_addr, sizeof(edit_addr)); + pos = kbasep_serialize_bytes(buffer, + pos, &new_addr, sizeof(new_addr)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_flags, sizeof(jit_flags)); + pos = kbasep_serialize_bytes(buffer, + pos, &mem_flags, sizeof(mem_flags)); + pos = kbasep_serialize_bytes(buffer, + pos, &j_id, sizeof(j_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &com_pgs, sizeof(com_pgs)); + pos = kbasep_serialize_bytes(buffer, + pos, &extent, sizeof(extent)); + pos = kbasep_serialize_bytes(buffer, + pos, &va_pgs, sizeof(va_pgs)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_jit_usedpages( + struct kbase_tlstream *stream, + u64 used_pages, + u32 j_id) +{ + const u32 msg_id = KBASE_TL_JIT_USEDPAGES; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(used_pages) + + sizeof(j_id) + ; + char *buffer; 
+ unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &used_pages, sizeof(used_pages)); + pos = kbasep_serialize_bytes(buffer, + pos, &j_id, sizeof(j_id)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_attrib_atom_jitallocinfo( + struct kbase_tlstream *stream, + const void *atom, + u64 va_pgs, + u64 com_pgs, + u64 extent, + u32 j_id, + u32 bin_id, + u32 max_allocs, + u32 jit_flags, + u32 usg_id) +{ + const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JITALLOCINFO; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + + sizeof(va_pgs) + + sizeof(com_pgs) + + sizeof(extent) + + sizeof(j_id) + + sizeof(bin_id) + + sizeof(max_allocs) + + sizeof(jit_flags) + + sizeof(usg_id) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + pos = kbasep_serialize_bytes(buffer, + pos, &va_pgs, sizeof(va_pgs)); + pos = kbasep_serialize_bytes(buffer, + pos, &com_pgs, sizeof(com_pgs)); + pos = kbasep_serialize_bytes(buffer, + pos, &extent, sizeof(extent)); + pos = kbasep_serialize_bytes(buffer, + pos, &j_id, sizeof(j_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &bin_id, sizeof(bin_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &max_allocs, sizeof(max_allocs)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_flags, sizeof(jit_flags)); + pos = kbasep_serialize_bytes(buffer, + pos, &usg_id, sizeof(usg_id)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_attrib_atom_jitfreeinfo( + struct 
kbase_tlstream *stream, + const void *atom, + u32 j_id) +{ + const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JITFREEINFO; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + + sizeof(j_id) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + pos = kbasep_serialize_bytes(buffer, + pos, &j_id, sizeof(j_id)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_attrib_as_config( + struct kbase_tlstream *stream, + const void *address_space, + u64 transtab, + u64 memattr, + u64 transcfg) +{ + const u32 msg_id = KBASE_TL_ATTRIB_AS_CONFIG; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(address_space) + + sizeof(transtab) + + sizeof(memattr) + + sizeof(transcfg) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &address_space, sizeof(address_space)); + pos = kbasep_serialize_bytes(buffer, + pos, &transtab, sizeof(transtab)); + pos = kbasep_serialize_bytes(buffer, + pos, &memattr, sizeof(memattr)); + pos = kbasep_serialize_bytes(buffer, + pos, &transcfg, sizeof(transcfg)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_event_lpu_softstop( + struct kbase_tlstream *stream, + const void *lpu) +{ + const u32 msg_id = KBASE_TL_EVENT_LPU_SOFTSTOP; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(lpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + 
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &lpu, sizeof(lpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_event_atom_softstop_ex( + struct kbase_tlstream *stream, + const void *atom) +{ + const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_EX; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_event_atom_softstop_issue( + struct kbase_tlstream *stream, + const void *atom) +{ + const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_event_atom_softjob_start( + struct kbase_tlstream *stream, + const void *atom) +{ + const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_START; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = 
kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_event_atom_softjob_end( + struct kbase_tlstream *stream, + const void *atom) +{ + const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(atom) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &atom, sizeof(atom)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_event_arb_granted( + struct kbase_tlstream *stream, + const void *gpu) +{ + const u32 msg_id = KBASE_TL_EVENT_ARB_GRANTED; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_event_arb_started( + struct kbase_tlstream *stream, + const void *gpu) +{ + const u32 msg_id = KBASE_TL_EVENT_ARB_STARTED; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_event_arb_stop_requested( 
+ struct kbase_tlstream *stream, + const void *gpu) +{ + const u32 msg_id = KBASE_TL_EVENT_ARB_STOP_REQUESTED; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_event_arb_stopped( + struct kbase_tlstream *stream, + const void *gpu) +{ + const u32 msg_id = KBASE_TL_EVENT_ARB_STOPPED; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_jd_gpu_soft_reset( + struct kbase_tlstream *stream, + const void *gpu) +{ + const u32 msg_id = KBASE_JD_GPU_SOFT_RESET; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_aux_pm_state( + struct kbase_tlstream *stream, + u32 core_type, + u64 core_state_bitset) +{ + const u32 msg_id = KBASE_AUX_PM_STATE; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + 
sizeof(core_type) + + sizeof(core_state_bitset) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &core_type, sizeof(core_type)); + pos = kbasep_serialize_bytes(buffer, + pos, &core_state_bitset, sizeof(core_state_bitset)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_aux_pagefault( + struct kbase_tlstream *stream, + u32 ctx_nr, + u32 as_nr, + u64 page_cnt_change) +{ + const u32 msg_id = KBASE_AUX_PAGEFAULT; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(ctx_nr) + + sizeof(as_nr) + + sizeof(page_cnt_change) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &ctx_nr, sizeof(ctx_nr)); + pos = kbasep_serialize_bytes(buffer, + pos, &as_nr, sizeof(as_nr)); + pos = kbasep_serialize_bytes(buffer, + pos, &page_cnt_change, sizeof(page_cnt_change)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_aux_pagesalloc( + struct kbase_tlstream *stream, + u32 ctx_nr, + u64 page_cnt) +{ + const u32 msg_id = KBASE_AUX_PAGESALLOC; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(ctx_nr) + + sizeof(page_cnt) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &ctx_nr, sizeof(ctx_nr)); + pos = kbasep_serialize_bytes(buffer, + pos, 
&page_cnt, sizeof(page_cnt)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_aux_devfreq_target( + struct kbase_tlstream *stream, + u64 target_freq) +{ + const u32 msg_id = KBASE_AUX_DEVFREQ_TARGET; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(target_freq) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &target_freq, sizeof(target_freq)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_aux_protected_enter_start( + struct kbase_tlstream *stream, + const void *gpu) +{ + const u32 msg_id = KBASE_AUX_PROTECTED_ENTER_START; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_aux_protected_enter_end( + struct kbase_tlstream *stream, + const void *gpu) +{ + const u32 msg_id = KBASE_AUX_PROTECTED_ENTER_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_aux_protected_leave_start( + struct 
kbase_tlstream *stream, + const void *gpu) +{ + const u32 msg_id = KBASE_AUX_PROTECTED_LEAVE_START; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_aux_protected_leave_end( + struct kbase_tlstream *stream, + const void *gpu) +{ + const u32 msg_id = KBASE_AUX_PROTECTED_LEAVE_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_aux_jit_stats( + struct kbase_tlstream *stream, + u32 ctx_nr, + u32 bid, + u32 max_allocs, + u32 allocs, + u32 va_pages, + u32 ph_pages) +{ + const u32 msg_id = KBASE_AUX_JIT_STATS; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(ctx_nr) + + sizeof(bid) + + sizeof(max_allocs) + + sizeof(allocs) + + sizeof(va_pages) + + sizeof(ph_pages) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &ctx_nr, sizeof(ctx_nr)); + pos = kbasep_serialize_bytes(buffer, + pos, &bid, sizeof(bid)); + pos = kbasep_serialize_bytes(buffer, + pos, 
&max_allocs, sizeof(max_allocs)); + pos = kbasep_serialize_bytes(buffer, + pos, &allocs, sizeof(allocs)); + pos = kbasep_serialize_bytes(buffer, + pos, &va_pages, sizeof(va_pages)); + pos = kbasep_serialize_bytes(buffer, + pos, &ph_pages, sizeof(ph_pages)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_aux_event_job_slot( + struct kbase_tlstream *stream, + const void *ctx, + u32 slot_nr, + u32 atom_nr, + u32 event) +{ + const u32 msg_id = KBASE_AUX_EVENT_JOB_SLOT; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(ctx) + + sizeof(slot_nr) + + sizeof(atom_nr) + + sizeof(event) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &ctx, sizeof(ctx)); + pos = kbasep_serialize_bytes(buffer, + pos, &slot_nr, sizeof(slot_nr)); + pos = kbasep_serialize_bytes(buffer, + pos, &atom_nr, sizeof(atom_nr)); + pos = kbasep_serialize_bytes(buffer, + pos, &event, sizeof(event)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_new_device( + struct kbase_tlstream *stream, + u32 kbase_device_id, + u32 kbase_device_gpu_core_count, + u32 kbase_device_max_num_csgs, + u32 kbase_device_as_count) +{ + const u32 msg_id = KBASE_TL_KBASE_NEW_DEVICE; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kbase_device_id) + + sizeof(kbase_device_gpu_core_count) + + sizeof(kbase_device_max_num_csgs) + + sizeof(kbase_device_as_count) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, 
&kbase_device_id, sizeof(kbase_device_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_gpu_core_count, sizeof(kbase_device_gpu_core_count)); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_max_num_csgs, sizeof(kbase_device_max_num_csgs)); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_as_count, sizeof(kbase_device_as_count)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_device_program_csg( + struct kbase_tlstream *stream, + u32 kbase_device_id, + u32 gpu_cmdq_grp_handle, + u32 kbase_device_csg_slot_index) +{ + const u32 msg_id = KBASE_TL_KBASE_DEVICE_PROGRAM_CSG; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kbase_device_id) + + sizeof(gpu_cmdq_grp_handle) + + sizeof(kbase_device_csg_slot_index) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_id, sizeof(kbase_device_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu_cmdq_grp_handle, sizeof(gpu_cmdq_grp_handle)); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_device_deprogram_csg( + struct kbase_tlstream *stream, + u32 kbase_device_id, + u32 kbase_device_csg_slot_index) +{ + const u32 msg_id = KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kbase_device_id) + + sizeof(kbase_device_csg_slot_index) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = 
kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_id, sizeof(kbase_device_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_new_ctx( + struct kbase_tlstream *stream, + u32 kernel_ctx_id, + u32 kbase_device_id) +{ + const u32 msg_id = KBASE_TL_KBASE_NEW_CTX; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kernel_ctx_id) + + sizeof(kbase_device_id) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_id, sizeof(kbase_device_id)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_del_ctx( + struct kbase_tlstream *stream, + u32 kernel_ctx_id) +{ + const u32 msg_id = KBASE_TL_KBASE_DEL_CTX; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kernel_ctx_id) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_ctx_assign_as( + struct kbase_tlstream *stream, + u32 kernel_ctx_id, + u32 kbase_device_as_index) +{ + const u32 msg_id = KBASE_TL_KBASE_CTX_ASSIGN_AS; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kernel_ctx_id) + + 
sizeof(kbase_device_as_index) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &kbase_device_as_index, sizeof(kbase_device_as_index)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_ctx_unassign_as( + struct kbase_tlstream *stream, + u32 kernel_ctx_id) +{ + const u32 msg_id = KBASE_TL_KBASE_CTX_UNASSIGN_AS; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kernel_ctx_id) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_new_kcpuqueue( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u32 kernel_ctx_id, + u32 kcpuq_num_pending_cmds) +{ + const u32 msg_id = KBASE_TL_KBASE_NEW_KCPUQUEUE; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(kernel_ctx_id) + + sizeof(kcpuq_num_pending_cmds) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &kernel_ctx_id, sizeof(kernel_ctx_id)); + pos = kbasep_serialize_bytes(buffer, + 
pos, &kcpuq_num_pending_cmds, sizeof(kcpuq_num_pending_cmds)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_del_kcpuqueue( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_DEL_KCPUQUEUE; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_signal( + struct kbase_tlstream *stream, + const void *kcpu_queue, + const void *fence) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(fence) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &fence, sizeof(fence)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_wait( + struct kbase_tlstream *stream, + const void *kcpu_queue, + const void *fence) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(fence) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = 
kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &fence, sizeof(fence)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_cqs_wait( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_cqs_wait( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 cqs_obj_gpu_addr, + u32 cqs_obj_compare_value) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(cqs_obj_gpu_addr) + + sizeof(cqs_obj_compare_value) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr)); + pos = kbasep_serialize_bytes(buffer, + pos, &cqs_obj_compare_value, sizeof(cqs_obj_compare_value)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} 
+ +/* Writes msg_id, a timestamp and the kcpu_queue pointer value (not the pointee) into a buffer acquired from stream, then releases the buffer. The sizeof(u64) term in msg_size presumably covers the timestamp - TODO confirm. */ void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_cqs_wait( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value into a buffer acquired from stream, then releases the buffer. */ void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_cqs_set( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp, the kcpu_queue pointer value and cqs_obj_gpu_addr into a buffer acquired from stream, then releases the buffer. */ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_cqs_set( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 cqs_obj_gpu_addr) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(cqs_obj_gpu_addr) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = 
kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value (not the pointee) into a buffer acquired from stream, then releases the buffer. The sizeof(u64) term in msg_size presumably covers the timestamp - TODO confirm. */ void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_cqs_set( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value into a buffer acquired from stream, then releases the buffer. */ void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_debugcopy( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp, the kcpu_queue pointer value and debugcopy_dst_size into a buffer acquired from stream, then releases the buffer. */ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_debugcopy( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 debugcopy_dst_size) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(debugcopy_dst_size) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 
0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &debugcopy_dst_size, sizeof(debugcopy_dst_size)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value (not the pointee) into a buffer acquired from stream, then releases the buffer. The sizeof(u64) term in msg_size presumably covers the timestamp - TODO confirm. */ void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_debugcopy( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp, the kcpu_queue pointer value and map_import_buf_gpu_addr into a buffer acquired from stream, then releases the buffer. */ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 map_import_buf_gpu_addr) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(map_import_buf_gpu_addr) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void 
__kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 map_import_buf_gpu_addr) +{ /* Writes msg_id, a timestamp, the kcpu_queue pointer value and map_import_buf_gpu_addr into a buffer acquired from stream, then releases the buffer. */ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(map_import_buf_gpu_addr) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Same field layout as the non-force unmap message, but tagged with the ..._UNMAP_IMPORT_FORCE msg_id: msg_id, a timestamp, the kcpu_queue pointer value and map_import_buf_gpu_addr. */ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 map_import_buf_gpu_addr) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(map_import_buf_gpu_addr) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value into a buffer acquired from stream, then releases the buffer. */ void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_alloc( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char 
*buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp, the kcpu_queue pointer value and then the nine jit_alloc_* arguments, in parameter order, into a buffer acquired from stream; msg_size is the sum of the sizes of exactly those fields plus sizeof(u64), which presumably covers the timestamp - TODO confirm. */ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_alloc( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 jit_alloc_gpu_alloc_addr_dest, + u64 jit_alloc_va_pages, + u64 jit_alloc_commit_pages, + u64 jit_alloc_extent, + u32 jit_alloc_jit_id, + u32 jit_alloc_bin_id, + u32 jit_alloc_max_allocations, + u32 jit_alloc_flags, + u32 jit_alloc_usage_id) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(jit_alloc_gpu_alloc_addr_dest) + + sizeof(jit_alloc_va_pages) + + sizeof(jit_alloc_commit_pages) + + sizeof(jit_alloc_extent) + + sizeof(jit_alloc_jit_id) + + sizeof(jit_alloc_bin_id) + + sizeof(jit_alloc_max_allocations) + + sizeof(jit_alloc_flags) + + sizeof(jit_alloc_usage_id) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_alloc_gpu_alloc_addr_dest, sizeof(jit_alloc_gpu_alloc_addr_dest)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_alloc_va_pages, sizeof(jit_alloc_va_pages)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_alloc_commit_pages, sizeof(jit_alloc_commit_pages)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_alloc_extent, sizeof(jit_alloc_extent)); + pos = 
kbasep_serialize_bytes(buffer, + pos, &jit_alloc_jit_id, sizeof(jit_alloc_jit_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_alloc_bin_id, sizeof(jit_alloc_bin_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_alloc_max_allocations, sizeof(jit_alloc_max_allocations)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_alloc_flags, sizeof(jit_alloc_flags)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_alloc_usage_id, sizeof(jit_alloc_usage_id)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value (not the pointee) into a buffer acquired from stream, then releases the buffer. The sizeof(u64) term in msg_size presumably covers the timestamp - TODO confirm. */ void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_alloc( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value into a buffer acquired from stream, then releases the buffer. */ void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_free( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp, the kcpu_queue pointer value and jit_alloc_jit_id into a buffer acquired from stream, then releases the buffer. */ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_free( + struct kbase_tlstream 
*stream, + const void *kcpu_queue, + u32 jit_alloc_jit_id) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(jit_alloc_jit_id) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_alloc_jit_id, sizeof(jit_alloc_jit_id)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value (not the pointee) into a buffer acquired from stream, then releases the buffer. The sizeof(u64) term in msg_size presumably covers the timestamp - TODO confirm. */ void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_free( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value into a buffer acquired from stream, then releases the buffer. */ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = 
kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value (not the pointee) into a buffer acquired from stream, then releases the buffer. The sizeof(u64) term in msg_size presumably covers the timestamp - TODO confirm. */ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value into a buffer acquired from stream, then releases the buffer. */ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_start( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value into a buffer acquired from stream, then releases the buffer. */ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = 
kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value (not the pointee) into a buffer acquired from stream, then releases the buffer. The sizeof(u64) term in msg_size presumably covers the timestamp - TODO confirm. */ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_start( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value into a buffer acquired from stream, then releases the buffer. */ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value into a buffer acquired from stream, then releases the buffer. */ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); 
+ pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value (not the pointee) into a buffer acquired from stream, then releases the buffer. The sizeof(u64) term in msg_size presumably covers the timestamp - TODO confirm. */ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_debugcopy_start( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value into a buffer acquired from stream, then releases the buffer. */ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_debugcopy_end( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value into a buffer acquired from stream, then releases the buffer. */ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, 
pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value (not the pointee) into a buffer acquired from stream, then releases the buffer. The sizeof(u64) term in msg_size presumably covers the timestamp - TODO confirm. */ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value into a buffer acquired from stream, then releases the buffer. */ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_start( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value into a buffer acquired from stream, then releases the buffer. */ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + 
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value (not the pointee) into a buffer acquired from stream, then releases the buffer. The sizeof(u64) term in msg_size presumably covers the timestamp - TODO confirm. */ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_start( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value into a buffer acquired from stream, then releases the buffer. */ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value into a buffer acquired from stream, then releases the buffer. */ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_alloc_start( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = 
kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value (not the pointee) into a buffer acquired from stream, then releases the buffer. The sizeof(u64) term in msg_size presumably covers the timestamp - TODO confirm. */ void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_alloc_end( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp, the kcpu_queue pointer value, jit_alloc_gpu_alloc_addr and jit_alloc_mmu_flags into a buffer acquired from stream, then releases the buffer. */ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 jit_alloc_gpu_alloc_addr, + u64 jit_alloc_mmu_flags) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(jit_alloc_gpu_alloc_addr) + + sizeof(jit_alloc_mmu_flags) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_alloc_gpu_alloc_addr, sizeof(jit_alloc_gpu_alloc_addr)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_alloc_mmu_flags, sizeof(jit_alloc_mmu_flags)); + 
kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value (not the pointee) into a buffer acquired from stream, then releases the buffer. The sizeof(u64) term in msg_size presumably covers the timestamp - TODO confirm. */ void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_alloc_end( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value into a buffer acquired from stream, then releases the buffer. */ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_free_start( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value into a buffer acquired from stream, then releases the buffer. */ void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_free_end( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = 
kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp, the kcpu_queue pointer value (not the pointee) and jit_free_pages_used into a buffer acquired from stream, then releases the buffer. The sizeof(u64) term in msg_size presumably covers the timestamp - TODO confirm. */ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 jit_free_pages_used) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + + sizeof(jit_free_pages_used) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &jit_free_pages_used, sizeof(jit_free_pages_used)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value into a buffer acquired from stream, then releases the buffer. */ void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp and the kcpu_queue pointer value into a buffer acquired from stream, then releases the buffer. */ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_errorbarrier( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t 
pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* Writes msg_id, a timestamp taken by kbasep_serialize_timestamp(), then the caller-supplied csffw_timestamp and csffw_cycle into a buffer acquired from stream, and releases the buffer. Unlike the kcpuqueue messages in this file, no object pointer is serialized. */ void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( + struct kbase_tlstream *stream, + u64 csffw_timestamp, + u64 csffw_cycle) +{ + const u32 msg_id = KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(csffw_timestamp) + + sizeof(csffw_cycle) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &csffw_timestamp, sizeof(csffw_timestamp)); + pos = kbasep_serialize_bytes(buffer, + pos, &csffw_cycle, sizeof(csffw_cycle)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +/* clang-format on */ diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tracepoints.h b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tracepoints.h new file mode 100644 index 0000000..7ea8ba8 --- /dev/null +++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tracepoints.h @@ -0,0 +1,2381 @@ +/* + * + * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * THIS FILE IS AUTOGENERATED BY mali_trace_generator.py. + * DO NOT EDIT. + */ + +#if !defined(_KBASE_TRACEPOINTS_H) +#define _KBASE_TRACEPOINTS_H + +/* Tracepoints are abstract callbacks notifying that some important + * software or hardware event has happened. + * + * In this particular implementation, a tracepoint results in a MIPE + * timeline event and, in some cases, it also fires an ftrace event + * (a.k.a. Gator events, see details below). + */ + +#include "mali_kbase.h" +#include "mali_kbase_gator.h" + +#include +#include /* NOTE(review): the two bare #include directives above carry no header name in this copy; restore the original header names before building. */ + +/* clang-format off */ + +struct kbase_tlstream; + +extern const size_t __obj_stream_offset; +extern const size_t __aux_stream_offset; + +/* This macro dispatches a kbase_tlstream from + * a kbase_device instance. Only AUX or OBJ + * streams can be dispatched. It is aware of + * kbase_timeline binary representation and + * relies on offset variables: + * __obj_stream_offset and __aux_stream_offset. stype is token-pasted into that variable name, so it must be the literal obj or aux; kbdev is parenthesized so any pointer expression can be passed. + */ +#define __TL_DISPATCH_STREAM(kbdev, stype) \ + ((struct kbase_tlstream *) \ + ((u8 *)(kbdev)->timeline + __ ## stype ## _stream_offset)) + +struct tp_desc; + +/* Descriptors of timeline messages transmitted in object events stream. */ +extern const char *obj_desc_header; +extern const size_t obj_desc_header_size; +/* Descriptors of timeline messages transmitted in auxiliary events stream. 
*/ +extern const char *aux_desc_header; +extern const size_t aux_desc_header_size; + +#define TL_ATOM_STATE_IDLE 0 +#define TL_ATOM_STATE_READY 1 +#define TL_ATOM_STATE_DONE 2 +#define TL_ATOM_STATE_POSTED 3 + +#define TL_JS_EVENT_START GATOR_JOB_SLOT_START +#define TL_JS_EVENT_STOP GATOR_JOB_SLOT_STOP +#define TL_JS_EVENT_SOFT_STOP GATOR_JOB_SLOT_SOFT_STOPPED + +#define TLSTREAM_ENABLED (1u << 31) /* bit 31 of timeline_flags; unsigned because 1 << 31 overflows a signed int */ + +void __kbase_tlstream_tl_new_ctx( + struct kbase_tlstream *stream, + const void *ctx, + u32 ctx_nr, + u32 tgid); +void __kbase_tlstream_tl_new_gpu( + struct kbase_tlstream *stream, + const void *gpu, + u32 gpu_id, + u32 core_count); +void __kbase_tlstream_tl_new_lpu( + struct kbase_tlstream *stream, + const void *lpu, + u32 lpu_nr, + u32 lpu_fn); +void __kbase_tlstream_tl_new_atom( + struct kbase_tlstream *stream, + const void *atom, + u32 atom_nr); +void __kbase_tlstream_tl_new_as( + struct kbase_tlstream *stream, + const void *address_space, + u32 as_nr); +void __kbase_tlstream_tl_del_ctx( + struct kbase_tlstream *stream, + const void *ctx); +void __kbase_tlstream_tl_del_atom( + struct kbase_tlstream *stream, + const void *atom); +void __kbase_tlstream_tl_lifelink_lpu_gpu( + struct kbase_tlstream *stream, + const void *lpu, + const void *gpu); +void __kbase_tlstream_tl_lifelink_as_gpu( + struct kbase_tlstream *stream, + const void *address_space, + const void *gpu); +void __kbase_tlstream_tl_ret_ctx_lpu( + struct kbase_tlstream *stream, + const void *ctx, + const void *lpu); +void __kbase_tlstream_tl_ret_atom_ctx( + struct kbase_tlstream *stream, + const void *atom, + const void *ctx); +void __kbase_tlstream_tl_ret_atom_lpu( + struct kbase_tlstream *stream, + const void *atom, + const void *lpu, + const char *attrib_match_list); +void __kbase_tlstream_tl_nret_ctx_lpu( + struct kbase_tlstream *stream, + const void *ctx, + const void *lpu); +void __kbase_tlstream_tl_nret_atom_ctx( + struct kbase_tlstream *stream, + const void *atom, + const void *ctx); +void
__kbase_tlstream_tl_nret_atom_lpu( + struct kbase_tlstream *stream, + const void *atom, + const void *lpu); +void __kbase_tlstream_tl_ret_as_ctx( + struct kbase_tlstream *stream, + const void *address_space, + const void *ctx); +void __kbase_tlstream_tl_nret_as_ctx( + struct kbase_tlstream *stream, + const void *address_space, + const void *ctx); +void __kbase_tlstream_tl_ret_atom_as( + struct kbase_tlstream *stream, + const void *atom, + const void *address_space); +void __kbase_tlstream_tl_nret_atom_as( + struct kbase_tlstream *stream, + const void *atom, + const void *address_space); +void __kbase_tlstream_tl_attrib_atom_config( + struct kbase_tlstream *stream, + const void *atom, + u64 descriptor, + u64 affinity, + u32 config); +void __kbase_tlstream_tl_attrib_atom_priority( + struct kbase_tlstream *stream, + const void *atom, + u32 prio); +void __kbase_tlstream_tl_attrib_atom_state( + struct kbase_tlstream *stream, + const void *atom, + u32 state); +void __kbase_tlstream_tl_attrib_atom_prioritized( + struct kbase_tlstream *stream, + const void *atom); +void __kbase_tlstream_tl_attrib_atom_jit( + struct kbase_tlstream *stream, + const void *atom, + u64 edit_addr, + u64 new_addr, + u32 jit_flags, + u64 mem_flags, + u32 j_id, + u64 com_pgs, + u64 extent, + u64 va_pgs); +void __kbase_tlstream_tl_jit_usedpages( + struct kbase_tlstream *stream, + u64 used_pages, + u32 j_id); +void __kbase_tlstream_tl_attrib_atom_jitallocinfo( + struct kbase_tlstream *stream, + const void *atom, + u64 va_pgs, + u64 com_pgs, + u64 extent, + u32 j_id, + u32 bin_id, + u32 max_allocs, + u32 jit_flags, + u32 usg_id); +void __kbase_tlstream_tl_attrib_atom_jitfreeinfo( + struct kbase_tlstream *stream, + const void *atom, + u32 j_id); +void __kbase_tlstream_tl_attrib_as_config( + struct kbase_tlstream *stream, + const void *address_space, + u64 transtab, + u64 memattr, + u64 transcfg); +void __kbase_tlstream_tl_event_lpu_softstop( + struct kbase_tlstream *stream, + const void *lpu); +void 
__kbase_tlstream_tl_event_atom_softstop_ex( + struct kbase_tlstream *stream, + const void *atom); +void __kbase_tlstream_tl_event_atom_softstop_issue( + struct kbase_tlstream *stream, + const void *atom); +void __kbase_tlstream_tl_event_atom_softjob_start( + struct kbase_tlstream *stream, + const void *atom); +void __kbase_tlstream_tl_event_atom_softjob_end( + struct kbase_tlstream *stream, + const void *atom); +void __kbase_tlstream_tl_event_arb_granted( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_tl_event_arb_started( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_tl_event_arb_stop_requested( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_tl_event_arb_stopped( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_jd_gpu_soft_reset( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_aux_pm_state( + struct kbase_tlstream *stream, + u32 core_type, + u64 core_state_bitset); +void __kbase_tlstream_aux_pagefault( + struct kbase_tlstream *stream, + u32 ctx_nr, + u32 as_nr, + u64 page_cnt_change); +void __kbase_tlstream_aux_pagesalloc( + struct kbase_tlstream *stream, + u32 ctx_nr, + u64 page_cnt); +void __kbase_tlstream_aux_devfreq_target( + struct kbase_tlstream *stream, + u64 target_freq); +void __kbase_tlstream_aux_protected_enter_start( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_aux_protected_enter_end( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_aux_protected_leave_start( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_aux_protected_leave_end( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_aux_jit_stats( + struct kbase_tlstream *stream, + u32 ctx_nr, + u32 bid, + u32 max_allocs, + u32 allocs, + u32 va_pages, + u32 ph_pages); +void __kbase_tlstream_aux_event_job_slot( + struct kbase_tlstream *stream, + const 
void *ctx, + u32 slot_nr, + u32 atom_nr, + u32 event); +void __kbase_tlstream_tl_kbase_new_device( + struct kbase_tlstream *stream, + u32 kbase_device_id, + u32 kbase_device_gpu_core_count, + u32 kbase_device_max_num_csgs, + u32 kbase_device_as_count); +void __kbase_tlstream_tl_kbase_device_program_csg( + struct kbase_tlstream *stream, + u32 kbase_device_id, + u32 gpu_cmdq_grp_handle, + u32 kbase_device_csg_slot_index); +void __kbase_tlstream_tl_kbase_device_deprogram_csg( + struct kbase_tlstream *stream, + u32 kbase_device_id, + u32 kbase_device_csg_slot_index); +void __kbase_tlstream_tl_kbase_new_ctx( + struct kbase_tlstream *stream, + u32 kernel_ctx_id, + u32 kbase_device_id); +void __kbase_tlstream_tl_kbase_del_ctx( + struct kbase_tlstream *stream, + u32 kernel_ctx_id); +void __kbase_tlstream_tl_kbase_ctx_assign_as( + struct kbase_tlstream *stream, + u32 kernel_ctx_id, + u32 kbase_device_as_index); +void __kbase_tlstream_tl_kbase_ctx_unassign_as( + struct kbase_tlstream *stream, + u32 kernel_ctx_id); +void __kbase_tlstream_tl_kbase_new_kcpuqueue( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u32 kernel_ctx_id, + u32 kcpuq_num_pending_cmds); +void __kbase_tlstream_tl_kbase_del_kcpuqueue( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_signal( + struct kbase_tlstream *stream, + const void *kcpu_queue, + const void *fence); +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_wait( + struct kbase_tlstream *stream, + const void *kcpu_queue, + const void *fence); +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_cqs_wait( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_cqs_wait( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 cqs_obj_gpu_addr, + u32 cqs_obj_compare_value); +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_cqs_wait( + struct kbase_tlstream *stream, + const 
void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_cqs_set( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_cqs_set( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 cqs_obj_gpu_addr); +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_cqs_set( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_debugcopy( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_debugcopy( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 debugcopy_dst_size); +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_debugcopy( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 map_import_buf_gpu_addr); +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 map_import_buf_gpu_addr); +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 map_import_buf_gpu_addr); +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_alloc( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_alloc( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 jit_alloc_gpu_alloc_addr_dest, + u64 jit_alloc_va_pages, + u64 jit_alloc_commit_pages, + u64 jit_alloc_extent, + u32 jit_alloc_jit_id, + u32 jit_alloc_bin_id, + u32 jit_alloc_max_allocations, + u32 jit_alloc_flags, + u32 jit_alloc_usage_id); +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_alloc( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void 
__kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_free( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_free( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u32 jit_alloc_jit_id); +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_free( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_debugcopy_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_debugcopy_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end( + 
struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_alloc_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_alloc_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 jit_alloc_gpu_alloc_addr, + u64 jit_alloc_mmu_flags); +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_alloc_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_free_start( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_free_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u64 jit_free_pages_used); +void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_errorbarrier( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( + struct kbase_tlstream *stream, + u64 csffw_timestamp, + u64 csffw_cycle); + +struct kbase_tlstream; + +/** + * KBASE_TLSTREAM_TL_NEW_CTX - + * object ctx is created + * + * @kbdev: Kbase device + * @ctx: Name of the context object + * @ctx_nr: Kernel context number + * @tgid: Thread Group Id + */ +#define KBASE_TLSTREAM_TL_NEW_CTX( \ 
+ kbdev, \ + ctx, \ + ctx_nr, \ + tgid \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_new_ctx( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + ctx, ctx_nr, tgid); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_NEW_GPU - + * object gpu is created + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + * @gpu_id: Name of the GPU object + * @core_count: Number of cores this GPU hosts + */ +#define KBASE_TLSTREAM_TL_NEW_GPU( \ + kbdev, \ + gpu, \ + gpu_id, \ + core_count \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_new_gpu( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + gpu, gpu_id, core_count); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_NEW_LPU - + * object lpu is created + * + * @kbdev: Kbase device + * @lpu: Name of the Logical Processing Unit object + * @lpu_nr: Sequential number assigned to the newly created LPU + * @lpu_fn: Property describing functional abilities of this LPU + */ +#define KBASE_TLSTREAM_TL_NEW_LPU( \ + kbdev, \ + lpu, \ + lpu_nr, \ + lpu_fn \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_new_lpu( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + lpu, lpu_nr, lpu_fn); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_NEW_ATOM - + * object atom is created + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @atom_nr: Sequential number of an atom + */ +#define KBASE_TLSTREAM_TL_NEW_ATOM( \ + kbdev, \ + atom, \ + atom_nr \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_new_atom( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, atom_nr); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_NEW_AS - + * address space object is created + * + * @kbdev: Kbase device + * @address_space: Name of the address space object + * @as_nr: Address space 
number + */ +#define KBASE_TLSTREAM_TL_NEW_AS( \ + kbdev, \ + address_space, \ + as_nr \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_new_as( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + address_space, as_nr); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_DEL_CTX - + * context is destroyed + * + * @kbdev: Kbase device + * @ctx: Name of the context object + */ +#define KBASE_TLSTREAM_TL_DEL_CTX( \ + kbdev, \ + ctx \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_del_ctx( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + ctx); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_DEL_ATOM - + * atom is destroyed + * + * @kbdev: Kbase device + * @atom: Atom identifier + */ +#define KBASE_TLSTREAM_TL_DEL_ATOM( \ + kbdev, \ + atom \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_del_atom( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_LIFELINK_LPU_GPU - + * lpu is deleted with gpu + * + * @kbdev: Kbase device + * @lpu: Name of the Logical Processing Unit object + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_TL_LIFELINK_LPU_GPU( \ + kbdev, \ + lpu, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_lifelink_lpu_gpu( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + lpu, gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_LIFELINK_AS_GPU - + * address space is deleted with gpu + * + * @kbdev: Kbase device + * @address_space: Name of the address space object + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_TL_LIFELINK_AS_GPU( \ + kbdev, \ + address_space, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + 
__kbase_tlstream_tl_lifelink_as_gpu( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + address_space, gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_RET_CTX_LPU - + * context is retained by lpu + * + * @kbdev: Kbase device + * @ctx: Name of the context object + * @lpu: Name of the Logical Processing Unit object + */ +#define KBASE_TLSTREAM_TL_RET_CTX_LPU( \ + kbdev, \ + ctx, \ + lpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_ret_ctx_lpu( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + ctx, lpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_RET_ATOM_CTX - + * atom is retained by context + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @ctx: Name of the context object + */ +#define KBASE_TLSTREAM_TL_RET_ATOM_CTX( \ + kbdev, \ + atom, \ + ctx \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_ret_atom_ctx( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, ctx); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_RET_ATOM_LPU - + * atom is retained by lpu + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @lpu: Name of the Logical Processing Unit object + * @attrib_match_list: List containing match operator attributes + */ +#define KBASE_TLSTREAM_TL_RET_ATOM_LPU( \ + kbdev, \ + atom, \ + lpu, \ + attrib_match_list \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_ret_atom_lpu( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, lpu, attrib_match_list); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_NRET_CTX_LPU - + * context is released by lpu + * + * @kbdev: Kbase device + * @ctx: Name of the context object + * @lpu: Name of the Logical Processing Unit object + */ +#define KBASE_TLSTREAM_TL_NRET_CTX_LPU( \ + kbdev, \ + ctx, \ + lpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & 
TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_nret_ctx_lpu( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + ctx, lpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_NRET_ATOM_CTX - + * atom is released by context + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @ctx: Name of the context object + */ +#define KBASE_TLSTREAM_TL_NRET_ATOM_CTX( \ + kbdev, \ + atom, \ + ctx \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_nret_atom_ctx( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, ctx); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_NRET_ATOM_LPU - + * atom is released by lpu + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @lpu: Name of the Logical Processing Unit object + */ +#define KBASE_TLSTREAM_TL_NRET_ATOM_LPU( \ + kbdev, \ + atom, \ + lpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_nret_atom_lpu( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, lpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_RET_AS_CTX - + * address space is retained by context + * + * @kbdev: Kbase device + * @address_space: Name of the address space object + * @ctx: Name of the context object + */ +#define KBASE_TLSTREAM_TL_RET_AS_CTX( \ + kbdev, \ + address_space, \ + ctx \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_ret_as_ctx( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + address_space, ctx); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_NRET_AS_CTX - + * address space is released by context + * + * @kbdev: Kbase device + * @address_space: Name of the address space object + * @ctx: Name of the context object + */ +#define KBASE_TLSTREAM_TL_NRET_AS_CTX( \ + kbdev, \ + address_space, \ + ctx \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_nret_as_ctx( \ + 
__TL_DISPATCH_STREAM(kbdev, obj), \ + address_space, ctx); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_RET_ATOM_AS - + * atom is retained by address space + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @address_space: Name of the address space object + */ +#define KBASE_TLSTREAM_TL_RET_ATOM_AS( \ + kbdev, \ + atom, \ + address_space \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_ret_atom_as( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, address_space); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_NRET_ATOM_AS - + * atom is released by address space + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @address_space: Name of the address space object + */ +#define KBASE_TLSTREAM_TL_NRET_ATOM_AS( \ + kbdev, \ + atom, \ + address_space \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_nret_atom_as( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, address_space); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG - + * atom job slot attributes + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @descriptor: Job descriptor address + * @affinity: Job affinity + * @config: Job config + */ +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG( \ + kbdev, \ + atom, \ + descriptor, \ + affinity, \ + config \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_attrib_atom_config( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, descriptor, affinity, config); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY - + * atom priority + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @prio: Atom priority + */ +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY( \ + kbdev, \ + atom, \ + prio \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & 
BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ + __kbase_tlstream_tl_attrib_atom_priority( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, prio); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE - + * atom state + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @state: Atom state + */ +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE( \ + kbdev, \ + atom, \ + state \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ + __kbase_tlstream_tl_attrib_atom_state( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, state); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED - + * atom caused priority change + * + * @kbdev: Kbase device + * @atom: Atom identifier + */ +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED( \ + kbdev, \ + atom \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ + __kbase_tlstream_tl_attrib_atom_prioritized( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT - + * jit done for atom + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @edit_addr: Address edited by jit + * @new_addr: Address placed into the edited location + * @jit_flags: Flags specifying the special requirements for + * the JIT allocation. + * @mem_flags: Flags defining the properties of a memory region + * @j_id: Unique ID provided by the caller, this is used + * to pair allocation and free requests. + * @com_pgs: The minimum number of physical pages which + * should back the allocation. + * @extent: Granularity of physical pages to grow the + * allocation by during a fault. 
+ * @va_pgs: The minimum number of virtual pages required + */ +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT( \ + kbdev, \ + atom, \ + edit_addr, \ + new_addr, \ + jit_flags, \ + mem_flags, \ + j_id, \ + com_pgs, \ + extent, \ + va_pgs \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_JOB_DUMPING_ENABLED) \ + __kbase_tlstream_tl_attrib_atom_jit( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, edit_addr, new_addr, jit_flags, mem_flags, j_id, com_pgs, extent, va_pgs); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_JIT_USEDPAGES - + * used pages for jit + * + * @kbdev: Kbase device + * @used_pages: Number of pages used for jit + * @j_id: Unique ID provided by the caller, this is used + * to pair allocation and free requests. + */ +#define KBASE_TLSTREAM_TL_JIT_USEDPAGES( \ + kbdev, \ + used_pages, \ + j_id \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_jit_usedpages( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + used_pages, j_id); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO - + * Information about JIT allocations + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @va_pgs: The minimum number of virtual pages required + * @com_pgs: The minimum number of physical pages which + * should back the allocation. + * @extent: Granularity of physical pages to grow the + * allocation by during a fault. + * @j_id: Unique ID provided by the caller, this is used + * to pair allocation and free requests. + * @bin_id: The JIT allocation bin, used in conjunction with + * max_allocations to limit the number of each + * type of JIT allocation. + * @max_allocs: Maximum allocations allowed in this bin. + * @jit_flags: Flags specifying the special requirements for + * the JIT allocation. + * @usg_id: A hint about which allocation should be reused. 
+ */ +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO( \ + kbdev, \ + atom, \ + va_pgs, \ + com_pgs, \ + extent, \ + j_id, \ + bin_id, \ + max_allocs, \ + jit_flags, \ + usg_id \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_attrib_atom_jitallocinfo( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, va_pgs, com_pgs, extent, j_id, bin_id, max_allocs, jit_flags, usg_id); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO - + * Information about JIT frees + * + * @kbdev: Kbase device + * @atom: Atom identifier + * @j_id: Unique ID provided by the caller, this is used + * to pair allocation and free requests. + */ +#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO( \ + kbdev, \ + atom, \ + j_id \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_attrib_atom_jitfreeinfo( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom, j_id); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG - + * address space attributes + * + * @kbdev: Kbase device + * @address_space: Name of the address space object + * @transtab: Configuration of the TRANSTAB register + * @memattr: Configuration of the MEMATTR register + * @transcfg: Configuration of the TRANSCFG register (or zero if not present) + */ +#define KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG( \ + kbdev, \ + address_space, \ + transtab, \ + memattr, \ + transcfg \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_attrib_as_config( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + address_space, transtab, memattr, transcfg); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP - + * softstop event on given lpu + * + * @kbdev: Kbase device + * @lpu: Name of the Logical Processing Unit object + */ +#define KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP( \ + kbdev, \ + lpu \ + ) \ + do { \ + int 
enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_event_lpu_softstop( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + lpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX - + * atom softstopped + * + * @kbdev: Kbase device + * @atom: Atom identifier + */ +#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX( \ + kbdev, \ + atom \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_event_atom_softstop_ex( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE - + * atom softstop issued + * + * @kbdev: Kbase device + * @atom: Atom identifier + */ +#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE( \ + kbdev, \ + atom \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_event_atom_softstop_issue( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START - + * atom soft job has started + * + * @kbdev: Kbase device + * @atom: Atom identifier + */ +#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START( \ + kbdev, \ + atom \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_event_atom_softjob_start( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END - + * atom soft job has completed + * + * @kbdev: Kbase device + * @atom: Atom identifier + */ +#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END( \ + kbdev, \ + atom \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_event_atom_softjob_end( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + atom); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_EVENT_ARB_GRANTED - + * Arbiter has 
granted gpu access + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_TL_EVENT_ARB_GRANTED( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_event_arb_granted( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_EVENT_ARB_STARTED - + * Driver is running again and able to process jobs + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_TL_EVENT_ARB_STARTED( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_event_arb_started( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_EVENT_ARB_STOP_REQUESTED - + * Arbiter has requested driver to stop using gpu + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_TL_EVENT_ARB_STOP_REQUESTED( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_event_arb_stop_requested( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_EVENT_ARB_STOPPED - + * Driver has stopped using gpu + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_TL_EVENT_ARB_STOPPED( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_event_arb_stopped( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_JD_GPU_SOFT_RESET - + * gpu soft reset + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_JD_GPU_SOFT_RESET( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & 
TLSTREAM_ENABLED) \ + __kbase_tlstream_jd_gpu_soft_reset( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_PM_STATE - + * PM state + * + * @kbdev: Kbase device + * @core_type: Core type (shader, tiler, l2 cache, l3 cache) + * @core_state_bitset: 64bits bitmask reporting power state of the cores + * (1-ON, 0-OFF) + */ +#define KBASE_TLSTREAM_AUX_PM_STATE( \ + kbdev, \ + core_type, \ + core_state_bitset \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_pm_state( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + core_type, core_state_bitset); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_PAGEFAULT - + * Page fault + * + * @kbdev: Kbase device + * @ctx_nr: Kernel context number + * @as_nr: Address space number + * @page_cnt_change: Number of pages to be added + */ +#define KBASE_TLSTREAM_AUX_PAGEFAULT( \ + kbdev, \ + ctx_nr, \ + as_nr, \ + page_cnt_change \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_pagefault( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + ctx_nr, as_nr, page_cnt_change); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_PAGESALLOC - + * Total alloc pages change + * + * @kbdev: Kbase device + * @ctx_nr: Kernel context number + * @page_cnt: Number of pages used by the context + */ +#define KBASE_TLSTREAM_AUX_PAGESALLOC( \ + kbdev, \ + ctx_nr, \ + page_cnt \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_pagesalloc( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + ctx_nr, page_cnt); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_DEVFREQ_TARGET - + * New device frequency target + * + * @kbdev: Kbase device + * @target_freq: New target frequency + */ +#define KBASE_TLSTREAM_AUX_DEVFREQ_TARGET( \ + kbdev, \ + target_freq \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + 
if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_devfreq_target( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + target_freq); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START - + * enter protected mode start + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ + __kbase_tlstream_aux_protected_enter_start( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END - + * enter protected mode end + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ + __kbase_tlstream_aux_protected_enter_end( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START - + * leave protected mode start + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ + __kbase_tlstream_aux_protected_leave_start( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END - + * leave protected mode end + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END( \ + kbdev, \ + gpu \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \ + __kbase_tlstream_aux_protected_leave_end( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + gpu); \ + } while (0) + +/** + * 
KBASE_TLSTREAM_AUX_JIT_STATS - + * per-bin JIT statistics + * + * @kbdev: Kbase device + * @ctx_nr: Kernel context number + * @bid: JIT bin id + * @max_allocs: Maximum allocations allowed in this bin. + * @allocs: Number of active allocations in this bin + * @va_pages: Number of virtual pages allocated in this bin + * @ph_pages: Number of physical pages allocated in this bin + */ +#define KBASE_TLSTREAM_AUX_JIT_STATS( \ + kbdev, \ + ctx_nr, \ + bid, \ + max_allocs, \ + allocs, \ + va_pages, \ + ph_pages \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_jit_stats( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + ctx_nr, bid, max_allocs, allocs, va_pages, ph_pages); \ + } while (0) + +/** + * KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT - + * event on a given job slot + * + * @kbdev: Kbase device + * @ctx: Name of the context object + * @slot_nr: Job slot number + * @atom_nr: Sequential number of an atom + * @event: Event type. 
One of TL_JS_EVENT values + */ +#define KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT( \ + kbdev, \ + ctx, \ + slot_nr, \ + atom_nr, \ + event \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_event_job_slot( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + ctx, slot_nr, atom_nr, event); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_NEW_DEVICE - + * New KBase Device + * + * @kbdev: Kbase device + * @kbase_device_id: The id of the physical hardware + * @kbase_device_gpu_core_count: The number of gpu cores in the physical hardware + * @kbase_device_max_num_csgs: The max number of CSGs the physical hardware supports + * @kbase_device_as_count: The number of address spaces the physical hardware has available + */ +#define KBASE_TLSTREAM_TL_KBASE_NEW_DEVICE( \ + kbdev, \ + kbase_device_id, \ + kbase_device_gpu_core_count, \ + kbase_device_max_num_csgs, \ + kbase_device_as_count \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG - + * CSG is programmed to a slot + * + * @kbdev: Kbase device + * @kbase_device_id: The id of the physical hardware + * @gpu_cmdq_grp_handle: GPU Command Queue Group handle which will match userspace + * @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed + */ +#define KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( \ + kbdev, \ + kbase_device_id, \ + gpu_cmdq_grp_handle, \ + kbase_device_csg_slot_index \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG - + * CSG is deprogrammed from a slot + * + * @kbdev: Kbase device + * @kbase_device_id: The id of the physical hardware + * @kbase_device_csg_slot_index: The index of the slot in the scheduler being deprogrammed + */ +#define KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG( \ + kbdev, \ + kbase_device_id, \ + kbase_device_csg_slot_index \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_NEW_CTX - + * New KBase Context + * + * @kbdev:
Kbase device + * @kernel_ctx_id: Unique ID for the KBase Context + * @kbase_device_id: The id of the physical hardware + */ +#define KBASE_TLSTREAM_TL_KBASE_NEW_CTX( \ + kbdev, \ + kernel_ctx_id, \ + kbase_device_id \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_DEL_CTX - + * Delete KBase Context + * + * @kbdev: Kbase device + * @kernel_ctx_id: Unique ID for the KBase Context + */ +#define KBASE_TLSTREAM_TL_KBASE_DEL_CTX( \ + kbdev, \ + kernel_ctx_id \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS - + * Address Space is assigned to a KBase context + * + * @kbdev: Kbase device + * @kernel_ctx_id: Unique ID for the KBase Context + * @kbase_device_as_index: The index of the device address space being assigned + */ +#define KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS( \ + kbdev, \ + kernel_ctx_id, \ + kbase_device_as_index \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS - + * Address Space is unassigned from a KBase context + * + * @kbdev: Kbase device + * @kernel_ctx_id: Unique ID for the KBase Context + */ +#define KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( \ + kbdev, \ + kernel_ctx_id \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE - + * New KCPU Queue + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @kernel_ctx_id: Unique ID for the KBase Context + * @kcpuq_num_pending_cmds: Number of commands already enqueued + * in the KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE( \ + kbdev, \ + kcpu_queue, \ + kernel_ctx_id, \ + kcpuq_num_pending_cmds \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE - + * Delete KCPU Queue + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL - + * KCPU Queue enqueues Signal on Fence + * + * @kbdev: Kbase device + * 
@kcpu_queue: KCPU queue + * @fence: Fence object handle + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL( \ + kbdev, \ + kcpu_queue, \ + fence \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT - + * KCPU Queue enqueues Wait on Fence + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @fence: Fence object handle + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT( \ + kbdev, \ + kcpu_queue, \ + fence \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT - + * Begin array of KCPU Queue enqueues Wait on Cross Queue Sync Object + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT - + * Array item of KCPU Queue enqueues Wait on Cross Queue Sync Object + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @cqs_obj_gpu_addr: CQS Object GPU ptr + * @cqs_obj_compare_value: Semaphore value that should be exceeded + * for the WAIT to pass + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT( \ + kbdev, \ + kcpu_queue, \ + cqs_obj_gpu_addr, \ + cqs_obj_compare_value \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT - + * End array of KCPU Queue enqueues Wait on Cross Queue Sync Object + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET - + * Begin array of KCPU Queue enqueues Set on Cross Queue Sync Object + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET( \ + kbdev, \ + kcpu_queue 
\ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET - + * Array item of KCPU Queue enqueues Set on Cross Queue Sync Object + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @cqs_obj_gpu_addr: CQS Object GPU ptr + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET( \ + kbdev, \ + kcpu_queue, \ + cqs_obj_gpu_addr \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET - + * End array of KCPU Queue enqueues Set on Cross Queue Sync Object + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY - + * Begin array of KCPU Queue enqueues Debug Copy + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY - + * Array item of KCPU Queue enqueues Debug Copy + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @debugcopy_dst_size: Debug Copy destination size + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY( \ + kbdev, \ + kcpu_queue, \ + debugcopy_dst_size \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY - + * End array of KCPU Queue enqueues Debug Copy + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT - + * KCPU Queue enqueues Map Import + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @map_import_buf_gpu_addr: Map import buffer GPU ptr + */ +#define 
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT( \ + kbdev, \ + kcpu_queue, \ + map_import_buf_gpu_addr \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT - + * KCPU Queue enqueues Unmap Import + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @map_import_buf_gpu_addr: Map import buffer GPU ptr + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT( \ + kbdev, \ + kcpu_queue, \ + map_import_buf_gpu_addr \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE - + * KCPU Queue enqueues Unmap Import ignoring reference count + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @map_import_buf_gpu_addr: Map import buffer GPU ptr + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE( \ + kbdev, \ + kcpu_queue, \ + map_import_buf_gpu_addr \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC - + * Begin array of KCPU Queue enqueues JIT Alloc + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC - + * Array item of KCPU Queue enqueues JIT Alloc + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @jit_alloc_gpu_alloc_addr_dest: The GPU virtual address to write + * the JIT allocated GPU virtual address to + * @jit_alloc_va_pages: The minimum number of virtual pages required + * @jit_alloc_commit_pages: The minimum number of physical pages which + * should back the allocation + * @jit_alloc_extent: Granularity of physical pages to grow the allocation + * by during a fault + * @jit_alloc_jit_id: Unique ID provided by the caller, this is used + * to pair allocation and free requests. 
Zero is not a valid value + * @jit_alloc_bin_id: The JIT allocation bin, used in conjunction with + * max_allocations to limit the number of each type of JIT allocation + * @jit_alloc_max_allocations: The maximum number of allocations + * allowed within the bin specified by bin_id. Should be the same for all + * JIT allocations within the same bin. + * @jit_alloc_flags: Flags specifying the special requirements for the + * JIT allocation + * @jit_alloc_usage_id: A hint about which allocation should be + * reused. The kernel should attempt to use a previous allocation with the same + * usage_id + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ + kbdev, \ + kcpu_queue, \ + jit_alloc_gpu_alloc_addr_dest, \ + jit_alloc_va_pages, \ + jit_alloc_commit_pages, \ + jit_alloc_extent, \ + jit_alloc_jit_id, \ + jit_alloc_bin_id, \ + jit_alloc_max_allocations, \ + jit_alloc_flags, \ + jit_alloc_usage_id \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC - + * End array of KCPU Queue enqueues JIT Alloc + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE - + * Begin array of KCPU Queue enqueues JIT Free + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE - + * Array item of KCPU Queue enqueues JIT Free + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @jit_alloc_jit_id: Unique ID provided by the caller, this is used + * to pair allocation and free requests. 
Zero is not a valid value + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE( \ + kbdev, \ + kcpu_queue, \ + jit_alloc_jit_id \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE - + * End array of KCPU Queue enqueues JIT Free + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START - + * KCPU Queue starts a Signal on Fence + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END - + * KCPU Queue ends a Signal on Fence + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START - + * KCPU Queue starts a Wait on Fence + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END - + * KCPU Queue ends a Wait on Fence + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START - + * KCPU Queue starts a Wait on an array of Cross Queue Sync Objects + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * 
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END - + * KCPU Queue ends a Wait on an array of Cross Queue Sync Objects + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET - + * KCPU Queue executes a Set on an array of Cross Queue Sync Objects + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START - + * KCPU Queue starts an array of Debug Copys + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END - + * KCPU Queue ends an array of Debug Copys + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START - + * KCPU Queue starts a Map Import + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END - + * KCPU Queue ends a Map Import + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START - + * KCPU Queue starts an Unmap Import + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define 
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END - + * KCPU Queue ends an Unmap Import + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START - + * KCPU Queue starts an Unmap Import ignoring reference count + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END - + * KCPU Queue ends an Unmap Import ignoring reference count + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START - + * KCPU Queue starts an array of JIT Allocs + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END - + * Begin array of KCPU Queue ends an array of JIT Allocs + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END - + * Array item of KCPU Queue ends an array of JIT Allocs + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @jit_alloc_gpu_alloc_addr: The JIT allocated GPU virtual address + * @jit_alloc_mmu_flags: The MMU 
flags for the JIT allocation + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ + kbdev, \ + kcpu_queue, \ + jit_alloc_gpu_alloc_addr, \ + jit_alloc_mmu_flags \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END - + * End array of KCPU Queue ends an array of JIT Allocs + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START - + * KCPU Queue starts an array of JIT Frees + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END - + * Begin array of KCPU Queue ends an array of JIT Frees + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END - + * Array item of KCPU Queue ends an array of JIT Frees + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @jit_free_pages_used: The actual number of pages used by the JIT + * allocation + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ + kbdev, \ + kcpu_queue, \ + jit_free_pages_used \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END - + * End array of KCPU Queue ends an array of JIT Frees + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER - + * 
KCPU Queue executes an Error Barrier + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) + +/** + * KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW - + * An overflow has happened with the CSFFW Timeline stream + * + * @kbdev: Kbase device + * @csffw_timestamp: Timestamp of a CSFFW event + * @csffw_cycle: Cycle number of a CSFFW event + */ +#define KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW( \ + kbdev, \ + csffw_timestamp, \ + csffw_cycle \ + ) \ + do { } while (0) + + +/* Gator tracepoints are hooked into the TLSTREAM interface. + * When the following tracepoints are called, the corresponding + * Gator tracepoint is called as well, whether or not TLSTREAM_ENABLED is set. + */ + +#if defined(CONFIG_MALI_GATOR_SUPPORT) +/* `event` is one of TL_JS_EVENT values here. + * The values of TL_JS_EVENT are guaranteed to match + * with corresponding GATOR_JOB_SLOT values. + */ +#undef KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT +#define KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, \ + context, slot_nr, atom_nr, event) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + kbase_trace_mali_job_slots_event(kbdev->id, \ + GATOR_MAKE_EVENT(event, slot_nr), \ + context, (u8) atom_nr); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_event_job_slot( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + context, slot_nr, atom_nr, event); \ + } while (0) + +#undef KBASE_TLSTREAM_AUX_PM_STATE +#define KBASE_TLSTREAM_AUX_PM_STATE(kbdev, core_type, state) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + kbase_trace_mali_pm_status(kbdev->id, \ + core_type, state); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_pm_state( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + core_type, state); \ + } while (0) + +#undef KBASE_TLSTREAM_AUX_PAGEFAULT +#define KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, \ + ctx_nr, as_nr, page_cnt_change) \ + do { \ + int enabled =
atomic_read(&kbdev->timeline_flags); \ + kbase_trace_mali_page_fault_insert_pages(kbdev->id, \ + as_nr, \ + page_cnt_change); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_pagefault( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + ctx_nr, as_nr, page_cnt_change); \ + } while (0) + +/* kbase_trace_mali_total_alloc_pages_change is handled differently here. + * The Gator hook is passed the total number of pages allocated for `kbdev` + * (kbdev->memdev.used_pages) rather than `page_cnt`, which is per-context. + */ +#undef KBASE_TLSTREAM_AUX_PAGESALLOC +#define KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, ctx_nr, page_cnt) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + u32 global_pages_count = \ + atomic_read(&kbdev->memdev.used_pages); \ + \ + kbase_trace_mali_total_alloc_pages_change(kbdev->id, \ + global_pages_count); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_pagesalloc( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + ctx_nr, page_cnt); \ + } while (0) +#endif /* CONFIG_MALI_GATOR_SUPPORT */ + +/* clang-format on */ +#endif diff --git a/bifrost/r25p0/kernel/include/linux/dma-buf-test-exporter.h b/bifrost/r25p0/kernel/include/linux/dma-buf-test-exporter.h new file mode 100644 index 0000000..95bc6f8 --- /dev/null +++ b/bifrost/r25p0/kernel/include/linux/dma-buf-test-exporter.h @@ -0,0 +1,83 @@ +/* + * + * (C) COPYRIGHT 2012-2013, 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + + +#ifndef _LINUX_DMA_BUF_TEST_EXPORTER_H_ +#define _LINUX_DMA_BUF_TEST_EXPORTER_H_ + +#include +#include + +#define DMA_BUF_TE_VER_MAJOR 1 +#define DMA_BUF_TE_VER_MINOR 0 +#define DMA_BUF_TE_ENQ 0x642d7465 +#define DMA_BUF_TE_ACK 0x68692100 + +struct dma_buf_te_ioctl_version +{ + int op; /**< Must be set to DMA_BUF_TE_ENQ by client, driver will set it to DMA_BUF_TE_ACK */ + int major; /**< Major version */ + int minor; /**< Minor version */ +}; + +struct dma_buf_te_ioctl_alloc +{ + __u64 size; /* size of buffer to allocate, in pages */ +}; + +struct dma_buf_te_ioctl_status +{ + /* in */ + int fd; /* the dma_buf to query; only dma_buf objects exported by this driver are supported */ + /* out */ + int attached_devices; /* number of devices attached (active 'dma_buf_attach's) */ + int device_mappings; /* number of device mappings (active 'dma_buf_map_attachment's) */ + int cpu_mappings; /* number of cpu mappings (active 'mmap's) */ +}; + +struct dma_buf_te_ioctl_set_failing +{ + /* in */ + int fd; /* the dma_buf to set failure mode for; only dma_buf objects exported by this driver are supported */ + + /* zero = no fail injection, non-zero = inject failure */ + int fail_attach; + int fail_map; + int fail_mmap; +}; + +struct dma_buf_te_ioctl_fill +{ + int fd; + unsigned int value; +}; + +#define DMA_BUF_TE_IOCTL_BASE 'E' +/* All ioctls below return 0 on success or -errcode on failure, except DMA_BUF_TE_ALLOC, which returns an fd or -errcode. NOTE(review): the _IOR/_IOW direction bits do not obviously match the in/out markings on the structs above (e.g. DMA_BUF_TE_ALLOC only carries an input size); they are part of the ioctl number and therefore ABI, so confirm against the driver and its users before changing them. */ +#define DMA_BUF_TE_VERSION _IOR(DMA_BUF_TE_IOCTL_BASE, 0x00, struct dma_buf_te_ioctl_version) +#define DMA_BUF_TE_ALLOC _IOR(DMA_BUF_TE_IOCTL_BASE, 0x01, struct dma_buf_te_ioctl_alloc) +#define DMA_BUF_TE_QUERY _IOR(DMA_BUF_TE_IOCTL_BASE, 0x02, struct dma_buf_te_ioctl_status) +#define DMA_BUF_TE_SET_FAILING
_IOW(DMA_BUF_TE_IOCTL_BASE, 0x03, struct dma_buf_te_ioctl_set_failing) +#define DMA_BUF_TE_ALLOC_CONT _IOR(DMA_BUF_TE_IOCTL_BASE, 0x04, struct dma_buf_te_ioctl_alloc) +#define DMA_BUF_TE_FILL _IOR(DMA_BUF_TE_IOCTL_BASE, 0x05, struct dma_buf_te_ioctl_fill) + +#endif /* _LINUX_DMA_BUF_TEST_EXPORTER_H_ */ diff --git a/bifrost/r25p0/kernel/include/linux/memory_group_manager.h b/bifrost/r25p0/kernel/include/linux/memory_group_manager.h new file mode 100644 index 0000000..b1ac253 --- /dev/null +++ b/bifrost/r25p0/kernel/include/linux/memory_group_manager.h @@ -0,0 +1,198 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html.
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _MEMORY_GROUP_MANAGER_H_ +#define _MEMORY_GROUP_MANAGER_H_ + +#include +#include +#include + +#if (KERNEL_VERSION(4, 17, 0) > LINUX_VERSION_CODE) +typedef int vm_fault_t; +#endif + +#define MEMORY_GROUP_MANAGER_NR_GROUPS (16) + +struct memory_group_manager_device; +struct memory_group_manager_import_data; + +/** + * struct memory_group_manager_ops - Callbacks for memory group manager + * operations + * + * @mgm_alloc_page: Callback to allocate physical memory in a group + * @mgm_free_page: Callback to free physical memory in a group + * @mgm_get_import_memory_id: Callback to get the group ID for imported memory + * @mgm_update_gpu_pte: Callback to modify a GPU page table entry + * @mgm_vmf_insert_pfn_prot: Callback to map a physical memory page for the CPU + */ +struct memory_group_manager_ops { + /** + * mgm_alloc_page - Allocate a physical memory page in a group + * + * @mgm_dev: The memory group manager through which the request is + * being made. + * @group_id: A physical memory group ID. The meaning of this is defined + * by the systems integrator. Its valid range is + * 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. + * @gfp_mask: Bitmask of Get Free Page flags affecting allocator + * behavior. + * @order: Page order for physical page size (order=0 means 4 KiB, + * order=9 means 2 MiB). + * + * Return: Pointer to allocated page, or NULL if allocation failed. + */ + struct page *(*mgm_alloc_page)( + struct memory_group_manager_device *mgm_dev, int group_id, + gfp_t gfp_mask, unsigned int order); + + /** + * mgm_free_page - Free a physical memory page in a group + * + * @mgm_dev: The memory group manager through which the request + * is being made. + * @group_id: A physical memory group ID. The meaning of this is + * defined by the systems integrator. Its valid range is + * 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1.
+ * @page: Address of the struct page associated with a page of physical + * memory that was allocated by calling the mgm_alloc_page + * method of the same memory pool with the same values of + * @group_id and @order. + * @order: Page order for physical page size (order=0 means 4 KiB, + * order=9 means 2 MiB). + */ + void (*mgm_free_page)( + struct memory_group_manager_device *mgm_dev, int group_id, + struct page *page, unsigned int order); + + /** + * mgm_get_import_memory_id - Get the physical memory group ID for the + * imported memory + * + * @mgm_dev: The memory group manager through which the request + * is being made. + * @import_data: Pointer to the data which describes imported memory. + * + * Note that provision of this callback is optional; where it is not + * provided, this callback pointer must be set to NULL to indicate it + * is not in use. + * + * Return: The memory group ID to use when mapping pages from this + * imported memory. + */ + int (*mgm_get_import_memory_id)( + struct memory_group_manager_device *mgm_dev, + struct memory_group_manager_import_data *import_data); + + /** + * mgm_update_gpu_pte - Modify a GPU page table entry for a memory group + * + * @mgm_dev: The memory group manager through which the request + * is being made. + * @group_id: A physical memory group ID. The meaning of this is + * defined by the systems integrator. Its valid range is + * 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. + * @mmu_level: The level of the page table entry in @pte. + * @pte: The page table entry to modify, in LPAE or AArch64 format + * (depending on the driver's configuration). This should be + * decoded to determine the physical address and any other + * properties of the mapping the manager requires. + * + * This function allows the memory group manager to modify a GPU page + * table entry before it is stored by the kbase module (controller + * driver).
It may set certain bits in the page table entry attributes + * or in the physical address, based on the physical memory group ID. + * + * Return: A modified GPU page table entry to be stored in a page table. + */ + u64 (*mgm_update_gpu_pte)(struct memory_group_manager_device *mgm_dev, + int group_id, int mmu_level, u64 pte); + + /** + * mgm_vmf_insert_pfn_prot - Map a physical page in a group for the CPU + * + * @mgm_dev: The memory group manager through which the request + * is being made. + * @group_id: A physical memory group ID. The meaning of this is + * defined by the systems integrator. Its valid range is + * 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1. + * @vma: The virtual memory area to insert the page into. + * @addr: A virtual address (in @vma) to assign to the page. + * @pfn: The kernel Page Frame Number to insert at @addr in @vma. + * @pgprot: Protection flags for the inserted page. + * + * Called from a CPU virtual memory page fault handler. This function + * creates a page table entry from the given parameter values and stores + * it at the appropriate location (unlike mgm_update_gpu_pte, which + * returns a modified entry). + * + * Return: Type of fault that occurred or VM_FAULT_NOPAGE if the page + * table entry was successfully installed. + */ + vm_fault_t (*mgm_vmf_insert_pfn_prot)( + struct memory_group_manager_device *mgm_dev, int group_id, + struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, pgprot_t pgprot); +}; + +/** + * struct memory_group_manager_device - Device structure for a memory group + * manager + * + * @ops: Callbacks associated with this device + * @data: Pointer to device private data + * + * In order for a systems integrator to provide custom behaviors for memory + * operations performed by the kbase module (controller driver), they must + * provide a platform-specific driver module which implements this interface.
+ * + * This structure should be registered with the platform device using + * platform_set_drvdata(). + */ +struct memory_group_manager_device { + struct memory_group_manager_ops ops; + void *data; + struct module *owner; +}; + + +enum memory_group_manager_import_type { + MEMORY_GROUP_MANAGER_IMPORT_TYPE_DMA_BUF +}; + +/** + * struct memory_group_manager_import_data - Structure describing the imported + * memory + * + * @type: Type of imported memory + * @u: Union describing the imported memory + * + */ +struct memory_group_manager_import_data { + enum memory_group_manager_import_type type; + union { + struct dma_buf *dma_buf; + } u; +}; + +#endif /* _MEMORY_GROUP_MANAGER_H_ */ diff --git a/bifrost/r25p0/kernel/include/linux/protected_memory_allocator.h b/bifrost/r25p0/kernel/include/linux/protected_memory_allocator.h new file mode 100644 index 0000000..3b9205b --- /dev/null +++ b/bifrost/r25p0/kernel/include/linux/protected_memory_allocator.h @@ -0,0 +1,110 @@ +/* + * + * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html.
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _PROTECTED_MEMORY_ALLOCATOR_H_ +#define _PROTECTED_MEMORY_ALLOCATOR_H_ + +#include + +/** + * struct protected_memory_allocation - Protected memory allocation + * + * @pa: Physical address of the protected memory allocation. + * @order: Size of memory allocation in pages, as a base-2 logarithm. + */ +struct protected_memory_allocation { + phys_addr_t pa; + unsigned int order; +}; + +struct protected_memory_allocator_device; + +/** + * struct protected_memory_allocator_ops - Callbacks for protected memory + * allocator operations + * + * @pma_alloc_page: Callback to allocate protected memory + * @pma_get_phys_addr: Callback to get the physical address of an allocation + * @pma_free_page: Callback to free protected memory + */ +struct protected_memory_allocator_ops { + /** + * pma_alloc_page - Allocate protected memory pages + * + * @pma_dev: The protected memory allocator the request is being made + * through. + * @order: How many pages to allocate, as a base-2 logarithm. + * + * Return: Pointer to the allocation descriptor, or NULL if allocation failed. + */ + struct protected_memory_allocation *(*pma_alloc_page)( + struct protected_memory_allocator_device *pma_dev, + unsigned int order); + + /** + * pma_get_phys_addr - Get the physical address of the protected memory + * allocation + * + * @pma_dev: The protected memory allocator the request is being made + * through. + * @pma: The protected memory allocation whose physical address + * shall be retrieved. + * + * Return: The physical address of the given allocation. + */ + phys_addr_t (*pma_get_phys_addr)( + struct protected_memory_allocator_device *pma_dev, + struct protected_memory_allocation *pma); + + /** + * pma_free_page - Free a protected memory allocation + * + * @pma_dev: The protected memory allocator the request is being made + * through. + * @pma: The protected memory allocation to free.
+ */ + void (*pma_free_page)( + struct protected_memory_allocator_device *pma_dev, + struct protected_memory_allocation *pma); +}; + +/** + * struct protected_memory_allocator_device - Device structure for protected + * memory allocator + * + * @ops: Callbacks associated with this device + * @owner: Pointer to the module owner + * + * In order for a system integrator to provide custom behaviors for protected + * memory operations performed by the kbase module (controller driver), + * they shall provide a platform-specific driver module which implements + * this interface. + * + * This structure should be registered with the platform device using + * platform_set_drvdata(). + */ +struct protected_memory_allocator_device { + struct protected_memory_allocator_ops ops; + struct module *owner; +}; + +#endif /* _PROTECTED_MEMORY_ALLOCATOR_H_ */ diff --git a/bifrost/r25p0/kernel/include/linux/protected_mode_switcher.h b/bifrost/r25p0/kernel/include/linux/protected_mode_switcher.h new file mode 100644 index 0000000..8778d81 --- /dev/null +++ b/bifrost/r25p0/kernel/include/linux/protected_mode_switcher.h @@ -0,0 +1,69 @@ +/* + * + * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html.
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _PROTECTED_MODE_SWITCH_H_ +#define _PROTECTED_MODE_SWITCH_H_ + +struct protected_mode_device; + +/** + * struct protected_mode_ops - Callbacks for protected mode switch operations + * + * @protected_mode_enable: Callback to enable protected mode for device + * @protected_mode_disable: Callback to disable protected mode for device + */ +struct protected_mode_ops { + /** + * protected_mode_enable() - Enable protected mode on device + * @protected_dev: The protected mode device + * + * Return: 0 on success, non-zero on error + */ + int (*protected_mode_enable)( + struct protected_mode_device *protected_dev); + + /** + * protected_mode_disable() - Disable protected mode on device, and + * reset device + * @protected_dev: The protected mode device + * + * Return: 0 on success, non-zero on error + */ + int (*protected_mode_disable)( + struct protected_mode_device *protected_dev); +}; + +/** + * struct protected_mode_device - Device structure for protected mode devices + * + * @ops: Callbacks associated with this device + * @data: Pointer to device private data + * + * This structure should be registered with the platform device using + * platform_set_drvdata(). + */ +struct protected_mode_device { + struct protected_mode_ops ops; + void *data; +}; + +#endif /* _PROTECTED_MODE_SWITCH_H_ */ diff --git a/bifrost/r25p0/kernel/license.txt b/bifrost/r25p0/kernel/license.txt new file mode 100644 index 0000000..77c14bd --- /dev/null +++ b/bifrost/r25p0/kernel/license.txt @@ -0,0 +1,198 @@ +GPLV2 LICENCE AGREEMENT FOR MALI GPUS LINUX KERNEL DEVICE DRIVERS SOURCE CODE + +THE USE OF THE SOFTWARE ACCOMPANYING THIS DOCUMENT IS EXPRESSLY SUBJECT TO THE TERMS OF THE GNU GENERAL PUBLIC LICENSE VERSION 2 AS PUBLISHED BY THE FREE SOFTWARE FOUNDATION AND SET OUT BELOW FOR REFERENCE (?GPL LICENCE?). ARM IS ONLY WILLING TO DISTRIBUTE THE SOFTWARE TO YOU ON CONDITION THAT YOU ACCEPT ALL OF THE TERMS IN THE GPL LICENCE PRIOR TO MODIFYING OR DISTRIBUTING THE SOFTWARE.
+ + + +Further for the period of three (3) years, ARM hereby offers to make available the source code of any part of the software program that is supplied as object code or in executable form. + + + +GPL Licence + + + +GNU GENERAL PUBLIC LICENSE + +Version 2, June 1991 + + + +Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. + + + +Preamble + + + +The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. + + + +When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. + + + +To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. + + + +For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. 
You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. + + + +We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. + + + +Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. + + + +Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. + + + +The precise terms and conditions for copying, distribution and modification follow. + + + +GNU GENERAL PUBLIC LICENSE + +TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + + +0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". + + + +Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. 
The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). + +Whether that is true depends on what the Program does. + + + +1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; + +and give any other recipients of the Program a copy of this License along with the Program. + + + +You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. + + + +2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: + + + +a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. + + + +b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. + + + +c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. 
(Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) + + + +These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. + + + +Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. + + + +In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. + + + +3. 
You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: + + + +a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, + + + +b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, + + + +c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) + + + +The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. 
+ + + +If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. + + + +4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. + + + +5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the + +Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. + + + +6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. + +You are not responsible for enforcing compliance by third parties to this License. + + + +7. 
If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. + + + +If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. + + + +It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. + + + +This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. + + + +8. 
If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. + + + +9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. + + + +Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. + + + +10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. + + + +NO WARRANTY + + + +11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + + +12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +/end + diff --git a/bifrost/r25p0/kernel/patches/trusted_firmware_gpu_coherency_toggle.patch b/bifrost/r25p0/kernel/patches/trusted_firmware_gpu_coherency_toggle.patch new file mode 100644 index 0000000..985d423 --- /dev/null +++ b/bifrost/r25p0/kernel/patches/trusted_firmware_gpu_coherency_toggle.patch @@ -0,0 +1,117 @@ +commit 5f0a5507eb6c9ed3745b89a1246bb7a7b6141857 +Author: Petter Lundanes +Date: Mon Feb 9 14:37:06 2015 +0100 + + Adds a service to enable/disable GPU cache snooping + + Adds a service to enable/disable cache snooping over + CCI for GPU on request from SMC (mali kernel driver). 
+ +diff --git a/bl31/bl31.mk b/bl31/bl31.mk +index 4c25a60..cd0b530 100644 +--- a/bl31/bl31.mk ++++ b/bl31/bl31.mk +@@ -50,7 +50,8 @@ BL31_SOURCES += bl31/bl31_main.c \ + services/std_svc/psci/psci_helpers.S \ + services/std_svc/psci/psci_main.c \ + services/std_svc/psci/psci_setup.c \ +- services/std_svc/psci/psci_system_off.c ++ services/std_svc/psci/psci_system_off.c \ ++ services/sips/arm/css/svc.c + + ifeq (${USE_COHERENT_MEM}, 1) + BL31_SOURCES += lib/locks/bakery/bakery_lock_coherent.c +diff --git a/services/sips/arm/css/svc.c b/services/sips/arm/css/svc.c +new file mode 100644 +index 0000000..bbc63a4 +--- /dev/null ++++ b/services/sips/arm/css/svc.c +@@ -0,0 +1,88 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++ ++/* This should be updated to the CCI port the GPU is connected to on your platform. */ ++#define GPU_CCI_PORT 3U ++ ++/* ++ * See SMC calling convention for details. ++ * Bit 31 - 1 = Fast call. ++ * Bits 29:24 - 0x2 = SIP service call. ++ * Bits 15:00 - Function number. ++ * ++ * These must correspond to the IDs given in device tree for the device driver. ++ */ ++#define SMC_FID_ENABLE_COHERENCY (0x8200ff04) ++#define SMC_FID_DISABLE_COHERENCY (0x8200ff05) ++ ++/* Set up CSS services: nothing to initialise, so this always returns 0 */ ++static int32_t css_svc_setup(void) ++{ ++ return 0; ++} ++ ++static void cci_enable_gpu_coherency(unsigned int cciport) ++{ ++ /* Enable snoops on the given CCI slave interface */ ++ mmio_write_32(CCI400_BASE + SLAVE_IFACE_OFFSET(cciport) + SNOOP_CTRL_REG, ++ (SNOOP_EN_BIT)); ++ ++ /* Poll the CCI status register until no change is pending */ ++ while (mmio_read_32(CCI400_BASE + STATUS_REG) & CHANGE_PENDING_BIT) ++ ; ++} ++static void cci_disable_gpu_coherency(unsigned int cciport) ++{ ++ /* Disable snoops on the given CCI slave interface */ ++ mmio_write_32(CCI400_BASE + SLAVE_IFACE_OFFSET(cciport) + SNOOP_CTRL_REG, ++ 0U); ++ ++ /* Poll the CCI status register until no change is pending */ ++ while (mmio_read_32(CCI400_BASE + STATUS_REG) & CHANGE_PENDING_BIT) ++ ; ++} ++ ++/* ++ * Top-level CSS Service SMC handler; dispatches on smc_fid and returns SMC_UNK for unknown IDs.
++ */ ++uint64_t css_svc_smc_handler(uint32_t smc_fid, ++ uint64_t x1, ++ uint64_t x2, ++ uint64_t x3, ++ uint64_t x4, ++ void *cookie, ++ void *handle, ++ uint64_t flags) ++{ ++ switch (smc_fid) { ++ case SMC_FID_ENABLE_COHERENCY: ++ { ++ cci_enable_gpu_coherency(GPU_CCI_PORT); ++ SMC_RET1(handle, 0x0); ++ } ++ case SMC_FID_DISABLE_COHERENCY: ++ { ++ cci_disable_gpu_coherency(GPU_CCI_PORT); ++ SMC_RET1(handle, 0x0); ++ } ++ default: ++ WARN("Unimplemented CSS Service Call: 0x%x \n", smc_fid); ++ SMC_RET1(handle, SMC_UNK); ++ } ++} ++ ++/* Register as runtime service */ ++DECLARE_RT_SVC( ++ css_svc, ++ ++ OEN_SIP_START, ++ OEN_SIP_END, ++ SMC_TYPE_FAST, ++ css_svc_setup, ++ css_svc_smc_handler ++); diff --git a/bifrost/r25p0 b/bifrost/r32p1 similarity index 100% rename from bifrost/r25p0 rename to bifrost/r32p1 diff --git a/dvalin/kernel/Documentation/ABI/testing/sysfs-device-mali b/dvalin/kernel/Documentation/ABI/testing/sysfs-device-mali new file mode 100644 index 0000000..99f8ae5 --- /dev/null +++ b/dvalin/kernel/Documentation/ABI/testing/sysfs-device-mali @@ -0,0 +1,293 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +What: /sys/class/misc/mali%u/device/core_mask +Description: + This attribute is used to restrict the number of shader cores + available in this instance, is useful for debugging purposes. + Reading this attribute provides us mask of all cores available. + Writing to it will set the current core mask. Doesn't + allow disabling all the cores present in this instance.
+ +What: /sys/class/misc/mali%u/device/debug_command +Description: + This attribute is used to issue debug commands that are supported + by the driver. On reading it provides the list of debug commands + that are supported, and writing back one of those commands will + enable that debug option. + +What: /sys/class/misc/mali%u/device/dvfs_period +Description: + This is used to set the DVFS sampling period to be used by the + driver. On reading it provides the current DVFS sampling period; + on writing a value it sets the DVFS sampling period. + +What: /sys/class/misc/mali%u/device/dummy_job_wa_info +Description: + This attribute is available only with platform device that + supports a Job Manager based GPU that requires a GPU workaround + to execute the dummy fragment job on all shader cores to + work around a hang issue. + + It is a read-only attribute and on reading gives details on the + options used with the dummy workaround. + +What: /sys/class/misc/mali%u/device/fw_timeout +Description: + This attribute is available only with mali platform + device-driver that supports a CSF GPU. This attribute is + used to set the duration value in milliseconds for the + waiting timeout used for a GPU status change request being + acknowledged by the FW. + +What: /sys/class/misc/mali%u/device/gpuinfo +Description: + This attribute provides a description of the present Mali GPU. + It is a read-only attribute that provides details like GPU family, the + number of cores, the hardware version and the raw product id. + +What: /sys/class/misc/mali%u/device/idle_hysteresis_time +Description: + This attribute is available only with mali platform + device-driver that supports a CSF GPU. This attribute is + used to set the duration value in milliseconds for + configuring the hysteresis field used to determine GPU idle detection. + +What: /sys/class/misc/mali%u/device/js_ctx_scheduling_mode +Description: + This attribute is available only with platform device that + supports a Job Manager based GPU.
This attribute is used to set + context scheduling priority for a job slot. + + On Reading it provides the currently set job slot context + priority. + + Writing 0 to this attribute sets it to the mode were + higher priority atoms will be scheduled first, regardless of + the context they belong to. Newly-runnable higher priority atoms + can preempt lower priority atoms currently running on the GPU, + even if they belong to a different context. + + Writing 1 to this attribute set it to the mode were the + highest-priority atom will be chosen from each context in turn + using a round-robin algorithm, so priority only has an effect + within the context an atom belongs to. Newly-runnable higher + priority atoms can preempt the lower priority atoms currently + running on the GPU, but only if they belong to the same context. + +What: /sys/class/misc/mali%u/device/js_scheduling_period +Description: + This attribute is available only with platform device that + supports a Job Manager based GPU. Used to set the job scheduler + tick period in nano-seconds. The Job Scheduler determines the + jobs that are run on the GPU, and for how long, Job Scheduler + makes decisions at a regular time interval determined by value + in js_scheduling_period. + +What: /sys/class/misc/mali%u/device/js_softstop_always +Description: + This attribute is available only with platform device that + supports a Job Manager based GPU. Soft-stops are disabled when + only a single context is present, this attribute is used to + enable soft-stop when only a single context is present can be + used for debug and unit-testing purposes. + +What: /sys/class/misc/mali%u/device/js_timeouts +Description: + This attribute is available only with platform device that + supports a Job Manager based GPU. It used to set the soft stop + and hard stop times for the job scheduler. + + Writing value 0 causes no change, or -1 to restore the + default timeout. 
The format used to set js_timeouts is + "<soft_stop_ms> <soft_stop_ms_cl> <hard_stop_ms_ss> + <hard_stop_ms_cl> <hard_stop_ms_dumping> <reset_ms_ss> + <reset_ms_cl> <reset_ms_dumping>"
+ +What: /sys/class/misc/mali%u/device/device/mempool/max_size +Description: + This attribute is used to set the maximum number of small pages + for all the memory pools that the driver can contain. + Here small pages are of size 4KB. + +What: /sys/class/misc/mali%u/device/pm_poweroff +Description: + This attribute contains the current values, represented as the + following space-separated integers: + • PM_GPU_POWEROFF_TICK_NS. + • PM_POWEROFF_TICK_SHADER. + • PM_POWEROFF_TICK_GPU. + + Example: + echo 100000 4 4 > /sys/class/misc/mali0/device/pm_poweroff + + Sets the following new values: 100,000ns tick, four ticks + for shader power down, and four ticks for GPU power down. + +What: /sys/class/misc/mali%u/device/power_policy +Description: + This attribute is used to find the current power policy been + used, reading will list the power policies available and + enclosed in square bracket is the current one been selected. + + Example: + cat /sys/class/misc/mali0/device/power_policy + [demand] coarse_demand always_on + + To switch to a different policy at runtime write the valid entry + name back to the attribute. + + Example: + echo "coarse_demand" > /sys/class/misc/mali0/device/power_policy + +What: /sys/class/misc/mali%u/device/progress_timeout +Description: + This attribute is available only with mali platform + device-driver that supports a CSF GPU. This attribute + is used to set the progress timeout value and read the current + progress timeout value. + + Progress timeout value is the maximum number of GPU cycles + without forward progress to allow to elapse before terminating a + GPU command queue group. + +What: /sys/class/misc/mali%u/device/reset_timeout +Description: + This attribute is used to set the number of milliseconds to + wait for the soft stop to complete for the GPU jobs before + proceeding with the GPU reset. 
+ +What: /sys/class/misc/mali%u/device/soft_job_timeout +Description: + This attribute is available only with platform device that + supports a Job Manager based GPU. It used to set the timeout + value for waiting for any soft event to complete. + +What: /sys/class/misc/mali%u/device/scheduling/serialize_jobs +Description: + This attribute is available only with platform device that + supports a Job Manager based GPU. + + Various options available under this are: + • none - for disabling serialization. + • intra-slot - Serialize atoms within a slot, only one + atom per job slot. + • inter-slot - Serialize atoms between slots, only one + job slot running at any time. + • full - it a combination of both inter and intra slot, + so only one atom and one job slot running + at any time. + • full-reset - full serialization and Reset the GPU after + each atom completion + + These options are useful for debugging and investigating + failures and gpu hangs to narrow down atoms that could cause + troubles. + +What: /sys/class/misc/mali%u/device/firmware_config/Compute iterator count/* +Description: + This attribute is available only with mali platform + device-driver that supports a CSF GPU. Its a read-only attribute + which indicates the maximum number of Compute iterators + supported by the GPU. + +What: /sys/class/misc/mali%u/device/firmware_config/CSHWIF count/* +Description: + This attribute is available only with mali platform + device-driver that supports a CSF GPU. Its a read-only + attribute which indicates the maximum number of CSHWIFs + supported by the GPU. + +What: /sys/class/misc/mali%u/device/firmware_config/Fragment iterator count/* +Description: + This attribute is available only with mali platform + device-driver that supports a CSF GPU. Its a read-only + attribute which indicates the maximum number of + Fragment iterators supported by the GPU. 
+ +What: /sys/class/misc/mali%u/device/firmware_config/Scoreboard set count/* +Description: + This attribute is available only with mali platform + device-driver that supports a CSF GPU. Its a read-only + attribute which indicates the maximum number of + Scoreboard set supported by the GPU. + +What: /sys/class/misc/mali%u/device/firmware_config/Tiler iterator count/* +Description: + This attribute is available only with mali platform + device-driver that supports a CSF GPU. Its a read-only + attribute which indicates the maximum number of Tiler iterators + supported by the GPU. + +What: /sys/class/misc/mali%u/device/firmware_config/Log verbosity/* +Description: + This attribute is available only with mali platform + device-driver that supports a CSF GPU. + + Used to enable firmware logs, logging levels valid values + are indicated using 'min and 'max' attribute values + values that are read-only. + + Log level can be set using the 'cur' read, write attribute, + we can use a valid log level value from min and max range values + and set a valid desired log level for firmware logs. diff --git a/dvalin/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt b/dvalin/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt index dd8f733..a74d569 100644 --- a/dvalin/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt +++ b/dvalin/kernel/Documentation/devicetree/bindings/arm/mali-midgard.txt @@ -1,10 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2013-2020 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software # Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. +# of such GNU license. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -15,8 +16,6 @@ # along with this program; if not, you can access it online at # http://www.gnu.org/licenses/gpl-2.0.html. # -# SPDX-License-Identifier: GPL-2.0 -# # * ARM Mali Midgard / Bifrost devices @@ -46,12 +45,12 @@ Documentation/devicetree/bindings/regulator/regulator.txt for details. This is optional. - operating-points-v2 : Refer to Documentation/devicetree/bindings/power/mali-opp.txt for details. -- quirks_jm : Used to write to the JM_CONFIG register or equivalent. +- quirks_gpu : Used to write to the JM_CONFIG or CSF_CONFIG register. Should be used with care. Options passed here are used to override certain default behavior. Note: This will override 'idvs-group-size' field in devicetree and module param 'corestack_driver_control', - therefore if 'quirks_jm' is used then 'idvs-group-size' and - 'corestack_driver_control' value should be incorporated into 'quirks_jm'. + therefore if 'quirks_gpu' is used then 'idvs-group-size' and + 'corestack_driver_control' value should be incorporated into 'quirks_gpu'. - quirks_sc : Used to write to the SHADER_CONFIG register. Should be used with care. Options passed here are used to override certain default behavior. @@ -64,8 +63,8 @@ for details. - power_model : Sets the power model parameters. Defined power models include: "mali-simple-power-model", "mali-g51-power-model", "mali-g52-power-model", "mali-g52_r1-power-model", "mali-g71-power-model", "mali-g72-power-model", - "mali-g76-power-model", "mali-g77-power-model", "mali-tnax-power-model" - and "mali-tbex-power-model". + "mali-g76-power-model", "mali-g77-power-model", "mali-tnax-power-model", + "mali-tbex-power-model" and "mali-tbax-power-model". - mali-simple-power-model: this model derives the GPU power usage based on the GPU voltage scaled by the system temperature. 
Note: it was designed for the Juno platform, and may not be suitable for others. @@ -98,6 +97,8 @@ for details. are used at different points so care should be taken to configure both power models in the device tree (specifically dynamic-coefficient, static-coefficient and scale) to best match the platform. +- power_policy : Sets the GPU power policy at probe time. Available options are + "coarse_demand" and "always_on". If not set, then "coarse_demand" is used. - system-coherency : Sets the coherency protocol to be used for coherent accesses made from the GPU. If not set then no coherency is used. diff --git a/dvalin/kernel/Documentation/devicetree/bindings/arm/memory_group_manager.txt b/dvalin/kernel/Documentation/devicetree/bindings/arm/memory_group_manager.txt index fda8f00..634973f 100644 --- a/dvalin/kernel/Documentation/devicetree/bindings/arm/memory_group_manager.txt +++ b/dvalin/kernel/Documentation/devicetree/bindings/arm/memory_group_manager.txt @@ -1,10 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2019 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software # Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. +# of such GNU license. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -15,8 +16,6 @@ # along with this program; if not, you can access it online at # http://www.gnu.org/licenses/gpl-2.0.html. 
# -# SPDX-License-Identifier: GPL-2.0 -# # * Arm memory group manager for Mali GPU device drivers diff --git a/dvalin/kernel/Documentation/devicetree/bindings/arm/priority_control_manager.txt b/dvalin/kernel/Documentation/devicetree/bindings/arm/priority_control_manager.txt new file mode 100644 index 0000000..c7dd14f --- /dev/null +++ b/dvalin/kernel/Documentation/devicetree/bindings/arm/priority_control_manager.txt @@ -0,0 +1,48 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. 
+# +# + +* Arm priority control manager for Mali GPU device drivers + +Required properties: + +- compatible: Must be "arm,priority-control-manager" + +An example node: + + gpu_priority_control_manager: priority-control-manager { + compatible = "arm,priority-control-manager"; + }; + +It must be referenced by the GPU as well, see priority-control-manager: + + gpu: gpu@0x6e000000 { + compatible = "arm,mali-midgard"; + reg = <0x0 0x6e000000 0x0 0x200000>; + interrupts = <0 168 4>, <0 168 4>, <0 168 4>; + interrupt-names = "JOB", "MMU", "GPU"; + clocks = <&scpi_dvfs 2>; + clock-names = "clk_mali"; + system-coherency = <31>; + priority-control-manager = <&gpu_priority_control_manager>; + operating-points = < + /* KHz uV */ + 50000 820000 + >; + }; diff --git a/dvalin/kernel/Documentation/devicetree/bindings/arm/protected_memory_allocator.txt b/dvalin/kernel/Documentation/devicetree/bindings/arm/protected_memory_allocator.txt index f054348..89a3cc7 100644 --- a/dvalin/kernel/Documentation/devicetree/bindings/arm/protected_memory_allocator.txt +++ b/dvalin/kernel/Documentation/devicetree/bindings/arm/protected_memory_allocator.txt @@ -1,10 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2019 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software # Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. +# of such GNU license. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -15,8 +16,6 @@ # along with this program; if not, you can access it online at # http://www.gnu.org/licenses/gpl-2.0.html. 
# -# SPDX-License-Identifier: GPL-2.0 -# # * Arm protected memory allocator for Mali GPU device drivers diff --git a/dvalin/kernel/Documentation/devicetree/bindings/power/mali-opp.txt b/dvalin/kernel/Documentation/devicetree/bindings/power/mali-opp.txt index 49ed773..b9c0743 100644 --- a/dvalin/kernel/Documentation/devicetree/bindings/power/mali-opp.txt +++ b/dvalin/kernel/Documentation/devicetree/bindings/power/mali-opp.txt @@ -1,10 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2017, 2019 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2017, 2019-2021 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software # Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. +# of such GNU license. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -15,8 +16,6 @@ # along with this program; if not, you can access it online at # http://www.gnu.org/licenses/gpl-2.0.html. # -# SPDX-License-Identifier: GPL-2.0 -# # * ARM Mali Midgard OPP @@ -54,7 +53,7 @@ Optional properties: - opp-core-count: Number of cores to use for this OPP. If this is present then the driver will build a core mask using the available core mask provided by - the GPU hardware. + the GPU hardware. An opp-core-count value of 0 is not permitted. If neither this nor opp-core-mask are present then all shader cores will be used for this OPP. 
diff --git a/dvalin/kernel/Documentation/dma-buf-test-exporter.txt b/dvalin/kernel/Documentation/dma-buf-test-exporter.txt index 8d8cbc9..b01020c 100644 --- a/dvalin/kernel/Documentation/dma-buf-test-exporter.txt +++ b/dvalin/kernel/Documentation/dma-buf-test-exporter.txt @@ -1,10 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2013, 2020-2021 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software # Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. +# of such GNU license. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -15,10 +16,7 @@ # along with this program; if not, you can access it online at # http://www.gnu.org/licenses/gpl-2.0.html. # -# SPDX-License-Identifier: GPL-2.0 # -# - ===================== dma-buf-test-exporter @@ -42,5 +40,3 @@ It supports being compiled as a module both in-tree and out-of-tree. See include/linux/dma-buf-test-exporter.h for the ioctl interface. See Documentation/dma-buf-sharing.txt for details on dma_buf. - - diff --git a/dvalin/kernel/Mconfig b/dvalin/kernel/Mconfig index e451591..217715c 100644 --- a/dvalin/kernel/Mconfig +++ b/dvalin/kernel/Mconfig @@ -1,27 +1,26 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# -# (C) COPYRIGHT 2017-2020 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2017-2021 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software # Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. +# of such GNU license. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. # -# A copy of the licence is included with the program, and can also be obtained -# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -# Boston, MA 02110-1301, USA. +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. # # -source "kernel/drivers/gpu/arm/midgard/Mconfig" -source "kernel/drivers/gpu/arm/midgard/arbitration/Mconfig" +menu "Kernel menu" -config DMA_BUF_SYNC_IOCTL_SUPPORTED - bool "Kernel DMA buffers support DMA_BUF_IOCTL_SYNC" - depends on BACKEND_KERNEL - default y +source "kernel/drivers/base/arm/Mconfig" +source "kernel/drivers/gpu/arm/midgard/Mconfig" -config BUILD_CSF_ONLY_MODULE - bool "Build CSF GPU specific kernel modules" - depends on BUILD_KERNEL_MODULES && GPU_HAS_CSF - default y +endmenu diff --git a/dvalin/kernel/build.bp b/dvalin/kernel/build.bp index 2bc725f..c97da2c 100644 --- a/dvalin/kernel/build.bp +++ b/dvalin/kernel/build.bp @@ -1,15 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2016-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * - * A copy of the licence is included with the program, and can also be obtained - * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. 
+ * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. * */ @@ -25,6 +31,7 @@ bob_install_group { bob_defaults { name: "kernel_defaults", + build_by_default: false, enabled: false, exclude_srcs: [ "**/*.mod.c", @@ -33,6 +40,7 @@ bob_defaults { "include", ], build_kernel_modules: { + build_by_default: true, enabled: true, kernel_dir: "{{.kernel_dir}}", kernel_cross_compile: "{{.kernel_compiler}}", @@ -42,6 +50,8 @@ bob_defaults { kernel_ld: "{{.kernel_ld}}", }, install_group: "IG_kernel_modules", + add_to_alias: ["kernel"], + owner: "{{.android_module_owner}}", cflags: [ "-Wall", ], @@ -54,49 +64,12 @@ bob_defaults { "optional", ], }, - kbuild_options: [ - // Start of CS experimental features definitions. - // If there is nothing below, definition should be added as follows: - // "MALI_EXPERIMENTAL_FEATURE={{.experimental_feature}}" - // experimental_feature above comes from Mconfig in - // /product/base/ - // However, in Mconfig, experimental_feature should be looked up (for - // similar explanation to this one) as ALLCAPS, i.e. - // EXPERIMENTAL_FEATURE. - // - // IMPORTANT: MALI_CS_EXPERIMENTAL should NEVER be defined below as it - // is an umbrella feature that would be open for inappropriate use - // (catch-all for experimental CS code without separating it into - // different features). 
- "MALI_JIT_PRESSURE_LIMIT={{.jit_pressure_limit}}", - "MALI_INCREMENTAL_RENDERING={{.incremental_rendering}}", - ], -} - -bob_defaults { - name: "kutf_includes", - local_include_dirs: [ - "drivers/gpu/arm/midgard/tests/include", - ], -} - -bob_defaults { - name: "kernel_test_includes", - defaults: ["kutf_includes"], - local_include_dirs: [ - "drivers/gpu/arm", - "drivers/gpu/arm/midgard", - "drivers/gpu/arm/midgard/backend/gpu", - "drivers/gpu/arm/midgard/debug", - "drivers/gpu/arm/midgard/debug/backend", - ], } +// Alias for all kernel modules. `kernel_defaults` uses `add_to_alias` to +// ensure any modules using that are included in this alias; any +// bob_kernel_modules not using those defaults must explicitly use +// `add_to_alias` or be listed here. bob_alias { name: "kernel", - srcs: [ - "dma-buf-test-exporter", - "memory_group_manager", - "mali_kbase", - ], } diff --git a/dvalin/kernel/drivers/base/arm/Kbuild b/dvalin/kernel/drivers/base/arm/Kbuild new file mode 100644 index 0000000..b0fbf93 --- /dev/null +++ b/dvalin/kernel/drivers/base/arm/Kbuild @@ -0,0 +1,34 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. 
+# +# + +# +# ccflags +# +ccflags-y += -I$(src)/../../../include + +subdir-ccflags-y += $(ccflags-y) + +# +# Kernel modules +# +obj-$(CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER) += dma_buf_test_exporter/ +obj-$(CONFIG_MALI_MEMORY_GROUP_MANAGER) += memory_group_manager/ +obj-$(CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR) += protected_memory_allocator/ + diff --git a/dvalin/kernel/drivers/base/arm/Kconfig b/dvalin/kernel/drivers/base/arm/Kconfig new file mode 100644 index 0000000..75d5434 --- /dev/null +++ b/dvalin/kernel/drivers/base/arm/Kconfig @@ -0,0 +1,64 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +menuconfig MALI_BASE_MODULES + bool "Mali Base extra modules" + default n + help + Enable this option to build support for a Arm Mali base modules. + Those modules provide extra features or debug interfaces and, + are optional for the use of the Mali GPU modules. + +config DMA_SHARED_BUFFER_TEST_EXPORTER + bool "Build dma-buf framework test exporter module" + depends on MALI_BASE_MODULES && DMA_SHARED_BUFFER + default y + help + This option will build the dma-buf framework test exporter module. + Usable to help test importers. 
+ + Modules: + - dma-buf-test-exporter.ko + +config MALI_MEMORY_GROUP_MANAGER + bool "Build Mali Memory Group Manager module" + depends on MALI_BASE_MODULES + default y + help + This option will build the memory group manager module. + This is an example implementation for allocation and release of pages + for memory pools managed by Mali GPU device drivers. + + Modules: + - memory_group_manager.ko + +config MALI_PROTECTED_MEMORY_ALLOCATOR + bool "Build Mali Protected Memory Allocator module" + depends on MALI_BASE_MODULES && MALI_CSF_SUPPORT + default y + help + This option will build the protected memory allocator module. + This is an example implementation for allocation and release of pages + of secure memory intended to be used by the firmware + of Mali GPU device drivers. + + Modules: + - protected_memory_allocator.ko + diff --git a/dvalin/kernel/drivers/base/arm/Makefile b/dvalin/kernel/drivers/base/arm/Makefile new file mode 100644 index 0000000..0bd6ab5 --- /dev/null +++ b/dvalin/kernel/drivers/base/arm/Makefile @@ -0,0 +1,98 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. 
+# +# + +# +# Paths +# +KERNEL_SRC ?= /lib/modules/$(shell uname -r)/build +KDIR ?= $(KERNEL_SRC) + +ifeq ($(KDIR),) + $(error Must specify KDIR to point to the kernel to target)) +endif + +vars := +# +# Default configuration values +# +CONFIG_MALI_BASE_MODULES ?= n + +ifeq ($(CONFIG_MALI_BASE_MODULES),y) + CONFIG_MALI_CSF_SUPPORT ?= n + + ifneq ($(CONFIG_DMA_SHARED_BUFFER),n) + CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER ?= y + else + # Prevent misuse when CONFIG_DMA_SHARED_BUFFER=n + CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER = n + endif + + CONFIG_MALI_MEMORY_GROUP_MANAGER ?= y + + ifneq ($(CONFIG_MALI_CSF_SUPPORT), n) + CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR ?= y + endif + +else + # Prevent misuse when CONFIG_MALI_BASE_MODULES=n + CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER = n + CONFIG_MALI_MEMORY_GROUP_MANAGER = n + CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR = n + +endif + +CONFIGS := \ + CONFIG_MALI_BASE_MODULES \ + CONFIG_MALI_CSF_SUPPORT \ + CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER \ + CONFIG_MALI_MEMORY_GROUP_MANAGER \ + CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR + + +# +# MAKE_ARGS to pass the custom CONFIGs on out-of-tree build +# +# Generate the list of CONFIGs and values. +# $(value config) is the name of the CONFIG option. +# $(value $(value config)) is its value (y, m). +# When the CONFIG is not set to y or m, it defaults to n. +MAKE_ARGS := $(foreach config,$(CONFIGS), \ + $(if $(filter y m,$(value $(value config))), \ + $(value config)=$(value $(value config)), \ + $(value config)=n)) + +# +# EXTRA_CFLAGS to define the custom CONFIGs on out-of-tree build +# +# Generate the list of CONFIGs defines with values from CONFIGS. +# $(value config) is the name of the CONFIG option. +# When set to y or m, the CONFIG gets defined to 1. 
+EXTRA_CFLAGS := $(foreach config,$(CONFIGS), \ + $(if $(filter y m,$(value $(value config))), \ + -D$(value config)=1)) + +all: + $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules + +modules_install: + $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) modules_install + +clean: + $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) clean diff --git a/dvalin/kernel/drivers/base/arm/Mconfig b/dvalin/kernel/drivers/base/arm/Mconfig new file mode 100644 index 0000000..d21a72e --- /dev/null +++ b/dvalin/kernel/drivers/base/arm/Mconfig @@ -0,0 +1,64 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +menuconfig MALI_BASE_MODULES + bool "Mali Base extra modules" + default y if BACKEND_KERNEL + help + Enable this option to build support for a Arm Mali base modules. + Those modules provide extra features or debug interfaces and, + are optional for the use of the Mali GPU modules. + +config DMA_SHARED_BUFFER_TEST_EXPORTER + bool "Build dma-buf framework test exporter module" + depends on MALI_BASE_MODULES + default y + help + This option will build the dma-buf framework test exporter module. + Usable to help test importers. 
+ + Modules: + - dma-buf-test-exporter.ko + +config MALI_MEMORY_GROUP_MANAGER + bool "Build Mali Memory Group Manager module" + depends on MALI_BASE_MODULES + default y + help + This option will build the memory group manager module. + This is an example implementation for allocation and release of pages + for memory pools managed by Mali GPU device drivers. + + Modules: + - memory_group_manager.ko + +config MALI_PROTECTED_MEMORY_ALLOCATOR + bool "Build Mali Protected Memory Allocator module" + depends on MALI_BASE_MODULES && GPU_HAS_CSF + default y + help + This option will build the protected memory allocator module. + This is an example implementation for allocation and release of pages + of secure memory intended to be used by the firmware + of Mali GPU device drivers. + + Modules: + - protected_memory_allocator.ko + diff --git a/dvalin/kernel/drivers/base/arm/dma_buf_lock/src/Kbuild b/dvalin/kernel/drivers/base/arm/dma_buf_lock/src/Kbuild new file mode 100644 index 0000000..c7ae332 --- /dev/null +++ b/dvalin/kernel/drivers/base/arm/dma_buf_lock/src/Kbuild @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2012, 2020-2021 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. 
+# +# + +ifeq ($(CONFIG_DMA_SHARED_BUFFER), y) +obj-m := dma_buf_lock.o +endif diff --git a/dvalin/kernel/drivers/base/arm/dma_buf_lock/src/Makefile b/dvalin/kernel/drivers/base/arm/dma_buf_lock/src/Makefile new file mode 100644 index 0000000..451d2d7 --- /dev/null +++ b/dvalin/kernel/drivers/base/arm/dma_buf_lock/src/Makefile @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2012, 2020-2021 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. 
+# +# + +# linux build system bootstrap for out-of-tree module + +# default to building for the host +ARCH ?= $(shell uname -m) + +# Handle Android Common Kernel source naming +KERNEL_SRC ?= /lib/modules/$(shell uname -r)/build +KDIR ?= $(KERNEL_SRC) + +all: dma_buf_lock + +dma_buf_lock: + $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../../include" + +clean: + $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean + +modules_install: + $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) modules_install diff --git a/dvalin/kernel/drivers/base/arm/dma_buf_lock/src/dma_buf_lock.c b/dvalin/kernel/drivers/base/arm/dma_buf_lock/src/dma_buf_lock.c new file mode 100644 index 0000000..f5ab1ed --- /dev/null +++ b/dvalin/kernel/drivers/base/arm/dma_buf_lock/src/dma_buf_lock.c @@ -0,0 +1,898 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2012-2014, 2017-2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) +#include +#else +#include +#endif +#include +#include +#include +#include +#include +#include + +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + +#include + +#define dma_fence_context_alloc(a) fence_context_alloc(a) +#define dma_fence_init(a, b, c, d, e) fence_init(a, b, c, d, e) +#define dma_fence_get(a) fence_get(a) +#define dma_fence_put(a) fence_put(a) +#define dma_fence_signal(a) fence_signal(a) +#define dma_fence_is_signaled(a) fence_is_signaled(a) +#define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c) +#define dma_fence_remove_callback(a, b) fence_remove_callback(a, b) + +#if (KERNEL_VERSION(4, 9, 68) > LINUX_VERSION_CODE) +#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->status ?: 1 : 0) +#else +#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->error ?: 1 : 0) +#endif + +#else + +#include + +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) +#define dma_fence_get_status(a) (dma_fence_is_signaled(a) ? 
\ + (a)->status ?: 1 \ + : 0) +#endif + +#endif /* < 4.10.0 */ + +#include "dma_buf_lock.h" + +/* Maximum number of buffers that a single handle can address */ +#define DMA_BUF_LOCK_BUF_MAX 32 + +#define DMA_BUF_LOCK_DEBUG 1 + +#define DMA_BUF_LOCK_INIT_BIAS 0xFF + +static dev_t dma_buf_lock_dev; +static struct cdev dma_buf_lock_cdev; +static struct class *dma_buf_lock_class; +static char dma_buf_lock_dev_name[] = "dma_buf_lock"; + +#ifdef HAVE_UNLOCKED_IOCTL +static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); +#else +static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); +#endif + +static struct file_operations dma_buf_lock_fops = +{ + .owner = THIS_MODULE, +#ifdef HAVE_UNLOCKED_IOCTL + .unlocked_ioctl = dma_buf_lock_ioctl, +#else + .ioctl = dma_buf_lock_ioctl, +#endif + .compat_ioctl = dma_buf_lock_ioctl, +}; + +typedef struct dma_buf_lock_resource +{ +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence fence; +#else + struct dma_fence fence; +#endif + int *list_of_dma_buf_fds; /* List of buffers copied from userspace */ + atomic_t locked; /* Status of lock */ + struct dma_buf **dma_bufs; + unsigned long exclusive; /* Exclusive access bitmap */ + atomic_t fence_dep_count; /* Number of dma-fence dependencies */ + struct list_head dma_fence_callbacks; /* list of all callbacks set up to wait on other fences */ + wait_queue_head_t wait; + struct kref refcount; + struct list_head link; + struct work_struct work; + int count; +} dma_buf_lock_resource; + +/** + * struct dma_buf_lock_fence_cb - Callback data struct for dma-fence + * @fence_cb: Callback function + * @fence: Pointer to the fence object on which this callback is waiting + * @res: Pointer to dma_buf_lock_resource that is waiting on this callback + * @node: List head for linking this callback to the lock resource + */ +struct dma_buf_lock_fence_cb { +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + 
struct fence_cb fence_cb; + struct fence *fence; +#else + struct dma_fence_cb fence_cb; + struct dma_fence *fence; +#endif + struct dma_buf_lock_resource *res; + struct list_head node; +}; + +static LIST_HEAD(dma_buf_lock_resource_list); +static DEFINE_MUTEX(dma_buf_lock_mutex); + +static inline int is_dma_buf_lock_file(struct file *); +static void dma_buf_lock_dounlock(struct kref *ref); + + +/*** dma_buf_lock fence part ***/ + +/* Spin lock protecting all Mali fences as fence->lock. */ +static DEFINE_SPINLOCK(dma_buf_lock_fence_lock); + +static const char * +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +dma_buf_lock_fence_get_driver_name(struct fence *fence) +#else +dma_buf_lock_fence_get_driver_name(struct dma_fence *fence) +#endif +{ + return "dma_buf_lock"; +} + +static const char * +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +dma_buf_lock_fence_get_timeline_name(struct fence *fence) +#else +dma_buf_lock_fence_get_timeline_name(struct dma_fence *fence) +#endif +{ + return "dma_buf_lock.timeline"; +} + +static bool +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +dma_buf_lock_fence_enable_signaling(struct fence *fence) +#else +dma_buf_lock_fence_enable_signaling(struct dma_fence *fence) +#endif +{ + return true; +} + +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +const struct fence_ops dma_buf_lock_fence_ops = { + .wait = fence_default_wait, +#else +const struct dma_fence_ops dma_buf_lock_fence_ops = { + .wait = dma_fence_default_wait, +#endif + .get_driver_name = dma_buf_lock_fence_get_driver_name, + .get_timeline_name = dma_buf_lock_fence_get_timeline_name, + .enable_signaling = dma_buf_lock_fence_enable_signaling, +}; + +static void +dma_buf_lock_fence_init(dma_buf_lock_resource *resource) +{ + dma_fence_init(&resource->fence, + &dma_buf_lock_fence_ops, + &dma_buf_lock_fence_lock, + 0, + 0); +} + +static void +dma_buf_lock_fence_free_callbacks(dma_buf_lock_resource *resource) +{ + struct dma_buf_lock_fence_cb *cb, *tmp; + + /* 
Clean up and free callbacks. */ + list_for_each_entry_safe(cb, tmp, &resource->dma_fence_callbacks, node) { + /* Cancel callbacks that hasn't been called yet and release the + * reference taken in dma_buf_lock_fence_add_callback(). + */ + dma_fence_remove_callback(cb->fence, &cb->fence_cb); + dma_fence_put(cb->fence); + list_del(&cb->node); + kfree(cb); + } +} + +static void +dma_buf_lock_fence_work(struct work_struct *pwork) +{ + dma_buf_lock_resource *resource = + container_of(pwork, dma_buf_lock_resource, work); + + WARN_ON(atomic_read(&resource->fence_dep_count)); + WARN_ON(!atomic_read(&resource->locked)); + WARN_ON(!resource->exclusive); + + mutex_lock(&dma_buf_lock_mutex); + kref_put(&resource->refcount, dma_buf_lock_dounlock); + mutex_unlock(&dma_buf_lock_mutex); +} + +static void +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +dma_buf_lock_fence_callback(struct fence *fence, struct fence_cb *cb) +#else +dma_buf_lock_fence_callback(struct dma_fence *fence, struct dma_fence_cb *cb) +#endif +{ + struct dma_buf_lock_fence_cb *dma_buf_lock_cb = container_of(cb, + struct dma_buf_lock_fence_cb, + fence_cb); + dma_buf_lock_resource *resource = dma_buf_lock_cb->res; + +#if DMA_BUF_LOCK_DEBUG + printk(KERN_DEBUG "dma_buf_lock_fence_callback\n"); +#endif + + /* Callback function will be invoked in atomic context. 
*/ + + if (atomic_dec_and_test(&resource->fence_dep_count)) { + atomic_set(&resource->locked, 1); + wake_up(&resource->wait); + + if (resource->exclusive) + /* Warn if the work was already queued */ + WARN_ON(!schedule_work(&resource->work)); + } +} + +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +static int +dma_buf_lock_fence_add_callback(dma_buf_lock_resource *resource, + struct fence *fence, + fence_func_t callback) +#else +static int +dma_buf_lock_fence_add_callback(dma_buf_lock_resource *resource, + struct dma_fence *fence, + dma_fence_func_t callback) +#endif +{ + int err = 0; + struct dma_buf_lock_fence_cb *fence_cb; + + if (!fence) + return -EINVAL; + + fence_cb = kmalloc(sizeof(*fence_cb), GFP_KERNEL); + if (!fence_cb) + return -ENOMEM; + + fence_cb->fence = fence; + fence_cb->res = resource; + INIT_LIST_HEAD(&fence_cb->node); + + err = dma_fence_add_callback(fence, &fence_cb->fence_cb, + callback); + + if (err == -ENOENT) { + /* Fence signaled, get the completion result */ + err = dma_fence_get_status(fence); + + /* remap success completion to err code */ + if (err == 1) + err = 0; + + kfree(fence_cb); + } else if (err) { + kfree(fence_cb); + } else { + /* + * Get reference to fence that will be kept until callback gets + * cleaned up in dma_buf_lock_fence_free_callbacks(). 
+ */ + dma_fence_get(fence); + atomic_inc(&resource->fence_dep_count); + /* Add callback to resource's list of callbacks */ + list_add(&fence_cb->node, &resource->dma_fence_callbacks); + } + + return err; +} + +#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) +static int +dma_buf_lock_add_fence_reservation_callback(dma_buf_lock_resource *resource, + struct reservation_object *resv, + bool exclusive) +#else +static int +dma_buf_lock_add_fence_reservation_callback(dma_buf_lock_resource *resource, + struct dma_resv *resv, + bool exclusive) +#endif +{ +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *excl_fence = NULL; + struct fence **shared_fences = NULL; +#else + struct dma_fence *excl_fence = NULL; + struct dma_fence **shared_fences = NULL; +#endif + unsigned int shared_count = 0; + int err, i; + +#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + err = reservation_object_get_fences_rcu( +#else + err = dma_resv_get_fences_rcu( +#endif + resv, + &excl_fence, + &shared_count, + &shared_fences); + if (err) + return err; + + if (excl_fence) { + err = dma_buf_lock_fence_add_callback(resource, + excl_fence, + dma_buf_lock_fence_callback); + + /* Release our reference, taken by reservation_object_get_fences_rcu(), + * to the fence. We have set up our callback (if that was possible), + * and it's the fence's owner is responsible for singling the fence + * before allowing it to disappear. + */ + dma_fence_put(excl_fence); + + if (err) + goto out; + } + + if (exclusive) { + for (i = 0; i < shared_count; i++) { + err = dma_buf_lock_fence_add_callback(resource, + shared_fences[i], + dma_buf_lock_fence_callback); + if (err) + goto out; + } + } + + /* Release all our references to the shared fences, taken by + * reservation_object_get_fences_rcu(). We have set up our callback (if + * that was possible), and it's the fence's owner is responsible for + * signaling the fence before allowing it to disappear. 
+ */ +out: + for (i = 0; i < shared_count; i++) + dma_fence_put(shared_fences[i]); + kfree(shared_fences); + + return err; +} + +static void +dma_buf_lock_release_fence_reservation(dma_buf_lock_resource *resource, + struct ww_acquire_ctx *ctx) +{ + unsigned int r; + + for (r = 0; r < resource->count; r++) + ww_mutex_unlock(&resource->dma_bufs[r]->resv->lock); + ww_acquire_fini(ctx); +} + +static int +dma_buf_lock_acquire_fence_reservation(dma_buf_lock_resource *resource, + struct ww_acquire_ctx *ctx) +{ +#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + struct reservation_object *content_resv = NULL; +#else + struct dma_resv *content_resv = NULL; +#endif + unsigned int content_resv_idx = 0; + unsigned int r; + int err = 0; + + ww_acquire_init(ctx, &reservation_ww_class); + +retry: + for (r = 0; r < resource->count; r++) { + if (resource->dma_bufs[r]->resv == content_resv) { + content_resv = NULL; + continue; + } + + err = ww_mutex_lock(&resource->dma_bufs[r]->resv->lock, ctx); + if (err) + goto error; + } + + ww_acquire_done(ctx); + return err; + +error: + content_resv_idx = r; + + /* Unlock the locked one ones */ + while (r--) + ww_mutex_unlock(&resource->dma_bufs[r]->resv->lock); + + if (content_resv) + ww_mutex_unlock(&content_resv->lock); + + /* If we deadlock try with lock_slow and retry */ + if (err == -EDEADLK) { +#if DMA_BUF_LOCK_DEBUG + printk(KERN_DEBUG "deadlock at dma_buf fd %i\n", + resource->list_of_dma_buf_fds[content_resv_idx]); +#endif + content_resv = resource->dma_bufs[content_resv_idx]->resv; + ww_mutex_lock_slow(&content_resv->lock, ctx); + goto retry; + } + + /* If we are here the function failed */ + ww_acquire_fini(ctx); + return err; +} + +static int dma_buf_lock_handle_release(struct inode *inode, struct file *file) +{ + dma_buf_lock_resource *resource; + + if (!is_dma_buf_lock_file(file)) + return -EINVAL; + + resource = file->private_data; +#if DMA_BUF_LOCK_DEBUG + printk("dma_buf_lock_handle_release\n"); +#endif + 
mutex_lock(&dma_buf_lock_mutex); + kref_put(&resource->refcount, dma_buf_lock_dounlock); + mutex_unlock(&dma_buf_lock_mutex); + + return 0; +} + +static unsigned int dma_buf_lock_handle_poll(struct file *file, + struct poll_table_struct *wait) +{ + dma_buf_lock_resource *resource; + unsigned int ret = 0; + + if (!is_dma_buf_lock_file(file)) + return POLLERR; + + resource = file->private_data; +#if DMA_BUF_LOCK_DEBUG + printk("dma_buf_lock_handle_poll\n"); +#endif + if (atomic_read(&resource->locked) == 1) { + /* Resources have been locked */ + ret = POLLIN | POLLRDNORM; + if (resource->exclusive) + ret |= POLLOUT | POLLWRNORM; + } + else + { + if (!poll_does_not_wait(wait)) + poll_wait(file, &resource->wait, wait); + } +#if DMA_BUF_LOCK_DEBUG + printk("dma_buf_lock_handle_poll : return %i\n", ret); +#endif + return ret; +} + +static const struct file_operations dma_buf_lock_handle_fops = { + .owner = THIS_MODULE, + .release = dma_buf_lock_handle_release, + .poll = dma_buf_lock_handle_poll, +}; + +/* + * is_dma_buf_lock_file - Check if struct file* is associated with dma_buf_lock + */ +static inline int is_dma_buf_lock_file(struct file *file) +{ + return file->f_op == &dma_buf_lock_handle_fops; +} + + + +/* + * Start requested lock. + * + * Allocates required memory, copies dma_buf_fd list from userspace, + * acquires related reservation objects, and starts the lock. 
+ */ +static int dma_buf_lock_dolock(dma_buf_lock_k_request *request) +{ + dma_buf_lock_resource *resource; + struct ww_acquire_ctx ww_ctx; + int size; + int fd; + int i; + int ret; + + if (request->list_of_dma_buf_fds == NULL) + return -EINVAL; + if (request->count <= 0) + return -EINVAL; + if (request->count > DMA_BUF_LOCK_BUF_MAX) + return -EINVAL; + if (request->exclusive != DMA_BUF_LOCK_NONEXCLUSIVE && + request->exclusive != DMA_BUF_LOCK_EXCLUSIVE) + return -EINVAL; + + resource = kzalloc(sizeof(dma_buf_lock_resource), GFP_KERNEL); + if (resource == NULL) + return -ENOMEM; + + atomic_set(&resource->locked, 0); + kref_init(&resource->refcount); + INIT_LIST_HEAD(&resource->link); + INIT_WORK(&resource->work, dma_buf_lock_fence_work); + resource->count = request->count; + + /* Allocate space to store dma_buf_fds received from user space */ + size = request->count * sizeof(int); + resource->list_of_dma_buf_fds = kmalloc(size, GFP_KERNEL); + + if (resource->list_of_dma_buf_fds == NULL) { + kfree(resource); + return -ENOMEM; + } + + /* Allocate space to store dma_buf pointers associated with dma_buf_fds */ + size = sizeof(struct dma_buf *) * request->count; + resource->dma_bufs = kmalloc(size, GFP_KERNEL); + + if (resource->dma_bufs == NULL) { + kfree(resource->list_of_dma_buf_fds); + kfree(resource); + return -ENOMEM; + } + + /* Copy requested list of dma_buf_fds from user space */ + size = request->count * sizeof(int); + if (copy_from_user(resource->list_of_dma_buf_fds, + (void __user *)request->list_of_dma_buf_fds, + size) != 0) { + kfree(resource->list_of_dma_buf_fds); + kfree(resource->dma_bufs); + kfree(resource); + return -ENOMEM; + } +#if DMA_BUF_LOCK_DEBUG + for (i = 0; i < request->count; i++) + printk("dma_buf %i = %X\n", i, resource->list_of_dma_buf_fds[i]); +#endif + + /* Initialize the fence associated with dma_buf_lock resource */ + dma_buf_lock_fence_init(resource); + + INIT_LIST_HEAD(&resource->dma_fence_callbacks); + + 
atomic_set(&resource->fence_dep_count, DMA_BUF_LOCK_INIT_BIAS); + + /* Add resource to global list */ + mutex_lock(&dma_buf_lock_mutex); + + list_add(&resource->link, &dma_buf_lock_resource_list); + + mutex_unlock(&dma_buf_lock_mutex); + + for (i = 0; i < request->count; i++) + { + /* Convert fd into dma_buf structure */ + resource->dma_bufs[i] = dma_buf_get(resource->list_of_dma_buf_fds[i]); + + if (IS_ERR_VALUE(PTR_ERR(resource->dma_bufs[i]))) + { + mutex_lock(&dma_buf_lock_mutex); + kref_put(&resource->refcount, dma_buf_lock_dounlock); + mutex_unlock(&dma_buf_lock_mutex); + return -EINVAL; + } + + /*Check the reservation object associated with dma_buf */ + if (resource->dma_bufs[i]->resv == NULL) { + mutex_lock(&dma_buf_lock_mutex); + kref_put(&resource->refcount, dma_buf_lock_dounlock); + mutex_unlock(&dma_buf_lock_mutex); + return -EINVAL; + } +#if DMA_BUF_LOCK_DEBUG + printk(KERN_DEBUG "dma_buf_lock_dolock : dma_buf_fd %i dma_buf %p dma_fence reservation %p\n", + resource->list_of_dma_buf_fds[i], resource->dma_bufs[i], resource->dma_bufs[i]->resv); +#endif + } + + init_waitqueue_head(&resource->wait); + + kref_get(&resource->refcount); + + /* Create file descriptor associated with lock request */ + fd = anon_inode_getfd("dma_buf_lock", &dma_buf_lock_handle_fops, + (void *)resource, 0); + if (fd < 0) + { + mutex_lock(&dma_buf_lock_mutex); + kref_put(&resource->refcount, dma_buf_lock_dounlock); + kref_put(&resource->refcount, dma_buf_lock_dounlock); + mutex_unlock(&dma_buf_lock_mutex); + return fd; + } + + resource->exclusive = request->exclusive; + + /* Start locking process */ + ret = dma_buf_lock_acquire_fence_reservation(resource, &ww_ctx); + if (ret) { +#if DMA_BUF_LOCK_DEBUG + printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d locking reservations.\n", ret); +#endif + put_unused_fd(fd); + mutex_lock(&dma_buf_lock_mutex); + kref_put(&resource->refcount, dma_buf_lock_dounlock); + kref_put(&resource->refcount, dma_buf_lock_dounlock); + 
mutex_unlock(&dma_buf_lock_mutex); + return ret; + } + + /* Take an extra reference for exclusive access, which will be dropped + * once the pre-existing fences attached to dma-buf resources, for which + * we have commited for exclusive access, are signaled. + * At a given time there can be only one exclusive fence attached to a + * reservation object, so the new exclusive fence replaces the original + * fence and the future sync is done against the new fence which is + * supposed to be signaled only after the original fence was signaled. + * If the new exclusive fence is signaled prematurely then the resources + * would become available for new access while they are already being + * written to by the original owner. + */ + if (resource->exclusive) + kref_get(&resource->refcount); + + for (i = 0; i < request->count; i++) { +#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + struct reservation_object *resv = resource->dma_bufs[i]->resv; +#else + struct dma_resv *resv = resource->dma_bufs[i]->resv; +#endif + if (!test_bit(i, &resource->exclusive)) { + +#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + ret = reservation_object_reserve_shared(resv); +#else + ret = dma_resv_reserve_shared(resv, 0); +#endif + if (ret) { +#if DMA_BUF_LOCK_DEBUG + printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d reserving space for shared fence.\n", ret); +#endif + break; + } + + ret = dma_buf_lock_add_fence_reservation_callback(resource, + resv, + false); + if (ret) { +#if DMA_BUF_LOCK_DEBUG + printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d adding reservation to callback.\n", ret); +#endif + break; + } + +#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + reservation_object_add_shared_fence(resv, &resource->fence); +#else + dma_resv_add_shared_fence(resv, &resource->fence); +#endif + } else { + ret = dma_buf_lock_add_fence_reservation_callback(resource, + resv, + true); + if (ret) { +#if DMA_BUF_LOCK_DEBUG + printk(KERN_DEBUG "dma_buf_lock_dolock : Error %d adding reservation 
to callback.\n", ret); +#endif + break; + } + +#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + reservation_object_add_excl_fence(resv, &resource->fence); +#else + dma_resv_add_excl_fence(resv, &resource->fence); +#endif + } + } + + dma_buf_lock_release_fence_reservation(resource, &ww_ctx); + + /* Test if the callbacks were already triggered */ + if (!atomic_sub_return(DMA_BUF_LOCK_INIT_BIAS, &resource->fence_dep_count)) { + atomic_set(&resource->locked, 1); + + /* Drop the extra reference taken for exclusive access */ + if (resource->exclusive) + dma_buf_lock_fence_work(&resource->work); + } + + if (IS_ERR_VALUE((unsigned long)ret)) + { + put_unused_fd(fd); + + mutex_lock(&dma_buf_lock_mutex); + kref_put(&resource->refcount, dma_buf_lock_dounlock); + kref_put(&resource->refcount, dma_buf_lock_dounlock); + mutex_unlock(&dma_buf_lock_mutex); + + return ret; + } + +#if DMA_BUF_LOCK_DEBUG + printk("dma_buf_lock_dolock : complete\n"); +#endif + mutex_lock(&dma_buf_lock_mutex); + kref_put(&resource->refcount, dma_buf_lock_dounlock); + mutex_unlock(&dma_buf_lock_mutex); + + return fd; +} + +static void dma_buf_lock_dounlock(struct kref *ref) +{ + int i; + dma_buf_lock_resource *resource = container_of(ref, dma_buf_lock_resource, refcount); + + atomic_set(&resource->locked, 0); + + /* Signal the resource's fence. 
*/ + dma_fence_signal(&resource->fence); + + dma_buf_lock_fence_free_callbacks(resource); + + list_del(&resource->link); + + for (i = 0; i < resource->count; i++) + { + if (resource->dma_bufs[i]) + dma_buf_put(resource->dma_bufs[i]); + } + + kfree(resource->dma_bufs); + kfree(resource->list_of_dma_buf_fds); + dma_fence_put(&resource->fence); +} + +static int __init dma_buf_lock_init(void) +{ + int err; +#if DMA_BUF_LOCK_DEBUG + printk("dma_buf_lock_init\n"); +#endif + err = alloc_chrdev_region(&dma_buf_lock_dev, 0, 1, dma_buf_lock_dev_name); + + if (err == 0) { + cdev_init(&dma_buf_lock_cdev, &dma_buf_lock_fops); + + err = cdev_add(&dma_buf_lock_cdev, dma_buf_lock_dev, 1); + + if (err == 0) { + dma_buf_lock_class = class_create(THIS_MODULE, dma_buf_lock_dev_name); + if (IS_ERR(dma_buf_lock_class)) + err = PTR_ERR(dma_buf_lock_class); + else + { + struct device *mdev; + mdev = device_create(dma_buf_lock_class, NULL, dma_buf_lock_dev, NULL, dma_buf_lock_dev_name); + if (!IS_ERR(mdev)) + return 0; + + err = PTR_ERR(mdev); + class_destroy(dma_buf_lock_class); + } + cdev_del(&dma_buf_lock_cdev); + } + + unregister_chrdev_region(dma_buf_lock_dev, 1); + } +#if DMA_BUF_LOCK_DEBUG + printk("dma_buf_lock_init failed\n"); +#endif + return err; +} + +static void __exit dma_buf_lock_exit(void) +{ +#if DMA_BUF_LOCK_DEBUG + printk("dma_buf_lock_exit\n"); +#endif + + /* Unlock all outstanding references */ + while (1) + { + mutex_lock(&dma_buf_lock_mutex); + if (list_empty(&dma_buf_lock_resource_list)) + { + mutex_unlock(&dma_buf_lock_mutex); + break; + } + else + { + dma_buf_lock_resource *resource = list_entry(dma_buf_lock_resource_list.next, + dma_buf_lock_resource, link); + kref_put(&resource->refcount, dma_buf_lock_dounlock); + mutex_unlock(&dma_buf_lock_mutex); + } + } + + device_destroy(dma_buf_lock_class, dma_buf_lock_dev); + + class_destroy(dma_buf_lock_class); + + cdev_del(&dma_buf_lock_cdev); + + unregister_chrdev_region(dma_buf_lock_dev, 1); +} + +#ifdef 
HAVE_UNLOCKED_IOCTL +static long dma_buf_lock_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +#else +static int dma_buf_lock_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) +#endif +{ + dma_buf_lock_k_request request; + int size = _IOC_SIZE(cmd); + + if (_IOC_TYPE(cmd) != DMA_BUF_LOCK_IOC_MAGIC) + return -ENOTTY; + if ((_IOC_NR(cmd) < DMA_BUF_LOCK_IOC_MINNR) || (_IOC_NR(cmd) > DMA_BUF_LOCK_IOC_MAXNR)) + return -ENOTTY; + + switch (cmd) + { + case DMA_BUF_LOCK_FUNC_LOCK_ASYNC: + if (size != sizeof(dma_buf_lock_k_request)) + return -ENOTTY; + if (copy_from_user(&request, (void __user *)arg, size)) + return -EFAULT; +#if DMA_BUF_LOCK_DEBUG + printk("DMA_BUF_LOCK_FUNC_LOCK_ASYNC - %i\n", request.count); +#endif + return dma_buf_lock_dolock(&request); + } + + return -ENOTTY; +} + +module_init(dma_buf_lock_init); +module_exit(dma_buf_lock_exit); + +MODULE_LICENSE("GPL"); + diff --git a/dvalin/kernel/drivers/base/arm/dma_buf_lock/src/dma_buf_lock.h b/dvalin/kernel/drivers/base/arm/dma_buf_lock/src/dma_buf_lock.h new file mode 100644 index 0000000..104af1f --- /dev/null +++ b/dvalin/kernel/drivers/base/arm/dma_buf_lock/src/dma_buf_lock.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2012, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _DMA_BUF_LOCK_H +#define _DMA_BUF_LOCK_H + +typedef enum dma_buf_lock_exclusive +{ + DMA_BUF_LOCK_NONEXCLUSIVE = 0, + DMA_BUF_LOCK_EXCLUSIVE = -1 +} dma_buf_lock_exclusive; + +typedef struct dma_buf_lock_k_request +{ + int count; + int *list_of_dma_buf_fds; + int timeout; + dma_buf_lock_exclusive exclusive; +} dma_buf_lock_k_request; + +#define DMA_BUF_LOCK_IOC_MAGIC '~' + +#define DMA_BUF_LOCK_FUNC_LOCK_ASYNC _IOW(DMA_BUF_LOCK_IOC_MAGIC, 11, dma_buf_lock_k_request) + +#define DMA_BUF_LOCK_IOC_MINNR 11 +#define DMA_BUF_LOCK_IOC_MAXNR 11 + +#endif /* _DMA_BUF_LOCK_H */ diff --git a/dvalin/kernel/drivers/base/arm/dma_buf_test_exporter/Kbuild b/dvalin/kernel/drivers/base/arm/dma_buf_test_exporter/Kbuild new file mode 100644 index 0000000..0e20cb4 --- /dev/null +++ b/dvalin/kernel/drivers/base/arm/dma_buf_test_exporter/Kbuild @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2012, 2020-2021 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. 
+# +# + +ifeq ($(CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER), y) +obj-m += dma-buf-test-exporter.o +endif diff --git a/dvalin/kernel/drivers/base/arm/dma_buf_test_exporter/build.bp b/dvalin/kernel/drivers/base/arm/dma_buf_test_exporter/build.bp new file mode 100644 index 0000000..a49fb81 --- /dev/null +++ b/dvalin/kernel/drivers/base/arm/dma_buf_test_exporter/build.bp @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +bob_kernel_module { + name: "dma-buf-test-exporter", + defaults: [ + "kernel_defaults" + ], + srcs: [ + "Kbuild", + "dma-buf-test-exporter.c", + ], + enabled: false, + dma_shared_buffer_test_exporter: { + kbuild_options: ["CONFIG_DMA_SHARED_BUFFER_TEST_EXPORTER=y"], + enabled: true, + }, +} diff --git a/dvalin/kernel/drivers/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.c b/dvalin/kernel/drivers/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.c new file mode 100644 index 0000000..ccf73cc --- /dev/null +++ b/dvalin/kernel/drivers/base/arm/dma_buf_test_exporter/dma-buf-test-exporter.c @@ -0,0 +1,824 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE) +#include +#endif +#include + +/* Maximum size allowed in a single DMA_BUF_TE_ALLOC call */ +#define DMA_BUF_TE_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */ + +/* Since kernel version 5.0 CONFIG_ARCH_NO_SG_CHAIN replaced CONFIG_ARCH_HAS_SG_CHAIN */ +#if KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE +#if (!defined(ARCH_HAS_SG_CHAIN) && !defined(CONFIG_ARCH_HAS_SG_CHAIN)) +#define NO_SG_CHAIN +#endif +#elif defined(CONFIG_ARCH_NO_SG_CHAIN) +#define NO_SG_CHAIN +#endif + +struct dma_buf_te_alloc { + /* the real alloc */ + size_t nr_pages; + struct page **pages; + + /* the debug usage tracking */ + int nr_attached_devices; + int nr_device_mappings; + int nr_cpu_mappings; + + /* failure simulation */ + int fail_attach; + int fail_map; + int fail_mmap; + + bool contiguous; + dma_addr_t contig_dma_addr; + void *contig_cpu_addr; +}; + +struct dma_buf_te_attachment { + struct sg_table *sg; + bool attachment_mapped; +}; + +static struct miscdevice te_device; + +#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE) +static int dma_buf_te_attach(struct dma_buf *buf, struct device *dev, struct dma_buf_attachment 
*attachment) +#else +static int dma_buf_te_attach(struct dma_buf *buf, struct dma_buf_attachment *attachment) +#endif +{ + struct dma_buf_te_alloc *alloc; + alloc = buf->priv; + + if (alloc->fail_attach) + return -EFAULT; + + attachment->priv = kzalloc(sizeof(struct dma_buf_te_attachment), GFP_KERNEL); + if (!attachment->priv) + return -ENOMEM; + + /* dma_buf is externally locked during call */ + alloc->nr_attached_devices++; + return 0; +} + +static void dma_buf_te_detach(struct dma_buf *buf, struct dma_buf_attachment *attachment) +{ + struct dma_buf_te_alloc *alloc = buf->priv; + struct dma_buf_te_attachment *pa = attachment->priv; + + /* dma_buf is externally locked during call */ + + WARN(pa->attachment_mapped, "WARNING: dma-buf-test-exporter detected detach with open device mappings"); + + alloc->nr_attached_devices--; + + kfree(pa); +} + +static struct sg_table *dma_buf_te_map(struct dma_buf_attachment *attachment, enum dma_data_direction direction) +{ + struct sg_table *sg; + struct scatterlist *iter; + struct dma_buf_te_alloc *alloc; + struct dma_buf_te_attachment *pa = attachment->priv; + size_t i; + int ret; + + alloc = attachment->dmabuf->priv; + + if (alloc->fail_map) + return ERR_PTR(-ENOMEM); + + if (WARN(pa->attachment_mapped, + "WARNING: Attempted to map already mapped attachment.")) + return ERR_PTR(-EBUSY); + +#ifdef NO_SG_CHAIN + /* if the ARCH can't chain we can't have allocs larger than a single sg can hold */ + if (alloc->nr_pages > SG_MAX_SINGLE_ALLOC) + return ERR_PTR(-EINVAL); +#endif /* NO_SG_CHAIN */ + + sg = kmalloc(sizeof(struct sg_table), GFP_KERNEL); + if (!sg) + return ERR_PTR(-ENOMEM); + + /* from here we access the allocation object, so lock the dmabuf pointing to it */ + mutex_lock(&attachment->dmabuf->lock); + + if (alloc->contiguous) + ret = sg_alloc_table(sg, 1, GFP_KERNEL); + else + ret = sg_alloc_table(sg, alloc->nr_pages, GFP_KERNEL); + if (ret) { + mutex_unlock(&attachment->dmabuf->lock); + kfree(sg); + return ERR_PTR(ret); 
+ } + + if (alloc->contiguous) { + sg_dma_len(sg->sgl) = alloc->nr_pages * PAGE_SIZE; + sg_set_page(sg->sgl, pfn_to_page(PFN_DOWN(alloc->contig_dma_addr)), alloc->nr_pages * PAGE_SIZE, 0); + sg_dma_address(sg->sgl) = alloc->contig_dma_addr; + } else { + for_each_sg(sg->sgl, iter, alloc->nr_pages, i) + sg_set_page(iter, alloc->pages[i], PAGE_SIZE, 0); + } + + if (!dma_map_sg(attachment->dev, sg->sgl, sg->nents, direction)) { + mutex_unlock(&attachment->dmabuf->lock); + sg_free_table(sg); + kfree(sg); + return ERR_PTR(-ENOMEM); + } + + alloc->nr_device_mappings++; + pa->attachment_mapped = true; + pa->sg = sg; + mutex_unlock(&attachment->dmabuf->lock); + return sg; +} + +static void dma_buf_te_unmap(struct dma_buf_attachment *attachment, + struct sg_table *sg, enum dma_data_direction direction) +{ + struct dma_buf_te_alloc *alloc; + struct dma_buf_te_attachment *pa = attachment->priv; + + alloc = attachment->dmabuf->priv; + + mutex_lock(&attachment->dmabuf->lock); + + WARN(!pa->attachment_mapped, "WARNING: Unmatched unmap of attachment."); + + alloc->nr_device_mappings--; + pa->attachment_mapped = false; + pa->sg = NULL; + mutex_unlock(&attachment->dmabuf->lock); + + dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, direction); + sg_free_table(sg); + kfree(sg); +} + +static void dma_buf_te_release(struct dma_buf *buf) +{ + size_t i; + struct dma_buf_te_alloc *alloc; + alloc = buf->priv; + /* no need for locking */ + + if (alloc->contiguous) { +#if (KERNEL_VERSION(4, 8, 0) <= LINUX_VERSION_CODE) + dma_free_attrs(te_device.this_device, + alloc->nr_pages * PAGE_SIZE, + alloc->contig_cpu_addr, + alloc->contig_dma_addr, + DMA_ATTR_WRITE_COMBINE); +#else + DEFINE_DMA_ATTRS(attrs); + + dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + dma_free_attrs(te_device.this_device, + alloc->nr_pages * PAGE_SIZE, + alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs); +#endif + } else { + for (i = 0; i < alloc->nr_pages; i++) + __free_page(alloc->pages[i]); + } +#if 
(KERNEL_VERSION(4, 12, 0) <= LINUX_VERSION_CODE) + kvfree(alloc->pages); +#else + kfree(alloc->pages); +#endif + kfree(alloc); +} + +static int dma_buf_te_sync(struct dma_buf *dmabuf, + enum dma_data_direction direction, + bool start_cpu_access) +{ + struct dma_buf_attachment *attachment; + + mutex_lock(&dmabuf->lock); + + list_for_each_entry(attachment, &dmabuf->attachments, node) { + struct dma_buf_te_attachment *pa = attachment->priv; + struct sg_table *sg = pa->sg; + if (!sg) { + dev_dbg(te_device.this_device, "no mapping for device %s\n", dev_name(attachment->dev)); + continue; + } + + if (start_cpu_access) { + dev_dbg(te_device.this_device, "sync cpu with device %s\n", dev_name(attachment->dev)); + + dma_sync_sg_for_cpu(attachment->dev, sg->sgl, sg->nents, direction); + } else { + dev_dbg(te_device.this_device, "sync device %s with cpu\n", dev_name(attachment->dev)); + + dma_sync_sg_for_device(attachment->dev, sg->sgl, sg->nents, direction); + } + } + + mutex_unlock(&dmabuf->lock); + return 0; +} + +#if (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE) +static int dma_buf_te_begin_cpu_access(struct dma_buf *dmabuf, + enum dma_data_direction direction) +#else +static int dma_buf_te_begin_cpu_access(struct dma_buf *dmabuf, size_t start, + size_t len, + enum dma_data_direction direction) +#endif +{ + return dma_buf_te_sync(dmabuf, direction, true); +} + +#if (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE) +static int dma_buf_te_end_cpu_access(struct dma_buf *dmabuf, + enum dma_data_direction direction) +{ + return dma_buf_te_sync(dmabuf, direction, false); +} +#else +static void dma_buf_te_end_cpu_access(struct dma_buf *dmabuf, size_t start, + size_t len, + enum dma_data_direction direction) +{ + dma_buf_te_sync(dmabuf, direction, false); +} +#endif + +static void dma_buf_te_mmap_open(struct vm_area_struct *vma) +{ + struct dma_buf *dma_buf; + struct dma_buf_te_alloc *alloc; + dma_buf = vma->vm_private_data; + alloc = dma_buf->priv; + + mutex_lock(&dma_buf->lock); 
+	alloc->nr_cpu_mappings++;
+	mutex_unlock(&dma_buf->lock);
+}
+
+/* VMA close handler: drops the CPU-mapping count taken by the open handler. */
+static void dma_buf_te_mmap_close(struct vm_area_struct *vma)
+{
+	struct dma_buf *dma_buf;
+	struct dma_buf_te_alloc *alloc;
+	dma_buf = vma->vm_private_data;
+	alloc = dma_buf->priv;
+
+	BUG_ON(alloc->nr_cpu_mappings <= 0);
+	mutex_lock(&dma_buf->lock);
+	alloc->nr_cpu_mappings--;
+	mutex_unlock(&dma_buf->lock);
+}
+
+/*
+ * Page-fault handler for CPU mappings of an exported buffer.
+ *
+ * Looks up the backing page at the faulting page offset, takes a reference
+ * on it and hands it back through vmf->page.
+ *
+ * Return: VM_FAULT_SIGBUS if the offset lies outside the allocation,
+ * 0 otherwise.
+ */
+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
+static int dma_buf_te_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+#elif KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE
+static int dma_buf_te_mmap_fault(struct vm_fault *vmf)
+#else
+static vm_fault_t dma_buf_te_mmap_fault(struct vm_fault *vmf)
+#endif
+{
+	struct dma_buf_te_alloc *alloc;
+	struct dma_buf *dmabuf;
+	struct page *pageptr;
+
+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
+	dmabuf = vma->vm_private_data;
+#else
+	dmabuf = vmf->vma->vm_private_data;
+#endif
+	alloc = dmabuf->priv;
+
+	/* pages[] holds nr_pages entries, so pgoff == nr_pages is already out of bounds */
+	if (vmf->pgoff >= alloc->nr_pages)
+		return VM_FAULT_SIGBUS;
+
+	pageptr = alloc->pages[vmf->pgoff];
+
+	BUG_ON(!pageptr);
+
+	get_page(pageptr);
+	vmf->page = pageptr;
+
+	return 0;
+}
+
+struct vm_operations_struct dma_buf_te_vm_ops = {
+	.open = dma_buf_te_mmap_open,
+	.close = dma_buf_te_mmap_close,
+	.fault = dma_buf_te_mmap_fault
+};
+
+/*
+ * mmap handler of the exporter: installs the VMA operations above and stores
+ * the dma_buf in the VMA's private data. Fails with -ENOMEM when mmap failure
+ * injection (fail_mmap) is enabled for this buffer.
+ */
+static int dma_buf_te_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+	struct dma_buf_te_alloc *alloc;
+	alloc = dmabuf->priv;
+
+	if (alloc->fail_mmap)
+		return -ENOMEM;
+
+	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
+	vma->vm_ops = &dma_buf_te_vm_ops;
+	vma->vm_private_data = dmabuf;
+
+	/* we fault in the pages on access */
+
+	/* call open to do the ref-counting */
+	dma_buf_te_vm_ops.open(vma);
+
+	return 0;
+}
+
+#if KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE
+static void *dma_buf_te_kmap_atomic(struct dma_buf *buf, unsigned long page_num)
+{
+	/* IGNORE */
+	return NULL;
+}
+#endif
+
+static void *dma_buf_te_kmap(struct dma_buf *buf, unsigned long
page_num) +{ + struct dma_buf_te_alloc *alloc; + + alloc = buf->priv; + if (page_num >= alloc->nr_pages) + return NULL; + + return kmap(alloc->pages[page_num]); +} +static void dma_buf_te_kunmap(struct dma_buf *buf, + unsigned long page_num, void *addr) +{ + struct dma_buf_te_alloc *alloc; + + alloc = buf->priv; + if (page_num >= alloc->nr_pages) + return; + + kunmap(alloc->pages[page_num]); + return; +} + +static struct dma_buf_ops dma_buf_te_ops = { + /* real handlers */ + .attach = dma_buf_te_attach, + .detach = dma_buf_te_detach, + .map_dma_buf = dma_buf_te_map, + .unmap_dma_buf = dma_buf_te_unmap, + .release = dma_buf_te_release, + .mmap = dma_buf_te_mmap, + .begin_cpu_access = dma_buf_te_begin_cpu_access, + .end_cpu_access = dma_buf_te_end_cpu_access, +#if KERNEL_VERSION(4, 12, 0) > LINUX_VERSION_CODE + .kmap = dma_buf_te_kmap, + .kunmap = dma_buf_te_kunmap, + + /* nop handlers for mandatory functions we ignore */ + .kmap_atomic = dma_buf_te_kmap_atomic +#else +#if KERNEL_VERSION(5, 6, 0) > LINUX_VERSION_CODE + .map = dma_buf_te_kmap, + .unmap = dma_buf_te_kunmap, +#endif + +#if KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE + /* nop handlers for mandatory functions we ignore */ + .map_atomic = dma_buf_te_kmap_atomic +#endif +#endif +}; + +static int do_dma_buf_te_ioctl_version(struct dma_buf_te_ioctl_version __user *buf) +{ + struct dma_buf_te_ioctl_version v; + + if (copy_from_user(&v, buf, sizeof(v))) + return -EFAULT; + + if (v.op != DMA_BUF_TE_ENQ) + return -EFAULT; + + v.op = DMA_BUF_TE_ACK; + v.major = DMA_BUF_TE_VER_MAJOR; + v.minor = DMA_BUF_TE_VER_MINOR; + + if (copy_to_user(buf, &v, sizeof(v))) + return -EFAULT; + else + return 0; +} + +static int do_dma_buf_te_ioctl_alloc(struct dma_buf_te_ioctl_alloc __user *buf, bool contiguous) +{ + struct dma_buf_te_ioctl_alloc alloc_req; + struct dma_buf_te_alloc *alloc; + struct dma_buf *dma_buf; + size_t i = 0; + size_t max_nr_pages = DMA_BUF_TE_ALLOC_MAX_SIZE; + int fd; + + if (copy_from_user(&alloc_req, 
buf, sizeof(alloc_req))) { + dev_err(te_device.this_device, "%s: couldn't get user data", __func__); + goto no_input; + } + + if (!alloc_req.size) { + dev_err(te_device.this_device, "%s: no size specified", __func__); + goto invalid_size; + } + +#ifdef NO_SG_CHAIN + /* Whilst it is possible to allocate larger buffer, we won't be able to + * map it during actual usage (mmap() still succeeds). We fail here so + * userspace code can deal with it early than having driver failure + * later on. + */ + if (max_nr_pages > SG_MAX_SINGLE_ALLOC) + max_nr_pages = SG_MAX_SINGLE_ALLOC; +#endif /* NO_SG_CHAIN */ + + if (alloc_req.size > max_nr_pages) { + dev_err(te_device.this_device, "%s: buffer size of %llu pages exceeded the mapping limit of %zu pages", + __func__, alloc_req.size, max_nr_pages); + goto invalid_size; + } + + alloc = kzalloc(sizeof(struct dma_buf_te_alloc), GFP_KERNEL); + if (alloc == NULL) { + dev_err(te_device.this_device, "%s: couldn't alloc object", __func__); + goto no_alloc_object; + } + + alloc->nr_pages = alloc_req.size; + alloc->contiguous = contiguous; + +#if (KERNEL_VERSION(4, 12, 0) <= LINUX_VERSION_CODE) + alloc->pages = kvzalloc(sizeof(struct page *) * alloc->nr_pages, GFP_KERNEL); +#else + alloc->pages = kzalloc(sizeof(struct page *) * alloc->nr_pages, GFP_KERNEL); +#endif + + if (!alloc->pages) { + dev_err(te_device.this_device, + "%s: couldn't alloc %zu page structures", + __func__, alloc->nr_pages); + goto free_alloc_object; + } + + if (contiguous) { + dma_addr_t dma_aux; + +#if (KERNEL_VERSION(4, 8, 0) <= LINUX_VERSION_CODE) + alloc->contig_cpu_addr = dma_alloc_attrs(te_device.this_device, + alloc->nr_pages * PAGE_SIZE, + &alloc->contig_dma_addr, + GFP_KERNEL | __GFP_ZERO, + DMA_ATTR_WRITE_COMBINE); +#else + DEFINE_DMA_ATTRS(attrs); + + dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + alloc->contig_cpu_addr = dma_alloc_attrs(te_device.this_device, + alloc->nr_pages * PAGE_SIZE, + &alloc->contig_dma_addr, + GFP_KERNEL | __GFP_ZERO, &attrs); 
+#endif
+		if (!alloc->contig_cpu_addr) {
+			dev_err(te_device.this_device, "%s: couldn't alloc contiguous buffer %zu pages",
+				__func__, alloc->nr_pages);
+			goto free_page_struct;
+		}
+		/* record the struct page behind each PAGE_SIZE step of the contiguous region */
+		dma_aux = alloc->contig_dma_addr;
+		for (i = 0; i < alloc->nr_pages; i++) {
+			alloc->pages[i] = pfn_to_page(PFN_DOWN(dma_aux));
+			dma_aux += PAGE_SIZE;
+		}
+	} else {
+		/* on failure, i is the number of pages allocated so far (used by no_page) */
+		for (i = 0; i < alloc->nr_pages; i++) {
+			alloc->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
+			if (alloc->pages[i] == NULL) {
+				dev_err(te_device.this_device, "%s: couldn't alloc page", __func__);
+				goto no_page;
+			}
+		}
+	}
+
+	/* alloc ready, let's export it */
+	{
+		struct dma_buf_export_info export_info = {
+			.exp_name = "dma_buf_te",
+			.owner = THIS_MODULE,
+			.ops = &dma_buf_te_ops,
+			.size = alloc->nr_pages << PAGE_SHIFT,
+			.flags = O_CLOEXEC | O_RDWR,
+			.priv = alloc,
+		};
+
+		dma_buf = dma_buf_export(&export_info);
+	}
+
+	if (IS_ERR_OR_NULL(dma_buf)) {
+		dev_err(te_device.this_device, "%s: couldn't export dma_buf", __func__);
+		goto no_export;
+	}
+
+	/* get fd for buf */
+	fd = dma_buf_fd(dma_buf, O_CLOEXEC);
+
+	if (fd < 0) {
+		dev_err(te_device.this_device, "%s: couldn't get fd from dma_buf", __func__);
+		goto no_fd;
+	}
+
+	return fd;
+
+no_fd:
+	/*
+	 * The export succeeded, so the dma_buf now owns alloc: dropping the
+	 * only reference runs dma_buf_te_release(), which frees the pages,
+	 * the page array and alloc itself. Return here instead of falling
+	 * through, otherwise everything below would be freed a second time.
+	 */
+	dma_buf_put(dma_buf);
+	return -EFAULT;
+no_export:
+	/* i still valid */
+no_page:
+	if (contiguous) {
+#if (KERNEL_VERSION(4, 8, 0) <= LINUX_VERSION_CODE)
+		dma_free_attrs(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				alloc->contig_cpu_addr,
+				alloc->contig_dma_addr,
+				DMA_ATTR_WRITE_COMBINE);
+#else
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		dma_free_attrs(te_device.this_device,
+				alloc->nr_pages * PAGE_SIZE,
+				alloc->contig_cpu_addr, alloc->contig_dma_addr, &attrs);
+#endif
+	} else {
+		/* free only the pages that were successfully allocated */
+		while (i-- > 0)
+			__free_page(alloc->pages[i]);
+	}
+free_page_struct:
+#if (KERNEL_VERSION(4, 12, 0) <= LINUX_VERSION_CODE)
+	kvfree(alloc->pages);
+#else
+	kfree(alloc->pages);
+#endif
+free_alloc_object:
+	kfree(alloc);
+no_alloc_object:
+invalid_size: +no_input: + return -EFAULT; +} + +static int do_dma_buf_te_ioctl_status(struct dma_buf_te_ioctl_status __user *arg) +{ + struct dma_buf_te_ioctl_status status; + struct dma_buf *dmabuf; + struct dma_buf_te_alloc *alloc; + int res = -EINVAL; + + if (copy_from_user(&status, arg, sizeof(status))) + return -EFAULT; + + dmabuf = dma_buf_get(status.fd); + if (IS_ERR_OR_NULL(dmabuf)) + return -EINVAL; + + /* verify it's one of ours */ + if (dmabuf->ops != &dma_buf_te_ops) + goto err_have_dmabuf; + + /* ours, get the current status */ + alloc = dmabuf->priv; + + /* lock while reading status to take a snapshot */ + mutex_lock(&dmabuf->lock); + status.attached_devices = alloc->nr_attached_devices; + status.device_mappings = alloc->nr_device_mappings; + status.cpu_mappings = alloc->nr_cpu_mappings; + mutex_unlock(&dmabuf->lock); + + if (copy_to_user(arg, &status, sizeof(status))) + goto err_have_dmabuf; + + /* All OK */ + res = 0; + +err_have_dmabuf: + dma_buf_put(dmabuf); + return res; +} + +static int do_dma_buf_te_ioctl_set_failing(struct dma_buf_te_ioctl_set_failing __user *arg) +{ + struct dma_buf *dmabuf; + struct dma_buf_te_ioctl_set_failing f; + struct dma_buf_te_alloc *alloc; + int res = -EINVAL; + + if (copy_from_user(&f, arg, sizeof(f))) + return -EFAULT; + + dmabuf = dma_buf_get(f.fd); + if (IS_ERR_OR_NULL(dmabuf)) + return -EINVAL; + + /* verify it's one of ours */ + if (dmabuf->ops != &dma_buf_te_ops) + goto err_have_dmabuf; + + /* ours, set the fail modes */ + alloc = dmabuf->priv; + /* lock to set the fail modes atomically */ + mutex_lock(&dmabuf->lock); + alloc->fail_attach = f.fail_attach; + alloc->fail_map = f.fail_map; + alloc->fail_mmap = f.fail_mmap; + mutex_unlock(&dmabuf->lock); + + /* success */ + res = 0; + +err_have_dmabuf: + dma_buf_put(dmabuf); + return res; +} + +static u32 dma_te_buf_fill(struct dma_buf *dma_buf, unsigned int value) +{ + struct dma_buf_attachment *attachment; + struct sg_table *sgt; + struct scatterlist *sg; + 
unsigned int count;
+	size_t offset = 0; /* bytes covered by the scatterlist entries already walked */
+	int ret = 0;
+	size_t i;
+
+	attachment = dma_buf_attach(dma_buf, te_device.this_device);
+	if (IS_ERR_OR_NULL(attachment))
+		return -EBUSY;
+
+	sgt = dma_buf_map_attachment(attachment, DMA_BIDIRECTIONAL);
+	if (IS_ERR_OR_NULL(sgt)) {
+		/* PTR_ERR(NULL) is 0, which would report success for a failed map */
+		ret = sgt ? PTR_ERR(sgt) : -ENOMEM;
+		goto no_import;
+	}
+
+	ret = dma_buf_begin_cpu_access(dma_buf,
+#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE
+			0, dma_buf->size,
+#endif
+			DMA_BIDIRECTIONAL);
+	if (ret)
+		goto no_cpu_access;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+		for (i = 0; i < sg_dma_len(sg); i = i + PAGE_SIZE) {
+			void *addr = NULL;
+			/*
+			 * Index relative to the start of the buffer, not of
+			 * this scatterlist entry; otherwise every entry would
+			 * fill the buffer's first page(s) again.
+			 */
+			unsigned long page_num = (offset + i) >> PAGE_SHIFT;
+#if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE
+			addr = dma_buf_te_kmap(dma_buf, page_num);
+#else
+			addr = dma_buf_kmap(dma_buf, page_num);
+#endif
+			if (!addr) {
+				ret = -EPERM;
+				goto no_kmap;
+			}
+			memset(addr, value, PAGE_SIZE);
+#if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE
+			dma_buf_te_kunmap(dma_buf, page_num, addr);
+#else
+			dma_buf_kunmap(dma_buf, page_num, addr);
+#endif
+		}
+		offset += sg_dma_len(sg);
+	}
+
+	/* unwind in reverse order of setup; ret carries the first error, if any */
+no_kmap:
+	dma_buf_end_cpu_access(dma_buf,
+#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE
+			0, dma_buf->size,
+#endif
+			DMA_BIDIRECTIONAL);
+no_cpu_access:
+	dma_buf_unmap_attachment(attachment, sgt, DMA_BIDIRECTIONAL);
+no_import:
+	dma_buf_detach(dma_buf, attachment);
+	return ret;
+}
+
+/*
+ * DMA_BUF_TE_FILL ioctl: copies the request from user space, resolves its fd
+ * to a dma_buf and fills that buffer with the requested value.
+ *
+ * Return: -EFAULT if the request cannot be copied, -EINVAL if the fd does not
+ * resolve to a dma_buf, otherwise the result of dma_te_buf_fill().
+ */
+static int do_dma_buf_te_ioctl_fill(struct dma_buf_te_ioctl_fill __user *arg)
+{
+
+	struct dma_buf *dmabuf;
+	struct dma_buf_te_ioctl_fill f;
+	int ret;
+
+	if (copy_from_user(&f, arg, sizeof(f)))
+		return -EFAULT;
+
+	dmabuf = dma_buf_get(f.fd);
+	if (IS_ERR_OR_NULL(dmabuf))
+		return -EINVAL;
+
+	ret = dma_te_buf_fill(dmabuf, f.value);
+	dma_buf_put(dmabuf);
+
+	return ret;
+}
+
+/* ioctl entry point of the misc device: dispatches on the command code. */
+static long dma_buf_te_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	switch (cmd) {
+	case DMA_BUF_TE_VERSION:
+		return do_dma_buf_te_ioctl_version((struct dma_buf_te_ioctl_version __user *)arg);
+	case
DMA_BUF_TE_ALLOC: + return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, false); + case DMA_BUF_TE_ALLOC_CONT: + return do_dma_buf_te_ioctl_alloc((struct dma_buf_te_ioctl_alloc __user *)arg, true); + case DMA_BUF_TE_QUERY: + return do_dma_buf_te_ioctl_status((struct dma_buf_te_ioctl_status __user *)arg); + case DMA_BUF_TE_SET_FAILING: + return do_dma_buf_te_ioctl_set_failing((struct dma_buf_te_ioctl_set_failing __user *)arg); + case DMA_BUF_TE_FILL: + return do_dma_buf_te_ioctl_fill((struct dma_buf_te_ioctl_fill __user *)arg); + default: + return -ENOTTY; + } +} + +static const struct file_operations dma_buf_te_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = dma_buf_te_ioctl, + .compat_ioctl = dma_buf_te_ioctl, +}; + +static int __init dma_buf_te_init(void) +{ + int res; + te_device.minor = MISC_DYNAMIC_MINOR; + te_device.name = "dma_buf_te"; + te_device.fops = &dma_buf_te_fops; + + res = misc_register(&te_device); + if (res) { + printk(KERN_WARNING"Misc device registration failed of 'dma_buf_te'\n"); + return res; + } + te_device.this_device->coherent_dma_mask = DMA_BIT_MASK(32); + + dev_info(te_device.this_device, "dma_buf_te ready\n"); + return 0; + +} + +static void __exit dma_buf_te_exit(void) +{ + misc_deregister(&te_device); +} + +module_init(dma_buf_te_init); +module_exit(dma_buf_te_exit); +MODULE_LICENSE("GPL"); diff --git a/dvalin/kernel/drivers/base/arm/memory_group_manager/Kbuild b/dvalin/kernel/drivers/base/arm/memory_group_manager/Kbuild new file mode 100644 index 0000000..99ce311 --- /dev/null +++ b/dvalin/kernel/drivers/base/arm/memory_group_manager/Kbuild @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
+# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +ifeq ($(CONFIG_MALI_MEMORY_GROUP_MANAGER), y) +obj-m := memory_group_manager.o +endif diff --git a/dvalin/kernel/drivers/base/arm/memory_group_manager/build.bp b/dvalin/kernel/drivers/base/arm/memory_group_manager/build.bp new file mode 100644 index 0000000..23db183 --- /dev/null +++ b/dvalin/kernel/drivers/base/arm/memory_group_manager/build.bp @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +bob_kernel_module { + name: "memory_group_manager", + defaults: [ + "kernel_defaults" + ], + srcs: [ + "Kbuild", + "memory_group_manager.c", + ], + enabled: false, + mali_memory_group_manager: { + kbuild_options: ["CONFIG_MALI_MEMORY_GROUP_MANAGER=y"], + enabled: true, + }, +} diff --git a/dvalin/kernel/drivers/base/arm/memory_group_manager/memory_group_manager.c b/dvalin/kernel/drivers/base/arm/memory_group_manager/memory_group_manager.c new file mode 100644 index 0000000..a70fe70 --- /dev/null +++ b/dvalin/kernel/drivers/base/arm/memory_group_manager/memory_group_manager.c @@ -0,0 +1,491 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#if IS_ENABLED(CONFIG_DEBUG_FS) +#include +#endif +#include +#include + +#if (KERNEL_VERSION(4, 20, 0) > LINUX_VERSION_CODE) +static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn, pgprot_t pgprot) +{ + int err; + +#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \ + ((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \ + (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE))) + if (pgprot_val(pgprot) != pgprot_val(vma->vm_page_prot)) + return VM_FAULT_SIGBUS; + + err = vm_insert_pfn(vma, addr, pfn); +#else + err = vm_insert_pfn_prot(vma, addr, pfn, pgprot); +#endif + + if (unlikely(err == -ENOMEM)) + return VM_FAULT_OOM; + if (unlikely(err < 0 && err != -EBUSY)) + return VM_FAULT_SIGBUS; + + return VM_FAULT_NOPAGE; +} +#endif + +#define IMPORTED_MEMORY_ID (MEMORY_GROUP_MANAGER_NR_GROUPS - 1) + +/** + * struct mgm_group - Structure to keep track of the number of allocated + * pages per group + * + * @size: The number of allocated small(4KB) pages + * @lp_size: The number of allocated large(2MB) pages + * @insert_pfn: The number of calls to map pages for CPU access. + * @update_gpu_pte: The number of calls to update GPU page table entries. + * + * This structure allows page allocation information to be displayed via + * debugfs. Display is organized per group with small and large sized pages. + */ +struct mgm_group { + size_t size; + size_t lp_size; + size_t insert_pfn; + size_t update_gpu_pte; +}; + +/** + * struct mgm_groups - Structure for groups of memory group manager + * + * @groups: To keep track of the number of allocated pages of all groups + * @dev: device attached + * @mgm_debugfs_root: debugfs root directory of memory group manager + * + * This structure allows page allocation information to be displayed via + * debugfs. Display is organized per group with small and large sized pages. 
+ */ +struct mgm_groups { + struct mgm_group groups[MEMORY_GROUP_MANAGER_NR_GROUPS]; + struct device *dev; +#if IS_ENABLED(CONFIG_DEBUG_FS) + struct dentry *mgm_debugfs_root; +#endif +}; + +#if IS_ENABLED(CONFIG_DEBUG_FS) + +static int mgm_size_get(void *data, u64 *val) +{ + struct mgm_group *group = data; + + *val = group->size; + + return 0; +} + +static int mgm_lp_size_get(void *data, u64 *val) +{ + struct mgm_group *group = data; + + *val = group->lp_size; + + return 0; +} + +static int mgm_insert_pfn_get(void *data, u64 *val) +{ + struct mgm_group *group = data; + + *val = group->insert_pfn; + + return 0; +} + +static int mgm_update_gpu_pte_get(void *data, u64 *val) +{ + struct mgm_group *group = data; + + *val = group->update_gpu_pte; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_mgm_size, mgm_size_get, NULL, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(fops_mgm_lp_size, mgm_lp_size_get, NULL, "%llu\n"); + +DEFINE_SIMPLE_ATTRIBUTE(fops_mgm_insert_pfn, mgm_insert_pfn_get, NULL, + "%llu\n"); + +DEFINE_SIMPLE_ATTRIBUTE(fops_mgm_update_gpu_pte, mgm_update_gpu_pte_get, NULL, + "%llu\n"); + +static void mgm_term_debugfs(struct mgm_groups *data) +{ + debugfs_remove_recursive(data->mgm_debugfs_root); +} + +#define MGM_DEBUGFS_GROUP_NAME_MAX 10 +static int mgm_initialize_debugfs(struct mgm_groups *mgm_data) +{ + int i; + struct dentry *e, *g; + char debugfs_group_name[MGM_DEBUGFS_GROUP_NAME_MAX]; + + /* + * Create root directory of memory-group-manager + */ + mgm_data->mgm_debugfs_root = + debugfs_create_dir("physical-memory-group-manager", NULL); + if (IS_ERR(mgm_data->mgm_debugfs_root)) { + dev_err(mgm_data->dev, "fail to create debugfs root directory\n"); + return -ENODEV; + } + + /* + * Create debugfs files per group + */ + for (i = 0; i < MEMORY_GROUP_MANAGER_NR_GROUPS; i++) { + scnprintf(debugfs_group_name, MGM_DEBUGFS_GROUP_NAME_MAX, + "group_%d", i); + g = debugfs_create_dir(debugfs_group_name, + mgm_data->mgm_debugfs_root); + if (IS_ERR(g)) { + 
dev_err(mgm_data->dev, "fail to create group[%d]\n", i); + goto remove_debugfs; + } + + e = debugfs_create_file("size", 0444, g, &mgm_data->groups[i], + &fops_mgm_size); + if (IS_ERR(e)) { + dev_err(mgm_data->dev, "fail to create size[%d]\n", i); + goto remove_debugfs; + } + + e = debugfs_create_file("lp_size", 0444, g, + &mgm_data->groups[i], &fops_mgm_lp_size); + if (IS_ERR(e)) { + dev_err(mgm_data->dev, + "fail to create lp_size[%d]\n", i); + goto remove_debugfs; + } + + e = debugfs_create_file("insert_pfn", 0444, g, + &mgm_data->groups[i], &fops_mgm_insert_pfn); + if (IS_ERR(e)) { + dev_err(mgm_data->dev, + "fail to create insert_pfn[%d]\n", i); + goto remove_debugfs; + } + + e = debugfs_create_file("update_gpu_pte", 0444, g, + &mgm_data->groups[i], &fops_mgm_update_gpu_pte); + if (IS_ERR(e)) { + dev_err(mgm_data->dev, + "fail to create update_gpu_pte[%d]\n", i); + goto remove_debugfs; + } + } + + return 0; + +remove_debugfs: + mgm_term_debugfs(mgm_data); + return -ENODEV; +} + +#else + +static void mgm_term_debugfs(struct mgm_groups *data) +{ +} + +static int mgm_initialize_debugfs(struct mgm_groups *mgm_data) +{ + return 0; +} + +#endif /* CONFIG_DEBUG_FS */ + +#define ORDER_SMALL_PAGE 0 +#define ORDER_LARGE_PAGE 9 +static void update_size(struct memory_group_manager_device *mgm_dev, int + group_id, int order, bool alloc) +{ + struct mgm_groups *data = mgm_dev->data; + + switch (order) { + case ORDER_SMALL_PAGE: + if (alloc) + data->groups[group_id].size++; + else { + WARN_ON(data->groups[group_id].size == 0); + data->groups[group_id].size--; + } + break; + + case ORDER_LARGE_PAGE: + if (alloc) + data->groups[group_id].lp_size++; + else { + WARN_ON(data->groups[group_id].lp_size == 0); + data->groups[group_id].lp_size--; + } + break; + + default: + dev_err(data->dev, "Unknown order(%d)\n", order); + break; + } +} + +static struct page *example_mgm_alloc_page( + struct memory_group_manager_device *mgm_dev, int group_id, + gfp_t gfp_mask, unsigned int order) +{ 
+	struct mgm_groups *const data = mgm_dev->data;
+	struct page *p;
+
+	dev_dbg(data->dev, "%s(mgm_dev=%p, group_id=%d gfp_mask=0x%x order=%u\n",
+		__func__, (void *)mgm_dev, group_id, gfp_mask, order);
+
+	if (WARN_ON(group_id < 0) ||
+		WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))
+		return NULL;
+
+	p = alloc_pages(gfp_mask, order);
+
+	if (p) {
+		/* account the page against its group for the debugfs counters */
+		update_size(mgm_dev, group_id, order, true);
+	} else {
+		dev_err(data->dev, "alloc_pages failed\n");
+	}
+
+	return p;
+}
+
+/*
+ * Frees a page block of the given order and removes it from the per-group
+ * accounting. Does nothing (beyond a warning) if group_id is out of range.
+ */
+static void example_mgm_free_page(
+	struct memory_group_manager_device *mgm_dev, int group_id,
+	struct page *page, unsigned int order)
+{
+	struct mgm_groups *const data = mgm_dev->data;
+
+	dev_dbg(data->dev, "%s(mgm_dev=%p, group_id=%d page=%p order=%u\n",
+		__func__, (void *)mgm_dev, group_id, (void *)page, order);
+
+	if (WARN_ON(group_id < 0) ||
+		WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))
+		return;
+
+	__free_pages(page, order);
+
+	update_size(mgm_dev, group_id, order, false);
+}
+
+/*
+ * Returns the memory group to use for imported memory. This example
+ * implementation always returns IMPORTED_MEMORY_ID; import_data is only
+ * sanity-checked (warnings for a NULL descriptor, a NULL dma_buf or a type
+ * other than MEMORY_GROUP_MANAGER_IMPORT_TYPE_DMA_BUF).
+ */
+static int example_mgm_get_import_memory_id(
+	struct memory_group_manager_device *mgm_dev,
+	struct memory_group_manager_import_data *import_data)
+{
+	struct mgm_groups *const data = mgm_dev->data;
+
+	/* import_data may be NULL (checked below), so guard the dereference; -1 means "no type" */
+	dev_dbg(data->dev, "%s(mgm_dev=%p, import_data=%p (type=%d)\n",
+		__func__, (void *)mgm_dev, (void *)import_data,
+		import_data ? (int)import_data->type : -1);
+
+	if (!WARN_ON(!import_data)) {
+		WARN_ON(!import_data->u.dma_buf);
+
+		WARN_ON(import_data->type !=
+				MEMORY_GROUP_MANAGER_IMPORT_TYPE_DMA_BUF);
+	}
+
+	return IMPORTED_MEMORY_ID;
+}
+
+static u64 example_mgm_update_gpu_pte(
+	struct memory_group_manager_device *const mgm_dev, int const group_id,
+	int const mmu_level, u64 pte)
+{
+	struct mgm_groups *const data = mgm_dev->data;
+	const u32 pbha_bit_pos = 59; /* bits 62:59 */
+	const u32 pbha_bit_mask = 0xf; /* 4-bit */
+
+	dev_dbg(data->dev,
+		"%s(mgm_dev=%p, group_id=%d, mmu_level=%d, pte=0x%llx)\n",
+		__func__, (void *)mgm_dev, group_id, mmu_level, pte);
+ + if (WARN_ON(group_id < 0) || + WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + return pte; + + pte |= ((u64)group_id & pbha_bit_mask) << pbha_bit_pos; + + data->groups[group_id].update_gpu_pte++; + + return pte; +} + +static vm_fault_t example_mgm_vmf_insert_pfn_prot( + struct memory_group_manager_device *const mgm_dev, int const group_id, + struct vm_area_struct *const vma, unsigned long const addr, + unsigned long const pfn, pgprot_t const prot) +{ + struct mgm_groups *const data = mgm_dev->data; + vm_fault_t fault; + + dev_dbg(data->dev, + "%s(mgm_dev=%p, group_id=%d, vma=%p, addr=0x%lx, pfn=0x%lx, prot=0x%llx)\n", + __func__, (void *)mgm_dev, group_id, (void *)vma, addr, pfn, + (unsigned long long int) pgprot_val(prot)); + + if (WARN_ON(group_id < 0) || + WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) + return VM_FAULT_SIGBUS; + + fault = vmf_insert_pfn_prot(vma, addr, pfn, prot); + + if (fault == VM_FAULT_NOPAGE) + data->groups[group_id].insert_pfn++; + else + dev_err(data->dev, "vmf_insert_pfn_prot failed\n"); + + return fault; +} + +static int mgm_initialize_data(struct mgm_groups *mgm_data) +{ + int i; + + for (i = 0; i < MEMORY_GROUP_MANAGER_NR_GROUPS; i++) { + mgm_data->groups[i].size = 0; + mgm_data->groups[i].lp_size = 0; + mgm_data->groups[i].insert_pfn = 0; + mgm_data->groups[i].update_gpu_pte = 0; + } + + return mgm_initialize_debugfs(mgm_data); +} + +static void mgm_term_data(struct mgm_groups *data) +{ + int i; + + for (i = 0; i < MEMORY_GROUP_MANAGER_NR_GROUPS; i++) { + if (data->groups[i].size != 0) + dev_warn(data->dev, + "%zu 0-order pages in group(%d) leaked\n", + data->groups[i].size, i); + if (data->groups[i].lp_size != 0) + dev_warn(data->dev, + "%zu 9 order pages in group(%d) leaked\n", + data->groups[i].lp_size, i); + } + + mgm_term_debugfs(data); +} + +static int memory_group_manager_probe(struct platform_device *pdev) +{ + struct memory_group_manager_device *mgm_dev; + struct mgm_groups *mgm_data; + + mgm_dev = 
kzalloc(sizeof(*mgm_dev), GFP_KERNEL); + if (!mgm_dev) + return -ENOMEM; + + mgm_dev->owner = THIS_MODULE; + mgm_dev->ops.mgm_alloc_page = example_mgm_alloc_page; + mgm_dev->ops.mgm_free_page = example_mgm_free_page; + mgm_dev->ops.mgm_get_import_memory_id = + example_mgm_get_import_memory_id; + mgm_dev->ops.mgm_vmf_insert_pfn_prot = example_mgm_vmf_insert_pfn_prot; + mgm_dev->ops.mgm_update_gpu_pte = example_mgm_update_gpu_pte; + + mgm_data = kzalloc(sizeof(*mgm_data), GFP_KERNEL); + if (!mgm_data) { + kfree(mgm_dev); + return -ENOMEM; + } + + mgm_dev->data = mgm_data; + mgm_data->dev = &pdev->dev; + + if (mgm_initialize_data(mgm_data)) { + kfree(mgm_data); + kfree(mgm_dev); + return -ENOENT; + } + + platform_set_drvdata(pdev, mgm_dev); + dev_info(&pdev->dev, "Memory group manager probed successfully\n"); + + return 0; +} + +static int memory_group_manager_remove(struct platform_device *pdev) +{ + struct memory_group_manager_device *mgm_dev = + platform_get_drvdata(pdev); + struct mgm_groups *mgm_data = mgm_dev->data; + + mgm_term_data(mgm_data); + kfree(mgm_data); + + kfree(mgm_dev); + + dev_info(&pdev->dev, "Memory group manager removed successfully\n"); + + return 0; +} + +static const struct of_device_id memory_group_manager_dt_ids[] = { + { .compatible = "arm,physical-memory-group-manager" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, memory_group_manager_dt_ids); + +static struct platform_driver memory_group_manager_driver = { + .probe = memory_group_manager_probe, + .remove = memory_group_manager_remove, + .driver = { + .name = "physical-memory-group-manager", + .of_match_table = of_match_ptr(memory_group_manager_dt_ids), + /* + * Prevent the mgm_dev from being unbound and freed, as others + * may have pointers to it and would get confused, or crash, if + * it suddenly disappeared.
+ */ + .suppress_bind_attrs = true, + } +}; + +module_platform_driver(memory_group_manager_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("ARM Ltd."); +MODULE_VERSION("1.0"); diff --git a/dvalin/kernel/drivers/base/arm/protected_memory_allocator/Kbuild b/dvalin/kernel/drivers/base/arm/protected_memory_allocator/Kbuild new file mode 100644 index 0000000..25295a9 --- /dev/null +++ b/dvalin/kernel/drivers/base/arm/protected_memory_allocator/Kbuild @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +ifeq ($(CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR), y) +obj-m := protected_memory_allocator.o +endif diff --git a/dvalin/kernel/drivers/base/arm/protected_memory_allocator/build.bp b/dvalin/kernel/drivers/base/arm/protected_memory_allocator/build.bp new file mode 100644 index 0000000..4c56154 --- /dev/null +++ b/dvalin/kernel/drivers/base/arm/protected_memory_allocator/build.bp @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +bob_kernel_module { + name: "protected_memory_allocator", + defaults: [ + "kernel_defaults" + ], + srcs: [ + "Kbuild", + "protected_memory_allocator.c", + ], + enabled: false, + mali_protected_memory_allocator: { + kbuild_options: ["CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR=y"], + enabled: true, + }, +} diff --git a/dvalin/kernel/drivers/base/arm/protected_memory_allocator/protected_memory_allocator.c b/dvalin/kernel/drivers/base/arm/protected_memory_allocator/protected_memory_allocator.c new file mode 100644 index 0000000..6684210 --- /dev/null +++ b/dvalin/kernel/drivers/base/arm/protected_memory_allocator/protected_memory_allocator.c @@ -0,0 +1,551 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Size of a bitfield element in bytes */ +#define BITFIELD_ELEM_SIZE sizeof(u64) + +/* We can track whether or not 64 pages are currently allocated in a u64 */ +#define PAGES_PER_BITFIELD_ELEM (BITFIELD_ELEM_SIZE * BITS_PER_BYTE) + +/* Order 6 (ie, 64) corresponds to the number of pages held in a bitfield */ +#define ORDER_OF_PAGES_PER_BITFIELD_ELEM 6 + +/** + * struct simple_pma_device - Simple implementation of a protected memory + * allocator device + * + * @pma_dev: Protected memory allocator device pointer + * @dev: Device pointer + * @allocated_pages_bitfield_arr: Status of all the physical memory pages within the + * protected memory region, one bit per page + * @rmem_base: Base address of the reserved memory region + * @rmem_size: Size of the reserved memory region, in pages + * @num_free_pages: Number of free pages in the memory region + * @rmem_lock: Lock to serialize the allocation and freeing of + * physical pages from the protected memory region + */ +struct simple_pma_device { + struct protected_memory_allocator_device pma_dev; + struct device *dev; + u64 *allocated_pages_bitfield_arr; + phys_addr_t rmem_base; + size_t rmem_size; + size_t num_free_pages; + spinlock_t rmem_lock; +}; + +/** + * Number of elements in array 'allocated_pages_bitfield_arr'. If the number of + * pages required does not divide exactly by PAGES_PER_BITFIELD_ELEM, adds an + * extra element for the remainder.
+ */ +#define ALLOC_PAGES_BITFIELD_ARR_SIZE(num_pages) \ + ((PAGES_PER_BITFIELD_ELEM * (0 != (num_pages % PAGES_PER_BITFIELD_ELEM)) + \ + num_pages) / PAGES_PER_BITFIELD_ELEM) + +/** + * Allocate a power-of-two number of pages, N, where + * 0 <= N <= ORDER_OF_PAGES_PER_BITFIELD_ELEM - 1. ie, Up to 32 pages. The routine + * fills-in a pma structure and sets the appropriate bits in the allocated-pages + * bitfield array but assumes the caller has already determined that these are + * already clear. + * + * This routine always works within only a single allocated-pages bitfield element. + * It can be thought of as the 'small-granularity' allocator. + */ +static void small_granularity_alloc(struct simple_pma_device *const epma_dev, + size_t alloc_bitfield_idx, size_t start_bit, + size_t order, + struct protected_memory_allocation *pma) +{ + size_t i; + size_t page_idx; + u64 *bitfield; + size_t alloc_pages_bitfield_size; + + if (WARN_ON(!epma_dev) || + WARN_ON(!pma)) + return; + + WARN(epma_dev->rmem_size == 0, "%s: rmem_size is 0", __func__); + alloc_pages_bitfield_size = ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size); + + WARN(alloc_bitfield_idx >= alloc_pages_bitfield_size, + "%s: idx>bf_size: %zu %zu", __FUNCTION__, + alloc_bitfield_idx, alloc_pages_bitfield_size); + + WARN((start_bit + (1 << order)) > PAGES_PER_BITFIELD_ELEM, + "%s: start=%zu order=%zu ppbe=%zu", + __FUNCTION__, start_bit, order, PAGES_PER_BITFIELD_ELEM); + + bitfield = &epma_dev->allocated_pages_bitfield_arr[alloc_bitfield_idx]; + + for (i = 0; i < (1 << order); i++) { + /* Check the pages represented by this bit are actually free */ + WARN (*bitfield & (1ULL << (start_bit + i)), + "in %s: page not free: %zu %zu %.16llx %zu\n", + __FUNCTION__, i, order, *bitfield, alloc_pages_bitfield_size); + + /* Mark the pages as now allocated */ + *bitfield |= (1ULL << (start_bit + i)); + } + + /* Compute the page index */ + page_idx = (alloc_bitfield_idx * PAGES_PER_BITFIELD_ELEM) + start_bit; + + /* 
Fill-in the allocation struct for the caller */ + pma->pa = epma_dev->rmem_base + (page_idx << PAGE_SHIFT); + pma->order = order; +} + +/** + * Allocate a power-of-two number of pages, 2^N, where the order N satisfies + * N >= ORDER_OF_PAGES_PER_BITFIELD_ELEM. ie, 64 pages or more. The routine fills-in + * a pma structure and sets the appropriate bits in the allocated-pages bitfield array + * but assumes the caller has already determined that these are already clear. + * + * Unlike small_granularity_alloc, this routine can work with multiple 64-page groups, + * ie multiple elements from the allocated-pages bitfield array. However, it always + * works with complete sets of these 64-page groups. It can therefore be thought of + * as the 'large-granularity' allocator. + */ +static void large_granularity_alloc(struct simple_pma_device *const epma_dev, + size_t start_alloc_bitfield_idx, + size_t order, + struct protected_memory_allocation *pma) +{ + size_t i; + size_t num_pages_to_alloc = (size_t)1 << order; + size_t num_bitfield_elements_needed = num_pages_to_alloc / PAGES_PER_BITFIELD_ELEM; + size_t start_page_idx = start_alloc_bitfield_idx * PAGES_PER_BITFIELD_ELEM; + + if (WARN_ON(!epma_dev) || + WARN_ON(!pma)) + return; + + /* + * Are there enough bitfield array elements (groups of 64 pages) + * between the start element and the end of the bitfield array + * to fulfill the request?
+ */ + WARN((start_alloc_bitfield_idx + num_bitfield_elements_needed) > ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size), + "%s: start=%zu order=%zu ms=%zu", + __FUNCTION__, start_alloc_bitfield_idx, order, epma_dev->rmem_size); + + for (i = 0; i < num_bitfield_elements_needed; i++) { + u64 *bitfield = &epma_dev->allocated_pages_bitfield_arr[start_alloc_bitfield_idx + i]; + + /* We expect all pages that relate to this bitfield element to be free */ + WARN((*bitfield != 0), + "in %s: pages not free: i=%zu o=%zu bf=%.16llx\n", + __FUNCTION__, i, order, *bitfield); + + /* Mark all the pages for this element as not free */ + *bitfield = ~0ULL; + } + + /* Fill-in the allocation struct for the caller */ + pma->pa = epma_dev->rmem_base + (start_page_idx << PAGE_SHIFT); + pma->order = order; +} + +static struct protected_memory_allocation *simple_pma_alloc_page( + struct protected_memory_allocator_device *pma_dev, unsigned int order) +{ + struct simple_pma_device *const epma_dev = + container_of(pma_dev, struct simple_pma_device, pma_dev); + struct protected_memory_allocation *pma; + size_t num_pages_to_alloc; + + u64 *bitfields = epma_dev->allocated_pages_bitfield_arr; + size_t i; + size_t bit; + size_t count; + + dev_dbg(epma_dev->dev, "%s(pma_dev=%px, order=%u\n", + __func__, (void *)pma_dev, order); + + /* This is an example function that follows an extremely simple logic + * and is very likely to fail to allocate memory if put under stress. + * + * The simple_pma_device maintains an array of u64s, with one bit used + * to track the status of each page. + * + * In order to create a memory allocation, the allocator looks for an + * adjacent group of cleared bits. This does leave the algorithm open + * to fragmentation issues, but is deemed sufficient for now. + * If successful, the allocator shall mark all the pages as allocated + * and increment the offset accordingly.
+ * + * Allocations of 64 pages or more (order 6) can be allocated only with + * 64-page alignment, in order to keep the algorithm as simple as + * possible. ie, starting from bit 0 of any 64-bit page-allocation + * bitfield. For this, the large-granularity allocator is utilised. + * + * Allocations of lower-order can only be allocated entirely within the + * same group of 64 pages, with the small-granularity allocator (ie + * always from the same 64-bit page-allocation bitfield) - again, to + * keep things as simple as possible, but flexible to meet + * current needs. + */ + + num_pages_to_alloc = (size_t)1 << order; + + pma = devm_kzalloc(epma_dev->dev, sizeof(*pma), GFP_KERNEL); + if (!pma) { + dev_err(epma_dev->dev, "Failed to alloc pma struct"); + return NULL; + } + + spin_lock(&epma_dev->rmem_lock); + + if (epma_dev->num_free_pages < num_pages_to_alloc) { + dev_err(epma_dev->dev, "not enough free pages\n"); + devm_kfree(epma_dev->dev, pma); + spin_unlock(&epma_dev->rmem_lock); + return NULL; + } + + /* + * For order 0-5 (ie, 1 to 32 pages) we always allocate within the same set of 64 pages. + * Currently, most allocations will be very small (1 page), so the more likely path + * here is order < ORDER_OF_PAGES_PER_BITFIELD_ELEM.
+ */ + if (likely(order < ORDER_OF_PAGES_PER_BITFIELD_ELEM)) { + size_t alloc_pages_bitmap_size = ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size); + + for (i = 0; i < alloc_pages_bitmap_size; i++) { + count = 0; + + for (bit = 0; bit < PAGES_PER_BITFIELD_ELEM; bit++) { + if (0 == (bitfields[i] & (1ULL << bit))) { + if ((count + 1) >= num_pages_to_alloc) { + /* + * We've found enough free, consecutive pages with which to + * make an allocation + */ + small_granularity_alloc( + epma_dev, i, + bit - count, order, + pma); + + epma_dev->num_free_pages -= + num_pages_to_alloc; + + spin_unlock( + &epma_dev->rmem_lock); + return pma; + } + + /* So far so good, but we need more set bits yet */ + count++; + } else { + /* + * We found an allocated page, so nothing we've seen so far can be used. + * Keep looking. + */ + count = 0; + } + } + } + } else { + /** + * For allocations of order ORDER_OF_PAGES_PER_BITFIELD_ELEM and above (>= 64 pages), we know + * we'll only get allocations for whole groups of 64 pages, which hugely simplifies the task. + */ + size_t alloc_pages_bitmap_size = ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size); + + /* How many 64-bit bitfield elements will be needed for the allocation? */ + size_t num_bitfield_elements_needed = num_pages_to_alloc / PAGES_PER_BITFIELD_ELEM; + + count = 0; + + for (i = 0; i < alloc_pages_bitmap_size; i++) { + /* Are all the pages free for the i'th u64 bitfield element? 
*/ + if (bitfields[i] == 0) { + count += PAGES_PER_BITFIELD_ELEM; + + if (count >= (1 << order)) { + size_t start_idx = (i + 1) - num_bitfield_elements_needed; + + large_granularity_alloc(epma_dev, + start_idx, + order, pma); + + epma_dev->num_free_pages -= 1 << order; + spin_unlock(&epma_dev->rmem_lock); + return pma; + } + } + else + { + count = 0; + } + } + } + + spin_unlock(&epma_dev->rmem_lock); + devm_kfree(epma_dev->dev, pma); + + dev_err(epma_dev->dev, "not enough contiguous pages (need %zu), total free pages left %zu\n", + num_pages_to_alloc, epma_dev->num_free_pages); + return NULL; +} + +static phys_addr_t simple_pma_get_phys_addr( + struct protected_memory_allocator_device *pma_dev, + struct protected_memory_allocation *pma) +{ + struct simple_pma_device *const epma_dev = + container_of(pma_dev, struct simple_pma_device, pma_dev); + + dev_dbg(epma_dev->dev, "%s(pma_dev=%px, pma=%px, pa=%llx\n", + __func__, (void *)pma_dev, (void *)pma, + (unsigned long long)pma->pa); + + return pma->pa; +} + +static void simple_pma_free_page( + struct protected_memory_allocator_device *pma_dev, + struct protected_memory_allocation *pma) +{ + struct simple_pma_device *const epma_dev = + container_of(pma_dev, struct simple_pma_device, pma_dev); + size_t num_pages_in_allocation; + size_t offset; + size_t i; + size_t bitfield_idx; + size_t bitfield_start_bit; + size_t page_num; + u64 *bitfield; + size_t alloc_pages_bitmap_size; + size_t num_bitfield_elems_used_by_alloc; + + WARN_ON(pma == NULL); + + dev_dbg(epma_dev->dev, "%s(pma_dev=%px, pma=%px, pa=%llx\n", + __func__, (void *)pma_dev, (void *)pma, + (unsigned long long)pma->pa); + + WARN_ON(pma->pa < epma_dev->rmem_base); + + /* This is an example function that follows an extremely simple logic + * and is vulnerable to abuse. 
+ */ + offset = (pma->pa - epma_dev->rmem_base); + num_pages_in_allocation = (size_t)1 << pma->order; + + /* The number of bitfield elements used by the allocation */ + num_bitfield_elems_used_by_alloc = num_pages_in_allocation / PAGES_PER_BITFIELD_ELEM; + + /* The page number of the first page of the allocation, relative to rmem_base */ + page_num = offset >> PAGE_SHIFT; + + /* Which u64 bitfield refers to this page? */ + bitfield_idx = page_num / PAGES_PER_BITFIELD_ELEM; + + alloc_pages_bitmap_size = ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size); + + /* Is the allocation within expected bounds? (Ending exactly at the last element is valid) */ + WARN_ON(bitfield_idx >= alloc_pages_bitmap_size || (bitfield_idx + num_bitfield_elems_used_by_alloc) > alloc_pages_bitmap_size); + + spin_lock(&epma_dev->rmem_lock); + + if (pma->order < ORDER_OF_PAGES_PER_BITFIELD_ELEM) { + bitfield = &epma_dev->allocated_pages_bitfield_arr[bitfield_idx]; + + /* Which bit within that u64 bitfield is the lsb covering this allocation? */ + bitfield_start_bit = page_num % PAGES_PER_BITFIELD_ELEM; + + /* Clear the bits for the pages we're now freeing */ + *bitfield &= ~(((1ULL << num_pages_in_allocation) - 1) << bitfield_start_bit); + } + else { + WARN(page_num % PAGES_PER_BITFIELD_ELEM, + "%s: Expecting allocs of order >= %d to be %zu-page aligned\n", + __FUNCTION__, ORDER_OF_PAGES_PER_BITFIELD_ELEM, PAGES_PER_BITFIELD_ELEM); + + for (i = 0; i < num_bitfield_elems_used_by_alloc; i++) { + bitfield = &epma_dev->allocated_pages_bitfield_arr[bitfield_idx + i]; + + /* We expect all bits to be set (all pages allocated) */ + WARN((*bitfield != ~0ULL), + "%s: alloc being freed is not fully allocated: of=%zu np=%zu bf=%.16llx\n", + __FUNCTION__, offset, num_pages_in_allocation, *bitfield); + + /* + * Now clear all the bits in the bitfield element to mark all the pages + * it refers to as free.
+ */ + *bitfield = 0ULL; + } + } + + epma_dev->num_free_pages += num_pages_in_allocation; + spin_unlock(&epma_dev->rmem_lock); + devm_kfree(epma_dev->dev, pma); +} + +static int protected_memory_allocator_probe(struct platform_device *pdev) +{ + struct simple_pma_device *epma_dev; + struct device_node *np; + phys_addr_t rmem_base; + size_t rmem_size; + size_t alloc_bitmap_pages_arr_size; +#if (KERNEL_VERSION(4, 15, 0) <= LINUX_VERSION_CODE) + struct reserved_mem *rmem; +#endif + + np = pdev->dev.of_node; + + if (!np) { + dev_err(&pdev->dev, "device node pointer not set\n"); + return -ENODEV; + } + + np = of_parse_phandle(np, "memory-region", 0); + if (!np) { + dev_err(&pdev->dev, "memory-region node not set\n"); + return -ENODEV; + } + +#if (KERNEL_VERSION(4, 15, 0) <= LINUX_VERSION_CODE) + rmem = of_reserved_mem_lookup(np); + if (rmem) { + rmem_base = rmem->base; + rmem_size = rmem->size >> PAGE_SHIFT; + } else +#endif + { + of_node_put(np); + dev_err(&pdev->dev, "could not read reserved memory-region\n"); + return -ENODEV; + } + + of_node_put(np); + epma_dev = devm_kzalloc(&pdev->dev, sizeof(*epma_dev), GFP_KERNEL); + if (!epma_dev) + return -ENOMEM; + + epma_dev->pma_dev.ops.pma_alloc_page = simple_pma_alloc_page; + epma_dev->pma_dev.ops.pma_get_phys_addr = simple_pma_get_phys_addr; + epma_dev->pma_dev.ops.pma_free_page = simple_pma_free_page; + epma_dev->pma_dev.owner = THIS_MODULE; + epma_dev->dev = &pdev->dev; + epma_dev->rmem_base = rmem_base; + epma_dev->rmem_size = rmem_size; + epma_dev->num_free_pages = rmem_size; + spin_lock_init(&epma_dev->rmem_lock); + + alloc_bitmap_pages_arr_size = ALLOC_PAGES_BITFIELD_ARR_SIZE(epma_dev->rmem_size); + + epma_dev->allocated_pages_bitfield_arr = devm_kzalloc(&pdev->dev, + alloc_bitmap_pages_arr_size * BITFIELD_ELEM_SIZE, GFP_KERNEL); + + if (!epma_dev->allocated_pages_bitfield_arr) { + dev_err(&pdev->dev, "failed to allocate resources\n"); + devm_kfree(&pdev->dev, epma_dev); + return -ENOMEM; + } + + if 
(epma_dev->rmem_size % PAGES_PER_BITFIELD_ELEM) { + size_t extra_pages = + alloc_bitmap_pages_arr_size * PAGES_PER_BITFIELD_ELEM - + epma_dev->rmem_size; + size_t last_bitfield_index = alloc_bitmap_pages_arr_size - 1; + + /* Mark the extra pages (that lie outside the reserved range) as + * always in use. + */ + epma_dev->allocated_pages_bitfield_arr[last_bitfield_index] = + ((1ULL << extra_pages) - 1) << + (PAGES_PER_BITFIELD_ELEM - extra_pages); + } + + platform_set_drvdata(pdev, &epma_dev->pma_dev); + dev_info(&pdev->dev, + "Protected memory allocator probed successfully\n"); + dev_info(&pdev->dev, "Protected memory region: base=%llx num pages=%zu\n", + (unsigned long long)rmem_base, rmem_size); + + return 0; +} + +static int protected_memory_allocator_remove(struct platform_device *pdev) +{ + struct protected_memory_allocator_device *pma_dev = + platform_get_drvdata(pdev); + struct simple_pma_device *epma_dev; + struct device *dev; + + if (!pma_dev) + return -EINVAL; + + epma_dev = container_of(pma_dev, struct simple_pma_device, pma_dev); + dev = epma_dev->dev; + + if (epma_dev->num_free_pages < epma_dev->rmem_size) { + dev_warn(&pdev->dev, "Leaking %zu pages of protected memory\n", + epma_dev->rmem_size - epma_dev->num_free_pages); + } + + platform_set_drvdata(pdev, NULL); + devm_kfree(dev, epma_dev->allocated_pages_bitfield_arr); + devm_kfree(dev, epma_dev); + + dev_info(&pdev->dev, + "Protected memory allocator removed successfully\n"); + + return 0; +} + +static const struct of_device_id protected_memory_allocator_dt_ids[] = { + { .compatible = "arm,protected-memory-allocator" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, protected_memory_allocator_dt_ids); + +static struct platform_driver protected_memory_allocator_driver = { + .probe = protected_memory_allocator_probe, + .remove = protected_memory_allocator_remove, + .driver = { + .name = "simple_protected_memory_allocator", + .of_match_table = of_match_ptr(protected_memory_allocator_dt_ids), + } 
+}; + +module_platform_driver(protected_memory_allocator_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("ARM Ltd."); +MODULE_VERSION("1.0"); diff --git a/dvalin/kernel/drivers/gpu/arm/Kbuild b/dvalin/kernel/drivers/gpu/arm/Kbuild index 1a6fa3c..52ea5fb 100644 --- a/dvalin/kernel/drivers/gpu/arm/Kbuild +++ b/dvalin/kernel/drivers/gpu/arm/Kbuild @@ -1,10 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2012 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012, 2020-2021 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software # Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. +# of such GNU license. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -15,9 +16,6 @@ # along with this program; if not, you can access it online at # http://www.gnu.org/licenses/gpl-2.0.html. # -# SPDX-License-Identifier: GPL-2.0 # -# - obj-$(CONFIG_MALI_MIDGARD) += midgard/ diff --git a/dvalin/kernel/drivers/gpu/arm/Kconfig b/dvalin/kernel/drivers/gpu/arm/Kconfig index 693b86f..2da8c98 100644 --- a/dvalin/kernel/drivers/gpu/arm/Kconfig +++ b/dvalin/kernel/drivers/gpu/arm/Kconfig @@ -1,10 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2012 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012, 2021 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software # Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. +# of such GNU license. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -15,10 +16,7 @@ # along with this program; if not, you can access it online at # http://www.gnu.org/licenses/gpl-2.0.html. # -# SPDX-License-Identifier: GPL-2.0 # -# - menu "ARM GPU Configuration" source "drivers/gpu/arm/midgard/Kconfig" diff --git a/dvalin/kernel/drivers/gpu/arm/Makefile b/dvalin/kernel/drivers/gpu/arm/Makefile new file mode 100644 index 0000000..ea9ecc7 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/Makefile @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +include midgard/Makefile diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/Kbuild index fa52548..d3c4ee1 100755 --- a/dvalin/kernel/drivers/gpu/arm/midgard/Kbuild +++ b/dvalin/kernel/drivers/gpu/arm/midgard/Kbuild @@ -1,10 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved. 
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software # Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. +# of such GNU license. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -15,202 +16,240 @@ # along with this program; if not, you can access it online at # http://www.gnu.org/licenses/gpl-2.0.html. # -# SPDX-License-Identifier: GPL-2.0 # + +# make $(src) as absolute path if it is not already, by prefixing $(srctree) +# This is to prevent any build issue due to wrong path. +src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src)) + # +# Prevent misuse when Kernel configurations are not present by default +# in out-of-tree builds +# +ifneq ($(CONFIG_ANDROID),n) +ifeq ($(CONFIG_GPU_TRACEPOINTS),n) + $(error CONFIG_GPU_TRACEPOINTS must be set in Kernel configuration) +endif +endif +ifeq ($(CONFIG_DMA_SHARED_BUFFER),n) + $(error CONFIG_DMA_SHARED_BUFFER must be set in Kernel configuration) +endif -# Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= "r25p0-01rel0" +ifeq ($(CONFIG_PM_DEVFREQ),n) + $(error CONFIG_PM_DEVFREQ must be set in Kernel configuration) +endif -# Paths required for build -# make $(src) as absolute path if it isn't already, by prefixing $(srctree) -src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src)) -KBASE_PATH = $(src) -KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy -UMP_PATH = $(src)/../../../base +ifeq ($(CONFIG_DEVFREQ_THERMAL),n) + $(error CONFIG_DEVFREQ_THERMAL must be set in Kernel configuration) +endif + +ifeq ($(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND),n) + $(error CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND must be set in Kernel configuration) +endif + +ifeq ($(CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS), y) + ifneq ($(CONFIG_DEBUG_FS), y) + $(error 
CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS depends on CONFIG_DEBUG_FS to be set in Kernel configuration) + endif +endif + +ifeq ($(CONFIG_MALI_FENCE_DEBUG), y) + ifneq ($(CONFIG_SYNC), y) + ifneq ($(CONFIG_SYNC_FILE), y) + $(error CONFIG_MALI_FENCE_DEBUG depends on CONFIG_SYNC || CONFIG_SYNC_FILE to be set in Kernel configuration) + endif + endif +endif +# +# Configurations +# + +# Driver version string which is returned to userspace via an ioctl +MALI_RELEASE_NAME ?= '"r32p1-01bet0"' # Set up defaults if not defined by build system -MALI_CUSTOMER_RELEASE ?= 1 -MALI_USE_CSF ?= 0 -MALI_UNIT_TEST ?= 0 -MALI_KERNEL_TEST_API ?= 0 +ifeq ($(CONFIG_MALI_DEBUG), y) + MALI_UNIT_TEST = 1 + MALI_CUSTOMER_RELEASE ?= 0 +else + MALI_UNIT_TEST ?= 0 + MALI_CUSTOMER_RELEASE ?= 1 +endif MALI_COVERAGE ?= 0 + CONFIG_MALI_PLATFORM_NAME ?= "devicetree" + +# Kconfig passes in the name with quotes for in-tree builds - remove them. +MALI_PLATFORM_DIR := $(shell echo $(CONFIG_MALI_PLATFORM_NAME)) + +ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) + MALI_JIT_PRESSURE_LIMIT_BASE = 0 + MALI_USE_CSF = 1 +else + MALI_JIT_PRESSURE_LIMIT_BASE ?= 1 + MALI_USE_CSF ?= 0 +endif + +ifneq ($(CONFIG_MALI_KUTF), n) + MALI_KERNEL_TEST_API ?= 1 +else + MALI_KERNEL_TEST_API ?= 0 +endif + # Experimental features (corresponding -D definition should be appended to -# DEFINES below, e.g. for MALI_EXPERIMENTAL_FEATURE, +# ccflags-y below, e.g. 
for MALI_EXPERIMENTAL_FEATURE, # -DMALI_EXPERIMENTAL_FEATURE=$(MALI_EXPERIMENTAL_FEATURE) should be appended) # # Experimental features must default to disabled, e.g.: # MALI_EXPERIMENTAL_FEATURE ?= 0 -MALI_JIT_PRESSURE_LIMIT ?= 0 MALI_INCREMENTAL_RENDERING ?= 0 -# Set up our defines, which will be passed to gcc -DEFINES = \ - -DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \ - -DMALI_USE_CSF=$(MALI_USE_CSF) \ - -DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \ - -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ - -DMALI_COVERAGE=$(MALI_COVERAGE) \ - -DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \ - -DMALI_JIT_PRESSURE_LIMIT=$(MALI_JIT_PRESSURE_LIMIT) \ - -DMALI_INCREMENTAL_RENDERING=$(MALI_INCREMENTAL_RENDERING) +# +# ccflags +# +ccflags-y = \ + -DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \ + -DMALI_USE_CSF=$(MALI_USE_CSF) \ + -DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \ + -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ + -DMALI_COVERAGE=$(MALI_COVERAGE) \ + -DMALI_RELEASE_NAME=$(MALI_RELEASE_NAME) \ + -DMALI_JIT_PRESSURE_LIMIT_BASE=$(MALI_JIT_PRESSURE_LIMIT_BASE) \ + -DMALI_INCREMENTAL_RENDERING=$(MALI_INCREMENTAL_RENDERING) \ + -DMALI_KBASE_BUILD \ + -DMALI_PLATFORM_DIR=$(MALI_PLATFORM_DIR) + ifeq ($(KBUILD_EXTMOD),) # in-tree -DEFINES +=-DMALI_KBASE_PLATFORM_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME) + ccflags-y +=-DMALI_KBASE_PLATFORM_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME) else # out-of-tree -DEFINES +=-DMALI_KBASE_PLATFORM_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME) -endif - -DEFINES += -I$(srctree)/drivers/staging/android -#meson graphics start -ldflags-y += --strip-debug -#meson graphics end - -DEFINES += -DMALI_KBASE_BUILD - -# Use our defines when compiling -ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux -subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux - -SRC := \ - context/mali_kbase_context.c \ - 
debug/mali_kbase_debug_ktrace.c \ - device/mali_kbase_device.c \ - mali_kbase_cache_policy.c \ - mali_kbase_mem.c \ - mali_kbase_mem_pool_group.c \ - mali_kbase_native_mgm.c \ - mali_kbase_ctx_sched.c \ - mali_kbase_jm.c \ - mali_kbase_gpuprops.c \ - mali_kbase_pm.c \ - mali_kbase_config.c \ - mali_kbase_vinstr.c \ - mali_kbase_hwcnt.c \ - mali_kbase_hwcnt_backend_gpu.c \ - mali_kbase_hwcnt_gpu.c \ - mali_kbase_hwcnt_legacy.c \ - mali_kbase_hwcnt_types.c \ - mali_kbase_hwcnt_virtualizer.c \ - mali_kbase_softjobs.c \ - mali_kbase_hw.c \ - mali_kbase_debug.c \ - mali_kbase_gpu_memory_debugfs.c \ - mali_kbase_mem_linux.c \ - mali_kbase_core_linux.c \ - mali_kbase_mem_profile_debugfs.c \ - mmu/mali_kbase_mmu.c \ - mmu/mali_kbase_mmu_hw_direct.c \ - mmu/mali_kbase_mmu_mode_lpae.c \ - mmu/mali_kbase_mmu_mode_aarch64.c \ - mali_kbase_disjoint_events.c \ - mali_kbase_debug_mem_view.c \ - mali_kbase_smc.c \ - mali_kbase_mem_pool.c \ - mali_kbase_mem_pool_debugfs.c \ - mali_kbase_debugfs_helper.c \ - mali_kbase_strings.c \ - mali_kbase_as_fault_debugfs.c \ - mali_kbase_regs_history_debugfs.c \ - thirdparty/mali_kbase_mmap.c \ - tl/mali_kbase_timeline.c \ - tl/mali_kbase_timeline_io.c \ - tl/mali_kbase_tlstream.c \ - tl/mali_kbase_tracepoints.c \ - gpu/mali_kbase_gpu.c - -ifeq ($(MALI_USE_CSF),1) - SRC += \ - debug/backend/mali_kbase_debug_ktrace_csf.c \ - device/backend/mali_kbase_device_csf.c \ - gpu/backend/mali_kbase_gpu_fault_csf.c \ - tl/backend/mali_kbase_timeline_csf.c \ - mmu/backend/mali_kbase_mmu_csf.c \ - context/backend/mali_kbase_context_csf.c -else - SRC += \ - mali_kbase_dummy_job_wa.c \ - mali_kbase_debug_job_fault.c \ - mali_kbase_event.c \ - mali_kbase_jd.c \ - mali_kbase_jd_debugfs.c \ - mali_kbase_js.c \ - mali_kbase_js_ctx_attr.c \ - debug/backend/mali_kbase_debug_ktrace_jm.c \ - device/backend/mali_kbase_device_jm.c \ - gpu/backend/mali_kbase_gpu_fault_jm.c \ - tl/backend/mali_kbase_timeline_jm.c \ - mmu/backend/mali_kbase_mmu_jm.c \ - 
context/backend/mali_kbase_context_jm.c + ccflags-y +=-DMALI_KBASE_PLATFORM_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_NAME) endif -ifeq ($(CONFIG_MALI_CINSTR_GWT),y) - SRC += mali_kbase_gwt.c -endif +ccflags-y += \ + -I$(srctree)/include/linux \ + -I$(srctree)/drivers/staging/android \ + -I$(src) \ + -I$(src)/platform/$(MALI_PLATFORM_DIR) \ + -I$(src)/../../../base \ + -I$(src)/../../../../include -ifeq ($(MALI_UNIT_TEST),1) - SRC += tl/mali_kbase_timeline_test.c -endif +subdir-ccflags-y += $(ccflags-y) -ifeq ($(MALI_CUSTOMER_RELEASE),0) - SRC += mali_kbase_regs_dump_debugfs.c -endif +# +# Kernel Modules +# +obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o +obj-$(CONFIG_MALI_ARBITRATION) += arbitration/ +obj-$(CONFIG_MALI_KUTF) += tests/ + +mali_kbase-y := \ + mali_kbase_cache_policy.o \ + mali_kbase_ccswe.o \ + mali_kbase_mem.o \ + mali_kbase_mem_pool_group.o \ + mali_kbase_native_mgm.o \ + mali_kbase_ctx_sched.o \ + mali_kbase_gpuprops.o \ + mali_kbase_pm.o \ + mali_kbase_config.o \ + mali_kbase_vinstr.o \ + mali_kbase_hwcnt.o \ + mali_kbase_hwcnt_gpu.o \ + mali_kbase_hwcnt_legacy.o \ + mali_kbase_hwcnt_types.o \ + mali_kbase_hwcnt_virtualizer.o \ + mali_kbase_softjobs.o \ + mali_kbase_hw.o \ + mali_kbase_debug.o \ + mali_kbase_gpu_memory_debugfs.o \ + mali_kbase_mem_linux.o \ + mali_kbase_core_linux.o \ + mali_kbase_mem_profile_debugfs.o \ + mali_kbase_disjoint_events.o \ + mali_kbase_debug_mem_view.o \ + mali_kbase_smc.o \ + mali_kbase_mem_pool.o \ + mali_kbase_mem_pool_debugfs.o \ + mali_kbase_debugfs_helper.o \ + mali_kbase_strings.o \ + mali_kbase_as_fault_debugfs.o \ + mali_kbase_regs_history_debugfs.o \ + mali_kbase_dvfs_debugfs.o \ + mali_power_gpu_frequency_trace.o \ + mali_kbase_trace_gpu_mem.o + +mali_kbase-$(CONFIG_MALI_CINSTR_GWT) += mali_kbase_gwt.o +mali_kbase-$(CONFIG_SYNC) += \ + mali_kbase_sync_android.o \ + mali_kbase_sync_common.o -ccflags-y += -I$(KBASE_PATH) -I$(KBASE_PATH)/debug \ - -I$(KBASE_PATH)/debug/backend +mali_kbase-$(CONFIG_SYNC_FILE) 
+= \ + mali_kbase_fence_ops.o \ + mali_kbase_sync_file.o \ + mali_kbase_sync_common.o + +ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) + mali_kbase-y += \ + mali_kbase_hwcnt_backend_csf.o \ + mali_kbase_hwcnt_backend_csf_if_fw.o +else + mali_kbase-y += \ + mali_kbase_jm.o \ + mali_kbase_hwcnt_backend_jm.o \ + mali_kbase_dummy_job_wa.o \ + mali_kbase_debug_job_fault.o \ + mali_kbase_event.o \ + mali_kbase_jd.o \ + mali_kbase_jd_debugfs.o \ + mali_kbase_js.o \ + mali_kbase_js_ctx_attr.o \ + mali_kbase_kinstr_jm.o + + mali_kbase-$(CONFIG_MALI_DMA_FENCE) += \ + mali_kbase_fence_ops.o \ + mali_kbase_dma_fence.o \ + mali_kbase_fence.o + + mali_kbase-$(CONFIG_SYNC_FILE) += \ + mali_kbase_fence_ops.o \ + mali_kbase_fence.o +endif -# Tell the Linux build system from which .o file to create the kernel module -obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o -# Tell the Linux build system to enable building of our .c files -mali_kbase-y := $(SRC:.c=.o) +INCLUDE_SUBDIR = \ + $(src)/context/Kbuild \ + $(src)/debug/Kbuild \ + $(src)/device/Kbuild \ + $(src)/backend/gpu/Kbuild \ + $(src)/mmu/Kbuild \ + $(src)/tl/Kbuild \ + $(src)/gpu/Kbuild \ + $(src)/thirdparty/Kbuild \ + $(src)/platform/$(MALI_PLATFORM_DIR)/Kbuild -# Kconfig passes in the name with quotes for in-tree builds - remove them. 
-platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_NAME)) -MALI_PLATFORM_DIR := platform/$(platform_name) -ccflags-y += -I$(src)/$(MALI_PLATFORM_DIR) -include $(src)/$(MALI_PLATFORM_DIR)/Kbuild +ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) + INCLUDE_SUBDIR += $(src)/csf/Kbuild +endif -ifeq ($(CONFIG_MALI_DEVFREQ),y) - ifeq ($(CONFIG_DEVFREQ_THERMAL),y) - include $(src)/ipa/Kbuild - endif +ifeq ($(CONFIG_MALI_ARBITER_SUPPORT),y) + INCLUDE_SUBDIR += $(src)/arbiter/Kbuild endif -ifeq ($(MALI_USE_CSF),1) - include $(src)/csf/Kbuild -else -# empty +ifeq ($(CONFIG_MALI_DEVFREQ),y) + ifeq ($(CONFIG_DEVFREQ_THERMAL),y) + INCLUDE_SUBDIR += $(src)/ipa/Kbuild + endif endif -ifeq ($(CONFIG_MALI_ARBITER_SUPPORT),y) - include $(src)/arbiter/Kbuild +ifeq ($(KBUILD_EXTMOD),) +# in-tree + -include $(INCLUDE_SUBDIR) else -# empty +# out-of-tree + include $(INCLUDE_SUBDIR) endif - -mali_kbase-$(CONFIG_MALI_DMA_FENCE) += \ - mali_kbase_dma_fence.o \ - mali_kbase_fence.o -mali_kbase-$(CONFIG_SYNC) += \ - mali_kbase_sync_android.o \ - mali_kbase_sync_common.o -mali_kbase-$(CONFIG_SYNC_FILE) += \ - mali_kbase_sync_file.o \ - mali_kbase_sync_common.o \ - mali_kbase_fence.o - -include $(src)/backend/gpu/Kbuild -mali_kbase-y += $(BACKEND:.c=.o) - - -ccflags-y += -I$(src)/backend/gpu -subdir-ccflags-y += -I$(src)/backend/gpu - -# For kutf and mali_kutf_irq_latency_test -obj-$(CONFIG_MALI_KUTF) += tests/ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/Kconfig b/dvalin/kernel/drivers/gpu/arm/midgard/Kconfig index ca59dbb..5541383 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/Kconfig +++ b/dvalin/kernel/drivers/gpu/arm/midgard/Kconfig @@ -1,10 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved. 
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software # Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. +# of such GNU license. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -15,15 +16,14 @@ # along with this program; if not, you can access it online at # http://www.gnu.org/licenses/gpl-2.0.html. # -# SPDX-License-Identifier: GPL-2.0 # -# - menuconfig MALI_MIDGARD tristate "Mali Midgard series support" select GPU_TRACEPOINTS if ANDROID select DMA_SHARED_BUFFER + select PM_DEVFREQ + select DEVFREQ_THERMAL default n help Enable this option to build support for a ARM Mali Midgard GPU. @@ -31,13 +31,43 @@ menuconfig MALI_MIDGARD To compile this driver as a module, choose M here: this will generate a single module, called mali_kbase. -config MALI_GATOR_SUPPORT - bool "Enable Streamline tracing support" +if MALI_MIDGARD + +config MALI_PLATFORM_NAME depends on MALI_MIDGARD + string "Platform name" + default "devicetree" + help + Enter the name of the desired platform configuration directory to + include in the build. 'platform/$(MALI_PLATFORM_NAME)/Makefile' must + exist. + +config MALI_REAL_HW + depends on MALI_MIDGARD + def_bool !MALI_NO_MALI + +menu "Platform specific options" +source "drivers/gpu/arm/midgard/platform/Kconfig" +endmenu + +config MALI_CSF_SUPPORT + bool "Enable Mali CSF based GPU support" + depends on MALI_MIDGARD=m + default n + help + Enables support for CSF based GPUs. + +config MALI_DEVFREQ + bool "Enable devfreq support for Mali" + depends on MALI_MIDGARD && PM_DEVFREQ + select DEVFREQ_GOV_SIMPLE_ONDEMAND default y help - Enables kbase tracing used by the Arm Streamline Performance Analyzer. - The tracepoints are used to derive GPU activity charts in Streamline. + Support devfreq for Mali. 
+ + Using the devfreq framework and, by default, the simple on-demand + governor, the frequency of Mali will be dynamically selected from the + available OPPs. config MALI_MIDGARD_DVFS bool "Enable legacy DVFS" @@ -46,28 +76,25 @@ config MALI_MIDGARD_DVFS help Choose this option to enable legacy DVFS in the Mali Midgard DDK. +config MALI_GATOR_SUPPORT + bool "Enable Streamline tracing support" + depends on MALI_MIDGARD + default y + help + Enables kbase tracing used by the Arm Streamline Performance Analyzer. + The tracepoints are used to derive GPU activity charts in Streamline. + config MALI_MIDGARD_ENABLE_TRACE bool "Enable kbase tracing" depends on MALI_MIDGARD default y if MALI_DEBUG default n help - Enables tracing in kbase. Trace log available through + Enables tracing in kbase. Trace log available through the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled -config MALI_DEVFREQ - bool "devfreq support for Mali" - depends on MALI_MIDGARD && PM_DEVFREQ - default y - help - Support devfreq for Mali. - - Using the devfreq framework and, by default, the simpleondemand - governor, the frequency of Mali will be dynamically selected from the - available OPPs. - config MALI_DMA_FENCE - bool "DMA_BUF fence support for Mali" + bool "Enable DMA_BUF fence support for Mali" depends on MALI_MIDGARD default n help @@ -76,18 +103,9 @@ config MALI_DMA_FENCE This option should only be enabled if the Linux Kernel has built in support for DMA_BUF fences. -config MALI_PLATFORM_NAME - depends on MALI_MIDGARD - string "Platform name" - default "devicetree" - help - Enter the name of the desired platform configuration directory to - include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must - exist. - config MALI_ARBITER_SUPPORT bool "Enable arbiter support for Mali" - depends on MALI_MIDGARD + depends on MALI_MIDGARD && !MALI_CSF_SUPPORT default n help Enable support for the arbiter interface in the driver. 
@@ -96,18 +114,64 @@ config MALI_ARBITER_SUPPORT If unsure, say N. -# MALI_EXPERT configuration options +config MALI_DMA_BUF_MAP_ON_DEMAND + bool "Enable map imported dma-bufs on demand" + depends on MALI_MIDGARD + default n + help + This option causes kbase to set up the GPU mapping of imported + dma-buf when needed to run atoms. This is the legacy behavior. + + This is intended for testing and the option will get removed in the + future. + +config MALI_DMA_BUF_LEGACY_COMPAT + bool "Enable legacy compatibility cache flush on dma-buf map" + depends on MALI_MIDGARD && !MALI_DMA_BUF_MAP_ON_DEMAND + default n + help + This option enables compatibility with legacy dma-buf mapping + behavior, then the dma-buf is mapped on import, by adding cache + maintenance where MALI_DMA_BUF_MAP_ON_DEMAND would do the mapping, + including a cache flush. + + This option might work-around issues related to missing cache + flushes in other drivers. This only has an effect for clients using + UK 11.18 or older. For later UK versions it is not possible. menuconfig MALI_EXPERT depends on MALI_MIDGARD bool "Enable Expert Settings" default n help - Enabling this option and modifying the default settings may produce a driver with performance or - other limitations. + Enabling this option and modifying the default settings may produce + a driver with performance or other limitations. + +if MALI_EXPERT + +config MALI_2MB_ALLOC + bool "Attempt to allocate 2MB pages" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Rather than allocating all GPU memory page-by-page, attempt to + allocate 2MB pages from the kernel. This reduces TLB pressure and + helps to prevent memory fragmentation. + + If in doubt, say N + +config MALI_MEMORY_FULLY_BACKED + bool "Enable memory fully physically-backed" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + This option enables full physical backing of all virtual + memory allocations in the kernel.
Notice that this build + option only affects allocations of grow-on-GPU-page-fault + memory. config MALI_CORESTACK - bool "Support controlling power to the GPU core stack" + bool "Enable support of GPU core stack power control" depends on MALI_MIDGARD && MALI_EXPERT default n help @@ -119,15 +183,48 @@ config MALI_CORESTACK If unsure, say N. +comment "Platform options" + depends on MALI_MIDGARD && MALI_EXPERT + +config MALI_NO_MALI + bool "Enable No Mali" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + This can be used to test the driver in a simulated environment + whereby the hardware is not physically present. If the hardware is physically + present it will not be used. This can be used to test the majority of the + driver without needing actual hardware or for software benchmarking. + All calls to the simulated hardware will complete immediately as if the hardware + completed the task. + +config MALI_ERROR_INJECT + bool "Enable No Mali error injection" + depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI + default n + help + Enables insertion of errors to test module failure and recovery mechanisms. + +config MALI_GEM5_BUILD + bool "Enable build of Mali kernel driver for GEM5" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + This option is to do a Mali GEM5 build. + If unsure, say N. + +comment "Debug options" + depends on MALI_MIDGARD && MALI_EXPERT + config MALI_DEBUG - bool "Debug build" + bool "Enable debug build" depends on MALI_MIDGARD && MALI_EXPERT default n help Select this option for increased checking and reporting of errors. config MALI_FENCE_DEBUG - bool "Debug sync fence usage" + bool "Enable debug sync fence usage" depends on MALI_MIDGARD && MALI_EXPERT && (SYNC || SYNC_FILE) default y if MALI_DEBUG help @@ -143,28 +240,6 @@ config MALI_FENCE_DEBUG The timeout can be changed at runtime through the js_soft_timeout device attribute, where the timeout is specified in milliseconds. 
-config MALI_NO_MALI - bool "No Mali" - depends on MALI_MIDGARD && MALI_EXPERT - default n - help - This can be used to test the driver in a simulated environment - whereby the hardware is not physically present. If the hardware is physically - present it will not be used. This can be used to test the majority of the - driver without needing actual hardware or for software benchmarking. - All calls to the simulated hardware will complete immediately as if the hardware - completed the task. - -config MALI_REAL_HW - def_bool !MALI_NO_MALI - -config MALI_ERROR_INJECT - bool "Error injection" - depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI - default n - help - Enables insertion of errors to test module failure and recovery mechanisms. - config MALI_SYSTEM_TRACE bool "Enable system event tracing support" depends on MALI_MIDGARD && MALI_EXPERT @@ -176,63 +251,93 @@ config MALI_SYSTEM_TRACE minimal overhead when not in use. Enable only if you know what you are doing. -config MALI_2MB_ALLOC - bool "Attempt to allocate 2MB pages" +comment "Instrumentation options" depends on MALI_MIDGARD && MALI_EXPERT - default n - help - Rather than allocating all GPU memory page-by-page, attempt to - allocate 2MB pages from the kernel. This reduces TLB pressure and - helps to prevent memory fragmentation. - If in doubt, say N +choice + prompt "Select Performance counters set" + default MALI_PRFCNT_SET_PRIMARY + depends on MALI_MIDGARD && MALI_EXPERT -config MALI_PWRSOFT_765 - bool "PWRSOFT-765 ticket" +config MALI_PRFCNT_SET_PRIMARY + bool "Primary" depends on MALI_MIDGARD && MALI_EXPERT - default n help - PWRSOFT-765 fixes devfreq cooling devices issues. The fix was merged - in kernel v4.10, however if backported into the kernel then this - option must be manually selected. + Select this option to use primary set of performance counters. - If using kernel >= v4.10 then say N, otherwise if devfreq cooling - changes have been backported say Y to avoid compilation errors. 
+config MALI_PRFCNT_SET_SECONDARY + bool "Secondary" + depends on MALI_MIDGARD && MALI_EXPERT + help + Select this option to use secondary set of performance counters. Kernel + features that depend on an access to the primary set of counters may + become unavailable. Enabling this option will prevent power management + from working optimally and may cause instrumentation tools to return + bogus results. -config MALI_MEMORY_FULLY_BACKED - bool "Memory fully physically-backed" + If unsure, use MALI_PRFCNT_SET_PRIMARY. + +config MALI_PRFCNT_SET_TERTIARY + bool "Tertiary" depends on MALI_MIDGARD && MALI_EXPERT + help + Select this option to use tertiary set of performance counters. Kernel + features that depend on an access to the primary set of counters may + become unavailable. Enabling this option will prevent power management + from working optimally and may cause instrumentation tools to return + bogus results. + + If unsure, use MALI_PRFCNT_SET_PRIMARY. + +endchoice + +config MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS + bool "Enable runtime selection of performance counters set via debugfs" + depends on MALI_MIDGARD && MALI_EXPERT && DEBUG_FS default n help - This option enables full physical backing of all virtual - memory allocations in the kernel. Notice that this build - option only affects allocations of grow-on-GPU-page-fault - memory. + Select this option to make the secondary set of performance counters + available at runtime via debugfs. Kernel features that depend on an + access to the primary set of counters may become unavailable. -config MALI_DMA_BUF_MAP_ON_DEMAND - bool "Map imported dma-bufs on demand" - depends on MALI_MIDGARD + If no runtime debugfs option is set, the build time counter set + choice will be used. + + This feature is unsupported and unstable, and may break at any time. + Enabling this option will prevent power management from working + optimally and may cause instrumentation tools to return bogus results. 
+ + No validation is done on the debugfs input. Invalid input could cause + performance counter errors. Valid inputs are the values accepted by + the SET_SELECT bits of the PRFCNT_CONFIG register as defined in the + architecture specification. + + If unsure, say N. + +config MALI_JOB_DUMP + bool "Enable system level support needed for job dumping" + depends on MALI_MIDGARD && MALI_EXPERT default n help - This option caused kbase to set up the GPU mapping of imported - dma-buf when needed to run atoms. This is the legacy behaviour. + Choose this option to enable system level support needed for + job dumping. This is typically used for instrumentation but has + minimal overhead when not in use. Enable only if you know what + you are doing. - This is intended for testing and the option will get removed in the - future. +comment "Workarounds" + depends on MALI_MIDGARD && MALI_EXPERT -config MALI_DMA_BUF_LEGACY_COMPAT - bool "Enable legacy compatibility cache flush on dma-buf map" - depends on MALI_MIDGARD && !MALI_DMA_BUF_MAP_ON_DEMAND +config MALI_PWRSOFT_765 + bool "Enable workaround for PWRSOFT-765" + depends on MALI_MIDGARD && MALI_EXPERT default n help - This option enables compatibility with legacy dma-buf mapping - behavior, then the dma-buf is mapped on import, by adding cache - maintenance where MALI_DMA_BUF_MAP_ON_DEMAND would do the mapping, - including a cache flush. + PWRSOFT-765 fixes devfreq cooling devices issues. The fix was merged + in kernel v4.10, however if backported into the kernel then this + option must be manually selected. - This option might work-around issues related to missing cache - flushes in other drivers. This only has an effect for clients using - UK 11.18 or older. For later UK versions it is not possible. + If using kernel >= v4.10 then say N, otherwise if devfreq cooling + changes have been backported say Y to avoid compilation errors. 
config MALI_HW_ERRATA_1485982_NOT_AFFECTED bool "Disable workaround for BASE_HW_ISSUE_GPU2017_1336" @@ -252,58 +357,22 @@ config MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE default n help This option uses an alternative workaround for GPU2017-1336. Lowering - the GPU clock to a, platform specific, known good frequeuncy before + the GPU clock to a, platform specific, known good frequency before powering down the L2 cache. The clock can be specified in the device tree using the property, opp-mali-errata-1485982. Otherwise the slowest clock will be selected. -config MALI_GEM5_BUILD - bool "Enable build of Mali kernel driver for GEM5" - depends on MALI_MIDGARD - default n - help - This option is to do a Mali GEM5 build. - If unsure, say N. - -# Instrumentation options. +endif -config MALI_JOB_DUMP - bool "Enable system level support needed for job dumping" - depends on MALI_MIDGARD && MALI_EXPERT - default n - help - Choose this option to enable system level support needed for - job dumping. This is typically used for instrumentation but has - minimal overhead when not in use. Enable only if you know what - you are doing. - -config MALI_PRFCNT_SET_SECONDARY - bool "Use secondary set of performance counters" - depends on MALI_MIDGARD && MALI_EXPERT - default n - help - Select this option to use secondary set of performance counters. Kernel - features that depend on an access to the primary set of counters may - become unavailable. Enabling this option will prevent power management - from working optimally and may cause instrumentation tools to return - bogus results. - - If unsure, say N. 
- -config MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS - bool "Use secondary set of performance counters" - depends on MALI_MIDGARD && MALI_EXPERT && !MALI_PRFCNT_SET_SECONDARY && DEBUG_FS +config MALI_ARBITRATION + bool "Enable Virtualization reference code" + depends on MALI_MIDGARD default n help - Select this option to make the secondary set of performance counters - available at runtime via debugfs. Kernel features that depend on an - access to the primary set of counters may become unavailable. - - This feature is unsupported and unstable, and may break at any time. - Enabling this option will prevent power management from working - optimally and may cause instrumentation tools to return bogus results. - + Enables the build of several reference modules used in the reference + virtualization setup for Mali If unsure, say N. -source "drivers/gpu/arm/midgard/platform/Kconfig" source "drivers/gpu/arm/midgard/tests/Kconfig" + +endif diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/Makefile b/dvalin/kernel/drivers/gpu/arm/midgard/Makefile index 53a1209..4384e80 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/Makefile +++ b/dvalin/kernel/drivers/gpu/arm/midgard/Makefile @@ -1,10 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software # Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. +# of such GNU license. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -15,24 +16,200 @@ # along with this program; if not, you can access it online at # http://www.gnu.org/licenses/gpl-2.0.html. 
# -# SPDX-License-Identifier: GPL-2.0 # + +KERNEL_SRC ?= /lib/modules/$(shell uname -r)/build +KDIR ?= $(KERNEL_SRC) + +ifeq ($(KDIR),) + $(error Must specify KDIR to point to the kernel to target) +endif + +# +# Default configuration values +# +# Dependency resolution is done through statements as Kconfig +# is not supported for out-of-tree builds. # +CONFIG_MALI_MIDGARD ?= m +ifeq ($(CONFIG_MALI_MIDGARD),m) + CONFIG_MALI_GATOR_SUPPORT ?= y + CONFIG_MALI_ARBITRATION ?= n + CONFIG_MALI_PARTITION_MANAGER ?= n + + ifneq ($(CONFIG_MALI_NO_MALI),y) + # Prevent misuse when CONFIG_MALI_NO_MALI=y + CONFIG_MALI_REAL_HW ?= y + endif + + ifeq ($(CONFIG_MALI_MIDGARD_DVFS),y) + # Prevent misuse when CONFIG_MALI_MIDGARD_DVFS=y + CONFIG_MALI_DEVFREQ ?= n + else + CONFIG_MALI_DEVFREQ ?= y + endif + + ifeq ($(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND), y) + # Prevent misuse when CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND=y + CONFIG_MALI_DMA_BUF_LEGACY_COMPAT = n + endif + + ifeq ($(CONFIG_BSP_HAS_HYPERVISOR),y) + ifneq ($(CONFIG_MALI_ARBITRATION), n) + CONFIG_MALI_XEN ?= m + endif + endif + + # + # Expert/Debug/Test released configurations + # + ifeq ($(CONFIG_MALI_EXPERT), y) + ifeq ($(CONFIG_MALI_NO_MALI), y) + CONFIG_MALI_REAL_HW = n + else + # Prevent misuse when CONFIG_MALI_NO_MALI=n + CONFIG_MALI_REAL_HW = y + CONFIG_MALI_ERROR_INJECT = n + endif + + ifeq ($(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED), y) + # Prevent misuse when CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y + CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n + endif -KDIR ?= /lib/modules/$(shell uname -r)/build + ifeq ($(CONFIG_MALI_DEBUG), y) + CONFIG_MALI_MIDGARD_ENABLE_TRACE ?= y + CONFIG_MALI_SYSTEM_TRACE ?= y -BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../..
-KBASE_PATH_RELATIVE = $(CURDIR) + ifeq ($(CONFIG_SYNC), y) + CONFIG_MALI_FENCE_DEBUG ?= y + else + ifeq ($(CONFIG_SYNC_FILE), y) + CONFIG_MALI_FENCE_DEBUG ?= y + else + CONFIG_MALI_FENCE_DEBUG = n + endif + endif + else + # Prevent misuse when CONFIG_MALI_DEBUG=n + CONFIG_MALI_MIDGARD_ENABLE_TRACE = n + CONFIG_MALI_SYSTEM_TRACE = n + CONFIG_MALI_FENCE_DEBUG = n + endif + else + # Prevent misuse when CONFIG_MALI_EXPERT=n + CONFIG_MALI_CORESTACK = n + CONFIG_MALI_2MB_ALLOC = n + CONFIG_MALI_PWRSOFT_765 = n + CONFIG_MALI_MEMORY_FULLY_BACKED = n + CONFIG_MALI_JOB_DUMP = n + CONFIG_MALI_NO_MALI = n + CONFIG_MALI_REAL_HW = y + CONFIG_MALI_ERROR_INJECT = n + CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED = n + CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n + CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS = n + CONFIG_MALI_DEBUG = n + CONFIG_MALI_MIDGARD_ENABLE_TRACE = n + CONFIG_MALI_SYSTEM_TRACE = n + CONFIG_MALI_FENCE_DEBUG = n + endif -ifeq ($(CONFIG_MALI_BUSLOG),y) -#Add bus logger symbols -EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers + ifeq ($(CONFIG_MALI_DEBUG), y) + CONFIG_MALI_KUTF ?= y + ifeq ($(CONFIG_MALI_KUTF), y) + CONFIG_MALI_KUTF_IRQ_TEST ?= y + CONFIG_MALI_KUTF_CLK_RATE_TRACE ?= y + else + # Prevent misuse when CONFIG_MALI_KUTF=n + CONFIG_MALI_KUTF_IRQ_TEST = n + CONFIG_MALI_KUTF_CLK_RATE_TRACE = n + endif + else + # Prevent misuse when CONFIG_MALI_DEBUG=n + CONFIG_MALI_KUTF = n + CONFIG_MALI_KUTF_IRQ_TEST = n + CONFIG_MALI_KUTF_CLK_RATE_TRACE = n + endif +else + # Prevent misuse when CONFIG_MALI_MIDGARD=n + CONFIG_MALI_ARBITRATION = n + CONFIG_MALI_KUTF = n + CONFIG_MALI_KUTF_IRQ_TEST = n + CONFIG_MALI_KUTF_CLK_RATE_TRACE = n endif -# we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions +# All Mali CONFIG should be listed here +CONFIGS := \ + CONFIG_MALI_MIDGARD \ + CONFIG_MALI_CSF_SUPPORT \ + CONFIG_MALI_GATOR_SUPPORT \ + CONFIG_MALI_DMA_FENCE \ + 
CONFIG_MALI_ARBITER_SUPPORT \ + CONFIG_MALI_ARBITRATION \ + CONFIG_MALI_PARTITION_MANAGER \ + CONFIG_MALI_REAL_HW \ + CONFIG_MALI_GEM5_BUILD \ + CONFIG_MALI_DEVFREQ \ + CONFIG_MALI_MIDGARD_DVFS \ + CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND \ + CONFIG_MALI_DMA_BUF_LEGACY_COMPAT \ + CONFIG_MALI_EXPERT \ + CONFIG_MALI_CORESTACK \ + CONFIG_MALI_2MB_ALLOC \ + CONFIG_MALI_PWRSOFT_765 \ + CONFIG_MALI_MEMORY_FULLY_BACKED \ + CONFIG_MALI_JOB_DUMP \ + CONFIG_MALI_NO_MALI \ + CONFIG_MALI_ERROR_INJECT \ + CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED \ + CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE \ + CONFIG_MALI_PRFCNT_SET_PRIMARY \ + CONFIG_MALI_PRFCNT_SET_SECONDARY \ + CONFIG_MALI_PRFCNT_SET_TERTIARY \ + CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS \ + CONFIG_MALI_DEBUG \ + CONFIG_MALI_MIDGARD_ENABLE_TRACE \ + CONFIG_MALI_SYSTEM_TRACE \ + CONFIG_MALI_FENCE_DEBUG \ + CONFIG_MALI_KUTF \ + CONFIG_MALI_KUTF_IRQ_TEST \ + CONFIG_MALI_KUTF_CLK_RATE_TRACE \ + CONFIG_MALI_XEN + + +# +# MAKE_ARGS to pass the custom CONFIGs on out-of-tree build +# +# Generate the list of CONFIGs and values. +# $(value config) is the name of the CONFIG option. +# $(value $(value config)) is its value (y, m). +# When the CONFIG is not set to y or m, it defaults to n. +MAKE_ARGS := $(foreach config,$(CONFIGS), \ + $(if $(filter y m,$(value $(value config))), \ + $(value config)=$(value $(value config)), \ + $(value config)=n)) + +# +# EXTRA_CFLAGS to define the custom CONFIGs on out-of-tree build +# +# Generate the list of CONFIGs defines with values from CONFIGS. +# $(value config) is the name of the CONFIG option. +# When set to y or m, the CONFIG gets defined to 1. 
+EXTRA_CFLAGS := $(foreach config,$(CONFIGS), \ + $(if $(filter y m,$(value $(value config))), \ + -D$(value config)=1)) + +# +# KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions +# + all: - $(MAKE) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include -I$(CURDIR)/../../../../tests/include $(SCONS_CFLAGS)" $(SCONS_CONFIGS) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules + $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules + +modules_install: + $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) modules_install clean: - $(MAKE) -C $(KDIR) M=$(CURDIR) clean + $(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) clean diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/Mconfig b/dvalin/kernel/drivers/gpu/arm/midgard/Mconfig index b137793..d71a113 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/Mconfig +++ b/dvalin/kernel/drivers/gpu/arm/midgard/Mconfig @@ -1,17 +1,22 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software # Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. +# of such GNU license. # -# A copy of the licence is included with the program, and can also be obtained -# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -# Boston, MA 02110-1301, USA. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. # # - menuconfig MALI_MIDGARD bool "Mali Midgard series support" @@ -22,13 +27,44 @@ menuconfig MALI_MIDGARD To compile this driver as a module, choose M here: this will generate a single module, called mali_kbase. -config MALI_GATOR_SUPPORT - bool "Enable Streamline tracing support" - depends on MALI_MIDGARD && !BACKEND_USER +config MALI_PLATFORM_NAME + depends on MALI_MIDGARD + string "Platform name" + default "hisilicon" if PLATFORM_HIKEY960 + default "hisilicon" if PLATFORM_HIKEY970 + default "devicetree" + help + Enter the name of the desired platform configuration directory to + include in the build. 'platform/$(MALI_PLATFORM_NAME)/Makefile' must + exist. + + When PLATFORM_CUSTOM is set, this needs to be set manually to + pick up the desired platform files. + +config MALI_REAL_HW + bool + depends on MALI_MIDGARD default y + default n if NO_MALI + +config MALI_CSF_SUPPORT + bool "Enable Mali CSF based GPU support" + depends on MALI_MIDGARD + default y if GPU_HAS_CSF help - Enables kbase tracing used by the Arm Streamline Performance Analyzer. - The tracepoints are used to derive GPU activity charts in Streamline. + Enables support for CSF based GPUs. + +config MALI_DEVFREQ + bool "Enable devfreq support for Mali" + depends on MALI_MIDGARD + default y if PLATFORM_JUNO + default y if PLATFORM_CUSTOM + help + Support devfreq for Mali. + + Using the devfreq framework and, by default, the simple on-demand + governor, the frequency of Mali will be dynamically selected from the + available OPPs. config MALI_MIDGARD_DVFS bool "Enable legacy DVFS" @@ -37,29 +73,25 @@ config MALI_MIDGARD_DVFS help Choose this option to enable legacy DVFS in the Mali Midgard DDK. 
+config MALI_GATOR_SUPPORT + bool "Enable Streamline tracing support" + depends on MALI_MIDGARD && !BACKEND_USER + default y + help + Enables kbase tracing used by the Arm Streamline Performance Analyzer. + The tracepoints are used to derive GPU activity charts in Streamline. + config MALI_MIDGARD_ENABLE_TRACE bool "Enable kbase tracing" depends on MALI_MIDGARD default y if MALI_DEBUG default n help - Enables tracing in kbase. Trace log available through + Enables tracing in kbase. Trace log available through the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled -config MALI_DEVFREQ - bool "devfreq support for Mali" - depends on MALI_MIDGARD - default y if PLATFORM_JUNO - default y if PLATFORM_CUSTOM - help - Support devfreq for Mali. - - Using the devfreq framework and, by default, the simpleondemand - governor, the frequency of Mali will be dynamically selected from the - available OPPs. - config MALI_DMA_FENCE - bool "DMA_BUF fence support for Mali" + bool "Enable DMA_BUF fence support for Mali" depends on MALI_MIDGARD default n help @@ -68,23 +100,9 @@ config MALI_DMA_FENCE This option should only be enabled if the Linux Kernel has built in support for DMA_BUF fences. -config MALI_PLATFORM_NAME - depends on MALI_MIDGARD - string "Platform name" - default "hisilicon" if PLATFORM_HIKEY960 - default "hisilicon" if PLATFORM_HIKEY970 - default "devicetree" - help - Enter the name of the desired platform configuration directory to - include in the build. 'platform/$(MALI_PLATFORM_NAME)/Kbuild' must - exist. - - When PLATFORM_CUSTOM is set, this needs to be set manually to - pick up the desired platform files. - config MALI_ARBITER_SUPPORT bool "Enable arbiter support for Mali" - depends on MALI_MIDGARD + depends on MALI_MIDGARD && !MALI_CSF_SUPPORT default n help Enable support for the arbiter interface in the driver. @@ -93,62 +111,89 @@ config MALI_ARBITER_SUPPORT If unsure, say N. 
-# MALI_EXPERT configuration options +config DMA_BUF_SYNC_IOCTL_SUPPORTED + bool "Enable Kernel DMA buffers support DMA_BUF_IOCTL_SYNC" + depends on MALI_MIDGARD && BACKEND_KERNEL + default y + +config MALI_DMA_BUF_MAP_ON_DEMAND + bool "Enable map imported dma-bufs on demand" + depends on MALI_MIDGARD + default n + default y if !DMA_BUF_SYNC_IOCTL_SUPPORTED + help + This option caused kbase to set up the GPU mapping of imported + dma-buf when needed to run atoms. This is the legacy behavior. + + This is intended for testing and the option will get removed in the + future. + +config MALI_DMA_BUF_LEGACY_COMPAT + bool "Enable legacy compatibility cache flush on dma-buf map" + depends on MALI_MIDGARD && !MALI_DMA_BUF_MAP_ON_DEMAND + default n + help + This option enables compatibility with legacy dma-buf mapping + behavior, then the dma-buf is mapped on import, by adding cache + maintenance where MALI_DMA_BUF_MAP_ON_DEMAND would do the mapping, + including a cache flush. + + This option might work-around issues related to missing cache + flushes in other drivers. This only has an effect for clients using + UK 11.18 or older. For later UK versions it is not possible. menuconfig MALI_EXPERT depends on MALI_MIDGARD bool "Enable Expert Settings" default y help - Enabling this option and modifying the default settings may produce a driver with performance or - other limitations. + Enabling this option and modifying the default settings may produce + a driver with performance or other limitations. -config MALI_CORESTACK - bool "Support controlling power to the GPU core stack" +config MALI_2MB_ALLOC + bool "Attempt to allocate 2MB pages" depends on MALI_MIDGARD && MALI_EXPERT default n help - Enabling this feature on supported GPUs will let the driver powering - on/off the GPU core stack independently without involving the Power - Domain Controller. This should only be enabled on platforms which - integration of the PDC to the Mali GPU is known to be problematic. 
- This feature is currently only supported on t-Six and t-HEx GPUs. + Rather than allocating all GPU memory page-by-page, attempt to + allocate 2MB pages from the kernel. This reduces TLB pressure and + helps to prevent memory fragmentation. - If unsure, say N. + If in doubt, say N -config MALI_DEBUG - bool "Debug build" +config MALI_MEMORY_FULLY_BACKED + bool "Enable memory fully physically-backed" depends on MALI_MIDGARD && MALI_EXPERT - default y if DEBUG default n help - Select this option for increased checking and reporting of errors. + This option enables full physical backing of all virtual + memory allocations in the kernel. Notice that this build + option only affects allocations of grow-on-GPU-page-fault + memory. -config MALI_FENCE_DEBUG - bool "Debug sync fence usage" +config MALI_CORESTACK + bool "Enable support of GPU core stack power control" depends on MALI_MIDGARD && MALI_EXPERT - default y if MALI_DEBUG + default n help - Select this option to enable additional checking and reporting on the - use of sync fences in the Mali driver. - - This will add a 3s timeout to all sync fence waits in the Mali - driver, so that when work for Mali has been waiting on a sync fence - for a long time a debug message will be printed, detailing what fence - is causing the block, and which dependent Mali atoms are blocked as a - result of this. + Enabling this feature on supported GPUs will let the driver powering + on/off the GPU core stack independently without involving the Power + Domain Controller. This should only be enabled on platforms which + integration of the PDC to the Mali GPU is known to be problematic. + This feature is currently only supported on t-Six and t-HEx GPUs. - The timeout can be changed at runtime through the js_soft_timeout - device attribute, where the timeout is specified in milliseconds. + If unsure, say N. 
choice prompt "Error injection level" + depends on MALI_MIDGARD && MALI_EXPERT default MALI_ERROR_INJECT_NONE help Enables insertion of errors to test module failure and recovery mechanisms. config MALI_ERROR_INJECT_NONE bool "disabled" + depends on MALI_MIDGARD && MALI_EXPERT help Error injection is disabled. @@ -168,14 +213,49 @@ endchoice config MALI_ERROR_INJECT_ON string + depends on MALI_MIDGARD && MALI_EXPERT default "0" if MALI_ERROR_INJECT_NONE default "1" if MALI_ERROR_INJECT_TRACK_LIST default "2" if MALI_ERROR_INJECT_RANDOM config MALI_ERROR_INJECT bool + depends on MALI_MIDGARD && MALI_EXPERT default y if !MALI_ERROR_INJECT_NONE +config MALI_GEM5_BUILD + bool "Enable build of Mali kernel driver for GEM5" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + This option is to do a Mali GEM5 build. + If unsure, say N. + +config MALI_DEBUG + bool "Enable debug build" + depends on MALI_MIDGARD && MALI_EXPERT + default y if DEBUG + default n + help + Select this option for increased checking and reporting of errors. + +config MALI_FENCE_DEBUG + bool "Enable debug sync fence usage" + depends on MALI_MIDGARD && MALI_EXPERT + default y if MALI_DEBUG + help + Select this option to enable additional checking and reporting on the + use of sync fences in the Mali driver. + + This will add a 3s timeout to all sync fence waits in the Mali + driver, so that when work for Mali has been waiting on a sync fence + for a long time a debug message will be printed, detailing what fence + is causing the block, and which dependent Mali atoms are blocked as a + result of this. + + The timeout can be changed at runtime through the js_soft_timeout + device attribute, where the timeout is specified in milliseconds. + config MALI_SYSTEM_TRACE bool "Enable system event tracing support" depends on MALI_MIDGARD && MALI_EXPERT @@ -187,56 +267,35 @@ config MALI_SYSTEM_TRACE minimal overhead when not in use. Enable only if you know what you are doing. 
-config MALI_2MB_ALLOC - bool "Attempt to allocate 2MB pages" - depends on MALI_MIDGARD && MALI_EXPERT - default n - help - Rather than allocating all GPU memory page-by-page, attempt to - allocate 2MB pages from the kernel. This reduces TLB pressure and - helps to prevent memory fragmentation. +# Instrumentation options. - If in doubt, say N +# config MALI_PRFCNT_SET_PRIMARY exists in the Kernel Kconfig but is configured using CINSTR_PRIMARY_HWC in Mconfig. +# config MALI_PRFCNT_SET_SECONDARY exists in the Kernel Kconfig but is configured using CINSTR_SECONDARY_HWC in Mconfig. +# config MALI_PRFCNT_SET_TERTIARY exists in the Kernel Kconfig but is configured using CINSTR_TERTIARY_HWC in Mconfig. +# config MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS exists in the Kernel Kconfig but is configured using CINSTR_HWC_SET_SELECT_VIA_DEBUG_FS in Mconfig. -config MALI_PWRSOFT_765 - bool "PWRSOFT-765 ticket" +config MALI_JOB_DUMP + bool "Enable system level support needed for job dumping" depends on MALI_MIDGARD && MALI_EXPERT default n help - PWRSOFT-765 fixes devfreq cooling devices issues. However, they are - not merged in mainline kernel yet. So this define helps to guard those - parts of the code. - -config MALI_MEMORY_FULLY_BACKED - bool "Memory fully physically-backed" - default n - help - This option enables full backing of all virtual memory allocations - for the kernel. This only affects grow-on-GPU-page-fault memory. + Choose this option to enable system level support needed for + job dumping. This is typically used for instrumentation but has + minimal overhead when not in use. Enable only if you know what + you are doing. 
-config MALI_DMA_BUF_MAP_ON_DEMAND - bool "Map imported dma-bufs on demand" - depends on MALI_MIDGARD +config MALI_PWRSOFT_765 + bool "Enable workaround for PWRSOFT-765" + depends on MALI_MIDGARD && MALI_EXPERT default n - default y if !DMA_BUF_SYNC_IOCTL_SUPPORTED help - This option caused kbase to set up the GPU mapping of imported - dma-buf when needed to run atoms. This is the legacy behaviour. + PWRSOFT-765 fixes devfreq cooling devices issues. The fix was merged + in kernel v4.10, however if backported into the kernel then this + option must be manually selected. -config MALI_DMA_BUF_LEGACY_COMPAT - bool "Enable legacy compatibility cache flush on dma-buf map" - depends on MALI_MIDGARD && !MALI_DMA_BUF_MAP_ON_DEMAND - default n - help - This option enables compatibility with legacy dma-buf mapping - behavior, then the dma-buf is mapped on import, by adding cache - maintenance where MALI_DMA_BUF_MAP_ON_DEMAND would do the mapping, - including a cache flush. + If using kernel >= v4.10 then say N, otherwise if devfreq cooling + changes have been backported say Y to avoid compilation errors. -config MALI_REAL_HW - bool - default y - default n if NO_MALI config MALI_HW_ERRATA_1485982_NOT_AFFECTED bool "Disable workaround for BASE_HW_ISSUE_GPU2017_1336" @@ -262,17 +321,6 @@ config MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE tree using the property, opp-mali-errata-1485982. Otherwise the slowest clock will be selected. -config MALI_GEM5_BUILD - bool "Enable build of Mali kernel driver for GEM5" - depends on MALI_MIDGARD - default n - help - This option is to do a Mali GEM5 build. - If unsure, say N. - -# Instrumentation options. - -# config MALI_JOB_DUMP exists in the Kernel Kconfig but is configured using CINSTR_JOB_DUMP in Mconfig. -# config MALI_PRFCNT_SET_SECONDARY exists in the Kernel Kconfig but is configured using CINSTR_SECONDARY_HWC in Mconfig. 
+source "kernel/drivers/gpu/arm/midgard/arbitration/Mconfig" source "kernel/drivers/gpu/arm/midgard/tests/Mconfig" diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/Kbuild index 98e47be..5203281 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/Kbuild +++ b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/Kbuild @@ -1,10 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software # Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. +# of such GNU license. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -15,10 +16,8 @@ # along with this program; if not, you can access it online at # http://www.gnu.org/licenses/gpl-2.0.html. # -# SPDX-License-Identifier: GPL-2.0 -# # mali_kbase-y += \ - arbiter/mali_kbase_arbif.o \ - arbiter/mali_kbase_arbiter_pm.o + arbiter/mali_kbase_arbif.o \ + arbiter/mali_kbase_arbiter_pm.o diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbif.c b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbif.c index d193cb9..64e11ce 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbif.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbif.c @@ -1,13 +1,12 @@ -// SPDX-License-Identifier: GPL-2.0 - +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -18,13 +17,10 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /** - * @file mali_kbase_arbif.c - * Mali arbiter interface APIs to share GPU between Virtual Machines + * DOC: Mali arbiter interface APIs to share GPU between Virtual Machines */ #include @@ -34,32 +30,155 @@ #include #include "mali_kbase_arbiter_interface.h" +/* Arbiter interface version against which was implemented this module */ +#define MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION 5 +#if MALI_REQUIRED_KBASE_ARBITER_INTERFACE_VERSION != \ + MALI_KBASE_ARBITER_INTERFACE_VERSION +#error "Unsupported Mali Arbiter interface version." 
+#endif + +static void on_max_config(struct device *dev, uint32_t max_l2_slices, + uint32_t max_core_mask) +{ + struct kbase_device *kbdev; + + if (!dev) { + pr_err("%s(): dev is NULL", __func__); + return; + } + + kbdev = dev_get_drvdata(dev); + if (!kbdev) { + dev_err(dev, "%s(): kbdev is NULL", __func__); + return; + } + + if (!max_l2_slices || !max_core_mask) { + dev_dbg(dev, + "%s(): max_config ignored as one of the fields is zero", + __func__); + return; + } + + /* set the max config info in the kbase device */ + kbase_arbiter_set_max_config(kbdev, max_l2_slices, max_core_mask); +} + +/** + * on_update_freq() - Updates GPU clock frequency + * @dev: arbiter interface device handle + * @freq: GPU clock frequency value reported from arbiter + * + * call back function to update GPU clock frequency with + * new value from arbiter + */ +static void on_update_freq(struct device *dev, uint32_t freq) +{ + struct kbase_device *kbdev; + + if (!dev) { + pr_err("%s(): dev is NULL", __func__); + return; + } + + kbdev = dev_get_drvdata(dev); + if (!kbdev) { + dev_err(dev, "%s(): kbdev is NULL", __func__); + return; + } + + kbase_arbiter_pm_update_gpu_freq(&kbdev->arb.arb_freq, freq); +} + +/** + * on_gpu_stop() - sends KBASE_VM_GPU_STOP_EVT event on VM stop + * @dev: arbiter interface device handle + * + * call back function to signal a GPU STOP event from arbiter interface + */ static void on_gpu_stop(struct device *dev) { - struct kbase_device *kbdev = dev_get_drvdata(dev); + struct kbase_device *kbdev; + + if (!dev) { + pr_err("%s(): dev is NULL", __func__); + return; + } - KBASE_TLSTREAM_TL_EVENT_ARB_STOP_REQUESTED(kbdev, kbdev); + kbdev = dev_get_drvdata(dev); + if (!kbdev) { + dev_err(dev, "%s(): kbdev is NULL", __func__); + return; + } + + KBASE_TLSTREAM_TL_ARBITER_STOP_REQUESTED(kbdev, kbdev); kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_STOP_EVT); } +/** + * on_gpu_granted() - sends KBASE_VM_GPU_GRANTED_EVT event on GPU granted + * @dev: arbiter interface device 
handle + * + * call back function to signal a GPU GRANT event from arbiter interface + */ static void on_gpu_granted(struct device *dev) { - struct kbase_device *kbdev = dev_get_drvdata(dev); + struct kbase_device *kbdev; + + if (!dev) { + pr_err("%s(): dev is NULL", __func__); + return; + } + + kbdev = dev_get_drvdata(dev); + if (!kbdev) { + dev_err(dev, "%s(): kbdev is NULL", __func__); + return; + } - KBASE_TLSTREAM_TL_EVENT_ARB_GRANTED(kbdev, kbdev); + KBASE_TLSTREAM_TL_ARBITER_GRANTED(kbdev, kbdev); kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_GRANTED_EVT); } +/** + * on_gpu_lost() - sends KBASE_VM_GPU_LOST_EVT event on GPU lost + * @dev: arbiter interface device handle + * + * call back function to signal a GPU LOST event from arbiter interface + */ static void on_gpu_lost(struct device *dev) { - struct kbase_device *kbdev = dev_get_drvdata(dev); + struct kbase_device *kbdev; + + if (!dev) { + pr_err("%s(): dev is NULL", __func__); + return; + } + + kbdev = dev_get_drvdata(dev); + if (!kbdev) { + dev_err(dev, "%s(): kbdev is NULL", __func__); + return; + } kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_LOST_EVT); } +/** + * kbase_arbif_init() - Kbase Arbiter interface initialisation. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Initialise Kbase Arbiter interface and assign callback functions. + * + * Return: + * * 0 - the interface was initialized or was not specified + * * in the device tree. + * * -EFAULT - the interface was specified but failed to initialize. + * * -EPROBE_DEFER - module dependencies are not yet available.
+ */ int kbase_arbif_init(struct kbase_device *kbdev) { -#ifdef CONFIG_OF +#if IS_ENABLED(CONFIG_OF) struct arbiter_if_arb_vm_ops ops; struct arbiter_if_dev *arb_if; struct device_node *arbiter_if_node; @@ -100,17 +219,26 @@ int kbase_arbif_init(struct kbase_device *kbdev) ops.arb_vm_gpu_stop = on_gpu_stop; ops.arb_vm_gpu_granted = on_gpu_granted; ops.arb_vm_gpu_lost = on_gpu_lost; + ops.arb_vm_max_config = on_max_config; + ops.arb_vm_update_freq = on_update_freq; + + kbdev->arb.arb_freq.arb_freq = 0; + kbdev->arb.arb_freq.freq_updated = false; + mutex_init(&kbdev->arb.arb_freq.arb_freq_lock); /* register kbase arbiter_if callbacks */ if (arb_if->vm_ops.vm_arb_register_dev) { err = arb_if->vm_ops.vm_arb_register_dev(arb_if, kbdev->dev, &ops); if (err) { - dev_err(kbdev->dev, "Arbiter registration failed.\n"); + dev_err(&pdev->dev, "Failed to register with arbiter\n"); module_put(pdev->dev.driver->owner); + if (err != -EPROBE_DEFER) + err = -EFAULT; return err; } } + #else /* CONFIG_OF */ dev_dbg(kbdev->dev, "No arbiter without Device Tree support\n"); kbdev->arb.arb_dev = NULL; @@ -119,6 +247,12 @@ int kbase_arbif_init(struct kbase_device *kbdev) return 0; } +/** + * kbase_arbif_destroy() - De-init Kbase arbiter interface + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * De-initialise Kbase arbiter interface + */ void kbase_arbif_destroy(struct kbase_device *kbdev) { struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; @@ -133,27 +267,64 @@ void kbase_arbif_destroy(struct kbase_device *kbdev) kbdev->arb.arb_dev = NULL; } +/** + * kbase_arbif_get_max_config() - Request max config info + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * call back function from arb interface to arbiter requesting max config info + */ +void kbase_arbif_get_max_config(struct kbase_device *kbdev) +{ + struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; + + if (arb_if && arb_if->vm_ops.vm_arb_get_max_config) { + 
dev_dbg(kbdev->dev, "%s\n", __func__); + arb_if->vm_ops.vm_arb_get_max_config(arb_if); + } +} + +/** + * kbase_arbif_gpu_request() - Request GPU from the arbiter + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * call back function from arb interface to arbiter requesting GPU for VM + */ void kbase_arbif_gpu_request(struct kbase_device *kbdev) { struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; if (arb_if && arb_if->vm_ops.vm_arb_gpu_request) { dev_dbg(kbdev->dev, "%s\n", __func__); + KBASE_TLSTREAM_TL_ARBITER_REQUESTED(kbdev, kbdev); arb_if->vm_ops.vm_arb_gpu_request(arb_if); } } +/** + * kbase_arbif_gpu_stopped() - send GPU stopped message to the arbiter + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @gpu_required: GPU request flag + * + */ void kbase_arbif_gpu_stopped(struct kbase_device *kbdev, u8 gpu_required) { struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; if (arb_if && arb_if->vm_ops.vm_arb_gpu_stopped) { dev_dbg(kbdev->dev, "%s\n", __func__); - KBASE_TLSTREAM_TL_EVENT_ARB_STOPPED(kbdev, kbdev); + KBASE_TLSTREAM_TL_ARBITER_STOPPED(kbdev, kbdev); + if (gpu_required) + KBASE_TLSTREAM_TL_ARBITER_REQUESTED(kbdev, kbdev); arb_if->vm_ops.vm_arb_gpu_stopped(arb_if, gpu_required); } } +/** + * kbase_arbif_gpu_active() - Sends a GPU_ACTIVE message to the Arbiter + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Informs the arbiter VM is active + */ void kbase_arbif_gpu_active(struct kbase_device *kbdev) { struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; @@ -164,6 +335,12 @@ void kbase_arbif_gpu_active(struct kbase_device *kbdev) } } +/** + * kbase_arbif_gpu_idle() - Inform the arbiter that the VM has gone idle + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Informs the arbiter VM is idle + */ void kbase_arbif_gpu_idle(struct kbase_device *kbdev) { struct arbiter_if_dev *arb_if = kbdev->arb.arb_if; diff --git
a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbif.h b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbif.h index e7e9de7..701ffd4 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbif.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbif.h @@ -1,28 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - *//* SPDX-License-Identifier: GPL-2.0 */ - -/* - * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -38,12 +17,10 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * */ /** - * @file - * Mali arbiter interface APIs to share GPU between Virtual Machines + * DOC: Mali arbiter interface APIs to share GPU between Virtual Machines */ #ifndef _MALI_KBASE_ARBIF_H_ @@ -80,8 +57,11 @@ enum kbase_arbif_evt { * Initialize the arbiter interface and also determines * if Arbiter functionality is required. 
* - * Return: 0 if the Arbiter interface was successfully initialized or the - * Arbiter was not required. + * Return: + * * 0 - the interface was initialized or was not specified + * * in the device tree. + * * -EFAULT - the interface was specified but failed to initialize. + * * -EPROBE_DEFER - module dependencies are not yet available. */ int kbase_arbif_init(struct kbase_device *kbdev); @@ -94,6 +74,14 @@ int kbase_arbif_init(struct kbase_device *kbdev); */ void kbase_arbif_destroy(struct kbase_device *kbdev); +/** + * kbase_arbif_get_max_config() - Request max config info + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * call back function from arb interface to arbiter requesting max config info + */ +void kbase_arbif_get_max_config(struct kbase_device *kbdev); + /** * kbase_arbif_gpu_request() - Send GPU request message to the arbiter * @kbdev: The kbase device structure for the device (must be a valid pointer) diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_defs.h b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_defs.h index 1f53cbf..570a82a 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_defs.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_defs.h @@ -1,28 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - *//* SPDX-License-Identifier: GPL-2.0 */ - -/* - * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -38,7 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * */ /** @@ -66,7 +44,8 @@ * @vm_resume_work: Work item for vm_arb_wq to resume current work on GPU * @vm_arb_starting: Work queue resume in progress * @vm_arb_stopping: Work queue suspend in progress - * @vm_arb_users_waiting: Count of users waiting for GPU + * @interrupts_installed: Flag set when interrupts are installed + * @vm_request_timer: Timer to monitor GPU request */ struct kbase_arbiter_vm_state { struct kbase_device *kbdev; @@ -78,7 +57,8 @@ struct kbase_arbiter_vm_state { struct work_struct vm_resume_work; bool vm_arb_starting; bool vm_arb_stopping; - int vm_arb_users_waiting; + bool interrupts_installed; + struct hrtimer vm_request_timer; }; /** @@ -86,10 +66,12 @@ struct kbase_arbiter_vm_state { * allocated from the probe method of Mali driver * @arb_if: Pointer to the arbiter interface device * @arb_dev: Pointer to the arbiter device + * @arb_freq: GPU clock frequency retrieved from arbiter. 
*/ struct kbase_arbiter_device { struct arbiter_if_dev *arb_if; struct device *arb_dev; + struct kbase_arbiter_freq arb_freq; }; #endif /* _MALI_KBASE_ARBITER_DEFS_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_interface.h b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_interface.h index 5d5d8a7..c0137f7 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_interface.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_interface.h @@ -1,28 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - *//* SPDX-License-Identifier: GPL-2.0 */ - -/* - * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -38,7 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * */ /** @@ -50,7 +28,7 @@ #define _MALI_KBASE_ARBITER_INTERFACE_H_ /** - * @brief Mali arbiter interface version + * Mali arbiter interface version * * This specifies the current version of the configuration interface. Whenever * the arbiter interface changes, so that integration effort is required, the @@ -61,8 +39,15 @@ * 1 - Added the Mali arbiter configuration interface. * 2 - Strip out reference code from header * 3 - Removed DVFS utilization interface (DVFS moved to arbiter side) + * 4 - Added max_config support + * 5 - Added GPU clock frequency reporting support from arbiter */ -#define MALI_KBASE_ARBITER_INTERFACE_VERSION 3 +#define MALI_KBASE_ARBITER_INTERFACE_VERSION 5 + +/** + * NO_FREQ is used in case platform doesn't support reporting frequency + */ +#define NO_FREQ 0 struct arbiter_if_dev; @@ -108,6 +93,27 @@ struct arbiter_if_arb_vm_ops { * If successful, will respond with a vm_arb_gpu_stopped message. */ void (*arb_vm_gpu_lost)(struct device *dev); + + /** + * arb_vm_max_config() - Send max config info to the VM + * @dev: The arbif kernel module device. + * @max_l2_slices: The maximum number of L2 slices. + * @max_core_mask: The largest core mask. + * + * Informs KBase the maximum resources that can be allocated to the + * partition in use. + */ + void (*arb_vm_max_config)(struct device *dev, uint32_t max_l2_slices, + uint32_t max_core_mask); + + /** + * arb_vm_update_freq() - GPU clock frequency has been updated + * @dev: The arbif kernel module device. + * @freq: GPU clock frequency value reported from arbiter + * + * Informs KBase that the GPU clock frequency has been updated. + */ + void (*arb_vm_update_freq)(struct device *dev, uint32_t freq); }; /** @@ -126,6 +132,11 @@ struct arbiter_if_vm_arb_ops { * @dev: The device structure to supply in the callbacks. * @ops: The callbacks that the device driver supports * (none are optional). + * + * Return: + * * 0 - successful. + * * -EINVAL - invalid argument. 
+ * * -EPROBE_DEFER - module dependencies are not yet available. */ int (*vm_arb_register_dev)(struct arbiter_if_dev *arbif_dev, struct device *dev, struct arbiter_if_arb_vm_ops *ops); @@ -136,6 +147,13 @@ struct arbiter_if_vm_arb_ops { */ void (*vm_arb_unregister_dev)(struct arbiter_if_dev *arbif_dev); + /** + * vm_arb_gpu_get_max_config() - Request the max config from the + * Arbiter. + * @arbif_dev: The arbiter interface we want to issue the request. + */ + void (*vm_arb_get_max_config)(struct arbiter_if_dev *arbif_dev); + /** * vm_arb_gpu_request() - Ask the arbiter interface for GPU access. * @arbif_dev: The arbiter interface we want to issue the request. diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_pm.c b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_pm.c index 6c35e16..5c75686 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_pm.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_pm.c @@ -1,13 +1,12 @@ -// SPDX-License-Identifier: GPL-2.0 - +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -18,27 +17,49 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ /** - * @file mali_kbase_arbiter_pm.c + * @file * Mali arbiter power manager state machine and APIs */ #include #include #include -#include +#include #include -#include +#include #include +#include + +/* A dmesg warning will occur if the GPU is not granted + * after the following time (in milliseconds) has elapsed. + */ +#define GPU_REQUEST_TIMEOUT 1000 +#define KHZ_TO_HZ 1000 + +#define MAX_L2_SLICES_MASK 0xFF + +/* Maximum time in ms, before deferring probe in case + * GPU_GRANTED message is not received + */ +static int gpu_req_timeout = 1; +module_param(gpu_req_timeout, int, 0644); +MODULE_PARM_DESC(gpu_req_timeout, + "On a virtualized platform, if the GPU is not granted within this time(ms) kbase will defer the probe"); static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev); static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld( struct kbase_device *kbdev); +/** + * kbase_arbiter_pm_vm_state_str() - Helper function to get string + * for kbase VM state.(debug) + * @state: kbase VM state + * + * Return: string representation of Kbase_vm_state + */ static inline const char *kbase_arbiter_pm_vm_state_str( enum kbase_vm_state state) { @@ -73,6 +94,13 @@ static inline const char *kbase_arbiter_pm_vm_state_str( } } +/** + * kbase_arbiter_pm_vm_event_str() - Helper function to get string + * for kbase VM event.(debug) + * @evt: kbase VM event + * + * Return: String representation of Kbase_arbif_event + */ static inline const char *kbase_arbiter_pm_vm_event_str( enum kbase_arbif_evt evt) { @@ -99,6 +127,13 @@ static inline const char *kbase_arbiter_pm_vm_event_str( } } +/** + * kbase_arbiter_pm_vm_set_state() - Sets new kbase_arbiter_vm_state + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @new_state: kbase VM new state + * + * This function sets the new state for the VM + */ static void kbase_arbiter_pm_vm_set_state(struct kbase_device *kbdev, enum
kbase_vm_state new_state) { @@ -107,11 +142,22 @@ static void kbase_arbiter_pm_vm_set_state(struct kbase_device *kbdev, dev_dbg(kbdev->dev, "VM set_state %s -> %s", kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state), kbase_arbiter_pm_vm_state_str(new_state)); + lockdep_assert_held(&arb_vm_state->vm_state_lock); arb_vm_state->vm_state = new_state; + if (new_state != KBASE_VM_STATE_INITIALIZING_WITH_GPU && + new_state != KBASE_VM_STATE_INITIALIZING) + KBASE_KTRACE_ADD(kbdev, ARB_VM_STATE, NULL, new_state); wake_up(&arb_vm_state->vm_state_wait); } +/** + * kbase_arbiter_pm_suspend_wq() - suspend work queue of the driver. + * @data: work queue + * + * Suspends work queue of the driver, when VM is in SUSPEND_PENDING or + * STOPPING_IDLE or STOPPING_ACTIVE state + */ static void kbase_arbiter_pm_suspend_wq(struct work_struct *data) { struct kbase_arbiter_vm_state *arb_vm_state = container_of(data, @@ -136,6 +182,13 @@ static void kbase_arbiter_pm_suspend_wq(struct work_struct *data) dev_dbg(kbdev->dev, "<%s\n", __func__); } +/** + * kbase_arbiter_pm_resume_wq() -Kbase resume work queue. + * @data: work item + * + * Resume work queue of the driver when VM is in STARTING state, + * else if its in STOPPING_ACTIVE will request a stop event. + */ static void kbase_arbiter_pm_resume_wq(struct work_struct *data) { struct kbase_arbiter_vm_state *arb_vm_state = container_of(data, @@ -157,9 +210,74 @@ static void kbase_arbiter_pm_resume_wq(struct work_struct *data) } arb_vm_state->vm_arb_starting = false; mutex_unlock(&arb_vm_state->vm_state_lock); + KBASE_TLSTREAM_TL_ARBITER_STARTED(kbdev, kbdev); dev_dbg(kbdev->dev, "<%s\n", __func__); } +/** + * request_timer_callback() - Issue warning on request timer expiration + * @timer: Request hr timer data + * + * Called when the Arbiter takes too long to grant the GPU after a + * request has been made. Issues a warning in dmesg. 
+ * + * Return: Always returns HRTIMER_NORESTART + */ +static enum hrtimer_restart request_timer_callback(struct hrtimer *timer) +{ + struct kbase_arbiter_vm_state *arb_vm_state = container_of(timer, + struct kbase_arbiter_vm_state, vm_request_timer); + + KBASE_DEBUG_ASSERT(arb_vm_state); + KBASE_DEBUG_ASSERT(arb_vm_state->kbdev); + + dev_warn(arb_vm_state->kbdev->dev, + "Still waiting for GPU to be granted from Arbiter after %d ms\n", + GPU_REQUEST_TIMEOUT); + return HRTIMER_NORESTART; +} + +/** + * start_request_timer() - Start a timer after requesting GPU + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Start a timer to track when kbase is waiting for the GPU from the + * Arbiter. If the timer expires before GPU is granted, a warning in + * dmesg will be issued. + */ +static void start_request_timer(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + hrtimer_start(&arb_vm_state->vm_request_timer, + HR_TIMER_DELAY_MSEC(GPU_REQUEST_TIMEOUT), + HRTIMER_MODE_REL); +} + +/** + * cancel_request_timer() - Stop the request timer + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Stops the request timer once GPU has been granted. Safe to call + * even if timer is no longer running. + */ +static void cancel_request_timer(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + + hrtimer_cancel(&arb_vm_state->vm_request_timer); +} + +/** + * kbase_arbiter_pm_early_init() - Initialize arbiter for VM + * Paravirtualized use. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Initialize the arbiter and other required resources during the runtime + * and request the GPU for the VM for the first time. 
+ * + * Return: 0 if success, or a Linux error code + */ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev) { int err; @@ -179,29 +297,49 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev) WQ_HIGHPRI); if (!arb_vm_state->vm_arb_wq) { dev_err(kbdev->dev, "Failed to allocate vm_arb workqueue\n"); + kfree(arb_vm_state); return -ENOMEM; } INIT_WORK(&arb_vm_state->vm_suspend_work, kbase_arbiter_pm_suspend_wq); INIT_WORK(&arb_vm_state->vm_resume_work, kbase_arbiter_pm_resume_wq); arb_vm_state->vm_arb_starting = false; - arb_vm_state->vm_arb_users_waiting = 0; + atomic_set(&kbdev->pm.gpu_users_waiting, 0); + hrtimer_init(&arb_vm_state->vm_request_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + arb_vm_state->vm_request_timer.function = + request_timer_callback; kbdev->pm.arb_vm_state = arb_vm_state; err = kbase_arbif_init(kbdev); if (err) { + dev_err(kbdev->dev, "Failed to initialise arbif module\n"); goto arbif_init_fail; } + if (kbdev->arb.arb_if) { kbase_arbif_gpu_request(kbdev); dev_dbg(kbdev->dev, "Waiting for initial GPU assignment...\n"); - wait_event(arb_vm_state->vm_state_wait, + err = wait_event_timeout(arb_vm_state->vm_state_wait, arb_vm_state->vm_state == - KBASE_VM_STATE_INITIALIZING_WITH_GPU); + KBASE_VM_STATE_INITIALIZING_WITH_GPU, + msecs_to_jiffies(gpu_req_timeout)); + + if (!err) { + dev_dbg(kbdev->dev, + "Kbase probe Deferred after waiting %d ms to receive GPU_GRANT\n", + gpu_req_timeout); + err = -EPROBE_DEFER; + goto arbif_eprobe_defer; + } + dev_dbg(kbdev->dev, "Waiting for initial GPU assignment - done\n"); } return 0; +arbif_eprobe_defer: + kbase_arbiter_pm_early_term(kbdev); + return err; arbif_init_fail: destroy_workqueue(arb_vm_state->vm_arb_wq); kfree(arb_vm_state); @@ -209,35 +347,72 @@ arbif_init_fail: return err; } +/** + * kbase_arbiter_pm_early_term() - Shutdown arbiter and free resources + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Clean up all the resources + */ void 
kbase_arbiter_pm_early_term(struct kbase_device *kbdev) { struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + cancel_request_timer(kbdev); mutex_lock(&arb_vm_state->vm_state_lock); - if (arb_vm_state->vm_state > KBASE_VM_STATE_STOPPED_GPU_REQUESTED) + if (arb_vm_state->vm_state > KBASE_VM_STATE_STOPPED_GPU_REQUESTED) { + kbase_pm_set_gpu_lost(kbdev, false); kbase_arbif_gpu_stopped(kbdev, false); - + } mutex_unlock(&arb_vm_state->vm_state_lock); - kbase_arbif_destroy(kbdev); destroy_workqueue(arb_vm_state->vm_arb_wq); + kbase_arbif_destroy(kbdev); arb_vm_state->vm_arb_wq = NULL; kfree(kbdev->pm.arb_vm_state); kbdev->pm.arb_vm_state = NULL; } +/** + * kbase_arbiter_pm_release_interrupts() - Release the GPU interrupts + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Releases interrupts and set the interrupt flag to false + */ void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev) { struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; mutex_lock(&arb_vm_state->vm_state_lock); - if (!kbdev->arb.arb_if || - arb_vm_state->vm_state > - KBASE_VM_STATE_STOPPED_GPU_REQUESTED) + if (arb_vm_state->interrupts_installed == true) { + arb_vm_state->interrupts_installed = false; kbase_release_interrupts(kbdev); + } + mutex_unlock(&arb_vm_state->vm_state_lock); +} +/** + * kbase_arbiter_pm_install_interrupts() - Install the GPU interrupts + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Install interrupts and set the interrupt_install flag to true. 
+ */ +int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + int err; + + mutex_lock(&arb_vm_state->vm_state_lock); + arb_vm_state->interrupts_installed = true; + err = kbase_install_interrupts(kbdev); mutex_unlock(&arb_vm_state->vm_state_lock); + return err; } +/** + * kbase_arbiter_pm_vm_stopped() - Handle stop state for the VM + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Handles a stop state for the VM + */ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev) { bool request_gpu = false; @@ -245,14 +420,19 @@ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev) lockdep_assert_held(&arb_vm_state->vm_state_lock); - if (arb_vm_state->vm_arb_users_waiting > 0 && + if (atomic_read(&kbdev->pm.gpu_users_waiting) > 0 && arb_vm_state->vm_state == KBASE_VM_STATE_STOPPING_IDLE) kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPING_ACTIVE); dev_dbg(kbdev->dev, "%s %s\n", __func__, kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); - kbase_release_interrupts(kbdev); + + if (arb_vm_state->interrupts_installed) { + arb_vm_state->interrupts_installed = false; + kbase_release_interrupts(kbdev); + } + switch (arb_vm_state->vm_state) { case KBASE_VM_STATE_STOPPING_ACTIVE: request_gpu = true; @@ -271,14 +451,95 @@ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev) break; } + kbase_pm_set_gpu_lost(kbdev, false); kbase_arbif_gpu_stopped(kbdev, request_gpu); + if (request_gpu) + start_request_timer(kbdev); +} + +void kbase_arbiter_set_max_config(struct kbase_device *kbdev, + uint32_t max_l2_slices, + uint32_t max_core_mask) +{ + struct kbase_arbiter_vm_state *arb_vm_state; + struct max_config_props max_config; + + if (!kbdev) + return; + + /* Mask the max_l2_slices as it is stored as 8 bits into kbase */ + max_config.l2_slices = max_l2_slices & MAX_L2_SLICES_MASK; + max_config.core_mask = max_core_mask; + arb_vm_state 
= kbdev->pm.arb_vm_state; + + mutex_lock(&arb_vm_state->vm_state_lock); + /* Just set the max_props in kbase during initialization. */ + if (arb_vm_state->vm_state == KBASE_VM_STATE_INITIALIZING) + kbase_gpuprops_set_max_config(kbdev, &max_config); + else + dev_dbg(kbdev->dev, "Unexpected max_config on VM state %s", + kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); + + mutex_unlock(&arb_vm_state->vm_state_lock); +} + +int kbase_arbiter_pm_gpu_assigned(struct kbase_device *kbdev) +{ + struct kbase_arbiter_vm_state *arb_vm_state; + int result = -EINVAL; + + if (!kbdev) + return result; + + /* First check the GPU_LOST state */ + kbase_pm_lock(kbdev); + if (kbase_pm_is_gpu_lost(kbdev)) { + kbase_pm_unlock(kbdev); + return 0; + } + kbase_pm_unlock(kbdev); + + /* Then the arbitration state machine */ + arb_vm_state = kbdev->pm.arb_vm_state; + + mutex_lock(&arb_vm_state->vm_state_lock); + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_INITIALIZING: + case KBASE_VM_STATE_SUSPENDED: + case KBASE_VM_STATE_STOPPED: + case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: + case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: + result = 0; + break; + default: + result = 1; + break; + } + mutex_unlock(&arb_vm_state->vm_state_lock); + + return result; } +/** + * kbase_arbiter_pm_vm_gpu_start() - Handles the start state of the VM + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Handles the start state of the VM + */ static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev) { struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + bool freq_updated = false; lockdep_assert_held(&arb_vm_state->vm_state_lock); + mutex_lock(&kbdev->arb.arb_freq.arb_freq_lock); + if (kbdev->arb.arb_freq.freq_updated) { + kbdev->arb.arb_freq.freq_updated = false; + freq_updated = true; + } + mutex_unlock(&kbdev->arb.arb_freq.arb_freq_lock); + + cancel_request_timer(kbdev); switch (arb_vm_state->vm_state) { case KBASE_VM_STATE_INITIALIZING: 
kbase_arbiter_pm_vm_set_state(kbdev, @@ -286,22 +547,43 @@ static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev) break; case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STARTING); + arb_vm_state->interrupts_installed = true; kbase_install_interrupts(kbdev); + /* + * GPU GRANTED received while in stop can be a result of a + * repartitioning. + */ + kbase_gpuprops_req_curr_config_update(kbdev); + /* curr_config will be updated while resuming the PM. */ queue_work(arb_vm_state->vm_arb_wq, &arb_vm_state->vm_resume_work); break; case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: + kbase_pm_set_gpu_lost(kbdev, false); kbase_arbif_gpu_stopped(kbdev, false); kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPENDED); break; default: - dev_warn(kbdev->dev, - "GPU_GRANTED when not expected - state %s\n", - kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); + /* + * GPU_GRANTED can be received when there is a frequency update + * Only show a warning if received in an unexpected state + * without a frequency update + */ + if (!freq_updated) + dev_warn(kbdev->dev, + "GPU_GRANTED when not expected - state %s\n", + kbase_arbiter_pm_vm_state_str( + arb_vm_state->vm_state)); break; } } +/** + * kbase_arbiter_pm_vm_gpu_stop() - Handles the stop state of the VM + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Handles the stop state of the VM + */ static void kbase_arbiter_pm_vm_gpu_stop(struct kbase_device *kbdev) { struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; @@ -344,9 +626,16 @@ static void kbase_arbiter_pm_vm_gpu_stop(struct kbase_device *kbdev) } } +/** + * kbase_gpu_lost() - Kbase signals GPU is lost on a lost event signal + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * On GPU lost event signals GPU_LOST to the arbiter + */ static void kbase_gpu_lost(struct kbase_device *kbdev) { struct kbase_arbiter_vm_state
*arb_vm_state = kbdev->pm.arb_vm_state; + bool handle_gpu_lost = false; lockdep_assert_held(&arb_vm_state->vm_state_lock); @@ -357,33 +646,47 @@ static void kbase_gpu_lost(struct kbase_device *kbdev) dev_warn(kbdev->dev, "GPU lost in state %s", kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state)); kbase_arbiter_pm_vm_gpu_stop(kbdev); - mutex_unlock(&arb_vm_state->vm_state_lock); - kbase_pm_handle_gpu_lost(kbdev); - mutex_lock(&arb_vm_state->vm_state_lock); + handle_gpu_lost = true; break; case KBASE_VM_STATE_STOPPING_IDLE: case KBASE_VM_STATE_STOPPING_ACTIVE: case KBASE_VM_STATE_SUSPEND_PENDING: - dev_info(kbdev->dev, "GPU lost while stopping"); - mutex_unlock(&arb_vm_state->vm_state_lock); - kbase_pm_handle_gpu_lost(kbdev); - mutex_lock(&arb_vm_state->vm_state_lock); + dev_dbg(kbdev->dev, "GPU lost while stopping"); + handle_gpu_lost = true; break; case KBASE_VM_STATE_SUSPENDED: case KBASE_VM_STATE_STOPPED: case KBASE_VM_STATE_STOPPED_GPU_REQUESTED: - dev_info(kbdev->dev, "GPU lost while already stopped"); + dev_dbg(kbdev->dev, "GPU lost while already stopped"); break; case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT: - dev_info(kbdev->dev, "GPU lost while waiting to suspend"); + dev_dbg(kbdev->dev, "GPU lost while waiting to suspend"); kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_SUSPENDED); break; default: break; } - + if (handle_gpu_lost) { + /* Releasing the VM state lock here is safe because + * we are guaranteed to be in either STOPPING_IDLE, + * STOPPING_ACTIVE or SUSPEND_PENDING at this point. + * The only transitions that are valid from here are to + * STOPPED, STOPPED_GPU_REQUESTED or SUSPENDED which can + * only happen at the completion of the GPU lost handling. + */ + mutex_unlock(&arb_vm_state->vm_state_lock); + kbase_pm_handle_gpu_lost(kbdev); + mutex_lock(&arb_vm_state->vm_state_lock); + } } +/** + * kbase_arbiter_pm_vm_os_suspend_ready_state() - checks if VM is ready + * to be moved to suspended state. 
+ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Return: True if its ready to be suspended else False. + */ static inline bool kbase_arbiter_pm_vm_os_suspend_ready_state( struct kbase_device *kbdev) { @@ -398,6 +701,14 @@ static inline bool kbase_arbiter_pm_vm_os_suspend_ready_state( } } +/** + * kbase_arbiter_pm_vm_os_prepare_suspend() - Prepare OS to be in suspend state + * until it receives the grant message from arbiter + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Prepares OS to be in suspend state until it receives GRANT message + * from Arbiter asynchronously. + */ static void kbase_arbiter_pm_vm_os_prepare_suspend(struct kbase_device *kbdev) { struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; @@ -463,6 +774,14 @@ static void kbase_arbiter_pm_vm_os_prepare_suspend(struct kbase_device *kbdev) } } +/** + * kbase_arbiter_pm_vm_os_resume() - Resume OS function once it receives + * a grant message from arbiter + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Resume OS function once it receives GRANT message + * from Arbiter asynchronously. + */ static void kbase_arbiter_pm_vm_os_resume(struct kbase_device *kbdev) { struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; @@ -475,6 +794,7 @@ static void kbase_arbiter_pm_vm_os_resume(struct kbase_device *kbdev) kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPED_GPU_REQUESTED); kbase_arbif_gpu_request(kbdev); + start_request_timer(kbdev); /* Release lock and block resume OS function until we have * asynchronously received the GRANT message from the Arbiter and @@ -486,6 +806,14 @@ static void kbase_arbiter_pm_vm_os_resume(struct kbase_device *kbdev) mutex_lock(&arb_vm_state->vm_state_lock); } +/** + * kbase_arbiter_pm_vm_event() - Dispatch VM event to the state machine. 
+ * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @evt: VM event + * + * The state machine function. Receives events and transitions states + * according the event received and the current state + */ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, enum kbase_arbif_evt evt) { @@ -497,7 +825,9 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, mutex_lock(&arb_vm_state->vm_state_lock); dev_dbg(kbdev->dev, "%s %s\n", __func__, kbase_arbiter_pm_vm_event_str(evt)); - + if (arb_vm_state->vm_state != KBASE_VM_STATE_INITIALIZING_WITH_GPU && + arb_vm_state->vm_state != KBASE_VM_STATE_INITIALIZING) + KBASE_KTRACE_ADD(kbdev, ARB_VM_EVT, NULL, evt); switch (evt) { case KBASE_VM_GPU_GRANTED_EVT: kbase_arbiter_pm_vm_gpu_start(kbdev); @@ -506,7 +836,7 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, kbase_arbiter_pm_vm_gpu_stop(kbdev); break; case KBASE_VM_GPU_LOST_EVT: - dev_info(kbdev->dev, "KBASE_ARBIF_GPU_LOST_EVT!"); + dev_dbg(kbdev->dev, "KBASE_ARBIF_GPU_LOST_EVT!"); kbase_gpu_lost(kbdev); break; case KBASE_VM_OS_SUSPEND_EVENT: @@ -530,8 +860,6 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, case KBASE_VM_REF_EVENT: switch (arb_vm_state->vm_state) { case KBASE_VM_STATE_STARTING: - KBASE_TLSTREAM_TL_EVENT_ARB_STARTED(kbdev, kbdev); - /* FALL THROUGH */ case KBASE_VM_STATE_IDLE: kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_ACTIVE); @@ -547,15 +875,21 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, break; case KBASE_VM_GPU_INITIALIZED_EVT: - lockdep_assert_held(&kbdev->pm.lock); - if (kbdev->pm.active_count > 0) { - kbase_arbiter_pm_vm_set_state(kbdev, - KBASE_VM_STATE_ACTIVE); - kbase_arbif_gpu_active(kbdev); - } else { - kbase_arbiter_pm_vm_set_state(kbdev, - KBASE_VM_STATE_IDLE); - kbase_arbif_gpu_idle(kbdev); + switch (arb_vm_state->vm_state) { + case KBASE_VM_STATE_INITIALIZING_WITH_GPU: + lockdep_assert_held(&kbdev->pm.lock); + if (kbdev->pm.active_count > 0) { + 
kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_ACTIVE); + kbase_arbif_gpu_active(kbdev); + } else { + kbase_arbiter_pm_vm_set_state(kbdev, + KBASE_VM_STATE_IDLE); + kbase_arbif_gpu_idle(kbdev); + } + break; + default: + break; } break; @@ -566,6 +900,14 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev, mutex_unlock(&arb_vm_state->vm_state_lock); } +KBASE_EXPORT_TEST_API(kbase_arbiter_pm_vm_event); + +/** + * kbase_arbiter_pm_vm_wait_gpu_assignment() - VM wait for a GPU assignment. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * VM waits for a GPU assignment. + */ static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev) { struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; @@ -577,6 +919,12 @@ static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev) dev_dbg(kbdev->dev, "Waiting for GPU assignment - done\n"); } +/** + * kbase_arbiter_pm_vm_gpu_assigned_lockheld() - Check if VM holds VM state lock + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Checks if the virtual machine holds VM state lock. + */ static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld( struct kbase_device *kbdev) { @@ -587,11 +935,25 @@ static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld( arb_vm_state->vm_state == KBASE_VM_STATE_ACTIVE); } +/** + * kbase_arbiter_pm_ctx_active_handle_suspend() - Handle suspend operation for + * arbitration mode + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @suspend_handler: The handler code for how to handle a suspend + * that might occur + * + * This function handles a suspend event from the driver, + * communicating with the arbiter and waiting synchronously for the GPU + * to be granted again depending on the VM state. + * + * Return: 0 on success, else 1 if the suspend handler is not possible.
+ */ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler) { struct kbasep_js_device_data *js_devdata = &kbdev->js_data; struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state; + int res = 0; if (kbdev->arb.arb_if) { mutex_lock(&arb_vm_state->vm_state_lock); @@ -606,30 +968,41 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, kbase_arbiter_pm_vm_set_state(kbdev, KBASE_VM_STATE_STOPPED_GPU_REQUESTED); kbase_arbif_gpu_request(kbdev); + start_request_timer(kbdev); } else if (arb_vm_state->vm_state == KBASE_VM_STATE_INITIALIZING_WITH_GPU) break; if (suspend_handler != KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE) { - if (suspend_handler == - KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED - || - kbdev->pm.active_count > 0) - break; - mutex_unlock(&arb_vm_state->vm_state_lock); - mutex_unlock(&kbdev->pm.lock); - mutex_unlock(&js_devdata->runpool_mutex); - return 1; - } + /* In case of GPU lost, even if + * active_count > 0, we no longer have GPU + * access + */ + if (kbase_pm_is_gpu_lost(kbdev)) + res = 1; - if (arb_vm_state->vm_state == - KBASE_VM_STATE_INITIALIZING_WITH_GPU) + switch (suspend_handler) { + case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE: + res = 1; + break; + case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE: + if (kbdev->pm.active_count == 0) + res = 1; + break; + case KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED: + break; + default: + WARN(1, "Unknown suspend_handler\n"); + res = 1; + break; + } break; + } /* Need to synchronously wait for GPU assignment */ - arb_vm_state->vm_arb_users_waiting++; + atomic_inc(&kbdev->pm.gpu_users_waiting); mutex_unlock(&arb_vm_state->vm_state_lock); mutex_unlock(&kbdev->pm.lock); mutex_unlock(&js_devdata->runpool_mutex); @@ -637,9 +1010,128 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, mutex_lock(&js_devdata->runpool_mutex); mutex_lock(&kbdev->pm.lock); mutex_lock(&arb_vm_state->vm_state_lock); - 
arb_vm_state->vm_arb_users_waiting--; + atomic_dec(&kbdev->pm.gpu_users_waiting); } mutex_unlock(&arb_vm_state->vm_state_lock); } - return 0; + return res; +} + +/** + * kbase_arbiter_pm_update_gpu_freq() - Updates GPU clock frequency received + * from arbiter. + * @arb_freq: Pointer to structure holding GPU clock frequency data + * @freq: New frequency value in KHz + */ +void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq, + uint32_t freq) +{ + struct kbase_gpu_clk_notifier_data ndata; + + mutex_lock(&arb_freq->arb_freq_lock); + if (arb_freq->arb_freq != freq) { + ndata.new_rate = freq * KHZ_TO_HZ; + ndata.old_rate = arb_freq->arb_freq * KHZ_TO_HZ; + ndata.gpu_clk_handle = arb_freq; + arb_freq->arb_freq = freq; + arb_freq->freq_updated = true; + if (arb_freq->nb) + arb_freq->nb->notifier_call(arb_freq->nb, + POST_RATE_CHANGE, &ndata); + } + + mutex_unlock(&arb_freq->arb_freq_lock); +} + +/** + * enumerate_arb_gpu_clk() - Enumerate a GPU clock on the given index + * @kbdev: kbase_device pointer + * @index: GPU clock index + * + * Return: Pointer to structure holding GPU clock frequency data reported from + * arbiter, only index 0 is valid.
+ */ +static void *enumerate_arb_gpu_clk(struct kbase_device *kbdev, + unsigned int index) +{ + if (index == 0) + return &kbdev->arb.arb_freq; + return NULL; +} + +/** + * get_arb_gpu_clk_rate() - Get the current rate of GPU clock frequency value + * @kbdev: kbase_device pointer + * @gpu_clk_handle: Handle unique to the enumerated GPU clock + * + * Return: GPU clock frequency value (in Hz) saved when gpu is granted from arbiter + */ +static unsigned long get_arb_gpu_clk_rate(struct kbase_device *kbdev, + void *gpu_clk_handle) +{ + uint32_t freq; + struct kbase_arbiter_freq *arb_dev_freq = + (struct kbase_arbiter_freq *) gpu_clk_handle; + + mutex_lock(&arb_dev_freq->arb_freq_lock); + /* Convert from KHz to Hz */ + freq = arb_dev_freq->arb_freq * KHZ_TO_HZ; + mutex_unlock(&arb_dev_freq->arb_freq_lock); + return freq; +} + +/** + * arb_gpu_clk_notifier_register() - Register a clock rate change notifier. + * @kbdev: kbase_device pointer + * @gpu_clk_handle: Handle unique to the enumerated GPU clock + * @nb: notifier block containing the callback function pointer + * + * Returns 0 on success, negative error code otherwise. + * + * This function registers a callback function that is invoked whenever the + * frequency of the clock corresponding to @gpu_clk_handle changes.
+ */ +static int arb_gpu_clk_notifier_register(struct kbase_device *kbdev, + void *gpu_clk_handle, struct notifier_block *nb) +{ + int ret = 0; + struct kbase_arbiter_freq *arb_dev_freq = + (struct kbase_arbiter_freq *)gpu_clk_handle; + + if (!arb_dev_freq->nb) + arb_dev_freq->nb = nb; + else + ret = -EBUSY; + + return ret; +} + +/** + * arb_gpu_clk_notifier_unregister() - Unregister clock rate change notifier + * @kbdev: kbase_device pointer + * @gpu_clk_handle: Handle unique to the enumerated GPU clock + * @nb: notifier block containing the callback function pointer + * + * This function unregisters a callback function that + * was previously registered to get notified of a frequency change of the + * clock corresponding to @gpu_clk_handle. + */ +static void arb_gpu_clk_notifier_unregister(struct kbase_device *kbdev, + void *gpu_clk_handle, struct notifier_block *nb) +{ + struct kbase_arbiter_freq *arb_dev_freq = + (struct kbase_arbiter_freq *)gpu_clk_handle; + if (arb_dev_freq->nb == nb) { + arb_dev_freq->nb = NULL; + } else { + dev_err(kbdev->dev, "%s - notifier did not match\n", + __func__); + } } + +struct kbase_clk_rate_trace_op_conf arb_clk_rate_trace_ops = { + .get_gpu_clk_rate = get_arb_gpu_clk_rate, + .enumerate_gpu_clk = enumerate_arb_gpu_clk, + .gpu_clk_notifier_register = arb_gpu_clk_notifier_register, + .gpu_clk_notifier_unregister = arb_gpu_clk_notifier_unregister +}; diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_pm.h b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_pm.h index 3c49eb1..1f570bb 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_pm.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/arbiter/mali_kbase_arbiter_pm.h @@ -1,28 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT ARM Limited. All rights reserved.
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - *//* SPDX-License-Identifier: GPL-2.0 */ - -/* - * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -38,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /** @@ -116,10 +93,18 @@ void kbase_arbiter_pm_early_term(struct kbase_device *kbdev); * kbase_arbiter_pm_release_interrupts() - Release the GPU interrupts * @kbdev: The kbase device structure for the device (must be a valid pointer) * - * Releases interrupts if needed (GPU is available) otherwise does nothing + * Releases interrupts and set the interrupt flag to false */ void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev); +/** + * kbase_arbiter_pm_install_interrupts() - Install the GPU interrupts + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Install interrupts and set the interrupt_install flag to true. 
+ */ +int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev); + /** * kbase_arbiter_pm_vm_event() - Dispatch VM event to the state machine * @kbdev: The kbase device structure for the device (must be a valid pointer) @@ -156,4 +141,54 @@ int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev, */ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev); +/** + * kbase_arbiter_set_max_config() - Set the max config data in kbase device. + * @kbdev: The kbase device structure for the device (must be a valid pointer). + * @max_l2_slices: The maximum number of L2 slices. + * @max_core_mask: The largest core mask. + * + * Stores the maximum L2 slice count and core mask reported by the arbiter. + * They are only applied while the VM state is still INITIALIZING. + */ +void kbase_arbiter_set_max_config(struct kbase_device *kbdev, + uint32_t max_l2_slices, + uint32_t max_core_mask); + +/** + * kbase_arbiter_pm_gpu_assigned() - Determine if this VM has access to the GPU + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Return: 0 if the VM does not have access, 1 if it does, and a negative number + * if an error occurred + */ +int kbase_arbiter_pm_gpu_assigned(struct kbase_device *kbdev); + +extern struct kbase_clk_rate_trace_op_conf arb_clk_rate_trace_ops; + +/** + * struct kbase_arbiter_freq - Holding the GPU clock frequency data retrieved + * from arbiter + * @arb_freq: GPU clock frequency value in KHz + * @arb_freq_lock: Mutex protecting access to arb_freq value + * @nb: Notifier block to receive rate change callbacks + * @freq_updated: Flag to indicate whether a frequency change has just been + * communicated to avoid "GPU_GRANTED when not expected" warning + */ +struct kbase_arbiter_freq { + uint32_t arb_freq; + struct mutex arb_freq_lock; + struct notifier_block *nb; + bool freq_updated; +}; + +/** + * kbase_arbiter_pm_update_gpu_freq() - Update GPU frequency + * @arb_freq: Pointer to GPU clock frequency data + * @freq: The
new frequency + * + * Updates the GPU frequency and triggers any notifications + */ +void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq, + uint32_t freq); + #endif /*_MALI_KBASE_ARBITER_PM_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild index 2449e80..5dbcff3 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/Kbuild @@ -1,10 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software # Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. +# of such GNU license. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -15,51 +16,34 @@ # along with this program; if not, you can access it online at # http://www.gnu.org/licenses/gpl-2.0.html. 
# -# SPDX-License-Identifier: GPL-2.0 -# # -BACKEND += \ - backend/gpu/mali_kbase_cache_policy_backend.c \ - backend/gpu/mali_kbase_device_hw.c \ - backend/gpu/mali_kbase_gpuprops_backend.c \ - backend/gpu/mali_kbase_irq_linux.c \ - backend/gpu/mali_kbase_instr_backend.c \ - backend/gpu/mali_kbase_js_backend.c \ - backend/gpu/mali_kbase_pm_backend.c \ - backend/gpu/mali_kbase_pm_driver.c \ - backend/gpu/mali_kbase_pm_metrics.c \ - backend/gpu/mali_kbase_pm_ca.c \ - backend/gpu/mali_kbase_pm_always_on.c \ - backend/gpu/mali_kbase_pm_coarse_demand.c \ - backend/gpu/mali_kbase_pm_policy.c \ - backend/gpu/mali_kbase_time.c \ - backend/gpu/mali_kbase_l2_mmu_config.c +mali_kbase-y += \ + backend/gpu/mali_kbase_cache_policy_backend.o \ + backend/gpu/mali_kbase_gpuprops_backend.o \ + backend/gpu/mali_kbase_irq_linux.o \ + backend/gpu/mali_kbase_js_backend.o \ + backend/gpu/mali_kbase_pm_backend.o \ + backend/gpu/mali_kbase_pm_driver.o \ + backend/gpu/mali_kbase_pm_metrics.o \ + backend/gpu/mali_kbase_pm_ca.o \ + backend/gpu/mali_kbase_pm_always_on.o \ + backend/gpu/mali_kbase_pm_coarse_demand.o \ + backend/gpu/mali_kbase_pm_policy.o \ + backend/gpu/mali_kbase_time.o \ + backend/gpu/mali_kbase_l2_mmu_config.o \ + backend/gpu/mali_kbase_clk_rate_trace_mgr.o -ifeq ($(MALI_USE_CSF),1) -# empty -else - BACKEND += \ - backend/gpu/mali_kbase_jm_as.c \ - backend/gpu/mali_kbase_debug_job_fault_backend.c \ - backend/gpu/mali_kbase_jm_hw.c \ - backend/gpu/mali_kbase_jm_rb.c +ifeq ($(MALI_USE_CSF),0) + mali_kbase-y += \ + backend/gpu/mali_kbase_instr_backend.o \ + backend/gpu/mali_kbase_jm_as.o \ + backend/gpu/mali_kbase_debug_job_fault_backend.o \ + backend/gpu/mali_kbase_jm_hw.o \ + backend/gpu/mali_kbase_jm_rb.o endif -ifeq ($(MALI_CUSTOMER_RELEASE),0) -BACKEND += \ - backend/gpu/mali_kbase_pm_always_on_demand.c -endif -ifeq ($(CONFIG_MALI_DEVFREQ),y) -BACKEND += \ - backend/gpu/mali_kbase_devfreq.c -endif +mali_kbase-$(CONFIG_MALI_DEVFREQ) += \ + backend/gpu/mali_kbase_devfreq.o 
-ifeq ($(CONFIG_MALI_NO_MALI),y) - # Dummy model - BACKEND += backend/gpu/mali_kbase_model_dummy.c - BACKEND += backend/gpu/mali_kbase_model_linux.c - # HW error simulation - BACKEND += backend/gpu/mali_kbase_model_error_generator.c -endif diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h index 4a61f96..6924fdb 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /* diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c index 7378bfd..e542ccf 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2015-2016,2018 ARM Limited. All rights reserved. 
+ * (C) COPYRIGHT 2014-2016, 2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,10 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #include "backend/gpu/mali_kbase_cache_policy_backend.h" -#include +#include void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, u32 mode) diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h index f78ada7..278125a 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,16 +17,13 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ - #ifndef _KBASE_CACHE_POLICY_BACKEND_H_ #define _KBASE_CACHE_POLICY_BACKEND_H_ #include "mali_kbase.h" -#include "mali_base_kernel.h" +#include /** * kbase_cache_set_coherency_mode() - Sets the system coherency mode diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_clk_rate_trace_mgr.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_clk_rate_trace_mgr.c new file mode 100644 index 0000000..6ad0f58 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_clk_rate_trace_mgr.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * Implementation of the GPU clock rate trace manager. + */ + +#include +#include +#include +#include +#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" + +#ifdef CONFIG_TRACE_POWER_GPU_FREQUENCY +#include +#else +#include "mali_power_gpu_frequency_trace.h" +#endif + +#ifndef CLK_RATE_TRACE_OPS +#define CLK_RATE_TRACE_OPS (NULL) +#endif + +/** + * get_clk_rate_trace_callbacks() - Returns pointer to clk trace ops. + * @kbdev: Pointer to kbase device, used to check if arbitration is enabled + * when compiled with arbiter support. 
+ * Return: Pointer to clk trace ops if supported or NULL. + */ +static struct kbase_clk_rate_trace_op_conf * +get_clk_rate_trace_callbacks(struct kbase_device *kbdev __maybe_unused) +{ + /* base case */ + struct kbase_clk_rate_trace_op_conf *callbacks = + (struct kbase_clk_rate_trace_op_conf *)CLK_RATE_TRACE_OPS; +#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) + const void *arbiter_if_node; + + if (WARN_ON(!kbdev) || WARN_ON(!kbdev->dev)) + return callbacks; + + arbiter_if_node = + of_get_property(kbdev->dev->of_node, "arbiter_if", NULL); + /* Arbitration enabled, override the callback pointer.*/ + if (arbiter_if_node) + callbacks = &arb_clk_rate_trace_ops; + else + dev_dbg(kbdev->dev, + "Arbitration supported but disabled by platform. Leaving clk rate callbacks as default.\n"); + +#endif + + return callbacks; +} + +static int gpu_clk_rate_change_notifier(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct kbase_gpu_clk_notifier_data *ndata = data; + struct kbase_clk_data *clk_data = + container_of(nb, struct kbase_clk_data, clk_rate_change_nb); + struct kbase_clk_rate_trace_manager *clk_rtm = clk_data->clk_rtm; + unsigned long flags; + + if (WARN_ON_ONCE(clk_data->gpu_clk_handle != ndata->gpu_clk_handle)) + return NOTIFY_BAD; + + spin_lock_irqsave(&clk_rtm->lock, flags); + if (event == POST_RATE_CHANGE) { + if (!clk_rtm->gpu_idle && + (clk_data->clock_val != ndata->new_rate)) { + kbase_clk_rate_trace_manager_notify_all( + clk_rtm, clk_data->index, ndata->new_rate); + } + + clk_data->clock_val = ndata->new_rate; + } + spin_unlock_irqrestore(&clk_rtm->lock, flags); + + return NOTIFY_DONE; +} + +static int gpu_clk_data_init(struct kbase_device *kbdev, + void *gpu_clk_handle, unsigned int index) +{ + struct kbase_clk_rate_trace_op_conf *callbacks; + struct kbase_clk_data *clk_data; + struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; + int ret = 0; + + callbacks = get_clk_rate_trace_callbacks(kbdev); + + if 
(WARN_ON(!callbacks) || + WARN_ON(!gpu_clk_handle) || + WARN_ON(index >= BASE_MAX_NR_CLOCKS_REGULATORS)) + return -EINVAL; + + clk_data = kzalloc(sizeof(*clk_data), GFP_KERNEL); + if (!clk_data) { + dev_err(kbdev->dev, "Failed to allocate data for clock enumerated at index %u", index); + return -ENOMEM; + } + + clk_data->index = (u8)index; + clk_data->gpu_clk_handle = gpu_clk_handle; + /* Store the initial value of clock */ + clk_data->clock_val = + callbacks->get_gpu_clk_rate(kbdev, gpu_clk_handle); + + { + /* At the initialization time, GPU is powered off. */ + unsigned long flags; + + spin_lock_irqsave(&clk_rtm->lock, flags); + kbase_clk_rate_trace_manager_notify_all( + clk_rtm, clk_data->index, 0); + spin_unlock_irqrestore(&clk_rtm->lock, flags); + } + + clk_data->clk_rtm = clk_rtm; + clk_rtm->clks[index] = clk_data; + + clk_data->clk_rate_change_nb.notifier_call = + gpu_clk_rate_change_notifier; + + if (callbacks->gpu_clk_notifier_register) + ret = callbacks->gpu_clk_notifier_register(kbdev, + gpu_clk_handle, &clk_data->clk_rate_change_nb); + if (ret) { + dev_err(kbdev->dev, "Failed to register notifier for clock enumerated at index %u", index); + kfree(clk_data); + } + + return ret; +} + +int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev) +{ + struct kbase_clk_rate_trace_op_conf *callbacks; + struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; + unsigned int i; + int ret = 0; + + callbacks = get_clk_rate_trace_callbacks(kbdev); + + spin_lock_init(&clk_rtm->lock); + INIT_LIST_HEAD(&clk_rtm->listeners); + + /* Return early if no callbacks provided for clock rate tracing */ + if (!callbacks) { + WRITE_ONCE(clk_rtm->clk_rate_trace_ops, NULL); + return 0; + } + + clk_rtm->gpu_idle = true; + + for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { + void *gpu_clk_handle = + callbacks->enumerate_gpu_clk(kbdev, i); + + if (!gpu_clk_handle) + break; + + ret = gpu_clk_data_init(kbdev, gpu_clk_handle, i); + if (ret) + goto error; + } + + /* 
Activate clock rate trace manager if at least one GPU clock was + * enumerated. + */ + if (i) { + WRITE_ONCE(clk_rtm->clk_rate_trace_ops, callbacks); + } else { + dev_info(kbdev->dev, "No clock(s) available for rate tracing"); + WRITE_ONCE(clk_rtm->clk_rate_trace_ops, NULL); + } + + return 0; + +error: /* clk_rtm->clk_rate_trace_ops is only published on success: unwind clocks 0..i-1 via callbacks */ + while (i--) { + if (callbacks->gpu_clk_notifier_unregister) + callbacks->gpu_clk_notifier_unregister(kbdev, clk_rtm->clks[i]->gpu_clk_handle, + &clk_rtm->clks[i]->clk_rate_change_nb); + kfree(clk_rtm->clks[i]); + } + + return ret; +} + +void kbase_clk_rate_trace_manager_term(struct kbase_device *kbdev) +{ + struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; + unsigned int i; + + WARN_ON(!list_empty(&clk_rtm->listeners)); + + if (!clk_rtm->clk_rate_trace_ops) + return; + + for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { + if (!clk_rtm->clks[i]) + break; + + if (clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister) + clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister + (kbdev, clk_rtm->clks[i]->gpu_clk_handle, + &clk_rtm->clks[i]->clk_rate_change_nb); + kfree(clk_rtm->clks[i]); + } + + WRITE_ONCE(clk_rtm->clk_rate_trace_ops, NULL); +} + +void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev) +{ + struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; + unsigned int i; + unsigned long flags; + + if (!clk_rtm->clk_rate_trace_ops) + return; + + spin_lock_irqsave(&clk_rtm->lock, flags); + + for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { + struct kbase_clk_data *clk_data = clk_rtm->clks[i]; + + if (!clk_data) + break; + + if (unlikely(!clk_data->clock_val)) + continue; + + kbase_clk_rate_trace_manager_notify_all( + clk_rtm, clk_data->index, clk_data->clock_val); + } + + clk_rtm->gpu_idle = false; + spin_unlock_irqrestore(&clk_rtm->lock, flags); +} + +void kbase_clk_rate_trace_manager_gpu_idle(struct kbase_device 
*kbdev) +{ + struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm; + unsigned int i; + unsigned long
flags; + + if (!clk_rtm->clk_rate_trace_ops) + return; + + spin_lock_irqsave(&clk_rtm->lock, flags); + + for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { + struct kbase_clk_data *clk_data = clk_rtm->clks[i]; + + if (!clk_data) + break; + + if (unlikely(!clk_data->clock_val)) + continue; + + kbase_clk_rate_trace_manager_notify_all( + clk_rtm, clk_data->index, 0); + } + + clk_rtm->gpu_idle = true; + spin_unlock_irqrestore(&clk_rtm->lock, flags); +} + +void kbase_clk_rate_trace_manager_notify_all( + struct kbase_clk_rate_trace_manager *clk_rtm, + u32 clk_index, + unsigned long new_rate) +{ + struct kbase_clk_rate_listener *pos; + struct kbase_device *kbdev; + + lockdep_assert_held(&clk_rtm->lock); + + kbdev = container_of(clk_rtm, struct kbase_device, pm.clk_rtm); + + dev_dbg(kbdev->dev, "%s - GPU clock %u rate changed to %lu, pid: %d", + __func__, clk_index, new_rate, current->pid); + + /* Raise standard `power/gpu_frequency` ftrace event */ + { + unsigned long new_rate_khz = new_rate; + +#if BITS_PER_LONG == 64 + do_div(new_rate_khz, 1000); +#elif BITS_PER_LONG == 32 + new_rate_khz /= 1000; +#else +#error "unsigned long division is not supported for this architecture" +#endif + + trace_gpu_frequency(new_rate_khz, clk_index); + } + + /* Notify the listeners. */ + list_for_each_entry(pos, &clk_rtm->listeners, node) { + pos->notify(pos, clk_index, new_rate); + } +} +KBASE_EXPORT_TEST_API(kbase_clk_rate_trace_manager_notify_all); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_clk_rate_trace_mgr.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_clk_rate_trace_mgr.h new file mode 100644 index 0000000..f7ec9d1 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_clk_rate_trace_mgr.h @@ -0,0 +1,154 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CLK_RATE_TRACE_MGR_ +#define _KBASE_CLK_RATE_TRACE_MGR_ + +/* The index of top clock domain in kbase_clk_rate_trace_manager:clks. */ +#define KBASE_CLOCK_DOMAIN_TOP (0) + +/* The index of shader-cores clock domain in + * kbase_clk_rate_trace_manager:clks. + */ +#define KBASE_CLOCK_DOMAIN_SHADER_CORES (1) + +/** + * struct kbase_clk_data - Data stored per enumerated GPU clock. + * + * @clk_rtm: Pointer to clock rate trace manager object. + * @gpu_clk_handle: Handle unique to the enumerated GPU clock. + * @plat_private: Private data for the platform to store into + * @clk_rate_change_nb: notifier block containing the pointer to callback + * function that is invoked whenever the rate of + * enumerated GPU clock changes. + * @clock_val: Current rate of the enumerated GPU clock. + * @index: Index at which the GPU clock was enumerated. + */ +struct kbase_clk_data { + struct kbase_clk_rate_trace_manager *clk_rtm; + void *gpu_clk_handle; + void *plat_private; + struct notifier_block clk_rate_change_nb; + unsigned long clock_val; + u8 index; +}; + +/** + * kbase_clk_rate_trace_manager_init - Initialize GPU clock rate trace manager. + * + * @kbdev: Device pointer + * + * Return: 0 if success, or an error code on failure. 
+ */ +int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev); + +/** + * kbase_clk_rate_trace_manager_term - Terminate GPU clock rate trace manager. + * + * @kbdev: Device pointer + */ +void kbase_clk_rate_trace_manager_term(struct kbase_device *kbdev); + +/** + * kbase_clk_rate_trace_manager_gpu_active - Inform GPU clock rate trace + * manager of GPU becoming active. + * + * @kbdev: Device pointer + */ +void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev); + +/** + * kbase_clk_rate_trace_manager_gpu_idle - Inform GPU clock rate trace + * manager of GPU becoming idle. + * @kbdev: Device pointer + */ +void kbase_clk_rate_trace_manager_gpu_idle(struct kbase_device *kbdev); + +/** + * kbase_clk_rate_trace_manager_subscribe_no_lock() - Add freq change listener. + * + * @clk_rtm: Clock rate manager instance. + * @listener: Listener handle + * + * kbase_clk_rate_trace_manager:lock must be held by the caller. + */ +static inline void kbase_clk_rate_trace_manager_subscribe_no_lock( + struct kbase_clk_rate_trace_manager *clk_rtm, + struct kbase_clk_rate_listener *listener) +{ + lockdep_assert_held(&clk_rtm->lock); + list_add(&listener->node, &clk_rtm->listeners); +} + +/** + * kbase_clk_rate_trace_manager_subscribe() - Add freq change listener. + * + * @clk_rtm: Clock rate manager instance. + * @listener: Listener handle + */ +static inline void kbase_clk_rate_trace_manager_subscribe( + struct kbase_clk_rate_trace_manager *clk_rtm, + struct kbase_clk_rate_listener *listener) +{ + unsigned long flags; + + spin_lock_irqsave(&clk_rtm->lock, flags); + kbase_clk_rate_trace_manager_subscribe_no_lock( + clk_rtm, listener); + spin_unlock_irqrestore(&clk_rtm->lock, flags); +} + +/** + * kbase_clk_rate_trace_manager_unsubscribe() - Remove freq change listener. + * + * @clk_rtm: Clock rate manager instance. 
+ * @listener: Listener handle + */ +static inline void kbase_clk_rate_trace_manager_unsubscribe( + struct kbase_clk_rate_trace_manager *clk_rtm, + struct kbase_clk_rate_listener *listener) +{ + unsigned long flags; + + spin_lock_irqsave(&clk_rtm->lock, flags); + list_del(&listener->node); + spin_unlock_irqrestore(&clk_rtm->lock, flags); +} + +/** + * kbase_clk_rate_trace_manager_notify_all() - Notify all clock + * rate listeners. + * + * @clk_rtm: Clock rate manager instance. + * @clock_index: Clock index. + * @new_rate: New clock frequency (Hz) + * + * kbase_clk_rate_trace_manager:lock must be held by the caller. + * This function is exported to be used by clock rate trace test + * portal. + */ +void kbase_clk_rate_trace_manager_notify_all( + struct kbase_clk_rate_trace_manager *clk_rtm, + u32 clock_index, + unsigned long new_rate); + +#endif /* _KBASE_CLK_RATE_TRACE_MGR_ */ + diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c index b05844e..e121b41 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_debug_job_fault_backend.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2015,2018-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015, 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license.
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,15 +17,13 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #include -#include +#include #include "mali_kbase_debug_job_fault.h" -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) /*GPU_CONTROL_REG(r)*/ static int gpu_control_reg_snapshot[] = { diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c index 2806f05..8c31499 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #include @@ -27,40 +26,53 @@ #include #include #include -#ifdef CONFIG_DEVFREQ_THERMAL +#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) #include #endif #include -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) #include -#else /* Linux >= 3.13 */ -/* In 3.13 the OPP include header file, types, and functions were all - * renamed. 
Use the old filename for the include, and define the new names to - * the old, when an old kernel is detected. - */ -#include -#define dev_pm_opp opp -#define dev_pm_opp_get_voltage opp_get_voltage -#define dev_pm_opp_get_opp_count opp_get_opp_count -#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil -#define dev_pm_opp_find_freq_floor opp_find_freq_floor -#endif /* Linux >= 3.13 */ /** - * opp_translate - Translate nominal OPP frequency from devicetree into real - * frequency and core mask - * @kbdev: Device pointer - * @freq: Nominal frequency - * @core_mask: Pointer to u64 to store core mask to - * @freqs: Pointer to array of frequencies - * @volts: Pointer to array of voltages + * get_voltage() - Get the voltage value corresponding to the nominal frequency + * used by devfreq. + * @kbdev: Device pointer + * @freq: Nominal frequency in Hz passed by devfreq. + * + * This function will be called only when the opp table which is compatible with + * "operating-points-v2-mali", is not present in the devicetree for GPU device. * - * This function will only perform translation if an operating-points-v2-mali - * table is present in devicetree. If one is not present then it will return an - * untranslated frequency and all cores enabled. + * Return: Voltage value in milli volts, 0 in case of error. 
*/ -static void opp_translate(struct kbase_device *kbdev, unsigned long freq, +static unsigned long get_voltage(struct kbase_device *kbdev, unsigned long freq) +{ + struct dev_pm_opp *opp; + unsigned long voltage = 0; + +#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE + rcu_read_lock(); +#endif + + opp = dev_pm_opp_find_freq_exact(kbdev->dev, freq, true); + + if (IS_ERR_OR_NULL(opp)) + dev_err(kbdev->dev, "Failed to get opp (%ld)\n", PTR_ERR(opp)); + else { + voltage = dev_pm_opp_get_voltage(opp); +#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE + dev_pm_opp_put(opp); +#endif + } + +#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE + rcu_read_unlock(); +#endif + + /* Return the voltage in milli volts */ + return voltage / 1000; +} + +void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq, u64 *core_mask, unsigned long *freqs, unsigned long *volts) { unsigned int i; @@ -82,12 +94,17 @@ static void opp_translate(struct kbase_device *kbdev, unsigned long freq, } /* If failed to find OPP, return all cores enabled - * and nominal frequency + * and nominal frequency and the corresponding voltage. 
*/ if (i == kbdev->num_opps) { + unsigned long voltage = get_voltage(kbdev, freq); + *core_mask = kbdev->gpu_props.props.raw_props.shader_present; - for (i = 0; i < kbdev->nr_clocks; i++) + + for (i = 0; i < kbdev->nr_clocks; i++) { freqs[i] = freq; + volts[i] = voltage; + } } } @@ -104,18 +121,18 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) nominal_freq = *target_freq; -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) +#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE rcu_read_lock(); #endif opp = devfreq_recommended_opp(dev, &nominal_freq, flags); -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) +#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE rcu_read_unlock(); #endif if (IS_ERR_OR_NULL(opp)) { dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp)); return PTR_ERR(opp); } -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) +#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE dev_pm_opp_put(opp); #endif @@ -127,9 +144,10 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) return 0; } - opp_translate(kbdev, nominal_freq, &core_mask, freqs, volts); + kbase_devfreq_opp_translate(kbdev, nominal_freq, &core_mask, + freqs, volts); -#ifdef CONFIG_REGULATOR +#if IS_ENABLED(CONFIG_REGULATOR) /* Regulators and clocks work in pairs: every clock has a regulator, * and we never expect to have more regulators than clocks. 
* @@ -177,7 +195,7 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) } } -#ifdef CONFIG_REGULATOR +#if IS_ENABLED(CONFIG_REGULATOR) for (i = 0; i < kbdev->nr_clocks; i++) { if (kbdev->regulators[i] && kbdev->current_voltages[i] != volts[i] && @@ -238,6 +256,10 @@ kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat) stat->current_frequency = kbdev->current_nominal_freq; stat->private_data = NULL; +#if MALI_USE_CSF && defined CONFIG_DEVFREQ_THERMAL + kbase_ipa_reset_data(kbdev); +#endif + return 0; } @@ -249,11 +271,11 @@ static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev, unsigned long freq; struct dev_pm_opp *opp; -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) +#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE rcu_read_lock(); #endif count = dev_pm_opp_get_opp_count(kbdev->dev); -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) +#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE rcu_read_unlock(); #endif if (count < 0) @@ -264,20 +286,20 @@ static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev, if (!dp->freq_table) return -ENOMEM; -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) +#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE rcu_read_lock(); #endif for (i = 0, freq = ULONG_MAX; i < count; i++, freq--) { opp = dev_pm_opp_find_freq_floor(kbdev->dev, &freq); if (IS_ERR(opp)) break; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) +#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE dev_pm_opp_put(opp); -#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) */ +#endif /* KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE */ dp->freq_table[i] = freq; } -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) +#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE rcu_read_unlock(); #endif @@ -309,18 +331,21 @@ static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev) struct devfreq_dev_profile *dp = &kbdev->devfreq_profile; kfree(dp->freq_table); + dp->freq_table = NULL; } 
static void kbase_devfreq_term_core_mask_table(struct kbase_device *kbdev) { kfree(kbdev->devfreq_table); + kbdev->devfreq_table = NULL; } static void kbase_devfreq_exit(struct device *dev) { struct kbase_device *kbdev = dev_get_drvdata(dev); - kbase_devfreq_term_freq_table(kbdev); + if (kbdev) + kbase_devfreq_term_freq_table(kbdev); } static void kbasep_devfreq_read_suspend_clock(struct kbase_device *kbdev, @@ -359,7 +384,7 @@ static void kbasep_devfreq_read_suspend_clock(struct kbase_device *kbdev, static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) { -#if KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE || !defined(CONFIG_OF) +#ifndef CONFIG_OF /* OPP table initialization requires at least the capability to get * regulators and clocks from the device tree, as well as parsing * arrays of unsigned integer values. @@ -392,7 +417,7 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) u64 core_mask, opp_freq, real_freqs[BASE_MAX_NR_CLOCKS_REGULATORS]; int err; -#ifdef CONFIG_REGULATOR +#if IS_ENABLED(CONFIG_REGULATOR) u32 opp_volts[BASE_MAX_NR_CLOCKS_REGULATORS]; #endif @@ -420,7 +445,7 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) err); continue; } -#ifdef CONFIG_REGULATOR +#if IS_ENABLED(CONFIG_REGULATOR) err = of_property_read_u32_array(node, "opp-microvolt", opp_volts, kbdev->nr_regulators); if (err < 0) { @@ -474,7 +499,7 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) kbdev->devfreq_table[i].real_freqs[j] = real_freqs[j]; } -#ifdef CONFIG_REGULATOR +#if IS_ENABLED(CONFIG_REGULATOR) if (kbdev->nr_regulators > 0) { int j; @@ -493,11 +518,9 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev) kbdev->num_opps = i; return 0; -#endif /* KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE */ +#endif /* CONFIG_OF */ } -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) - static const char *kbase_devfreq_req_type_name(enum kbase_devfreq_work_type type) 
{ const char *p; @@ -554,27 +577,26 @@ static void kbase_devfreq_suspend_resume_worker(struct work_struct *work) } } -#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */ - void kbase_devfreq_enqueue_work(struct kbase_device *kbdev, enum kbase_devfreq_work_type work_type) { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) unsigned long flags; WARN_ON(work_type == DEVFREQ_WORK_NONE); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbdev->devfreq_queue.req_type = work_type; - queue_work(kbdev->devfreq_queue.workq, &kbdev->devfreq_queue.work); + /* Skip enqueuing a work if workqueue has already been terminated. */ + if (likely(kbdev->devfreq_queue.workq)) { + kbdev->devfreq_queue.req_type = work_type; + queue_work(kbdev->devfreq_queue.workq, + &kbdev->devfreq_queue.work); + } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); dev_dbg(kbdev->dev, "Enqueuing devfreq req: %s\n", kbase_devfreq_req_type_name(work_type)); -#endif } static int kbase_devfreq_work_init(struct kbase_device *kbdev) { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) kbdev->devfreq_queue.req_type = DEVFREQ_WORK_NONE; kbdev->devfreq_queue.acted_type = DEVFREQ_WORK_RESUME; @@ -584,17 +606,23 @@ static int kbase_devfreq_work_init(struct kbase_device *kbdev) INIT_WORK(&kbdev->devfreq_queue.work, kbase_devfreq_suspend_resume_worker); -#endif return 0; } static void kbase_devfreq_work_term(struct kbase_device *kbdev) { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) - destroy_workqueue(kbdev->devfreq_queue.workq); -#endif + unsigned long flags; + struct workqueue_struct *workq; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + workq = kbdev->devfreq_queue.workq; + kbdev->devfreq_queue.workq = NULL; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + destroy_workqueue(workq); } + int kbase_devfreq_init(struct kbase_device *kbdev) { struct devfreq_dev_profile *dp; @@ -631,19 +659,11 @@ int kbase_devfreq_init(struct kbase_device *kbdev) /* Record the maximum frequency 
possible */ kbdev->gpu_props.props.core_props.gpu_freq_khz_max = dp->freq_table[0] / 1000; - }; - - err = kbase_devfreq_init_core_mask_table(kbdev); - if (err) { - kbase_devfreq_term_freq_table(kbdev); - return err; } - /* Initialise devfreq suspend/resume workqueue */ - err = kbase_devfreq_work_init(kbdev); + err = kbase_devfreq_init_core_mask_table(kbdev); if (err) { kbase_devfreq_term_freq_table(kbdev); - dev_err(kbdev->dev, "Devfreq initialization failed"); return err; } @@ -651,13 +671,27 @@ int kbase_devfreq_init(struct kbase_device *kbdev) "simple_ondemand", NULL); if (IS_ERR(kbdev->devfreq)) { err = PTR_ERR(kbdev->devfreq); - kbase_devfreq_work_term(kbdev); + kbdev->devfreq = NULL; + kbase_devfreq_term_core_mask_table(kbdev); kbase_devfreq_term_freq_table(kbdev); + dev_err(kbdev->dev, "Fail to add devfreq device(%d)\n", err); + return err; + } + + /* Initialize devfreq suspend/resume workqueue */ + err = kbase_devfreq_work_init(kbdev); + if (err) { + if (devfreq_remove_device(kbdev->devfreq)) + dev_err(kbdev->dev, "Fail to rm devfreq\n"); + kbdev->devfreq = NULL; + kbase_devfreq_term_core_mask_table(kbdev); + dev_err(kbdev->dev, "Fail to init devfreq workqueue\n"); return err; } /* devfreq_add_device only copies a few of kbdev->dev's fields, so - * set drvdata explicitly so IPA models can access kbdev. */ + * set drvdata explicitly so IPA models can access kbdev. 
+ */ dev_set_drvdata(&kbdev->devfreq->dev, kbdev); err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq); @@ -667,11 +701,11 @@ int kbase_devfreq_init(struct kbase_device *kbdev) goto opp_notifier_failed; } -#ifdef CONFIG_DEVFREQ_THERMAL +#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) err = kbase_ipa_init(kbdev); if (err) { dev_err(kbdev->dev, "IPA initialization failed\n"); - goto cooling_failed; + goto ipa_init_failed; } kbdev->devfreq_cooling = of_devfreq_cooling_register_power( @@ -683,23 +717,28 @@ int kbase_devfreq_init(struct kbase_device *kbdev) dev_err(kbdev->dev, "Failed to register cooling device (%d)\n", err); - goto cooling_failed; + goto cooling_reg_failed; } #endif return 0; -#ifdef CONFIG_DEVFREQ_THERMAL -cooling_failed: +#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) +cooling_reg_failed: + kbase_ipa_term(kbdev); +ipa_init_failed: devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); #endif /* CONFIG_DEVFREQ_THERMAL */ + opp_notifier_failed: + kbase_devfreq_work_term(kbdev); + if (devfreq_remove_device(kbdev->devfreq)) dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err); - else - kbdev->devfreq = NULL; - kbase_devfreq_work_term(kbdev); + kbdev->devfreq = NULL; + + kbase_devfreq_term_core_mask_table(kbdev); return err; } @@ -710,7 +749,7 @@ void kbase_devfreq_term(struct kbase_device *kbdev) dev_dbg(kbdev->dev, "Term Mali devfreq\n"); -#ifdef CONFIG_DEVFREQ_THERMAL +#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) if (kbdev->devfreq_cooling) devfreq_cooling_unregister(kbdev->devfreq_cooling); @@ -719,6 +758,8 @@ void kbase_devfreq_term(struct kbase_device *kbdev) devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); + kbase_devfreq_work_term(kbdev); + err = devfreq_remove_device(kbdev->devfreq); if (err) dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err); @@ -726,6 +767,4 @@ void kbase_devfreq_term(struct kbase_device *kbdev) kbdev->devfreq = NULL; kbase_devfreq_term_core_mask_table(kbdev); - - kbase_devfreq_work_term(kbdev); } 
diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h index 8c976b2..901827e 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014, 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _BASE_DEVFREQ_H_ @@ -43,4 +42,20 @@ void kbase_devfreq_force_freq(struct kbase_device *kbdev, unsigned long freq); void kbase_devfreq_enqueue_work(struct kbase_device *kbdev, enum kbase_devfreq_work_type work_type); +/** + * kbase_devfreq_opp_translate - Translate nominal OPP frequency from devicetree + * into real frequency & voltage pair, along with + * core mask + * @kbdev: Device pointer + * @freq: Nominal frequency + * @core_mask: Pointer to u64 to store core mask to + * @freqs: Pointer to array of frequencies + * @volts: Pointer to array of voltages + * + * This function will only perform translation if an operating-points-v2-mali + * table is present in devicetree. If one is not present then it will return an + * untranslated frequency (and corresponding voltage) and all cores enabled. 
+ */ +void kbase_devfreq_opp_translate(struct kbase_device *kbdev, unsigned long freq, + u64 *core_mask, unsigned long *freqs, unsigned long *volts); #endif /* _BASE_DEVFREQ_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h old mode 100644 new mode 100755 diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c index 352afa1..11088db 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c @@ -1,12 +1,12 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -17,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ /* @@ -26,7 +24,7 @@ */ #include -#include +#include #include #include @@ -41,8 +39,19 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev, registers.l2_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_FEATURES)); + registers.core_features = 0; +#if !MALI_USE_CSF + /* TGOx */ registers.core_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(CORE_FEATURES)); +#else /* !MALI_USE_CSF */ + if (((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) == + GPU_ID2_PRODUCT_TGRX) || + ((registers.gpu_id & GPU_ID2_PRODUCT_MODEL) == + GPU_ID2_PRODUCT_TVAX)) + registers.core_features = + kbase_reg_read(kbdev, GPU_CONTROL_REG(CORE_FEATURES)); +#endif /* MALI_USE_CSF */ registers.tiler_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_FEATURES)); registers.mem_features = kbase_reg_read(kbdev, @@ -51,12 +60,20 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev, GPU_CONTROL_REG(MMU_FEATURES)); registers.as_present = kbase_reg_read(kbdev, GPU_CONTROL_REG(AS_PRESENT)); +#if !MALI_USE_CSF registers.js_present = kbase_reg_read(kbdev, GPU_CONTROL_REG(JS_PRESENT)); +#else /* !MALI_USE_CSF */ + registers.js_present = 0; +#endif /* !MALI_USE_CSF */ for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) +#if !MALI_USE_CSF registers.js_features[i] = kbase_reg_read(kbdev, GPU_CONTROL_REG(JS_FEATURES_REG(i))); +#else /* !MALI_USE_CSF */ + registers.js_features[i] = 0; +#endif /* !MALI_USE_CSF */ for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) registers.texture_features[i] = kbase_reg_read(kbdev, @@ -93,13 +110,49 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev, registers.stack_present_hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(STACK_PRESENT_HI)); - if (!kbase_is_gpu_lost(kbdev)) { + if (registers.gpu_id >= GPU_ID2_PRODUCT_MAKE(11, 8, 5, 2)) { + registers.gpu_features_lo = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_FEATURES_LO)); + registers.gpu_features_hi = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_FEATURES_HI)); + } 
else { + registers.gpu_features_lo = 0; + registers.gpu_features_hi = 0; + } + + if (!kbase_is_gpu_removed(kbdev)) { *regdump = registers; return 0; } else return -EIO; } +int kbase_backend_gpuprops_get_curr_config(struct kbase_device *kbdev, + struct kbase_current_config_regdump *curr_config_regdump) +{ + if (WARN_ON(!kbdev) || WARN_ON(!curr_config_regdump)) + return -EINVAL; + + curr_config_regdump->mem_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(MEM_FEATURES)); + + curr_config_regdump->shader_present_lo = kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_PRESENT_LO)); + curr_config_regdump->shader_present_hi = kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_PRESENT_HI)); + + curr_config_regdump->l2_present_lo = kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_PRESENT_LO)); + curr_config_regdump->l2_present_hi = kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_PRESENT_HI)); + + if (WARN_ON(kbase_is_gpu_removed(kbdev))) + return -EIO; + + return 0; + +} + int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, struct kbase_gpuprops_regdump *regdump) { @@ -112,7 +165,7 @@ int kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, coherency_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(COHERENCY_FEATURES)); - if (kbase_is_gpu_lost(kbdev)) + if (kbase_is_gpu_removed(kbdev)) return -EIO; regdump->coherency_features = coherency_features; @@ -135,11 +188,15 @@ int kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev, if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) { u32 l2_features = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_FEATURES)); + u32 l2_config = + kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG)); + - if (kbase_is_gpu_lost(kbdev)) + if (kbase_is_gpu_removed(kbdev)) return -EIO; regdump->l2_features = l2_features; + regdump->l2_config = l2_config; } return 0; diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c 
b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c index 8b320c7..d7edf30 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - - /* * GPU backend instrumentation APIs. 
*/ @@ -29,7 +26,7 @@ #include #include #include -#include +#include #include @@ -71,12 +68,12 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, /* Configure */ prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT; -#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS - if (kbdev->hwcnt.backend.use_secondary_override) +#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS + prfcnt_config |= kbdev->hwcnt.backend.override_counter_set + << PRFCNT_CONFIG_SETSELECT_SHIFT; #else - if (enable->use_secondary) + prfcnt_config |= enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT; #endif - prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT; kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), prfcnt_config | PRFCNT_CONFIG_MODE_OFF); @@ -87,7 +84,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, enable->dump_buffer >> 32); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), - enable->jm_bm); + enable->fe_bm); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), enable->shader_bm); @@ -110,7 +107,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, err = 0; - dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx); + dev_dbg(kbdev->dev, "HW counters dumping set-up for context %pK", kctx); return err; out_err: return err; @@ -170,11 +167,10 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); - dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", + dev_dbg(kbdev->dev, "HW counters dumping disabled for context %pK", kctx); err = 0; - out: return err; } @@ -194,7 +190,8 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) { /* HW counters are disabled or another dump is ongoing, or we're - * resetting */ + * resetting + */ goto unlock; } @@ -204,7 +201,6 @@ int 
kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) */ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING; - /* Reconfigure the dump address */ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), kbdev->hwcnt.addr & 0xFFFFFFFF); @@ -218,14 +214,13 @@ int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_PRFCNT_SAMPLE); - dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx); + dev_dbg(kbdev->dev, "HW counters dumping done for context %pK", kctx); err = 0; unlock: spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - return err; } KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump); @@ -254,40 +249,6 @@ bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx, } KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete); -void kbasep_cache_clean_worker(struct work_struct *data) -{ - struct kbase_device *kbdev; - unsigned long flags, pm_flags; - - kbdev = container_of(data, struct kbase_device, - hwcnt.backend.cache_clean_work); - - spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags); - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - - /* Clean and invalidate the caches so we're sure the mmu tables for the - * dump buffer is valid. 
- */ - KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == - KBASE_INSTR_STATE_REQUEST_CLEAN); - kbase_gpu_start_cache_clean_nolock(kbdev); - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags); - - kbase_gpu_wait_cache_clean(kbdev); - - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == - KBASE_INSTR_STATE_REQUEST_CLEAN); - /* All finished and idle */ - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; - kbdev->hwcnt.backend.triggered = 1; - wake_up(&kbdev->hwcnt.backend.wait); - - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); -} - - void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev) { unsigned long flags; @@ -298,20 +259,10 @@ void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev) kbdev->hwcnt.backend.triggered = 1; wake_up(&kbdev->hwcnt.backend.wait); } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) { - if (kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE) { - /* All finished and idle */ - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; - kbdev->hwcnt.backend.triggered = 1; - wake_up(&kbdev->hwcnt.backend.wait); - } else { - int ret; - /* Always clean and invalidate the cache after a successful dump - */ - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN; - ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq, - &kbdev->hwcnt.backend.cache_clean_work); - KBASE_DEBUG_ASSERT(ret); - } + /* All finished and idle */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + kbdev->hwcnt.backend.triggered = 1; + wake_up(&kbdev->hwcnt.backend.wait); } spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); @@ -353,7 +304,8 @@ int kbase_instr_hwcnt_clear(struct kbase_context *kctx) spin_lock_irqsave(&kbdev->hwcnt.lock, flags); /* Check it's the context previously set up and we're not already - * dumping */ + * dumping + */ if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state != 
KBASE_INSTR_STATE_IDLE) goto out; @@ -373,39 +325,45 @@ KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear); int kbase_instr_backend_init(struct kbase_device *kbdev) { - int ret = 0; + spin_lock_init(&kbdev->hwcnt.lock); kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; init_waitqueue_head(&kbdev->hwcnt.backend.wait); - INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work, - kbasep_cache_clean_worker); - kbdev->hwcnt.backend.triggered = 0; -#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS - kbdev->hwcnt.backend.use_secondary_override = false; +#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS +/* Use the build time option for the override default. */ +#if defined(CONFIG_MALI_PRFCNT_SET_SECONDARY) + kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_SECONDARY; +#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY) + kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_TERTIARY; +#else + /* Default to primary */ + kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_PRIMARY; #endif - - kbdev->hwcnt.backend.cache_clean_wq = - alloc_workqueue("Mali cache cleaning workqueue", 0, 1); - if (NULL == kbdev->hwcnt.backend.cache_clean_wq) - ret = -EINVAL; - - return ret; +#endif + return 0; } void kbase_instr_backend_term(struct kbase_device *kbdev) { - destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq); + CSTD_UNUSED(kbdev); } -#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS +#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev) { - debugfs_create_bool("hwcnt_use_secondary", S_IRUGO | S_IWUSR, - kbdev->mali_debugfs_directory, - &kbdev->hwcnt.backend.use_secondary_override); + /* No validation is done on the debugfs input. Invalid input could cause + * performance counter errors. This is acceptable since this is a debug + * only feature and users should know what they are doing. 
+ * + * Valid inputs are the values accepted by the SET_SELECT bits of the + * PRFCNT_CONFIG register as defined in the architecture specification. + */ + debugfs_create_u8("hwcnt_set_select", S_IRUGO | S_IWUSR, + kbdev->mali_debugfs_directory, + (u8 *)&kbdev->hwcnt.backend.override_counter_set); } #endif diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h index 9930968..e356348 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014, 2016, 2018, 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2016, 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /* @@ -27,6 +26,8 @@ #ifndef _KBASE_INSTR_DEFS_H_ #define _KBASE_INSTR_DEFS_H_ +#include + /* * Instrumentation State Machine States */ @@ -37,8 +38,6 @@ enum kbase_instr_state { KBASE_INSTR_STATE_IDLE, /* Hardware is currently dumping a frame. */ KBASE_INSTR_STATE_DUMPING, - /* We've requested a clean to occur on a workqueue */ - KBASE_INSTR_STATE_REQUEST_CLEAN, /* An error has occured during DUMPING (page fault). 
*/ KBASE_INSTR_STATE_FAULT }; @@ -47,14 +46,11 @@ enum kbase_instr_state { struct kbase_instr_backend { wait_queue_head_t wait; int triggered; -#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS - bool use_secondary_override; +#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS + enum kbase_hwcnt_physical_set override_counter_set; #endif enum kbase_instr_state state; - struct workqueue_struct *cache_clean_wq; - struct work_struct cache_clean_work; }; #endif /* _KBASE_INSTR_DEFS_H_ */ - diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h index 2254b9f..332cc69 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014, 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ - - /* * Backend-specific HW access instrumentation APIs */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h index ca3c048..2671ce5 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /* diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c index 8696c6a..a29f7ef 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2016,2018-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016, 2018-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,17 +17,15 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #include -#include +#include #include #include -#if !defined(CONFIG_MALI_NO_MALI) +#if !IS_ENABLED(CONFIG_MALI_NO_MALI) /* GPU IRQ Tags */ #define JOB_IRQ_TAG 0 @@ -72,7 +71,12 @@ static irqreturn_t kbase_job_irq_handler(int irq, void *data) dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); +#if MALI_USE_CSF + /* call the csf interrupt handler */ + kbase_csf_interrupt(kbdev, val); +#else kbase_job_done(kbdev, val); +#endif spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -210,24 +214,25 @@ int kbase_set_custom_irq_handler(struct kbase_device *kbdev, int result = 0; irq_handler_t requested_irq_handler = NULL; - KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) && - (GPU_IRQ_HANDLER >= irq_type)); + KBASE_DEBUG_ASSERT((irq_type >= JOB_IRQ_HANDLER) && + (irq_type <= GPU_IRQ_HANDLER)); /* Release previous handler */ if (kbdev->irqs[irq_type].irq) free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type)); - requested_irq_handler = (NULL != custom_handler) ? custom_handler : - kbase_handler_table[irq_type]; + requested_irq_handler = (custom_handler != NULL) ? 
+ custom_handler : + kbase_handler_table[irq_type]; - if (0 != request_irq(kbdev->irqs[irq_type].irq, - requested_irq_handler, + if (request_irq(kbdev->irqs[irq_type].irq, requested_irq_handler, kbdev->irqs[irq_type].flags | IRQF_SHARED, - dev_name(kbdev->dev), kbase_tag(kbdev, irq_type))) { + dev_name(kbdev->dev), + kbase_tag(kbdev, irq_type)) != 0) { result = -EINVAL; dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", kbdev->irqs[irq_type].irq, irq_type); -#ifdef CONFIG_SPARSE_IRQ +#if IS_ENABLED(CONFIG_SPARSE_IRQ) dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); #endif /* CONFIG_SPARSE_IRQ */ } @@ -456,7 +461,7 @@ int kbase_install_interrupts(struct kbase_device *kbdev) if (err) { dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", kbdev->irqs[i].irq, i); -#ifdef CONFIG_SPARSE_IRQ +#if IS_ENABLED(CONFIG_SPARSE_IRQ) dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); #endif /* CONFIG_SPARSE_IRQ */ goto release; @@ -496,4 +501,4 @@ void kbase_synchronize_irqs(struct kbase_device *kbdev) KBASE_EXPORT_TEST_API(kbase_synchronize_irqs); -#endif /* !defined(CONFIG_MALI_NO_MALI) */ +#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c index bb4f548..888aa59 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,11 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - /* * Register backend context / address space management */ @@ -58,8 +56,10 @@ static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev, lockdep_assert_held(&js_devdata->runpool_mutex); lockdep_assert_held(&kbdev->hwaccess_lock); +#if !MALI_USE_CSF /* Attribute handling */ kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx); +#endif /* Allow it to run jobs */ kbasep_js_set_submit_allowed(js_devdata, kctx); @@ -188,8 +188,8 @@ int kbase_backend_find_and_release_free_address_space( } /* Context was retained while locks were dropped, - * continue looking for free AS */ - + * continue looking for free AS + */ mutex_unlock(&js_devdata->runpool_mutex); mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h index 7cda61a..e29ace7 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2016, 2018-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,11 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - /* * Register-based HW access backend specific definitions */ @@ -78,11 +76,12 @@ struct slot_rb { * The hwaccess_lock (a spinlock) must be held when accessing this structure */ struct kbase_backend_data { +#if !MALI_USE_CSF struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS]; - struct hrtimer scheduling_timer; bool timer_running; +#endif bool suspend_timer; atomic_t reset_gpu; @@ -92,13 +91,16 @@ struct kbase_backend_data { /* kbase_prepare_to_reset_gpu has been called */ #define KBASE_RESET_GPU_PREPARED 1 /* kbase_reset_gpu has been called - the reset will now definitely happen - * within the timeout period */ + * within the timeout period + */ #define KBASE_RESET_GPU_COMMITTED 2 /* The GPU reset process is currently occuring (timeout has expired or - * kbasep_try_reset_gpu_early was called) */ + * kbasep_try_reset_gpu_early was called) + */ #define KBASE_RESET_GPU_HAPPENING 3 /* Reset the GPU silently, used when resetting the GPU as part of normal - * behavior (e.g. when exiting protected mode). */ + * behavior (e.g. when exiting protected mode). 
+ */ #define KBASE_RESET_GPU_SILENT 4 struct workqueue_struct *reset_workq; struct work_struct reset_work; diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c index fa6bc83..ae0377f 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ /* @@ -33,16 +32,20 @@ #include #include #include +#include #include -#include +#include #include #include +#include static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev); +static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev, + const u64 affinity, const u64 limited_core_mask); static u64 kbase_job_write_affinity(struct kbase_device *kbdev, base_jd_core_req core_req, - int js) + int js, const u64 limited_core_mask) { u64 affinity; @@ -71,14 +74,21 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev, */ if (js == 2 && num_core_groups > 1) affinity &= coherency_info->group[1].core_mask; - else + else if (num_core_groups > 1) affinity &= coherency_info->group[0].core_mask; + else + affinity &= kbdev->gpu_props.curr_config.shader_present; } else { /* Use all cores */ affinity = kbdev->pm.backend.shaders_avail & kbdev->pm.debug_core_mask[js]; } + if (core_req & BASE_JD_REQ_LIMITED_CORE_MASK) { + /* Limiting affinity due to BASE_JD_REQ_LIMITED_CORE_MASK by applying the limited core mask. */ + affinity = kbasep_apply_limited_core_mask(kbdev, affinity, limited_core_mask); + } + if (unlikely(!affinity)) { #ifdef CONFIG_MALI_DEBUG u64 shaders_ready = @@ -88,6 +98,16 @@ static u64 kbase_job_write_affinity(struct kbase_device *kbdev, #endif affinity = kbdev->pm.backend.shaders_avail; + + if (core_req & BASE_JD_REQ_LIMITED_CORE_MASK) { + /* Limiting affinity again to make sure it only enables shader cores with backed TLS memory. 
*/ + affinity = kbasep_apply_limited_core_mask(kbdev, affinity, limited_core_mask); + +#ifdef CONFIG_MALI_DEBUG + /* affinity should never be 0 */ + WARN_ON(!affinity); +#endif + } } kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), @@ -168,7 +188,7 @@ static u64 select_job_chain(struct kbase_jd_atom *katom) } dev_dbg(kctx->kbdev->dev, - "Selected job chain 0x%llx for end atom %p in state %d\n", + "Selected job chain 0x%llx for end atom %pK in state %d\n", jc, (void *)katom, (int)rp->state); katom->jc = jc; @@ -192,7 +212,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, /* Command register must be available */ KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx)); - dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %p\n", + dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %pK\n", jc_head, (void *)katom); kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), @@ -200,10 +220,12 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), jc_head >> 32); - affinity = kbase_job_write_affinity(kbdev, katom->core_req, js); + affinity = kbase_job_write_affinity(kbdev, katom->core_req, js, + kctx->limited_core_mask); /* start MMU, medium priority, cache clean/flush on end, clean/flush on - * start */ + * start + */ cfg = kctx->as_nr; if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION) && @@ -255,7 +277,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, katom->start_timestamp = ktime_get(); /* GO ! 
*/ - dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx", + dev_dbg(kbdev->dev, "JS: Submitting atom %pK from ctx %pK to js[%d] with head=0x%llx", katom, kctx, js, jc_head); KBASE_KTRACE_ADD_JM_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js, @@ -277,7 +299,8 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, katom, &kbdev->gpu_props.props.raw_props.js_features[js], "ctx_nr,atom_nr"); -#ifdef CONFIG_GPU_TRACEPOINTS + kbase_kinstr_jm_atom_hw_submit(katom); +#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) if (!kbase_backend_nr_atoms_submitted(kbdev, js)) { /* If this is the only job on the slot, trace it as starting */ char js_string[16]; @@ -328,7 +351,8 @@ static void kbasep_job_slot_update_head_start_timestamp( /* Only update the timestamp if it's a better estimate * than what's currently stored. This is because our * estimate that accounts for the throttle time may be - * too much of an overestimate */ + * too much of an overestimate + */ katom->start_timestamp = end_timestamp; } } @@ -371,9 +395,9 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) /* treat failed slots as finished slots */ u32 finished = (done & 0xFFFF) | failed; - /* Note: This is inherently unfair, as we always check - * for lower numbered interrupts before the higher - * numbered ones.*/ + /* Note: This is inherently unfair, as we always check for lower + * numbered interrupts before the higher numbered ones. 
+ */ i = ffs(finished) - 1; KBASE_DEBUG_ASSERT(i >= 0); @@ -385,7 +409,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) if (failed & (1u << i)) { /* read out the job slot status code if the job - * slot reported failure */ + * slot reported failure + */ completion_code = kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS)); @@ -399,7 +424,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) /* Soft-stopped job - read the value of * JS_TAIL so that the job chain can - * be resumed */ + * be resumed + */ job_tail = (u64)kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_TAIL_LO)) | ((u64)kbase_reg_read(kbdev, @@ -408,21 +434,26 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) } else if (completion_code == BASE_JD_EVENT_NOT_STARTED) { /* PRLAM-10673 can cause a TERMINATED - * job to come back as NOT_STARTED, but - * the error interrupt helps us detect - * it */ + * job to come back as NOT_STARTED, + * but the error interrupt helps us + * detect it + */ completion_code = BASE_JD_EVENT_TERMINATED; } kbase_gpu_irq_evict(kbdev, i, completion_code); - /* Some jobs that encounter a BUS FAULT may result in corrupted - * state causing future jobs to hang. Reset GPU before - * allowing any other jobs on the slot to continue. */ + /* Some jobs that encounter a BUS FAULT may + * result in corrupted state causing future + * jobs to hang. Reset GPU before allowing + * any other jobs on the slot to continue. 
+ */ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3076)) { if (completion_code == BASE_JD_EVENT_JOB_BUS_FAULT) { - if (kbase_prepare_to_reset_gpu_locked(kbdev)) + if (kbase_prepare_to_reset_gpu_locked( + kbdev, + RESET_FLAGS_NONE)) kbase_reset_gpu_locked(kbdev); } } @@ -480,7 +511,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) if ((rawstat >> (i + 16)) & 1) { /* There is a failed job that we've - * missed - add it back to active */ + * missed - add it back to active + */ active |= (1u << i); } } @@ -582,7 +614,8 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, } /* We are about to issue a soft stop, so mark the atom as having - * been soft stopped */ + * been soft stopped + */ target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPED; /* Mark the point where we issue the soft-stop command */ @@ -692,12 +725,40 @@ void kbase_backend_jm_kill_running_jobs_from_kctx(struct kbase_context *kctx) kbase_job_slot_hardstop(kctx, i, NULL); } +/** + * kbase_is_existing_atom_submitted_later_than_ready + * @ready: sequence number of the ready atom + * @existing: sequence number of the existing atom + * + * Returns true if the existing atom has been submitted later than the + * ready atom. It is used to understand if an atom that is ready has been + * submitted earlier than the currently running atom, so that the currently + * running atom should be preempted to allow the ready atom to run. + */ +static inline bool kbase_is_existing_atom_submitted_later_than_ready(u64 ready, u64 existing) +{ + /* No seq_nr set? */ + if (!ready || !existing) + return false; + + /* Efficiently handle the unlikely case of wrapping. + * The following code assumes that the delta between the sequence number + * of the two atoms is less than INT64_MAX. + * In the extremely unlikely case where the delta is higher, the comparison + * defaults for no preemption. 
+ * The code also assumes that the conversion from unsigned to signed types + * works because the signed integers are 2's complement. + */ + return (s64)(ready - existing) < 0; +} + void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, struct kbase_jd_atom *target_katom) { struct kbase_device *kbdev; int js = target_katom->slot_nr; int priority = target_katom->sched_priority; + int seq_nr = target_katom->seq_nr; int i; bool stop_sent = false; @@ -719,7 +780,8 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, (katom->kctx != kctx)) continue; - if (katom->sched_priority > priority) { + if ((katom->sched_priority > priority) || + (katom->kctx == kctx && kbase_is_existing_atom_submitted_later_than_ready(seq_nr, katom->seq_nr))) { if (!stop_sent) KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED( kbdev, @@ -749,7 +811,7 @@ static int softstop_start_rp_nolock( if (!(katom->core_req & BASE_JD_REQ_START_RENDERPASS)) { dev_dbg(kctx->kbdev->dev, - "Atom %p on job slot is not start RP\n", (void *)katom); + "Atom %pK on job slot is not start RP\n", (void *)katom); return -EPERM; } @@ -762,13 +824,13 @@ static int softstop_start_rp_nolock( rp->state != KBASE_JD_RP_RETRY)) return -EINVAL; - dev_dbg(kctx->kbdev->dev, "OOM in state %d with region %p\n", + dev_dbg(kctx->kbdev->dev, "OOM in state %d with region %pK\n", (int)rp->state, (void *)reg); if (WARN_ON(katom != rp->start_katom)) return -EINVAL; - dev_dbg(kctx->kbdev->dev, "Adding region %p to list %p\n", + dev_dbg(kctx->kbdev->dev, "Adding region %pK to list %pK\n", (void *)reg, (void *)&rp->oom_reg_list); list_move_tail(&reg->link, &rp->oom_reg_list); dev_dbg(kctx->kbdev->dev, "Added region to list\n"); @@ -813,9 +875,9 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) if (timeout != 0) goto exit; - if (kbase_prepare_to_reset_gpu(kbdev)) { + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) { dev_err(kbdev->dev, - "Issueing GPU soft-reset because jobs failed to be 
killed (within %d ms) as part of context termination (e.g. process exit)\n", + "Issuing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n", ZAP_TIMEOUT); kbase_reset_gpu(kbdev); } @@ -823,7 +885,7 @@ void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx) /* Wait for the reset to complete */ kbase_reset_gpu_wait(kbdev); exit: - dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx); + dev_dbg(kbdev->dev, "Zap: Finished Context %pK", kctx); /* Ensure that the signallers of the waitqs have finished */ mutex_lock(&kctx->jctx.lock); @@ -884,7 +946,7 @@ KBASE_EXPORT_TEST_API(kbase_job_slot_term); void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, struct kbase_jd_atom *target_katom, u32 sw_flags) { - dev_dbg(kbdev->dev, "Soft-stop atom %p with flags 0x%x (s:%d)\n", + dev_dbg(kbdev->dev, "Soft-stop atom %pK with flags 0x%x (s:%d)\n", target_katom, sw_flags, js); KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK)); @@ -988,6 +1050,33 @@ void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, } } +int kbase_reset_gpu_prevent_and_wait(struct kbase_device *kbdev) +{ + WARN(true, "%s Not implemented for JM GPUs", __func__); + return -EINVAL; +} + +int kbase_reset_gpu_try_prevent(struct kbase_device *kbdev) +{ + WARN(true, "%s Not implemented for JM GPUs", __func__); + return -EINVAL; +} + +void kbase_reset_gpu_allow(struct kbase_device *kbdev) +{ + WARN(true, "%s Not implemented for JM GPUs", __func__); +} + +void kbase_reset_gpu_assert_prevented(struct kbase_device *kbdev) +{ + WARN(true, "%s Not implemented for JM GPUs", __func__); +} + +void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev) +{ + WARN(true, "%s Not implemented for JM GPUs", __func__); +} + static void kbase_debug_dump_registers(struct kbase_device *kbdev) { int i; @@ -1054,13 +1143,15 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) /* Make sure the timer has completed - 
this cannot be done from * interrupt context, so this cannot be done within - * kbasep_try_reset_gpu_early. */ + * kbasep_try_reset_gpu_early. + */ hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer); if (kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { /* This would re-activate the GPU. Since it's already idle, - * there's no need to reset it */ + * there's no need to reset it + */ atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING); kbase_disjoint_state_down(kbdev); @@ -1081,14 +1172,16 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) kbdev->irq_reset_flush = true; /* Disable IRQ to avoid IRQ handlers to kick in after releasing the - * spinlock; this also clears any outstanding interrupts */ + * spinlock; this also clears any outstanding interrupts + */ kbase_pm_disable_interrupts_nolock(kbdev); spin_unlock(&kbdev->mmu_mask_change); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); /* Ensure that any IRQ handlers have finished - * Must be done without any locks IRQ handlers will take */ + * Must be done without any locks IRQ handlers will take + */ kbase_synchronize_irqs(kbdev); /* Flush out any in-flight work items */ @@ -1099,7 +1192,8 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TMIX_8463)) { /* Ensure that L2 is not transitioning when we send the reset - * command */ + * command + */ while (--max_loops && kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) ; @@ -1114,14 +1208,16 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) /* All slot have been soft-stopped and we've waited * SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point we * assume that anything that is still left on the GPU is stuck there and - * we'll kill it when we reset the GPU */ + * we'll kill it when we reset the GPU + */ if (!silent) dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", 
RESET_TIMEOUT); /* Output the state of some interesting registers to help in the - * debugging of GPU resets */ + * debugging of GPU resets + */ if (!silent) kbase_debug_dump_registers(kbdev); @@ -1160,7 +1256,8 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) kbase_pm_update_cores_state(kbdev); /* Synchronously request and wait for those cores, because if - * instrumentation is enabled it would need them immediately. */ + * instrumentation is enabled it would need them immediately. + */ kbase_pm_wait_for_desired_state(kbdev); mutex_unlock(&kbdev->pm.lock); @@ -1237,7 +1334,8 @@ static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev) /* Check that the reset has been committed to (i.e. kbase_reset_gpu has * been called), and that no other thread beat this thread to starting - * the reset */ + * the reset + */ if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) != KBASE_RESET_GPU_COMMITTED) { @@ -1261,6 +1359,7 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev) /** * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU * @kbdev: kbase device + * @flags: Bitfield indicating impact of reset (see flag defines) * * This function just soft-stops all the slots to ensure that as many jobs as * possible are saved. @@ -1271,12 +1370,23 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev) * false - Another thread is performing a reset, kbase_reset_gpu should * not be called. 
*/ -bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev) +bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, + unsigned int flags) { int i; + CSTD_UNUSED(flags); KBASE_DEBUG_ASSERT(kbdev); +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbase_pm_is_gpu_lost(kbdev)) { + /* GPU access has been removed, reset will be done by + * Arbiter instead + */ + return false; + } +#endif + if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING, KBASE_RESET_GPU_PREPARED) != @@ -1293,14 +1403,14 @@ bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev) return true; } -bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev) +bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags) { - unsigned long flags; + unsigned long lock_flags; bool ret; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - ret = kbase_prepare_to_reset_gpu_locked(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(&kbdev->hwaccess_lock, lock_flags); + ret = kbase_prepare_to_reset_gpu_locked(kbdev, flags); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, lock_flags); return ret; } @@ -1321,7 +1431,8 @@ void kbase_reset_gpu(struct kbase_device *kbdev) KBASE_DEBUG_ASSERT(kbdev); /* Note this is an assert/atomic_set because it is a software issue for - * a race to be occuring here */ + * a race to be occurring here + */ KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_PREPARED); atomic_set(&kbdev->hwaccess.backend.reset_gpu, @@ -1344,7 +1455,8 @@ void kbase_reset_gpu_locked(struct kbase_device *kbdev) KBASE_DEBUG_ASSERT(kbdev); /* Note this is an assert/atomic_set because it is a software issue for - * a race to be occuring here */ + * a race to be occurring here + */ KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) == KBASE_RESET_GPU_PREPARED); atomic_set(&kbdev->hwaccess.backend.reset_gpu, @@ -1419,3 +1531,21 @@ void 
kbase_reset_gpu_term(struct kbase_device *kbdev) { destroy_workqueue(kbdev->hwaccess.backend.reset_workq); } + +static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev, + const u64 affinity, const u64 limited_core_mask) +{ + const u64 result = affinity & limited_core_mask; + +#ifdef CONFIG_MALI_DEBUG + dev_dbg(kbdev->dev, + "Limiting affinity due to BASE_JD_REQ_LIMITED_CORE_MASK from 0x%lx to 0x%lx (mask is 0x%lx)\n", + (unsigned long int)affinity, + (unsigned long int)result, + (unsigned long int)limited_core_mask); +#else + CSTD_UNUSED(kbdev); +#endif + + return result; +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h index 1419b59..1039e85 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2016, 2018-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016, 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - - /* * Job Manager backend-specific low-level APIs. 
*/ @@ -34,7 +31,7 @@ #include #include -#include +#include /** * kbase_job_submit_nolock() - Submit a job to a certain job-slot @@ -62,7 +59,7 @@ void kbase_job_submit_nolock(struct kbase_device *kbdev, void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code, u64 job_tail, ktime_t *end_timestamp); -#ifdef CONFIG_GPU_TRACEPOINTS +#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) static inline char *kbasep_make_job_slot_string(int js, char *js_string, size_t js_size) { @@ -71,11 +68,13 @@ static inline char *kbasep_make_job_slot_string(int js, char *js_string, } #endif +#if !MALI_USE_CSF static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, struct kbase_context *kctx) { return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT)); } +#endif /** @@ -94,6 +93,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, int js); +#if !MALI_USE_CSF /** * kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop * on the specified atom @@ -112,6 +112,7 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, u32 action, base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom); +#endif /* !MALI_USE_CSF */ /** * kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c index 4e4ed05..8ee897f 100755 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,11 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - /* * Register-based HW access backend specific APIs */ @@ -33,16 +31,19 @@ #include #include #include +#include #include -#include +#include #include #include /* Return whether the specified ringbuffer is empty. HW access lock must be - * held */ + * held + */ #define SLOT_RB_EMPTY(rb) (rb->write_idx == rb->read_idx) /* Return number of atoms currently in the specified ringbuffer. HW access lock - * must be held */ + * must be held + */ #define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx) static void kbase_gpu_release_atom(struct kbase_device *kbdev, @@ -253,6 +254,8 @@ static bool kbase_gpu_check_secure_atoms(struct kbase_device *kbdev, int kbase_backend_slot_free(struct kbase_device *kbdev, int js) { + lockdep_assert_held(&kbdev->hwaccess_lock); + if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) != KBASE_RESET_GPU_NOT_PENDING) { /* The GPU is being reset - so prevent submission */ @@ -278,15 +281,19 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, break; case KBASE_ATOM_GPU_RB_SUBMITTED: + kbase_kinstr_jm_atom_hw_release(katom); /* Inform power management at start/finish of atom so it can * update its GPU utilisation metrics. Mark atom as not - * submitted beforehand. */ + * submitted beforehand. 
+ */ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; kbase_pm_metrics_update(kbdev, end_timestamp); + /* Inform platform at start/finish of atom */ + kbasep_platform_event_atom_complete(katom); + if (katom->core_req & BASE_JD_REQ_PERMON) kbase_pm_release_gpu_cycle_counter_nolock(kbdev); - /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ KBASE_TLSTREAM_TL_NRET_ATOM_LPU(kbdev, katom, &kbdev->gpu_props.props.raw_props.js_features @@ -296,6 +303,8 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, &kbdev->gpu_props.props.raw_props.js_features [katom->slot_nr]); + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + case KBASE_ATOM_GPU_RB_READY: /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ @@ -540,7 +549,8 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev, kbdev); /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV * should ensure that we are not already transitiong, and that - * there are no atoms currently on the GPU. */ + * there are no atoms currently on the GPU. + */ WARN_ON(kbdev->protected_mode_transition); WARN_ON(kbase_gpu_atoms_submitted_any(kbdev)); /* If hwcnt is disabled, it means we didn't clean up correctly @@ -566,19 +576,15 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, /* We couldn't disable atomically, so kick off a worker */ if (!kbdev->protected_mode_hwcnt_disabled) { -#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE - queue_work(system_wq, - &kbdev->protected_mode_hwcnt_disable_work); -#else - queue_work(system_highpri_wq, + kbase_hwcnt_context_queue_work( + kbdev->hwcnt_gpu_ctx, &kbdev->protected_mode_hwcnt_disable_work); -#endif return -EAGAIN; } - /* Once reaching this point GPU must be - * switched to protected mode or hwcnt - * re-enabled. */ + /* Once reaching this point GPU must be switched to protected + * mode or hwcnt re-enabled. 
+ */ if (kbase_pm_protected_entry_override_enable(kbdev)) return -EAGAIN; @@ -618,7 +624,7 @@ static int kbase_jm_enter_protected_mode(struct kbase_device *kbdev, KBASE_PM_CORE_L2) || kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2) || - kbase_is_gpu_lost(kbdev)) { + kbase_is_gpu_removed(kbdev)) { /* * The L2 is still powered, wait for all * the users to finish with it before doing @@ -718,7 +724,8 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(kbdev, kbdev); /* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV * should ensure that we are not already transitiong, and that - * there are no atoms currently on the GPU. */ + * there are no atoms currently on the GPU. + */ WARN_ON(kbdev->protected_mode_transition); WARN_ON(kbase_gpu_atoms_submitted_any(kbdev)); @@ -764,8 +771,8 @@ static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); /* Only return if head atom or previous atom - * already removed - as atoms must be returned - * in order */ + * already removed - as atoms must be returned in order + */ if (idx == 0 || katom[0]->gpu_rb_state == KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { kbase_gpu_dequeue_atom(kbdev, js, NULL); @@ -811,7 +818,8 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->hwaccess_lock); #ifdef CONFIG_MALI_ARBITER_SUPPORT - if (kbase_reset_gpu_is_active(kbdev) || kbase_is_gpu_lost(kbdev)) + if (kbase_reset_gpu_is_active(kbdev) || + kbase_is_gpu_removed(kbdev)) #else if (kbase_reset_gpu_is_active(kbdev)) #endif @@ -843,7 +851,7 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) break; katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV; + KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV; /* ***TRANSITION TO HIGHER STATE*** */ /* fallthrough */ @@ -907,12 +915,14 @@ void 
kbase_backend_slot_update(struct kbase_device *kbdev) kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); /* Set EVENT_DONE so this atom will be - completed, not unpulled. */ + * completed, not unpulled. + */ katom[idx]->event_code = BASE_JD_EVENT_DONE; /* Only return if head atom or previous * atom already removed - as atoms must - * be returned in order. */ + * be returned in order. + */ if (idx == 0 || katom[0]->gpu_rb_state == KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { kbase_gpu_dequeue_atom(kbdev, js, NULL); @@ -943,7 +953,8 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) if (idx == 1) { /* Only submit if head atom or previous - * atom already submitted */ + * atom already submitted + */ if ((katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED && katom[0]->gpu_rb_state != @@ -959,7 +970,8 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) } /* If inter-slot serialization in use then don't - * submit atom if any other slots are in use */ + * submit atom if any other slots are in use + */ if ((kbdev->serialize_jobs & KBASE_SERIALIZE_INTER_SLOT) && other_slots_busy(kbdev, js)) @@ -971,31 +983,37 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) break; #endif /* Check if this job needs the cycle counter - * enabled before submission */ + * enabled before submission + */ if (katom[idx]->core_req & BASE_JD_REQ_PERMON) kbase_pm_request_gpu_cycle_counter_l2_is_on( kbdev); kbase_job_hw_submit(kbdev, katom[idx], js); katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_SUBMITTED; + KBASE_ATOM_GPU_RB_SUBMITTED; + + /* ***TRANSITION TO HIGHER STATE*** */ + /* fallthrough */ + case KBASE_ATOM_GPU_RB_SUBMITTED: /* Inform power management at start/finish of * atom so it can update its GPU utilisation - * metrics. */ + * metrics. 
+ */ kbase_pm_metrics_update(kbdev, &katom[idx]->start_timestamp); - /* ***TRANSITION TO HIGHER STATE*** */ - /* fallthrough */ - case KBASE_ATOM_GPU_RB_SUBMITTED: - /* Atom submitted to HW, nothing else to do */ + /* Inform platform at start/finish of atom */ + kbasep_platform_event_atom_submit(katom[idx]); + break; case KBASE_ATOM_GPU_RB_RETURN_TO_JS: /* Only return if head atom or previous atom * already removed - as atoms must be returned - * in order */ + * in order + */ if (idx == 0 || katom[0]->gpu_rb_state == KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { kbase_gpu_dequeue_atom(kbdev, js, NULL); @@ -1013,7 +1031,7 @@ void kbase_backend_run_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { lockdep_assert_held(&kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Backend running atom %p\n", (void *)katom); + dev_dbg(kbdev->dev, "Backend running atom %pK\n", (void *)katom); kbase_gpu_enqueue_atom(kbdev, katom); kbase_backend_slot_update(kbdev); @@ -1074,7 +1092,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, struct kbase_context *kctx = katom->kctx; dev_dbg(kbdev->dev, - "Atom %p completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n", + "Atom %pK completed on hw with code 0x%x and job_tail 0x%llx (s:%d)\n", (void *)katom, completion_code, job_tail, js); lockdep_assert_held(&kbdev->hwaccess_lock); @@ -1098,7 +1116,8 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, * BASE_JD_REQ_SKIP_CACHE_END is set, the GPU cache is not * flushed. To prevent future evictions causing possible memory * corruption we need to flush the cache manually before any - * affected memory gets reused. */ + * affected memory gets reused. 
+ */ katom->need_cache_flush_cores_retained = true; } @@ -1181,7 +1200,8 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { /* Can not dequeue this atom yet - will be - * dequeued when atom at idx0 completes */ + * dequeued when atom at idx0 completes + */ katom_idx1->event_code = BASE_JD_EVENT_STOPPED; kbase_gpu_mark_atom_for_return(kbdev, katom_idx1); @@ -1194,7 +1214,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, if (job_tail != 0 && job_tail != katom->jc) { /* Some of the job has been executed */ dev_dbg(kbdev->dev, - "Update job chain address of atom %p to resume from 0x%llx\n", + "Update job chain address of atom %pK to resume from 0x%llx\n", (void *)katom, job_tail); katom->jc = job_tail; @@ -1214,7 +1234,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, * - Schedule out the parent context if necessary, and schedule a new * one in. */ -#ifdef CONFIG_GPU_TRACEPOINTS +#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) { /* The atom in the HEAD */ struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, @@ -1255,7 +1275,7 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, if (katom) { dev_dbg(kbdev->dev, - "Cross-slot dependency %p has become runnable.\n", + "Cross-slot dependency %pK has become runnable.\n", (void *)katom); /* Check if there are lower priority jobs to soft stop */ @@ -1268,7 +1288,8 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, kbase_pm_update_state(kbdev); /* Job completion may have unblocked other atoms. Try to update all job - * slots */ + * slots + */ kbase_backend_slot_update(kbdev); } @@ -1319,7 +1340,8 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK; /* As the atom was not removed, increment the * index so that we read the correct atom in the - * next iteration. */ + * next iteration. 
+ */ atom_idx++; continue; } @@ -1422,7 +1444,8 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, katom_idx0_valid = (katom_idx0 == katom); /* If idx0 is to be removed and idx1 is on the same context, * then idx1 must also be removed otherwise the atoms might be - * returned out of order */ + * returned out of order + */ if (katom_idx1) katom_idx1_valid = (katom_idx1 == katom) || (katom_idx0_valid && @@ -1469,7 +1492,8 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT)) == 0) { /* idx0 has already completed - stop - * idx1 if needed*/ + * idx1 if needed + */ if (katom_idx1_valid) { kbase_gpu_stop_atom(kbdev, js, katom_idx1, @@ -1478,7 +1502,8 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, } } else { /* idx1 is in NEXT registers - attempt - * to remove */ + * to remove + */ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), @@ -1493,7 +1518,8 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, JS_HEAD_NEXT_HI)) != 0) { /* idx1 removed successfully, - * will be handled in IRQ */ + * will be handled in IRQ + */ kbase_gpu_remove_atom(kbdev, katom_idx1, action, true); @@ -1507,7 +1533,8 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, ret = true; } else if (katom_idx1_valid) { /* idx0 has already completed, - * stop idx1 if needed */ + * stop idx1 if needed + */ kbase_gpu_stop_atom(kbdev, js, katom_idx1, action); @@ -1526,7 +1553,8 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, * flow was also interrupted, and this function * might not enter disjoint state e.g. 
if we * don't actually do a hard stop on the head - * atom */ + * atom + */ kbase_gpu_stop_atom(kbdev, js, katom_idx0, action); ret = true; @@ -1554,7 +1582,8 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, ret = true; } else { /* idx1 is in NEXT registers - attempt to - * remove */ + * remove + */ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), JS_COMMAND_NOP); @@ -1564,13 +1593,15 @@ bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI)) != 0) { /* idx1 removed successfully, will be - * handled in IRQ once idx0 completes */ + * handled in IRQ once idx0 completes + */ kbase_gpu_remove_atom(kbdev, katom_idx1, action, false); } else { /* idx0 has already completed - stop - * idx1 */ + * idx1 + */ kbase_gpu_stop_atom(kbdev, js, katom_idx1, action); @@ -1644,7 +1675,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev) if (katom) dev_info(kbdev->dev, - " js%d idx%d : katom=%p gpu_rb_state=%d\n", + " js%d idx%d : katom=%pK gpu_rb_state=%d\n", js, idx, katom, katom->gpu_rb_state); else dev_info(kbdev->dev, " js%d idx%d : empty\n", diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h index c3b9f2d..d3ff203 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,11 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - /* * Register-based HW access backend specific APIs */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c index fcc0437..02d7cdb 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,11 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ - /* * Register-based HW access backend specific job scheduler APIs */ @@ -31,13 +29,14 @@ #include #include +#if !MALI_USE_CSF /* * Hold the runpool_mutex for this */ static inline bool timer_callback_should_run(struct kbase_device *kbdev) { struct kbase_backend_data *backend = &kbdev->hwaccess.backend; - s8 nr_running_ctxs; + int nr_running_ctxs; lockdep_assert_held(&kbdev->js_data.runpool_mutex); @@ -47,7 +46,8 @@ static inline bool timer_callback_should_run(struct kbase_device *kbdev) /* nr_contexts_pullable is updated with the runpool_mutex. However, the * locking in the caller gives us a barrier that ensures - * nr_contexts_pullable is up-to-date for reading */ + * nr_contexts_pullable is up-to-date for reading + */ nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable); #ifdef CONFIG_MALI_DEBUG @@ -69,10 +69,10 @@ static inline bool timer_callback_should_run(struct kbase_device *kbdev) * don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE). */ { - s8 nr_compute_ctxs = + int nr_compute_ctxs = kbasep_js_ctx_attr_count_on_runpool(kbdev, KBASEP_JS_CTX_ATTR_COMPUTE); - s8 nr_noncompute_ctxs = nr_running_ctxs - + int nr_noncompute_ctxs = nr_running_ctxs - nr_compute_ctxs; return (bool) (nr_compute_ctxs >= 2 || @@ -113,7 +113,8 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) if (atom != NULL) { /* The current version of the model doesn't support - * Soft-Stop */ + * Soft-Stop + */ if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) { u32 ticks = atom->ticks++; @@ -141,7 +142,8 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) * new soft_stop timeout. This ensures that * atoms do not miss any of the timeouts due to * races between this worker and the thread - * changing the timeouts. */ + * changing the timeouts. 
+ */ if (backend->timeouts_updated && ticks > soft_stop_ticks) ticks = atom->ticks = soft_stop_ticks; @@ -171,10 +173,11 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) * * Similarly, if it's about to be * decreased, the last job from another - * context has already finished, so it's - * not too bad that we observe the older - * value and register a disjoint event - * when we try soft-stopping */ + * context has already finished, so + * it's not too bad that we observe the + * older value and register a disjoint + * event when we try soft-stopping + */ if (js_devdata->nr_user_contexts_running >= disjoint_threshold) softstop_flags |= @@ -252,9 +255,9 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) } } if (reset_needed) { - dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issueing GPU soft-reset to resolve."); + dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issuing GPU soft-reset to resolve."); - if (kbase_prepare_to_reset_gpu_locked(kbdev)) + if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu_locked(kbdev); } /* the timer is re-issued if there is contexts in the run-pool */ @@ -270,9 +273,11 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) return HRTIMER_NORESTART; } +#endif /* !MALI_USE_CSF */ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) { +#if !MALI_USE_CSF struct kbasep_js_device_data *js_devdata = &kbdev->js_data; struct kbase_backend_data *backend = &kbdev->hwaccess.backend; unsigned long flags; @@ -284,11 +289,12 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) spin_lock_irqsave(&kbdev->hwaccess_lock, flags); backend->timer_running = false; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - /* From now on, return value of timer_callback_should_run() will - * also cause the timer to not requeue itself. 
Its return value - * cannot change, because it depends on variables updated with - * the runpool_mutex held, which the caller of this must also - * hold */ + /* From now on, return value of timer_callback_should_run() + * will also cause the timer to not requeue itself. Its return + * value cannot change, because it depends on variables updated + * with the runpool_mutex held, which the caller of this must + * also hold + */ hrtimer_cancel(&backend->scheduling_timer); } @@ -303,25 +309,36 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) KBASE_KTRACE_ADD_JM(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u, 0u); } +#else /* !MALI_USE_CSF */ + CSTD_UNUSED(kbdev); +#endif /* !MALI_USE_CSF */ } int kbase_backend_timer_init(struct kbase_device *kbdev) { +#if !MALI_USE_CSF struct kbase_backend_data *backend = &kbdev->hwaccess.backend; hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); backend->scheduling_timer.function = timer_callback; backend->timer_running = false; +#else /* !MALI_USE_CSF */ + CSTD_UNUSED(kbdev); +#endif /* !MALI_USE_CSF */ return 0; } void kbase_backend_timer_term(struct kbase_device *kbdev) { +#if !MALI_USE_CSF struct kbase_backend_data *backend = &kbdev->hwaccess.backend; hrtimer_cancel(&backend->scheduling_timer); +#else /* !MALI_USE_CSF */ + CSTD_UNUSED(kbdev); +#endif /* !MALI_USE_CSF */ } void kbase_backend_timer_suspend(struct kbase_device *kbdev) diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h index 6576e55..5284288 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015, 2020-2021 ARM Limited. 
All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,11 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - /* * Register-based HW access backend specific job scheduler APIs */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.c index e67d12b..7131546 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.c @@ -1,12 +1,12 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -17,14 +17,12 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #include #include #include -#include +#include #include "mali_kbase_l2_mmu_config.h" /** @@ -56,23 +54,34 @@ struct l2_mmu_config_limit { /* * Zero represents no limit * - * For LBEX TBEX TTRX and TNAX: + * For LBEX TBEX TBAX TTRX and TNAX: * The value represents the number of outstanding reads (6 bits) or writes (5 bits) * * For all other GPUS it is a fraction see: mali_kbase_config_defaults.h */ static const struct l2_mmu_config_limit limits[] = { - /* GPU read write */ - {GPU_ID2_PRODUCT_LBEX, {0, GENMASK(10, 5), 5}, {0, GENMASK(16, 12), 12} }, - {GPU_ID2_PRODUCT_TBEX, {0, GENMASK(10, 5), 5}, {0, GENMASK(16, 12), 12} }, - {GPU_ID2_PRODUCT_TTRX, {0, GENMASK(12, 7), 7}, {0, GENMASK(17, 13), 13} }, - {GPU_ID2_PRODUCT_TNAX, {0, GENMASK(12, 7), 7}, {0, GENMASK(17, 13), 13} }, - {GPU_ID2_PRODUCT_TGOX, - {KBASE_3BIT_AID_32, GENMASK(14, 12), 12}, - {KBASE_3BIT_AID_32, GENMASK(17, 15), 15} }, - {GPU_ID2_PRODUCT_TNOX, - {KBASE_3BIT_AID_32, GENMASK(14, 12), 12}, - {KBASE_3BIT_AID_32, GENMASK(17, 15), 15} }, + /* GPU, read, write */ + {GPU_ID2_PRODUCT_LBEX, + {0, GENMASK(10, 5), 5}, + {0, GENMASK(16, 12), 12} }, + {GPU_ID2_PRODUCT_TBEX, + {0, GENMASK(10, 5), 5}, + {0, GENMASK(16, 12), 12} }, + {GPU_ID2_PRODUCT_TBAX, + {0, GENMASK(10, 5), 5}, + {0, GENMASK(16, 12), 12} }, + {GPU_ID2_PRODUCT_TTRX, + {0, GENMASK(12, 7), 7}, + {0, GENMASK(17, 13), 13} }, + {GPU_ID2_PRODUCT_TNAX, + {0, GENMASK(12, 7), 7}, + {0, GENMASK(17, 13), 13} }, + {GPU_ID2_PRODUCT_TGOX, + {KBASE_3BIT_AID_32, GENMASK(14, 12), 12}, + {KBASE_3BIT_AID_32, GENMASK(17, 15), 15} }, + {GPU_ID2_PRODUCT_TNOX, + {KBASE_3BIT_AID_32, GENMASK(14, 12), 12}, + {KBASE_3BIT_AID_32, GENMASK(17, 15), 15} }, }; int kbase_set_mmu_quirks(struct kbase_device *kbdev) @@ -100,7 +109,7 @@ int kbase_set_mmu_quirks(struct kbase_device *kbdev) mmu_config = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG)); - if (kbase_is_gpu_lost(kbdev)) + if (kbase_is_gpu_removed(kbdev)) return -EIO; 
mmu_config &= ~(limit.read.mask | limit.write.mask); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.h index 0c779ac..07014ad 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.h @@ -1,31 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - *//* SPDX-License-Identifier: GPL-2.0 */ -/* - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c index e33fe0b..077c234 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2015, 2018-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015, 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ - - /* * "Always on" power management policy */ @@ -61,7 +58,11 @@ const struct kbase_pm_policy kbase_pm_always_on_policy_ops = { always_on_term, /* term */ always_on_shaders_needed, /* shaders_needed */ always_on_get_core_active, /* get_core_active */ + NULL, /* handle_event */ KBASE_PM_POLICY_ID_ALWAYS_ON, /* id */ +#if MALI_USE_CSF + ALWAYS_ON_PM_SCHED_FLAGS, /* pm_sched_flags */ +#endif }; KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h index e7927cf..98d35da 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2015,2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2015, 2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ - - /* * "Always on" power management policy */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c index 576c9f2..cc791df 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c @@ -1,11 +1,12 @@ - /* +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,11 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ - /* * GPU backend implementation of base kernel power management APIs */ @@ -30,14 +28,16 @@ #include #include +#if !MALI_USE_CSF #include #include #include +#endif /* !MALI_USE_CSF */ #include #include #include #include -#include +#include static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data); static void kbase_pm_hwcnt_disable_worker(struct work_struct *data); @@ -106,6 +106,11 @@ void kbase_pm_register_access_enable(struct kbase_device *kbdev) if (callbacks) callbacks->power_on_callback(kbdev); +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (WARN_ON(kbase_pm_is_gpu_lost(kbdev))) + dev_err(kbdev->dev, "Attempting to power on while GPU lost\n"); +#endif + kbdev->pm.backend.gpu_powered = true; } @@ -139,24 +144,35 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) kbdev->pm.backend.ca_cores_enabled = ~0ull; kbdev->pm.backend.gpu_powered = false; + kbdev->pm.backend.gpu_ready = false; kbdev->pm.suspending = false; #ifdef CONFIG_MALI_ARBITER_SUPPORT - kbdev->pm.gpu_lost = false; + kbase_pm_set_gpu_lost(kbdev, false); #endif #ifdef CONFIG_MALI_DEBUG kbdev->pm.backend.driver_ready_for_irqs = false; #endif /* CONFIG_MALI_DEBUG */ init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait); +#if !MALI_USE_CSF /* Initialise the metrics subsystem */ ret = kbasep_pm_metrics_init(kbdev); if (ret) return ret; +#else + mutex_init(&kbdev->pm.backend.policy_change_lock); + kbdev->pm.backend.policy_change_clamp_state_to_off = false; + /* Due to dependency on kbase_ipa_control, the metrics subsystem can't + * be initialized here. 
+ */ + CSTD_UNUSED(ret); +#endif init_waitqueue_head(&kbdev->pm.backend.reset_done_wait); kbdev->pm.backend.reset_done = false; init_waitqueue_head(&kbdev->pm.zero_active_count_wait); + init_waitqueue_head(&kbdev->pm.resume_wait); kbdev->pm.active_count = 0; spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock); @@ -213,7 +229,9 @@ pm_state_machine_fail: kbase_pm_policy_term(kbdev); kbase_pm_ca_term(kbdev); workq_fail: +#if !MALI_USE_CSF kbasep_pm_metrics_term(kbdev); +#endif return -EINVAL; } @@ -222,7 +240,8 @@ void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume) lockdep_assert_held(&kbdev->pm.lock); /* Turn clocks and interrupts on - no-op if we haven't done a previous - * kbase_pm_clock_off() */ + * kbase_pm_clock_off() + */ kbase_pm_clock_on(kbdev, is_resume); if (!is_resume) { @@ -240,7 +259,8 @@ void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume) kbase_pm_update_cores_state(kbdev); /* NOTE: We don't wait to reach the desired state, since running atoms - * will wait for that state to be reached anyway */ + * will wait for that state to be reached anyway + */ } static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) @@ -251,13 +271,20 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) struct kbase_pm_backend_data *backend = &pm->backend; unsigned long flags; +#if !MALI_USE_CSF /* Wait for power transitions to complete. We do this with no locks held * so that we don't deadlock with any pending workqueues. 
*/ kbase_pm_wait_for_desired_state(kbdev); +#endif kbase_pm_lock(kbdev); +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbase_pm_is_gpu_lost(kbdev)) + backend->poweron_required = false; +#endif + if (!backend->poweron_required) { unsigned long flags; @@ -278,6 +305,14 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) kbase_flush_mmu_wqs(kbdev); kbase_pm_lock(kbdev); +#ifdef CONFIG_MALI_ARBITER_SUPPORT + /* poweron_required may have changed while pm lock + * was released. + */ + if (kbase_pm_is_gpu_lost(kbdev)) + backend->poweron_required = false; +#endif + /* Turn off clock now that fault have been handled. We * dropped locks so poweron_required may have changed - * power back on if this is the case (effectively only @@ -296,9 +331,14 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data) if (backend->poweron_required) { backend->poweron_required = false; kbdev->pm.backend.l2_desired = true; +#if MALI_USE_CSF + kbdev->pm.backend.mcu_desired = true; +#endif kbase_pm_update_state(kbdev); kbase_pm_update_cores_state_nolock(kbdev); +#if !MALI_USE_CSF kbase_backend_slot_update(kbdev); +#endif /* !MALI_USE_CSF */ } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -451,12 +491,22 @@ static void kbase_pm_hwcnt_disable_worker(struct work_struct *data) */ backend->hwcnt_disabled = true; kbase_pm_update_state(kbdev); +#if !MALI_USE_CSF kbase_backend_slot_update(kbdev); +#endif /* !MALI_USE_CSF */ } else { /* PM state was updated while we were doing the disable, * so we need to undo the disable we just performed. 
*/ +#if MALI_USE_CSF + unsigned long lock_flags; + + kbase_csf_scheduler_spin_lock(kbdev, &lock_flags); +#endif kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); +#if MALI_USE_CSF + kbase_csf_scheduler_spin_unlock(kbdev, lock_flags); +#endif } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -476,8 +526,12 @@ void kbase_pm_do_poweroff(struct kbase_device *kbdev) if (kbdev->pm.backend.poweroff_wait_in_progress) goto unlock_hwaccess; +#if MALI_USE_CSF + kbdev->pm.backend.mcu_desired = false; +#else /* Force all cores off */ kbdev->pm.backend.shaders_desired = false; +#endif kbdev->pm.backend.l2_desired = false; kbdev->pm.backend.poweroff_wait_in_progress = true; @@ -528,20 +582,35 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); /* Power up the GPU, don't enable IRQs as we are not ready to receive - * them. */ + * them + */ ret = kbase_pm_init_hw(kbdev, flags); if (ret) { kbase_pm_unlock(kbdev); return ret; } - +#if MALI_USE_CSF + kbdev->pm.debug_core_mask = + kbdev->gpu_props.props.raw_props.shader_present; + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); + /* Set the initial value for 'shaders_avail'. It would be later + * modified only from the MCU state machine, when the shader core + * allocation enable mask request has completed. So its value would + * indicate the mask of cores that are currently being used by FW for + * the allocation of endpoints requested by CSGs. 
+ */ + kbdev->pm.backend.shaders_avail = kbase_pm_ca_get_core_mask(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); +#else kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] = kbdev->pm.debug_core_mask[1] = kbdev->pm.debug_core_mask[2] = kbdev->gpu_props.props.raw_props.shader_present; +#endif /* Pretend the GPU is active to prevent a power policy turning the GPU - * cores off */ + * cores off + */ kbdev->pm.active_count = 1; spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, @@ -553,13 +622,27 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, irq_flags); /* We are ready to receive IRQ's now as power policy is set up, so - * enable them now. */ + * enable them now. + */ #ifdef CONFIG_MALI_DEBUG kbdev->pm.backend.driver_ready_for_irqs = true; #endif kbase_pm_enable_interrupts(kbdev); + WARN_ON(!kbdev->pm.backend.gpu_powered); + /* GPU has been powered up (by kbase_pm_init_hw) and interrupts have + * been enabled, so GPU is ready for use and PM state machine can be + * exercised from this point onwards. 
+ */ + kbdev->pm.backend.gpu_ready = true; + /* Turn on the GPU and any cores needed by the policy */ +#if MALI_USE_CSF + /* Turn on the L2 caches, needed for firmware boot */ + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); + kbdev->pm.backend.l2_desired = true; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); +#endif kbase_pm_do_poweron(kbdev, false); kbase_pm_unlock(kbdev); @@ -573,6 +656,8 @@ void kbase_hwaccess_pm_halt(struct kbase_device *kbdev) mutex_lock(&kbdev->pm.lock); kbase_pm_do_poweroff(kbdev); mutex_unlock(&kbdev->pm.lock); + + kbase_pm_wait_for_poweroff_complete(kbdev); } KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt); @@ -587,10 +672,15 @@ void kbase_hwaccess_pm_term(struct kbase_device *kbdev) if (kbdev->pm.backend.hwcnt_disabled) { unsigned long flags; - +#if MALI_USE_CSF + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + kbase_csf_scheduler_spin_unlock(kbdev, flags); +#else spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#endif } /* Free any resources the policy allocated */ @@ -598,8 +688,16 @@ void kbase_hwaccess_pm_term(struct kbase_device *kbdev) kbase_pm_policy_term(kbdev); kbase_pm_ca_term(kbdev); +#if !MALI_USE_CSF /* Shut down the metrics subsystem */ kbasep_pm_metrics_term(kbdev); +#else + if (WARN_ON(mutex_is_locked(&kbdev->pm.backend.policy_change_lock))) { + mutex_lock(&kbdev->pm.backend.policy_change_lock); + mutex_unlock(&kbdev->pm.backend.policy_change_lock); + } + mutex_destroy(&kbdev->pm.backend.policy_change_lock); +#endif destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq); } @@ -611,11 +709,24 @@ void kbase_pm_power_changed(struct kbase_device *kbdev) spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_pm_update_state(kbdev); +#if !MALI_USE_CSF kbase_backend_slot_update(kbdev); +#endif /* !MALI_USE_CSF */ 
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } +#if MALI_USE_CSF +void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kbdev->pm.lock); + + kbdev->pm.debug_core_mask = new_core_mask; + kbase_pm_update_dynamic_cores_onoff(kbdev); +} +KBASE_EXPORT_TEST_API(kbase_pm_set_debug_core_mask); +#else void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask_js0, u64 new_core_mask_js1, u64 new_core_mask_js2) @@ -636,6 +747,7 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, kbase_pm_update_dynamic_cores_onoff(kbdev); } +#endif /* MALI_USE_CSF */ void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev) { @@ -651,12 +763,15 @@ void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) { /* Force power off the GPU and all cores (regardless of policy), only * after the PM active count reaches zero (otherwise, we risk turning it - * off prematurely) */ + * off prematurely) + */ kbase_pm_lock(kbdev); kbase_pm_do_poweroff(kbdev); +#if !MALI_USE_CSF kbase_backend_timer_suspend(kbdev); +#endif /* !MALI_USE_CSF */ kbase_pm_unlock(kbdev); @@ -672,12 +787,19 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) kbdev->pm.suspending = false; #ifdef CONFIG_MALI_ARBITER_SUPPORT - kbdev->pm.gpu_lost = false; + if (kbase_pm_is_gpu_lost(kbdev)) { + dev_dbg(kbdev->dev, "%s: GPU lost in progress\n", __func__); + kbase_pm_unlock(kbdev); + return; + } #endif kbase_pm_do_poweron(kbdev, true); +#if !MALI_USE_CSF kbase_backend_timer_resume(kbdev); +#endif /* !MALI_USE_CSF */ + wake_up_all(&kbdev->pm.resume_wait); kbase_pm_unlock(kbdev); } @@ -685,63 +807,50 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev) { unsigned long flags; - struct kbase_pm_backend_data *backend = &kbdev->pm.backend; ktime_t end_timestamp = ktime_get(); + struct kbase_arbiter_vm_state *arb_vm_state = 
kbdev->pm.arb_vm_state; - /* Full GPU reset will have been done by hypervisor, so cancel */ - atomic_set(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_NOT_PENDING); - hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer); - - /* GPU is no longer mapped to VM. So no interrupts will be received - * and Mali registers have been replaced by dummy RAM - */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - spin_lock(&kbdev->mmu_mask_change); - kbdev->irq_reset_flush = true; - spin_unlock(&kbdev->mmu_mask_change); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - kbase_synchronize_irqs(kbdev); - kbase_flush_mmu_wqs(kbdev); - kbdev->irq_reset_flush = false; + if (!kbdev->arb.arb_if) + return; - /* Clear all jobs running on the GPU */ mutex_lock(&kbdev->pm.lock); - kbdev->pm.gpu_lost = true; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbdev->protected_mode = false; - if (!kbdev->pm.backend.protected_entry_transition_override) - kbase_backend_reset(kbdev, &end_timestamp); - kbase_pm_metrics_update(kbdev, NULL); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_lock(&arb_vm_state->vm_state_lock); + if (kbdev->pm.backend.gpu_powered && + !kbase_pm_is_gpu_lost(kbdev)) { + kbase_pm_set_gpu_lost(kbdev, true); + + /* GPU is no longer mapped to VM. 
So no interrupts will + * be received and Mali registers have been replaced by + * dummy RAM + */ + WARN(!kbase_is_gpu_removed(kbdev), + "GPU is still available after GPU lost event\n"); - /* Cancel any pending HWC dumps */ - spin_lock_irqsave(&kbdev->hwcnt.lock, flags); - kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; - kbdev->hwcnt.backend.triggered = 1; - wake_up(&kbdev->hwcnt.backend.wait); - spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + /* Full GPU reset will have been done by hypervisor, so + * cancel + */ + atomic_set(&kbdev->hwaccess.backend.reset_gpu, + KBASE_RESET_GPU_NOT_PENDING); + hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer); + kbase_synchronize_irqs(kbdev); - /* Wait for all threads keeping GPU active to complete */ - mutex_unlock(&kbdev->pm.lock); - wait_event(kbdev->pm.zero_active_count_wait, - kbdev->pm.active_count == 0); - mutex_lock(&kbdev->pm.lock); + /* Clear all jobs running on the GPU */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->protected_mode = false; + kbase_backend_reset(kbdev, &end_timestamp); + kbase_pm_metrics_update(kbdev, NULL); + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - /* Update state to GPU off */ - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbdev->pm.backend.shaders_desired = false; - kbdev->pm.backend.l2_desired = false; - backend->l2_state = KBASE_L2_OFF; - backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF; - kbdev->pm.backend.gpu_powered = false; - backend->poweroff_wait_in_progress = false; - KBASE_KTRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, 0); - wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait); - kbase_gpu_cache_clean_wait_complete(kbdev); - backend->poweroff_wait_in_progress = false; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - wake_up(&kbdev->pm.backend.poweroff_wait); + /* Cancel any pending HWC dumps */ + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + 
kbdev->hwcnt.backend.triggered = 1; + wake_up(&kbdev->hwcnt.backend.wait); + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + } + mutex_unlock(&arb_vm_state->vm_state_lock); mutex_unlock(&kbdev->pm.lock); } + #endif /* CONFIG_MALI_ARBITER_SUPPORT */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c index e7eef26..efc620f 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2013-2018, 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ /* @@ -27,9 +26,6 @@ #include #include #include -#ifdef CONFIG_MALI_NO_MALI -#include -#endif #include int kbase_pm_ca_init(struct kbase_device *kbdev) @@ -59,6 +55,14 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) spin_lock_irqsave(&kbdev->hwaccess_lock, flags); +#if MALI_USE_CSF + if (!(core_mask & kbdev->pm.debug_core_mask)) { + dev_err(kbdev->dev, + "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n", + core_mask, kbdev->pm.debug_core_mask); + goto unlock; + } +#else if (!(core_mask & kbdev->pm.debug_core_mask_all)) { dev_err(kbdev->dev, "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n", core_mask, kbdev->pm.debug_core_mask_all); @@ -69,6 +73,7 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask) dev_err(kbdev->dev, "Dynamic core scaling not supported as dummy job WA is enabled"); goto unlock; } +#endif /* MALI_USE_CSF */ pm_backend->ca_cores_enabled = core_mask; @@ -80,21 +85,32 @@ unlock: dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n", pm_backend->ca_cores_enabled); } +KBASE_EXPORT_TEST_API(kbase_devfreq_set_core_mask); #endif u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev) { -#ifdef CONFIG_MALI_DEVFREQ - struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend; +#if MALI_USE_CSF + u64 debug_core_mask = kbdev->pm.debug_core_mask; +#else + u64 debug_core_mask = kbdev->pm.debug_core_mask_all; #endif lockdep_assert_held(&kbdev->hwaccess_lock); #ifdef CONFIG_MALI_DEVFREQ - return pm_backend->ca_cores_enabled & kbdev->pm.debug_core_mask_all; + /* + * Although in the init we let the pm_backend->ca_cores_enabled to be + * the max config (it uses the base_gpu_props), at this function we need + * to limit it to be a subgroup of the curr config, otherwise the + * shaders state machine on the PM does not evolve. 
+ */ + return kbdev->gpu_props.curr_config.shader_present & + kbdev->pm.backend.ca_cores_enabled & + debug_core_mask; #else - return kbdev->gpu_props.props.raw_props.shader_present & - kbdev->pm.debug_core_mask_all; + return kbdev->gpu_props.curr_config.shader_present & + debug_core_mask; #endif } @@ -104,8 +120,8 @@ u64 kbase_pm_ca_get_instr_core_mask(struct kbase_device *kbdev) { lockdep_assert_held(&kbdev->hwaccess_lock); -#ifdef CONFIG_MALI_NO_MALI - return (((1ull) << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1); +#if MALI_USE_CSF + return kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); #else return kbdev->pm.backend.pm_shaders_core_mask; #endif diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h index 5423e96..8d169c3 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ /* diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h index f67ec65..41f3c14 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_devfreq.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /* diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c index 9eef44a..f40b753 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2016, 2018-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - - /* * "Coarse Demand" power management policy */ @@ -60,7 +57,11 @@ const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = { coarse_demand_term, /* term */ coarse_demand_shaders_needed, /* shaders_needed */ coarse_demand_get_core_active, /* get_core_active */ + NULL, /* handle_event */ KBASE_PM_POLICY_ID_COARSE_DEMAND, /* id */ +#if MALI_USE_CSF + COARSE_ON_DEMAND_PM_SCHED_FLAGS, /* pm_sched_flags */ +#endif }; KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h index 304e5d7..5e3f17e 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2012-2015,2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015, 2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - - /* * "Coarse Demand" power management policy */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h index f4bcf3e..1a0572b 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /* @@ -29,9 +28,6 @@ #include "mali_kbase_pm_always_on.h" #include "mali_kbase_pm_coarse_demand.h" -#if !MALI_CUSTOMER_RELEASE -#include "mali_kbase_pm_always_on_demand.h" -#endif /* Forward definition - see mali_kbase.h */ struct kbase_device; @@ -62,24 +58,9 @@ enum kbase_pm_core_type { KBASE_PM_CORE_STACK = STACK_PRESENT_LO }; -/** +/* * enum kbase_l2_core_state - The states used for the L2 cache & tiler power * state machine. 
- * - * @KBASE_L2_OFF: The L2 cache and tiler are off - * @KBASE_L2_PEND_ON: The L2 cache and tiler are powering on - * @KBASE_L2_RESTORE_CLOCKS: The GPU clock is restored. Conditionally used. - * @KBASE_L2_ON_HWCNT_ENABLE: The L2 cache and tiler are on, and hwcnt is being - * enabled - * @KBASE_L2_ON: The L2 cache and tiler are on, and hwcnt is enabled - * @KBASE_L2_ON_HWCNT_DISABLE: The L2 cache and tiler are on, and hwcnt is being - * disabled - * @KBASE_L2_SLOW_DOWN_CLOCKS: The GPU clock is set to appropriate or lowest - * clock. Conditionally used. - * @KBASE_L2_POWER_DOWN: The L2 cache and tiler are about to be powered off - * @KBASE_L2_PEND_OFF: The L2 cache and tiler are powering off - * @KBASE_L2_RESET_WAIT: The GPU is resetting, L2 cache and tiler power state - * are unknown */ enum kbase_l2_core_state { #define KBASEP_L2_STATE(n) KBASE_L2_ ## n, @@ -87,45 +68,19 @@ enum kbase_l2_core_state { #undef KBASEP_L2_STATE }; -/** +#if MALI_USE_CSF +/* + * enum kbase_mcu_state - The states used for the MCU state machine. + */ +enum kbase_mcu_state { +#define KBASEP_MCU_STATE(n) KBASE_MCU_ ## n, +#include "mali_kbase_pm_mcu_states.h" +#undef KBASEP_MCU_STATE +}; +#endif + +/* * enum kbase_shader_core_state - The states used for the shaders' state machine. - * - * @KBASE_SHADERS_OFF_CORESTACK_OFF: The shaders and core stacks are off - * @KBASE_SHADERS_OFF_CORESTACK_PEND_ON: The shaders are off, core stacks have - * been requested to power on and hwcnt - * is being disabled - * @KBASE_SHADERS_PEND_ON_CORESTACK_ON: Core stacks are on, shaders have been - * requested to power on. Or after doing - * partial shader on/off, checking whether - * it's the desired state. - * @KBASE_SHADERS_ON_CORESTACK_ON: The shaders and core stacks are on, and hwcnt - * already enabled. - * @KBASE_SHADERS_ON_CORESTACK_ON_RECHECK: The shaders and core stacks - * are on, hwcnt disabled, and checks - * to powering down or re-enabling - * hwcnt. 
- * @KBASE_SHADERS_WAIT_OFF_CORESTACK_ON: The shaders have been requested to - * power off, but they remain on for the - * duration of the hysteresis timer - * @KBASE_SHADERS_WAIT_GPU_IDLE: The shaders partial poweroff needs to reach - * a state where jobs on the GPU are finished - * including jobs currently running and in the - * GPU queue because of GPU2017-861 - * @KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON: The hysteresis timer has expired - * @KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON: The core stacks are on and the - * level 2 cache is being flushed. - * @KBASE_SHADERS_READY_OFF_CORESTACK_ON: The core stacks are on and the shaders - * are ready to be powered off. - * @KBASE_SHADERS_PEND_OFF_CORESTACK_ON: The core stacks are on, and the shaders - * have been requested to power off - * @KBASE_SHADERS_OFF_CORESTACK_PEND_OFF: The shaders are off, and the core stacks - * have been requested to power off - * @KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF: Shaders and corestacks are - * off, but the tick timer - * cancellation is still - * pending. - * @KBASE_SHADERS_RESET_WAIT: The GPU is resetting, shader and core stack power - * states are unknown */ enum kbase_shader_core_state { #define KBASEP_SHADER_STATE(n) KBASE_SHADERS_ ## n, @@ -137,28 +92,40 @@ enum kbase_shader_core_state { * struct kbasep_pm_metrics - Metrics data collected for use by the power * management framework. * - * @time_busy: number of ns the GPU was busy executing jobs since the - * @time_period_start timestamp. - * @time_idle: number of ns since time_period_start the GPU was not executing - * jobs since the @time_period_start timestamp. - * @busy_cl: number of ns the GPU was busy executing CL jobs. Note that - * if two CL jobs were active for 400ns, this value would be updated - * with 800. - * @busy_gl: number of ns the GPU was busy executing GL jobs. Note that - * if two GL jobs were active for 400ns, this value would be updated - * with 800. 
+ * @time_busy: the amount of time the GPU was busy executing jobs since the + * @time_period_start timestamp, in units of 256ns. This also includes + * time_in_protm, the time spent in protected mode, since it's assumed + * the GPU was busy 100% during this period. + * @time_idle: the amount of time the GPU was not executing jobs since the + * time_period_start timestamp, measured in units of 256ns. + * @time_in_protm: The amount of time the GPU has spent in protected mode since + * the time_period_start timestamp, measured in units of 256ns. + * @busy_cl: the amount of time the GPU was busy executing CL jobs. Note that + * if two CL jobs were active for 256ns, this value would be updated + * with 2 (2x256ns). + * @busy_gl: the amount of time the GPU was busy executing GL jobs. Note that + * if two GL jobs were active for 256ns, this value would be updated + * with 2 (2x256ns). */ struct kbasep_pm_metrics { u32 time_busy; u32 time_idle; +#if MALI_USE_CSF + u32 time_in_protm; +#else u32 busy_cl[2]; u32 busy_gl; +#endif }; /** * struct kbasep_pm_metrics_state - State required to collect the metrics in * struct kbasep_pm_metrics * @time_period_start: time at which busy/idle measurements started + * @ipa_control_client: Handle returned on registering DVFS as a + * kbase_ipa_control client + * @skip_gpu_active_sanity_check: Decide whether to skip GPU_ACTIVE sanity + * check in DVFS utilisation calculation * @gpu_active: true when the GPU is executing jobs. false when * not. Updated when the job scheduler informs us a job in submitted * or removed from a GPU slot. @@ -170,6 +137,7 @@ struct kbasep_pm_metrics { * @values: The current values of the power management metrics. The * kbase_pm_get_dvfs_metrics() function is used to compare these * current values with the saved values from a previous invocation. + * @initialized: tracks whether metrics_state has been initialized or not. * @timer: timer to regularly make DVFS decisions based on the power * management metrics. 
* @timer_active: boolean indicating @timer is running @@ -178,9 +146,14 @@ struct kbasep_pm_metrics { */ struct kbasep_pm_metrics_state { ktime_t time_period_start; +#if MALI_USE_CSF + void *ipa_control_client; + bool skip_gpu_active_sanity_check; +#else bool gpu_active; u32 active_cl_ctx[2]; u32 active_gl_ctx[3]; +#endif spinlock_t lock; void *platform_data; @@ -189,6 +162,7 @@ struct kbasep_pm_metrics_state { struct kbasep_pm_metrics values; #ifdef CONFIG_MALI_MIDGARD_DVFS + bool initialized; struct hrtimer timer; bool timer_active; struct kbasep_pm_metrics dvfs_last; @@ -202,8 +176,12 @@ struct kbasep_pm_metrics_state { * @work: Work item which cancels the timer * @timer: Timer for powering off the shader cores * @configured_interval: Period of GPU poweroff timer - * @configured_ticks: User-configured number of ticks to wait after the shader - * power down request is received before turning off the cores + * @default_ticks: User-configured number of ticks to wait after the shader + * power down request is received before turning off the cores + * @configured_ticks: Power-policy configured number of ticks to wait after the + * shader power down request is received before turning off + * the cores. For simple power policies, this is equivalent + * to @default_ticks. * @remaining_ticks: Number of remaining timer ticks until shaders are powered off * @cancel_queued: True if the cancellation work item has been queued. This is * required to ensure that it is not queued twice, e.g. 
after @@ -217,6 +195,7 @@ struct kbasep_pm_tick_timer_state { struct hrtimer timer; ktime_t configured_interval; + unsigned int default_ticks; unsigned int configured_ticks; unsigned int remaining_ticks; @@ -227,9 +206,6 @@ struct kbasep_pm_tick_timer_state { union kbase_pm_policy_data { struct kbasep_pm_policy_always_on always_on; struct kbasep_pm_policy_coarse_demand coarse_demand; -#if !MALI_CUSTOMER_RELEASE - struct kbasep_pm_policy_always_on_demand always_on_demand; -#endif }; /** @@ -240,7 +216,8 @@ union kbase_pm_policy_data { * * @pm_current_policy: The policy that is currently actively controlling the * power state. - * @pm_policy_data: Private data for current PM policy + * @pm_policy_data: Private data for current PM policy. This is automatically + * zeroed when a policy change occurs. * @reset_done: Flag when a reset is complete * @reset_done_wait: Wait queue to wait for changes to @reset_done * @gpu_cycle_counter_requests: The reference count of active gpu cycle counter @@ -254,6 +231,11 @@ union kbase_pm_policy_data { * variable should be protected by: both the hwaccess_lock * spinlock and the pm.lock mutex for writes; or at least * one of either lock for reads. + * @gpu_ready: Indicates whether the GPU is in a state in which it is + * safe to perform PM changes. When false, the PM state + * machine needs to wait before making changes to the GPU + * power policy, DevFreq or core_mask, so as to avoid these + * changing while implicit GPU resets are ongoing. * @pm_shaders_core_mask: Shader PM state synchronised shaders core mask. It * holds the cores enabled in a hardware counters dump, * and may differ from @shaders_avail when under different @@ -294,6 +276,8 @@ union kbase_pm_policy_data { * @callback_soft_reset: Optional callback to software reset the GPU. 
See * &struct kbase_pm_callback_conf * @ca_cores_enabled: Cores that are currently available + * @mcu_state: The current state of the micro-control unit, only applicable + * to GPUs that have such a component * @l2_state: The current state of the L2 cache state machine. See * &enum kbase_l2_core_state * @l2_desired: True if the L2 cache should be powered on by the L2 cache state @@ -303,10 +287,10 @@ union kbase_pm_policy_data { * @shaders_avail: This is updated by the state machine when it is in a state * where it can write to the SHADER_PWRON or PWROFF registers * to have the same set of available cores as specified by - * @shaders_desired_mask. So it would eventually have the same - * value as @shaders_desired_mask and would precisely indicate - * the cores that are currently available. This is internal to - * shader state machine and should *not* be modified elsewhere. + * @shaders_desired_mask. So would precisely indicate the cores + * that are currently available. This is internal to shader + * state machine of JM GPUs and should *not* be modified + * elsewhere. * @shaders_desired_mask: This is updated by the state machine when it is in * a state where it can handle changes to the core * availability (either by DVFS or sysfs). This is @@ -318,6 +302,16 @@ union kbase_pm_policy_data { * cores may be different, but there should be transitions in * progress that will eventually achieve this state (assuming * that the policy doesn't change its mind in the mean time). + * @mcu_desired: True if the micro-control unit should be powered on + * @policy_change_clamp_state_to_off: Signaling the backend is in PM policy + * change transition, needs the mcu/L2 to be brought back to the + * off state and remain in that state until the flag is cleared. + * @csf_pm_sched_flags: CSF Dynamic PM control flags in accordance to the + * current active PM policy. This field is updated whenever a + * new policy is activated. 
+ * @policy_change_lock: Used to serialize the policy change calls. In CSF case, + * the change of policy may involve the scheduler to + * suspend running CSGs and then reconfigure the MCU. * @in_reset: True if a GPU is resetting and normal power manager operation is * suspended * @partial_shaderoff: True if we want to partial power off shader cores, @@ -373,6 +367,7 @@ struct kbase_pm_backend_data { wait_queue_head_t gpu_in_desired_state_wait; bool gpu_powered; + bool gpu_ready; u64 pm_shaders_core_mask; @@ -406,10 +401,19 @@ struct kbase_pm_backend_data { u64 ca_cores_enabled; +#if MALI_USE_CSF + enum kbase_mcu_state mcu_state; +#endif enum kbase_l2_core_state l2_state; enum kbase_shader_core_state shaders_state; u64 shaders_avail; u64 shaders_desired_mask; +#if MALI_USE_CSF + bool mcu_desired; + bool policy_change_clamp_state_to_off; + unsigned int csf_pm_sched_flags; + struct mutex policy_change_lock; +#endif bool l2_desired; bool l2_always_on; bool shaders_desired; @@ -433,6 +437,23 @@ struct kbase_pm_backend_data { struct work_struct gpu_clock_control_work; }; +#if MALI_USE_CSF +/* CSF PM flag, signaling that the MCU CORE should be kept on */ +#define CSF_DYNAMIC_PM_CORE_KEEP_ON (1 << 0) +/* CSF PM flag, signaling no scheduler suspension on idle groups */ +#define CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE (1 << 1) +/* CSF PM flag, signaling no scheduler suspension on no runnable groups */ +#define CSF_DYNAMIC_PM_SCHED_NO_SUSPEND (1 << 2) + +/* The following flags corresponds to existing defined PM policies */ +#define ALWAYS_ON_PM_SCHED_FLAGS (CSF_DYNAMIC_PM_CORE_KEEP_ON | \ + CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE | \ + CSF_DYNAMIC_PM_SCHED_NO_SUSPEND) +#define COARSE_ON_DEMAND_PM_SCHED_FLAGS (0) +#if !MALI_CUSTOMER_RELEASE +#define ALWAYS_ON_DEMAND_PM_SCHED_FLAGS (CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE) +#endif +#endif /* List of policy IDs */ enum kbase_pm_policy_id { @@ -443,6 +464,33 @@ enum kbase_pm_policy_id { KBASE_PM_POLICY_ID_ALWAYS_ON }; +/** + * enum 
kbase_pm_policy_event - PM Policy event ID + */ +enum kbase_pm_policy_event { + /** + * @KBASE_PM_POLICY_EVENT_IDLE: Indicates that the GPU power state + * model has determined that the GPU has gone idle. + */ + KBASE_PM_POLICY_EVENT_IDLE, + /** + * @KBASE_PM_POLICY_EVENT_POWER_ON: Indicates that the GPU state model + * is preparing to power on the GPU. + */ + KBASE_PM_POLICY_EVENT_POWER_ON, + /** + * @KBASE_PM_POLICY_EVENT_TIMER_HIT: Indicates that the GPU became + * active while the Shader Tick Timer was holding the GPU in a powered + * on state. + */ + KBASE_PM_POLICY_EVENT_TIMER_HIT, + /** + * @KBASE_PM_POLICY_EVENT_TIMER_MISS: Indicates that the GPU did not + * become active before the Shader Tick Timer timeout occurred. + */ + KBASE_PM_POLICY_EVENT_TIMER_MISS, +}; + /** * struct kbase_pm_policy - Power policy structure. * @@ -455,15 +503,22 @@ enum kbase_pm_policy_id { * @shaders_needed: Function called to find out if shader cores are needed * @get_core_active: Function called to get the current overall GPU power * state + * @handle_event: Function called when a PM policy event occurs. Should be + * set to NULL if the power policy doesn't require any + * event notifications. * @id: Field indicating an ID for this policy. This is not * necessarily the same as its index in the list returned * by kbase_pm_list_policies(). * It is used purely for debugging. + * @pm_sched_flags: Policy associated with CSF PM scheduling operational flags. + * Pre-defined required flags exist for each of the + * ARM released policies, such as 'always_on', 'coarse_demand' + * and etc. */ struct kbase_pm_policy { char *name; - /** + /* * Function called when the policy is selected * * This should initialize the kbdev->pm.pm_policy_data structure. It @@ -477,7 +532,7 @@ struct kbase_pm_policy { */ void (*init)(struct kbase_device *kbdev); - /** + /* * Function called when the policy is unselected. 
* * @kbdev: The kbase device structure for the device (must be a @@ -485,7 +540,7 @@ struct kbase_pm_policy { */ void (*term)(struct kbase_device *kbdev); - /** + /* * Function called to find out if shader cores are needed * * This needs to at least satisfy kbdev->pm.backend.shaders_desired, @@ -498,7 +553,7 @@ struct kbase_pm_policy { */ bool (*shaders_needed)(struct kbase_device *kbdev); - /** + /* * Function called to get the current overall GPU power state * * This function must meet or exceed the requirements for power @@ -511,7 +566,26 @@ struct kbase_pm_policy { */ bool (*get_core_active)(struct kbase_device *kbdev); + /** + * Function called when a power event occurs + * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + * @event: The id of the power event that has occurred + */ + void (*handle_event)(struct kbase_device *kbdev, + enum kbase_pm_policy_event event); + enum kbase_pm_policy_id id; + +#if MALI_USE_CSF + /* Policy associated with CSF PM scheduling operational flags. + * There are pre-defined required flags exist for each of the + * ARM released policies, such as 'always_on', 'coarse_demand' + * and etc. + */ + unsigned int pm_sched_flags; +#endif }; #endif /* _KBASE_PM_HWACCESS_DEFS_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c index 6b821f7..bcada93 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c @@ -1,12 +1,12 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -17,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /* @@ -32,12 +30,18 @@ #include #include #include + +#if MALI_USE_CSF +#include +#else #include +#endif /* !MALI_USE_CSF */ + #include #include #include #include -#include +#include #include #include #include @@ -45,6 +49,9 @@ #ifdef CONFIG_MALI_ARBITER_SUPPORT #include #endif /* CONFIG_MALI_ARBITER_SUPPORT */ +#if MALI_USE_CSF +#include +#endif #include @@ -89,6 +96,28 @@ static u64 kbase_pm_get_state( enum kbase_pm_core_type core_type, enum kbasep_pm_action action); +#if MALI_USE_CSF +bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (unlikely(!kbdev->csf.firmware_inited)) + return false; + + if (kbdev->csf.scheduler.pm_active_count) + return true; + + /* MCU is supposed to be ON, only when scheduler.pm_active_count is + * non zero. But for always_on policy, the MCU needs to be kept on, + * unless policy changing transition needs it off. 
+ */ + + return (kbdev->pm.backend.mcu_desired && + kbase_pm_no_mcu_core_pwroff(kbdev) && + !kbdev->pm.backend.policy_change_clamp_state_to_off); +} +#endif + bool kbase_pm_is_l2_desired(struct kbase_device *kbdev) { if (kbdev->pm.backend.protected_entry_transition_override) @@ -102,6 +131,11 @@ bool kbase_pm_is_l2_desired(struct kbase_device *kbdev) !kbdev->pm.backend.shaders_desired) return false; +#if MALI_USE_CSF + if (kbdev->pm.backend.policy_change_clamp_state_to_off) + return false; +#endif + return kbdev->pm.backend.l2_desired; } @@ -210,7 +244,7 @@ static u32 core_type_to_reg(enum kbase_pm_core_type core_type, return (u32)core_type + (u32)action; } -#ifdef CONFIG_ARM64 +#if IS_ENABLED(CONFIG_ARM64) static void mali_cci_flush_l2(struct kbase_device *kbdev) { const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED; @@ -233,7 +267,8 @@ static void mali_cci_flush_l2(struct kbase_device *kbdev) GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)); /* Wait for cache flush to complete before continuing, exit on - * gpu resets or loop expiry. */ + * gpu resets or loop expiry. 
+ */ while (((raw & mask) == 0) && --loops) { raw = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)); @@ -372,9 +407,9 @@ u64 kbase_pm_get_present_cores(struct kbase_device *kbdev, switch (type) { case KBASE_PM_CORE_L2: - return kbdev->gpu_props.props.raw_props.l2_present; + return kbdev->gpu_props.curr_config.l2_present; case KBASE_PM_CORE_SHADER: - return kbdev->gpu_props.props.raw_props.shader_present; + return kbdev->gpu_props.curr_config.shader_present; case KBASE_PM_CORE_TILER: return kbdev->gpu_props.props.raw_props.tiler_present; case KBASE_PM_CORE_STACK: @@ -468,14 +503,10 @@ static void kbase_pm_trigger_hwcnt_disable(struct kbase_device *kbdev) */ if (kbase_hwcnt_context_disable_atomic(kbdev->hwcnt_gpu_ctx)) { backend->hwcnt_disabled = true; + } else { -#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE - queue_work(system_wq, - &backend->hwcnt_disable_work); -#else - queue_work(system_highpri_wq, - &backend->hwcnt_disable_work); -#endif + kbase_hwcnt_context_queue_work(kbdev->hwcnt_gpu_ctx, + &backend->hwcnt_disable_work); } } @@ -493,7 +524,8 @@ static void kbase_pm_l2_config_override(struct kbase_device *kbdev) * Skip if size and hash are not given explicitly, * which means default values are used. 
*/ - if ((kbdev->l2_size_override == 0) && (kbdev->l2_hash_override == 0)) + if ((kbdev->l2_size_override == 0) && (kbdev->l2_hash_override == 0) && + (!kbdev->l2_hash_values_override)) return; val = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG)); @@ -504,13 +536,25 @@ static void kbase_pm_l2_config_override(struct kbase_device *kbdev) } if (kbdev->l2_hash_override) { + WARN_ON(kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH)); val &= ~L2_CONFIG_HASH_MASK; val |= (kbdev->l2_hash_override << L2_CONFIG_HASH_SHIFT); + } else if (kbdev->l2_hash_values_override) { + int i; + + WARN_ON(!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH)); + val &= ~L2_CONFIG_ASN_HASH_ENABLE_MASK; + val |= (0x1 << L2_CONFIG_ASN_HASH_ENABLE_SHIFT); + + for (i = 0; i < ASN_HASH_COUNT; i++) { + dev_dbg(kbdev->dev, "Program 0x%x to ASN_HASH[%d]\n", + kbdev->l2_hash_values[i], i); + kbase_reg_write(kbdev, GPU_CONTROL_REG(ASN_HASH(i)), + kbdev->l2_hash_values[i]); + } } dev_dbg(kbdev->dev, "Program 0x%x to L2_CONFIG\n", val); - - /* Write L2_CONFIG to override */ kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_CONFIG), val); } @@ -523,6 +567,278 @@ static void kbase_pm_control_gpu_clock(struct kbase_device *kbdev) queue_work(system_wq, &backend->gpu_clock_control_work); } +#if MALI_USE_CSF +static const char *kbase_mcu_state_to_string(enum kbase_mcu_state state) +{ + const char *const strings[] = { +#define KBASEP_MCU_STATE(n) #n, +#include "mali_kbase_pm_mcu_states.h" +#undef KBASEP_MCU_STATE + }; + if (WARN_ON((size_t)state >= ARRAY_SIZE(strings))) + return "Bad MCU state"; + else + return strings[state]; +} + +static inline bool kbase_pm_handle_mcu_core_attr_update(struct kbase_device *kbdev) +{ + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + bool timer_update; + bool core_mask_update; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + WARN_ON(backend->mcu_state != KBASE_MCU_ON); + + /* This function is only for cases where the MCU managing Cores, if + * the firmware 
mode is with host control, do nothing here. + */ + if (unlikely(kbdev->csf.firmware_hctl_core_pwr)) + return false; + + core_mask_update = + backend->shaders_avail != backend->shaders_desired_mask; + + timer_update = kbdev->csf.mcu_core_pwroff_dur_count != + kbdev->csf.mcu_core_pwroff_reg_shadow; + + if (core_mask_update || timer_update) + kbase_csf_firmware_update_core_attr(kbdev, timer_update, + core_mask_update, backend->shaders_desired_mask); + + return (core_mask_update || timer_update); +} + +static int kbase_pm_mcu_update_state(struct kbase_device *kbdev) +{ + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + enum kbase_mcu_state prev_state; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* + * Initial load of firmare should have been done to + * exercise the MCU state machine. + */ + if (unlikely(!kbdev->csf.firmware_inited)) { + WARN_ON(backend->mcu_state != KBASE_MCU_OFF); + return -EIO; + } + + do { + u64 shaders_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_SHADER); + u64 shaders_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); + + /* mask off ready from trans in case transitions finished + * between the register reads + */ + shaders_trans &= ~shaders_ready; + + prev_state = backend->mcu_state; + + switch (backend->mcu_state) { + case KBASE_MCU_OFF: + if (kbase_pm_is_mcu_desired(kbdev) && + !backend->policy_change_clamp_state_to_off && + backend->l2_state == KBASE_L2_ON) { + kbase_csf_firmware_trigger_reload(kbdev); + backend->mcu_state = KBASE_MCU_PEND_ON_RELOAD; + } + break; + + case KBASE_MCU_PEND_ON_RELOAD: + if (kbdev->csf.firmware_reloaded) { + backend->shaders_desired_mask = + kbase_pm_ca_get_core_mask(kbdev); + kbase_csf_firmware_global_reinit(kbdev, + backend->shaders_desired_mask); + backend->mcu_state = + KBASE_MCU_ON_GLB_REINIT_PEND; + } + break; + + case KBASE_MCU_ON_GLB_REINIT_PEND: + if (kbase_csf_firmware_global_reinit_complete(kbdev)) { + backend->shaders_avail = + backend->shaders_desired_mask; + 
backend->pm_shaders_core_mask = 0; + if (kbdev->csf.firmware_hctl_core_pwr) { + kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, + backend->shaders_avail, ACTION_PWRON); + backend->mcu_state = + KBASE_MCU_HCTL_SHADERS_PEND_ON; + } else + backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; + } + break; + + case KBASE_MCU_HCTL_SHADERS_PEND_ON: + if (!shaders_trans && + shaders_ready == backend->shaders_avail) { + /* Cores now stable, notify MCU the stable mask */ + kbase_csf_firmware_update_core_attr(kbdev, + false, true, shaders_ready); + + backend->pm_shaders_core_mask = shaders_ready; + backend->mcu_state = + KBASE_MCU_HCTL_CORES_NOTIFY_PEND; + } + break; + + case KBASE_MCU_HCTL_CORES_NOTIFY_PEND: + /* Wait for the acknowledgement */ + if (kbase_csf_firmware_core_attr_updated(kbdev)) + backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; + break; + + case KBASE_MCU_ON_HWCNT_ENABLE: + backend->hwcnt_desired = true; + if (backend->hwcnt_disabled) { + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbase_hwcnt_context_enable( + kbdev->hwcnt_gpu_ctx); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + backend->hwcnt_disabled = false; + } + backend->mcu_state = KBASE_MCU_ON; + break; + + case KBASE_MCU_ON: + backend->shaders_desired_mask = kbase_pm_ca_get_core_mask(kbdev); + + if (!kbase_pm_is_mcu_desired(kbdev)) + backend->mcu_state = KBASE_MCU_ON_HWCNT_DISABLE; + else if (kbdev->csf.firmware_hctl_core_pwr) { + /* Host control add additional Cores to be active */ + if (backend->shaders_desired_mask & ~shaders_ready) { + backend->hwcnt_desired = false; + if (!backend->hwcnt_disabled) + kbase_pm_trigger_hwcnt_disable(kbdev); + backend->mcu_state = + KBASE_MCU_HCTL_MCU_ON_RECHECK; + } + } else if (kbase_pm_handle_mcu_core_attr_update(kbdev)) + kbdev->pm.backend.mcu_state = + KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND; + break; + + case KBASE_MCU_HCTL_MCU_ON_RECHECK: + backend->shaders_desired_mask = kbase_pm_ca_get_core_mask(kbdev); + + if (!backend->hwcnt_disabled) 
{ + /* Wait for being disabled */ + ; + } else if (!kbase_pm_is_mcu_desired(kbdev)) { + /* Converging to MCU powering down flow */ + backend->mcu_state = KBASE_MCU_ON_HWCNT_DISABLE; + } else if (backend->shaders_desired_mask & ~shaders_ready) { + /* set cores ready but not available to + * meet SHADERS_PEND_ON check pass + */ + backend->shaders_avail = + (backend->shaders_desired_mask | shaders_ready); + + kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, + backend->shaders_avail & ~shaders_ready, + ACTION_PWRON); + backend->mcu_state = + KBASE_MCU_HCTL_SHADERS_PEND_ON; + } else { + backend->mcu_state = + KBASE_MCU_HCTL_SHADERS_PEND_ON; + } + break; + + case KBASE_MCU_ON_CORE_ATTR_UPDATE_PEND: + if (kbase_csf_firmware_core_attr_updated(kbdev)) { + backend->shaders_avail = + backend->shaders_desired_mask; + backend->mcu_state = KBASE_MCU_ON; + } + break; + + case KBASE_MCU_ON_HWCNT_DISABLE: + if (kbase_pm_is_mcu_desired(kbdev)) { + backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; + break; + } + + backend->hwcnt_desired = false; + if (!backend->hwcnt_disabled) + kbase_pm_trigger_hwcnt_disable(kbdev); + + if (backend->hwcnt_disabled) + backend->mcu_state = KBASE_MCU_ON_HALT; + break; + + case KBASE_MCU_ON_HALT: + if (!kbase_pm_is_mcu_desired(kbdev)) { + kbase_csf_firmware_trigger_mcu_halt(kbdev); + backend->mcu_state = KBASE_MCU_ON_PEND_HALT; + } else + backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE; + break; + + case KBASE_MCU_ON_PEND_HALT: + if (kbase_csf_firmware_mcu_halted(kbdev)) { + if (kbdev->csf.firmware_hctl_core_pwr) + backend->mcu_state = + KBASE_MCU_HCTL_SHADERS_READY_OFF; + else + backend->mcu_state = KBASE_MCU_POWER_DOWN; + } + break; + + case KBASE_MCU_HCTL_SHADERS_READY_OFF: + kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, + shaders_ready, ACTION_PWROFF); + backend->mcu_state = + KBASE_MCU_HCTL_SHADERS_PEND_OFF; + break; + + case KBASE_MCU_HCTL_SHADERS_PEND_OFF: + if (!shaders_trans && !shaders_ready) { + backend->pm_shaders_core_mask = 0; + backend->mcu_state 
= KBASE_MCU_POWER_DOWN; + } + break; + + case KBASE_MCU_POWER_DOWN: + kbase_csf_firmware_disable_mcu(kbdev); + backend->mcu_state = KBASE_MCU_PEND_OFF; + break; + + case KBASE_MCU_PEND_OFF: + /* wait synchronously for the MCU to get disabled */ + kbase_csf_firmware_disable_mcu_wait(kbdev); + backend->mcu_state = KBASE_MCU_OFF; + break; + + case KBASE_MCU_RESET_WAIT: + /* Reset complete */ + if (!backend->in_reset) + backend->mcu_state = KBASE_MCU_OFF; + break; + + default: + WARN(1, "Invalid state in mcu_state: %d", + backend->mcu_state); + } + + if (backend->mcu_state != prev_state) + dev_dbg(kbdev->dev, "MCU state transition: %s to %s\n", + kbase_mcu_state_to_string(prev_state), + kbase_mcu_state_to_string(backend->mcu_state)); + + } while (backend->mcu_state != prev_state); + + return 0; +} +#endif + static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state) { const char *const strings[] = { @@ -539,8 +855,10 @@ static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state) static int kbase_pm_l2_update_state(struct kbase_device *kbdev) { struct kbase_pm_backend_data *backend = &kbdev->pm.backend; - u64 l2_present = kbdev->gpu_props.props.raw_props.l2_present; + u64 l2_present = kbdev->gpu_props.curr_config.l2_present; +#if !MALI_USE_CSF u64 tiler_present = kbdev->gpu_props.props.raw_props.tiler_present; +#endif enum kbase_l2_core_state prev_state; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -551,35 +869,49 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) KBASE_PM_CORE_L2); u64 l2_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2); + +#if !MALI_USE_CSF u64 tiler_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_TILER); u64 tiler_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER); +#endif /* * kbase_pm_get_ready_cores and kbase_pm_get_trans_cores * are vulnerable to corruption if gpu is lost */ - if (kbase_is_gpu_lost(kbdev)) - return -EIO; + if (kbase_is_gpu_removed(kbdev) +#ifdef 
CONFIG_MALI_ARBITER_SUPPORT + || kbase_pm_is_gpu_lost(kbdev)) { +#else + ) { +#endif + backend->shaders_state = + KBASE_SHADERS_OFF_CORESTACK_OFF; + backend->l2_state = KBASE_L2_OFF; + dev_dbg(kbdev->dev, "GPU lost has occurred - L2 off\n"); + break; + } /* mask off ready from trans in case transitions finished * between the register reads */ l2_trans &= ~l2_ready; +#if !MALI_USE_CSF tiler_trans &= ~tiler_ready; - +#endif prev_state = backend->l2_state; switch (backend->l2_state) { case KBASE_L2_OFF: if (kbase_pm_is_l2_desired(kbdev)) { /* - * Set the desired config for L2 before powering - * it on + * Set the desired config for L2 before + * powering it on */ kbase_pm_l2_config_override(kbdev); - +#if !MALI_USE_CSF /* L2 is required, power on. Powering on the * tiler will also power the first L2 cache. */ @@ -593,14 +925,30 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, l2_present & ~1, ACTION_PWRON); +#else + /* With CSF firmware, Host driver doesn't need to + * handle power management with both shader and tiler cores. + * The CSF firmware will power up the cores appropriately. + * So only power the l2 cache explicitly. + */ + kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2, + l2_present, ACTION_PWRON); +#endif backend->l2_state = KBASE_L2_PEND_ON; } break; case KBASE_L2_PEND_ON: +#if !MALI_USE_CSF if (!l2_trans && l2_ready == l2_present && !tiler_trans && tiler_ready == tiler_present) { - KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, tiler_ready); + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, + tiler_ready); +#else + if (!l2_trans && l2_ready == l2_present) { + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL, + l2_ready); +#endif /* * Ensure snoops are enabled after L2 is powered * up. 
Note that kbase keeps track of the snoop @@ -658,22 +1006,30 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) break; case KBASE_L2_ON_HWCNT_ENABLE: +#if !MALI_USE_CSF backend->hwcnt_desired = true; if (backend->hwcnt_disabled) { kbase_hwcnt_context_enable( kbdev->hwcnt_gpu_ctx); backend->hwcnt_disabled = false; } +#endif backend->l2_state = KBASE_L2_ON; break; case KBASE_L2_ON: if (!kbase_pm_is_l2_desired(kbdev)) { +#if !MALI_USE_CSF /* Do not power off L2 until the shaders and * core stacks are off. */ if (backend->shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) break; +#else + /* Do not power off L2 until the MCU has been stopped */ + if (backend->mcu_state != KBASE_MCU_OFF) + break; +#endif /* We need to make sure hardware counters are * disabled before powering down the L2, to @@ -690,6 +1046,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) break; case KBASE_L2_ON_HWCNT_DISABLE: +#if !MALI_USE_CSF /* If the L2 became desired while we were waiting on the * worker to do the actual hwcnt disable (which might * happen if some work was submitted immediately after @@ -719,6 +1076,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) if (!backend->hwcnt_disabled) { kbase_pm_trigger_hwcnt_disable(kbdev); } +#endif if (backend->hwcnt_disabled) { if (kbdev->pm.backend.gpu_clock_slow_down_wa) @@ -769,9 +1127,11 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) */ kbase_gpu_start_cache_clean_nolock( kbdev); - +#if !MALI_USE_CSF KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, 0u); - +#else + KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL, 0u); +#endif backend->l2_state = KBASE_L2_PEND_OFF; break; @@ -877,6 +1237,7 @@ static void shader_poweroff_timer_queue_cancel(struct kbase_device *kbdev) } } +#if !MALI_USE_CSF static const char *kbase_shader_core_state_to_string( enum kbase_shader_core_state state) { @@ -898,7 +1259,6 @@ static int kbase_pm_shaders_update_state(struct 
kbase_device *kbdev) &kbdev->pm.backend.shader_tick_timer; enum kbase_shader_core_state prev_state; u64 stacks_avail = 0; - int err = 0; lockdep_assert_held(&kbdev->hwaccess_lock); @@ -924,8 +1284,15 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) * kbase_pm_get_ready_cores and kbase_pm_get_trans_cores * are vulnerable to corruption if gpu is lost */ - if (kbase_is_gpu_lost(kbdev)) { - err = -EIO; + if (kbase_is_gpu_removed(kbdev) +#ifdef CONFIG_MALI_ARBITER_SUPPORT + || kbase_pm_is_gpu_lost(kbdev)) { +#else + ) { +#endif + backend->shaders_state = + KBASE_SHADERS_OFF_CORESTACK_OFF; + dev_dbg(kbdev->dev, "GPU lost has occurred - shaders off\n"); break; } @@ -976,6 +1343,12 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, backend->shaders_avail, ACTION_PWRON); + if (backend->pm_current_policy && + backend->pm_current_policy->handle_event) + backend->pm_current_policy->handle_event( + kbdev, + KBASE_PM_POLICY_EVENT_POWER_ON); + backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON; } break; @@ -986,8 +1359,18 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) backend->pm_shaders_core_mask = shaders_ready; backend->hwcnt_desired = true; if (backend->hwcnt_disabled) { +#if MALI_USE_CSF + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, + &flags); +#endif kbase_hwcnt_context_enable( kbdev->hwcnt_gpu_ctx); +#if MALI_USE_CSF + kbase_csf_scheduler_spin_unlock(kbdev, + flags); +#endif backend->hwcnt_disabled = false; } @@ -1018,6 +1401,12 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) /* Wait for being disabled */ ; } else if (!backend->shaders_desired) { + if (backend->pm_current_policy && + backend->pm_current_policy->handle_event) + backend->pm_current_policy->handle_event( + kbdev, + KBASE_PM_POLICY_EVENT_IDLE); + if (kbdev->pm.backend.protected_transition_override || #ifdef CONFIG_MALI_ARBITER_SUPPORT 
kbase_pm_is_suspending(kbdev) || @@ -1078,9 +1467,21 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) } if (backend->shaders_desired) { + if (backend->pm_current_policy && + backend->pm_current_policy->handle_event) + backend->pm_current_policy->handle_event( + kbdev, + KBASE_PM_POLICY_EVENT_TIMER_HIT); + stt->remaining_ticks = 0; backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON_RECHECK; } else if (stt->remaining_ticks == 0) { + if (backend->pm_current_policy && + backend->pm_current_policy->handle_event) + backend->pm_current_policy->handle_event( + kbdev, + KBASE_PM_POLICY_EVENT_TIMER_MISS); + backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON; #ifdef CONFIG_MALI_ARBITER_SUPPORT } else if (kbase_pm_is_suspending(kbdev) || @@ -1167,8 +1568,18 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) backend->pm_shaders_core_mask = 0; backend->hwcnt_desired = true; if (backend->hwcnt_disabled) { +#if MALI_USE_CSF + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, + &flags); +#endif kbase_hwcnt_context_enable( kbdev->hwcnt_gpu_ctx); +#if MALI_USE_CSF + kbase_csf_scheduler_spin_unlock(kbdev, + flags); +#endif backend->hwcnt_disabled = false; } backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF; @@ -1195,8 +1606,9 @@ static int kbase_pm_shaders_update_state(struct kbase_device *kbdev) } while (backend->shaders_state != prev_state); - return err; + return 0; } +#endif static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev) { @@ -1211,12 +1623,21 @@ static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev) kbdev->pm.backend.l2_state != KBASE_L2_OFF) in_desired_state = false; +#if !MALI_USE_CSF if (kbdev->pm.backend.shaders_desired && kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON) in_desired_state = false; else if (!kbdev->pm.backend.shaders_desired && kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF) 
in_desired_state = false; +#else + if (kbase_pm_is_mcu_desired(kbdev) && + kbdev->pm.backend.mcu_state != KBASE_MCU_ON) + in_desired_state = false; + else if (!kbase_pm_is_mcu_desired(kbdev) && + kbdev->pm.backend.mcu_state != KBASE_MCU_OFF) + in_desired_state = false; +#endif return in_desired_state; } @@ -1280,17 +1701,22 @@ static void kbase_pm_trace_power_state(struct kbase_device *kbdev) void kbase_pm_update_state(struct kbase_device *kbdev) { +#if !MALI_USE_CSF enum kbase_shader_core_state prev_shaders_state = kbdev->pm.backend.shaders_state; +#else + enum kbase_mcu_state prev_mcu_state = kbdev->pm.backend.mcu_state; +#endif lockdep_assert_held(&kbdev->hwaccess_lock); - if (!kbdev->pm.backend.gpu_powered) - return; /* Do nothing if the GPU is off */ + if (!kbdev->pm.backend.gpu_ready) + return; /* Do nothing if the GPU is not ready */ if (kbase_pm_l2_update_state(kbdev)) return; +#if !MALI_USE_CSF if (kbase_pm_shaders_update_state(kbdev)) return; @@ -1304,9 +1730,20 @@ void kbase_pm_update_state(struct kbase_device *kbdev) if (kbase_pm_l2_update_state(kbdev)) return; } +#else + if (kbase_pm_mcu_update_state(kbdev)) + return; + + if (prev_mcu_state != KBASE_MCU_OFF && + kbdev->pm.backend.mcu_state == KBASE_MCU_OFF) { + if (kbase_pm_l2_update_state(kbdev)) + return; + } +#endif if (kbase_pm_is_in_desired_state_nolock(kbdev)) { - KBASE_KTRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, kbdev->pm.backend.shaders_avail); + KBASE_KTRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, + kbdev->pm.backend.shaders_avail); kbase_pm_trace_power_state(kbdev); @@ -1363,7 +1800,8 @@ int kbase_pm_state_machine_init(struct kbase_device *kbdev) hrtimer_init(&stt->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); stt->timer.function = shader_tick_timer_callback; stt->configured_interval = HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS); - stt->configured_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER; + stt->default_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER; + stt->configured_ticks = stt->default_ticks; 
return 0; } @@ -1382,7 +1820,19 @@ void kbase_pm_reset_start_locked(struct kbase_device *kbdev) backend->in_reset = true; backend->l2_state = KBASE_L2_RESET_WAIT; +#if !MALI_USE_CSF backend->shaders_state = KBASE_SHADERS_RESET_WAIT; +#else + /* MCU state machine is exercised only after the initial load/boot + * of the firmware. + */ + if (likely(kbdev->csf.firmware_inited)) { + backend->mcu_state = KBASE_MCU_RESET_WAIT; + kbdev->csf.firmware_reload_needed = true; + } else { + WARN_ON(backend->mcu_state != KBASE_MCU_OFF); + } +#endif /* We're in a reset, so hwcnt will have been synchronously disabled by * this function's caller as part of the reset process. We therefore @@ -1422,15 +1872,28 @@ void kbase_pm_reset_complete(struct kbase_device *kbdev) /* Timeout for kbase_pm_wait_for_desired_state when wait_event_killable has * aborted due to a fatal signal. If the time spent waiting has exceeded this - * threshold then there is most likely a hardware issue. */ -#define PM_TIMEOUT (5*HZ) /* 5s */ + * threshold then there is most likely a hardware issue. + */ +#define PM_TIMEOUT_MS (5000) /* 5s */ static void kbase_pm_timed_out(struct kbase_device *kbdev) { + unsigned long flags; + dev_err(kbdev->dev, "Power transition timed out unexpectedly\n"); +#if !MALI_USE_CSF + CSTD_UNUSED(flags); dev_err(kbdev->dev, "Desired state :\n"); dev_err(kbdev->dev, "\tShader=%016llx\n", kbdev->pm.backend.shaders_desired ? 
kbdev->pm.backend.shaders_avail : 0); +#else + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + dev_err(kbdev->dev, "\tMCU desired = %d\n", + kbase_pm_is_mcu_desired(kbdev)); + dev_err(kbdev->dev, "\tMCU sw state = %d\n", + kbdev->pm.backend.mcu_state); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#endif dev_err(kbdev->dev, "Current state :\n"); dev_err(kbdev->dev, "\tShader=%08x%08x\n", kbase_reg_read(kbdev, @@ -1447,6 +1910,10 @@ static void kbase_pm_timed_out(struct kbase_device *kbdev) GPU_CONTROL_REG(L2_READY_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_READY_LO))); +#if MALI_USE_CSF + dev_err(kbdev->dev, "\tMCU status = %d\n", + kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS))); +#endif dev_err(kbdev->dev, "Cores transitioning :\n"); dev_err(kbdev->dev, "\tShader=%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG( @@ -1465,49 +1932,87 @@ static void kbase_pm_timed_out(struct kbase_device *kbdev) L2_PWRTRANS_LO))); dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu(kbdev, + RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } -void kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev) +int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev) { unsigned long flags; unsigned long timeout; - int err; + long remaining; + int err = 0; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_pm_update_state(kbdev); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - timeout = jiffies + PM_TIMEOUT; +#if MALI_USE_CSF + timeout = kbase_csf_timeout_in_jiffies(PM_TIMEOUT_MS); +#else + timeout = msecs_to_jiffies(PM_TIMEOUT_MS); +#endif /* Wait for cores */ - err = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait, - kbase_pm_is_in_desired_state_with_l2_powered(kbdev)); +#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE + remaining = wait_event_killable_timeout( +#else + remaining = wait_event_timeout( +#endif + 
kbdev->pm.backend.gpu_in_desired_state_wait, + kbase_pm_is_in_desired_state_with_l2_powered(kbdev), timeout); - if (err < 0 && time_after(jiffies, timeout)) + if (!remaining) { kbase_pm_timed_out(kbdev); + err = -ETIMEDOUT; + } else if (remaining < 0) { + dev_info( + kbdev->dev, + "Wait for desired PM state with L2 powered got interrupted"); + err = (int)remaining; + } + + return err; } -void kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) +int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) { unsigned long flags; - unsigned long timeout; - int err; + long remaining; +#if MALI_USE_CSF + long timeout = kbase_csf_timeout_in_jiffies(PM_TIMEOUT_MS); +#else + long timeout = msecs_to_jiffies(PM_TIMEOUT_MS); +#endif + int err = 0; /* Let the state machine latch the most recent desired state. */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_pm_update_state(kbdev); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - timeout = jiffies + PM_TIMEOUT; - /* Wait for cores */ - err = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait, - kbase_pm_is_in_desired_state(kbdev)); +#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE + remaining = wait_event_killable_timeout( + kbdev->pm.backend.gpu_in_desired_state_wait, + kbase_pm_is_in_desired_state(kbdev), timeout); +#else + remaining = wait_event_timeout( + kbdev->pm.backend.gpu_in_desired_state_wait, + kbase_pm_is_in_desired_state(kbdev), timeout); +#endif - if (err < 0 && time_after(jiffies, timeout)) + if (!remaining) { kbase_pm_timed_out(kbdev); + err = -ETIMEDOUT; + } else if (remaining < 0) { + dev_info(kbdev->dev, + "Wait for desired PM state got interrupted"); + err = (int)remaining; + } + + return err; } KBASE_EXPORT_TEST_API(kbase_pm_wait_for_desired_state); @@ -1515,7 +2020,7 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev) { unsigned long flags; - KBASE_DEBUG_ASSERT(NULL != kbdev); + KBASE_DEBUG_ASSERT(kbdev != NULL); /* * Clear all interrupts, * and unmask 
them all. @@ -1529,14 +2034,19 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev) kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF); kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); +#if MALI_USE_CSF + /* Enable only the Page fault bits part */ + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFF); +#else kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF); +#endif } KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts); void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev) { - KBASE_DEBUG_ASSERT(NULL != kbdev); + KBASE_DEBUG_ASSERT(kbdev != NULL); /* * Mask all interrupts, * and clear them all. @@ -1563,6 +2073,23 @@ void kbase_pm_disable_interrupts(struct kbase_device *kbdev) KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts); +#if MALI_USE_CSF +static void update_user_reg_page_mapping(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->pm.lock); + + if (kbdev->csf.mali_file_inode) { + /* This would zap the pte corresponding to the mapping of User + * register page for all the Kbase contexts. 
+ */ + unmap_mapping_range(kbdev->csf.mali_file_inode->i_mapping, + BASEP_MEM_CSF_USER_REG_PAGE_HANDLE, + PAGE_SIZE, 1); + } +} +#endif + + /* * pmu layout: * 0x0000: PMU TAG (RO) (0xCAFECAFE) @@ -1574,10 +2101,20 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) bool reset_required = is_resume; unsigned long flags; - KBASE_DEBUG_ASSERT(NULL != kbdev); + KBASE_DEBUG_ASSERT(kbdev != NULL); +#if !MALI_USE_CSF lockdep_assert_held(&kbdev->js_data.runpool_mutex); +#endif /* !MALI_USE_CSF */ lockdep_assert_held(&kbdev->pm.lock); +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (WARN_ON(kbase_pm_is_gpu_lost(kbdev))) { + dev_err(kbdev->dev, + "%s: Cannot power up while GPU lost", __func__); + return; + } +#endif + if (kbdev->pm.backend.gpu_powered) { /* Already turned on */ if (kbdev->poweroff_pending) @@ -1602,11 +2139,40 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) kbdev->pm.backend.gpu_powered = true; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#if MALI_USE_CSF + /* GPU has been turned on, can switch to actual register page */ + update_user_reg_page_mapping(kbdev); +#endif + if (reset_required) { /* GPU state was lost, reset GPU to ensure it is in a - * consistent state */ + * consistent state + */ kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS); } +#ifdef CONFIG_MALI_ARBITER_SUPPORT + else { + if (kbdev->arb.arb_if) { + struct kbase_arbiter_vm_state *arb_vm_state = + kbdev->pm.arb_vm_state; + + /* In the case that the GPU has just been granted by + * the Arbiter, a reset will have already been done. + * However, it is still necessary to initialize the GPU. + */ + if (arb_vm_state->vm_arb_starting) + kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS | + PM_NO_RESET); + } + } + /* + * This point means that the GPU trasitioned to ON. So there is a chance + * that a repartitioning occurred. In this case the current config + * should be read again. 
+ */ + kbase_gpuprops_get_curr_config_props(kbdev, + &kbdev->gpu_props.curr_config); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ mutex_lock(&kbdev->mmu_hw_mutex); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -1628,7 +2194,19 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) /* Turn on the L2 caches */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.gpu_ready = true; kbdev->pm.backend.l2_desired = true; +#if MALI_USE_CSF + if (reset_required) { + /* GPU reset was done after the power on, so send the post + * reset event instead. This is okay as GPU power off event + * is same as pre GPU reset event. + */ + kbase_ipa_control_handle_gpu_reset_post(kbdev); + } else { + kbase_ipa_control_handle_gpu_power_on(kbdev); + } +#endif kbase_pm_update_state(kbdev); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } @@ -1639,7 +2217,7 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev) { unsigned long flags; - KBASE_DEBUG_ASSERT(NULL != kbdev); + KBASE_DEBUG_ASSERT(kbdev != NULL); lockdep_assert_held(&kbdev->pm.lock); /* ASSERT that the cores should now be unavailable. No lock needed. */ @@ -1663,16 +2241,38 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev) if (atomic_read(&kbdev->faults_pending)) { /* Page/bus faults are still being processed. 
The GPU can not - * be powered off until they have completed */ + * be powered off until they have completed + */ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return false; } kbase_pm_cache_snoop_disable(kbdev); +#if MALI_USE_CSF + kbase_ipa_control_handle_gpu_power_off(kbdev); +#endif + + kbdev->pm.backend.gpu_ready = false; /* The GPU power may be turned off from this point */ kbdev->pm.backend.gpu_powered = false; + +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbase_pm_is_gpu_lost(kbdev)) { + /* Ensure we unblock any threads that are stuck waiting + * for the GPU + */ + kbase_gpu_cache_clean_wait_complete(kbdev); + } +#endif + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + +#if MALI_USE_CSF + /* GPU is about to be turned off, switch to dummy page */ + update_user_reg_page_mapping(kbdev); +#endif + #ifdef CONFIG_MALI_ARBITER_SUPPORT kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_IDLE_EVENT); #endif /* CONFIG_MALI_ARBITER_SUPPORT */ @@ -1720,19 +2320,23 @@ static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer) struct kbasep_reset_timeout_data *rtdata = container_of(timer, struct kbasep_reset_timeout_data, timer); - rtdata->timed_out = 1; + rtdata->timed_out = true; /* Set the wait queue to wake up kbase_pm_init_hw even though the reset - * hasn't completed */ + * hasn't completed + */ kbase_pm_reset_done(rtdata->kbdev); return HRTIMER_NORESTART; } -static int kbase_set_jm_quirks(struct kbase_device *kbdev, const u32 prod_id) +static int kbase_set_gpu_quirks(struct kbase_device *kbdev, const u32 prod_id) { - u32 hw_quirks_jm = kbase_reg_read(kbdev, - GPU_CONTROL_REG(JM_CONFIG)); +#if MALI_USE_CSF + kbdev->hw_quirks_gpu = + kbase_reg_read(kbdev, GPU_CONTROL_REG(CSF_CONFIG)); +#else + u32 hw_quirks_gpu = kbase_reg_read(kbdev, GPU_CONTROL_REG(JM_CONFIG)); if (GPU_ID2_MODEL_MATCH_VALUE(prod_id) == GPU_ID2_PRODUCT_TMIX) { /* Only for tMIx */ @@ -1746,38 +2350,38 @@ static int kbase_set_jm_quirks(struct kbase_device *kbdev, const u32 
prod_id) */ if (coherency_features == COHERENCY_FEATURE_BIT(COHERENCY_ACE)) { - hw_quirks_jm |= (COHERENCY_ACE_LITE | - COHERENCY_ACE) << - JM_FORCE_COHERENCY_FEATURES_SHIFT; + hw_quirks_gpu |= (COHERENCY_ACE_LITE | COHERENCY_ACE) + << JM_FORCE_COHERENCY_FEATURES_SHIFT; } } - if (kbase_is_gpu_lost(kbdev)) + if (kbase_is_gpu_removed(kbdev)) return -EIO; - kbdev->hw_quirks_jm = hw_quirks_jm; + kbdev->hw_quirks_gpu = hw_quirks_gpu; +#endif /* !MALI_USE_CSF */ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_IDVS_GROUP_SIZE)) { int default_idvs_group_size = 0xF; - u32 tmp; + u32 group_size = 0; - if (of_property_read_u32(kbdev->dev->of_node, - "idvs-group-size", &tmp)) - tmp = default_idvs_group_size; + if (of_property_read_u32(kbdev->dev->of_node, "idvs-group-size", + &group_size)) + group_size = default_idvs_group_size; - if (tmp > IDVS_GROUP_MAX_SIZE) { + if (group_size > IDVS_GROUP_MAX_SIZE) { dev_err(kbdev->dev, "idvs-group-size of %d is too large. Maximum value is %d", - tmp, IDVS_GROUP_MAX_SIZE); - tmp = default_idvs_group_size; + group_size, IDVS_GROUP_MAX_SIZE); + group_size = default_idvs_group_size; } - kbdev->hw_quirks_jm |= tmp << IDVS_GROUP_SIZE_SHIFT; + kbdev->hw_quirks_gpu |= group_size << IDVS_GROUP_SIZE_SHIFT; } #define MANUAL_POWER_CONTROL ((u32)(1 << 8)) if (corestack_driver_control) - kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL; + kbdev->hw_quirks_gpu |= MANUAL_POWER_CONTROL; return 0; } @@ -1787,7 +2391,7 @@ static int kbase_set_sc_quirks(struct kbase_device *kbdev, const u32 prod_id) u32 hw_quirks_sc = kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG)); - if (kbase_is_gpu_lost(kbdev)) + if (kbase_is_gpu_removed(kbdev)) return -EIO; if (prod_id < 0x750 || prod_id == 0x6956) /* T60x, T62x, T72x */ @@ -1811,7 +2415,7 @@ static int kbase_set_tiler_quirks(struct kbase_device *kbdev) u32 hw_quirks_tiler = kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG)); - if (kbase_is_gpu_lost(kbdev)) + if (kbase_is_gpu_removed(kbdev)) return -EIO; /* Set tiler 
clock gate override if required */ @@ -1831,18 +2435,17 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) GPU_ID_VERSION_PRODUCT_ID_SHIFT; int error = 0; - kbdev->hw_quirks_jm = 0; + kbdev->hw_quirks_gpu = 0; kbdev->hw_quirks_sc = 0; kbdev->hw_quirks_tiler = 0; kbdev->hw_quirks_mmu = 0; - if (!of_property_read_u32(np, "quirks_jm", - &kbdev->hw_quirks_jm)) { + if (!of_property_read_u32(np, "quirks_gpu", &kbdev->hw_quirks_gpu)) { dev_info(kbdev->dev, - "Found quirks_jm = [0x%x] in Devicetree\n", - kbdev->hw_quirks_jm); + "Found quirks_gpu = [0x%x] in Devicetree\n", + kbdev->hw_quirks_gpu); } else { - error = kbase_set_jm_quirks(kbdev, prod_id); + error = kbase_set_gpu_quirks(kbdev, prod_id); if (error) return error; } @@ -1891,15 +2494,20 @@ static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), kbdev->hw_quirks_mmu); +#if MALI_USE_CSF + kbase_reg_write(kbdev, GPU_CONTROL_REG(CSF_CONFIG), + kbdev->hw_quirks_gpu); +#else kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG), - kbdev->hw_quirks_jm); + kbdev->hw_quirks_gpu); +#endif } void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) { if ((kbdev->current_gpu_coherency_mode == COHERENCY_ACE) && !kbdev->cci_snoop_enabled) { -#ifdef CONFIG_ARM64 +#if IS_ENABLED(CONFIG_ARM64) if (kbdev->snoop_enable_smc != 0) kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0); #endif /* CONFIG_ARM64 */ @@ -1911,7 +2519,7 @@ void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev) { if (kbdev->cci_snoop_enabled) { -#ifdef CONFIG_ARM64 +#if IS_ENABLED(CONFIG_ARM64) if (kbdev->snoop_disable_smc != 0) { mali_cci_flush_l2(kbdev); kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0); @@ -1922,6 +2530,7 @@ void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev) } } +#if !MALI_USE_CSF static void reenable_protected_mode_hwcnt(struct kbase_device *kbdev) { unsigned long irq_flags; 
@@ -1934,6 +2543,7 @@ static void reenable_protected_mode_hwcnt(struct kbase_device *kbdev) } spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); } +#endif static int kbase_pm_do_reset(struct kbase_device *kbdev) { @@ -1960,7 +2570,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) /* Initialize a structure for tracking the status of the reset */ rtdata.kbdev = kbdev; - rtdata.timed_out = 0; + rtdata.timed_out = false; /* Create a timer to use as a timeout on the reset */ hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); @@ -1972,7 +2582,7 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) /* Wait for the RESET_COMPLETED interrupt to be raised */ kbase_pm_wait_for_reset(kbdev); - if (rtdata.timed_out == 0) { + if (!rtdata.timed_out) { /* GPU has been reset */ hrtimer_cancel(&rtdata.timer); destroy_hrtimer_on_stack(&rtdata.timer); @@ -1980,46 +2590,60 @@ static int kbase_pm_do_reset(struct kbase_device *kbdev) } /* No interrupt has been received - check if the RAWSTAT register says - * the reset has completed */ + * the reset has completed + */ if ((kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & - RESET_COMPLETED) - || kbase_is_gpu_lost(kbdev)) { + RESET_COMPLETED)) { /* The interrupt is set in the RAWSTAT; this suggests that the - * interrupts are not getting to the CPU */ + * interrupts are not getting to the CPU + */ dev_err(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n"); /* If interrupts aren't working we can't continue. 
*/ destroy_hrtimer_on_stack(&rtdata.timer); return -EINVAL; } + if (kbase_is_gpu_removed(kbdev)) { + dev_dbg(kbdev->dev, "GPU has been removed, reset no longer needed.\n"); + destroy_hrtimer_on_stack(&rtdata.timer); + return -EINVAL; + } + /* The GPU doesn't seem to be responding to the reset so try a hard - * reset */ - dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n", - RESET_TIMEOUT); - KBASE_KTRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, 0); - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_HARD_RESET); + * reset, but only when NOT in arbitration mode. + */ +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (!kbdev->arb.arb_if) { +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n", + RESET_TIMEOUT); + KBASE_KTRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_HARD_RESET); - /* Restart the timer to wait for the hard reset to complete */ - rtdata.timed_out = 0; + /* Restart the timer to wait for the hard reset to complete */ + rtdata.timed_out = false; - hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), - HRTIMER_MODE_REL); + hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), + HRTIMER_MODE_REL); - /* Wait for the RESET_COMPLETED interrupt to be raised */ - kbase_pm_wait_for_reset(kbdev); + /* Wait for the RESET_COMPLETED interrupt to be raised */ + kbase_pm_wait_for_reset(kbdev); - if (rtdata.timed_out == 0) { - /* GPU has been reset */ - hrtimer_cancel(&rtdata.timer); - destroy_hrtimer_on_stack(&rtdata.timer); - return 0; - } + if (!rtdata.timed_out) { + /* GPU has been reset */ + hrtimer_cancel(&rtdata.timer); + destroy_hrtimer_on_stack(&rtdata.timer); + return 0; + } - destroy_hrtimer_on_stack(&rtdata.timer); + destroy_hrtimer_on_stack(&rtdata.timer); - dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out 
after %d ms)\n", - RESET_TIMEOUT); + dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n", + RESET_TIMEOUT); +#ifdef CONFIG_MALI_ARBITER_SUPPORT + } +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ return -EINVAL; } @@ -2041,9 +2665,9 @@ int kbase_pm_protected_mode_disable(struct kbase_device *const kbdev) int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) { unsigned long irq_flags; - int err; + int err = 0; - KBASE_DEBUG_ASSERT(NULL != kbdev); + KBASE_DEBUG_ASSERT(kbdev != NULL); lockdep_assert_held(&kbdev->pm.lock); /* Ensure the clock is on before attempting to access the hardware */ @@ -2055,7 +2679,8 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) } /* Ensure interrupts are off to begin with, this also clears any - * outstanding interrupts */ + * outstanding interrupts + */ kbase_pm_disable_interrupts(kbdev); /* Ensure cache snoops are disabled before reset. */ kbase_pm_cache_snoop_disable(kbdev); @@ -2069,10 +2694,24 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); /* Soft reset the GPU */ - err = kbdev->protected_ops->protected_mode_disable( - kbdev->protected_dev); +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (!(flags & PM_NO_RESET)) +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + err = kbdev->protected_ops->protected_mode_disable( + kbdev->protected_dev); spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); +#if MALI_USE_CSF + if (kbdev->protected_mode) { + unsigned long flags; + + kbase_ipa_control_protm_exited(kbdev); + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbase_hwcnt_backend_csf_protm_exited(&kbdev->hwcnt_gpu_iface); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + } +#endif kbdev->protected_mode = false; spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); @@ -2093,7 +2732,8 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) GPU_STATUS_PROTECTED_MODE_ACTIVE); /* If cycle counter was 
in use re-enable it, enable_irqs will only be - * false when called from kbase_pm_powerup */ + * false when called from kbase_pm_powerup + */ if (kbdev->pm.backend.gpu_cycle_counter_requests && (flags & PM_ENABLE_IRQS)) { kbase_pm_enable_interrupts(kbdev); @@ -2116,12 +2756,14 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) kbase_pm_enable_interrupts(kbdev); exit: +#if !MALI_USE_CSF if (!kbdev->pm.backend.protected_entry_transition_override) { /* Re-enable GPU hardware counters if we're resetting from * protected mode. */ reenable_protected_mode_hwcnt(kbdev); } +#endif return err; } @@ -2148,12 +2790,21 @@ kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev) spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock, flags); - ++kbdev->pm.backend.gpu_cycle_counter_requests; - if (1 == kbdev->pm.backend.gpu_cycle_counter_requests) + if (kbdev->pm.backend.gpu_cycle_counter_requests == 1) kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_CYCLE_COUNT_START); + else { + /* This might happen after GPU reset. + * Then counter needs to be kicked. 
+ */ + if (!(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & + GPU_STATUS_CYCLE_COUNT_ACTIVE)) { + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CYCLE_COUNT_START); + } + } spin_unlock_irqrestore( &kbdev->pm.backend.gpu_cycle_counter_requests_lock, @@ -2169,6 +2820,8 @@ void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev) KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests < INT_MAX); + kbase_pm_wait_for_l2_powered(kbdev); + kbase_pm_request_gpu_cycle_counter_do_request(kbdev); } @@ -2203,7 +2856,7 @@ void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev) --kbdev->pm.backend.gpu_cycle_counter_requests; - if (0 == kbdev->pm.backend.gpu_cycle_counter_requests) + if (kbdev->pm.backend.gpu_cycle_counter_requests == 0) kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_CYCLE_COUNT_STOP); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h index 95f10e0..70d009e 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ - - /* * Power management API definitions used internally by GPU backend */ @@ -31,7 +28,7 @@ #include -#include "mali_kbase_pm_ca.h" +#include "backend/gpu/mali_kbase_pm_ca.h" #include "mali_kbase_pm_policy.h" @@ -205,6 +202,30 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags); */ void kbase_pm_reset_done(struct kbase_device *kbdev); +#if MALI_USE_CSF +/** + * kbase_pm_wait_for_desired_state - Wait for the desired power state to be + * reached + * + * Wait for the L2 and MCU state machines to reach the states corresponding + * to the values of 'kbase_pm_is_l2_desired' and 'kbase_pm_is_mcu_desired'. + * + * The usual use-case for this is to ensure that all parts of GPU have been + * powered up after performing a GPU Reset. + * + * Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock, + * because this function will take that lock itself. + * + * NOTE: This may not wait until the correct state is reached if there is a + * power off in progress and kbase_pm_context_active() was called instead of + * kbase_csf_scheduler_pm_active(). 
+ * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Return: 0 on success, error code on error + */ +int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); +#else /** * kbase_pm_wait_for_desired_state - Wait for the desired power state to be * reached @@ -224,15 +245,17 @@ void kbase_pm_reset_done(struct kbase_device *kbdev); * kbase_pm_wait_for_poweroff_complete() * * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Return: 0 on success, error code on error */ -void kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); +int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); +#endif /** * kbase_pm_wait_for_l2_powered - Wait for the L2 cache to be powered on * - * Wait for the L2 to be powered on, and for the L2 and shader state machines to - * stabilise by reaching the states corresponding to the values of 'l2_desired' - * and 'shaders_desired'. + * Wait for the L2 to be powered on, and for the L2 and the state machines of + * its dependent stack components to stabilise. * * kbdev->pm.active_count must be non-zero when calling this function. * @@ -240,8 +263,10 @@ void kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); * because this function will take that lock itself. 
* * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Return: 0 on success, error code on error */ -void kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev); +int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev); /** * kbase_pm_update_dynamic_cores_onoff - Update the L2 and shader power state @@ -467,7 +492,8 @@ void kbase_pm_register_access_enable(struct kbase_device *kbdev); void kbase_pm_register_access_disable(struct kbase_device *kbdev); /* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline - * function */ + * function + */ /** * kbase_pm_metrics_is_active - Check if the power management metrics @@ -511,8 +537,22 @@ void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, #ifdef CONFIG_MALI_MIDGARD_DVFS +#if MALI_USE_CSF +/** + * kbase_platform_dvfs_event - Report utilisation to DVFS code for CSF GPU + * + * Function provided by platform specific code when DVFS is enabled to allow + * the power management metrics system to report utilisation. + * + * @kbdev: The kbase device structure for the device (must be a + * valid pointer) + * @utilisation: The current calculated utilisation by the metrics system. + * Return: Returns 0 on failure and non zero on success. + */ +int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation); +#else /** - * kbase_platform_dvfs_event - Report utilisation to DVFS code + * kbase_platform_dvfs_event - Report utilisation to DVFS code for JM GPU * * Function provided by platform specific code when DVFS is enabled to allow * the power management metrics system to report utilisation. @@ -525,11 +565,12 @@ void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, * group. * Return: Returns 0 on failure and non zero on success. 
*/ - int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, - u32 util_gl_share, u32 util_cl_share[2]); + u32 util_gl_share, u32 util_cl_share[2]); #endif +#endif /* CONFIG_MALI_MIDGARD_DVFS */ + void kbase_pm_power_changed(struct kbase_device *kbdev); /** @@ -683,6 +724,72 @@ extern bool corestack_driver_control; */ bool kbase_pm_is_l2_desired(struct kbase_device *kbdev); +#if MALI_USE_CSF +/** + * kbase_pm_is_mcu_desired - Check whether MCU is desired + * + * @kbdev: Device pointer + * + * This shall be called to check whether MCU needs to be enabled. + * + * Return: true if MCU needs to be enabled. + */ +bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev); + +/** + * kbase_pm_idle_groups_sched_suspendable - Check whether the scheduler can be + * suspended to low power state when all + * the CSGs are idle + * + * @kbdev: Device pointer + * + * Return: true if allowed to enter the suspended state. + */ +static inline +bool kbase_pm_idle_groups_sched_suspendable(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + return !(kbdev->pm.backend.csf_pm_sched_flags & + CSF_DYNAMIC_PM_SCHED_IGNORE_IDLE); +} + +/** + * kbase_pm_no_runnables_sched_suspendable - Check whether the scheduler can be + * suspended to low power state when + * there are no runnable CSGs. + * + * @kbdev: Device pointer + * + * Return: true if allowed to enter the suspended state. + */ +static inline +bool kbase_pm_no_runnables_sched_suspendable(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + return !(kbdev->pm.backend.csf_pm_sched_flags & + CSF_DYNAMIC_PM_SCHED_NO_SUSPEND); +} + +/** + * kbase_pm_no_mcu_core_pwroff - Check whether the PM is required to keep the + * MCU core powered in accordance to the active + * power management policy + * + * @kbdev: Device pointer + * + * Return: true if the MCU is to retain powered. 
+ */ +static inline bool kbase_pm_no_mcu_core_pwroff(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + return kbdev->pm.backend.csf_pm_sched_flags & + CSF_DYNAMIC_PM_CORE_KEEP_ON; +} +#endif + /** * kbase_pm_lock - Lock all necessary mutexes to perform PM actions * @@ -692,7 +799,9 @@ bool kbase_pm_is_l2_desired(struct kbase_device *kbdev); */ static inline void kbase_pm_lock(struct kbase_device *kbdev) { +#if !MALI_USE_CSF mutex_lock(&kbdev->js_data.runpool_mutex); +#endif /* !MALI_USE_CSF */ mutex_lock(&kbdev->pm.lock); } @@ -704,7 +813,9 @@ static inline void kbase_pm_lock(struct kbase_device *kbdev) static inline void kbase_pm_unlock(struct kbase_device *kbdev) { mutex_unlock(&kbdev->pm.lock); +#if !MALI_USE_CSF mutex_unlock(&kbdev->js_data.runpool_mutex); +#endif /* !MALI_USE_CSF */ } #endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_l2_states.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_l2_states.h index 12cb051..ef72f60 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_l2_states.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_l2_states.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ /* @@ -25,6 +24,19 @@ * The function-like macro KBASEP_L2_STATE() must be defined before including * this header file. This header file can be included multiple times in the * same compilation unit with different definitions of KBASEP_L2_STATE(). + * + * @OFF: The L2 cache and tiler are off + * @PEND_ON: The L2 cache and tiler are powering on + * @RESTORE_CLOCKS: The GPU clock is restored. Conditionally used. + * @ON_HWCNT_ENABLE: The L2 cache and tiler are on, and hwcnt is being enabled + * @ON: The L2 cache and tiler are on, and hwcnt is enabled + * @ON_HWCNT_DISABLE: The L2 cache and tiler are on, and hwcnt is being disabled + * @SLOW_DOWN_CLOCKS: The GPU clock is set to appropriate or lowest clock. + * Conditionally used. + * @POWER_DOWN: The L2 cache and tiler are about to be powered off + * @PEND_OFF: The L2 cache and tiler are powering off + * @RESET_WAIT: The GPU is resetting, L2 cache and tiler power state are + * unknown */ KBASEP_L2_STATE(OFF) KBASEP_L2_STATE(PEND_ON) diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_mcu_states.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_mcu_states.h new file mode 100644 index 0000000..4e99928 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_mcu_states.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * Backend-specific Power Manager MCU state definitions. + * The function-like macro KBASEP_MCU_STATE() must be defined before including + * this header file. This header file can be included multiple times in the + * same compilation unit with different definitions of KBASEP_MCU_STATE(). + * + * @OFF: The MCU is powered off. + * @PEND_ON_RELOAD: The warm boot of MCU or cold boot of MCU (with + * firmware reloading) is in progress. + * @ON_GLB_REINIT_PEND: The MCU is enabled and Global configuration + * requests have been sent to the firmware. + * @ON_HWCNT_ENABLE: The Global requests have completed and MCU is now + * ready for use and hwcnt is being enabled. + * @ON: The MCU is active and hwcnt has been enabled. + * @ON_CORE_ATTR_UPDATE_PEND: The MCU is active and mask of enabled shader cores + * is being updated. + * @ON_HWCNT_DISABLE: The MCU is on and hwcnt is being disabled. + * @ON_HALT: The MCU is on and hwcnt has been disabled, MCU + * halt would be triggered. + * @ON_PEND_HALT: MCU halt in progress, confirmation pending. + * @POWER_DOWN: MCU halted operations, pending being disabled. + * @PEND_OFF: MCU is being disabled, pending on powering off. + * @RESET_WAIT: The GPU is resetting, MCU state is unknown. 
+ */ +KBASEP_MCU_STATE(OFF) +KBASEP_MCU_STATE(PEND_ON_RELOAD) +KBASEP_MCU_STATE(ON_GLB_REINIT_PEND) +KBASEP_MCU_STATE(ON_HWCNT_ENABLE) +KBASEP_MCU_STATE(ON) +KBASEP_MCU_STATE(ON_CORE_ATTR_UPDATE_PEND) +KBASEP_MCU_STATE(ON_HWCNT_DISABLE) +KBASEP_MCU_STATE(ON_HALT) +KBASEP_MCU_STATE(ON_PEND_HALT) +KBASEP_MCU_STATE(POWER_DOWN) +KBASEP_MCU_STATE(PEND_OFF) +KBASEP_MCU_STATE(RESET_WAIT) +/* Additional MCU states with HOST_CONTROL_SHADERS */ +KBASEP_MCU_STATE(HCTL_SHADERS_PEND_ON) +KBASEP_MCU_STATE(HCTL_CORES_NOTIFY_PEND) +KBASEP_MCU_STATE(HCTL_MCU_ON_RECHECK) +KBASEP_MCU_STATE(HCTL_SHADERS_READY_OFF) +KBASEP_MCU_STATE(HCTL_SHADERS_PEND_OFF) diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c index de3babe..69e8dd3 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ - - /* * Metrics for power management */ @@ -29,22 +26,28 @@ #include #include #include + +#if MALI_USE_CSF +#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" +#include +#else #include +#endif /* !MALI_USE_CSF */ + #include #include -/* When VSync is being hit aim for utilisation between 70-90% */ -#define KBASE_PM_VSYNC_MIN_UTILISATION 70 -#define KBASE_PM_VSYNC_MAX_UTILISATION 90 -/* Otherwise aim for 10-40% */ -#define KBASE_PM_NO_VSYNC_MIN_UTILISATION 10 -#define KBASE_PM_NO_VSYNC_MAX_UTILISATION 40 - /* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly - * under 11s. Exceeding this will cause overflow */ + * under 11s. Exceeding this will cause overflow + */ #define KBASE_PM_TIME_SHIFT 8 +#if MALI_USE_CSF +/* To get the GPU_ACTIVE value in nano seconds unit */ +#define GPU_ACTIVE_SCALING_FACTOR ((u64)1E9) +#endif + #ifdef CONFIG_MALI_MIDGARD_DVFS static enum hrtimer_restart dvfs_callback(struct hrtimer *timer) { @@ -71,11 +74,45 @@ static enum hrtimer_restart dvfs_callback(struct hrtimer *timer) int kbasep_pm_metrics_init(struct kbase_device *kbdev) { - KBASE_DEBUG_ASSERT(kbdev != NULL); +#if MALI_USE_CSF + struct kbase_ipa_control_perf_counter perf_counter; + int err; + /* One counter group */ + const size_t NUM_PERF_COUNTERS = 1; + + KBASE_DEBUG_ASSERT(kbdev != NULL); kbdev->pm.backend.metrics.kbdev = kbdev; + kbdev->pm.backend.metrics.time_period_start = ktime_get(); + kbdev->pm.backend.metrics.values.time_busy = 0; + kbdev->pm.backend.metrics.values.time_idle = 0; + kbdev->pm.backend.metrics.values.time_in_protm = 0; + + perf_counter.scaling_factor = GPU_ACTIVE_SCALING_FACTOR; + /* Normalize values by GPU frequency */ + perf_counter.gpu_norm = true; + + /* We need the GPU_ACTIVE counter, which is in the CSHW group */ + perf_counter.type = KBASE_IPA_CORE_TYPE_CSHW; + + /* We need the GPU_ACTIVE counter */ + 
perf_counter.idx = GPU_ACTIVE_CNT_IDX; + + err = kbase_ipa_control_register( + kbdev, &perf_counter, NUM_PERF_COUNTERS, + &kbdev->pm.backend.metrics.ipa_control_client); + if (err) { + dev_err(kbdev->dev, + "Failed to register IPA with kbase_ipa_control: err=%d", + err); + return -1; + } +#else + KBASE_DEBUG_ASSERT(kbdev != NULL); + kbdev->pm.backend.metrics.kbdev = kbdev; kbdev->pm.backend.metrics.time_period_start = ktime_get(); + kbdev->pm.backend.metrics.gpu_active = false; kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; kbdev->pm.backend.metrics.active_cl_ctx[1] = 0; @@ -89,16 +126,25 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev) kbdev->pm.backend.metrics.values.busy_cl[1] = 0; kbdev->pm.backend.metrics.values.busy_gl = 0; +#endif spin_lock_init(&kbdev->pm.backend.metrics.lock); #ifdef CONFIG_MALI_MIDGARD_DVFS hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); kbdev->pm.backend.metrics.timer.function = dvfs_callback; - + kbdev->pm.backend.metrics.initialized = true; kbase_pm_metrics_start(kbdev); #endif /* CONFIG_MALI_MIDGARD_DVFS */ +#if MALI_USE_CSF + /* The sanity check on the GPU_ACTIVE performance counter + * is skipped for Juno platforms that have timing problems. 
+ */ + kbdev->pm.backend.metrics.skip_gpu_active_sanity_check = + of_machine_is_compatible("arm,juno"); +#endif + return 0; } KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init); @@ -115,7 +161,13 @@ void kbasep_pm_metrics_term(struct kbase_device *kbdev) spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); hrtimer_cancel(&kbdev->pm.backend.metrics.timer); + kbdev->pm.backend.metrics.initialized = false; #endif /* CONFIG_MALI_MIDGARD_DVFS */ + +#if MALI_USE_CSF + kbase_ipa_control_unregister( + kbdev, kbdev->pm.backend.metrics.ipa_control_client); +#endif } KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term); @@ -123,8 +175,117 @@ KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term); /* caller needs to hold kbdev->pm.backend.metrics.lock before calling this * function */ +#if MALI_USE_CSF +#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) +static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev) +{ + int err; + u64 gpu_active_counter; + u64 protected_time; + ktime_t now; + + lockdep_assert_held(&kbdev->pm.backend.metrics.lock); + + /* Query IPA_CONTROL for the latest GPU-active and protected-time + * info. + */ + err = kbase_ipa_control_query( + kbdev, kbdev->pm.backend.metrics.ipa_control_client, + &gpu_active_counter, 1, &protected_time); + + /* Read the timestamp after reading the GPU_ACTIVE counter value. + * This ensures the time gap between the 2 reads is consistent for + * a meaningful comparison between the increment of GPU_ACTIVE and + * elapsed time. The lock taken inside kbase_ipa_control_query() + * function can cause lot of variation. 
+ */ + now = ktime_get(); + + if (err) { + dev_err(kbdev->dev, + "Failed to query the increment of GPU_ACTIVE counter: err=%d", + err); + } else { + u64 diff_ns; + s64 diff_ns_signed; + u32 ns_time; + ktime_t diff = ktime_sub( + now, kbdev->pm.backend.metrics.time_period_start); + + diff_ns_signed = ktime_to_ns(diff); + + if (diff_ns_signed < 0) + return; + + diff_ns = (u64)diff_ns_signed; + +#if !IS_ENABLED(CONFIG_MALI_NO_MALI) + /* The GPU_ACTIVE counter shouldn't clock-up more time than has + * actually elapsed - but still some margin needs to be given + * when doing the comparison. There could be some drift between + * the CPU and GPU clock. + * + * Can do the check only in a real driver build, as an arbitrary + * value for GPU_ACTIVE can be fed into dummy model in no_mali + * configuration which may not correspond to the real elapsed + * time. + */ + if (!kbdev->pm.backend.metrics.skip_gpu_active_sanity_check) { + /* Use a margin value that is approximately 1% of the time + * difference. + */ + u64 margin_ns = diff_ns >> 6; + if (gpu_active_counter > (diff_ns + margin_ns)) { + dev_info( + kbdev->dev, + "GPU activity takes longer than time interval: %llu ns > %llu ns", + (unsigned long long)gpu_active_counter, + (unsigned long long)diff_ns); + } + } +#endif + /* Calculate time difference in units of 256ns */ + ns_time = (u32)(diff_ns >> KBASE_PM_TIME_SHIFT); + + /* Add protected_time to gpu_active_counter so that time in + * protected mode is included in the apparent GPU active time, + * then convert it from units of 1ns to units of 256ns, to + * match what JM GPUs use. The assumption is made here that the + * GPU is 100% busy while in protected mode, so we should add + * this since the GPU can't (and thus won't) update these + * counters while it's actually in protected mode. + * + * Perform the add after dividing each value down, to reduce + * the chances of overflows. 
+ */ + protected_time >>= KBASE_PM_TIME_SHIFT; + gpu_active_counter >>= KBASE_PM_TIME_SHIFT; + gpu_active_counter += protected_time; + + /* Ensure the following equations don't go wrong if ns_time is + * slightly larger than gpu_active_counter somehow + */ + gpu_active_counter = MIN(gpu_active_counter, ns_time); + + kbdev->pm.backend.metrics.values.time_busy += + gpu_active_counter; + + kbdev->pm.backend.metrics.values.time_idle += + ns_time - gpu_active_counter; + + /* Also make time in protected mode available explicitly, + * so users of this data have this info, too. + */ + kbdev->pm.backend.metrics.values.time_in_protm += + protected_time; + } + + kbdev->pm.backend.metrics.time_period_start = now; +} +#endif /* defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) */ +#else static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev, - ktime_t now) + ktime_t now) { ktime_t diff; @@ -149,12 +310,13 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev, if (kbdev->pm.backend.metrics.active_gl_ctx[2]) kbdev->pm.backend.metrics.values.busy_gl += ns_time; } else { - kbdev->pm.backend.metrics.values.time_idle += (u32) (ktime_to_ns(diff) - >> KBASE_PM_TIME_SHIFT); + kbdev->pm.backend.metrics.values.time_idle += + (u32)(ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); } kbdev->pm.backend.metrics.time_period_start = now; } +#endif /* MALI_USE_CSF */ #if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, @@ -165,14 +327,23 @@ void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, unsigned long flags; spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); +#if MALI_USE_CSF + kbase_pm_get_dvfs_utilisation_calc(kbdev); +#else kbase_pm_get_dvfs_utilisation_calc(kbdev, ktime_get()); +#endif memset(diff, 0, sizeof(*diff)); diff->time_busy = cur->time_busy - last->time_busy; diff->time_idle = cur->time_idle - last->time_idle; + +#if MALI_USE_CSF + 
diff->time_in_protm = cur->time_in_protm - last->time_in_protm; +#else diff->busy_cl[0] = cur->busy_cl[0] - last->busy_cl[0]; diff->busy_cl[1] = cur->busy_cl[1] - last->busy_cl[1]; diff->busy_gl = cur->busy_gl - last->busy_gl; +#endif *last = *cur; @@ -184,26 +355,42 @@ KBASE_EXPORT_TEST_API(kbase_pm_get_dvfs_metrics); #ifdef CONFIG_MALI_MIDGARD_DVFS void kbase_pm_get_dvfs_action(struct kbase_device *kbdev) { - int utilisation, util_gl_share; - int util_cl_share[2]; - int busy; + int utilisation; struct kbasep_pm_metrics *diff; +#if !MALI_USE_CSF + int busy; + int util_gl_share; + int util_cl_share[2]; +#endif KBASE_DEBUG_ASSERT(kbdev != NULL); diff = &kbdev->pm.backend.metrics.dvfs_diff; - kbase_pm_get_dvfs_metrics(kbdev, &kbdev->pm.backend.metrics.dvfs_last, diff); + kbase_pm_get_dvfs_metrics(kbdev, &kbdev->pm.backend.metrics.dvfs_last, + diff); utilisation = (100 * diff->time_busy) / max(diff->time_busy + diff->time_idle, 1u); +#if !MALI_USE_CSF busy = max(diff->busy_gl + diff->busy_cl[0] + diff->busy_cl[1], 1u); + util_gl_share = (100 * diff->busy_gl) / busy; util_cl_share[0] = (100 * diff->busy_cl[0]) / busy; util_cl_share[1] = (100 * diff->busy_cl[1]) / busy; - kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, util_cl_share); + kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, + util_cl_share); +#else + /* Note that, at present, we don't pass protected-mode time to the + * platform here. It's unlikely to be useful, however, as the platform + * probably just cares whether the GPU is busy or not; time in + * protected mode is already added to busy-time at this point, though, + * so we should be good. 
+ */ + kbase_platform_dvfs_event(kbdev, utilisation); +#endif } bool kbase_pm_metrics_is_active(struct kbase_device *kbdev) @@ -224,11 +411,20 @@ KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active); void kbase_pm_metrics_start(struct kbase_device *kbdev) { unsigned long flags; + bool update = true; + + if (unlikely(!kbdev->pm.backend.metrics.initialized)) + return; spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); - kbdev->pm.backend.metrics.timer_active = true; + if (!kbdev->pm.backend.metrics.timer_active) + kbdev->pm.backend.metrics.timer_active = true; + else + update = false; spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); - hrtimer_start(&kbdev->pm.backend.metrics.timer, + + if (update) + hrtimer_start(&kbdev->pm.backend.metrics.timer, HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period), HRTIMER_MODE_REL); } @@ -236,16 +432,26 @@ void kbase_pm_metrics_start(struct kbase_device *kbdev) void kbase_pm_metrics_stop(struct kbase_device *kbdev) { unsigned long flags; + bool update = true; + + if (unlikely(!kbdev->pm.backend.metrics.initialized)) + return; spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); - kbdev->pm.backend.metrics.timer_active = false; + if (kbdev->pm.backend.metrics.timer_active) + kbdev->pm.backend.metrics.timer_active = false; + else + update = false; spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); - hrtimer_cancel(&kbdev->pm.backend.metrics.timer); + + if (update) + hrtimer_cancel(&kbdev->pm.backend.metrics.timer); } #endif /* CONFIG_MALI_MIDGARD_DVFS */ +#if !MALI_USE_CSF /** * kbase_pm_metrics_active_calc - Update PM active counts based on currently * running atoms @@ -270,7 +476,8 @@ static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev) struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); /* Head atom may have just completed, so if it isn't running - * then try the next atom */ + * then try the next atom + */ if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) 
katom = kbase_gpu_inspect(kbdev, js, 1); @@ -309,10 +516,12 @@ void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp) timestamp = &now; } - /* Track how long CL and/or GL jobs have been busy for */ + /* Track how much of time has been spent busy or idle. For JM GPUs, + * this also evaluates how long CL and/or GL jobs have been busy for. + */ kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp); kbase_pm_metrics_active_calc(kbdev); - spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); } +#endif /* !MALI_USE_CSF */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c index 17ed21e..cf61ef8 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ /* @@ -28,27 +27,54 @@ #include #include #include +#include -static const struct kbase_pm_policy *const all_policy_list[] = { -#ifdef CONFIG_MALI_NO_MALI - &kbase_pm_always_on_policy_ops, - &kbase_pm_coarse_demand_policy_ops, -#if !MALI_CUSTOMER_RELEASE - &kbase_pm_always_on_demand_policy_ops, +#if MALI_USE_CSF && defined CONFIG_MALI_DEBUG +#include #endif -#else /* CONFIG_MALI_NO_MALI */ + +#include + +static const struct kbase_pm_policy *const all_policy_list[] = { &kbase_pm_coarse_demand_policy_ops, -#if !MALI_CUSTOMER_RELEASE - &kbase_pm_always_on_demand_policy_ops, -#endif &kbase_pm_always_on_policy_ops -#endif /* CONFIG_MALI_NO_MALI */ }; void kbase_pm_policy_init(struct kbase_device *kbdev) { - kbdev->pm.backend.pm_current_policy = all_policy_list[0]; - kbdev->pm.backend.pm_current_policy->init(kbdev); + const struct kbase_pm_policy *default_policy = all_policy_list[0]; + struct device_node *np = kbdev->dev->of_node; + const char *power_policy_name; + unsigned long flags; + int i; + + if (of_property_read_string(np, "power_policy", &power_policy_name) == 0) { + for (i = 0; i < ARRAY_SIZE(all_policy_list); i++) + if (sysfs_streq(all_policy_list[i]->name, power_policy_name)) { + default_policy = all_policy_list[i]; + break; + } + } + +#if MALI_USE_CSF && defined(CONFIG_MALI_DEBUG) + /* Use always_on policy if module param fw_debug=1 is + * passed, to aid firmware debugging. 
+ */ + if (fw_debug) + default_policy = &kbase_pm_always_on_policy_ops; +#endif + + default_policy->init(kbdev); + +#if MALI_USE_CSF + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.pm_current_policy = default_policy; + kbdev->pm.backend.csf_pm_sched_flags = default_policy->pm_sched_flags; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#else + CSTD_UNUSED(flags); + kbdev->pm.backend.pm_current_policy = default_policy; +#endif } void kbase_pm_policy_term(struct kbase_device *kbdev) @@ -93,13 +119,17 @@ void kbase_pm_update_active(struct kbase_device *kbdev) pm->backend.invoke_poweroff_wait_wq_when_l2_off = false; pm->backend.poweroff_wait_in_progress = false; pm->backend.l2_desired = true; +#if MALI_USE_CSF + pm->backend.mcu_desired = true; +#endif spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); kbase_pm_do_poweron(kbdev, false); } } else { /* It is an error for the power policy to power off the GPU - * when there are contexts active */ + * when there are contexts active + */ KBASE_DEBUG_ASSERT(pm->active_count == 0); pm->backend.poweron_required = false; @@ -127,6 +157,16 @@ void kbase_pm_update_dynamic_cores_onoff(struct kbase_device *kbdev) return; if (kbdev->pm.backend.poweroff_wait_in_progress) return; + +#if MALI_USE_CSF + CSTD_UNUSED(shaders_desired); + /* Invoke the MCU state machine to send a request to FW for updating + * the mask of shader cores that can be used for allocation of + * endpoints requested by CSGs. 
+ */ + if (kbase_pm_is_mcu_desired(kbdev)) + kbase_pm_update_state(kbdev); +#else /* In protected transition, don't allow outside shader core request * affect transition, return directly */ @@ -138,6 +178,7 @@ void kbase_pm_update_dynamic_cores_onoff(struct kbase_device *kbdev) if (shaders_desired && kbase_pm_is_l2_desired(kbdev)) { kbase_pm_update_state(kbdev); } +#endif } void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) @@ -153,11 +194,20 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) if (kbdev->pm.backend.protected_transition_override) /* We are trying to change in/out of protected mode - force all - * cores off so that the L2 powers down */ + * cores off so that the L2 powers down + */ shaders_desired = false; else shaders_desired = kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev); +#if MALI_USE_CSF + /* On CSF GPUs, Host driver isn't supposed to do the power management + * for shader cores. CSF firmware will power up the cores appropriately + * and so from Driver's standpoint 'shaders_desired' flag shall always + * remain 0. + */ + shaders_desired = false; +#endif if (kbdev->pm.backend.shaders_desired != shaders_desired) { KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, kbdev->pm.backend.shaders_desired); @@ -197,20 +247,106 @@ const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev) KBASE_EXPORT_TEST_API(kbase_pm_get_policy); +#if MALI_USE_CSF +static int policy_change_wait_for_L2_off(struct kbase_device *kbdev) +{ +#define WAIT_DURATION_MS (3000) + long remaining; + long timeout = kbase_csf_timeout_in_jiffies(WAIT_DURATION_MS); + int err = 0; + + /* Wait for L2 becoming off, by which the MCU is also implicitly off + * since the L2 state machine would only start its power-down + * sequence when the MCU is in off state. The L2 off is required + * as the tiler may need to be power cycled for MCU reconfiguration + * for host control of shader cores. 
+ */ +#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE + remaining = wait_event_killable_timeout( + kbdev->pm.backend.gpu_in_desired_state_wait, + kbdev->pm.backend.l2_state == KBASE_L2_OFF, timeout); +#else + remaining = wait_event_timeout( + kbdev->pm.backend.gpu_in_desired_state_wait, + kbdev->pm.backend.l2_state == KBASE_L2_OFF, timeout); +#endif + + if (!remaining) { + err = -ETIMEDOUT; + } else if (remaining < 0) { + dev_info(kbdev->dev, + "Wait for L2_off got interrupted"); + err = (int)remaining; + } + + dev_dbg(kbdev->dev, "%s: err=%d mcu_state=%d, L2_state=%d\n", __func__, + err, kbdev->pm.backend.mcu_state, kbdev->pm.backend.l2_state); + + return err; +} +#endif + void kbase_pm_set_policy(struct kbase_device *kbdev, const struct kbase_pm_policy *new_policy) { const struct kbase_pm_policy *old_policy; unsigned long flags; +#if MALI_USE_CSF + unsigned int new_policy_csf_pm_sched_flags; + bool sched_suspend; + bool reset_gpu = false; +#endif KBASE_DEBUG_ASSERT(kbdev != NULL); KBASE_DEBUG_ASSERT(new_policy != NULL); KBASE_KTRACE_ADD(kbdev, PM_SET_POLICY, NULL, new_policy->id); +#if MALI_USE_CSF + /* Serialize calls on kbase_pm_set_policy() */ + mutex_lock(&kbdev->pm.backend.policy_change_lock); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + /* policy_change_clamp_state_to_off, when needed, is set/cleared in + * this function, a very limited temporal scope for covering the + * change transition. + */ + WARN_ON(kbdev->pm.backend.policy_change_clamp_state_to_off); + new_policy_csf_pm_sched_flags = new_policy->pm_sched_flags; + + /* Requiring the scheduler PM suspend operation when changes involving + * the always_on policy, reflected by the CSF_DYNAMIC_PM_CORE_KEEP_ON + * flag bit. 
+ */ + sched_suspend = kbdev->csf.firmware_inited && + (CSF_DYNAMIC_PM_CORE_KEEP_ON & + (new_policy_csf_pm_sched_flags | + kbdev->pm.backend.csf_pm_sched_flags)); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (sched_suspend) + kbase_csf_scheduler_pm_suspend(kbdev); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + /* If the current active policy is always_on, one needs to clamp the + * MCU/L2 for reaching off-state + */ + if (sched_suspend) + kbdev->pm.backend.policy_change_clamp_state_to_off = + CSF_DYNAMIC_PM_CORE_KEEP_ON & kbdev->pm.backend.csf_pm_sched_flags; + + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (sched_suspend) + reset_gpu = policy_change_wait_for_L2_off(kbdev); +#endif + /* During a policy change we pretend the GPU is active */ /* A suspend won't happen here, because we're in a syscall from a - * userspace thread */ + * userspace thread + */ kbase_pm_context_active(kbdev); kbase_pm_lock(kbdev); @@ -225,25 +361,49 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, if (old_policy->term) old_policy->term(kbdev); + memset(&kbdev->pm.backend.pm_policy_data, 0, + sizeof(union kbase_pm_policy_data)); + KBASE_KTRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, new_policy->id); if (new_policy->init) new_policy->init(kbdev); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbdev->pm.backend.pm_current_policy = new_policy; +#if MALI_USE_CSF + kbdev->pm.backend.csf_pm_sched_flags = new_policy_csf_pm_sched_flags; + /* New policy in place, release the clamping on mcu/L2 off state */ + kbdev->pm.backend.policy_change_clamp_state_to_off = false; + kbase_pm_update_state(kbdev); +#endif spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); /* If any core power state changes were previously attempted, but * couldn't be made because the policy was changing (current_policy was - * NULL), then re-try them here. */ + * NULL), then re-try them here. 
+ */ kbase_pm_update_active(kbdev); kbase_pm_update_cores_state(kbdev); kbase_pm_unlock(kbdev); /* Now the policy change is finished, we release our fake context active - * reference */ + * reference + */ kbase_pm_context_idle(kbdev); + +#if MALI_USE_CSF + /* Reverse the suspension done */ + if (reset_gpu) { + dev_warn(kbdev->dev, "Resorting to GPU reset for policy change\n"); + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) + kbase_reset_gpu(kbdev); + kbase_reset_gpu_wait(kbdev); + } else if (sched_suspend) + kbase_csf_scheduler_pm_resume(kbdev); + + mutex_unlock(&kbdev->pm.backend.policy_change_lock); +#endif } KBASE_EXPORT_TEST_API(kbase_pm_set_policy); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h index f103ef0..e811365 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2015, 2018-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015, 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ /* diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_shader_states.h b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_shader_states.h index 2bd9e47..8622ef7 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_shader_states.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_shader_states.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /* @@ -26,6 +25,41 @@ * including this header file. This header file can be included multiple * times in the same compilation unit with different definitions of * KBASEP_SHADER_STATE(). + * + * @OFF_CORESTACK_OFF: The shaders and core stacks are off + * @OFF_CORESTACK_PEND_ON: The shaders are off, core stacks have been + * requested to power on and hwcnt is being + * disabled + * @PEND_ON_CORESTACK_ON: Core stacks are on, shaders have been + * requested to power on. Or after doing + * partial shader on/off, checking whether + * it's the desired state. + * @ON_CORESTACK_ON: The shaders and core stacks are on, and + * hwcnt already enabled. 
+ * @ON_CORESTACK_ON_RECHECK: The shaders and core stacks are on, hwcnt + * disabled, and checks to powering down or + * re-enabling hwcnt. + * @WAIT_OFF_CORESTACK_ON: The shaders have been requested to power + * off, but they remain on for the duration + * of the hysteresis timer + * @WAIT_GPU_IDLE: The shaders partial poweroff needs to + * reach a state where jobs on the GPU are + * finished including jobs currently running + * and in the GPU queue because of + * GPU2017-861 + * @WAIT_FINISHED_CORESTACK_ON: The hysteresis timer has expired + * @L2_FLUSHING_CORESTACK_ON: The core stacks are on and the level 2 + * cache is being flushed. + * @READY_OFF_CORESTACK_ON: The core stacks are on and the shaders are + * ready to be powered off. + * @PEND_OFF_CORESTACK_ON: The core stacks are on, and the shaders + * have been requested to power off + * @OFF_CORESTACK_PEND_OFF: The shaders are off, and the core stacks + * have been requested to power off + * @OFF_CORESTACK_OFF_TIMER_PEND_OFF: Shaders and corestacks are off, but the + * tick timer cancellation is still pending. 
+ * @RESET_WAIT: The GPU is resetting, shader and core + * stack power states are unknown */ KBASEP_SHADER_STATE(OFF_CORESTACK_OFF) KBASEP_SHADER_STATE(OFF_CORESTACK_PEND_ON) @@ -33,7 +67,9 @@ KBASEP_SHADER_STATE(PEND_ON_CORESTACK_ON) KBASEP_SHADER_STATE(ON_CORESTACK_ON) KBASEP_SHADER_STATE(ON_CORESTACK_ON_RECHECK) KBASEP_SHADER_STATE(WAIT_OFF_CORESTACK_ON) +#if !MALI_USE_CSF KBASEP_SHADER_STATE(WAIT_GPU_IDLE) +#endif /* !MALI_USE_CSF */ KBASEP_SHADER_STATE(WAIT_FINISHED_CORESTACK_ON) KBASEP_SHADER_STATE(L2_FLUSHING_CORESTACK_ON) KBASEP_SHADER_STATE(READY_OFF_CORESTACK_ON) diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c index cb10518..d10e404 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2016,2018-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2016, 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,22 +17,20 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #include #include -#include +#include #include -void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, - u64 *system_time, struct timespec64 *ts) +void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, + u64 *cycle_counter, + u64 *system_time, + struct timespec64 *ts) { u32 hi1, hi2; - kbase_pm_request_gpu_cycle_counter(kbdev); - if (cycle_counter) { /* Read hi, lo, hi to ensure a coherent u64 */ do { @@ -65,6 +64,46 @@ void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, #else ktime_get_raw_ts64(ts); #endif +} + +#if !MALI_USE_CSF +/** + * timedwait_cycle_count_active() - Timed wait till CYCLE_COUNT_ACTIVE is active + * + * @kbdev: Kbase device + * + * Return: true if CYCLE_COUNT_ACTIVE is active within the timeout. + */ +static bool timedwait_cycle_count_active(struct kbase_device *kbdev) +{ + bool success = false; + const unsigned int timeout = 100; + const unsigned long remaining = jiffies + msecs_to_jiffies(timeout); + while (time_is_after_jiffies(remaining)) { + if ((kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)) & + GPU_STATUS_CYCLE_COUNT_ACTIVE)) { + success = true; + break; + } + } + return success; +} +#endif + +void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, + u64 *system_time, struct timespec64 *ts) +{ +#if !MALI_USE_CSF + kbase_pm_request_gpu_cycle_counter(kbdev); + WARN_ONCE(kbdev->pm.backend.l2_state != KBASE_L2_ON, + "L2 not powered up"); + WARN_ONCE((!timedwait_cycle_count_active(kbdev)), + "Timed out on CYCLE_COUNT_ACTIVE"); +#endif + kbase_backend_get_gpu_time_norequest(kbdev, cycle_counter, system_time, + ts); +#if !MALI_USE_CSF kbase_pm_release_gpu_cycle_counter(kbdev); +#endif } diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/build.bp b/dvalin/kernel/drivers/gpu/arm/midgard/build.bp index 51aeecd..979e06f 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/build.bp +++ 
b/dvalin/kernel/drivers/gpu/arm/midgard/build.bp @@ -1,15 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * - * A copy of the licence is included with the program, and can also be obtained - * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. * */ @@ -19,11 +25,17 @@ * both mali_kbase and the test modules. 
*/ bob_defaults { name: "mali_kbase_shared_config_defaults", + defaults: [ + "kernel_defaults", + ], no_mali: { - kbuild_options: ["CONFIG_MALI_NO_MALI=y"], + kbuild_options: [ + "CONFIG_MALI_NO_MALI=y", + "CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}", + ], }, - mali_real_hw: { - kbuild_options: ["CONFIG_MALI_REAL_HW=y"], + gpu_has_csf: { + kbuild_options: ["CONFIG_MALI_CSF_SUPPORT=y"], }, mali_devfreq: { kbuild_options: ["CONFIG_MALI_DEVFREQ=y"], @@ -31,8 +43,62 @@ bob_defaults { mali_midgard_dvfs: { kbuild_options: ["CONFIG_MALI_MIDGARD_DVFS=y"], }, + mali_gator_support: { + kbuild_options: ["CONFIG_MALI_GATOR_SUPPORT=y"], + }, + mali_midgard_enable_trace: { + kbuild_options: ["CONFIG_MALI_MIDGARD_ENABLE_TRACE=y"], + }, + mali_dma_fence: { + kbuild_options: ["CONFIG_MALI_DMA_FENCE=y"], + }, + mali_arbiter_support: { + kbuild_options: ["CONFIG_MALI_ARBITER_SUPPORT=y"], + }, + mali_dma_buf_map_on_demand: { + kbuild_options: ["CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND=y"], + }, + mali_dma_buf_legacy_compat: { + kbuild_options: ["CONFIG_MALI_DMA_BUF_LEGACY_COMPAT=y"], + }, + mali_2mb_alloc: { + kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"], + }, + mali_memory_fully_backed: { + kbuild_options: ["CONFIG_MALI_MEMORY_FULLY_BACKED=y"], + }, + mali_corestack: { + kbuild_options: ["CONFIG_MALI_CORESTACK=y"], + }, + mali_real_hw: { + kbuild_options: ["CONFIG_MALI_REAL_HW=y"], + }, + mali_error_inject_none: { + kbuild_options: ["CONFIG_MALI_ERROR_INJECT_NONE=y"], + }, + mali_error_inject_track_list: { + kbuild_options: ["CONFIG_MALI_ERROR_INJECT_TRACK_LIST=y"], + }, + mali_error_inject_random: { + kbuild_options: ["CONFIG_MALI_ERROR_INJECT_RANDOM=y"], + }, + mali_error_inject: { + kbuild_options: ["CONFIG_MALI_ERROR_INJECT=y"], + }, + mali_gem5_build: { + kbuild_options: ["CONFIG_MALI_GEM5_BUILD=y"], + }, mali_debug: { - kbuild_options: ["CONFIG_MALI_DEBUG=y"], + kbuild_options: [ + "CONFIG_MALI_DEBUG=y", + "MALI_KERNEL_TEST_API={{.debug}}", + ], + }, + mali_fence_debug: { + 
kbuild_options: ["CONFIG_MALI_FENCE_DEBUG=y"], + }, + mali_system_trace: { + kbuild_options: ["CONFIG_MALI_SYSTEM_TRACE=y"], }, buslog: { kbuild_options: ["CONFIG_MALI_BUSLOG=y"], @@ -43,44 +109,60 @@ bob_defaults { cinstr_gwt: { kbuild_options: ["CONFIG_MALI_CINSTR_GWT=y"], }, - mali_gator_support: { - kbuild_options: ["CONFIG_MALI_GATOR_SUPPORT=y"], + cinstr_primary_hwc: { + kbuild_options: ["CONFIG_MALI_PRFCNT_SET_PRIMARY=y"], }, - mali_midgard_enable_trace: { - kbuild_options: ["CONFIG_MALI_MIDGARD_ENABLE_TRACE=y"], + cinstr_secondary_hwc: { + kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SECONDARY=y"], }, - mali_system_trace: { - kbuild_options: ["CONFIG_MALI_SYSTEM_TRACE=y"], + cinstr_tertiary_hwc: { + kbuild_options: ["CONFIG_MALI_PRFCNT_SET_TERTIARY=y"], }, - mali_pwrsoft_765: { - kbuild_options: ["CONFIG_MALI_PWRSOFT_765=y"], + cinstr_hwc_set_select_via_debug_fs: { + kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS=y"], }, - mali_memory_fully_backed: { - kbuild_options: ["CONFIG_MALI_MEMORY_FULLY_BACKED=y"], + mali_job_dump: { + kbuild_options: ["CONFIG_MALI_JOB_DUMP"], }, - mali_dma_buf_map_on_demand: { - kbuild_options: ["CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND=y"], - }, - mali_dma_buf_legacy_compat: { - kbuild_options: ["CONFIG_MALI_DMA_BUF_LEGACY_COMPAT=y"], + mali_pwrsoft_765: { + kbuild_options: ["CONFIG_MALI_PWRSOFT_765=y"], }, - mali_arbiter_support: { - kbuild_options: ["CONFIG_MALI_ARBITER_SUPPORT=y"], + mali_hw_errata_1485982_not_affected: { + kbuild_options: ["CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y"], }, - mali_gem5_build: { - kbuild_options: ["CONFIG_MALI_GEM5_BUILD=y"], + mali_hw_errata_1485982_use_clock_alternative: { + kbuild_options: ["CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE=y"], }, kbuild_options: [ - "MALI_UNIT_TEST={{.unit_test_code}}", + "CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}", "MALI_CUSTOMER_RELEASE={{.release}}", + "MALI_UNIT_TEST={{.unit_test_code}}", "MALI_USE_CSF={{.gpu_has_csf}}", - 
"MALI_KERNEL_TEST_API={{.debug}}", + "MALI_JIT_PRESSURE_LIMIT_BASE={{.jit_pressure_limit_base}}", + + // Start of CS experimental features definitions. + // If there is nothing below, definition should be added as follows: + // "MALI_EXPERIMENTAL_FEATURE={{.experimental_feature}}" + // experimental_feature above comes from Mconfig in + // /product/base/ + // However, in Mconfig, experimental_feature should be looked up (for + // similar explanation to this one) as ALLCAPS, i.e. + // EXPERIMENTAL_FEATURE. + // + // IMPORTANT: MALI_CS_EXPERIMENTAL should NEVER be defined below as it + // is an umbrella feature that would be open for inappropriate use + // (catch-all for experimental CS code without separating it into + // different features). + "MALI_INCREMENTAL_RENDERING={{.incremental_rendering}}", + "GPU_TIMESTAMP_CORRECTION={{.gpu_timestamp_correction}}", ], - defaults: ["kernel_defaults"], } bob_kernel_module { name: "mali_kbase", + defaults: [ + "mali_kbase_shared_config_defaults", + ], srcs: [ "*.c", "*.h", @@ -90,6 +172,7 @@ bob_kernel_module { "backend/gpu/Kbuild", "context/*.c", "context/*.h", + "context/Kbuild", "ipa/*.c", "ipa/*.h", "ipa/Kbuild", @@ -98,52 +181,23 @@ bob_kernel_module { "platform/*/*.h", "platform/*/Kbuild", "thirdparty/*.c", + "thirdparty/Kbuild", "debug/*.c", "debug/*.h", + "debug/Kbuild", "device/*.c", "device/*.h", + "device/Kbuild", "gpu/*.c", "gpu/*.h", + "gpu/Kbuild", "tl/*.c", "tl/*.h", + "tl/Kbuild", "mmu/*.c", "mmu/*.h", + "mmu/Kbuild", ], - kbuild_options: [ - "CONFIG_MALI_KUTF=n", - "CONFIG_MALI_MIDGARD=m", - "CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}", - "CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}", - ], - buslog: { - extra_symbols: [ - "bus_logger", - ], - }, - mali_corestack: { - kbuild_options: ["CONFIG_MALI_CORESTACK=y"], - }, - mali_error_inject: { - kbuild_options: ["CONFIG_MALI_ERROR_INJECT=y"], - }, - mali_error_inject_random: { - kbuild_options: ["CONFIG_MALI_ERROR_INJECT_RANDOM=y"], - }, - 
cinstr_secondary_hwc: { - kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SECONDARY=y"], - }, - cinstr_secondary_hwc_via_debug_fs: { - kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS=y"], - }, - mali_2mb_alloc: { - kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"], - }, - mali_hw_errata_1485982_not_affected: { - kbuild_options: ["CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y"], - }, - mali_hw_errata_1485982_use_clock_alternative: { - kbuild_options: ["CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE=y"], - }, gpu_has_job_manager: { srcs: [ "context/backend/*_jm.c", @@ -155,6 +209,8 @@ bob_kernel_module { "jm/*.h", "tl/backend/*_jm.c", "mmu/backend/*_jm.c", + "ipa/backend/*_jm.c", + "ipa/backend/*_jm.h", ], }, gpu_has_csf: { @@ -163,6 +219,9 @@ bob_kernel_module { "csf/*.c", "csf/*.h", "csf/Kbuild", + "csf/ipa_control/*.c", + "csf/ipa_control/*.h", + "csf/ipa_control/Kbuild", "debug/backend/*_csf.c", "debug/backend/*_csf.h", "device/backend/*_csf.c", @@ -170,6 +229,8 @@ bob_kernel_module { "gpu/backend/*_csf.h", "tl/backend/*_csf.c", "mmu/backend/*_csf.c", + "ipa/backend/*_csf.c", + "ipa/backend/*_csf.h", ], }, mali_arbiter_support: { @@ -179,5 +240,13 @@ bob_kernel_module { "arbiter/Kbuild", ], }, - defaults: ["mali_kbase_shared_config_defaults"], + kbuild_options: [ + "CONFIG_MALI_MIDGARD=m", + "CONFIG_MALI_KUTF=n", + ], + buslog: { + extra_symbols: [ + "bus_logger", + ], + }, } diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/context/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/context/Kbuild new file mode 100644 index 0000000..1d9c00a --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/context/Kbuild @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2012-2013, 2016-2017, 2020-2021 ARM Limited. All rights reserved. 
+# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +mali_kbase-y += context/mali_kbase_context.o + +ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) + mali_kbase-y += context/backend/mali_kbase_context_csf.o +else + mali_kbase-y += context/backend/mali_kbase_context_jm.o +endif diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/context/backend/mali_kbase_context_csf.c b/dvalin/kernel/drivers/gpu/arm/midgard/context/backend/mali_kbase_context_csf.c new file mode 100644 index 0000000..1ce806f --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/context/backend/mali_kbase_context_csf.c @@ -0,0 +1,201 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * Base kernel context APIs for CSF GPUs + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#if IS_ENABLED(CONFIG_DEBUG_FS) +#include +#include +#include +#include +#include +#include + +void kbase_context_debugfs_init(struct kbase_context *const kctx) +{ + kbase_debug_mem_view_init(kctx); + kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx); + kbase_jit_debugfs_init(kctx); + kbase_csf_queue_group_debugfs_init(kctx); + kbase_csf_kcpu_debugfs_init(kctx); + kbase_csf_tiler_heap_debugfs_init(kctx); + kbase_csf_cpu_queue_debugfs_init(kctx); +} +KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init); + +void kbase_context_debugfs_term(struct kbase_context *const kctx) +{ + debugfs_remove_recursive(kctx->kctx_dentry); +} +KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term); +#else +void kbase_context_debugfs_init(struct kbase_context *const kctx) +{ + CSTD_UNUSED(kctx); +} +KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init); + +void kbase_context_debugfs_term(struct kbase_context *const kctx) +{ + CSTD_UNUSED(kctx); +} +KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term); +#endif /* CONFIG_DEBUG_FS */ + +static void kbase_context_free(struct kbase_context *kctx) +{ + kbase_timeline_post_kbase_context_destroy(kctx); + + vfree(kctx); +} + +static const struct kbase_context_init context_init[] = { + { NULL, kbase_context_free, NULL }, + { kbase_context_common_init, kbase_context_common_term, + "Common context initialization failed" }, + { kbase_context_mem_pool_group_init, kbase_context_mem_pool_group_term, + "Memory pool group initialization failed" }, + { kbase_mem_evictable_init, kbase_mem_evictable_deinit, + "Memory evictable initialization failed" }, + { kbase_context_mmu_init, kbase_context_mmu_term, + "MMU initialization failed" }, + { 
kbase_context_mem_alloc_page, kbase_context_mem_pool_free, + "Memory alloc page failed" }, + { kbase_region_tracker_init, kbase_region_tracker_term, + "Region tracker initialization failed" }, + { kbase_sticky_resource_init, kbase_context_sticky_resource_term, + "Sticky resource initialization failed" }, + { kbase_jit_init, kbase_jit_term, "JIT initialization failed" }, + { kbase_csf_ctx_init, kbase_csf_ctx_term, + "CSF context initialization failed" }, + { kbase_context_add_to_dev_list, kbase_context_remove_from_dev_list, + "Adding kctx to device failed" }, +}; + +static void kbase_context_term_partial( + struct kbase_context *kctx, + unsigned int i) +{ + while (i-- > 0) { + if (context_init[i].term) + context_init[i].term(kctx); + } +} + +struct kbase_context *kbase_create_context(struct kbase_device *kbdev, + bool is_compat, + base_context_create_flags const flags, + unsigned long const api_version, + struct file *const filp) +{ + struct kbase_context *kctx; + unsigned int i = 0; + + if (WARN_ON(!kbdev)) + return NULL; + + /* Validate flags */ + if (WARN_ON(flags != (flags & BASEP_CONTEXT_CREATE_KERNEL_FLAGS))) + return NULL; + + /* zero-inited as lot of code assume it's zero'ed out on create */ + kctx = vzalloc(sizeof(*kctx)); + if (WARN_ON(!kctx)) + return NULL; + + kctx->kbdev = kbdev; + kctx->api_version = api_version; + kctx->filp = filp; + kctx->create_flags = flags; + + if (is_compat) + kbase_ctx_flag_set(kctx, KCTX_COMPAT); +#if defined(CONFIG_64BIT) + else + kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA); +#endif /* defined(CONFIG_64BIT) */ + + for (i = 0; i < ARRAY_SIZE(context_init); i++) { + int err = 0; + + if (context_init[i].init) + err = context_init[i].init(kctx); + + if (err) { + dev_err(kbdev->dev, "%s error = %d\n", + context_init[i].err_mes, err); + + /* kctx should be freed by kbase_context_free(). + * Otherwise it will result in memory leak. 
+ */ + WARN_ON(i == 0); + + kbase_context_term_partial(kctx, i); + return NULL; + } + } + + return kctx; +} +KBASE_EXPORT_SYMBOL(kbase_create_context); + +void kbase_destroy_context(struct kbase_context *kctx) +{ + struct kbase_device *kbdev; + + if (WARN_ON(!kctx)) + return; + + kbdev = kctx->kbdev; + if (WARN_ON(!kbdev)) + return; + + /* Context termination could happen whilst the system suspend of + * the GPU device is ongoing or has completed. It has been seen on + * Customer side that a hang could occur if context termination is + * not blocked until the resume of GPU device. + */ + while (kbase_pm_context_active_handle_suspend( + kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { + dev_info(kbdev->dev, + "Suspend in progress when destroying context"); + wait_event(kbdev->pm.resume_wait, + !kbase_pm_is_suspending(kbdev)); + } + + kbase_mem_pool_group_mark_dying(&kctx->mem_pools); + + kbase_context_term_partial(kctx, ARRAY_SIZE(context_init)); + + kbase_pm_context_idle(kbdev); +} +KBASE_EXPORT_SYMBOL(kbase_destroy_context); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/context/backend/mali_kbase_context_jm.c b/dvalin/kernel/drivers/gpu/arm/midgard/context/backend/mali_kbase_context_jm.c index 2cd2551..8ce81e7 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/context/backend/mali_kbase_context_jm.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/context/backend/mali_kbase_context_jm.c @@ -1,12 +1,12 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -17,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /* @@ -30,13 +28,13 @@ #include #include #include +#include #include #include #include #include -#include -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) #include #include @@ -46,14 +44,12 @@ void kbase_context_debugfs_init(struct kbase_context *const kctx) kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx); kbase_jit_debugfs_init(kctx); kbasep_jd_debugfs_ctx_init(kctx); - kbase_debug_job_fault_context_init(kctx); } KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init); void kbase_context_debugfs_term(struct kbase_context *const kctx) { debugfs_remove_recursive(kctx->kctx_dentry); - kbase_debug_job_fault_context_term(kctx); } KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term); #else @@ -70,6 +66,16 @@ void kbase_context_debugfs_term(struct kbase_context *const kctx) KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term); #endif /* CONFIG_DEBUG_FS */ +static int kbase_context_kbase_kinstr_jm_init(struct kbase_context *kctx) +{ + return kbase_kinstr_jm_init(&kctx->kinstr_jm); +} + +static void kbase_context_kbase_kinstr_jm_term(struct kbase_context *kctx) +{ + kbase_kinstr_jm_term(kctx->kinstr_jm); +} + static int kbase_context_kbase_timer_setup(struct kbase_context *kctx) { kbase_timer_setup(&kctx->soft_job_timeout, @@ -98,32 +104,59 @@ static int kbase_context_submit_check(struct kbase_context *kctx) return 0; } +static void kbase_context_flush_jobs(struct kbase_context *kctx) +{ + kbase_jd_zap_context(kctx); + flush_workqueue(kctx->jctx.job_done_wq); +} + +static void kbase_context_free(struct kbase_context *kctx) +{ + kbase_timeline_post_kbase_context_destroy(kctx); + + vfree(kctx); +} + static const struct kbase_context_init context_init[] = { - 
{kbase_context_common_init, kbase_context_common_term, NULL}, - {kbase_context_mem_pool_group_init, kbase_context_mem_pool_group_term, - "Memory pool goup initialization failed"}, - {kbase_mem_evictable_init, kbase_mem_evictable_deinit, - "Memory evictable initialization failed"}, - {kbasep_js_kctx_init, kbasep_js_kctx_term, - "JS kctx initialization failed"}, - {kbase_jd_init, kbase_jd_exit, - "JD initialization failed"}, - {kbase_event_init, kbase_event_cleanup, - "Event initialization failed"}, - {kbase_dma_fence_init, kbase_dma_fence_term, - "DMA fence initialization failed"}, - {kbase_context_mmu_init, kbase_context_mmu_term, - "MMU initialization failed"}, - {kbase_context_mem_alloc_page, kbase_context_mem_pool_free, - "Memory alloc page failed"}, - {kbase_region_tracker_init, kbase_region_tracker_term, - "Region tracker initialization failed"}, - {kbase_sticky_resource_init, kbase_context_sticky_resource_term, - "Sticky resource initialization failed"}, - {kbase_jit_init, kbase_jit_term, - "JIT initialization failed"}, - {kbase_context_kbase_timer_setup, NULL, NULL}, - {kbase_context_submit_check, NULL, NULL}, + { NULL, kbase_context_free, NULL }, + { kbase_context_common_init, kbase_context_common_term, + "Common context initialization failed" }, + { kbase_dma_fence_init, kbase_dma_fence_term, + "DMA fence initialization failed" }, + { kbase_context_mem_pool_group_init, kbase_context_mem_pool_group_term, + "Memory pool group initialization failed" }, + { kbase_mem_evictable_init, kbase_mem_evictable_deinit, + "Memory evictable initialization failed" }, + { kbase_context_mmu_init, kbase_context_mmu_term, + "MMU initialization failed" }, + { kbase_context_mem_alloc_page, kbase_context_mem_pool_free, + "Memory alloc page failed" }, + { kbase_region_tracker_init, kbase_region_tracker_term, + "Region tracker initialization failed" }, + { kbase_sticky_resource_init, kbase_context_sticky_resource_term, + "Sticky resource initialization failed" }, + { 
kbase_jit_init, kbase_jit_term, "JIT initialization failed" }, + { kbase_context_kbase_kinstr_jm_init, + kbase_context_kbase_kinstr_jm_term, + "JM instrumentation initialization failed" }, + { kbase_context_kbase_timer_setup, NULL, + "Timers initialization failed" }, + { kbase_event_init, kbase_event_cleanup, + "Event initialization failed" }, + { kbasep_js_kctx_init, kbasep_js_kctx_term, + "JS kctx initialization failed" }, + { kbase_jd_init, kbase_jd_exit, "JD initialization failed" }, + { kbase_context_submit_check, NULL, "Enabling job submission failed" }, +#if IS_ENABLED(CONFIG_DEBUG_FS) + { kbase_debug_job_fault_context_init, + kbase_debug_job_fault_context_term, + "Job fault context initialization failed" }, +#endif + { NULL, kbase_context_flush_jobs, NULL }, + { kbase_context_add_to_dev_list, kbase_context_remove_from_dev_list, + "Adding kctx to device failed" }, + { kbasep_platform_context_init, kbasep_platform_context_term, + "Platform callback for kctx initialization failed" }, }; static void kbase_context_term_partial( @@ -167,14 +200,23 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev, #if defined(CONFIG_64BIT) else kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA); -#endif /* !defined(CONFIG_64BIT) */ +#endif /* defined(CONFIG_64BIT) */ for (i = 0; i < ARRAY_SIZE(context_init); i++) { - int err = context_init[i].init(kctx); + int err = 0; + + if (context_init[i].init) + err = context_init[i].init(kctx); if (err) { dev_err(kbdev->dev, "%s error = %d\n", context_init[i].err_mes, err); + + /* kctx should be freed by kbase_context_free(). + * Otherwise it will result in memory leak. + */ + WARN_ON(i == 0); + kbase_context_term_partial(kctx, i); return NULL; } @@ -195,17 +237,27 @@ void kbase_destroy_context(struct kbase_context *kctx) if (WARN_ON(!kbdev)) return; - /* Ensure the core is powered up for the destroy process - * A suspend won't happen here, because we're in a syscall - * from a userspace thread. 
+ /* Context termination could happen whilst the system suspend of + * the GPU device is ongoing or has completed. It has been seen on + * Customer side that a hang could occur if context termination is + * not blocked until the resume of GPU device. */ - kbase_pm_context_active(kbdev); +#ifdef CONFIG_MALI_ARBITER_SUPPORT + atomic_inc(&kbdev->pm.gpu_users_waiting); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ + while (kbase_pm_context_active_handle_suspend( + kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { + dev_dbg(kbdev->dev, + "Suspend in progress when destroying context"); + wait_event(kbdev->pm.resume_wait, + !kbase_pm_is_suspending(kbdev)); + } +#ifdef CONFIG_MALI_ARBITER_SUPPORT + atomic_dec(&kbdev->pm.gpu_users_waiting); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ kbase_mem_pool_group_mark_dying(&kctx->mem_pools); - kbase_jd_zap_context(kctx); - flush_workqueue(kctx->jctx.job_done_wq); - kbase_context_term_partial(kctx, ARRAY_SIZE(context_init)); kbase_pm_context_idle(kbdev); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context.c b/dvalin/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context.c index 93fe431..b2e7025 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context.c @@ -1,12 +1,12 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -17,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /* @@ -28,17 +26,105 @@ #include #include #include -#include #include #include -#include #include #include #include +/** + * find_process_node - Used to traverse the process rb_tree to find if + * process exists already in process rb_tree. + * + * @node: Pointer to root node to start search. + * @tgid: Thread group PID to search for. + * + * Return: Pointer to kbase_process if exists otherwise NULL. + */ +static struct kbase_process *find_process_node(struct rb_node *node, pid_t tgid) +{ + struct kbase_process *kprcs = NULL; + + /* Check if the kctx creation request is from a existing process.*/ + while (node) { + struct kbase_process *prcs_node = + rb_entry(node, struct kbase_process, kprcs_node); + if (prcs_node->tgid == tgid) { + kprcs = prcs_node; + break; + } + + if (tgid < prcs_node->tgid) + node = node->rb_left; + else + node = node->rb_right; + } + + return kprcs; +} + +/** + * kbase_insert_kctx_to_process - Initialise kbase process context. + * + * @kctx: Pointer to kbase context. + * + * Here we initialise per process rb_tree managed by kbase_device. + * We maintain a rb_tree of each unique process that gets created. + * and Each process maintains a list of kbase context. + * This setup is currently used by kernel trace functionality + * to trace and visualise gpu memory consumption. + * + * Return: 0 on success and error number on failure. 
+ */ +static int kbase_insert_kctx_to_process(struct kbase_context *kctx) +{ + struct rb_root *const prcs_root = &kctx->kbdev->process_root; + const pid_t tgid = kctx->tgid; + struct kbase_process *kprcs = NULL; + + lockdep_assert_held(&kctx->kbdev->kctx_list_lock); + + kprcs = find_process_node(prcs_root->rb_node, tgid); + + /* if the kctx is from new process then create a new kbase_process + * and add it to the &kbase_device->rb_tree + */ + if (!kprcs) { + struct rb_node **new = &prcs_root->rb_node, *parent = NULL; + + kprcs = kzalloc(sizeof(*kprcs), GFP_KERNEL); + if (kprcs == NULL) + return -ENOMEM; + kprcs->tgid = tgid; + INIT_LIST_HEAD(&kprcs->kctx_list); + kprcs->dma_buf_root = RB_ROOT; + kprcs->total_gpu_pages = 0; + + while (*new) { + struct kbase_process *prcs_node; + + parent = *new; + prcs_node = rb_entry(parent, struct kbase_process, + kprcs_node); + if (tgid < prcs_node->tgid) + new = &(*new)->rb_left; + else + new = &(*new)->rb_right; + } + rb_link_node(&kprcs->kprcs_node, parent, new); + rb_insert_color(&kprcs->kprcs_node, prcs_root); + } + + kctx->kprcs = kprcs; + list_add(&kctx->kprcs_link, &kprcs->kctx_list); + + return 0; +} + int kbase_context_common_init(struct kbase_context *kctx) { const unsigned long cookies_mask = KBASE_COOKIE_MASK; + int err = 0; /* creating a context is considered a disjoint event */ kbase_disjoint_event(kctx->kbdev); @@ -66,30 +152,95 @@ int kbase_context_common_init(struct kbase_context *kctx) init_waitqueue_head(&kctx->event_queue); atomic_set(&kctx->event_count, 0); +#if !MALI_USE_CSF atomic_set(&kctx->event_closed, false); - - bitmap_copy(kctx->cookies, &cookies_mask, BITS_PER_LONG); - -#ifdef CONFIG_GPU_TRACEPOINTS +#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) atomic_set(&kctx->jctx.work_id, 0); +#endif #endif + bitmap_copy(kctx->cookies, &cookies_mask, BITS_PER_LONG); + kctx->id = atomic_add_return(1, &(kctx->kbdev->ctx_num)) - 1; mutex_init(&kctx->legacy_hwcnt_lock); mutex_lock(&kctx->kbdev->kctx_list_lock); - 
list_add(&kctx->kctx_list_link, &kctx->kbdev->kctx_list); - KBASE_TLSTREAM_TL_KBASE_NEW_CTX(kctx->kbdev, kctx->id, - kctx->kbdev->gpu_props.props.raw_props.gpu_id); - KBASE_TLSTREAM_TL_NEW_CTX(kctx->kbdev, kctx, kctx->id, - (u32)(kctx->tgid)); + err = kbase_insert_kctx_to_process(kctx); + if (err) + dev_err(kctx->kbdev->dev, + "(err:%d) failed to insert kctx to kbase_process\n", err); + mutex_unlock(&kctx->kbdev->kctx_list_lock); + return err; +} + +int kbase_context_add_to_dev_list(struct kbase_context *kctx) +{ + if (WARN_ON(!kctx)) + return -EINVAL; + + if (WARN_ON(!kctx->kbdev)) + return -EINVAL; + + mutex_lock(&kctx->kbdev->kctx_list_lock); + list_add(&kctx->kctx_list_link, &kctx->kbdev->kctx_list); + mutex_unlock(&kctx->kbdev->kctx_list_lock); + + kbase_timeline_post_kbase_context_create(kctx); + return 0; } +void kbase_context_remove_from_dev_list(struct kbase_context *kctx) +{ + if (WARN_ON(!kctx)) + return; + + if (WARN_ON(!kctx->kbdev)) + return; + + kbase_timeline_pre_kbase_context_destroy(kctx); + + mutex_lock(&kctx->kbdev->kctx_list_lock); + list_del_init(&kctx->kctx_list_link); + mutex_unlock(&kctx->kbdev->kctx_list_lock); +} + +/** + * kbase_remove_kctx_from_process - remove a terminating context from + * the process list. + * + * @kctx: Pointer to kbase context. + * + * Remove the tracking of context from the list of contexts maintained under + * kbase process and if the list if empty then there no outstanding contexts + * we can remove the process node as well. + */ + +static void kbase_remove_kctx_from_process(struct kbase_context *kctx) +{ + struct kbase_process *kprcs = kctx->kprcs; + + lockdep_assert_held(&kctx->kbdev->kctx_list_lock); + list_del(&kctx->kprcs_link); + + /* if there are no outstanding contexts in current process node, + * we can remove it from the process rb_tree. 
+ */ + if (list_empty(&kprcs->kctx_list)) { + rb_erase(&kprcs->kprcs_node, &kctx->kbdev->process_root); + /* Add checks, so that the terminating process Should not + * hold any gpu_memory. + */ + WARN_ON(kprcs->total_gpu_pages); + WARN_ON(!RB_EMPTY_ROOT(&kprcs->dma_buf_root)); + kfree(kprcs); + } +} + void kbase_context_common_term(struct kbase_context *kctx) { unsigned long flags; @@ -109,24 +260,10 @@ void kbase_context_common_term(struct kbase_context *kctx) WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0); mutex_lock(&kctx->kbdev->kctx_list_lock); - - KBASE_TLSTREAM_TL_KBASE_DEL_CTX(kctx->kbdev, kctx->id); - - KBASE_TLSTREAM_TL_DEL_CTX(kctx->kbdev, kctx); - list_del(&kctx->kctx_list_link); + kbase_remove_kctx_from_process(kctx); mutex_unlock(&kctx->kbdev->kctx_list_lock); KBASE_KTRACE_ADD(kctx->kbdev, CORE_CTX_DESTROY, kctx, 0u); - - /* Flush the timeline stream, so the user can see the termination - * tracepoints being fired. - * The "if" statement below is for optimization. It is safe to call - * kbase_timeline_streams_flush when timeline is disabled. 
- */ - if (atomic_read(&kctx->kbdev->timeline_flags) != 0) - kbase_timeline_streams_flush(kctx->kbdev->timeline); - - vfree(kctx); } int kbase_context_mem_pool_group_init(struct kbase_context *kctx) @@ -144,11 +281,9 @@ void kbase_context_mem_pool_group_term(struct kbase_context *kctx) int kbase_context_mmu_init(struct kbase_context *kctx) { - kbase_mmu_init(kctx->kbdev, - &kctx->mmu, kctx, + return kbase_mmu_init( + kctx->kbdev, &kctx->mmu, kctx, base_context_mmu_group_id_get(kctx->create_flags)); - - return 0; } void kbase_context_mmu_term(struct kbase_context *kctx) diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context.h b/dvalin/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context.h index e4ed894..a0c51c9 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2017, 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,18 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * - *//* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * (C) COPYRIGHT 2011-2017, 2019 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * */ #ifndef _KBASE_CONTEXT_H_ @@ -117,25 +106,7 @@ static inline bool kbase_ctx_flag(struct kbase_context *kctx, static inline void kbase_ctx_flag_clear(struct kbase_context *kctx, enum kbase_context_flags flag) { -#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE - /* - * Earlier kernel versions doesn't have atomic_andnot() or - * atomic_and(). atomic_clear_mask() was only available on some - * architectures and removed on arm in v3.13 on arm and arm64. - * - * Use a compare-exchange loop to clear the flag on pre 4.3 kernels, - * when atomic_andnot() becomes available. - */ - int old, new; - - do { - old = atomic_read(&kctx->flags); - new = old & ~flag; - - } while (atomic_cmpxchg(&kctx->flags, old, new) != old); -#else atomic_andnot(flag, &kctx->flags); -#endif } /** diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context_internal.h b/dvalin/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context_internal.h index 818cdbe..1cde739 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context_internal.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/context/mali_kbase_context_internal.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,16 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * - *//* SPDX-License-Identifier: GPL-2.0 */ -/* - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. */ #include @@ -58,3 +49,6 @@ int kbase_context_mem_alloc_page(struct kbase_context *kctx); void kbase_context_mem_pool_free(struct kbase_context *kctx); void kbase_context_sticky_resource_term(struct kbase_context *kctx); + +int kbase_context_add_to_dev_list(struct kbase_context *kctx); +void kbase_context_remove_from_dev_list(struct kbase_context *kctx); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/csf/Kbuild new file mode 100644 index 0000000..765e419 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/Kbuild @@ -0,0 +1,47 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +mali_kbase-y += \ + csf/mali_kbase_csf_firmware_cfg.o \ + csf/mali_kbase_csf_trace_buffer.o \ + csf/mali_kbase_csf.o \ + csf/mali_kbase_csf_scheduler.o \ + csf/mali_kbase_csf_kcpu.o \ + csf/mali_kbase_csf_tiler_heap.o \ + csf/mali_kbase_csf_timeout.o \ + csf/mali_kbase_csf_tl_reader.o \ + csf/mali_kbase_csf_heap_context_alloc.o \ + csf/mali_kbase_csf_reset_gpu.o \ + csf/mali_kbase_csf_csg_debugfs.o \ + csf/mali_kbase_csf_kcpu_debugfs.o \ + csf/mali_kbase_csf_protected_memory.o \ + csf/mali_kbase_csf_tiler_heap_debugfs.o \ + csf/mali_kbase_csf_cpu_queue_debugfs.o + +mali_kbase-$(CONFIG_MALI_REAL_HW) += csf/mali_kbase_csf_firmware.o + + +ifeq ($(KBUILD_EXTMOD),) +# in-tree + -include $(src)/csf/ipa_control/Kbuild +else +# out-of-tree + include $(src)/csf/ipa_control/Kbuild +endif diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/ipa_control/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/csf/ipa_control/Kbuild new file mode 100644 index 0000000..08824b2 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/ipa_control/Kbuild @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +mali_kbase-y += \ + csf/ipa_control/mali_kbase_csf_ipa_control.o diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/ipa_control/mali_kbase_csf_ipa_control.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/ipa_control/mali_kbase_csf_ipa_control.c new file mode 100644 index 0000000..e23d681 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/ipa_control/mali_kbase_csf_ipa_control.c @@ -0,0 +1,925 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" +#include "mali_kbase_csf_ipa_control.h" + +/* + * Status flags from the STATUS register of the IPA Control interface. + */ +#define STATUS_COMMAND_ACTIVE ((u32)1 << 0) +#define STATUS_TIMER_ACTIVE ((u32)1 << 1) +#define STATUS_AUTO_ACTIVE ((u32)1 << 2) +#define STATUS_PROTECTED_MODE ((u32)1 << 8) +#define STATUS_RESET ((u32)1 << 9) +#define STATUS_TIMER_ENABLED ((u32)1 << 31) + +/* + * Commands for the COMMAND register of the IPA Control interface. 
+ */
+#define COMMAND_NOP ((u32)0)
+#define COMMAND_APPLY ((u32)1)
+#define COMMAND_CLEAR ((u32)2)
+#define COMMAND_SAMPLE ((u32)3)
+#define COMMAND_PROTECTED_ACK ((u32)4)
+#define COMMAND_RESET_ACK ((u32)5)
+
+/**
+ * Default value for the TIMER register of the IPA Control interface,
+ * expressed in milliseconds.
+ *
+ * The chosen value is a trade off between two requirements: the IPA Control
+ * interface should sample counters with a resolution in the order of
+ * milliseconds, while keeping GPU overhead as limited as possible.
+ */
+#define TIMER_DEFAULT_VALUE_MS ((u32)10) /* 10 milliseconds */
+
+/**
+ * Number of timer events per second.
+ */
+#define TIMER_EVENTS_PER_SECOND ((u32)1000 / TIMER_DEFAULT_VALUE_MS)
+
+/**
+ * Maximum number of loops polling the GPU before we assume the GPU has hung.
+ */
+#define IPA_INACTIVE_MAX_LOOPS ((unsigned int)8000000)
+
+/**
+ * Number of bits used to configure a performance counter in SELECT registers.
+ */
+#define IPA_CONTROL_SELECT_BITS_PER_CNT ((u64)8)
+
+/**
+ * Maximum value of a performance counter.
+ */
+#define MAX_PRFCNT_VALUE (((u64)1 << 48) - 1)
+
+/**
+ * struct kbase_ipa_control_listener_data - Data for the GPU clock frequency
+ *                                          listener
+ *
+ * @listener: GPU clock frequency listener.
+ * @kbdev:    Pointer to kbase device.
+ */
+struct kbase_ipa_control_listener_data {
+	struct kbase_clk_rate_listener listener;
+	struct kbase_device *kbdev;
+};
+
+/**
+ * timer_value - Compute the value to program into the TIMER register
+ *
+ * @gpu_rate: GPU clock rate; callers in this file pass the cached rate in Hz.
+ *
+ * Return: @gpu_rate divided by TIMER_EVENTS_PER_SECOND.
+ */
+static u32 timer_value(u32 gpu_rate)
+{
+	return gpu_rate / TIMER_EVENTS_PER_SECOND;
+}
+
+/**
+ * wait_status - Poll the STATUS register until the given flags are clear
+ *
+ * @kbdev: Pointer to kbase device.
+ * @flags: Mask of STATUS bits that must all read back as zero.
+ *
+ * Return: 0 when none of @flags is set in the last value read, -EBUSY if
+ *         they are still set once the IPA_INACTIVE_MAX_LOOPS polling budget
+ *         has been used up.
+ */
+static int wait_status(struct kbase_device *kbdev, u32 flags)
+{
+	unsigned int max_loops = IPA_INACTIVE_MAX_LOOPS;
+	u32 status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS));
+
+	/*
+	 * Wait for the STATUS register to indicate that flags have been
+	 * cleared, in case a transition is pending.
+	 */
+	while (--max_loops && (status & flags))
+		status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS));
+
+	/*
+	 * Decide on the last value read rather than on the loop counter:
+	 * the counter also reaches zero when the flags clear on the very
+	 * last poll, which must not be reported as a hang.
+	 */
+	if (status & flags) {
+		dev_err(kbdev->dev, "IPA_CONTROL STATUS register stuck");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+/**
+ * apply_select_config - Program the SELECT registers and issue APPLY
+ *
+ * @kbdev:  Pointer to kbase device.
+ * @select: Array indexed by core type; each 64-bit entry is written as a
+ *          LO/HI pair of 32-bit SELECT registers.
+ *
+ * The APPLY command is only written if STATUS_COMMAND_ACTIVE is seen to be
+ * clear after the SELECT registers have been written.
+ *
+ * Return: 0 on success, otherwise the error returned by wait_status().
+ */
+static int apply_select_config(struct kbase_device *kbdev, u64 *select)
+{
+	int ret;
+
+	u32 select_cshw_lo = (u32)(select[KBASE_IPA_CORE_TYPE_CSHW] & U32_MAX);
+	u32 select_cshw_hi =
+		(u32)((select[KBASE_IPA_CORE_TYPE_CSHW] >> 32) & U32_MAX);
+	u32 select_memsys_lo =
+		(u32)(select[KBASE_IPA_CORE_TYPE_MEMSYS] & U32_MAX);
+	u32 select_memsys_hi =
+		(u32)((select[KBASE_IPA_CORE_TYPE_MEMSYS] >> 32) & U32_MAX);
+	u32 select_tiler_lo =
+		(u32)(select[KBASE_IPA_CORE_TYPE_TILER] & U32_MAX);
+	u32 select_tiler_hi =
+		(u32)((select[KBASE_IPA_CORE_TYPE_TILER] >> 32) & U32_MAX);
+	u32 select_shader_lo =
+		(u32)(select[KBASE_IPA_CORE_TYPE_SHADER] & U32_MAX);
+	u32 select_shader_hi =
+		(u32)((select[KBASE_IPA_CORE_TYPE_SHADER] >> 32) & U32_MAX);
+
+	kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_LO), select_cshw_lo);
+	kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_HI), select_cshw_hi);
+	kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_MEMSYS_LO),
+			select_memsys_lo);
+	kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_MEMSYS_HI),
+			select_memsys_hi);
+	kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_TILER_LO),
+			select_tiler_lo);
+	kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_TILER_HI),
+			select_tiler_hi);
+	kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_SHADER_LO),
+			select_shader_lo);
+	kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_SHADER_HI),
+			select_shader_hi);
+
+	ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE);
+
+	if (!ret)
+		kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), COMMAND_APPLY);
+
+	return ret;
+}
+
+/**
+ * read_value_cnt - Read the 64-bit VALUE register pair of one counter
+ *
+ * @kbdev:      Pointer to kbase device.
+ * @type:       Core type (KBASE_IPA_CORE_TYPE_*) selecting the register bank.
+ * @select_idx: Counter slot passed to the VALUE_*_REG_LO/HI macros.
+ *
+ * Return: The HI and LO halves combined into a u64, or 0 (after a WARN) if
+ *         @type is not one of the known core types.
+ */
+static u64 read_value_cnt(struct kbase_device *kbdev, u8 type, int select_idx)
+{
+	u32 value_lo, value_hi;
+
+	switch (type) {
+	case KBASE_IPA_CORE_TYPE_CSHW:
+		value_lo = kbase_reg_read(
+			kbdev, IPA_CONTROL_REG(VALUE_CSHW_REG_LO(select_idx)));
+		value_hi = kbase_reg_read(
+			kbdev, IPA_CONTROL_REG(VALUE_CSHW_REG_HI(select_idx)));
+		break;
+	case KBASE_IPA_CORE_TYPE_MEMSYS:
+		value_lo = kbase_reg_read(
+			kbdev,
+			IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(select_idx)));
+		value_hi = kbase_reg_read(
+			kbdev,
+			IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI(select_idx)));
+		break;
+	case KBASE_IPA_CORE_TYPE_TILER:
+		value_lo = kbase_reg_read(
+			kbdev, IPA_CONTROL_REG(VALUE_TILER_REG_LO(select_idx)));
+		value_hi = kbase_reg_read(
+			kbdev, IPA_CONTROL_REG(VALUE_TILER_REG_HI(select_idx)));
+		break;
+	case KBASE_IPA_CORE_TYPE_SHADER:
+		value_lo = kbase_reg_read(
+			kbdev,
+			IPA_CONTROL_REG(VALUE_SHADER_REG_LO(select_idx)));
+		value_hi = kbase_reg_read(
+			kbdev,
+			IPA_CONTROL_REG(VALUE_SHADER_REG_HI(select_idx)));
+		break;
+	default:
+		WARN(1, "Unknown core type: %u\n", type);
+		value_lo = value_hi = 0;
+		break;
+	}
+
+	return (((u64)value_hi << 32) | value_lo);
+}
+
+/**
+ * build_select_config - Build the SELECT register values from driver state
+ *
+ * @ipa_ctrl:      IPA Control state holding the per-block counter selection.
+ * @select_config: Output array with KBASE_IPA_CORE_TYPE_NUM entries; entry i
+ *                 receives the counter indices of block i, packed
+ *                 IPA_CONTROL_SELECT_BITS_PER_CNT bits per counter slot.
+ */
+static void build_select_config(struct kbase_ipa_control *ipa_ctrl,
+				u64 *select_config)
+{
+	size_t i;
+
+	for (i = 0; i < KBASE_IPA_CORE_TYPE_NUM; i++) {
+		size_t j;
+
+		select_config[i] = 0ULL;
+
+		for (j = 0; j < KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; j++) {
+			struct kbase_ipa_control_prfcnt_config *prfcnt_config =
+				&ipa_ctrl->blocks[i].select[j];
+
+			select_config[i] |=
+				((u64)prfcnt_config->idx
+				 << (IPA_CONTROL_SELECT_BITS_PER_CNT * j));
+		}
+	}
+}
+
+/**
+ * calc_prfcnt_delta - Accumulate the change of a counter since its last read
+ *
+ * @kbdev:     Pointer to kbase device.
+ * @prfcnt:    Session counter whose accumulated difference is updated.
+ * @gpu_ready: If true the raw value is read from the GPU registers;
+ *             otherwise the previously cached raw value is reused, which
+ *             yields a zero delta.
+ *
+ * The delta is multiplied by the counter's scaling factor and, for counters
+ * that request GPU normalization, divided by the current GPU rate.
+ */
+static inline void calc_prfcnt_delta(struct kbase_device *kbdev,
+				     struct kbase_ipa_control_prfcnt *prfcnt,
+				     bool gpu_ready)
+{
+	u64 delta_value, raw_value;
+
+	if (gpu_ready)
+		raw_value = read_value_cnt(kbdev, (u8)prfcnt->type,
+					   prfcnt->select_idx);
+	else
+		raw_value = prfcnt->latest_raw_value;
+
+	if (raw_value < prfcnt->latest_raw_value) {
+		delta_value = (MAX_PRFCNT_VALUE - prfcnt->latest_raw_value) +
+			      raw_value;
+	} else {
+		delta_value = raw_value - prfcnt->latest_raw_value;
+	}
+
+	delta_value *= prfcnt->scaling_factor;
+
+	if
(!WARN_ON_ONCE(kbdev->csf.ipa_control.cur_gpu_rate == 0))
+		if (prfcnt->gpu_norm)
+			delta_value /= kbdev->csf.ipa_control.cur_gpu_rate;
+
+	prfcnt->latest_raw_value = raw_value;
+
+	/* Accumulate the difference */
+	prfcnt->accumulated_diff += delta_value;
+}
+
+/**
+ * kbase_ipa_control_rate_change_notify - GPU frequency change callback
+ *
+ * @listener:     Clock frequency change listener.
+ * @clk_index:    Index of the clock for which the change has occurred.
+ * @clk_rate_hz:  Clock frequency(Hz).
+ *
+ * This callback notifies kbase_ipa_control about GPU frequency changes.
+ * Only top-level clock changes are meaningful. GPU frequency updates
+ * affect all performance counters which require GPU normalization
+ * in every session.
+ */
+static void
+kbase_ipa_control_rate_change_notify(struct kbase_clk_rate_listener *listener,
+				     u32 clk_index, u32 clk_rate_hz)
+{
+	if ((clk_index == KBASE_CLOCK_DOMAIN_TOP) && (clk_rate_hz != 0)) {
+		size_t i;
+		unsigned long flags;
+		struct kbase_ipa_control_listener_data *listener_data =
+			container_of(listener,
+				     struct kbase_ipa_control_listener_data,
+				     listener);
+		struct kbase_device *kbdev = listener_data->kbdev;
+		struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+		if (!kbdev->pm.backend.gpu_ready) {
+			dev_err(kbdev->dev,
+				"%s: GPU frequency cannot change while GPU is off",
+				__func__);
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+			return;
+		}
+
+		/* Interrupts are already disabled and interrupt state is also saved */
+		spin_lock(&ipa_ctrl->lock);
+
+		/* Sample normalized counters at the old rate before it changes.
+		 * Every slot is visited: registration takes the first inactive
+		 * slot and any slot may be unregistered, so active sessions
+		 * are not necessarily the first num_active_sessions entries.
+		 */
+		for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) {
+			size_t j;
+			struct kbase_ipa_control_session *session = &ipa_ctrl->sessions[i];
+
+			if (!session->active)
+				continue;
+
+			for (j = 0; j < session->num_prfcnts; j++) {
+				struct kbase_ipa_control_prfcnt *prfcnt =
+					&session->prfcnts[j];
+
+				if (prfcnt->gpu_norm)
+					calc_prfcnt_delta(kbdev, prfcnt, true);
+			}
+		}
+
+		ipa_ctrl->cur_gpu_rate = clk_rate_hz;
+
+		/* Update the timer for automatic sampling if active sessions
+		 * are present. Counters have already been manually sampled.
+		 */
+		if (ipa_ctrl->num_active_sessions > 0) {
+			kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER),
+					timer_value(ipa_ctrl->cur_gpu_rate));
+		}
+
+		spin_unlock(&ipa_ctrl->lock);
+
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+}
+
+void kbase_ipa_control_init(struct kbase_device *kbdev)
+{
+	struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
+	struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;
+	struct kbase_ipa_control_listener_data *listener_data;
+	size_t i, j;
+
+	/* No counter is selected and every slot of every block is free */
+	for (i = 0; i < KBASE_IPA_CORE_TYPE_NUM; i++) {
+		for (j = 0; j < KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; j++) {
+			ipa_ctrl->blocks[i].select[j].idx = 0;
+			ipa_ctrl->blocks[i].select[j].refcount = 0;
+		}
+		ipa_ctrl->blocks[i].num_available_counters =
+			KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS;
+	}
+
+	spin_lock_init(&ipa_ctrl->lock);
+	ipa_ctrl->num_active_sessions = 0;
+	for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) {
+		ipa_ctrl->sessions[i].active = false;
+	}
+
+	listener_data = kmalloc(sizeof(*listener_data), GFP_KERNEL);
+	if (listener_data) {
+		listener_data->listener.notify =
+			kbase_ipa_control_rate_change_notify;
+		listener_data->kbdev = kbdev;
+	}
+	/* Stored unconditionally so that kbase_ipa_control_term() sees NULL,
+	 * not a stale value, when the allocation failed.
+	 */
+	ipa_ctrl->rtm_listener_data = listener_data;
+
+	spin_lock(&clk_rtm->lock);
+	if (clk_rtm->clks[KBASE_CLOCK_DOMAIN_TOP])
+		ipa_ctrl->cur_gpu_rate =
+			clk_rtm->clks[KBASE_CLOCK_DOMAIN_TOP]->clock_val;
+	if (listener_data)
+		kbase_clk_rate_trace_manager_subscribe_no_lock(
+			clk_rtm, &listener_data->listener);
+	spin_unlock(&clk_rtm->lock);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_control_init);
+
+void kbase_ipa_control_term(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;
+	struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
+	struct kbase_ipa_control_listener_data *listener_data =
+
ipa_ctrl->rtm_listener_data;
+
+	WARN_ON(ipa_ctrl->num_active_sessions);
+
+	if (listener_data)
+		kbase_clk_rate_trace_manager_unsubscribe(clk_rtm, &listener_data->listener);
+	kfree(ipa_ctrl->rtm_listener_data);
+	/* Do not leave a dangling pointer behind for later users */
+	ipa_ctrl->rtm_listener_data = NULL;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (kbdev->pm.backend.gpu_powered)
+		kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), 0);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_control_term);
+
+int kbase_ipa_control_register(
+	struct kbase_device *kbdev,
+	const struct kbase_ipa_control_perf_counter *perf_counters,
+	size_t num_counters, void **client)
+{
+	int ret = 0;
+	size_t i, session_idx, req_counters[KBASE_IPA_CORE_TYPE_NUM];
+	bool already_configured[KBASE_IPA_CONTROL_MAX_COUNTERS];
+	bool new_config = false;
+	struct kbase_ipa_control *ipa_ctrl;
+	struct kbase_ipa_control_session *session = NULL;
+	unsigned long flags;
+
+	/* kbdev is checked on its own because the error path of the
+	 * remaining checks dereferences it for logging.
+	 */
+	if (WARN_ON(kbdev == NULL))
+		return -EINVAL;
+
+	if (WARN_ON(perf_counters == NULL) || WARN_ON(client == NULL) ||
+	    WARN_ON(num_counters > KBASE_IPA_CONTROL_MAX_COUNTERS)) {
+		dev_err(kbdev->dev, "%s: wrong input arguments", __func__);
+		return -EINVAL;
+	}
+
+	kbase_pm_context_active(kbdev);
+
+	ipa_ctrl = &kbdev->csf.ipa_control;
+	spin_lock_irqsave(&ipa_ctrl->lock, flags);
+
+	if (ipa_ctrl->num_active_sessions == KBASE_IPA_CONTROL_MAX_SESSIONS) {
+		dev_err(kbdev->dev, "%s: too many sessions", __func__);
+		ret = -EBUSY;
+		goto exit;
+	}
+
+	for (i = 0; i < KBASE_IPA_CORE_TYPE_NUM; i++)
+		req_counters[i] = 0;
+
+	/*
+	 * Count how many counters would need to be configured in order to
+	 * satisfy the request. Requested counters which happen to be already
+	 * configured can be skipped.
+	 */
+	for (i = 0; i < num_counters; i++) {
+		size_t j;
+		enum kbase_ipa_core_type type = perf_counters[i].type;
+		u8 idx = perf_counters[i].idx;
+
+		if ((type >= KBASE_IPA_CORE_TYPE_NUM) ||
+		    (idx >= KBASE_IPA_CONTROL_CNT_MAX_IDX)) {
+			dev_err(kbdev->dev,
+				"%s: invalid requested type %u and/or index %u",
+				__func__, type, idx);
+			ret = -EINVAL;
+			goto exit;
+		}
+
+		for (j = 0; j < KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; j++) {
+			struct kbase_ipa_control_prfcnt_config *prfcnt_config =
+				&ipa_ctrl->blocks[type].select[j];
+
+			if (prfcnt_config->refcount > 0) {
+				if (prfcnt_config->idx == idx) {
+					already_configured[i] = true;
+					break;
+				}
+			}
+		}
+
+		if (j == KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS) {
+			already_configured[i] = false;
+			req_counters[type]++;
+			new_config = true;
+		}
+	}
+
+	for (i = 0; i < KBASE_IPA_CORE_TYPE_NUM; i++)
+		if (req_counters[i] >
+		    ipa_ctrl->blocks[i].num_available_counters) {
+			dev_err(kbdev->dev,
+				"%s: more counters (%zu) than available (%zu) have been requested for type %zu",
+				__func__, req_counters[i],
+				ipa_ctrl->blocks[i].num_available_counters, i);
+			ret = -EINVAL;
+			goto exit;
+		}
+
+	/*
+	 * The request has been validated.
+	 * Firstly, find an available session and then set up the initial state
+	 * of the session and update the configuration of performance counters
+	 * in the internal state of kbase_ipa_control.
+	 */
+	for (session_idx = 0; session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS;
+	     session_idx++) {
+		/* Only assign the pointer for a free slot, so that it stays
+		 * NULL (and is caught below) when every slot is in use.
+		 */
+		if (!ipa_ctrl->sessions[session_idx].active) {
+			session = &ipa_ctrl->sessions[session_idx];
+			break;
+		}
+	}
+
+	if (!session) {
+		dev_err(kbdev->dev, "%s: wrong or corrupt session state",
+			__func__);
+		ret = -EBUSY;
+		goto exit;
+	}
+
+	for (i = 0; i < num_counters; i++) {
+		struct kbase_ipa_control_prfcnt_config *prfcnt_config;
+		size_t j;
+		u8 type = perf_counters[i].type;
+		u8 idx = perf_counters[i].idx;
+
+		/* Reuse the slot that already selects this counter, or take
+		 * the first slot that is not referenced by anyone.
+		 */
+		for (j = 0; j < KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; j++) {
+			prfcnt_config = &ipa_ctrl->blocks[type].select[j];
+
+			if (already_configured[i]) {
+				if ((prfcnt_config->refcount > 0) &&
+				    (prfcnt_config->idx == idx)) {
+					break;
+				}
+			} else {
+				if (prfcnt_config->refcount == 0)
+					break;
+			}
+		}
+
+		if (WARN_ON((prfcnt_config->refcount > 0 &&
+			     prfcnt_config->idx != idx) ||
+			    (j == KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS))) {
+			dev_err(kbdev->dev,
+				"%s: invalid internal state: counter already configured or no counter available to configure",
+				__func__);
+			ret = -EBUSY;
+			goto exit;
+		}
+
+		if (prfcnt_config->refcount == 0) {
+			prfcnt_config->idx = idx;
+			ipa_ctrl->blocks[type].num_available_counters--;
+		}
+
+		session->prfcnts[i].accumulated_diff = 0;
+		session->prfcnts[i].type = type;
+		session->prfcnts[i].select_idx = j;
+		session->prfcnts[i].scaling_factor =
+			perf_counters[i].scaling_factor;
+		session->prfcnts[i].gpu_norm = perf_counters[i].gpu_norm;
+
+		prfcnt_config->refcount++;
+	}
+
+	/* Reports to this client for GPU time spent in protected mode
+	 * should begin from the point of registration. This is per-session
+	 * state, so it is set once here rather than for every counter, which
+	 * also covers a registration that requests no counters.
+	 */
+	session->last_query_time = ktime_get_ns();
+
+	/* Initially, no time has been spent in protected mode */
+	session->protm_time = 0;
+
+	/*
+	 * Apply new configuration, if necessary.
+	 * As a temporary solution, make sure that the GPU is on
+	 * before applying the new configuration.
+	 */
+	if (new_config) {
+		u64 select_config[KBASE_IPA_CORE_TYPE_NUM];
+
+		build_select_config(ipa_ctrl, select_config);
+		ret = apply_select_config(kbdev, select_config);
+		if (ret)
+			dev_err(kbdev->dev,
+				"%s: failed to apply SELECT configuration",
+				__func__);
+	}
+
+	if (!ret) {
+		/* Accumulator registers don't contain any sample if the timer
+		 * has not been enabled first. Take a sample manually before
+		 * enabling the timer.
+		 */
+		if (ipa_ctrl->num_active_sessions == 0) {
+			kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND),
+					COMMAND_SAMPLE);
+			ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE);
+			if (!ret) {
+				kbase_reg_write(
+					kbdev, IPA_CONTROL_REG(TIMER),
+					timer_value(ipa_ctrl->cur_gpu_rate));
+			} else {
+				dev_err(kbdev->dev,
+					"%s: failed to sample new counters",
+					__func__);
+			}
+		}
+	}
+
+	if (!ret) {
+		session->num_prfcnts = num_counters;
+		session->active = true;
+		ipa_ctrl->num_active_sessions++;
+		*client = session;
+
+		/*
+		 * Read current raw value to initialize the session.
+		 * This is necessary to put the first query in condition
+		 * to generate a correct value by calculating the difference
+		 * from the beginning of the session.
+		 */
+		for (i = 0; i < session->num_prfcnts; i++) {
+			struct kbase_ipa_control_prfcnt *prfcnt =
+				&session->prfcnts[i];
+			u64 raw_value = read_value_cnt(kbdev, (u8)prfcnt->type,
+						       prfcnt->select_idx);
+			prfcnt->latest_raw_value = raw_value;
+		}
+	}
+
+exit:
+	spin_unlock_irqrestore(&ipa_ctrl->lock, flags);
+	kbase_pm_context_idle(kbdev);
+	return ret;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_control_register);
+
+int kbase_ipa_control_unregister(struct kbase_device *kbdev, const void *client)
+{
+	struct kbase_ipa_control *ipa_ctrl;
+	struct kbase_ipa_control_session *session;
+	int ret = 0;
+	size_t i;
+	unsigned long flags;
+	bool new_config = false, valid_session = false;
+
+	/* kbdev is checked on its own because the error path of the
+	 * remaining check dereferences it for logging.
+	 */
+	if (WARN_ON(kbdev == NULL))
+		return -EINVAL;
+
+	if (WARN_ON(client == NULL)) {
+		dev_err(kbdev->dev, "%s: wrong input arguments", __func__);
+		return -EINVAL;
+	}
+
+	kbase_pm_context_active(kbdev);
+
+	ipa_ctrl = &kbdev->csf.ipa_control;
+	session = (struct kbase_ipa_control_session *)client;
+
+	spin_lock_irqsave(&ipa_ctrl->lock, flags);
+
+	/* The handle must be the address of one of the session slots */
+	for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) {
+		if (session == &ipa_ctrl->sessions[i]) {
+			valid_session = true;
+			break;
+		}
+	}
+
+	if (!valid_session) {
+		dev_err(kbdev->dev, "%s: invalid session handle", __func__);
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	if (ipa_ctrl->num_active_sessions == 0) {
+		dev_err(kbdev->dev, "%s: no active sessions found", __func__);
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	if (!session->active) {
+		dev_err(kbdev->dev, "%s: session is already inactive",
+			__func__);
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	/* Drop this session's references; a slot whose last reference goes
+	 * away becomes available again and requires a new SELECT config.
+	 */
+	for (i = 0; i < session->num_prfcnts; i++) {
+		struct kbase_ipa_control_prfcnt_config *prfcnt_config;
+		u8 type = session->prfcnts[i].type;
+		u8 idx = session->prfcnts[i].select_idx;
+
+		prfcnt_config = &ipa_ctrl->blocks[type].select[idx];
+
+		if (!WARN_ON(prfcnt_config->refcount == 0)) {
+			prfcnt_config->refcount--;
+			if (prfcnt_config->refcount == 0) {
+				new_config = true;
+				ipa_ctrl->blocks[type].num_available_counters++;
+			}
+		}
+	}
+
+	if
(new_config) {
+		u64 select_config[KBASE_IPA_CORE_TYPE_NUM];
+
+		build_select_config(ipa_ctrl, select_config);
+		ret = apply_select_config(kbdev, select_config);
+		if (ret)
+			dev_err(kbdev->dev,
+				"%s: failed to apply SELECT configuration",
+				__func__);
+	}
+
+	session->num_prfcnts = 0;
+	session->active = false;
+	ipa_ctrl->num_active_sessions--;
+
+exit:
+	spin_unlock_irqrestore(&ipa_ctrl->lock, flags);
+	kbase_pm_context_idle(kbdev);
+	return ret;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_control_unregister);
+
+int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client,
+			    u64 *values, size_t num_values, u64 *protected_time)
+{
+	struct kbase_ipa_control *ipa_ctrl;
+	struct kbase_ipa_control_session *session;
+	size_t i;
+	unsigned long flags;
+	bool gpu_ready;
+
+	/* kbdev is checked on its own because the error path of the
+	 * remaining checks dereferences it for logging.
+	 */
+	if (WARN_ON(kbdev == NULL))
+		return -EINVAL;
+
+	if (WARN_ON(client == NULL) || WARN_ON(values == NULL)) {
+		dev_err(kbdev->dev, "%s: wrong input arguments", __func__);
+		return -EINVAL;
+	}
+
+	ipa_ctrl = &kbdev->csf.ipa_control;
+	session = (struct kbase_ipa_control_session *)client;
+
+	if (WARN_ON(num_values < session->num_prfcnts)) {
+		dev_err(kbdev->dev,
+			"%s: not enough space (%zu) to return all counter values (%zu)",
+			__func__, num_values, session->num_prfcnts);
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	gpu_ready = kbdev->pm.backend.gpu_ready;
+
+	/* Hand back what has accumulated since the previous query and start
+	 * accumulating from zero again.
+	 */
+	for (i = 0; i < session->num_prfcnts; i++) {
+		struct kbase_ipa_control_prfcnt *prfcnt = &session->prfcnts[i];
+
+		calc_prfcnt_delta(kbdev, prfcnt, gpu_ready);
+		/* Return all the accumulated difference */
+		values[i] = prfcnt->accumulated_diff;
+		prfcnt->accumulated_diff = 0;
+	}
+
+	if (protected_time) {
+		u64 time_now = ktime_get_ns();
+
+		/* This is the amount of protected-mode time spent prior to
+		 * the current protm period.
+		 */
+		*protected_time = session->protm_time;
+
+		if (kbdev->protected_mode) {
+			*protected_time +=
+				time_now - MAX(session->last_query_time,
+					       ipa_ctrl->protm_start);
+		}
+		session->last_query_time = time_now;
+		session->protm_time = 0;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	for (i = session->num_prfcnts; i < num_values; i++)
+		values[i] = 0;
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_control_query);
+
+void kbase_ipa_control_handle_gpu_power_off(struct kbase_device *kbdev)
+{
+	struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
+	size_t session_idx;
+	int ret;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* GPU should still be ready for use when this function gets called */
+	WARN_ON(!kbdev->pm.backend.gpu_ready);
+
+	/* Interrupts are already disabled and interrupt state is also saved */
+	spin_lock(&ipa_ctrl->lock);
+
+	/* First disable the automatic sampling through TIMER */
+	kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), 0);
+	ret = wait_status(kbdev, STATUS_TIMER_ENABLED);
+	if (ret) {
+		dev_err(kbdev->dev,
+			"Wait for disabling of IPA control timer failed: %d",
+			ret);
+	}
+
+	/* Now issue the manual SAMPLE command */
+	kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), COMMAND_SAMPLE);
+	ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE);
+	if (ret) {
+		dev_err(kbdev->dev,
+			"Wait for the completion of manual sample failed: %d",
+			ret);
+	}
+
+	/* Fold the final sample into every active session. All slots are
+	 * visited: registration takes the first inactive slot and any slot
+	 * may be unregistered, so active sessions are not necessarily the
+	 * first num_active_sessions entries of the array.
+	 */
+	for (session_idx = 0; session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS;
+	     session_idx++) {
+		struct kbase_ipa_control_session *session =
+			&ipa_ctrl->sessions[session_idx];
+		size_t i;
+
+		if (!session->active)
+			continue;
+
+		for (i = 0; i < session->num_prfcnts; i++) {
+			struct kbase_ipa_control_prfcnt *prfcnt =
+				&session->prfcnts[i];
+
+			calc_prfcnt_delta(kbdev, prfcnt, true);
+		}
+	}
+
+	spin_unlock(&ipa_ctrl->lock);
+}
+
+void kbase_ipa_control_handle_gpu_power_on(struct kbase_device *kbdev)
+{
+	struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
+	int ret;
+
+
lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* GPU should have become ready for use when this function gets called */
+	WARN_ON(!kbdev->pm.backend.gpu_ready);
+
+	/* Interrupts are already disabled and interrupt state is also saved */
+	spin_lock(&ipa_ctrl->lock);
+
+	/* Re-issue the APPLY command, this is actually needed only for CSHW */
+	kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), COMMAND_APPLY);
+	ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE);
+	if (ret) {
+		dev_err(kbdev->dev,
+			"Wait for the completion of apply command failed: %d",
+			ret);
+	}
+
+	/* Re-enable the timer for periodic sampling */
+	kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER),
+			timer_value(ipa_ctrl->cur_gpu_rate));
+
+	spin_unlock(&ipa_ctrl->lock);
+}
+
+void kbase_ipa_control_handle_gpu_reset_pre(struct kbase_device *kbdev)
+{
+	/* A soft reset is treated as a power down */
+	kbase_ipa_control_handle_gpu_power_off(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_reset_pre);
+
+void kbase_ipa_control_handle_gpu_reset_post(struct kbase_device *kbdev)
+{
+	struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
+	int ret;
+	u32 status;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* GPU should have become ready for use when this function gets called */
+	WARN_ON(!kbdev->pm.backend.gpu_ready);
+
+	/* Interrupts are already disabled and interrupt state is also saved */
+	spin_lock(&ipa_ctrl->lock);
+
+	/* Check the status reset bit is set before acknowledging it */
+	status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS));
+	if (status & STATUS_RESET) {
+		/* Acknowledge the reset command */
+		kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), COMMAND_RESET_ACK);
+		ret = wait_status(kbdev, STATUS_RESET);
+		if (ret) {
+			dev_err(kbdev->dev,
+				"Wait for the reset ack command failed: %d",
+				ret);
+		}
+	}
+
+	spin_unlock(&ipa_ctrl->lock);
+
+	/* The rest of the recovery is the same as for a power on; the lock
+	 * is released first because that handler takes it again.
+	 */
+	kbase_ipa_control_handle_gpu_power_on(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_reset_post);
+
+#if MALI_UNIT_TEST
+void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev,
+					       u32 clk_index, u32 clk_rate_hz)
+{
+	struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
+	struct kbase_ipa_control_listener_data *listener_data =
+		ipa_ctrl->rtm_listener_data;
+
+	kbase_ipa_control_rate_change_notify(&listener_data->listener,
+					     clk_index, clk_rate_hz);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_control_rate_change_notify_test);
+#endif
+
+void kbase_ipa_control_protm_entered(struct kbase_device *kbdev)
+{
+	struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+	/* Start of the protected-mode period, used to compute its duration */
+	ipa_ctrl->protm_start = ktime_get_ns();
+}
+
+void kbase_ipa_control_protm_exited(struct kbase_device *kbdev)
+{
+	struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
+	size_t i;
+	u64 time_now = ktime_get_ns();
+	u32 status;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Credit the protected-mode period that just ended to every active
+	 * session. All slots are visited: registration takes the first
+	 * inactive slot and any slot may be unregistered, so active sessions
+	 * are not necessarily the first num_active_sessions entries.
+	 */
+	for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) {
+		struct kbase_ipa_control_session *session =
+			&ipa_ctrl->sessions[i];
+
+		if (session->active) {
+			/* Only count time the client has not been told about */
+			u64 protm_time =
+				time_now - MAX(session->last_query_time,
+					       ipa_ctrl->protm_start);
+
+			session->protm_time += protm_time;
+		}
+	}
+
+	/* Acknowledge the protected_mode bit in the IPA_CONTROL STATUS
+	 * register
+	 */
+	status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS));
+	if (status & STATUS_PROTECTED_MODE) {
+		int ret;
+
+		/* Acknowledge the protm command */
+		kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND),
+				COMMAND_PROTECTED_ACK);
+		ret = wait_status(kbdev, STATUS_PROTECTED_MODE);
+		if (ret) {
+			dev_err(kbdev->dev,
+				"Wait for the protm ack command failed: %d",
+				ret);
+		}
+	}
+}
+
diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/ipa_control/mali_kbase_csf_ipa_control.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/ipa_control/mali_kbase_csf_ipa_control.h
new file mode 100644
index 0000000..348a52f
--- /dev/null
+++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/ipa_control/mali_kbase_csf_ipa_control.h
@@ -0,0 +1,244 @@
+/*
SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_IPA_CONTROL_H_ +#define _KBASE_CSF_IPA_CONTROL_H_ + +#include + +/** + * Maximum index accepted to configure an IPA Control performance counter. + */ +#define KBASE_IPA_CONTROL_CNT_MAX_IDX ((u8)64 * 3) + +/** + * struct kbase_ipa_control_perf_counter - Performance counter description + * + * @scaling_factor: Scaling factor by which the counter's value shall be + * multiplied. A scaling factor of 1 corresponds to units + * of 1 second if values are normalised by GPU frequency. + * @gpu_norm: Indicating whether counter values shall be normalized by + * GPU frequency. If true, returned values represent + * an interval of time expressed in seconds (when the scaling + * factor is set to 1). + * @type: Type of counter block for performance counter. + * @idx: Index of the performance counter inside the block. + * It may be dependent on GPU architecture. + * It must be less than KBASE_IPA_CONTROL_CNT_MAX_IDX. + * + * This structure is used by clients of the IPA Control component to describe + * a performance counter that they intend to read. The counter is identified + * by block and index.
In addition to that, the client also specifies how + * values shall be represented. Raw values are a number of GPU cycles; + * if normalized, they are divided by GPU frequency and become an interval + * of time expressed in seconds, since the GPU frequency is given in Hz. + * The client may specify a scaling factor to multiply counter values before + * they are divided by frequency, in case the unit of time of 1 second is + * too low in resolution. For instance: a scaling factor of 1000 implies + * that the returned value is a time expressed in milliseconds; a scaling + * factor of 1000 * 1000 implies that the returned value is a time expressed + * in microseconds. + */ +struct kbase_ipa_control_perf_counter { + u64 scaling_factor; + bool gpu_norm; + enum kbase_ipa_core_type type; + u8 idx; +}; + +/** + * kbase_ipa_control_init - Initialize the IPA Control component + * + * @kbdev: Pointer to Kbase device. + */ +void kbase_ipa_control_init(struct kbase_device *kbdev); + +/** + * kbase_ipa_control_term - Terminate the IPA Control component + * + * @kbdev: Pointer to Kbase device. + */ +void kbase_ipa_control_term(struct kbase_device *kbdev); + +/** + * kbase_ipa_control_register - Register a client to the IPA Control component + * + * @kbdev: Pointer to Kbase device. + * @perf_counters: Array of performance counters the client intends to read. + * For each counter the client specifies block, index, + * scaling factor and whether it must be normalized by GPU + * frequency. + * @num_counters: Number of performance counters. It cannot exceed the total + * number of counters that exist on the IPA Control interface. + * @client: Handle to an opaque structure set by IPA Control if + * the registration is successful. This handle identifies + * a client's session and shall be provided in its future + * queries. 
+ * + * A client needs to subscribe to the IPA Control component by declaring which + * performance counters it intends to read, and specifying a scaling factor + * and whether normalization is requested for each performance counter. + * The function shall configure the IPA Control interface accordingly and start + * a session for the client that made the request. A unique handle is returned + * if registration is successful in order to identify the client's session + * and be used for future queries. + * + * Return: 0 on success, negative -errno on error + */ +int kbase_ipa_control_register( + struct kbase_device *kbdev, + const struct kbase_ipa_control_perf_counter *perf_counters, + size_t num_counters, void **client); + +/** + * kbase_ipa_control_unregister - Unregister a client from IPA Control + * + * @kbdev: Pointer to kbase device. + * @client: Handle to an opaque structure that identifies the client session + * to terminate, as returned by kbase_ipa_control_register. + * + * Return: 0 on success, negative -errno on error + */ +int kbase_ipa_control_unregister(struct kbase_device *kbdev, + const void *client); + +/** + * kbase_ipa_control_query - Query performance counters + * + * @kbdev: Pointer to kbase device. + * @client: Handle to an opaque structure that identifies the client + * session, as returned by kbase_ipa_control_register. + * @values: Array of values queried from performance counters, whose + * length depends on the number of counters requested at + * the time of registration. Values are scaled and normalized + * and represent the difference since the last query. + * @num_values: Number of entries in the array of values that has been + * passed by the caller. It must be at least equal to the + * number of performance counters the client registered itself + * to read. + * @protected_time: Time spent in protected mode since last query, + * expressed in nanoseconds. This pointer may be NULL if the + * client doesn't want to know about this. 
+ * + * A client that has already opened a session by registering itself to read + * some performance counters may use this function to query the values of + * those counters. The values returned are normalized by GPU frequency if + * requested and then multiplied by the scaling factor provided at the time + * of registration. Values always represent a difference since the last query. + * + * Performance counters are not updated while the GPU operates in protected + * mode. For this reason, returned values may be unreliable if the GPU has + * been in protected mode since the last query. The function returns success + * in that case, but it also gives a measure of how much time has been spent + * in protected mode. + * + * Return: 0 on success, negative -errno on error + */ +int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client, + u64 *values, size_t num_values, + u64 *protected_time); + +/** + * kbase_ipa_control_handle_gpu_power_on - Handle the GPU power on event + * + * @kbdev: Pointer to kbase device. + * + * This function is called after GPU has been powered and is ready for use. + * After the GPU power on, IPA Control component needs to ensure that the + * counters start incrementing again. + */ +void kbase_ipa_control_handle_gpu_power_on(struct kbase_device *kbdev); + +/** + * kbase_ipa_control_handle_gpu_power_off - Handle the GPU power off event + * + * @kbdev: Pointer to kbase device. + * + * This function is called just before the GPU is powered off when it is still + * ready for use. + * IPA Control component needs to be aware of the GPU power off so that it can + * handle the query from Clients appropriately and return meaningful values + * to them. + */ +void kbase_ipa_control_handle_gpu_power_off(struct kbase_device *kbdev); + +/** + * kbase_ipa_control_handle_gpu_reset_pre - Handle the pre GPU reset event + * + * @kbdev: Pointer to kbase device. + * + * This function is called when the GPU is about to be reset. 
+ */ +void kbase_ipa_control_handle_gpu_reset_pre(struct kbase_device *kbdev); + +/** + * kbase_ipa_control_handle_gpu_reset_post - Handle the post GPU reset event + * + * @kbdev: Pointer to kbase device. + * + * This function is called after the GPU has been reset. + */ +void kbase_ipa_control_handle_gpu_reset_post(struct kbase_device *kbdev); + +#if MALI_UNIT_TEST +/** + * kbase_ipa_control_rate_change_notify_test - Notify GPU rate change + * (only for testing) + * + * @kbdev: Pointer to kbase device. + * @clk_index: Index of the clock for which the change has occurred. + * @clk_rate_hz: Clock frequency(Hz). + * + * Notify the IPA Control component about a GPU rate change. + */ +void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev, + u32 clk_index, u32 clk_rate_hz); +#endif /* MALI_UNIT_TEST */ + +/** + * kbase_ipa_control_protm_entered - Tell IPA_CONTROL that protected mode + * has been entered. + * + * @kbdev: Pointer to kbase device. + * + * This function provides a means through which IPA_CONTROL can be informed + * that the GPU has entered protected mode. Since the GPU cannot access + * performance counters while in this mode, this information is useful as + * it implies (a) the values of these registers cannot change, so there's no + * point trying to read them, and (b) IPA_CONTROL has a means through which + * to record the duration of time the GPU is in protected mode, which can + * then be forwarded on to clients, who may wish, for example, to assume + * that the GPU was busy 100% of the time while in this mode. + */ +void kbase_ipa_control_protm_entered(struct kbase_device *kbdev); + +/** + * kbase_ipa_control_protm_exited - Tell IPA_CONTROL that protected mode + * has been exited. + * + * @kbdev: Pointer to kbase device + * + * This function provides a means through which IPA_CONTROL can be informed + * that the GPU has exited from protected mode.
+ */ +void kbase_ipa_control_protm_exited(struct kbase_device *kbdev); + +#endif /* _KBASE_CSF_IPA_CONTROL_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf.c new file mode 100644 index 0000000..d49e343 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf.c @@ -0,0 +1,3069 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include +#include +#include "mali_kbase_csf.h" +#include "backend/gpu/mali_kbase_pm_internal.h" +#include +#include +#include +#include +#include "mali_kbase_csf_tiler_heap.h" +#include +#include "mali_kbase_csf_timeout.h" +#include + +#define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK) +#define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK) +#define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1) + +/** + * struct kbase_csf_event - CSF event callback. + * + * This structure belongs to the list of events which is part of a Kbase + * context, and describes a callback function with a custom parameter to pass + * to it when a CSF event is signalled. + * + * @link: Link to the rest of the list. 
+ * @kctx: Pointer to the Kbase context this event belongs to. + * @callback: Callback function to call when a CSF event is signalled. + * @param: Parameter to pass to the callback function. + */ +struct kbase_csf_event { + struct list_head link; + struct kbase_context *kctx; + kbase_csf_event_callback *callback; + void *param; +}; + +const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT] = { + KBASE_QUEUE_GROUP_PRIORITY_HIGH, + KBASE_QUEUE_GROUP_PRIORITY_MEDIUM, + KBASE_QUEUE_GROUP_PRIORITY_LOW, + KBASE_QUEUE_GROUP_PRIORITY_REALTIME +}; +const u8 kbasep_csf_relative_to_queue_group_priority[KBASE_QUEUE_GROUP_PRIORITY_COUNT] = { + BASE_QUEUE_GROUP_PRIORITY_REALTIME, + BASE_QUEUE_GROUP_PRIORITY_HIGH, + BASE_QUEUE_GROUP_PRIORITY_MEDIUM, + BASE_QUEUE_GROUP_PRIORITY_LOW +}; + +static void put_user_pages_mmap_handle(struct kbase_context *kctx, + struct kbase_queue *queue) +{ + unsigned long cookie_nr; + + lockdep_assert_held(&kctx->csf.lock); + + if (queue->handle == BASEP_MEM_INVALID_HANDLE) + return; + + cookie_nr = + PFN_DOWN(queue->handle - BASEP_MEM_CSF_USER_IO_PAGES_HANDLE); + + if (!WARN_ON(kctx->csf.user_pages_info[cookie_nr] != queue)) { + /* free up cookie */ + kctx->csf.user_pages_info[cookie_nr] = NULL; + bitmap_set(kctx->csf.cookies, cookie_nr, 1); + } + + queue->handle = BASEP_MEM_INVALID_HANDLE; +} + +/* Reserve a cookie, to be returned as a handle to userspace for creating + * the CPU mapping of the pair of input/output pages and Hw doorbell page. + * Will return 0 in case of success otherwise negative on failure. 
+ */ +static int get_user_pages_mmap_handle(struct kbase_context *kctx, + struct kbase_queue *queue) +{ + unsigned long cookie, cookie_nr; + + lockdep_assert_held(&kctx->csf.lock); + + if (bitmap_empty(kctx->csf.cookies, + KBASE_CSF_NUM_USER_IO_PAGES_HANDLE)) { + dev_err(kctx->kbdev->dev, + "No csf cookies available for allocation!"); + return -ENOMEM; + } + + /* allocate a cookie */ + cookie_nr = find_first_bit(kctx->csf.cookies, + KBASE_CSF_NUM_USER_IO_PAGES_HANDLE); + if (kctx->csf.user_pages_info[cookie_nr]) { + dev_err(kctx->kbdev->dev, + "Inconsistent state of csf cookies!"); + return -EINVAL; + } + kctx->csf.user_pages_info[cookie_nr] = queue; + bitmap_clear(kctx->csf.cookies, cookie_nr, 1); + + /* relocate to correct base */ + cookie = cookie_nr + PFN_DOWN(BASEP_MEM_CSF_USER_IO_PAGES_HANDLE); + cookie <<= PAGE_SHIFT; + + queue->handle = (u64)cookie; + + return 0; +} + +static void gpu_munmap_user_io_pages(struct kbase_context *kctx, + struct kbase_va_region *reg) +{ + size_t num_pages = 2; + + kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, + reg->start_pfn, num_pages, MCU_AS_NR); + + WARN_ON(reg->flags & KBASE_REG_FREE); + + mutex_lock(&kctx->kbdev->csf.reg_lock); + kbase_remove_va_region(reg); + mutex_unlock(&kctx->kbdev->csf.reg_lock); +} + +static void init_user_io_pages(struct kbase_queue *queue) +{ + u32 *input_addr = (u32 *)(queue->user_io_addr); + u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE); + + input_addr[CS_INSERT_LO/4] = 0; + input_addr[CS_INSERT_HI/4] = 0; + + input_addr[CS_EXTRACT_INIT_LO/4] = 0; + input_addr[CS_EXTRACT_INIT_HI/4] = 0; + + output_addr[CS_EXTRACT_LO/4] = 0; + output_addr[CS_EXTRACT_HI/4] = 0; + + output_addr[CS_ACTIVE/4] = 0; +} + +/* Map the input/output pages in the shared interface segment of MCU firmware + * address space. 
+ */ +static int gpu_mmap_user_io_pages(struct kbase_device *kbdev, + struct tagged_addr *phys, struct kbase_va_region *reg) +{ + unsigned long mem_flags = KBASE_REG_GPU_RD; + const size_t num_pages = 2; + int ret; + +#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \ + ((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \ + (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE))) + mem_flags |= + KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); +#else + if (kbdev->system_coherency == COHERENCY_NONE) { + mem_flags |= + KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); + } else { + mem_flags |= KBASE_REG_SHARE_BOTH | + KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED); + } +#endif + + mutex_lock(&kbdev->csf.reg_lock); + ret = kbase_add_va_region_rbtree(kbdev, reg, 0, num_pages, 1); + reg->flags &= ~KBASE_REG_FREE; + mutex_unlock(&kbdev->csf.reg_lock); + + if (ret) + return ret; + + /* Map input page */ + ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, + reg->start_pfn, &phys[0], + 1, mem_flags, MCU_AS_NR, + KBASE_MEM_GROUP_CSF_IO); + if (ret) + goto bad_insert; + + /* Map output page, it needs rw access */ + mem_flags |= KBASE_REG_GPU_WR; + ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, + reg->start_pfn + 1, &phys[1], + 1, mem_flags, MCU_AS_NR, + KBASE_MEM_GROUP_CSF_IO); + if (ret) + goto bad_insert_output_page; + + return 0; + +bad_insert_output_page: + kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, + reg->start_pfn, 1, MCU_AS_NR); +bad_insert: + mutex_lock(&kbdev->csf.reg_lock); + kbase_remove_va_region(reg); + mutex_unlock(&kbdev->csf.reg_lock); + + return ret; +} + +static void kernel_unmap_user_io_pages(struct kbase_context *kctx, + struct kbase_queue *queue) +{ + const size_t num_pages = 2; + + kbase_gpu_vm_lock(kctx); + + vunmap(queue->user_io_addr); + + WARN_ON(num_pages > atomic_read(&kctx->permanent_mapped_pages)); + atomic_sub(num_pages, &kctx->permanent_mapped_pages); + + kbase_gpu_vm_unlock(kctx); +} + +static int 
kernel_map_user_io_pages(struct kbase_context *kctx, + struct kbase_queue *queue) +{ + struct page *page_list[2]; + pgprot_t cpu_map_prot; + int ret = 0; + size_t i; + + kbase_gpu_vm_lock(kctx); + + if (ARRAY_SIZE(page_list) > (KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES - + atomic_read(&kctx->permanent_mapped_pages))) { + ret = -ENOMEM; + goto unlock; + } + + /* The pages are mapped to Userspace also, so use the same mapping + * attributes as used inside the CPU page fault handler. + */ +#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \ + ((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \ + (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE))) + cpu_map_prot = pgprot_device(PAGE_KERNEL); +#else + if (kctx->kbdev->system_coherency == COHERENCY_NONE) + cpu_map_prot = pgprot_writecombine(PAGE_KERNEL); + else + cpu_map_prot = PAGE_KERNEL; +#endif + + for (i = 0; i < ARRAY_SIZE(page_list); i++) + page_list[i] = as_page(queue->phys[i]); + + queue->user_io_addr = vmap(page_list, ARRAY_SIZE(page_list), VM_MAP, cpu_map_prot); + + if (!queue->user_io_addr) + ret = -ENOMEM; + else + atomic_add(ARRAY_SIZE(page_list), &kctx->permanent_mapped_pages); + +unlock: + kbase_gpu_vm_unlock(kctx); + return ret; +} + +static void term_queue_group(struct kbase_queue_group *group); +static void get_queue(struct kbase_queue *queue); +static void release_queue(struct kbase_queue *queue); + +/** + * kbase_csf_free_command_stream_user_pages() - Free the resources allocated + * for a queue at the time of bind. + * + * @kctx: Address of the kbase context within which the queue was created. + * @queue: Pointer to the queue to be unlinked. + * + * This function will free the pair of physical pages allocated for a GPU + * command queue, and also release the hardware doorbell page, that were mapped + * into the process address space to enable direct submission of commands to + * the hardware. Also releases the reference taken on the queue when the mapping + * was created. 
+ * + * This function will be called only when the mapping is being removed and + * so the resources for queue will not get freed up until the mapping is + * removed even though userspace could have terminated the queue. + * Kernel will ensure that the termination of Kbase context would only be + * triggered after the mapping is removed. + * + * If an explicit or implicit unbind was missed by the userspace then the + * mapping will persist. On process exit kernel itself will remove the mapping. + */ +static void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, + struct kbase_queue *queue) +{ + const size_t num_pages = 2; + + gpu_munmap_user_io_pages(kctx, queue->reg); + kernel_unmap_user_io_pages(kctx, queue); + + kbase_mem_pool_free_pages( + &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], + num_pages, queue->phys, true, false); + + kfree(queue->reg); + queue->reg = NULL; + + /* If the queue has already been terminated by userspace + * then the ref count for queue object will drop to 0 here. 
+ */ + release_queue(queue); +} + +int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, + struct kbase_queue *queue) +{ + struct kbase_device *kbdev = kctx->kbdev; + struct kbase_va_region *reg; + const size_t num_pages = 2; + int ret; + + lockdep_assert_held(&kctx->csf.lock); + + reg = kbase_alloc_free_region(&kctx->kbdev->csf.shared_reg_rbtree, 0, + num_pages, KBASE_REG_ZONE_MCU_SHARED); + if (!reg) + return -ENOMEM; + + ret = kbase_mem_pool_alloc_pages( + &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], + num_pages, queue->phys, false); + + if (ret != num_pages) + goto phys_alloc_failed; + + ret = kernel_map_user_io_pages(kctx, queue); + if (ret) + goto kernel_map_failed; + + init_user_io_pages(queue); + + ret = gpu_mmap_user_io_pages(kctx->kbdev, queue->phys, reg); + if (ret) + goto gpu_mmap_failed; + + queue->reg = reg; + + mutex_lock(&kbdev->csf.reg_lock); + if (kbdev->csf.db_file_offsets > + (U32_MAX - BASEP_QUEUE_NR_MMAP_USER_PAGES + 1)) + kbdev->csf.db_file_offsets = 0; + + queue->db_file_offset = kbdev->csf.db_file_offsets; + kbdev->csf.db_file_offsets += BASEP_QUEUE_NR_MMAP_USER_PAGES; + + WARN(atomic_read(&queue->refcount) != 1, "Incorrect refcounting for queue object\n"); + /* This is the second reference taken on the queue object and + * would be dropped only when the IO mapping is removed either + * explicitly by userspace or implicitly by kernel on process exit. 
+ */ + get_queue(queue); + queue->bind_state = KBASE_CSF_QUEUE_BOUND; + mutex_unlock(&kbdev->csf.reg_lock); + + return 0; + +gpu_mmap_failed: + kernel_unmap_user_io_pages(kctx, queue); + +kernel_map_failed: + kbase_mem_pool_free_pages( + &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], + num_pages, queue->phys, false, false); + +phys_alloc_failed: + kfree(reg); + + return -ENOMEM; +} + +static struct kbase_queue_group *find_queue_group(struct kbase_context *kctx, + u8 group_handle) +{ + uint index = group_handle; + + lockdep_assert_held(&kctx->csf.lock); + + if (index < MAX_QUEUE_GROUP_NUM && kctx->csf.queue_groups[index]) { + if (WARN_ON(kctx->csf.queue_groups[index]->handle != index)) + return NULL; + return kctx->csf.queue_groups[index]; + } + + return NULL; +} + +int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx, + u8 group_handle) +{ + struct kbase_queue_group *group; + + mutex_lock(&kctx->csf.lock); + group = find_queue_group(kctx, group_handle); + mutex_unlock(&kctx->csf.lock); + + return group ? 0 : -EINVAL; +} + +static struct kbase_queue *find_queue(struct kbase_context *kctx, u64 base_addr) +{ + struct kbase_queue *queue; + + lockdep_assert_held(&kctx->csf.lock); + + list_for_each_entry(queue, &kctx->csf.queue_list, link) { + if (base_addr == queue->base_addr) + return queue; + } + + return NULL; +} + +static void get_queue(struct kbase_queue *queue) +{ + WARN_ON(!atomic_inc_not_zero(&queue->refcount)); +} + +static void release_queue(struct kbase_queue *queue) +{ + lockdep_assert_held(&queue->kctx->csf.lock); + + WARN_ON(atomic_read(&queue->refcount) <= 0); + + if (atomic_dec_and_test(&queue->refcount)) { + /* The queue can't still be on the per context list. 
*/ + WARN_ON(!list_empty(&queue->link)); + WARN_ON(queue->group); + kfree(queue); + } +} + +static void oom_event_worker(struct work_struct *data); +static void fatal_event_worker(struct work_struct *data); + +/* Between reg and reg_ex, one and only one must be null */ +static int csf_queue_register_internal(struct kbase_context *kctx, + struct kbase_ioctl_cs_queue_register *reg, + struct kbase_ioctl_cs_queue_register_ex *reg_ex) +{ + struct kbase_queue *queue; + int ret = 0; + struct kbase_va_region *region; + u64 queue_addr; + size_t queue_size; + + /* Only one pointer expected, otherwise coding error */ + if ((reg == NULL && reg_ex == NULL) || (reg && reg_ex)) { + dev_err(kctx->kbdev->dev, + "Error, one and only one param-ptr expected!"); + return -EINVAL; + } + + /* struct kbase_ioctl_cs_queue_register_ex contains a full + * struct kbase_ioctl_cs_queue_register at the start address. So + * the pointer can be safely cast to pointing to a + * kbase_ioctl_cs_queue_register object. + */ + if (reg_ex) + reg = (struct kbase_ioctl_cs_queue_register *)reg_ex; + + /* Validate the queue priority */ + if (reg->priority > BASE_QUEUE_MAX_PRIORITY) + return -EINVAL; + + queue_addr = reg->buffer_gpu_addr; + queue_size = reg->buffer_size >> PAGE_SHIFT; + + mutex_lock(&kctx->csf.lock); + + /* Check if queue is already registered */ + if (find_queue(kctx, queue_addr) != NULL) { + ret = -EINVAL; + goto out; + } + + /* Check if the queue address is valid */ + kbase_gpu_vm_lock(kctx); + region = kbase_region_tracker_find_region_enclosing_address(kctx, + queue_addr); + + if (kbase_is_region_invalid_or_free(region)) { + ret = -ENOENT; + goto out_unlock_vm; + } + + if (queue_size > (region->nr_pages - + ((queue_addr >> PAGE_SHIFT) - region->start_pfn))) { + ret = -EINVAL; + goto out_unlock_vm; + } + + /* Check address validity on cs_trace buffer etc. Don't care + * if not enabled (i.e. when size is 0). 
+ */ + if (reg_ex && reg_ex->ex_buffer_size) { + int buf_pages = (reg_ex->ex_buffer_size + + (1 << PAGE_SHIFT) - 1) >> PAGE_SHIFT; + + region = kbase_region_tracker_find_region_enclosing_address( + kctx, reg_ex->ex_buffer_base); + if (kbase_is_region_invalid_or_free(region)) { + ret = -ENOENT; + goto out_unlock_vm; + } + + if (buf_pages > (region->nr_pages - + ((reg_ex->ex_buffer_base >> PAGE_SHIFT) - + region->start_pfn))) { + ret = -EINVAL; + goto out_unlock_vm; + } + + region = kbase_region_tracker_find_region_enclosing_address( + kctx, reg_ex->ex_offset_var_addr); + if (kbase_is_region_invalid_or_free(region)) { + ret = -ENOENT; + goto out_unlock_vm; + } + } + + queue = kzalloc(sizeof(struct kbase_queue), GFP_KERNEL); + + if (!queue) { + ret = -ENOMEM; + goto out_unlock_vm; + } + + queue->kctx = kctx; + queue->base_addr = queue_addr; + queue->queue_reg = region; + queue->size = (queue_size << PAGE_SHIFT); + queue->csi_index = KBASEP_IF_NR_INVALID; + queue->enabled = false; + + queue->priority = reg->priority; + atomic_set(&queue->refcount, 1); + + queue->group = NULL; + queue->bind_state = KBASE_CSF_QUEUE_UNBOUND; + queue->handle = BASEP_MEM_INVALID_HANDLE; + queue->doorbell_nr = KBASEP_USER_DB_NR_INVALID; + + queue->status_wait = 0; + queue->sync_ptr = 0; + queue->sync_value = 0; + + queue->sb_status = 0; + queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED; + + INIT_LIST_HEAD(&queue->link); + INIT_LIST_HEAD(&queue->error.link); + INIT_WORK(&queue->oom_event_work, oom_event_worker); + INIT_WORK(&queue->fatal_event_work, fatal_event_worker); + list_add(&queue->link, &kctx->csf.queue_list); + + region->flags |= KBASE_REG_NO_USER_FREE; + + /* Initialize the cs_trace configuration parameters, When buffer_size + * is 0, trace is disabled. Here we only update the fields when + * enabled, otherwise leave them as default zeros. 
+ */ + if (reg_ex && reg_ex->ex_buffer_size) { + u32 cfg = CS_INSTR_CONFIG_EVENT_SIZE_SET( + 0, reg_ex->ex_event_size); + cfg = CS_INSTR_CONFIG_EVENT_STATE_SET( + cfg, reg_ex->ex_event_state); + + queue->trace_cfg = cfg; + queue->trace_buffer_size = reg_ex->ex_buffer_size; + queue->trace_buffer_base = reg_ex->ex_buffer_base; + queue->trace_offset_ptr = reg_ex->ex_offset_var_addr; + } + +out_unlock_vm: + kbase_gpu_vm_unlock(kctx); +out: + mutex_unlock(&kctx->csf.lock); + + return ret; +} + +int kbase_csf_queue_register(struct kbase_context *kctx, + struct kbase_ioctl_cs_queue_register *reg) +{ + return csf_queue_register_internal(kctx, reg, NULL); +} + +int kbase_csf_queue_register_ex(struct kbase_context *kctx, + struct kbase_ioctl_cs_queue_register_ex *reg) +{ + struct kbase_csf_global_iface const *const iface = + &kctx->kbdev->csf.global_iface; + u32 const glb_version = iface->version; + u32 instr = iface->instr_features; + u8 max_size = GLB_INSTR_FEATURES_EVENT_SIZE_MAX_GET(instr); + u32 min_buf_size = (1u << reg->ex_event_size) * + GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_GET(instr); + + /* If cs_trace_command not supported, the call fails */ + if (glb_version < kbase_csf_interface_version(1, 1, 0)) + return -EINVAL; + + /* Validate the cs_trace configuration parameters */ + if (reg->ex_buffer_size && + ((reg->ex_event_size > max_size) || + (reg->ex_buffer_size & (reg->ex_buffer_size - 1)) || + (reg->ex_buffer_size < min_buf_size))) + return -EINVAL; + + return csf_queue_register_internal(kctx, NULL, reg); +} + +static void unbind_queue(struct kbase_context *kctx, + struct kbase_queue *queue); + +void kbase_csf_queue_terminate(struct kbase_context *kctx, + struct kbase_ioctl_cs_queue_terminate *term) +{ + struct kbase_device *kbdev = kctx->kbdev; + struct kbase_queue *queue; + int err; + bool reset_prevented = false; + + err = kbase_reset_gpu_prevent_and_wait(kbdev); + if (err) + dev_warn( + kbdev->dev, + "Unsuccessful GPU reset detected when terminating queue 
(buffer_addr=0x%.16llx), attempting to terminate regardless", + term->buffer_gpu_addr); + else + reset_prevented = true; + + mutex_lock(&kctx->csf.lock); + queue = find_queue(kctx, term->buffer_gpu_addr); + + if (queue) { + unsigned long flags; + + /* As the GPU queue has been terminated by the + * user space, undo the actions that were performed when the + * queue was registered i.e. remove the queue from the per + * context list & release the initial reference. The subsequent + * lookups for the queue in find_queue() would fail. + */ + list_del_init(&queue->link); + + /* Stop the CSI to which queue was bound */ + unbind_queue(kctx, queue); + + kbase_gpu_vm_lock(kctx); + if (!WARN_ON(!queue->queue_reg)) { + /* After this the Userspace would be able to free the + * memory for GPU queue. In case the Userspace missed + * terminating the queue, the cleanup will happen on + * context termination where teardown of region tracker + * would free up the GPU queue memory. + */ + queue->queue_reg->flags &= ~KBASE_REG_NO_USER_FREE; + } + kbase_gpu_vm_unlock(kctx); + + spin_lock_irqsave(&kctx->csf.event_lock, flags); + dev_dbg(kctx->kbdev->dev, + "Remove any pending command queue fatal from context %pK\n", + (void *)kctx); + list_del_init(&queue->error.link); + spin_unlock_irqrestore(&kctx->csf.event_lock, flags); + + release_queue(queue); + } + + mutex_unlock(&kctx->csf.lock); + if (reset_prevented) + kbase_reset_gpu_allow(kbdev); +} + +int kbase_csf_queue_bind(struct kbase_context *kctx, union kbase_ioctl_cs_queue_bind *bind) +{ + struct kbase_queue *queue; + struct kbase_queue_group *group; + u8 max_streams; + int ret = -EINVAL; + + mutex_lock(&kctx->csf.lock); + + group = find_queue_group(kctx, bind->in.group_handle); + queue = find_queue(kctx, bind->in.buffer_gpu_addr); + + if (!group || !queue) + goto out; + + /* For the time being, all CSGs have the same number of CSs + * so we check CSG 0 for this number + */ + max_streams = 
kctx->kbdev->csf.global_iface.groups[0].stream_num; + + if (bind->in.csi_index >= max_streams) + goto out; + + if (group->run_state == KBASE_CSF_GROUP_TERMINATED) + goto out; + + if (queue->group || group->bound_queues[bind->in.csi_index]) + goto out; + + ret = get_user_pages_mmap_handle(kctx, queue); + if (ret) + goto out; + + bind->out.mmap_handle = queue->handle; + group->bound_queues[bind->in.csi_index] = queue; + queue->group = group; + queue->csi_index = bind->in.csi_index; + queue->bind_state = KBASE_CSF_QUEUE_BIND_IN_PROGRESS; + +out: + mutex_unlock(&kctx->csf.lock); + + return ret; +} + +static struct kbase_queue_group *get_bound_queue_group( + struct kbase_queue *queue) +{ + struct kbase_context *kctx = queue->kctx; + struct kbase_queue_group *group; + + if (queue->bind_state == KBASE_CSF_QUEUE_UNBOUND) + return NULL; + + if (!queue->group) + return NULL; + + if (queue->csi_index == KBASEP_IF_NR_INVALID) { + dev_warn(kctx->kbdev->dev, "CS interface index is incorrect\n"); + return NULL; + } + + group = queue->group; + + if (group->bound_queues[queue->csi_index] != queue) { + dev_warn(kctx->kbdev->dev, "Incorrect mapping between queues & queue groups\n"); + return NULL; + } + + return group; +} + +void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot) +{ + if (WARN_ON(slot < 0)) + return; + + kbase_csf_ring_csg_slots_doorbell(kbdev, (u32) (1 << slot)); +} + +void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev, + u32 slot_bitmap) +{ + const struct kbase_csf_global_iface *const global_iface = + &kbdev->csf.global_iface; + const u32 allowed_bitmap = + (u32) ((1U << kbdev->csf.global_iface.group_num) - 1); + u32 value; + + if (WARN_ON(slot_bitmap > allowed_bitmap)) + return; + + value = kbase_csf_firmware_global_output(global_iface, GLB_DB_ACK); + value ^= slot_bitmap; + kbase_csf_firmware_global_input_mask(global_iface, GLB_DB_REQ, value, + slot_bitmap); + + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); +} + +void 
kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev, + struct kbase_queue *queue) +{ + mutex_lock(&kbdev->csf.reg_lock); + + if (queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID) + kbase_csf_ring_doorbell(kbdev, queue->doorbell_nr); + + mutex_unlock(&kbdev->csf.reg_lock); +} + +void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, + int csi_index, int csg_nr, + bool ring_csg_doorbell) +{ + struct kbase_csf_cmd_stream_group_info *ginfo; + u32 value; + + if (WARN_ON(csg_nr < 0) || + WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num)) + return; + + ginfo = &kbdev->csf.global_iface.groups[csg_nr]; + + if (WARN_ON(csi_index < 0) || + WARN_ON(csi_index >= ginfo->stream_num)) + return; + + value = kbase_csf_firmware_csg_output(ginfo, CSG_DB_ACK); + value ^= (1 << csi_index); + kbase_csf_firmware_csg_input_mask(ginfo, CSG_DB_REQ, value, + 1 << csi_index); + + if (likely(ring_csg_doorbell)) + kbase_csf_ring_csg_doorbell(kbdev, csg_nr); +} + +int kbase_csf_queue_kick(struct kbase_context *kctx, + struct kbase_ioctl_cs_queue_kick *kick) +{ + struct kbase_device *kbdev = kctx->kbdev; + struct kbase_queue_group *group; + struct kbase_queue *queue; + int err = 0; + + err = kbase_reset_gpu_prevent_and_wait(kbdev); + if (err) { + dev_warn( + kbdev->dev, + "Unsuccessful GPU reset detected when kicking queue (buffer_addr=0x%.16llx)", + kick->buffer_gpu_addr); + return err; + } + + mutex_lock(&kctx->csf.lock); + queue = find_queue(kctx, kick->buffer_gpu_addr); + if (!queue) + err = -EINVAL; + + if (!err) { + group = get_bound_queue_group(queue); + if (!group) { + dev_err(kctx->kbdev->dev, "queue not bound\n"); + err = -EINVAL; + } + } + + if (!err) + err = kbase_csf_scheduler_queue_start(queue); + mutex_unlock(&kctx->csf.lock); + kbase_reset_gpu_allow(kbdev); + + return err; +} + +static void unbind_stopped_queue(struct kbase_context *kctx, + struct kbase_queue *queue) +{ + lockdep_assert_held(&kctx->csf.lock); + + if (queue->bind_state != 
KBASE_CSF_QUEUE_UNBOUND) { + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kctx->kbdev, &flags); + bitmap_clear(queue->group->protm_pending_bitmap, + queue->csi_index, 1); + KBASE_KTRACE_ADD_CSF_GRP_Q(kctx->kbdev, PROTM_PENDING_CLEAR, + queue->group, queue, queue->group->protm_pending_bitmap[0]); + queue->group->bound_queues[queue->csi_index] = NULL; + queue->group = NULL; + kbase_csf_scheduler_spin_unlock(kctx->kbdev, flags); + + put_user_pages_mmap_handle(kctx, queue); + queue->bind_state = KBASE_CSF_QUEUE_UNBOUND; + } +} +/** + * unbind_queue() - Remove the linkage between a GPU command queue and the group + * to which it was bound or being bound. + * + * @kctx: Address of the kbase context within which the queue was created. + * @queue: Pointer to the queue to be unlinked. + * + * This function will also send the stop request to firmware for the CS + * if the group to which the GPU command queue was bound is scheduled. + * + * This function would be called when :- + * - queue is being unbound. This would happen when the IO mapping + * created on bind is removed explicitly by userspace or the process + * is getting exited. + * - queue group is being terminated which still has queues bound + * to it. This could happen on an explicit terminate request from userspace + * or when the kbase context is being terminated. + * - queue is being terminated without completing the bind operation. + * This could happen if either the queue group is terminated + * after the CS_QUEUE_BIND ioctl but before the 2nd part of bind operation + * to create the IO mapping is initiated. + * - There is a failure in executing the 2nd part of bind operation, inside the + * mmap handler, which creates the IO mapping for queue. 
+ */ + +static void unbind_queue(struct kbase_context *kctx, struct kbase_queue *queue) +{ + kbase_reset_gpu_assert_failed_or_prevented(kctx->kbdev); + lockdep_assert_held(&kctx->csf.lock); + + if (queue->bind_state != KBASE_CSF_QUEUE_UNBOUND) { + if (queue->bind_state == KBASE_CSF_QUEUE_BOUND) + kbase_csf_scheduler_queue_stop(queue); + + unbind_stopped_queue(kctx, queue); + } +} + +void kbase_csf_queue_unbind(struct kbase_queue *queue) +{ + struct kbase_context *kctx = queue->kctx; + + lockdep_assert_held(&kctx->csf.lock); + + /* As the process itself is exiting, the termination of queue group can + * be done which would be much faster than stopping of individual + * queues. This would ensure a faster exit for the process especially + * in the case where CSI gets stuck. + * The CSI STOP request will wait for the in flight work to drain + * whereas CSG TERM request would result in an immediate abort or + * cancellation of the pending work. + */ + if (current->flags & PF_EXITING) { + struct kbase_queue_group *group = get_bound_queue_group(queue); + + if (group) + term_queue_group(group); + + WARN_ON(queue->bind_state != KBASE_CSF_QUEUE_UNBOUND); + } else { + unbind_queue(kctx, queue); + } + + /* Free the resources, if allocated for this queue. */ + if (queue->reg) + kbase_csf_free_command_stream_user_pages(kctx, queue); +} + +void kbase_csf_queue_unbind_stopped(struct kbase_queue *queue) +{ + struct kbase_context *kctx = queue->kctx; + + lockdep_assert_held(&kctx->csf.lock); + + WARN_ON(queue->bind_state == KBASE_CSF_QUEUE_BOUND); + unbind_stopped_queue(kctx, queue); + + /* Free the resources, if allocated for this queue. */ + if (queue->reg) + kbase_csf_free_command_stream_user_pages(kctx, queue); +} + +/** + * find_free_group_handle() - Find a free handle for a queue group + * + * @kctx: Address of the kbase context within which the queue group + * is to be created. + * + * Return: a queue group handle on success, or a negative error code on failure. 
+ */
+static int find_free_group_handle(struct kbase_context *const kctx)
+{
+	int slot;
+
+	lockdep_assert_held(&kctx->csf.lock);
+
+	/* Hand out the lowest unused index in the per-context CSG array */
+	for (slot = 0; slot != MAX_QUEUE_GROUP_NUM; slot++) {
+		if (!kctx->csf.queue_groups[slot])
+			return slot;
+	}
+
+	/* Every handle is already in use */
+	return -ENOMEM;
+}
+
+/**
+ * iface_has_enough_streams() - Tell whether any CSG exposes enough CSs
+ *
+ * @kbdev:  Instance of a GPU platform device that implements a CSF interface.
+ * @cs_min: Minimum number of CSs required.
+ *
+ * Return: true as soon as one CSG is found whose stream count is at least
+ *         @cs_min; false if no CSG qualifies.
+ */
+static bool iface_has_enough_streams(struct kbase_device *const kbdev,
+				     u32 const cs_min)
+{
+	struct kbase_csf_cmd_stream_group_info *const csg_info =
+		kbdev->csf.global_iface.groups;
+	const u32 csg_count = kbdev->csf.global_iface.group_num;
+	u32 csg;
+
+	for (csg = 0; csg < csg_count; csg++) {
+		if (groups_stream_num_ok:
+		    csg_info[csg].stream_num >= cs_min)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * create_normal_suspend_buffer() - Create normal-mode suspend buffer per
+ *					queue group
+ *
+ * @kctx:	Pointer to kbase context where the queue group is created at
+ * @s_buf:	Pointer to suspend buffer that is attached to queue group
+ *
+ * Return: 0 if suspend buffer is successfully allocated and reflected to GPU
+ *         MMU page table. Otherwise -ENOMEM.
+ */ +static int create_normal_suspend_buffer(struct kbase_context *const kctx, + struct kbase_normal_suspend_buffer *s_buf) +{ + struct kbase_va_region *reg = NULL; + const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR; + const size_t nr_pages = + PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size); + int err = 0; + + lockdep_assert_held(&kctx->csf.lock); + + /* Allocate and initialize Region Object */ + reg = kbase_alloc_free_region(&kctx->kbdev->csf.shared_reg_rbtree, 0, + nr_pages, KBASE_REG_ZONE_MCU_SHARED); + + if (!reg) + return -ENOMEM; + + s_buf->phy = kcalloc(nr_pages, sizeof(*s_buf->phy), GFP_KERNEL); + + if (!s_buf->phy) { + err = -ENOMEM; + goto phy_alloc_failed; + } + + /* Get physical page for a normal suspend buffer */ + err = kbase_mem_pool_alloc_pages( + &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], + nr_pages, &s_buf->phy[0], false); + + if (err < 0) + goto phy_pages_alloc_failed; + + /* Insert Region Object into rbtree and make virtual address available + * to map it to physical page + */ + mutex_lock(&kctx->kbdev->csf.reg_lock); + err = kbase_add_va_region_rbtree(kctx->kbdev, reg, 0, nr_pages, 1); + reg->flags &= ~KBASE_REG_FREE; + mutex_unlock(&kctx->kbdev->csf.reg_lock); + + if (err) + goto add_va_region_failed; + + /* Update MMU table */ + err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, + reg->start_pfn, &s_buf->phy[0], + nr_pages, mem_flags, + MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW); + if (err) + goto mmu_insert_failed; + + s_buf->reg = reg; + + return 0; + +mmu_insert_failed: + mutex_lock(&kctx->kbdev->csf.reg_lock); + WARN_ON(kbase_remove_va_region(reg)); + mutex_unlock(&kctx->kbdev->csf.reg_lock); + +add_va_region_failed: + kbase_mem_pool_free_pages( + &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages, + &s_buf->phy[0], false, false); + +phy_pages_alloc_failed: + kfree(s_buf->phy); +phy_alloc_failed: + kfree(reg); + + return err; +} + +/** + * create_protected_suspend_buffer() - Create 
protected-mode suspend buffer
+ *					per queue group
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @s_buf: Pointer to suspend buffer that is attached to queue group
+ *
+ * Return: 0 if suspend buffer is successfully allocated and reflected to GPU
+ *         MMU page table. Otherwise -ENOMEM.
+ */
+static int create_protected_suspend_buffer(struct kbase_device *const kbdev,
+		struct kbase_protected_suspend_buffer *s_buf)
+{
+	struct kbase_va_region *reg = NULL;
+	struct tagged_addr *phys = NULL;
+	const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR;
+	const size_t nr_pages =
+		PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
+	int err = 0;
+
+	/* Allocate and initialize Region Object. nr_pages covers the suspend
+	 * size reported for the first CSG (groups[0]).
+	 */
+	reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
+			nr_pages, KBASE_REG_ZONE_MCU_SHARED);
+
+	if (!reg)
+		return -ENOMEM;
+
+	phys = kcalloc(nr_pages, sizeof(*phys), GFP_KERNEL);
+	if (!phys) {
+		err = -ENOMEM;
+		goto phy_alloc_failed;
+	}
+
+	s_buf->pma = kbase_csf_protected_memory_alloc(kbdev, phys,
+			nr_pages);
+	if (s_buf->pma == NULL) {
+		err = -ENOMEM;
+		goto pma_alloc_failed;
+	}
+
+	/* Insert Region Object into rbtree and make virtual address available
+	 * to map it to physical page.
+	 * Note that KBASE_REG_FREE is cleared whether or not the insertion
+	 * succeeded; on failure the region is simply freed below.
+	 */
+	mutex_lock(&kbdev->csf.reg_lock);
+	err = kbase_add_va_region_rbtree(kbdev, reg, 0, nr_pages, 1);
+	reg->flags &= ~KBASE_REG_FREE;
+	mutex_unlock(&kbdev->csf.reg_lock);
+
+	if (err)
+		goto add_va_region_failed;
+
+	/* Update MMU table */
+	err = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
+				     reg->start_pfn, phys,
+				     nr_pages, mem_flags, MCU_AS_NR,
+				     KBASE_MEM_GROUP_CSF_FW);
+	if (err)
+		goto mmu_insert_failed;
+
+	s_buf->reg = reg;
+	kfree(phys); /* the temporary array is not kept; only pma and reg are stored in s_buf */
+	return 0;
+	/* Error unwinding: each label undoes the step that had succeeded just
+	 * before the failing one and falls through to the earlier ones.
+	 */
+mmu_insert_failed:
+	mutex_lock(&kbdev->csf.reg_lock);
+	WARN_ON(kbase_remove_va_region(reg));
+	mutex_unlock(&kbdev->csf.reg_lock);
+
+add_va_region_failed:
+	kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages);
+pma_alloc_failed:
+	kfree(phys);
+phy_alloc_failed:
+	kfree(reg);
+
+	return err;
+}
+/* Forward declarations of functions that are used below before their
+ * definitions.
+ */
+static void timer_event_worker(struct work_struct *data);
+static void protm_event_worker(struct work_struct *data);
+static void term_normal_suspend_buffer(struct kbase_context *const kctx,
+		struct kbase_normal_suspend_buffer *s_buf);
+
+/**
+ * create_suspend_buffers - Setup normal and protected mode
+ *				suspend buffers.
+ *
+ * @kctx:	Address of the kbase context within which the queue group
+ *		is to be created.
+ * @group:	Pointer to GPU command queue group data.
+ *
+ * The normal-mode buffer is always created. The protected-mode buffer is
+ * only created when kbdev->csf.pma_dev is set; otherwise its region pointer
+ * is set to NULL. If creating the protected-mode buffer fails, the
+ * normal-mode buffer that was just created is freed again.
+ *
+ * A failure to create the normal-mode buffer is always reported as -ENOMEM,
+ * whatever error create_normal_suspend_buffer() returned.
+ *
+ * Return: 0 if suspend buffers are successfully allocated. Otherwise -ENOMEM.
+ */
+static int create_suspend_buffers(struct kbase_context *const kctx,
+		struct kbase_queue_group * const group)
+{
+	int err = 0;
+
+	if (create_normal_suspend_buffer(kctx, &group->normal_suspend_buf)) {
+		dev_err(kctx->kbdev->dev, "Failed to create normal suspend buffer\n");
+		return -ENOMEM;
+	}
+
+	if (kctx->kbdev->csf.pma_dev) {
+		err = create_protected_suspend_buffer(kctx->kbdev,
+				&group->protected_suspend_buf);
+		if (err) {
+			term_normal_suspend_buffer(kctx,
+					&group->normal_suspend_buf);
+			dev_err(kctx->kbdev->dev, "Failed to create protected suspend buffer\n");
+		}
+	} else {
+		group->protected_suspend_buf.reg = NULL;
+	}
+
+	return err;
+}
+
+/**
+ * generate_group_uid() - Makes an ID unique to all kernel base devices
+ *                        and contexts, for a queue group and CSG.
+ *
+ * The ID is obtained by atomically incrementing a counter stored in the
+ * device returned by kbase_find_device(-1). If no device is found a warning
+ * is raised and the fixed value 1 is returned, which is then not unique.
+ *
+ * NOTE(review): if kbase_find_device() takes a reference on the device, it
+ * is never dropped here - confirm against its definition.
+ *
+ * Return:      A unique ID in the form of an unsigned 32-bit integer
+ */
+static u32 generate_group_uid(void)
+{
+	/* use first KBase device to store max UID */
+	struct kbase_device *kbdev = kbase_find_device(-1);
+	u32 uid = 1;
+
+	if (kbdev)
+		uid = (u32) atomic_inc_return(&kbdev->group_max_uid_in_devices);
+	else
+		WARN(1, "NULL kbase device pointer in group UID generation");
+
+	return uid;
+}
+
+/**
+ * create_queue_group() - Create a queue group
+ *
+ * @kctx:	Address of the kbase context within which the queue group
+ *		is to be created.
+ * @create:	Address of a structure which contains details of the
+ *		queue group which is to be created.
+ *
+ * Return: a queue group handle on success, or a negative error code on failure.
+ */
+static int create_queue_group(struct kbase_context *const kctx,
+	union kbase_ioctl_cs_queue_group_create *const create)
+{
+	int group_handle = find_free_group_handle(kctx);
+
+	if (group_handle < 0) {
+		dev_err(kctx->kbdev->dev,
+			"All queue group handles are already in use\n");
+	} else {
+		struct kbase_queue_group * const group =
+			kmalloc(sizeof(struct kbase_queue_group),
+					GFP_KERNEL);
+
+		lockdep_assert_held(&kctx->csf.lock);
+
+		if (!group) {
+			dev_err(kctx->kbdev->dev, "Failed to allocate a queue\n");
+			group_handle = -ENOMEM;
+		} else {
+			int err = 0;
+			/* kmalloc() does not zero the object, so the fields are set one by one */
+			group->kctx = kctx;
+			group->handle = group_handle;
+			group->csg_nr = KBASEP_CSG_NR_INVALID;
+
+			group->tiler_mask = create->in.tiler_mask;
+			group->fragment_mask = create->in.fragment_mask;
+			group->compute_mask = create->in.compute_mask;
+
+			group->tiler_max = create->in.tiler_max;
+			group->fragment_max = create->in.fragment_max;
+			group->compute_max = create->in.compute_max;
+			group->priority = kbase_csf_priority_queue_group_priority_to_relative(
+				kbase_csf_priority_check(kctx->kbdev, create->in.priority));
+			group->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
+			group->faulted = false;
+
+			group->group_uid = generate_group_uid();
+			create->out.group_uid = group->group_uid; /* written even if creation fails below */
+
+			INIT_LIST_HEAD(&group->link);
+			INIT_LIST_HEAD(&group->link_to_schedule);
+			INIT_LIST_HEAD(&group->error_fatal.link);
+			INIT_LIST_HEAD(&group->error_timeout.link);
+			INIT_LIST_HEAD(&group->error_tiler_oom.link);
+			INIT_WORK(&group->timer_event_work, timer_event_worker);
+			INIT_WORK(&group->protm_event_work, protm_event_worker);
+			bitmap_zero(group->protm_pending_bitmap,
+					MAX_SUPPORTED_STREAMS_PER_GROUP);
+
+			group->run_state = KBASE_CSF_GROUP_INACTIVE;
+			err = create_suspend_buffers(kctx, group);
+
+			if (err < 0) {
+				kfree(group);
+				group_handle = err;
+			} else {
+				int j;
+
+				kctx->csf.queue_groups[group_handle] = group; /* publish the group under its handle */
+				for (j = 0; j < MAX_SUPPORTED_STREAMS_PER_GROUP;
+						j++)
+					group->bound_queues[j] = NULL;
+			}
+		}
+	}
+
+	return group_handle;
+}
+/**
+ * kbase_csf_queue_group_create() - Validate a request and create a queue group
+ *
+ * @kctx:   Context for which the group is created.
+ * @create: Creation arguments; on success @create->out.group_handle is set
+ *          here and @create->out.group_uid by create_queue_group().
+ *
+ * Takes kctx->csf.lock for the whole operation. The request is rejected with
+ * -EINVAL if any endpoint maximum exceeds the number of bits set in the
+ * matching mask, if the priority is not below
+ * BASE_QUEUE_GROUP_PRIORITY_COUNT, or if no CSG provides at least
+ * @create->in.cs_min CSs.
+ *
+ * Return: 0 on success, -EINVAL for an invalid request, otherwise the
+ *         negative error code returned by create_queue_group().
+ */
+int kbase_csf_queue_group_create(struct kbase_context *const kctx,
+			union kbase_ioctl_cs_queue_group_create *const create)
+{
+	int err = 0;
+	const u32 tiler_count = hweight64(create->in.tiler_mask);
+	const u32 fragment_count = hweight64(create->in.fragment_mask);
+	const u32 compute_count = hweight64(create->in.compute_mask);
+
+	mutex_lock(&kctx->csf.lock);
+
+	if ((create->in.tiler_max > tiler_count) ||
+	    (create->in.fragment_max > fragment_count) ||
+	    (create->in.compute_max > compute_count)) {
+		dev_err(kctx->kbdev->dev,
+			"Invalid maximum number of endpoints for a queue group\n");
+		err = -EINVAL;
+	} else if (create->in.priority >= BASE_QUEUE_GROUP_PRIORITY_COUNT) {
+		dev_err(kctx->kbdev->dev, "Invalid queue group priority %u\n",
+			(unsigned int)create->in.priority);
+		err = -EINVAL;
+	} else if (!iface_has_enough_streams(kctx->kbdev, create->in.cs_min)) {
+		dev_err(kctx->kbdev->dev,
+			"No CSG has at least %d CSs\n",
+			create->in.cs_min);
+		err = -EINVAL;
+	} else {
+		/* For the CSG which satisfies the condition for having
+		 * the needed number of CSs, check whether it also conforms
+		 * with the requirements for at least one of its CSs having
+		 * the iterator of the needed type
+		 * (note: for CSF v1.0 all CSs in a CSG will have access to
+		 * the same iterators)
+		 */
+		const int group_handle = create_queue_group(kctx, create);
+
+		if (group_handle >= 0)
+			create->out.group_handle = group_handle;
+		else
+			err = group_handle;
+	}
+
+	mutex_unlock(&kctx->csf.lock);
+
+	return err;
+}
+
+/**
+ * term_normal_suspend_buffer() - Free normal-mode suspend buffer of queue group
+ *
+ * @kctx:	Pointer to kbase context where queue group belongs to
+ * @s_buf:	Pointer to queue group suspend buffer to be freed
+ *
+ * Must be called with kctx->csf.lock held. Undoes
+ * create_normal_suspend_buffer() in reverse order: the MMU mapping is torn
+ * down, the VA region removed, the pages returned to the memory pool and the
+ * bookkeeping freed. The phy and reg pointers of @s_buf are reset to NULL.
+ */
+static void term_normal_suspend_buffer(struct kbase_context *const kctx,
+		struct kbase_normal_suspend_buffer *s_buf)
+{
+	const size_t nr_pages =
+		PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size);
+
+	lockdep_assert_held(&kctx->csf.lock);
+
+	WARN_ON(kbase_mmu_teardown_pages(
+				kctx->kbdev, &kctx->kbdev->csf.mcu_mmu,
+				s_buf->reg->start_pfn, nr_pages, MCU_AS_NR));
+
+	WARN_ON(s_buf->reg->flags & KBASE_REG_FREE);
+
+	mutex_lock(&kctx->kbdev->csf.reg_lock);
+	WARN_ON(kbase_remove_va_region(s_buf->reg));
+	mutex_unlock(&kctx->kbdev->csf.reg_lock);
+
+	kbase_mem_pool_free_pages(
+			&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
+			nr_pages, &s_buf->phy[0], false, false);
+
+	kfree(s_buf->phy);
+	s_buf->phy = NULL;
+	kfree(s_buf->reg);
+	s_buf->reg = NULL;
+}
+
+/**
+ * term_protected_suspend_buffer() - Free protected-mode suspend buffer of
+ *					queue group
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @s_buf: Pointer to queue group suspend buffer to be freed
+ *
+ * Undoes create_protected_suspend_buffer(): the MMU mapping is torn down,
+ * the VA region removed and the protected memory freed. The pma and reg
+ * pointers of @s_buf are reset to NULL.
+ */
+static void term_protected_suspend_buffer(struct kbase_device *const kbdev,
+		struct kbase_protected_suspend_buffer *s_buf)
+{
+	const size_t nr_pages =
+		PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
+
+	WARN_ON(kbase_mmu_teardown_pages(
+			kbdev, &kbdev->csf.mcu_mmu,
+			s_buf->reg->start_pfn, nr_pages, MCU_AS_NR));
+
+	WARN_ON(s_buf->reg->flags & KBASE_REG_FREE);
+
+	mutex_lock(&kbdev->csf.reg_lock);
+	WARN_ON(kbase_remove_va_region(s_buf->reg));
+	mutex_unlock(&kbdev->csf.reg_lock);
+
+	kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages);
+	s_buf->pma = NULL;
+	kfree(s_buf->reg);
+	s_buf->reg = NULL;
+}
+/**
+ * kbase_csf_term_descheduled_queue_group() - Release the resources of a
+ *                                            group that is off the scheduler
+ *
+ * @group: Group to terminate; expected to be in the INACTIVE or
+ *         FAULT_EVICTED run state (a warning is raised otherwise).
+ *
+ * Must be called with kctx->csf.lock held. Unbinds every bound queue, frees
+ * the normal-mode suspend buffer (and the protected-mode one when
+ * kbdev->csf.pma_dev is set) and marks the group as TERMINATED. The group
+ * structure itself is not freed here.
+ */
+void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group)
+{
+	struct kbase_context *kctx = group->kctx;
+
+	/* Currently each group supports the same number of CS */
+	u32 max_streams =
+		kctx->kbdev->csf.global_iface.groups[0].stream_num;
+	u32 i;
+
+	lockdep_assert_held(&kctx->csf.lock);
+
+	WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE &&
+		group->run_state != KBASE_CSF_GROUP_FAULT_EVICTED);
+
+	for (i = 0; i < max_streams; i++) {
+		struct kbase_queue *queue =
+				group->bound_queues[i];
+
+		/* The group is already being evicted from the scheduler */
+		if (queue)
+			unbind_stopped_queue(kctx, queue);
+	}
+
+	term_normal_suspend_buffer(kctx, &group->normal_suspend_buf);
+	if (kctx->kbdev->csf.pma_dev)
+		term_protected_suspend_buffer(kctx->kbdev,
+			&group->protected_suspend_buf);
+
+	group->run_state = KBASE_CSF_GROUP_TERMINATED;
+}
+
+/**
+ * term_queue_group - Terminate a GPU command queue group.
+ *
+ * @group: Pointer to GPU command queue group data.
+ *
+ * Terminates a GPU command queue group. From the userspace perspective the
+ * group will still exist but it can't bind new queues to it. Userspace can
+ * still add work in queues bound to the group but it won't be executed. (This
+ * is because the IO mapping created upon binding such queues is still intact.)
+ *
+ * Must be called with kctx->csf.lock held and with GPU reset either
+ * prevented or failed. Calling it on a group that is already TERMINATED
+ * after descheduling is a no-op.
+ */
+static void term_queue_group(struct kbase_queue_group *group)
+{
+	struct kbase_context *kctx = group->kctx;
+
+	kbase_reset_gpu_assert_failed_or_prevented(kctx->kbdev);
+	lockdep_assert_held(&kctx->csf.lock);
+
+	/* Stop the group and evict it from the scheduler */
+	kbase_csf_scheduler_group_deschedule(group);
+
+	if (group->run_state == KBASE_CSF_GROUP_TERMINATED)
+		return;
+
+	dev_dbg(kctx->kbdev->dev, "group %d terminating", group->handle);
+
+	kbase_csf_term_descheduled_queue_group(group);
+}
+/* Synchronously cancel the timer_event_work and protm_event_work items of
+ * @group; returns only once neither of them is pending or running.
+ */
+static void cancel_queue_group_events(struct kbase_queue_group *group)
+{
+	cancel_work_sync(&group->timer_event_work);
+	cancel_work_sync(&group->protm_event_work);
+}
+/**
+ * kbase_csf_queue_group_terminate() - Terminate and free a queue group
+ *
+ * @kctx:         Context owning the group.
+ * @group_handle: Handle of the group to terminate.
+ *
+ * If the handle does not resolve to a group nothing is terminated.
+ * Otherwise the pending error notifications of the group are unlinked, the
+ * group is terminated and removed from the context's group table, its work
+ * items are cancelled and the group memory is freed. Termination is
+ * attempted even when waiting for a GPU reset failed.
+ */
+void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
+				     u8 group_handle)
+{
+	struct kbase_queue_group *group;
+	int err;
+	bool reset_prevented = false;
+	struct kbase_device *const kbdev = kctx->kbdev;
+
+	err = kbase_reset_gpu_prevent_and_wait(kbdev);
+	if (err)
+		dev_warn(
+			kbdev->dev,
+			"Unsuccessful GPU reset detected when terminating group %d, attempting to terminate regardless",
+			group_handle);
+	else
+		reset_prevented = true;
+
+	mutex_lock(&kctx->csf.lock);
+
+	group = find_queue_group(kctx, group_handle);
+
+	if (group) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&kctx->csf.event_lock, flags);
+
+		dev_dbg(kbdev->dev,
+			"Remove any pending group fatal error from context %pK\n",
+			(void *)group->kctx);
+
+		list_del_init(&group->error_tiler_oom.link);
+		list_del_init(&group->error_timeout.link);
+		list_del_init(&group->error_fatal.link);
+		spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
+
+		term_queue_group(group);
+		kctx->csf.queue_groups[group_handle] = NULL;
+	}
+
+	mutex_unlock(&kctx->csf.lock);
+	if (reset_prevented)
+		kbase_reset_gpu_allow(kbdev);
+
+	if (!group)
+		return;
+
+	/* Cancel any pending event callbacks. If one is in progress
+	 * then this thread waits synchronously for it to complete (which
+	 * is why we must unlock the context first). We already ensured
+	 * that no more callbacks can be enqueued by terminating the group.
+	 */
+	cancel_queue_group_events(group);
+	kfree(group);
+}
+/**
+ * kbase_csf_queue_group_suspend() - Forward a suspend-buffer copy request
+ *                                   for a group to the scheduler
+ *
+ * @kctx:         Context owning the group.
+ * @sus_buf:      Copy descriptor handed to the scheduler.
+ * @group_handle: Handle of the group.
+ *
+ * Return: the error from kbase_reset_gpu_prevent_and_wait() if the reset
+ *         could not be prevented, -EINVAL if the handle does not resolve to
+ *         a group, otherwise the result of
+ *         kbase_csf_scheduler_group_copy_suspend_buf().
+ */
+int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
+				  struct kbase_suspend_copy_buffer *sus_buf,
+				  u8 group_handle)
+{
+	struct kbase_device *const kbdev = kctx->kbdev;
+	int err;
+	struct kbase_queue_group *group;
+
+	err = kbase_reset_gpu_prevent_and_wait(kbdev);
+	if (err) {
+		dev_warn(
+			kbdev->dev,
+			"Unsuccessful GPU reset detected when suspending group %d",
+			group_handle);
+		return err;
+	}
+	mutex_lock(&kctx->csf.lock);
+
+	group = find_queue_group(kctx, group_handle);
+	if (group)
+		err = kbase_csf_scheduler_group_copy_suspend_buf(group,
+								 sus_buf);
+	else
+		err = -EINVAL;
+
+	mutex_unlock(&kctx->csf.lock);
+	kbase_reset_gpu_allow(kbdev);
+
+	return err;
+}
+
+/**
+ * add_error() - Add an error to the list of errors to report to user space
+ *
+ * @kctx:  Address of a base context associated with a GPU address space.
+ * @error: Address of the item to be added to the context's pending error list.
+ * @data:  Error data to be returned to userspace.
+ *
+ * Does not wake up the event queue blocking a user thread in kbase_poll. This
+ * is to make it more efficient to add multiple errors.
+ *
+ * The added error must not already be on the context's list of errors waiting
+ * to be reported (e.g. because a previous error concerning the same object has
+ * not yet been reported).
+ */
+static void add_error(struct kbase_context *const kctx,
+			struct kbase_csf_notification *const error,
+			struct base_csf_notification const *const data)
+{
+	unsigned long flags;
+
+	if (WARN_ON(!kctx))
+		return;
+
+	if (WARN_ON(!error))
+		return;
+
+	if (WARN_ON(!data))
+		return;
+
+	spin_lock_irqsave(&kctx->csf.event_lock, flags);
+	/* An item that is still queued is left untouched (with a warning) */
+	if (!WARN_ON(!list_empty(&error->link))) {
+		error->data = *data;
+		list_add_tail(&error->link, &kctx->csf.error_list);
+		dev_dbg(kctx->kbdev->dev,
+			"Added error %pK of type %d in context %pK\n",
+			(void *)error, data->type, (void *)kctx);
+	}
+
+	spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
+}
+/**
+ * kbase_csf_add_group_fatal_error() - Queue a group error notification
+ *
+ * @group:       Group the error is reported for.
+ * @err_payload: Error details copied into the notification.
+ *
+ * Builds a BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR notification for the
+ * group's handle and queues it through add_error() using the group's
+ * error_fatal item. The event thread is not woken up here.
+ */
+void kbase_csf_add_group_fatal_error(
+	struct kbase_queue_group *const group,
+	struct base_gpu_queue_group_error const *const err_payload)
+{
+	struct base_csf_notification error;
+
+	if (WARN_ON(!group))
+		return;
+
+	if (WARN_ON(!err_payload))
+		return;
+
+	error = (struct base_csf_notification) {
+		.type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
+		.payload = {
+			.csg_error = {
+				.handle = group->handle,
+				.error = *err_payload
+			}
+		}
+	};
+
+	add_error(group->kctx, &group->error_fatal, &error);
+}
+/**
+ * kbase_csf_active_queue_groups_reset() - Terminate the groups of a context
+ *                                         that were evicted from CSG slots
+ *
+ * @kbdev: Device instance.
+ * @kctx:  Context whose groups are processed.
+ *
+ * Under kctx->csf.lock, asks the scheduler to evict the context's slots,
+ * terminates every group returned on the evicted list and then every group
+ * of the context that is in the FAULT_EVICTED run state.
+ */
+void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev,
+					 struct kbase_context *kctx)
+{
+	struct list_head evicted_groups;
+	struct kbase_queue_group *group;
+	int i;
+
+	INIT_LIST_HEAD(&evicted_groups);
+
+	mutex_lock(&kctx->csf.lock);
+
+	kbase_csf_scheduler_evict_ctx_slots(kbdev, kctx, &evicted_groups);
+	while (!list_empty(&evicted_groups)) {
+		group = list_first_entry(&evicted_groups,
+				struct kbase_queue_group, link);
+
+		dev_dbg(kbdev->dev, "Context %d_%d active group %d terminated",
+			    kctx->tgid, kctx->id, group->handle);
+		kbase_csf_term_descheduled_queue_group(group);
+		list_del_init(&group->link);
+	}
+
+	/* Acting on the queue groups that are pending to be terminated. */
+	for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) {
+		group = kctx->csf.queue_groups[i];
+		if (group &&
+		    group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED)
+			kbase_csf_term_descheduled_queue_group(group);
+	}
+
+	mutex_unlock(&kctx->csf.lock);
+}
+/**
+ * kbase_csf_ctx_init() - Initialize the CSF state of a context
+ *
+ * @kctx: Context being created.
+ *
+ * Sets up the lists, the event lock, the cookie bitmap and the per-context
+ * workqueue, then initializes the scheduler, KCPU queue and tiler heap
+ * sub-contexts in that order, undoing the earlier steps if a later one
+ * fails. kctx->csf.lock is only initialized once all of them succeeded.
+ *
+ * Return: 0 on success, -ENOMEM if the workqueue cannot be allocated,
+ *         otherwise the error of the failing sub-context initializer.
+ */
+int kbase_csf_ctx_init(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	int err = -ENOMEM;
+
+	INIT_LIST_HEAD(&kctx->csf.event_callback_list);
+	INIT_LIST_HEAD(&kctx->csf.queue_list);
+	INIT_LIST_HEAD(&kctx->csf.link);
+	INIT_LIST_HEAD(&kctx->csf.error_list);
+
+	spin_lock_init(&kctx->csf.event_lock);
+	kctx->csf.user_reg_vma = NULL;
+	mutex_lock(&kbdev->pm.lock);
+	/* The inode information for /dev/malixx file is not available at the
+	 * time of device probe as the inode is created when the device node
+	 * is created by udevd (through mknod).
+	 * It is therefore recorded here, from the first context that has a
+	 * file pointer.
+	 */
+	if (kctx->filp) {
+		if (!kbdev->csf.mali_file_inode)
+			kbdev->csf.mali_file_inode = kctx->filp->f_inode;
+
+		/* inode is unique for a file */
+		WARN_ON(kbdev->csf.mali_file_inode != kctx->filp->f_inode);
+	}
+	mutex_unlock(&kbdev->pm.lock);
+
+	/* Mark all the cookies as 'free' */
+	bitmap_fill(kctx->csf.cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE);
+
+	kctx->csf.wq = alloc_workqueue("mali_kbase_csf_wq",
+					WQ_UNBOUND, 1);
+
+	if (likely(kctx->csf.wq)) {
+		err = kbase_csf_scheduler_context_init(kctx);
+
+		if (likely(!err)) {
+			err = kbase_csf_kcpu_queue_context_init(kctx);
+
+			if (likely(!err)) {
+				err = kbase_csf_tiler_heap_context_init(kctx);
+
+				if (likely(!err))
+					mutex_init(&kctx->csf.lock);
+				else
+					kbase_csf_kcpu_queue_context_term(kctx);
+			}
+
+			if (unlikely(err))
+				kbase_csf_scheduler_context_term(kctx);
+		}
+
+		if (unlikely(err))
+			destroy_workqueue(kctx->csf.wq);
+	}
+
+	return err;
+}
+/**
+ * kbase_csf_ctx_handle_fault() - Terminate all groups of a faulting context
+ *
+ * @kctx:  Context the fault belongs to.
+ * @fault: Fault details; its addr and status are placed in the error payload.
+ *
+ * Returns without doing anything if a GPU reset cannot be prevented.
+ * Otherwise every group that is not already TERMINATED is terminated and a
+ * fatal group error is queued for it; the event thread is woken once if at
+ * least one error was queued.
+ */
+void kbase_csf_ctx_handle_fault(struct kbase_context *kctx,
+		struct kbase_fault *fault)
+{
+	int gr;
+	bool reported = false;
+	struct base_gpu_queue_group_error err_payload;
+	int err;
+	struct kbase_device *kbdev;
+
+	if (WARN_ON(!kctx))
+		return;
+
+	if (WARN_ON(!fault))
+		return;
+
+	kbdev = kctx->kbdev;
+	err = kbase_reset_gpu_try_prevent(kbdev);
+	/* Regardless of whether reset failed or is currently happening, exit
+	 * early
+	 */
+	if (err)
+		return;
+
+	err_payload = (struct base_gpu_queue_group_error) {
+		.error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
+		.payload = {
+			.fatal_group = {
+				.sideband = fault->addr,
+				.status = fault->status,
+			}
+		}
+	};
+
+	mutex_lock(&kctx->csf.lock);
+
+	for (gr = 0; gr < MAX_QUEUE_GROUP_NUM; gr++) {
+		struct kbase_queue_group *const group =
+			kctx->csf.queue_groups[gr];
+
+		if (group && group->run_state != KBASE_CSF_GROUP_TERMINATED) {
+			term_queue_group(group);
+			kbase_csf_add_group_fatal_error(group, &err_payload);
+			reported = true;
+		}
+	}
+
+	mutex_unlock(&kctx->csf.lock);
+
+	if (reported)
+		kbase_event_wakeup(kctx);
+
+	kbase_reset_gpu_allow(kbdev);
+}
+/**
+ * kbase_csf_ctx_term() - Tear down the CSF state of a context
+ *
+ * @kctx: Context being destroyed.
+ *
+ * Removes all event callbacks, terminates the remaining queue groups,
+ * flushes the work that may still reference the context, frees the groups
+ * and the remaining queues and finally terminates the tiler heap, KCPU
+ * queue and scheduler sub-contexts. Termination is attempted even when
+ * waiting for a GPU reset failed.
+ */
+void kbase_csf_ctx_term(struct kbase_context *kctx)
+{
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbase_as *as = NULL;
+	unsigned long flags;
+	u32 i;
+	int err;
+	bool reset_prevented = false;
+
+	/* As the kbase context is terminating, its debugfs sub-directory would
+	 * have been removed already and so would be the debugfs file created
+	 * for queue groups & kcpu queues, hence no need to explicitly remove
+	 * those debugfs files.
+	 */
+	kbase_csf_event_wait_remove_all(kctx);
+
+	/* Wait for a GPU reset if it is happening, prevent it if not happening */
+	err = kbase_reset_gpu_prevent_and_wait(kbdev);
+	if (err)
+		dev_warn(
+			kbdev->dev,
+			"Unsuccessful GPU reset detected when terminating csf context (%d_%d), attempting to terminate regardless",
+			kctx->tgid, kctx->id);
+	else
+		reset_prevented = true;
+
+	mutex_lock(&kctx->csf.lock);
+	/* Iterate through the queue groups that were not terminated by
+	 * userspace and issue the term request to firmware for them.
+	 */
+	for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) {
+		if (kctx->csf.queue_groups[i])
+			term_queue_group(kctx->csf.queue_groups[i]);
+	}
+	mutex_unlock(&kctx->csf.lock);
+
+	if (reset_prevented)
+		kbase_reset_gpu_allow(kbdev);
+
+	/* Now that all queue groups have been terminated, there can be no
+	 * more OoM or timer event interrupts but there can be inflight work
+	 * items. Destroying the wq will implicitly flush those work items.
+	 */
+	destroy_workqueue(kctx->csf.wq);
+
+	/* Wait for the firmware error work item to also finish as it could
+	 * be affecting this outgoing context also.
+	 */
+	flush_work(&kctx->kbdev->csf.fw_error_work);
+
+	/* A work item to handle page_fault/bus_fault/gpu_fault could be
+	 * pending for the outgoing context. Flush the workqueue that will
+	 * execute that work item.
+	 * The address space is looked up under hwaccess_lock; the flush
+	 * itself is done after the spinlock has been dropped.
+	 */
+	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
+	if (kctx->as_nr != KBASEP_AS_NR_INVALID)
+		as = &kctx->kbdev->as[kctx->as_nr];
+	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
+	if (as)
+		flush_workqueue(as->pf_wq);
+
+	mutex_lock(&kctx->csf.lock);
+	/* The groups were terminated above; only their memory is left to free */
+	for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) {
+		kfree(kctx->csf.queue_groups[i]);
+		kctx->csf.queue_groups[i] = NULL;
+	}
+
+	/* Iterate through the queues that were not terminated by
+	 * userspace and do the required cleanup for them.
+	 */
+	while (!list_empty(&kctx->csf.queue_list)) {
+		struct kbase_queue *queue;
+
+		queue = list_first_entry(&kctx->csf.queue_list,
+						struct kbase_queue, link);
+
+		/* The reference held when the IO mapping was created on bind
+		 * would have been dropped otherwise the termination of Kbase
+		 * context itself wouldn't have kicked-in. So there shall be
+		 * only one reference left that was taken when queue was
+		 * registered.
+		 */
+		if (atomic_read(&queue->refcount) != 1)
+			dev_warn(kctx->kbdev->dev,
+				 "Releasing queue with incorrect refcounting!\n");
+		list_del_init(&queue->link);
+		release_queue(queue);
+	}
+
+	mutex_unlock(&kctx->csf.lock);
+
+	kbase_csf_tiler_heap_context_term(kctx);
+	kbase_csf_kcpu_queue_context_term(kctx);
+	kbase_csf_scheduler_context_term(kctx);
+
+	mutex_destroy(&kctx->csf.lock);
+}
+/**
+ * kbase_csf_event_wait_add() - Register an event callback for a context
+ *
+ * @kctx:     Context to register with.
+ * @callback: Function invoked from kbase_csf_event_signal().
+ * @param:    Argument passed to @callback.
+ *
+ * The new entry is appended to the context's callback list. No check is
+ * made for an identical (@callback, @param) pair already being registered.
+ *
+ * Return: 0 on success, -ENOMEM if the tracking entry cannot be allocated.
+ */
+int kbase_csf_event_wait_add(struct kbase_context *kctx,
+			     kbase_csf_event_callback *callback, void *param)
+{
+	int err = -ENOMEM;
+	struct kbase_csf_event *event =
+		kzalloc(sizeof(struct kbase_csf_event), GFP_KERNEL);
+
+	if (event) {
+		unsigned long flags;
+
+		event->kctx = kctx;
+		event->callback = callback;
+		event->param = param;
+
+		spin_lock_irqsave(&kctx->csf.event_lock, flags);
+		list_add_tail(&event->link, &kctx->csf.event_callback_list);
+		dev_dbg(kctx->kbdev->dev,
+			"Added event handler %pK with param %pK\n", event,
+			event->param);
+		spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
+
+		err = 0;
+	}
+
+	return err;
+}
+/**
+ * kbase_csf_event_wait_remove() - Unregister an event callback
+ *
+ * @kctx:     Context the callback was registered with.
+ * @callback: Callback function to look for.
+ * @param:    Callback argument to look for.
+ *
+ * Removes and frees the first entry whose callback and parameter both
+ * match; does nothing if there is no such entry.
+ */
+void kbase_csf_event_wait_remove(struct kbase_context *kctx,
+		kbase_csf_event_callback *callback, void *param)
+{
+	struct kbase_csf_event *event;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kctx->csf.event_lock, flags);
+	/* The non-safe iterator is fine: the loop is left right after the deletion */
+	list_for_each_entry(event, &kctx->csf.event_callback_list, link) {
+		if ((event->callback == callback) && (event->param == param)) {
+			list_del(&event->link);
+			dev_dbg(kctx->kbdev->dev,
+				"Removed event handler %pK with param %pK\n",
+				event, event->param);
+			kfree(event);
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
+}
+/**
+ * kbase_csf_read_error() - Dequeue the first pending error of a context
+ *
+ * @kctx:       Context to read from.
+ * @event_data: Filled with the error data when an error was pending.
+ *
+ * Errors are appended at the tail by add_error() and taken from the head
+ * here, so they are returned in the order in which they were added.
+ *
+ * Return: true if an error was copied to @event_data, false if the list
+ *         was empty (in which case @event_data is not written).
+ */
+bool kbase_csf_read_error(struct kbase_context *kctx,
+		struct base_csf_notification *event_data)
+{
+	bool got_event = true;
+	struct kbase_csf_notification *error_data = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kctx->csf.event_lock, flags);
+
+	if (likely(!list_empty(&kctx->csf.error_list))) {
+		error_data = list_first_entry(&kctx->csf.error_list,
+			struct kbase_csf_notification, link);
+		list_del_init(&error_data->link);
+		*event_data = error_data->data;
+		dev_dbg(kctx->kbdev->dev, "Dequeued error %pK in context %pK\n",
+			(void *)error_data, (void *)kctx);
+	} else {
+		got_event = false;
+	}
+
+	spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
+
+	return got_event;
+}
+/**
+ * kbase_csf_error_pending() - Check whether a context has unread errors
+ *
+ * @kctx: Context to check.
+ *
+ * Return: true if the context's error list is not empty.
+ */
+bool kbase_csf_error_pending(struct kbase_context *kctx)
+{
+	bool event_pended = false;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kctx->csf.event_lock, flags);
+	event_pended = !list_empty(&kctx->csf.error_list);
+	dev_dbg(kctx->kbdev->dev, "%s error is pending in context %pK\n",
+		event_pended ? "An" : "No", (void *)kctx);
+	spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
+
+	return event_pended;
+}
+/**
+ * kbase_csf_event_signal() - Signal a context event and run its callbacks
+ *
+ * @kctx:       Context to signal.
+ * @notify_gpu: When true, also ring the kernel doorbell (only if the GPU is
+ *              powered) so that the firmware re-evaluates pending waits.
+ *
+ * The registered callbacks are invoked with kctx->csf.event_lock held via
+ * spin_lock_irqsave(). A callback returning
+ * KBASE_CSF_EVENT_CALLBACK_REMOVE is unregistered and freed.
+ */
+void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu)
+{
+	struct kbase_csf_event *event, *next_event;
+	unsigned long flags;
+
+	dev_dbg(kctx->kbdev->dev,
+		"Signal event (%s GPU notify) for context %pK\n",
+		notify_gpu ? "with" : "without", (void *)kctx);
+
+	/* First set the signal count to 1 and wake up event thread.
+	 */
+	atomic_set(&kctx->event_count, 1);
+	kbase_event_wakeup(kctx);
+
+	/* Signal the CSF firmware. This is to ensure that pending command
+	 * stream synch object wait operations are re-evaluated.
+	 * Write to GLB_DOORBELL would suffice as spec says that all pending
+	 * synch object wait operations are re-evaluated on a write to any
+	 * CS_DOORBELL/GLB_DOORBELL register.
+	 */
+	if (notify_gpu) {
+		spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
+		if (kctx->kbdev->pm.backend.gpu_powered)
+			kbase_csf_ring_doorbell(kctx->kbdev, CSF_KERNEL_DOORBELL_NR);
+		KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT_NOTIFY_GPU, kctx, 0u);
+		spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
+	}
+
+	/* Now invoke the callbacks registered on backend side.
+	 * Allow item removal inside the loop, if requested by the callback.
+	 */
+	spin_lock_irqsave(&kctx->csf.event_lock, flags);
+
+	list_for_each_entry_safe(
+		event, next_event, &kctx->csf.event_callback_list, link) {
+		enum kbase_csf_event_callback_action action;
+
+		dev_dbg(kctx->kbdev->dev,
+			"Calling event handler %pK with param %pK\n",
+			(void *)event, event->param);
+		action = event->callback(event->param);
+		if (action == KBASE_CSF_EVENT_CALLBACK_REMOVE) {
+			list_del(&event->link);
+			kfree(event);
+		}
+	}
+
+	spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
+}
+/**
+ * kbase_csf_event_wait_remove_all() - Unregister every event callback
+ *
+ * @kctx: Context whose callback list is emptied.
+ *
+ * Each entry is unlinked and freed; the callbacks themselves are not
+ * invoked.
+ */
+void kbase_csf_event_wait_remove_all(struct kbase_context *kctx)
+{
+	struct kbase_csf_event *event, *next_event;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kctx->csf.event_lock, flags);
+
+	list_for_each_entry_safe(
+		event, next_event, &kctx->csf.event_callback_list, link) {
+		list_del(&event->link);
+		dev_dbg(kctx->kbdev->dev,
+			"Removed event handler %pK with param %pK\n",
+			(void *)event, event->param);
+		kfree(event);
+	}
+
+	spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
+}
+
+/**
+ * handle_oom_event - Handle the OoM event
generated by the firmware for the
+ *                    CSI.
+ *
+ * This function will handle the OoM event request from the firmware for the
+ * CS. It will retrieve the address of heap context and heap's
+ * statistics (like number of render passes in-flight) from the CS's kernel
+ * kernel output page and pass them to the tiler heap function to allocate a
+ * new chunk.
+ * It will also update the CS's kernel input page with the address
+ * of a new chunk that was allocated.
+ *
+ * @kctx: Pointer to the kbase context in which the tiler heap was initialized.
+ * @stream: Pointer to the structure containing info provided by the firmware
+ *          about the CSI.
+ *
+ * Return: 0 if successfully handled the request, otherwise a negative error
+ *         code on failure.
+ */
+static int handle_oom_event(struct kbase_context *const kctx,
+		struct kbase_csf_cmd_stream_info const *const stream)
+{
+	u64 gpu_heap_va =
+		kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_LO) |
+		((u64)kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_HI) << 32);
+	const u32 vt_start =
+		kbase_csf_firmware_cs_output(stream, CS_HEAP_VT_START);
+	const u32 vt_end =
+		kbase_csf_firmware_cs_output(stream, CS_HEAP_VT_END);
+	const u32 frag_end =
+		kbase_csf_firmware_cs_output(stream, CS_HEAP_FRAG_END);
+	u32 renderpasses_in_flight;
+	u32 pending_frag_count;
+	u64 new_chunk_ptr;
+	int err;
+	/* Only frag_end <= vt_end < vt_start is accepted; this also keeps the subtractions below from wrapping */
+	if ((frag_end > vt_end) || (vt_end >= vt_start)) {
+		dev_warn(kctx->kbdev->dev, "Invalid Heap statistics provided by firmware: vt_start %d, vt_end %d, frag_end %d\n",
+			 vt_start, vt_end, frag_end);
+		return -EINVAL;
+	}
+
+	renderpasses_in_flight = vt_start - frag_end;
+	pending_frag_count = vt_end - frag_end;
+
+	err = kbase_csf_tiler_heap_alloc_new_chunk(kctx,
+		gpu_heap_va, renderpasses_in_flight, pending_frag_count, &new_chunk_ptr);
+
+	/* It is okay to acknowledge with a NULL chunk (firmware will then wait
+	 * for the fragment jobs to complete and release chunks).
+	 * Any error other than -EBUSY is returned to the caller without
+	 * writing the input page.
+	 */
+	if (err == -EBUSY)
+		new_chunk_ptr = 0;
+	else if (err)
+		return err;
+	/* The same chunk pointer is written to both the START and the END register pair */
+	kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_START_LO,
+				new_chunk_ptr & 0xFFFFFFFF);
+	kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_START_HI,
+				new_chunk_ptr >> 32);
+
+	kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_END_LO,
+				new_chunk_ptr & 0xFFFFFFFF);
+	kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_END_HI,
+				new_chunk_ptr >> 32);
+
+	return 0;
+}
+
+/**
+ * report_tiler_oom_error - Report a CSG error due to a tiler heap OOM event
+ *
+ * @group: Pointer to the GPU command queue group that encountered the error
+ *
+ * Queues a BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM notification for the
+ * group's handle through add_error(), using the group's error_tiler_oom
+ * item, and then wakes up the event thread of the group's context.
+ */
+static void report_tiler_oom_error(struct kbase_queue_group *group)
+{
+	struct base_csf_notification const
+		error = { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
+			  .payload = {
+				  .csg_error = {
+					  .handle = group->handle,
+					  .error = {
+						  .error_type =
+							  BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM,
+					  } } } };
+
+	add_error(group->kctx, &group->error_tiler_oom, &error);
+	kbase_event_wakeup(group->kctx);
+}
+
+/**
+ * kbase_queue_oom_event - Handle tiler out-of-memory for a GPU command queue.
+ *
+ * @queue: Pointer to queue for which out-of-memory event was received.
+ *
+ * Called with the CSF locked for the affected GPU virtual address space.
+ * Do not call in interrupt context.
+ *
+ * Handles tiler out-of-memory for a GPU command queue and then clears the
+ * notification to allow the firmware to report out-of-memory again in future.
+ * If the out-of-memory condition was successfully handled then this function
+ * rings the relevant doorbell to notify the firmware; otherwise, it terminates
+ * the GPU command queue group to which the queue is bound. See
+ * term_queue_group() for details.
+ */
+static void kbase_queue_oom_event(struct kbase_queue *const queue)
+{
+	struct kbase_context *const kctx = queue->kctx;
+	struct kbase_device *const kbdev = kctx->kbdev;
+	struct kbase_queue_group *group;
+	int slot_num, err;
+	struct kbase_csf_cmd_stream_group_info const *ginfo;
+	struct kbase_csf_cmd_stream_info const *stream;
+	int csi_index = queue->csi_index;
+	u32 cs_oom_ack, cs_oom_req;
+
+	lockdep_assert_held(&kctx->csf.lock);
+
+	group = get_bound_queue_group(queue);
+	if (!group) {
+		dev_warn(kctx->kbdev->dev, "queue not bound\n");
+		return;
+	}
+
+	kbase_csf_scheduler_lock(kbdev);
+
+	slot_num = kbase_csf_scheduler_group_get_slot(group);
+
+	/* The group could have gone off slot before this work item got
+	 * a chance to execute.
+	 */
+	if (slot_num < 0)
+		goto unlock;
+
+	/* If the bound group is on slot yet the kctx is marked with disabled
+	 * on address-space fault, the group is pending to be killed. So skip
+	 * the inflight oom operation.
+	 */
+	if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT))
+		goto unlock;
+
+	ginfo = &kbdev->csf.global_iface.groups[slot_num];
+	stream = &ginfo->streams[csi_index];
+	cs_oom_ack = kbase_csf_firmware_cs_output(stream, CS_ACK) &
+		     CS_ACK_TILER_OOM_MASK;
+	cs_oom_req = kbase_csf_firmware_cs_input_read(stream, CS_REQ) &
+		     CS_REQ_TILER_OOM_MASK;
+
+	/* The group could have already undergone suspend-resume cycle before
+	 * this work item got a chance to execute. On CSG resume the CS_ACK
+	 * register is set by firmware to reflect the CS_REQ register, which
+	 * implies that all events signaled before suspension are implicitly
+	 * acknowledged.
+	 * A new OoM event is expected to be generated after resume.
+	 */
+	if (cs_oom_ack == cs_oom_req)
+		goto unlock;
+
+	err = handle_oom_event(kctx, stream);
+
+	/* The request bit is made to match the acknowledge bit whether or not
+	 * the event could be handled.
+	 */
+	kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_oom_ack,
+					 CS_REQ_TILER_OOM_MASK);
+
+	if (err) {
+		dev_warn(
+			kbdev->dev,
+			"Queue group to be terminated, couldn't handle the OoM event\n");
+		/* The scheduler lock is dropped before the group is terminated */
+		kbase_csf_scheduler_unlock(kbdev);
+		term_queue_group(group);
+		report_tiler_oom_error(group);
+		return;
+	}
+
+	kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true);
+unlock:
+	kbase_csf_scheduler_unlock(kbdev);
+}
+
+/**
+ * oom_event_worker - Tiler out-of-memory handler called from a workqueue.
+ *
+ * @data: Pointer to a work_struct embedded in GPU command queue data.
+ *
+ * Handles a tiler out-of-memory condition for a GPU command queue and then
+ * releases a reference that was added to prevent the queue being destroyed
+ * while this work item was pending on a workqueue.
+ *
+ * The OoM event itself is only serviced when a GPU reset could be
+ * prevented. The queue reference is released on every path, so that it is
+ * not leaked when the work item runs while a reset has failed or is in
+ * progress.
+ */
+static void oom_event_worker(struct work_struct *data)
+{
+	struct kbase_queue *queue =
+		container_of(data, struct kbase_queue, oom_event_work);
+	struct kbase_context *kctx = queue->kctx;
+	struct kbase_device *const kbdev = kctx->kbdev;
+	const int err = kbase_reset_gpu_try_prevent(kbdev);
+
+	mutex_lock(&kctx->csf.lock);
+
+	/* Regardless of whether reset failed or is currently happening, skip
+	 * the OoM handling in that case
+	 */
+	if (!err)
+		kbase_queue_oom_event(queue);
+
+	/* Dropped under kctx->csf.lock, like the other release_queue() calls
+	 * in this file.
+	 */
+	release_queue(queue);
+
+	mutex_unlock(&kctx->csf.lock);
+
+	/* Only undo the reset prevention if it was actually obtained */
+	if (!err)
+		kbase_reset_gpu_allow(kbdev);
+}
+
+/**
+ * report_group_timeout_error - Report the timeout error for the group to userspace.
+ * + * @group: Pointer to the group for which timeout error occurred + */ +static void report_group_timeout_error(struct kbase_queue_group *const group) +{ + struct base_csf_notification const + error = { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, + .payload = { + .csg_error = { + .handle = group->handle, + .error = { + .error_type = + BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT, + } } } }; + + dev_warn(group->kctx->kbdev->dev, + "Notify the event notification thread, forward progress timeout (%llu cycles)\n", + kbase_csf_timeout_get(group->kctx->kbdev)); + + add_error(group->kctx, &group->error_timeout, &error); + kbase_event_wakeup(group->kctx); +} + +/** + * timer_event_worker - Handle the progress timeout error for the group + * + * @data: Pointer to a work_struct embedded in GPU command queue group data. + * + * Terminate the CSG and report the error to userspace. Termination is still + * attempted when kbase_reset_gpu_prevent_and_wait() fails; in that case only + * a warning is logged. + */ +static void timer_event_worker(struct work_struct *data) +{ + struct kbase_queue_group *const group = + container_of(data, struct kbase_queue_group, timer_event_work); + struct kbase_context *const kctx = group->kctx; + bool reset_prevented = false; + int err = kbase_reset_gpu_prevent_and_wait(kctx->kbdev); + + if (err) + dev_warn( + kctx->kbdev->dev, + "Unsuccessful GPU reset detected when terminating group %d on progress timeout, attempting to terminate regardless", + group->handle); + else + reset_prevented = true; + + mutex_lock(&kctx->csf.lock); + + term_queue_group(group); + report_group_timeout_error(group); + + mutex_unlock(&kctx->csf.lock); + /* Only balance a successful kbase_reset_gpu_prevent_and_wait() */ + if (reset_prevented) + kbase_reset_gpu_allow(kctx->kbdev); +} + +/** + * handle_progress_timer_event - Progress timer timeout event handler. + * + * @group: Pointer to GPU queue group for which the timeout event is received. + * + * Enqueue a work item to terminate the group and notify the event notification + * thread of progress timeout fault for the GPU command queue group.
+ */ +static void handle_progress_timer_event(struct kbase_queue_group *const group) +{ + queue_work(group->kctx->csf.wq, &group->timer_event_work); +} + +/** + * protm_event_worker - Protected mode switch request event handler + * called from a workqueue. + * + * @data: Pointer to a work_struct embedded in GPU command queue group data. + * + * Request to switch to protected mode. + */ +static void protm_event_worker(struct work_struct *data) +{ + struct kbase_queue_group *const group = + container_of(data, struct kbase_queue_group, protm_event_work); + + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_BEGIN, + group, 0u); + kbase_csf_scheduler_group_protm_enter(group); + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END, + group, 0u); +} + +/** + * report_queue_fatal_error - Report a fatal error for a GPU command queue + * + * @queue: Pointer to the queue for which the error is reported. + * Its csi_index is included in the notification. + * @cs_fatal: Value stored in the status field of the notification. + * @cs_fatal_info: Value stored in the sideband field of the notification. + * @group_handle: Queue group handle stored in the notification. + * + * Builds a BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR notification with error + * type BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL, passes it to add_error() for + * the context of @queue and then calls kbase_event_wakeup() on that context. + */ +static void report_queue_fatal_error(struct kbase_queue *const queue, + u32 cs_fatal, u64 cs_fatal_info, + u8 group_handle) +{ + struct base_csf_notification error = + { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, + .payload = { + .csg_error = { + .handle = group_handle, + .error = { + .error_type = + BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL, + .payload = { + .fatal_queue = { + .sideband = + cs_fatal_info, + .status = cs_fatal, + .csi_index = + queue->csi_index, + } } } } } }; + + add_error(queue->kctx, &queue->error, &error); + kbase_event_wakeup(queue->kctx); +} + +/** + * handle_fault_event - Handler for CS fault. + * + * @queue: Pointer to queue for which fault event was received. + * @stream: Pointer to the structure containing info provided by the + * firmware about the CSI. + * + * Prints meaningful CS fault information.
+ * + */ +static void +handle_fault_event(struct kbase_queue *const queue, + struct kbase_csf_cmd_stream_info const *const stream) +{ + const u32 cs_fault = kbase_csf_firmware_cs_output(stream, CS_FAULT); + /* Combine the LO and HI 32-bit words into the 64-bit fault info */ + const u64 cs_fault_info = + kbase_csf_firmware_cs_output(stream, CS_FAULT_INFO_LO) | + ((u64)kbase_csf_firmware_cs_output(stream, CS_FAULT_INFO_HI) + << 32); + const u8 cs_fault_exception_type = + CS_FAULT_EXCEPTION_TYPE_GET(cs_fault); + const u32 cs_fault_exception_data = + CS_FAULT_EXCEPTION_DATA_GET(cs_fault); + const u64 cs_fault_info_exception_data = + CS_FAULT_INFO_EXCEPTION_DATA_GET(cs_fault_info); + struct kbase_device *const kbdev = queue->kctx->kbdev; + + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + + dev_warn(kbdev->dev, + "Ctx %d_%d Group %d CSG %d CSI: %d\n" + "CS_FAULT.EXCEPTION_TYPE: 0x%x (%s)\n" + "CS_FAULT.EXCEPTION_DATA: 0x%x\n" + "CS_FAULT_INFO.EXCEPTION_DATA: 0x%llx\n", + queue->kctx->tgid, queue->kctx->id, queue->group->handle, + queue->group->csg_nr, queue->csi_index, + cs_fault_exception_type, + kbase_gpu_exception_name(cs_fault_exception_type), + cs_fault_exception_data, cs_fault_info_exception_data); + + /* A resource eviction timeout fault is additionally reported through + * report_queue_fatal_error(), with GPU_EXCEPTION_TYPE_SW_FAULT_2 as + * the status and 0 as the sideband value. + */ + if (cs_fault_exception_type == + CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT) + report_queue_fatal_error(queue, GPU_EXCEPTION_TYPE_SW_FAULT_2, + 0, queue->group->handle); +} + +/** + * fatal_event_worker - Handle the fatal error for the GPU queue + * + * @data: Pointer to a work_struct embedded in GPU command queue. + * + * Terminate the CSG and report the error to userspace.
+ */ +static void fatal_event_worker(struct work_struct *const data) +{ + struct kbase_queue *const queue = + container_of(data, struct kbase_queue, fatal_event_work); + struct kbase_context *const kctx = queue->kctx; + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_queue_group *group; + u8 group_handle; + bool reset_prevented = false; + int err = kbase_reset_gpu_prevent_and_wait(kbdev); + + if (err) + dev_warn( + kbdev->dev, + "Unsuccessful GPU reset detected when terminating group to handle fatal event, attempting to terminate regardless"); + else + reset_prevented = true; + + mutex_lock(&kctx->csf.lock); + + group = get_bound_queue_group(queue); + if (!group) { + dev_warn(kbdev->dev, "queue not bound when handling fatal event"); + goto unlock; + } + + /* Read the handle before the group is terminated */ + group_handle = group->handle; + term_queue_group(group); + report_queue_fatal_error(queue, queue->cs_fatal, queue->cs_fatal_info, + group_handle); + +unlock: + /* Drop the queue reference that handle_fatal_event() took before it + * queued this work item. + */ + release_queue(queue); + mutex_unlock(&kctx->csf.lock); + /* Only balance a successful kbase_reset_gpu_prevent_and_wait() */ + if (reset_prevented) + kbase_reset_gpu_allow(kbdev); +} + +/** + * handle_fatal_event - Handler for CS fatal. + * + * @queue: Pointer to queue for which fatal event was received. + * @stream: Pointer to the structure containing info provided by the + * firmware about the CSI. + * + * Prints meaningful CS fatal information. + * Enqueue a work item to terminate the group and report the fatal error + * to user space.
+ */ +static void +handle_fatal_event(struct kbase_queue *const queue, + struct kbase_csf_cmd_stream_info const *const stream) +{ + const u32 cs_fatal = kbase_csf_firmware_cs_output(stream, CS_FATAL); + /* Combine the LO and HI 32-bit words into the 64-bit fatal info */ + const u64 cs_fatal_info = + kbase_csf_firmware_cs_output(stream, CS_FATAL_INFO_LO) | + ((u64)kbase_csf_firmware_cs_output(stream, CS_FATAL_INFO_HI) + << 32); + const u32 cs_fatal_exception_type = + CS_FATAL_EXCEPTION_TYPE_GET(cs_fatal); + const u32 cs_fatal_exception_data = + CS_FATAL_EXCEPTION_DATA_GET(cs_fatal); + const u64 cs_fatal_info_exception_data = + CS_FATAL_INFO_EXCEPTION_DATA_GET(cs_fatal_info); + struct kbase_device *const kbdev = queue->kctx->kbdev; + + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + + dev_warn(kbdev->dev, + "Ctx %d_%d Group %d CSG %d CSI: %d\n" + "CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n" + "CS_FATAL.EXCEPTION_DATA: 0x%x\n" + "CS_FATAL_INFO.EXCEPTION_DATA: 0x%llx\n", + queue->kctx->tgid, queue->kctx->id, queue->group->handle, + queue->group->csg_nr, queue->csi_index, + cs_fatal_exception_type, + kbase_gpu_exception_name(cs_fatal_exception_type), + cs_fatal_exception_data, cs_fatal_info_exception_data); + + if (cs_fatal_exception_type == + CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR) { + /* A firmware internal error is handed to the device-wide + * fw_error_work instead of the per-queue work item. + */ + queue_work(system_wq, &kbdev->csf.fw_error_work); + } else { + /* Hold a queue reference for the work item and stash the + * fatal status for fatal_event_worker(). The reference is + * released by fatal_event_worker(), or right here when + * queue_work() reports that the item was already pending. + */ + get_queue(queue); + queue->cs_fatal = cs_fatal; + queue->cs_fatal_info = cs_fatal_info; + if (!queue_work(queue->kctx->csf.wq, &queue->fatal_event_work)) + release_queue(queue); + } +} + +/** + * handle_queue_exception_event - Handler for CS fatal/fault exception events. + * + * @queue: Pointer to queue for which fatal/fault event was received. + * @cs_req: Value of the CS_REQ register from the CS's input page. + * @cs_ack: Value of the CS_ACK register from the CS's output page.
+ * + * A FATAL or FAULT event is treated as pending when the corresponding bit + * differs between @cs_ack and @cs_req. Each pending event is passed to its + * handler, after which kbase_csf_firmware_cs_input_mask() is called on CS_REQ + * with @cs_ack and the matching mask. The kernel doorbell is rung only on + * the FAULT path. + */ +static void handle_queue_exception_event(struct kbase_queue *const queue, + const u32 cs_req, const u32 cs_ack) +{ + struct kbase_csf_cmd_stream_group_info const *ginfo; + struct kbase_csf_cmd_stream_info const *stream; + struct kbase_context *const kctx = queue->kctx; + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_queue_group *group = queue->group; + int csi_index = queue->csi_index; + int slot_num = group->csg_nr; + + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + + ginfo = &kbdev->csf.global_iface.groups[slot_num]; + stream = &ginfo->streams[csi_index]; + + if ((cs_ack & CS_ACK_FATAL_MASK) != (cs_req & CS_REQ_FATAL_MASK)) { + handle_fatal_event(queue, stream); + kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, + CS_REQ_FATAL_MASK); + } + + if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) { + handle_fault_event(queue, stream); + kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, + CS_REQ_FAULT_MASK); + kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true); + } +} + +/** + * process_cs_interrupts - Process interrupts for a CS. + * + * @group: Pointer to GPU command queue group data. + * @ginfo: The CSG interface provided by the firmware. + * @irqreq: CSG's IRQ request bitmask (one bit per CS). + * @irqack: CSG's IRQ acknowledge bitmask (one bit per CS). + * + * If the interrupt request bitmask differs from the acknowledge bitmask + * then the firmware is notifying the host of an event concerning those + * CSs indicated by bits whose value differs. The actions required + * are then determined by examining which notification flags differ between + * the request and acknowledge registers for the individual CS(s).
+ */ +static void process_cs_interrupts(struct kbase_queue_group *const group, + struct kbase_csf_cmd_stream_group_info const *const ginfo, + u32 const irqreq, u32 const irqack) +{ + struct kbase_device *const kbdev = group->kctx->kbdev; + u32 remaining = irqreq ^ irqack; + bool protm_pend = false; + const bool group_suspending = + !kbase_csf_scheduler_group_events_enabled(kbdev, group); + + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + + while (remaining != 0) { + int const i = ffs(remaining) - 1; + struct kbase_queue *const queue = group->bound_queues[i]; + + remaining &= ~(1 << i); + + /* The queue pointer can be NULL, but if it isn't NULL then it + * cannot disappear since scheduler spinlock is held and before + * freeing a bound queue it has to be first unbound which + * requires scheduler spinlock. + */ + if (queue && !WARN_ON(queue->csi_index != i)) { + struct kbase_csf_cmd_stream_info const *const stream = + &ginfo->streams[i]; + u32 const cs_req = kbase_csf_firmware_cs_input_read( + stream, CS_REQ); + u32 const cs_ack = + kbase_csf_firmware_cs_output(stream, CS_ACK); + struct workqueue_struct *wq = group->kctx->csf.wq; + + if ((cs_req & CS_REQ_EXCEPTION_MASK) ^ + (cs_ack & CS_ACK_EXCEPTION_MASK)) { + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_FAULT_INTERRUPT, group, queue, cs_req ^ cs_ack); + handle_queue_exception_event(queue, cs_req, cs_ack); + } + + /* PROTM_PEND and TILER_OOM can be safely ignored + * because they will be raised again if the group + * is assigned a CSG slot in future. 
+ */ + if (group_suspending) { + u32 const cs_req_remain = cs_req & ~CS_REQ_EXCEPTION_MASK; + u32 const cs_ack_remain = cs_ack & ~CS_ACK_EXCEPTION_MASK; + + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_IGNORED_INTERRUPTS_GROUP_SUSPEND, + group, queue, cs_req_remain ^ cs_ack_remain); + continue; + } + + if (((cs_req & CS_REQ_TILER_OOM_MASK) ^ + (cs_ack & CS_ACK_TILER_OOM_MASK))) { + get_queue(queue); + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_TILER_OOM_INTERRUPT, group, queue, + cs_req ^ cs_ack); + if (WARN_ON(!queue_work(wq, &queue->oom_event_work))) { + /* The work item shall not have been + * already queued, there can be only + * one pending OoM event for a + * queue. + */ + release_queue(queue); + } + } + + if ((cs_req & CS_REQ_PROTM_PEND_MASK) ^ + (cs_ack & CS_ACK_PROTM_PEND_MASK)) { + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_INTERRUPT, group, queue, + cs_req ^ cs_ack); + + dev_dbg(kbdev->dev, + "Protected mode entry request for queue on csi %d bound to group-%d on slot %d", + queue->csi_index, group->handle, + group->csg_nr); + + bitmap_set(group->protm_pending_bitmap, i, 1); + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, PROTM_PENDING_SET, group, queue, + group->protm_pending_bitmap[0]); + protm_pend = true; + } + } + } + + if (protm_pend) + queue_work(group->kctx->csf.wq, &group->protm_event_work); +} + +/** + * process_csg_interrupts - Process interrupts for a CSG. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @csg_nr: CSG number. + * + * Handles interrupts for a CSG and for CSs within it. + * + * If the CSG's request register value differs from its acknowledge register + * then the firmware is notifying the host of an event concerning the whole + * group. The actions required are then determined by examining which + * notification flags differ between those two register values. + * + * See process_cs_interrupts() for details of per-stream interrupt handling. 
+ */ +static void process_csg_interrupts(struct kbase_device *const kbdev, + int const csg_nr) +{ + struct kbase_csf_cmd_stream_group_info *ginfo; + struct kbase_queue_group *group = NULL; + u32 req, ack, irqreq, irqack; + + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + + if (WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num)) + return; + + KBASE_KTRACE_ADD(kbdev, CSG_INTERRUPT_PROCESS, NULL, csg_nr); + + ginfo = &kbdev->csf.global_iface.groups[csg_nr]; + req = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ); + ack = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); + irqreq = kbase_csf_firmware_csg_output(ginfo, CSG_IRQ_REQ); + irqack = kbase_csf_firmware_csg_input_read(ginfo, CSG_IRQ_ACK); + + /* There may not be any pending CSG/CS interrupts to process */ + if ((req == ack) && (irqreq == irqack)) + goto out; + + /* Immediately set IRQ_ACK bits to be same as the IRQ_REQ bits before + * examining the CS_ACK & CS_REQ bits. This ensures that the Host + * doesn't miss an interrupt for the CS in the race scenario where, + * whilst the Host is servicing an interrupt for the CS, firmware sends + * another interrupt for that CS. + */ + kbase_csf_firmware_csg_input(ginfo, CSG_IRQ_ACK, irqreq); + + group = kbase_csf_scheduler_get_group_on_slot(kbdev, csg_nr); + + /* The group pointer can be NULL here if interrupts for the group + * (like SYNC_UPDATE, IDLE notification) were delayed and arrived + * just after the suspension of group completed. However if not NULL + * then the group pointer cannot disappear even if User tries to + * terminate the group whilst this loop is running as scheduler + * spinlock is held and for freeing a group that is resident on a CSG + * slot scheduler spinlock is required.
+ */ + if (!group) + goto out; + + if (WARN_ON(kbase_csf_scheduler_group_get_slot_locked(group) != csg_nr)) + goto out; + + if ((req ^ ack) & CSG_REQ_SYNC_UPDATE_MASK) { + kbase_csf_firmware_csg_input_mask(ginfo, + CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK); + + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SYNC_UPDATE_INTERRUPT, group, req ^ ack); + kbase_csf_event_signal_cpu_only(group->kctx); + } + + if ((req ^ ack) & CSG_REQ_IDLE_MASK) { + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + + kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack, + CSG_REQ_IDLE_MASK); + + set_bit(csg_nr, scheduler->csg_slots_idle_mask); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, group, + scheduler->csg_slots_idle_mask[0]); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_IDLE_INTERRUPT, group, req ^ ack); + dev_dbg(kbdev->dev, "Idle notification received for Group %u on slot %d\n", + group->handle, csg_nr); + + /* Check if the scheduling tick can be advanced */ + if (kbase_csf_scheduler_all_csgs_idle(kbdev) && + !scheduler->gpu_idle_fw_timer_enabled) { + kbase_csf_scheduler_advance_tick_nolock(kbdev); + } + } + + if ((req ^ ack) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK) { + kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack, + CSG_REQ_PROGRESS_TIMER_EVENT_MASK); + + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_PROGRESS_TIMER_INTERRUPT, + group, req ^ ack); + dev_info(kbdev->dev, + "Timeout notification received for group %u of ctx %d_%d on slot %d\n", + group->handle, group->kctx->tgid, group->kctx->id, csg_nr); + + handle_progress_timer_event(group); + } + + /* Per-CS events use the IRQ_REQ/IRQ_ACK values sampled on entry */ + process_cs_interrupts(group, ginfo, irqreq, irqack); + +out: + /* group may still be NULL here */ + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_END, group, + ((u64)req ^ ack) | (((u64)irqreq ^ irqack) << 32)); +} + +/** + * process_prfcnt_interrupts - Process performance counter interrupts. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @glb_req: Global request register value.
+ * @glb_ack: Global acknowledge register value. + * + * Handles interrupts issued by the firmware that relate to the performance + * counters. For example, on completion of a performance counter sample. It is + * expected that the scheduler spinlock is already held on calling this + * function. + */ +static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req, + u32 glb_ack) +{ + const struct kbase_csf_global_iface *const global_iface = + &kbdev->csf.global_iface; + + lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock); + + /* Process PRFCNT_SAMPLE interrupt. */ + if (kbdev->csf.hwcnt.request_pending && + ((glb_req & GLB_REQ_PRFCNT_SAMPLE_MASK) == + (glb_ack & GLB_REQ_PRFCNT_SAMPLE_MASK))) { + kbdev->csf.hwcnt.request_pending = false; + + dev_dbg(kbdev->dev, "PRFCNT_SAMPLE done interrupt received."); + + kbase_hwcnt_backend_csf_on_prfcnt_sample( + &kbdev->hwcnt_gpu_iface); + } + + /* Process PRFCNT_ENABLE interrupt. */ + if (kbdev->csf.hwcnt.enable_pending && + ((glb_req & GLB_REQ_PRFCNT_ENABLE_MASK) == + (glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK))) { + kbdev->csf.hwcnt.enable_pending = false; + + dev_dbg(kbdev->dev, + "PRFCNT_ENABLE status changed interrupt received."); + + if (glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK) + kbase_hwcnt_backend_csf_on_prfcnt_enable( + &kbdev->hwcnt_gpu_iface); + else + kbase_hwcnt_backend_csf_on_prfcnt_disable( + &kbdev->hwcnt_gpu_iface); + } + + /* Process PRFCNT_THRESHOLD interrupt. */ + if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_THRESHOLD_MASK) { + dev_dbg(kbdev->dev, "PRFCNT_THRESHOLD interrupt received."); + + kbase_hwcnt_backend_csf_on_prfcnt_threshold( + &kbdev->hwcnt_gpu_iface); + + /* Set the GLB_REQ.PRFCNT_THRESHOLD flag back to + * the same value as GLB_ACK.PRFCNT_THRESHOLD + * flag in order to enable reporting of another + * PRFCNT_THRESHOLD event. + */ + kbase_csf_firmware_global_input_mask( + global_iface, GLB_REQ, glb_ack, + GLB_REQ_PRFCNT_THRESHOLD_MASK); + } + + /* Process PRFCNT_OVERFLOW interrupt. 
*/ + if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_OVERFLOW_MASK) { + dev_dbg(kbdev->dev, "PRFCNT_OVERFLOW interrupt received."); + + kbase_hwcnt_backend_csf_on_prfcnt_overflow( + &kbdev->hwcnt_gpu_iface); + + /* Set the GLB_REQ.PRFCNT_OVERFLOW flag back to + * the same value as GLB_ACK.PRFCNT_OVERFLOW + * flag in order to enable reporting of another + * PRFCNT_OVERFLOW event. + */ + kbase_csf_firmware_global_input_mask( + global_iface, GLB_REQ, glb_ack, + GLB_REQ_PRFCNT_OVERFLOW_MASK); + } +} + +void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) +{ + unsigned long flags; + u32 remaining = val; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT, NULL, val); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val); + + if (val & JOB_IRQ_GLOBAL_IF) { + const struct kbase_csf_global_iface *const global_iface = + &kbdev->csf.global_iface; + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + + kbdev->csf.interrupt_received = true; + remaining &= ~JOB_IRQ_GLOBAL_IF; + + if (!kbdev->csf.firmware_reloaded) + kbase_csf_firmware_reload_completed(kbdev); + else if (global_iface->output) { + u32 glb_req, glb_ack; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + glb_req = kbase_csf_firmware_global_input_read( + global_iface, GLB_REQ); + glb_ack = kbase_csf_firmware_global_output( + global_iface, GLB_ACK); + KBASE_KTRACE_ADD(kbdev, GLB_REQ_ACQ, NULL, glb_req ^ glb_ack); + + if ((glb_req ^ glb_ack) & GLB_REQ_PROTM_EXIT_MASK) { + dev_dbg(kbdev->dev, "Protected mode exit interrupt received"); + kbase_csf_firmware_global_input_mask( + global_iface, GLB_REQ, glb_ack, + GLB_REQ_PROTM_EXIT_MASK); + WARN_ON(!kbase_csf_scheduler_protected_mode_in_use(kbdev)); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_EXIT_PROTM, scheduler->active_protm_grp, 0u); + scheduler->active_protm_grp = NULL; + kbdev->protected_mode = false; + kbase_ipa_control_protm_exited(kbdev); + kbase_hwcnt_backend_csf_protm_exited( + 
&kbdev->hwcnt_gpu_iface); + } + + /* Handle IDLE Hysteresis notification event */ + if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) { + int non_idle_offslot_grps; + bool can_suspend_on_idle; + dev_dbg(kbdev->dev, "Idle-hysteresis event flagged"); + kbase_csf_firmware_global_input_mask( + global_iface, GLB_REQ, glb_ack, + GLB_REQ_IDLE_EVENT_MASK); + + non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps); + can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev); + KBASE_KTRACE_ADD(kbdev, SCHEDULER_CAN_IDLE, NULL, + ((u64)(u32)non_idle_offslot_grps) | (((u64)can_suspend_on_idle) << 32)); + + if (!non_idle_offslot_grps) { + if (can_suspend_on_idle) + queue_work(system_highpri_wq, + &scheduler->gpu_idle_work); + } else { + /* Advance the scheduling tick to get + * the non-idle suspended groups loaded + * soon. + */ + kbase_csf_scheduler_advance_tick_nolock( + kbdev); + } + } + + process_prfcnt_interrupts(kbdev, glb_req, glb_ack); + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + /* Invoke the MCU state machine as a state transition + * might have completed. 
+ */ + kbase_pm_update_state(kbdev); + } + + if (!remaining) { + wake_up_all(&kbdev->csf.event_wait); + KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val); + return; + } + } + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + while (remaining != 0) { + int const csg_nr = ffs(remaining) - 1; + + process_csg_interrupts(kbdev, csg_nr); + remaining &= ~(1 << csg_nr); + } + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + wake_up_all(&kbdev->csf.event_wait); + KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val); +} + +void kbase_csf_doorbell_mapping_term(struct kbase_device *kbdev) +{ + if (kbdev->csf.db_filp) { + struct page *page = as_page(kbdev->csf.dummy_db_page); + + kbase_mem_pool_free( + &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], + page, false); + + fput(kbdev->csf.db_filp); + } +} + +int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev) +{ + struct tagged_addr phys; + struct file *filp; + int ret; + + filp = shmem_file_setup("mali csf", MAX_LFS_FILESIZE, VM_NORESERVE); + if (IS_ERR(filp)) + return PTR_ERR(filp); + + ret = kbase_mem_pool_alloc_pages( + &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], + 1, &phys, false); + + if (ret <= 0) { + fput(filp); + return ret; + } + + kbdev->csf.db_filp = filp; + kbdev->csf.dummy_db_page = phys; + kbdev->csf.db_file_offsets = 0; + + return 0; +} + +void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev) +{ + if (as_phys_addr_t(kbdev->csf.dummy_user_reg_page)) { + struct page *page = as_page(kbdev->csf.dummy_user_reg_page); + + kbase_mem_pool_free( + &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, + false); + } +} + +int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev) +{ + struct tagged_addr phys; + struct page *page; + u32 *addr; + int ret; + + kbdev->csf.dummy_user_reg_page = as_tagged(0); + + ret = kbase_mem_pool_alloc_pages( + &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys, + false); + + if (ret <= 0) + return ret; + + page = as_page(phys); + addr = 
kmap_atomic(page); + + /* Write a special value for the latest flush register inside the + * dummy page + */ + addr[LATEST_FLUSH / sizeof(u32)] = POWER_DOWN_LATEST_FLUSH_VALUE; + + kbase_sync_single_for_device(kbdev, kbase_dma_addr(page), sizeof(u32), + DMA_BIDIRECTIONAL); + kunmap_atomic(addr); + + kbdev->csf.dummy_user_reg_page = phys; + + return 0; +} + +u8 kbase_csf_priority_check(struct kbase_device *kbdev, u8 req_priority) +{ + struct priority_control_manager_device *pcm_device = kbdev->pcm_dev; + u8 out_priority = req_priority; + + if (pcm_device) { + req_priority = kbase_csf_priority_queue_group_priority_to_relative(req_priority); + out_priority = pcm_device->ops.pcm_scheduler_priority_check(pcm_device, current, req_priority); + out_priority = kbase_csf_priority_relative_to_queue_group_priority(out_priority); + } + + return out_priority; +} + diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf.h new file mode 100644 index 0000000..e3bd436 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf.h @@ -0,0 +1,564 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#ifndef _KBASE_CSF_H_ +#define _KBASE_CSF_H_ + +#include "mali_kbase_csf_kcpu.h" +#include "mali_kbase_csf_scheduler.h" +#include "mali_kbase_csf_firmware.h" +#include "mali_kbase_csf_protected_memory.h" + +/* Indicate invalid CS h/w interface + */ +#define KBASEP_IF_NR_INVALID ((s8)-1) + +/* Indicate invalid CSG number for a GPU command queue group + */ +#define KBASEP_CSG_NR_INVALID ((s8)-1) + +/* Indicate invalid user doorbell number for a GPU command queue + */ +#define KBASEP_USER_DB_NR_INVALID ((s8)-1) + +#define FIRMWARE_PING_INTERVAL_MS (4000) /* 4 seconds */ + +#define FIRMWARE_IDLE_HYSTERESIS_TIME_MS (10) /* Default 10 milliseconds */ + +/** + * enum kbase_csf_event_callback_action - return type for CSF event callbacks. + * + * @KBASE_CSF_EVENT_CALLBACK_FIRST: Never set explicitly. + * It doesn't correspond to any action or type of event callback. + * + * @KBASE_CSF_EVENT_CALLBACK_KEEP: The callback will remain registered. + * + * @KBASE_CSF_EVENT_CALLBACK_REMOVE: The callback will be removed + * immediately upon return. + * + * @KBASE_CSF_EVENT_CALLBACK_LAST: Never set explicitly. + * It doesn't correspond to any action or type of event callback. + */ +enum kbase_csf_event_callback_action { + KBASE_CSF_EVENT_CALLBACK_FIRST = 0, + KBASE_CSF_EVENT_CALLBACK_KEEP, + KBASE_CSF_EVENT_CALLBACK_REMOVE, + KBASE_CSF_EVENT_CALLBACK_LAST, +}; + +/** + * kbase_csf_event_callback - type for callback functions to be + * called upon CSF events. + * + * This is the type of callback functions that can be registered + * for CSF events. These function calls shall be triggered by any call + * to kbase_csf_event_signal. + * + * @param: Generic parameter to pass to the callback function. + * + * Return: KBASE_CSF_EVENT_CALLBACK_KEEP if the callback should remain + * registered, or KBASE_CSF_EVENT_CALLBACK_REMOVE if it should be removed.
+ */ +typedef enum kbase_csf_event_callback_action kbase_csf_event_callback(void *param); + +/** + * kbase_csf_event_wait_add - Add a CSF event callback + * + * This function adds an event callback to the list of CSF event callbacks + * belonging to a given Kbase context, to be triggered when a CSF event is + * signalled by kbase_csf_event_signal. + * + * @kctx: The Kbase context the @callback should be registered to. + * @callback: The callback function to register. + * @param: Custom parameter to be passed to the @callback function. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_csf_event_wait_add(struct kbase_context *kctx, + kbase_csf_event_callback *callback, void *param); + +/** + * kbase_csf_event_wait_remove - Remove a CSF event callback + * + * This function removes an event callback from the list of CSF event callbacks + * belonging to a given Kbase context. + * + * @kctx: The kbase context the @callback should be removed from. + * @callback: The callback function to remove. + * @param: Custom parameter that would have been passed to the @callback + * function. + */ +void kbase_csf_event_wait_remove(struct kbase_context *kctx, + kbase_csf_event_callback *callback, void *param); + +/** + * kbase_csf_event_wait_remove_all - Removes all CSF event callbacks + * + * This function empties the list of CSF event callbacks belonging to a given + * Kbase context. + * + * @kctx: The kbase context for which CSF event callbacks have to be removed. + */ +void kbase_csf_event_wait_remove_all(struct kbase_context *kctx); + +/** + * kbase_csf_read_error - Read CS fatal error + * + * This function takes the CS fatal error from the context's ordered + * error_list and copies its contents to @event_data. + * + * @kctx: The kbase context to read fatal error from + * @event_data: Caller-provided buffer to copy the fatal error to + * + * Return: true if fatal error is read successfully.
+ */ +bool kbase_csf_read_error(struct kbase_context *kctx, + struct base_csf_notification *event_data); + +/** + * kbase_csf_error_pending - Check whether fatal error is pending + * + * @kctx: The kbase context to check fatal error upon. + * + * Return: true if fatal error is pending. + */ +bool kbase_csf_error_pending(struct kbase_context *kctx); + +/** + * kbase_csf_event_signal - Signal a CSF event + * + * This function triggers all the CSF event callbacks that are registered to + * a given Kbase context, and also signals the event handling thread of + * userspace driver waiting for the CSF event. + * + * @kctx: The kbase context whose CSF event callbacks shall be triggered. + * @notify_gpu: Flag to indicate if CSF firmware should be notified of the + * signaling of event that happened on the Driver side, either + * the signal came from userspace or from kcpu queues. + */ +void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu); + +static inline void kbase_csf_event_signal_notify_gpu(struct kbase_context *kctx) +{ + kbase_csf_event_signal(kctx, true); +} + +static inline void kbase_csf_event_signal_cpu_only(struct kbase_context *kctx) +{ + kbase_csf_event_signal(kctx, false); +} + +/** + * kbase_csf_ctx_init - Initialize the CSF interface for a GPU address space. + * + * @kctx: Pointer to the kbase context which is being initialized. + * + * Return: 0 if successful or a negative error code on failure. + */ +int kbase_csf_ctx_init(struct kbase_context *kctx); + +/** + * kbase_csf_ctx_handle_fault - Terminate queue groups & notify fault upon + * GPU bus fault, MMU page fault or similar. + * + * This function terminates all GPU command queue groups in the context and + * notifies the event notification thread of the fault. + * + * @kctx: Pointer to faulty kbase context. + * @fault: Pointer to the fault. 
+ */ +void kbase_csf_ctx_handle_fault(struct kbase_context *kctx, + struct kbase_fault *fault); + +/** + * kbase_csf_ctx_term - Terminate the CSF interface for a GPU address space. + * + * This function terminates any remaining CSGs and CSs which weren't destroyed + * before context termination. + * + * @kctx: Pointer to the kbase context which is being terminated. + */ +void kbase_csf_ctx_term(struct kbase_context *kctx); + +/** + * kbase_csf_queue_register - Register a GPU command queue. + * + * @kctx: Pointer to the kbase context within which the + * queue is to be registered. + * @reg: Pointer to the structure which contains details of the + * queue to be registered within the provided + * context. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_csf_queue_register(struct kbase_context *kctx, + struct kbase_ioctl_cs_queue_register *reg); + +/** + * kbase_csf_queue_register_ex - Register a GPU command queue with + * extended format. + * + * @kctx: Pointer to the kbase context within which the + * queue is to be registered. + * @reg: Pointer to the structure which contains details of the + * queue to be registered within the provided + * context, together with the extended parameter fields + * for supporting cs trace command. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_csf_queue_register_ex(struct kbase_context *kctx, + struct kbase_ioctl_cs_queue_register_ex *reg); + +/** + * kbase_csf_queue_terminate - Terminate a GPU command queue. + * + * @kctx: Pointer to the kbase context within which the + * queue is to be terminated. + * @term: Pointer to the structure which identifies which + * queue is to be terminated. + */ +void kbase_csf_queue_terminate(struct kbase_context *kctx, + struct kbase_ioctl_cs_queue_terminate *term); + +/** + * kbase_csf_alloc_command_stream_user_pages - Allocate resources for a + * GPU command queue. 
+ * + * This function allocates a pair of User mode input/output pages for a + * GPU command queue and maps them in the shared interface segment of MCU + * firmware address space. Also reserves a hardware doorbell page for the queue. + * + * @kctx: Pointer to the kbase context within which the resources + * for the queue are being allocated. + * @queue: Pointer to the queue for which to allocate resources. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, + struct kbase_queue *queue); + +/** + * kbase_csf_queue_bind - Bind a GPU command queue to a queue group. + * + * @kctx: The kbase context. + * @bind: Pointer to the union which specifies a queue group and a + * queue to be bound to that group. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_csf_queue_bind(struct kbase_context *kctx, + union kbase_ioctl_cs_queue_bind *bind); + +/** + * kbase_csf_queue_unbind - Unbind a GPU command queue from a queue group + * to which it has been bound and free + * resources allocated for this queue if there + * are any. + * + * @queue: Pointer to queue to be unbound. + */ +void kbase_csf_queue_unbind(struct kbase_queue *queue); + +/** + * kbase_csf_queue_unbind_stopped - Unbind a GPU command queue in the case + * where it was never started. + * @queue: Pointer to queue to be unbound. + * + * Variant of kbase_csf_queue_unbind() for use on error paths for cleaning up + * queues that failed to fully bind. + */ +void kbase_csf_queue_unbind_stopped(struct kbase_queue *queue); + +/** + * kbase_csf_queue_kick - Schedule a GPU command queue on the firmware + * + * @kctx: The kbase context. + * @kick: Pointer to the struct which specifies the queue + * that needs to be scheduled. + * + * Return: 0 on success, or negative on failure. 
+ */ +int kbase_csf_queue_kick(struct kbase_context *kctx, + struct kbase_ioctl_cs_queue_kick *kick); + +/** + * kbase_csf_queue_group_handle_is_valid - Find if the given queue group handle + * is valid. + * + * This function is used to determine if the queue group handle is valid. + * + * @kctx: The kbase context under which the queue group exists. + * @group_handle: Handle for the group which uniquely identifies it within + * the context with which it was created. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx, + u8 group_handle); + +/** + * kbase_csf_queue_group_create - Create a GPU command queue group. + * + * @kctx: Pointer to the kbase context within which the + * queue group is to be created. + * @create: Pointer to the structure which contains details of the + * queue group which is to be created within the + * provided kbase context. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_csf_queue_group_create(struct kbase_context *kctx, + union kbase_ioctl_cs_queue_group_create *create); + +/** + * kbase_csf_queue_group_terminate - Terminate a GPU command queue group. + * + * @kctx: Pointer to the kbase context within which the + * queue group is to be terminated. + * @group_handle: Handle which uniquely identifies, within @kctx, the queue + * group which is to be terminated. + */ +void kbase_csf_queue_group_terminate(struct kbase_context *kctx, + u8 group_handle); + +/** + * kbase_csf_term_descheduled_queue_group - Terminate a GPU command queue + * group that is not operational + * inside the scheduler. + * + * @group: Pointer to the structure which identifies the queue + * group to be terminated. The function assumes that the caller + * is sure that the given group is not operational inside the + * scheduler. If in doubt, use its alternative: + * @ref kbase_csf_queue_group_terminate().
+ */ +void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group); + +/** + * kbase_csf_queue_group_suspend - Suspend a GPU command queue group + * + * This function is used to suspend a queue group and copy the suspend buffer. + * + * @kctx: The kbase context for which the queue group is to be + * suspended. + * @sus_buf: Pointer to the structure which contains details of the + * user buffer and its kernel pinned pages. + * @group_handle: Handle for the group which uniquely identifies it within + * the context within which it was created. + * + * Return: 0 on success or negative value if failed to suspend + * queue group and copy suspend buffer contents. + */ +int kbase_csf_queue_group_suspend(struct kbase_context *kctx, + struct kbase_suspend_copy_buffer *sus_buf, u8 group_handle); + +/** + * kbase_csf_add_group_fatal_error - Report a fatal group error to userspace + * + * @group: GPU command queue group. + * @err_payload: Error payload to report. + */ +void kbase_csf_add_group_fatal_error( + struct kbase_queue_group *const group, + struct base_gpu_queue_group_error const *const err_payload); + +/** + * kbase_csf_interrupt - Handle interrupts issued by CSF firmware. + * + * @kbdev: The kbase device to handle an IRQ for + * @val: The value of JOB IRQ status register which triggered the interrupt + */ +void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val); + +/** + * kbase_csf_doorbell_mapping_init - Initialize the fields that facilitate + * the update of userspace mapping of HW + * doorbell page. + * + * The function creates a file and allocates a dummy page to facilitate the + * update of userspace mapping to point to the dummy page instead of the real + * HW doorbell page after the suspend of queue group. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * Return: 0 on success, or negative on failure.
+ */ +int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev); + +/** + * kbase_csf_doorbell_mapping_term - Free the dummy page & close the file used + * to update the userspace mapping of HW doorbell page + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +void kbase_csf_doorbell_mapping_term(struct kbase_device *kbdev); + +/** + * kbase_csf_setup_dummy_user_reg_page - Set up the dummy page that is accessed + * instead of the User register page after + * the GPU power down. + * + * The function allocates a dummy page which is used to replace the User + * register page in the userspace mapping after the power down of GPU. + * On the power up of GPU, the mapping is updated to point to the real + * User register page. The mapping is used to allow access to LATEST_FLUSH + * register from userspace. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev); + +/** + * kbase_csf_free_dummy_user_reg_page - Free the dummy page that was + * used to replace the User register page + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev); + +/** + * kbase_csf_ring_csg_doorbell - ring the doorbell for a CSG interface. + * + * The function kicks a notification on the CSG interface to firmware. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @slot: Index of CSG interface for ringing the door-bell. + */ +void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot); + +/** + * kbase_csf_ring_csg_slots_doorbell - ring the doorbell for a set of CSG + * interfaces. + * + * The function kicks a notification on a set of CSG interfaces to firmware. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @slot_bitmap: bitmap for the given slots, slot-0 on bit-0, etc. + */ +void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev, + u32 slot_bitmap); + +/** + * kbase_csf_ring_cs_kernel_doorbell - ring the kernel doorbell for a CSI + * assigned to a GPU queue + * + * The function sends a doorbell interrupt notification to the firmware for + * a CSI assigned to a GPU queue. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @csi_index: ID of the CSI assigned to the GPU queue. + * @csg_nr: Index of the CSG slot assigned to the queue + * group to which the GPU queue is bound. + * @ring_csg_doorbell: Flag to indicate if the CSG doorbell needs to be rung + * after updating the CSG_DB_REQ. So if this flag is false + * the doorbell interrupt will not be sent to FW. + * The flag is supposed to be false only when the input page + * for bound GPU queues is programmed at the time of + * starting/resuming the group on a CSG slot. + */ +void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, + int csi_index, int csg_nr, + bool ring_csg_doorbell); + +/** + * kbase_csf_ring_cs_user_doorbell - ring the user doorbell allocated for a + * queue. + * + * The function kicks a notification to the firmware on the doorbell assigned + * to the queue. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @queue: Pointer to the queue for ringing the door-bell. + */ +void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev, + struct kbase_queue *queue); + +/** + * kbase_csf_active_queue_groups_reset - Reset the state of all active GPU + * command queue groups associated with the context. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @kctx: The kbase context.
+ * + * This function will iterate through all the active/scheduled GPU command + * queue groups associated with the context, deschedule and mark them as + * terminated (which will then lead to unbinding of all the queues bound to + * them) and also no more work would be allowed to execute for them. + * + * This is similar to the action taken in response to an unexpected OoM event. + */ +void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev, + struct kbase_context *kctx); + +/** + * kbase_csf_priority_check - Check the priority requested + * + * @kbdev: Device pointer + * @req_priority: Requested priority + * + * This will determine whether the requested priority can be satisfied. + * + * Return: The same or lower priority than requested. + */ +u8 kbase_csf_priority_check(struct kbase_device *kbdev, u8 req_priority); + +extern const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT]; +extern const u8 kbasep_csf_relative_to_queue_group_priority[KBASE_QUEUE_GROUP_PRIORITY_COUNT]; + +/** + * kbase_csf_priority_relative_to_queue_group_priority - Convert relative to base priority + * + * @priority: kbase relative priority + * + * This will convert the monotonically increasing relative priority to the + * fixed base priority list. Out-of-range values are treated as KBASE_QUEUE_GROUP_PRIORITY_LOW. + * + * Return: base_queue_group_priority priority. + */ +static inline u8 kbase_csf_priority_relative_to_queue_group_priority(u8 priority) +{ + if (priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT) + priority = KBASE_QUEUE_GROUP_PRIORITY_LOW; + return kbasep_csf_relative_to_queue_group_priority[priority]; +} + +/** + * kbase_csf_priority_queue_group_priority_to_relative - Convert base priority to relative + * + * @priority: base_queue_group_priority priority + * + * This will convert the fixed base priority list to monotonically increasing relative priority. + * + * Return: kbase relative priority.
+ */ +static inline u8 kbase_csf_priority_queue_group_priority_to_relative(u8 priority) +{ + /* Apply low priority in case of invalid priority */ + if (priority >= BASE_QUEUE_GROUP_PRIORITY_COUNT) + priority = BASE_QUEUE_GROUP_PRIORITY_LOW; + return kbasep_csf_queue_group_priority_to_relative[priority]; +} + + +#endif /* _KBASE_CSF_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_cpu_queue_debugfs.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_cpu_queue_debugfs.c new file mode 100644 index 0000000..26637bf --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_cpu_queue_debugfs.c @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include "mali_kbase_csf_cpu_queue_debugfs.h" +#include +#include + +#if IS_ENABLED(CONFIG_DEBUG_FS) + +bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx, + struct base_csf_notification *req) +{ + if (atomic_cmpxchg(&kctx->csf.cpu_queue.dump_req_status, + BASE_CSF_CPU_QUEUE_DUMP_ISSUED, + BASE_CSF_CPU_QUEUE_DUMP_PENDING) != + BASE_CSF_CPU_QUEUE_DUMP_ISSUED) { + return false; + } + + req->type = BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP; + return true; +} + +/** + * kbasep_csf_cpu_queue_debugfs_show() - Print cpu queue information for per context + * + * @file: The seq_file for printing to + * @data: The debugfs dentry private data, a pointer to kbase_context + * + * Return: Negative error code or 0 on success. + */ +static int kbasep_csf_cpu_queue_debugfs_show(struct seq_file *file, void *data) +{ + struct kbase_context *kctx = file->private; + + mutex_lock(&kctx->csf.lock); + if (atomic_read(&kctx->csf.cpu_queue.dump_req_status) != + BASE_CSF_CPU_QUEUE_DUMP_COMPLETE) { + seq_printf(file, "Dump request already started! (try again)\n"); + mutex_unlock(&kctx->csf.lock); + return -EBUSY; + } + + atomic_set(&kctx->csf.cpu_queue.dump_req_status, BASE_CSF_CPU_QUEUE_DUMP_ISSUED); + init_completion(&kctx->csf.cpu_queue.dump_cmp); + kbase_event_wakeup(kctx); + mutex_unlock(&kctx->csf.lock); + + seq_printf(file, "CPU Queues table (version:v%u):\n", MALI_CSF_CPU_QUEUE_DEBUGFS_VERSION); + + wait_for_completion_timeout(&kctx->csf.cpu_queue.dump_cmp, + msecs_to_jiffies(3000)); + + mutex_lock(&kctx->csf.lock); + if (kctx->csf.cpu_queue.buffer) { + WARN_ON(atomic_read(&kctx->csf.cpu_queue.dump_req_status) != + BASE_CSF_CPU_QUEUE_DUMP_PENDING); + + seq_printf(file, "%s\n", kctx->csf.cpu_queue.buffer); + + kfree(kctx->csf.cpu_queue.buffer); + kctx->csf.cpu_queue.buffer = NULL; + kctx->csf.cpu_queue.buffer_size = 0; + } + else + seq_printf(file, "Dump error! 
(time out)\n"); + + atomic_set(&kctx->csf.cpu_queue.dump_req_status, + BASE_CSF_CPU_QUEUE_DUMP_COMPLETE); + + mutex_unlock(&kctx->csf.lock); + return 0; +} + +static int kbasep_csf_cpu_queue_debugfs_open(struct inode *in, struct file *file) +{ + return single_open(file, kbasep_csf_cpu_queue_debugfs_show, in->i_private); +} + +static const struct file_operations kbasep_csf_cpu_queue_debugfs_fops = { + .open = kbasep_csf_cpu_queue_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void kbase_csf_cpu_queue_debugfs_init(struct kbase_context *kctx) +{ + struct dentry *file; + + if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) + return; + + /* Set up the dump state before the debugfs file becomes visible */ + kctx->csf.cpu_queue.buffer = NULL; + kctx->csf.cpu_queue.buffer_size = 0; + atomic_set(&kctx->csf.cpu_queue.dump_req_status, + BASE_CSF_CPU_QUEUE_DUMP_COMPLETE); + + file = debugfs_create_file("cpu_queue", 0444, kctx->kctx_dentry, + kctx, &kbasep_csf_cpu_queue_debugfs_fops); + if (IS_ERR_OR_NULL(file)) { + dev_warn(kctx->kbdev->dev, + "Unable to create cpu queue debugfs entry"); + } +} + +int kbase_csf_cpu_queue_dump(struct kbase_context *kctx, + u64 buffer, size_t buf_size) +{ + int err = 0; + size_t alloc_size = buf_size; + char *dump_buffer; + + if (!buffer || !alloc_size) + goto done; + + alloc_size = (alloc_size + PAGE_SIZE) & ~(PAGE_SIZE - 1); + dump_buffer = kzalloc(alloc_size, GFP_KERNEL); + if (ZERO_OR_NULL_PTR(dump_buffer)) { + err = -ENOMEM; + goto done; + } + + WARN_ON(kctx->csf.cpu_queue.buffer != NULL); + + err = copy_from_user(dump_buffer, + u64_to_user_ptr(buffer), + buf_size); + if (err) { + kfree(dump_buffer); + err = -EFAULT; + goto done; + } + + mutex_lock(&kctx->csf.lock); + /* Free any stale dump and clear the pointer so it cannot dangle */ + kfree(kctx->csf.cpu_queue.buffer); + kctx->csf.cpu_queue.buffer = NULL; + kctx->csf.cpu_queue.buffer_size = 0; + if (atomic_read(&kctx->csf.cpu_queue.dump_req_status) == + BASE_CSF_CPU_QUEUE_DUMP_PENDING) { + kctx->csf.cpu_queue.buffer = dump_buffer; + kctx->csf.cpu_queue.buffer_size = buf_size; + complete_all(&kctx->csf.cpu_queue.dump_cmp); + } else { +
kfree(dump_buffer); + } + + mutex_unlock(&kctx->csf.lock); +done: + return err; +} +#else +/* + * Stub functions for when debugfs is disabled + */ +void kbase_csf_cpu_queue_debugfs_init(struct kbase_context *kctx) +{ +} + +bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx, + struct base_csf_notification *req) +{ + return false; +} + +int kbase_csf_cpu_queue_dump(struct kbase_context *kctx, + u64 buffer, size_t buf_size) +{ + return 0; +} +#endif /* CONFIG_DEBUG_FS */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_cpu_queue_debugfs.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_cpu_queue_debugfs.h new file mode 100644 index 0000000..435a993 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_cpu_queue_debugfs.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_CPU_QUEUE_DEBUGFS_H_ +#define _KBASE_CSF_CPU_QUEUE_DEBUGFS_H_ + +#include +#include + +#include "mali_kbase.h" + +/* Forward declaration */ +struct base_csf_notification; + +#define MALI_CSF_CPU_QUEUE_DEBUGFS_VERSION 0 + +/* CPU queue dump status */ +/* Dumping is done or no dumping is in progress. 
+ */ +#define BASE_CSF_CPU_QUEUE_DUMP_COMPLETE 0 +/* Dumping request has been read by Userspace; the dump data is awaited. */ +#define BASE_CSF_CPU_QUEUE_DUMP_PENDING 1 +/* Dumping request is issued to Userspace but has not been read by it yet. */ +#define BASE_CSF_CPU_QUEUE_DUMP_ISSUED 2 + + +/** + * kbase_csf_cpu_queue_debugfs_init() - Create a debugfs entry for per context cpu queue(s) + * + * @kctx: The kbase_context for which to create the debugfs entry + */ +void kbase_csf_cpu_queue_debugfs_init(struct kbase_context *kctx); + +/** + * kbase_csf_cpu_queue_read_dump_req - Read cpu queue dump request event + * + * @kctx: The kbase_context to which the dumped cpu queue belongs + * @req: Notification with cpu queue dump request. + * + * Return: true if a CPU queue dump is needed, or false otherwise. + */ +bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx, + struct base_csf_notification *req); + +/** + * kbase_csf_cpu_queue_dump_needed - Check the requirement for cpu queue dump + * + * @kctx: The kbase_context to which the dumped cpu queue belongs + * + * Return: true if a cpu queue dump request is issued and not yet read, or false otherwise. + */ +static inline bool kbase_csf_cpu_queue_dump_needed(struct kbase_context *kctx) +{ +#if IS_ENABLED(CONFIG_DEBUG_FS) + return (atomic_read(&kctx->csf.cpu_queue.dump_req_status) == + BASE_CSF_CPU_QUEUE_DUMP_ISSUED); +#else + return false; +#endif +} + +/** + * kbase_csf_cpu_queue_dump - dump buffer containing cpu queue information to debugfs + * + * @kctx: The kbase_context to which the dumped cpu queue belongs + * @buffer: Buffer containing the cpu queue information. + * @buf_size: Buffer size. + * + * Return: 0 on success or if the dump was discarded, or a negative error code.
+ */ +int kbase_csf_cpu_queue_dump(struct kbase_context *kctx, + u64 buffer, size_t buf_size); +#endif /* _KBASE_CSF_CPU_QUEUE_DEBUGFS_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_csg_debugfs.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_csg_debugfs.c new file mode 100644 index 0000000..14deb98 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_csg_debugfs.c @@ -0,0 +1,591 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include "mali_kbase_csf_csg_debugfs.h" +#include +#include +#include +#include + +#if IS_ENABLED(CONFIG_DEBUG_FS) +#include "mali_kbase_csf_tl_reader.h" + +/** + * blocked_reason_to_string() - Convert blocking reason id to a string + * + * @reason_id: blocked_reason + * + * Return: Suitable string + */ +static const char *blocked_reason_to_string(u32 reason_id) +{ + /* possible blocking reasons of a cs */ + static const char *const cs_blocked_reason[] = { + [CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED] = "UNBLOCKED", + [CS_STATUS_BLOCKED_REASON_REASON_WAIT] = "WAIT", + [CS_STATUS_BLOCKED_REASON_REASON_PROGRESS_WAIT] = + "PROGRESS_WAIT", + [CS_STATUS_BLOCKED_REASON_REASON_SYNC_WAIT] = "SYNC_WAIT", + [CS_STATUS_BLOCKED_REASON_REASON_DEFERRED] = "DEFERRED", + [CS_STATUS_BLOCKED_REASON_REASON_RESOURCE] = "RESOURCE", + [CS_STATUS_BLOCKED_REASON_REASON_FLUSH] = "FLUSH" + }; + + if (WARN_ON(reason_id >= ARRAY_SIZE(cs_blocked_reason))) + return "UNKNOWN_BLOCKED_REASON_ID"; + + return cs_blocked_reason[reason_id]; +} + +static void kbasep_csf_scheduler_dump_active_queue_cs_status_wait( + struct seq_file *file, u32 wait_status, u32 wait_sync_value, + u64 wait_sync_live_value, u64 wait_sync_pointer, u32 sb_status, + u32 blocked_reason) +{ +#define WAITING "Waiting" +#define NOT_WAITING "Not waiting" + + seq_printf(file, "SB_MASK: %d\n", + CS_STATUS_WAIT_SB_MASK_GET(wait_status)); + seq_printf(file, "PROGRESS_WAIT: %s\n", + CS_STATUS_WAIT_PROGRESS_WAIT_GET(wait_status) ? + WAITING : NOT_WAITING); + seq_printf(file, "PROTM_PEND: %s\n", + CS_STATUS_WAIT_PROTM_PEND_GET(wait_status) ? + WAITING : NOT_WAITING); + seq_printf(file, "SYNC_WAIT: %s\n", + CS_STATUS_WAIT_SYNC_WAIT_GET(wait_status) ? + WAITING : NOT_WAITING); + seq_printf(file, "WAIT_CONDITION: %s\n", + CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(wait_status) ? 
+ "greater than" : "less or equal"); + seq_printf(file, "SYNC_POINTER: 0x%llx\n", wait_sync_pointer); + seq_printf(file, "SYNC_VALUE: %d\n", wait_sync_value); + seq_printf(file, "SYNC_LIVE_VALUE: 0x%016llx\n", wait_sync_live_value); + seq_printf(file, "SB_STATUS: %u\n", + CS_STATUS_SCOREBOARDS_NONZERO_GET(sb_status)); + seq_printf(file, "BLOCKED_REASON: %s\n", + blocked_reason_to_string(CS_STATUS_BLOCKED_REASON_REASON_GET( + blocked_reason))); +} + +static void kbasep_csf_scheduler_dump_active_cs_trace(struct seq_file *file, + struct kbase_csf_cmd_stream_info const *const stream) +{ + u32 val = kbase_csf_firmware_cs_input_read(stream, + CS_INSTR_BUFFER_BASE_LO); + u64 addr = ((u64)kbase_csf_firmware_cs_input_read(stream, + CS_INSTR_BUFFER_BASE_HI) << 32) | val; + val = kbase_csf_firmware_cs_input_read(stream, + CS_INSTR_BUFFER_SIZE); + + seq_printf(file, "CS_TRACE_BUF_ADDR: 0x%16llx, SIZE: %u\n", addr, val); + + /* Write offset variable address (pointer) */ + val = kbase_csf_firmware_cs_input_read(stream, + CS_INSTR_BUFFER_OFFSET_POINTER_LO); + addr = ((u64)kbase_csf_firmware_cs_input_read(stream, + CS_INSTR_BUFFER_OFFSET_POINTER_HI) << 32) | val; + seq_printf(file, "CS_TRACE_BUF_OFFSET_PTR: 0x%16llx\n", addr); + + /* EVENT_SIZE and EVENT_STATEs */ + val = kbase_csf_firmware_cs_input_read(stream, CS_INSTR_CONFIG); + seq_printf(file, "TRACE_EVENT_SIZE: 0x%x, TRACE_EVENT_STAES 0x%x\n", + CS_INSTR_CONFIG_EVENT_SIZE_GET(val), + CS_INSTR_CONFIG_EVENT_STATE_GET(val)); +} + +/** + * kbasep_csf_scheduler_dump_active_queue() - Print GPU command queue + * debug information + * + * @file: seq_file for printing to + * @queue: Address of a GPU command queue to examine + */ +static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file, + struct kbase_queue *queue) +{ + u32 *addr; + u64 cs_extract; + u64 cs_insert; + u32 cs_active; + u64 wait_sync_pointer; + u32 wait_status, wait_sync_value; + u32 sb_status; + u32 blocked_reason; + struct kbase_vmap_struct *mapping; + 
u64 *evt; + u64 wait_sync_live_value; + + if (!queue) + return; + + if (WARN_ON(queue->csi_index == KBASEP_IF_NR_INVALID || + !queue->group)) + return; + + /* Ring the doorbell to have firmware update CS_EXTRACT */ + kbase_csf_ring_cs_user_doorbell(queue->kctx->kbdev, queue); + msleep(100); + + addr = (u32 *)queue->user_io_addr; + cs_insert = addr[CS_INSERT_LO/4] | ((u64)addr[CS_INSERT_HI/4] << 32); + + addr = (u32 *)(queue->user_io_addr + PAGE_SIZE); + cs_extract = addr[CS_EXTRACT_LO/4] | ((u64)addr[CS_EXTRACT_HI/4] << 32); + cs_active = addr[CS_ACTIVE/4]; + +#define KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO \ + "Bind Idx, Ringbuf addr, Prio, Insert offset, Extract offset, Active, Doorbell\n" + + seq_printf(file, KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO "%8d, %16llx, %4u, %16llx, %16llx, %6u, %8d\n", + queue->csi_index, queue->base_addr, queue->priority, + cs_insert, cs_extract, cs_active, queue->doorbell_nr); + + /* Print status information for blocked group waiting for sync object. For on-slot queues, + * if cs_trace is enabled, dump the interface's cs_trace configuration. 
+ */ + if (kbase_csf_scheduler_group_get_slot(queue->group) < 0) { + if (CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) { + wait_status = queue->status_wait; + wait_sync_value = queue->sync_value; + wait_sync_pointer = queue->sync_ptr; + sb_status = queue->sb_status; + blocked_reason = queue->blocked_reason; + + evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx, wait_sync_pointer, &mapping); + if (evt) { + wait_sync_live_value = evt[0]; + kbase_phy_alloc_mapping_put(queue->kctx, mapping); + } else { + wait_sync_live_value = U64_MAX; + } + + kbasep_csf_scheduler_dump_active_queue_cs_status_wait( + file, wait_status, wait_sync_value, + wait_sync_live_value, wait_sync_pointer, + sb_status, blocked_reason); + } + } else { + struct kbase_device const *const kbdev = + queue->group->kctx->kbdev; + struct kbase_csf_cmd_stream_group_info const *const ginfo = + &kbdev->csf.global_iface.groups[queue->group->csg_nr]; + struct kbase_csf_cmd_stream_info const *const stream = + &ginfo->streams[queue->csi_index]; + u64 cmd_ptr; + u32 req_res; + + if (WARN_ON(!stream)) + return; + + cmd_ptr = kbase_csf_firmware_cs_output(stream, + CS_STATUS_CMD_PTR_LO); + cmd_ptr |= (u64)kbase_csf_firmware_cs_output(stream, + CS_STATUS_CMD_PTR_HI) << 32; + req_res = kbase_csf_firmware_cs_output(stream, + CS_STATUS_REQ_RESOURCE); + + seq_printf(file, "CMD_PTR: 0x%llx\n", cmd_ptr); + seq_printf(file, "REQ_RESOURCE [COMPUTE]: %d\n", + CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_GET(req_res)); + seq_printf(file, "REQ_RESOURCE [FRAGMENT]: %d\n", + CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_GET(req_res)); + seq_printf(file, "REQ_RESOURCE [TILER]: %d\n", + CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_GET(req_res)); + seq_printf(file, "REQ_RESOURCE [IDVS]: %d\n", + CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_GET(req_res)); + + wait_status = kbase_csf_firmware_cs_output(stream, + CS_STATUS_WAIT); + wait_sync_value = kbase_csf_firmware_cs_output(stream, + CS_STATUS_WAIT_SYNC_VALUE); + wait_sync_pointer = 
kbase_csf_firmware_cs_output(stream, + CS_STATUS_WAIT_SYNC_POINTER_LO); + wait_sync_pointer |= (u64)kbase_csf_firmware_cs_output(stream, + CS_STATUS_WAIT_SYNC_POINTER_HI) << 32; + + sb_status = kbase_csf_firmware_cs_output(stream, + CS_STATUS_SCOREBOARDS); + blocked_reason = kbase_csf_firmware_cs_output( + stream, CS_STATUS_BLOCKED_REASON); + + evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx, wait_sync_pointer, &mapping); + if (evt) { + wait_sync_live_value = evt[0]; + kbase_phy_alloc_mapping_put(queue->kctx, mapping); + } else { + wait_sync_live_value = U64_MAX; + } + + kbasep_csf_scheduler_dump_active_queue_cs_status_wait( + file, wait_status, wait_sync_value, + wait_sync_live_value, wait_sync_pointer, sb_status, + blocked_reason); + /* Dealing with cs_trace */ + if (kbase_csf_scheduler_queue_has_trace(queue)) + kbasep_csf_scheduler_dump_active_cs_trace(file, stream); + else + seq_puts(file, "NO CS_TRACE\n"); + } + + seq_puts(file, "\n"); +} + +/* Waiting timeout for STATUS_UPDATE acknowledgment, in milliseconds */ +#define CSF_STATUS_UPDATE_TO_MS (100) + +static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file, + struct kbase_queue_group *const group) +{ + if (kbase_csf_scheduler_group_get_slot(group) >= 0) { + struct kbase_device *const kbdev = group->kctx->kbdev; + unsigned long flags; + u32 ep_c, ep_r; + char exclusive; + struct kbase_csf_cmd_stream_group_info const *const ginfo = + &kbdev->csf.global_iface.groups[group->csg_nr]; + long remaining = + kbase_csf_timeout_in_jiffies(CSF_STATUS_UPDATE_TO_MS); + u8 slot_priority = + kbdev->csf.scheduler.csg_slots[group->csg_nr].priority; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, + ~kbase_csf_firmware_csg_output(ginfo, CSG_ACK), + CSG_REQ_STATUS_UPDATE_MASK); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + kbase_csf_ring_csg_doorbell(kbdev, group->csg_nr); + + remaining = wait_event_timeout(kbdev->csf.event_wait, + 
!((kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ) ^ + kbase_csf_firmware_csg_output(ginfo, CSG_ACK)) & + CSG_REQ_STATUS_UPDATE_MASK), remaining); + + ep_c = kbase_csf_firmware_csg_output(ginfo, + CSG_STATUS_EP_CURRENT); + ep_r = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_EP_REQ); + + if (CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_GET(ep_r)) + exclusive = 'C'; + else if (CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_GET(ep_r)) + exclusive = 'F'; + else + exclusive = '0'; + + if (!remaining) { + dev_err(kbdev->dev, + "Timed out for STATUS_UPDATE on group %d on slot %d", + group->handle, group->csg_nr); + + seq_printf(file, "*** Warn: Timed out for STATUS_UPDATE on slot %d\n", + group->csg_nr); + seq_printf(file, "*** The following group-record is likely stale\n"); + } + + seq_puts(file, "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req), F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive\n"); + seq_printf(file, "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c\n", + group->handle, + group->csg_nr, + slot_priority, + group->run_state, + group->priority, + CSG_STATUS_EP_CURRENT_COMPUTE_EP_GET(ep_c), + CSG_STATUS_EP_REQ_COMPUTE_EP_GET(ep_r), + CSG_STATUS_EP_CURRENT_FRAGMENT_EP_GET(ep_c), + CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(ep_r), + CSG_STATUS_EP_CURRENT_TILER_EP_GET(ep_c), + CSG_STATUS_EP_REQ_TILER_EP_GET(ep_r), + exclusive); + } else { + seq_puts(file, "GroupID, CSG NR, Run State, Priority\n"); + seq_printf(file, "%7d, %6d, %9d, %8d\n", + group->handle, + group->csg_nr, + group->run_state, + group->priority); + } + + if (group->run_state != KBASE_CSF_GROUP_TERMINATED) { + unsigned int i; + + seq_puts(file, "Bound queues:\n"); + + for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) { + kbasep_csf_scheduler_dump_active_queue(file, + group->bound_queues[i]); + } + } + + seq_puts(file, "\n"); +} + +/** + * kbasep_csf_queue_group_debugfs_show() - Print per-context GPU command queue + * group debug information + * + * @file: The seq_file for printing to + * @data: 
The debugfs dentry private data, a pointer to kbase context + * + * Return: -EINVAL if @file carries no kbase context, 0 on success. + */ +static int kbasep_csf_queue_group_debugfs_show(struct seq_file *file, + void *data) +{ + u32 gr; + struct kbase_context *const kctx = file->private; + struct kbase_device *kbdev; + + if (WARN_ON(!kctx)) + return -EINVAL; + kbdev = kctx->kbdev; /* dereference only after the NULL check above */ + seq_printf(file, "MALI_CSF_CSG_DEBUGFS_VERSION: v%u\n", + MALI_CSF_CSG_DEBUGFS_VERSION); + + mutex_lock(&kctx->csf.lock); + kbase_csf_scheduler_lock(kbdev); + for (gr = 0; gr < MAX_QUEUE_GROUP_NUM; gr++) { + struct kbase_queue_group *const group = + kctx->csf.queue_groups[gr]; + + if (group) + kbasep_csf_scheduler_dump_active_group(file, group); + } + kbase_csf_scheduler_unlock(kbdev); + mutex_unlock(&kctx->csf.lock); + + return 0; +} + +/** + * kbasep_csf_scheduler_dump_active_groups() - Print debug info for active + * GPU command queue groups + * + * @file: The seq_file for printing to + * @data: The debugfs dentry private data, a pointer to kbase_device + * + * Return: Negative error code or 0 on success.
+ */ +static int kbasep_csf_scheduler_dump_active_groups(struct seq_file *file, + void *data) +{ + u32 csg_nr; + struct kbase_device *kbdev = file->private; + u32 num_groups = kbdev->csf.global_iface.group_num; + + seq_printf(file, "MALI_CSF_CSG_DEBUGFS_VERSION: v%u\n", + MALI_CSF_CSG_DEBUGFS_VERSION); + + kbase_csf_scheduler_lock(kbdev); + for (csg_nr = 0; csg_nr < num_groups; csg_nr++) { + struct kbase_queue_group *const group = + kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; + + if (!group) + continue; + + seq_printf(file, "\nCtx %d_%d\n", group->kctx->tgid, + group->kctx->id); + + kbasep_csf_scheduler_dump_active_group(file, group); + } + kbase_csf_scheduler_unlock(kbdev); + + return 0; +} + +static int kbasep_csf_queue_group_debugfs_open(struct inode *in, + struct file *file) +{ + return single_open(file, kbasep_csf_queue_group_debugfs_show, + in->i_private); +} + +static int kbasep_csf_active_queue_groups_debugfs_open(struct inode *in, + struct file *file) +{ + return single_open(file, kbasep_csf_scheduler_dump_active_groups, + in->i_private); +} + +static const struct file_operations kbasep_csf_queue_group_debugfs_fops = { + .open = kbasep_csf_queue_group_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void kbase_csf_queue_group_debugfs_init(struct kbase_context *kctx) +{ + struct dentry *file; +#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) + const mode_t mode = 0444; +#else + const mode_t mode = 0400; +#endif + + if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) + return; + + file = debugfs_create_file("groups", mode, + kctx->kctx_dentry, kctx, &kbasep_csf_queue_group_debugfs_fops); + + if (IS_ERR_OR_NULL(file)) { + dev_warn(kctx->kbdev->dev, + "Unable to create per context queue groups debugfs entry"); + } +} + +static const struct file_operations + kbasep_csf_active_queue_groups_debugfs_fops = { + .open = kbasep_csf_active_queue_groups_debugfs_open, + .read = seq_read, + .llseek = 
seq_lseek, + .release = single_release, +}; + +static int kbasep_csf_debugfs_scheduling_timer_enabled_get( + void *data, u64 *val) +{ + struct kbase_device *const kbdev = data; + + *val = kbase_csf_scheduler_timer_is_enabled(kbdev); + + return 0; +} + +static int kbasep_csf_debugfs_scheduling_timer_enabled_set( + void *data, u64 val) +{ + struct kbase_device *const kbdev = data; + + kbase_csf_scheduler_timer_set_enabled(kbdev, val != 0); + + return 0; +} + +static int kbasep_csf_debugfs_scheduling_timer_kick_set( + void *data, u64 val) +{ + struct kbase_device *const kbdev = data; + + kbase_csf_scheduler_kick(kbdev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(kbasep_csf_debugfs_scheduling_timer_enabled_fops, + &kbasep_csf_debugfs_scheduling_timer_enabled_get, + &kbasep_csf_debugfs_scheduling_timer_enabled_set, + "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(kbasep_csf_debugfs_scheduling_timer_kick_fops, + NULL, + &kbasep_csf_debugfs_scheduling_timer_kick_set, + "%llu\n"); + +/** + * kbase_csf_debugfs_scheduler_suspend_get() - get if the scheduler is suspended. + * + * @data: The debugfs dentry private data, a pointer to kbase_device + * @val: The debugfs output value, boolean: 1 suspended, 0 otherwise + * + * Return: 0 + */ +static int kbase_csf_debugfs_scheduler_suspend_get( + void *data, u64 *val) +{ + struct kbase_device *kbdev = data; + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + + kbase_csf_scheduler_lock(kbdev); + *val = (scheduler->state == SCHED_SUSPENDED); + kbase_csf_scheduler_unlock(kbdev); + + return 0; +} + +/** + * kbase_csf_debugfs_scheduler_suspend_set() - set the scheduler to suspended. + * + * @data: The debugfs dentry private data, a pointer to kbase_device + * @val: The debugfs input value, boolean: 1 suspend, 0 otherwise + * + * Return: Negative value if already in requested state, 0 otherwise. 
+ */ +static int kbase_csf_debugfs_scheduler_suspend_set( + void *data, u64 val) +{ + struct kbase_device *kbdev = data; + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + enum kbase_csf_scheduler_state state; + + kbase_csf_scheduler_lock(kbdev); + state = scheduler->state; + kbase_csf_scheduler_unlock(kbdev); + + if (val && (state != SCHED_SUSPENDED)) + kbase_csf_scheduler_pm_suspend(kbdev); + else if (!val && (state == SCHED_SUSPENDED)) + kbase_csf_scheduler_pm_resume(kbdev); + else + return -1; + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(kbasep_csf_debugfs_scheduler_suspend_fops, + &kbase_csf_debugfs_scheduler_suspend_get, + &kbase_csf_debugfs_scheduler_suspend_set, + "%llu\n"); + +void kbase_csf_debugfs_init(struct kbase_device *kbdev) +{ + debugfs_create_file("active_groups", 0444, + kbdev->mali_debugfs_directory, kbdev, + &kbasep_csf_active_queue_groups_debugfs_fops); + + debugfs_create_file("scheduling_timer_enabled", 0644, + kbdev->mali_debugfs_directory, kbdev, + &kbasep_csf_debugfs_scheduling_timer_enabled_fops); + debugfs_create_file("scheduling_timer_kick", 0200, + kbdev->mali_debugfs_directory, kbdev, + &kbasep_csf_debugfs_scheduling_timer_kick_fops); + debugfs_create_file("scheduler_suspend", 0644, + kbdev->mali_debugfs_directory, kbdev, + &kbasep_csf_debugfs_scheduler_suspend_fops); + + kbase_csf_tl_reader_debugfs_init(kbdev); + kbase_csf_firmware_trace_buffer_debugfs_init(kbdev); +} + +#else +/* + * Stub functions for when debugfs is disabled + */ +void kbase_csf_queue_group_debugfs_init(struct kbase_context *kctx) +{ +} + +void kbase_csf_debugfs_init(struct kbase_device *kbdev) +{ +} + +#endif /* CONFIG_DEBUG_FS */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_csg_debugfs.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_csg_debugfs.h new file mode 100644 index 0000000..397e657 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_csg_debugfs.h @@ -0,0 +1,47 @@ +/* 
SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_CSG_DEBUGFS_H_ +#define _KBASE_CSF_CSG_DEBUGFS_H_ + +/* Forward declarations */ +struct kbase_device; +struct kbase_context; +struct kbase_queue_group; + +#define MALI_CSF_CSG_DEBUGFS_VERSION 0 + +/** + * kbase_csf_queue_group_debugfs_init() - Add debugfs entry for queue groups + * associated with @kctx. + * + * @kctx: Pointer to kbase_context + */ +void kbase_csf_queue_group_debugfs_init(struct kbase_context *kctx); + +/** + * kbase_csf_debugfs_init() - Add a global debugfs entry for queue groups + * + * @kbdev: Pointer to the device + */ +void kbase_csf_debugfs_init(struct kbase_device *kbdev); + +#endif /* _KBASE_CSF_CSG_DEBUGFS_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_defs.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_defs.h new file mode 100644 index 0000000..53526ce --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_defs.h @@ -0,0 +1,1254 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* Definitions (types, defines, etc.) common to the CSF. + * They are placed here to allow the hierarchy of header files to work. + */ + +#ifndef _KBASE_CSF_DEFS_H_ +#define _KBASE_CSF_DEFS_H_ + +#include +#include + +#include "mali_kbase_csf_firmware.h" + +/* Maximum number of KCPU command queues to be created per GPU address space. + */ +#define KBASEP_MAX_KCPU_QUEUES ((size_t)256) + +/* Maximum number of GPU command queue groups to be created per GPU address + * space. + */ +#define MAX_QUEUE_GROUP_NUM (256) + +/* Maximum number of GPU tiler heaps to allow to be created per GPU address + * space. + */ +#define MAX_TILER_HEAPS (128) + +#define CSF_FIRMWARE_ENTRY_READ (1ul << 0) +#define CSF_FIRMWARE_ENTRY_WRITE (1ul << 1) +#define CSF_FIRMWARE_ENTRY_EXECUTE (1ul << 2) +#define CSF_FIRMWARE_ENTRY_CACHE_MODE (3ul << 3) +#define CSF_FIRMWARE_ENTRY_PROTECTED (1ul << 5) +#define CSF_FIRMWARE_ENTRY_SHARED (1ul << 30) +#define CSF_FIRMWARE_ENTRY_ZERO (1ul << 31) + +/** + * enum kbase_csf_queue_bind_state - bind state of the queue + * + * @KBASE_CSF_QUEUE_UNBOUND: Set when the queue is registered or when the link + * between queue and the group to which it was bound or being bound is removed.
+ * @KBASE_CSF_QUEUE_BIND_IN_PROGRESS: Set when the first part of bind operation + * has completed i.e. CS_QUEUE_BIND ioctl. + * @KBASE_CSF_QUEUE_BOUND: Set when the bind operation has completed i.e. IO + * pages have been mapped in the process address space. + */ +enum kbase_csf_queue_bind_state { + KBASE_CSF_QUEUE_UNBOUND, + KBASE_CSF_QUEUE_BIND_IN_PROGRESS, + KBASE_CSF_QUEUE_BOUND, +}; + +/** + * enum kbase_csf_reset_gpu_state - state of the gpu reset + * + * @KBASE_CSF_RESET_GPU_NOT_PENDING: Set when the GPU reset isn't pending + * + * @KBASE_CSF_RESET_GPU_PREPARED: Set when kbase_prepare_to_reset_gpu() has + * been called. This is just for debugging checks to encourage callers to call + * kbase_prepare_to_reset_gpu() before kbase_reset_gpu(). + * + * @KBASE_CSF_RESET_GPU_COMMITTED: Set when the GPU reset process has been + * committed and so will definitely happen, but the procedure to reset the GPU + * has not yet begun. Other threads must finish accessing the HW before we + * reach %KBASE_CSF_RESET_GPU_HAPPENING. + * + * @KBASE_CSF_RESET_GPU_HAPPENING: Set when the GPU reset process is occurring + * (silent or otherwise), and is actively accessing the HW. Any changes to the + * HW in other threads might get lost, overridden, or corrupted. + * + * @KBASE_CSF_RESET_GPU_COMMITTED_SILENT: Set when the GPU reset process has + * been committed but has not started happening. This is used when resetting + * the GPU as part of normal behavior (e.g. when exiting protected mode). + * Other threads must finish accessing the HW before we reach + * %KBASE_CSF_RESET_GPU_HAPPENING. + * + * @KBASE_CSF_RESET_GPU_FAILED: Set when an error is encountered during the + * GPU reset process. No more work could then be executed on GPU, unloading + * the Driver module is the only option. 
+ */ +enum kbase_csf_reset_gpu_state { + KBASE_CSF_RESET_GPU_NOT_PENDING, + KBASE_CSF_RESET_GPU_PREPARED, + KBASE_CSF_RESET_GPU_COMMITTED, + KBASE_CSF_RESET_GPU_HAPPENING, + KBASE_CSF_RESET_GPU_COMMITTED_SILENT, + KBASE_CSF_RESET_GPU_FAILED, +}; + +/** + * enum kbase_csf_group_state - state of the GPU command queue group + * + * @KBASE_CSF_GROUP_INACTIVE: Group is inactive and won't be + * considered by scheduler for running on + * CSG slot. + * @KBASE_CSF_GROUP_RUNNABLE: Group is in the list of runnable groups + * and is subjected to time-slice based + * scheduling. A start request would be + * sent (or already has been sent) if the + * group is assigned the CS + * group slot for the first time. + * @KBASE_CSF_GROUP_IDLE: Group is currently on a CSG slot + * but all the CSs bound to the group have + * become either idle or waiting on sync + * object. + * Group could be evicted from the slot on + * the next tick if there are no spare + * slots left after scheduling non-idle + * queue groups. If the group is kept on + * slot then it would be moved to the + * RUNNABLE state, also if one of the + * queues bound to the group is kicked it + * would be moved to the RUNNABLE state. + * If the group is evicted from the slot it + * would be moved to either + * KBASE_CSF_GROUP_SUSPENDED_ON_IDLE or + * KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC + * state. + * @KBASE_CSF_GROUP_SUSPENDED: Group was evicted from the CSG slot + * and is not running but is still in the + * list of runnable groups and subjected + * to time-slice based scheduling. A resume + * request would be sent when a CSG slot is + * re-assigned to the group and once the + * resume is complete group would be moved + * back to the RUNNABLE state. + * @KBASE_CSF_GROUP_SUSPENDED_ON_IDLE: Same as KBASE_CSF_GROUP_SUSPENDED except + * that queue group also became idle before + * the suspension. This state helps + * Scheduler avoid scheduling the idle + * groups over the non-idle groups in the + * subsequent ticks.
If one of the queues + * bound to the group is kicked it would be + * moved to the SUSPENDED state. + * @KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC: Same as GROUP_SUSPENDED_ON_IDLE + * except that at least one CS + * bound to this group was + * waiting for synchronization object + * before the suspension. + * @KBASE_CSF_GROUP_FAULT_EVICTED: Group is evicted from the scheduler due + * to a fault condition, pending to be + * terminated. + * @KBASE_CSF_GROUP_TERMINATED: Group is no longer schedulable and is + * pending to be deleted by Client, all the + * queues bound to it have been unbound. + */ +enum kbase_csf_group_state { + KBASE_CSF_GROUP_INACTIVE, + KBASE_CSF_GROUP_RUNNABLE, + KBASE_CSF_GROUP_IDLE, + KBASE_CSF_GROUP_SUSPENDED, + KBASE_CSF_GROUP_SUSPENDED_ON_IDLE, + KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC, + KBASE_CSF_GROUP_FAULT_EVICTED, + KBASE_CSF_GROUP_TERMINATED, +}; + +/** + * enum kbase_csf_csg_slot_state - state of the command queue group slots under + * the scheduler control. + * + * @CSG_SLOT_READY: The slot is clean and ready to be programmed with a + * queue group. + * @CSG_SLOT_READY2RUN: The slot has been programmed with a queue group, i.e. a + * start or resume request has been sent to the firmware. + * @CSG_SLOT_RUNNING: The queue group is running on the slot, acknowledgment + * of a start or resume request has been obtained from the + * firmware. + * @CSG_SLOT_DOWN2STOP: The suspend or terminate request for the queue group on + * the slot has been sent to the firmware. + * @CSG_SLOT_STOPPED: The queue group is removed from the slot, acknowledgment + * of suspend or terminate request has been obtained from + * the firmware. + * @CSG_SLOT_READY2RUN_TIMEDOUT: The start or resume request sent on the slot + * for the queue group timed out. + * @CSG_SLOT_DOWN2STOP_TIMEDOUT: The suspend or terminate request for queue + * group on the slot timed out. 
+ */ +enum kbase_csf_csg_slot_state { + CSG_SLOT_READY, + CSG_SLOT_READY2RUN, + CSG_SLOT_RUNNING, + CSG_SLOT_DOWN2STOP, + CSG_SLOT_STOPPED, + CSG_SLOT_READY2RUN_TIMEDOUT, + CSG_SLOT_DOWN2STOP_TIMEDOUT, +}; + +/** + * enum kbase_csf_scheduler_state - state of the scheduler operational phases. + * + * @SCHED_BUSY: The scheduler is busy performing on tick schedule + * operations, the state of CSG slots + * can't be changed. + * @SCHED_INACTIVE: The scheduler is inactive, it is allowed to modify the + * state of CSG slots by in-cycle + * priority scheduling. + * @SCHED_SUSPENDED: The scheduler is in low-power mode with scheduling + * operations suspended and is not holding the power + * management reference. This can happen if the GPU + * becomes idle for a duration exceeding a threshold, + * or due to a system triggered suspend action. + */ +enum kbase_csf_scheduler_state { + SCHED_BUSY, + SCHED_INACTIVE, + SCHED_SUSPENDED, +}; + +/** + * enum kbase_queue_group_priority - Kbase internal relative priority list. + * + * @KBASE_QUEUE_GROUP_PRIORITY_REALTIME: The realtime queue group priority. + * @KBASE_QUEUE_GROUP_PRIORITY_HIGH: The high queue group priority. + * @KBASE_QUEUE_GROUP_PRIORITY_MEDIUM: The medium queue group priority. + * @KBASE_QUEUE_GROUP_PRIORITY_LOW: The low queue group priority. + * @KBASE_QUEUE_GROUP_PRIORITY_COUNT: The number of priority levels. + */ +enum kbase_queue_group_priority { + KBASE_QUEUE_GROUP_PRIORITY_REALTIME = 0, + KBASE_QUEUE_GROUP_PRIORITY_HIGH, + KBASE_QUEUE_GROUP_PRIORITY_MEDIUM, + KBASE_QUEUE_GROUP_PRIORITY_LOW, + KBASE_QUEUE_GROUP_PRIORITY_COUNT +}; + + +/** + * struct kbase_csf_notification - Event or error generated as part of command + * queue execution + * + * @data: Event or error data returned to userspace + * @link: Link to the linked list, &struct_kbase_csf_context.error_list. 
+ */ +struct kbase_csf_notification { + struct base_csf_notification data; + struct list_head link; +}; + +/** + * struct kbase_queue - Object representing a GPU command queue. + * + * @kctx: Pointer to the base context with which this GPU command queue + * is associated. + * @reg: Pointer to the region allocated from the shared + * interface segment for mapping the User mode + * input/output pages in MCU firmware address space. + * @phys: Pointer to the physical pages allocated for the + * pair of User mode input/output pages. + * @user_io_addr: Pointer to the permanent kernel mapping of User mode + * input/output pages. The pages can be accessed through + * the mapping without any cache maintenance. + * @handle: Handle returned with bind ioctl for creating a + * contiguous User mode mapping of input/output pages & + * the hardware doorbell page. + * @doorbell_nr: Index of the hardware doorbell page assigned to the + * queue. + * @db_file_offset: File offset value that is assigned to userspace mapping + * created on bind to access the doorbell page. + * It is in page units. + * @link: Link to the linked list of GPU command queues created per + * GPU address space. + * @refcount: Reference count, stands for the number of times the queue + * has been referenced. The reference is taken when it is + * created, when it is bound to the group and also when the + * @oom_event_work work item is queued + * for it. + * @group: Pointer to the group to which this queue is bound. + * @queue_reg: Pointer to the VA region allocated for CS buffer. + * @oom_event_work: Work item corresponding to the out of memory event for + * chunked tiler heap being used for this queue. + * @base_addr: Base address of the CS buffer. + * @size: Size of the CS buffer. + * @priority: Priority of this queue within the group. + * @bind_state: Bind state of the queue as enum @kbase_csf_queue_bind_state + * @csi_index: The ID of the assigned CS hardware interface.
+ * @enabled: Indicating whether the CS is running, or not. + * @status_wait: Value of CS_STATUS_WAIT register of the CS will + * be kept when the CS gets blocked by sync wait. + * CS_STATUS_WAIT provides information on conditions queue is + * blocking on. This is set when the group, to which queue is + * bound, is suspended after getting blocked, i.e. in + * KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC state. + * @sync_ptr: Value of CS_STATUS_WAIT_SYNC_POINTER register of the CS + * will be kept when the CS gets blocked by + * sync wait. CS_STATUS_WAIT_SYNC_POINTER contains the address + * of synchronization object being waited on. + * Valid only when @status_wait is set. + * @sync_value: Value of CS_STATUS_WAIT_SYNC_VALUE register of the CS + * will be kept when the CS gets blocked by + * sync wait. CS_STATUS_WAIT_SYNC_VALUE contains the value + * tested against the synchronization object. + * Valid only when @status_wait is set. + * @sb_status: Value indicates which of the scoreboard entries in the queue + * are non-zero + * @blocked_reason: Value shows if the queue is blocked, and if so, + * the reason why it is blocked + * @trace_buffer_base: CS trace buffer base address. + * @trace_offset_ptr: Pointer to the CS trace buffer offset variable. + * @trace_buffer_size: CS trace buffer size for the queue. + * @trace_cfg: CS trace configuration parameters. + * @error: GPU command queue fatal information to pass to user space. + * @fatal_event_work: Work item to handle the CS fatal event reported for this + * queue. + * @cs_fatal_info: Records additional information about the CS fatal event. + * @cs_fatal: Records information about the CS fatal event. 
+ */ +struct kbase_queue { + struct kbase_context *kctx; + struct kbase_va_region *reg; + struct tagged_addr phys[2]; + char *user_io_addr; + u64 handle; + int doorbell_nr; + unsigned long db_file_offset; + struct list_head link; + atomic_t refcount; + struct kbase_queue_group *group; + struct kbase_va_region *queue_reg; + struct work_struct oom_event_work; + u64 base_addr; + u32 size; + u8 priority; + s8 csi_index; + enum kbase_csf_queue_bind_state bind_state; + bool enabled; + u32 status_wait; + u64 sync_ptr; + u32 sync_value; + u32 sb_status; + u32 blocked_reason; + u64 trace_buffer_base; + u64 trace_offset_ptr; + u32 trace_buffer_size; + u32 trace_cfg; + struct kbase_csf_notification error; + struct work_struct fatal_event_work; + u64 cs_fatal_info; + u32 cs_fatal; +}; + +/** + * struct kbase_normal_suspend_buffer - Object representing a normal + * suspend buffer for queue group. + * @reg: Memory region allocated for the normal-mode suspend buffer. + * @phy: Array of physical memory pages allocated for the normal- + * mode suspend buffer. + */ +struct kbase_normal_suspend_buffer { + struct kbase_va_region *reg; + struct tagged_addr *phy; +}; + +/** + * struct kbase_protected_suspend_buffer - Object representing a protected + * suspend buffer for queue group. + * @reg: Memory region allocated for the protected-mode suspend buffer. + * @pma: Array of pointer to protected mode allocations containing + * information about memory pages allocated for protected mode + * suspend buffer. + */ +struct kbase_protected_suspend_buffer { + struct kbase_va_region *reg; + struct protected_memory_allocation **pma; +}; + +/** + * struct kbase_queue_group - Object representing a GPU command queue group. + * + * @kctx: Pointer to the kbase context with which this queue group + * is associated. + * @normal_suspend_buf: Object representing the normal suspend buffer. + * Normal-mode suspend buffer that is used for + * group context switch. 
+ * @protected_suspend_buf: Object representing the protected suspend + * buffer. Protected-mode suspend buffer that is + * used for group context switch. + * @handle: Handle which identifies this queue group. + * @csg_nr: Number/index of the CSG to which this queue group is + * mapped; KBASEP_CSG_NR_INVALID indicates that the queue + * group is not scheduled. + * @priority: Priority of the queue group, 0 being the highest, + * BASE_QUEUE_GROUP_PRIORITY_COUNT - 1 being the lowest. + * @tiler_max: Maximum number of tiler endpoints the group is allowed + * to use. + * @fragment_max: Maximum number of fragment endpoints the group is + * allowed to use. + * @compute_max: Maximum number of compute endpoints the group is + * allowed to use. + * @tiler_mask: Mask of tiler endpoints the group is allowed to use. + * @fragment_mask: Mask of fragment endpoints the group is allowed to use. + * @compute_mask: Mask of compute endpoints the group is allowed to use. + * @group_uid: 32-bit wide unsigned identifier for the group, unique + * across all kbase devices and contexts. + * @link: Link to this queue group in the 'runnable_groups' list of + * the corresponding kctx. + * @link_to_schedule: Link to this queue group in the list of prepared groups + * to be scheduled, if the group is runnable/suspended. + * If the group is idle or waiting for CQS, it would be a + * link to the list of idle/blocked groups list. + * @run_state: Current state of the queue group. + * @prepared_seq_num: Indicates the position of queue group in the list of + * prepared groups to be scheduled. + * @scan_seq_num: Scan out sequence number before adjusting for dynamic + * idle conditions. It is used for setting a group's + * onslot priority. It could differ from prepared_seq_number + * when there are idle groups. + * @faulted: Indicates that a GPU fault occurred for the queue group. + * This flag persists until the fault has been queued to be + * reported to userspace. 
+ * @bound_queues: Array of registered queues bound to this queue group. + * @doorbell_nr: Index of the hardware doorbell page assigned to the + * group. + * @protm_event_work: Work item corresponding to the protected mode entry + * event for this queue. + * @protm_pending_bitmap: Bit array to keep a track of CSs that + * have pending protected mode entry requests. + * @error_fatal: An error of type BASE_GPU_QUEUE_GROUP_ERROR_FATAL to be + * returned to userspace if such an error has occurred. + * @error_timeout: An error of type BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT + * to be returned to userspace if such an error has occurred. + * @error_tiler_oom: An error of type BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM + * to be returned to userspace if such an error has occurred. + * @timer_event_work: Work item to handle the progress timeout fatal event + * for the group. + */ +struct kbase_queue_group { + struct kbase_context *kctx; + struct kbase_normal_suspend_buffer normal_suspend_buf; + struct kbase_protected_suspend_buffer protected_suspend_buf; + u8 handle; + s8 csg_nr; + u8 priority; + + u8 tiler_max; + u8 fragment_max; + u8 compute_max; + + u64 tiler_mask; + u64 fragment_mask; + u64 compute_mask; + + u32 group_uid; + + struct list_head link; + struct list_head link_to_schedule; + enum kbase_csf_group_state run_state; + u32 prepared_seq_num; + u32 scan_seq_num; + bool faulted; + + struct kbase_queue *bound_queues[MAX_SUPPORTED_STREAMS_PER_GROUP]; + + int doorbell_nr; + struct work_struct protm_event_work; + DECLARE_BITMAP(protm_pending_bitmap, MAX_SUPPORTED_STREAMS_PER_GROUP); + + struct kbase_csf_notification error_fatal; + struct kbase_csf_notification error_timeout; + struct kbase_csf_notification error_tiler_oom; + + struct work_struct timer_event_work; +}; + +/** + * struct kbase_csf_kcpu_queue_context - Object representing the kernel CPU + * queues for a GPU address space. + * + * @lock: Lock preventing concurrent access to @array and the @in_use bitmap. 
+ * @array: Array of pointers to kernel CPU command queues. + * @in_use: Bitmap which indicates which kernel CPU command queues are in use. + * @wq: Dedicated workqueue for processing kernel CPU command queues. + * @num_cmds: The number of commands that have been enqueued across + * all the KCPU command queues. This could be used as a + * timestamp to determine the command's enqueueing time. + * @jit_cmds_head: A list of the just-in-time memory commands, both + * allocate & free, in submission order, protected + * by kbase_csf_kcpu_queue_context.lock. + * @jit_blocked_queues: A list of KCPU command queues blocked by a pending + * just-in-time memory allocation command which will be + * reattempted after the impending free of other active + * allocations. + */ +struct kbase_csf_kcpu_queue_context { + struct mutex lock; + struct kbase_kcpu_command_queue *array[KBASEP_MAX_KCPU_QUEUES]; + DECLARE_BITMAP(in_use, KBASEP_MAX_KCPU_QUEUES); + struct workqueue_struct *wq; + u64 num_cmds; + + struct list_head jit_cmds_head; + struct list_head jit_blocked_queues; +}; + +/** + * struct kbase_csf_cpu_queue_context - Object representing the cpu queue + * information. + * + * @buffer: Buffer containing CPU queue information provided by Userspace. + * @buffer_size: The size of @buffer. + * @dump_req_status: Indicates the current status for CPU queues dump request. + * @dump_cmp: Dumping cpu queue completion event. + */ +struct kbase_csf_cpu_queue_context { + char *buffer; + size_t buffer_size; + atomic_t dump_req_status; + struct completion dump_cmp; +}; + +/** + * struct kbase_csf_heap_context_allocator - Allocator of heap contexts + * + * Heap context structures are allocated by the kernel for use by the firmware. + * The current implementation subdivides a single GPU memory region for use as + * a sparse array. + * + * @kctx: Pointer to the kbase context with which this allocator is + * associated. 
+ * @region: Pointer to a GPU memory region from which heap context structures + * are allocated. NULL if no heap contexts have been allocated. + * @gpu_va: GPU virtual address of the start of the region from which heap + * context structures are allocated. 0 if no heap contexts have been + * allocated. + * @lock: Lock preventing concurrent access to the @in_use bitmap. + * @in_use: Bitmap that indicates which heap context structures are currently + * allocated (in @region). + */ +struct kbase_csf_heap_context_allocator { + struct kbase_context *kctx; + struct kbase_va_region *region; + u64 gpu_va; + struct mutex lock; + DECLARE_BITMAP(in_use, MAX_TILER_HEAPS); +}; + +/** + * struct kbase_csf_tiler_heap_context - Object representing the tiler heaps + * context for a GPU address space. + * + * This contains all of the CSF state relating to chunked tiler heaps for one + * @kbase_context. It is not the same as a heap context structure allocated by + * the kernel for use by the firmware. + * + * @lock: Lock preventing concurrent access to the tiler heaps. + * @list: List of tiler heaps. + * @ctx_alloc: Allocator for heap context structures. + * @nr_of_heaps: Total number of tiler heaps that were added during the + * life time of the context. + */ +struct kbase_csf_tiler_heap_context { + struct mutex lock; + struct list_head list; + struct kbase_csf_heap_context_allocator ctx_alloc; + u64 nr_of_heaps; +}; + +/** + * struct kbase_csf_scheduler_context - Object representing the scheduler's + * context for a GPU address space. + * + * @runnable_groups: Lists of runnable GPU command queue groups in the kctx, + * one per queue group relative-priority level. + * @num_runnable_grps: Total number of runnable groups across all priority + * levels in @runnable_groups. + * @idle_wait_groups: A list of GPU command queue groups in which all enabled + * GPU command queues are idle and at least one of them + * is blocked on a sync wait operation. 
+ * @num_idle_wait_grps: Length of the @idle_wait_groups list. + * @sync_update_wq: Dedicated workqueue to process work items corresponding + * to the sync_update events by sync_set/sync_add + * instruction execution on CSs bound to groups + * of @idle_wait_groups list. + * @sync_update_work: work item to process the sync_update events by + * sync_set / sync_add instruction execution on command + * streams bound to groups of @idle_wait_groups list. + * @ngrp_to_schedule: Number of groups added for the context to the + * 'groups_to_schedule' list of scheduler instance. + */ +struct kbase_csf_scheduler_context { + struct list_head runnable_groups[KBASE_QUEUE_GROUP_PRIORITY_COUNT]; + u32 num_runnable_grps; + struct list_head idle_wait_groups; + u32 num_idle_wait_grps; + struct workqueue_struct *sync_update_wq; + struct work_struct sync_update_work; + u32 ngrp_to_schedule; +}; + +/** + * struct kbase_csf_context - Object representing CSF for a GPU address space. + * + * @event_pages_head: A list of pages allocated for the event memory used by + * the synchronization objects. A separate list would help + * in the fast lookup, since the list is expected to be short + * as one page would provide the memory for up to 1K + * synchronization objects. + * KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES is the upper + * bound on the size of event memory. + * @cookies: Bitmask consisting of KBASE_CSF_NUM_USER_IO_PAGES_HANDLE + * bits, used for creating the User mode CPU mapping in a + * deferred manner of a pair of User mode input/output pages + * & a hardware doorbell page. + * The pages are allocated when a GPU command queue is + * bound to a CSG in kbase_csf_queue_bind. + * This helps returning unique handles to Userspace from + * kbase_csf_queue_bind and later retrieving the pointer to + * queue in the mmap handler.
+ * @user_pages_info: Array containing pointers to queue + * structures, used in conjunction with cookies bitmask for + * providing a mechanism to create a CPU mapping of + * input/output pages & hardware doorbell page. + * @lock: Serializes accesses to all members, except for ones that + * have their own locks. + * @queue_groups: Array of registered GPU command queue groups. + * @queue_list: Linked list of GPU command queues not yet deregistered. + * Note that queues can persist after deregistration if the + * userspace mapping created for them on bind operation + * hasn't been removed. + * @kcpu_queues: Kernel CPU command queues. + * @event_lock: Lock protecting access to @event_callback_list and + * @error_list. + * @event_callback_list: List of callbacks which are registered to serve CSF + * events. + * @tiler_heaps: Chunked tiler memory heaps. + * @wq: Dedicated workqueue to process work items corresponding + * to the OoM events raised for chunked tiler heaps being + * used by GPU command queues, and progress timeout events. + * @link: Link to this csf context in the 'runnable_kctxs' list of + * the scheduler instance + * @user_reg_vma: Pointer to the vma corresponding to the virtual mapping + * of the USER register page. Currently used only for sanity + * checking. + * @sched: Object representing the scheduler's context + * @error_list: List for CS fatal errors in this context. + * Link of fatal error is + * &struct_kbase_csf_notification.link. + * @event_lock needs to be held to access this list. + * @cpu_queue: CPU queue information. Only available when DEBUG_FS + * is enabled.
+ */ +struct kbase_csf_context { + struct list_head event_pages_head; + DECLARE_BITMAP(cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE); + struct kbase_queue *user_pages_info[ + KBASE_CSF_NUM_USER_IO_PAGES_HANDLE]; + struct mutex lock; + struct kbase_queue_group *queue_groups[MAX_QUEUE_GROUP_NUM]; + struct list_head queue_list; + struct kbase_csf_kcpu_queue_context kcpu_queues; + spinlock_t event_lock; + struct list_head event_callback_list; + struct kbase_csf_tiler_heap_context tiler_heaps; + struct workqueue_struct *wq; + struct list_head link; + struct vm_area_struct *user_reg_vma; + struct kbase_csf_scheduler_context sched; + struct list_head error_list; +#if IS_ENABLED(CONFIG_DEBUG_FS) + struct kbase_csf_cpu_queue_context cpu_queue; +#endif +}; + +/** + * struct kbase_csf_reset_gpu - Object containing the members required for + * GPU reset handling. + * @workq: Workqueue to execute the GPU reset work item @work. + * @work: Work item for performing the GPU reset. + * @wait: Wait queue used to wait for the GPU reset completion. + * @sem: RW Semaphore to ensure no other thread attempts to use the + * GPU whilst a reset is in process. Unlike traditional + * semaphores and wait queues, this allows Linux's lockdep + * mechanism to check for deadlocks involving reset waits. + * @state: Tracks if the GPU reset is in progress or not. + * The state is represented by enum @kbase_csf_reset_gpu_state. + */ +struct kbase_csf_reset_gpu { + struct workqueue_struct *workq; + struct work_struct work; + wait_queue_head_t wait; + struct rw_semaphore sem; + atomic_t state; +}; + +/** + * struct kbase_csf_csg_slot - Object containing members for tracking the state + * of CSG slots. + * @resident_group: pointer to the queue group that is resident on the CSG slot. + * @state: state of the slot as per enum @kbase_csf_csg_slot_state. + * @trigger_jiffies: value of jiffies when change in slot state is recorded. + * @priority: dynamic priority assigned to CSG slot. 
+ */ +struct kbase_csf_csg_slot { + struct kbase_queue_group *resident_group; + atomic_t state; + unsigned long trigger_jiffies; + u8 priority; +}; + +/** + * struct kbase_csf_scheduler - Object representing the scheduler used for + * CSF for an instance of GPU platform device. + * @lock: Lock to serialize the scheduler operations and + * access to the data members. + * @interrupt_lock: Lock to protect members accessed by interrupt + * handler. + * @state: The operational phase the scheduler is in. Primarily + * used for indicating what in-cycle schedule actions + * are allowed. + * @doorbell_inuse_bitmap: Bitmap of hardware doorbell pages keeping track of + * which pages are currently available for assignment + * to clients. + * @csg_inuse_bitmap: Bitmap to keep a track of CSG slots + * that are currently in use. + * @csg_slots: The array for tracking the state of CS + * group slots. + * @runnable_kctxs: List of Kbase contexts that have runnable command + * queue groups. + * @groups_to_schedule: List of runnable queue groups prepared on every + * scheduler tick. The dynamic priority of the CSG + * slot assigned to a group will depend upon the + * position of group in the list. + * @ngrp_to_schedule: Number of groups in the @groups_to_schedule list, + * incremented when a group is added to the list, used + * to record the position of group in the list. + * @num_active_address_spaces: Number of GPU address space slots that would get + * used to program the groups in @groups_to_schedule + * list on all the available CSG + * slots. + * @num_csg_slots_for_tick: Number of CSG slots that can be + * active in the given tick/tock. This depends on the + * value of @num_active_address_spaces. + * @remaining_tick_slots: Tracking the number of remaining available slots + * for @num_csg_slots_for_tick during the scheduling + * operation in a tick/tock. 
+ * @idle_groups_to_schedule: List of runnable queue groups, in which all GPU + * command queues became idle or are waiting for + * synchronization object, prepared on every + * scheduler tick. The groups in this list are + * appended to the tail of @groups_to_schedule list + * after the scan out so that the idle groups aren't + * preferred for scheduling over the non-idle ones. + * @csg_scan_count_for_tick: CSG scanout count for assigning the scan_seq_num for + * each scanned out group during scheduling operation + * in a tick/tock. + * @total_runnable_grps: Total number of runnable groups across all KCTXs. + * @csgs_events_enable_mask: Used for temporarily masking off asynchronous events + * from firmware (such as OoM events) before a group + * is suspended. + * @csg_slots_idle_mask: Bit array for storing the mask of CS + * group slots for which idle notification was + * received. + * @csg_slots_prio_update: Bit array for tracking slots that have an on-slot + * priority update operation. + * @last_schedule: Time in jiffies recorded when the last "tick" or + * "tock" schedule operation concluded. Used for + * evaluating the exclusion window for in-cycle + * schedule operation. + * @timer_enabled: Whether the CSF scheduler wakes itself up for + * periodic scheduling tasks. If this value is 0 + * then it will only perform scheduling under the + * influence of external factors e.g., IRQs, IOCTLs. + * @wq: Dedicated workqueue to execute the @tick_work. + * @tick_timer: High-resolution timer employed to schedule tick + * workqueue items (kernel-provided delayed_work + * items do not use hrtimer and for some reason do + * not provide sufficiently reliable periodicity). + * @tick_work: Work item that performs the "schedule on tick" + * operation to implement timeslice-based scheduling. + * @tock_work: Work item that would perform the schedule on tock + * operation to implement the asynchronous scheduling.
+ * @ping_work: Work item that would ping the firmware at regular + * intervals, only if there is a single active CSG + * slot, to check if firmware is alive and would + * initiate a reset if the ping request isn't + * acknowledged. + * @top_ctx: Pointer to the Kbase context corresponding to the + * @top_grp. + * @top_grp: Pointer to queue group inside @groups_to_schedule + * list that was assigned the highest slot priority. + * @tock_pending_request: A "tock" request is pending: a group that is not + * currently on the GPU demands to be scheduled. + * @active_protm_grp: Indicates if firmware has been permitted to let GPU + * enter protected mode with the given group. On exit + * from protected mode the pointer is reset to NULL. + * @gpu_idle_fw_timer_enabled: Whether the CSF scheduler has activated the + * firmware idle hysteresis timer for preparing a + * GPU suspend on idle. + * @gpu_idle_work: Work item for facilitating the scheduler to bring + * the GPU to a low-power mode on becoming idle. + * @non_idle_offslot_grps: Count of off-slot non-idle groups. Reset during + * the scheduler active phase in a tick. It then + * tracks the count of non-idle groups across all the + * other phases. + * @non_idle_scanout_grps: Count of the non-idle groups in the scan-out + * list at the scheduling prepare stage. + * @pm_active_count: Count indicating if the scheduler is owning a power + * management reference count. Reference is taken when + * the count becomes 1 and is dropped when the count + * becomes 0. It is used to enable the power up of MCU + * after GPU and L2 cache have been powered up. So when + * this count is zero, MCU will not be powered up. + * @csg_scheduling_period_ms: Duration of Scheduling tick in milliseconds. + * @tick_timer_active: Indicates whether the @tick_timer is effectively + * active or not, as the callback function of + * @tick_timer will enqueue @tick_work only if this + * flag is true.
This is mainly useful for the case + * when scheduling tick needs to be advanced from + * interrupt context, without actually deactivating + * the @tick_timer first and then enqueuing @tick_work. + */ +struct kbase_csf_scheduler { + struct mutex lock; + spinlock_t interrupt_lock; + enum kbase_csf_scheduler_state state; + DECLARE_BITMAP(doorbell_inuse_bitmap, CSF_NUM_DOORBELL); + DECLARE_BITMAP(csg_inuse_bitmap, MAX_SUPPORTED_CSGS); + struct kbase_csf_csg_slot *csg_slots; + struct list_head runnable_kctxs; + struct list_head groups_to_schedule; + u32 ngrp_to_schedule; + u32 num_active_address_spaces; + u32 num_csg_slots_for_tick; + u32 remaining_tick_slots; + struct list_head idle_groups_to_schedule; + u32 csg_scan_count_for_tick; + u32 total_runnable_grps; + DECLARE_BITMAP(csgs_events_enable_mask, MAX_SUPPORTED_CSGS); + DECLARE_BITMAP(csg_slots_idle_mask, MAX_SUPPORTED_CSGS); + DECLARE_BITMAP(csg_slots_prio_update, MAX_SUPPORTED_CSGS); + unsigned long last_schedule; + bool timer_enabled; + struct workqueue_struct *wq; + struct hrtimer tick_timer; + struct work_struct tick_work; + struct delayed_work tock_work; + struct delayed_work ping_work; + struct kbase_context *top_ctx; + struct kbase_queue_group *top_grp; + bool tock_pending_request; + struct kbase_queue_group *active_protm_grp; + bool gpu_idle_fw_timer_enabled; + struct work_struct gpu_idle_work; + atomic_t non_idle_offslot_grps; + u32 non_idle_scanout_grps; + u32 pm_active_count; + unsigned int csg_scheduling_period_ms; + bool tick_timer_active; +}; + +/** + * Number of GPU cycles per unit of the global progress timeout. + */ +#define GLB_PROGRESS_TIMER_TIMEOUT_SCALE ((u64)1024) + +/** + * Maximum value of the global progress timeout. + */ +#define GLB_PROGRESS_TIMER_TIMEOUT_MAX \ + ((GLB_PROGRESS_TIMER_TIMEOUT_MASK >> \ + GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) * \ + GLB_PROGRESS_TIMER_TIMEOUT_SCALE) + +/** + * Default GLB_PWROFF_TIMER_TIMEOUT value in unit of micro-seconds.
+ */ +#define DEFAULT_GLB_PWROFF_TIMEOUT_US (800) + +/** + * In typical operations, the management of the shader core power transitions + * is delegated to the MCU/firmware. However, if the host driver is configured + * to take direct control, one needs to disable the MCU firmware GLB_PWROFF + * timer. + */ +#define DISABLE_GLB_PWROFF_TIMER (0) + +/* Index of the GPU_ACTIVE counter within the CSHW counter block */ +#define GPU_ACTIVE_CNT_IDX (4) + +/** + * Maximum number of sessions that can be managed by the IPA Control component. + */ +#if MALI_UNIT_TEST +#define KBASE_IPA_CONTROL_MAX_SESSIONS ((size_t)8) +#else +#define KBASE_IPA_CONTROL_MAX_SESSIONS ((size_t)2) +#endif + +/** + * enum kbase_ipa_core_type - Type of counter block for performance counters + * + * @KBASE_IPA_CORE_TYPE_CSHW: CS Hardware counters. + * @KBASE_IPA_CORE_TYPE_MEMSYS: Memory System counters. + * @KBASE_IPA_CORE_TYPE_TILER: Tiler counters. + * @KBASE_IPA_CORE_TYPE_SHADER: Shader Core counters. + * @KBASE_IPA_CORE_TYPE_NUM: Number of core types. + */ +enum kbase_ipa_core_type { + KBASE_IPA_CORE_TYPE_CSHW = 0, + KBASE_IPA_CORE_TYPE_MEMSYS, + KBASE_IPA_CORE_TYPE_TILER, + KBASE_IPA_CORE_TYPE_SHADER, + KBASE_IPA_CORE_TYPE_NUM +}; + +/** + * Number of configurable counters per type of block on the IPA Control + * interface. + */ +#define KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS ((size_t)8) + +/** + * Total number of configurable counters existing on the IPA Control interface. + */ +#define KBASE_IPA_CONTROL_MAX_COUNTERS \ + ((size_t)KBASE_IPA_CORE_TYPE_NUM * KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS) + +/** + * struct kbase_ipa_control_prfcnt - Session for a single performance counter + * + * @latest_raw_value: Latest raw value read from the counter. + * @scaling_factor: Factor raw value shall be multiplied by. + * @accumulated_diff: Partial sum of scaled and normalized values from + * previous samples. This represent all the values + * that were read before the latest raw value. 
+ * @type: Type of counter block for performance counter. + * @select_idx: Index of the performance counter as configured on + * the IPA Control interface. + * @gpu_norm: Indicating whether values shall be normalized by + * GPU frequency. If true, returned values represent + * an interval of time expressed in seconds (when the + * scaling factor is set to 1). + */ +struct kbase_ipa_control_prfcnt { + u64 latest_raw_value; + u64 scaling_factor; + u64 accumulated_diff; + enum kbase_ipa_core_type type; + u8 select_idx; + bool gpu_norm; +}; + +/** + * struct kbase_ipa_control_session - Session for an IPA Control client + * + * @prfcnts: Sessions for individual performance counters. + * @num_prfcnts: Number of performance counters. + * @active: Indicates whether this slot is in use or not + * @last_query_time: Time of last query, in ns + * @protm_time: Amount of time (in ns) that GPU has been in protected mode + */ +struct kbase_ipa_control_session { + struct kbase_ipa_control_prfcnt prfcnts[KBASE_IPA_CONTROL_MAX_COUNTERS]; + size_t num_prfcnts; + bool active; + u64 last_query_time; + u64 protm_time; +}; + +/** + * struct kbase_ipa_control_prfcnt_config - Performance counter configuration + * + * @idx: Index of the performance counter inside the block, as specified + * in the GPU architecture. + * @refcount: Number of client sessions bound to this counter. + * + * This structure represents one configurable performance counter of + * the IPA Control interface. The entry may be mapped to a specific counter + * by one or more client sessions. The counter is considered to be unused + * if it isn't part of any client session. + */ +struct kbase_ipa_control_prfcnt_config { + u8 idx; + u8 refcount; +}; + +/** + * struct kbase_ipa_control_prfcnt_block - Block of performance counters + * + * @select: Current performance counter configuration. + * @num_available_counters: Number of counters that are not already configured.
+ * + */ +struct kbase_ipa_control_prfcnt_block { + struct kbase_ipa_control_prfcnt_config + select[KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS]; + size_t num_available_counters; +}; + +/** + * struct kbase_ipa_control - Manager of the IPA Control interface. + * + * @blocks: Current configuration of performance counters + * for the IPA Control interface. + * @sessions: State of client sessions, storing information + * like performance counters the client subscribed to + * and latest value read from each counter. + * @lock: Spinlock to serialize access by concurrent clients. + * @rtm_listener_data: Private data for allocating a GPU frequency change + * listener. + * @num_active_sessions: Number of sessions opened by clients. + * @cur_gpu_rate: Current GPU top-level operating frequency, in Hz. + * @rtm_listener_data: Private data for allocating a GPU frequency change + * listener. + * @protm_start: Time (in ns) at which the GPU entered protected mode + */ +struct kbase_ipa_control { + struct kbase_ipa_control_prfcnt_block blocks[KBASE_IPA_CORE_TYPE_NUM]; + struct kbase_ipa_control_session + sessions[KBASE_IPA_CONTROL_MAX_SESSIONS]; + spinlock_t lock; + void *rtm_listener_data; + size_t num_active_sessions; + u32 cur_gpu_rate; + u64 protm_start; +}; + +/** + * struct kbase_csf_firmware_interface - Interface in the MCU firmware + * + * @node: Interface objects are on the kbase_device:csf.firmware_interfaces + * list using this list_head to link them + * @phys: Array of the physical (tagged) addresses making up this interface + * @name: NULL-terminated string naming the interface + * @num_pages: Number of entries in @phys and @pma (and length of the interface) + * @virtual: Starting GPU virtual address this interface is mapped at + * @flags: bitmask of CSF_FIRMWARE_ENTRY_* conveying the interface attributes + * @data_start: Offset into firmware image at which the interface data starts + * @data_end: Offset into firmware image at which the interface data ends + * @kernel_map: A 
kernel mapping of the memory or NULL if not required to be + * mapped in the kernel + * @pma: Array of pointers to protected memory allocations. + */ +struct kbase_csf_firmware_interface { + struct list_head node; + struct tagged_addr *phys; + char *name; + u32 num_pages; + u32 virtual; + u32 flags; + u32 data_start; + u32 data_end; + void *kernel_map; + struct protected_memory_allocation **pma; +}; + +/** + * struct kbase_csf_hwcnt - Object containing members for handling the dump of + * HW counters. + * + * @request_pending: Flag set when HWC requested and used for HWC sample + * done interrupt. + * @enable_pending: Flag set when HWC enable status change and used for + * enable done interrupt. + */ +struct kbase_csf_hwcnt { + bool request_pending; + bool enable_pending; +}; + +/** + * struct kbase_csf_device - Object representing CSF for an instance of GPU + * platform device. + * + * @mcu_mmu: MMU page tables for the MCU firmware + * @firmware_interfaces: List of interfaces defined in the firmware image + * @firmware_config: List of configuration options within the firmware + * image + * @firmware_timeline_metadata: List of timeline meta-data within the firmware + * image + * @fw_cfg_kobj: Pointer to the kobject corresponding to the sysfs + * directory that contains a sub-directory for each + * of the configuration option present in the + * firmware image. + * @firmware_trace_buffers: List of trace buffers described in the firmware + * image. + * @shared_interface: Pointer to the interface object containing info for + * the memory area shared between firmware & host. + * @shared_reg_rbtree: RB tree of the memory regions allocated from the + * shared interface segment in MCU firmware address + * space. + * @db_filp: Pointer to a dummy file, that along with + * @db_file_offsets, facilitates the use of unique + * file offset for the userspace mapping created + * for Hw Doorbell pages. The userspace mapping + * is made to point to this file inside the mmap + * handler.
+ * @db_file_offsets: Counter that is incremented every time a GPU + * command queue is bound to provide a unique file + * offset range for @db_filp file, so that pte of + * Doorbell page can be zapped through the kernel + * function unmap_mapping_range(). It is incremented + * in page units. + * @dummy_db_page: Address of the dummy page that is mapped in place + * of the real Hw doorbell page for the active GPU + * command queues after they are stopped or after the + * GPU is powered down. + * @dummy_user_reg_page: Address of the dummy page that is mapped in place + * of the real User register page just before the GPU + * is powered down. The User register page is mapped + * in the address space of every process, that created + * a Base context, to enable the access to LATEST_FLUSH + * register from userspace. + * @mali_file_inode: Pointer to the inode corresponding to mali device + * file. This is needed in order to switch to the + * @dummy_user_reg_page on GPU power down. + * All instances of the mali device file will point to + * the same inode. + * @reg_lock: Lock to serialize the MCU firmware related actions + * that affect all contexts such as allocation of + * regions from shared interface area, assignment of + * hardware doorbell pages, assignment of CSGs, + * sending global requests. + * @event_wait: Wait queue to wait for receiving csf events, i.e. + * the interrupt from CSF firmware, or scheduler state + * changes. + * @interrupt_received: Flag set when the interrupt is received from CSF fw + * @global_iface: The result of parsing the global interface + * structure set up by the firmware, including the + * CSGs, CSs, and their properties + * @scheduler: The CS scheduler instance. + * @reset: Contain members required for GPU reset handling. + * @progress_timeout: Maximum number of GPU clock cycles without forward + * progress to allow, for all tasks running on + * hardware endpoints (e.g.
shader cores), before + * terminating a GPU command queue group. + * Must not exceed @GLB_PROGRESS_TIMER_TIMEOUT_MAX. + * @pma_dev: Pointer to protected memory allocator device. + * @firmware_inited: Flag for indicating that the cold-boot stage of + * the MCU has completed. + * @firmware_reloaded: Flag for indicating a firmware reload operation + * in GPU reset has completed. + * @firmware_reload_needed: Flag for indicating that the firmware needs to be + * reloaded as part of the GPU reset action. + * @firmware_hctl_core_pwr: Flag for indicating that the host driver is in + * charge of the shader core's power transitions, and + * the mcu_core_pwroff timeout feature is disabled + * (i.e. configured 0 in the register field). If + * false, the control is delegated to the MCU. + * @firmware_reload_work: Work item for facilitating the procedural actions + * on reloading the firmware. + * @glb_init_request_pending: Flag to indicate that Global requests have been + * sent to the FW after MCU was re-enabled and their + * acknowledgement is pending. + * @fw_error_work: Work item for handling the firmware internal error + * fatal event. + * @ipa_control: IPA Control component manager. + * @mcu_core_pwroff_dur_us: Sysfs attribute for the glb_pwroff timeout input + * in unit of micro-seconds. The firmware does not use + * it directly. + * @mcu_core_pwroff_dur_count: The counterpart of the glb_pwroff timeout input + * in interface required format, ready to be used + * directly in the firmware. + * @mcu_core_pwroff_reg_shadow: The actual value that has been programmed into + * the glb_pwroff register. This is separated from + * the @p mcu_core_pwroff_dur_count as an update + * to the latter is asynchronous. + * @gpu_idle_hysteresis_ms: Sysfs attribute for the idle hysteresis time + * window in unit of ms. The firmware does not use it + * directly.
+ * @gpu_idle_dur_count: The counterpart of the hysteresis time window in + * interface required format, ready to be used + * directly in the firmware. + * @fw_timeout_ms: Timeout value (in milliseconds) used when waiting + * for any request sent to the firmware. + * @hwcnt: Contain members required for handling the dump of + * HW counters. + */ +struct kbase_csf_device { + struct kbase_mmu_table mcu_mmu; + struct list_head firmware_interfaces; + struct list_head firmware_config; + struct list_head firmware_timeline_metadata; + struct kobject *fw_cfg_kobj; + struct kbase_csf_trace_buffers firmware_trace_buffers; + void *shared_interface; + struct rb_root shared_reg_rbtree; + struct file *db_filp; + u32 db_file_offsets; + struct tagged_addr dummy_db_page; + struct tagged_addr dummy_user_reg_page; + struct inode *mali_file_inode; + struct mutex reg_lock; + wait_queue_head_t event_wait; + bool interrupt_received; + struct kbase_csf_global_iface global_iface; + struct kbase_csf_scheduler scheduler; + struct kbase_csf_reset_gpu reset; + atomic64_t progress_timeout; + struct protected_memory_allocator_device *pma_dev; + bool firmware_inited; + bool firmware_reloaded; + bool firmware_reload_needed; + bool firmware_hctl_core_pwr; + struct work_struct firmware_reload_work; + bool glb_init_request_pending; + struct work_struct fw_error_work; + struct kbase_ipa_control ipa_control; + u32 mcu_core_pwroff_dur_us; + u32 mcu_core_pwroff_dur_count; + u32 mcu_core_pwroff_reg_shadow; + u32 gpu_idle_hysteresis_ms; + u32 gpu_idle_dur_count; + unsigned int fw_timeout_ms; + struct kbase_csf_hwcnt hwcnt; +}; + +/** + * struct kbase_as - Object representing an address space of GPU. + * @number: Index at which this address space structure is present + * in an array of address space structures embedded inside + * the &struct kbase_device. + * @pf_wq: Workqueue for processing work items related to + * Page fault, Bus fault and GPU fault handling. 
+ * @work_pagefault: Work item for the Page fault handling. + * @work_busfault: Work item for the Bus fault handling. + * @work_gpufault: Work item for the GPU fault handling. + * @pf_data: Data relating to Page fault. + * @bf_data: Data relating to Bus fault. + * @gf_data: Data relating to GPU fault. + * @current_setup: Stores the MMU configuration for this address space. + */ +struct kbase_as { + int number; + struct workqueue_struct *pf_wq; + struct work_struct work_pagefault; + struct work_struct work_busfault; + struct work_struct work_gpufault; + struct kbase_fault pf_data; + struct kbase_fault bf_data; + struct kbase_fault gf_data; + struct kbase_mmu_setup current_setup; +}; + +#endif /* _KBASE_CSF_DEFS_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware.c new file mode 100644 index 0000000..1b31122 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware.c @@ -0,0 +1,2337 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include "mali_kbase.h" +#include "mali_kbase_csf_firmware_cfg.h" +#include "mali_kbase_csf_trace_buffer.h" +#include "mali_kbase_csf_timeout.h" +#include "mali_kbase_mem.h" +#include "mali_kbase_reset_gpu.h" +#include "mali_kbase_ctx_sched.h" +#include "mali_kbase_csf_scheduler.h" +#include "device/mali_kbase_device.h" +#include "backend/gpu/mali_kbase_pm_internal.h" +#include "tl/mali_kbase_timeline_priv.h" +#include "mali_kbase_csf_tl_reader.h" +#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" +#include + +#include +#include +#include +#include +#include +#include +#if (KERNEL_VERSION(4, 13, 0) <= LINUX_VERSION_CODE) +#include +#endif +#include +#include + +#define MALI_MAX_FIRMWARE_NAME_LEN ((size_t)20) + + +static char fw_name[MALI_MAX_FIRMWARE_NAME_LEN] = "mali_csffw.bin"; +module_param_string(fw_name, fw_name, sizeof(fw_name), 0644); +MODULE_PARM_DESC(fw_name, "firmware image"); + +/* The waiting time for firmware to boot */ +static unsigned int csf_firmware_boot_timeout_ms = 500; +module_param(csf_firmware_boot_timeout_ms, uint, 0444); +MODULE_PARM_DESC(csf_firmware_boot_timeout_ms, + "Maximum time to wait for firmware to boot."); + +#ifdef CONFIG_MALI_DEBUG +/* Makes Driver wait indefinitely for an acknowledgment for the different + * requests it sends to firmware. Otherwise the timeouts interfere with the + * use of debugger for source-level debugging of firmware as Driver initiates + * a GPU reset when a request times out, which always happen when a debugger + * is connected. 
+ */ +bool fw_debug; /* Default value of 0/false */ +module_param(fw_debug, bool, 0444); +MODULE_PARM_DESC(fw_debug, + "Enables effective use of a debugger for debugging firmware code."); +#endif + +#define FIRMWARE_HEADER_MAGIC (0xC3F13A6Eul) +#define FIRMWARE_HEADER_VERSION (0ul) +#define FIRMWARE_HEADER_LENGTH (0x14ul) + +#define CSF_FIRMWARE_ENTRY_SUPPORTED_FLAGS \ + (CSF_FIRMWARE_ENTRY_READ | \ + CSF_FIRMWARE_ENTRY_WRITE | \ + CSF_FIRMWARE_ENTRY_EXECUTE | \ + CSF_FIRMWARE_ENTRY_PROTECTED | \ + CSF_FIRMWARE_ENTRY_SHARED | \ + CSF_FIRMWARE_ENTRY_ZERO | \ + CSF_FIRMWARE_ENTRY_CACHE_MODE) + +#define CSF_FIRMWARE_ENTRY_TYPE_INTERFACE (0) +#define CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION (1) +#define CSF_FIRMWARE_ENTRY_TYPE_FUTF_TEST (2) +#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3) +#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4) + +#define CSF_FIRMWARE_CACHE_MODE_NONE (0ul << 3) +#define CSF_FIRMWARE_CACHE_MODE_CACHED (1ul << 3) +#define CSF_FIRMWARE_CACHE_MODE_UNCACHED_COHERENT (2ul << 3) +#define CSF_FIRMWARE_CACHE_MODE_CACHED_COHERENT (3ul << 3) + +#define INTERFACE_ENTRY_NAME_OFFSET (0x14) + +#define TL_METADATA_ENTRY_NAME_OFFSET (0x8) + +#define CSF_MAX_FW_STOP_LOOPS (100000) + +#define CSF_GLB_REQ_CFG_MASK \ + (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ + GLB_REQ_CFG_PWROFF_TIMER_MASK) + +static inline u32 input_page_read(const u32 *const input, const u32 offset) +{ + WARN_ON(offset % sizeof(u32)); + + return input[offset / sizeof(u32)]; +} + +static inline void input_page_write(u32 *const input, const u32 offset, + const u32 value) +{ + WARN_ON(offset % sizeof(u32)); + + input[offset / sizeof(u32)] = value; +} + +static inline void input_page_partial_write(u32 *const input, const u32 offset, + u32 value, u32 mask) +{ + WARN_ON(offset % sizeof(u32)); + + input[offset / sizeof(u32)] = + (input_page_read(input, offset) & ~mask) | (value & mask); +} + +static inline u32 output_page_read(const u32 *const output, const u32 offset) +{ + 
WARN_ON(offset % sizeof(u32)); + + return output[offset / sizeof(u32)]; +} + +static unsigned int entry_type(u32 header) +{ + return header & 0xFF; +} +static unsigned int entry_size(u32 header) +{ + return (header >> 8) & 0xFF; +} +static bool entry_update(u32 header) +{ + return (header >> 30) & 0x1; +} +static bool entry_optional(u32 header) +{ + return (header >> 31) & 0x1; +} + +/** + * struct firmware_timeline_metadata - + * Timeline metadata item within the MCU firmware + * + * @node: List head linking all timeline metadata to + * kbase_device:csf.firmware_timeline_metadata. + * @name: NUL-terminated string naming the metadata. + * @data: Metadata content. + * @size: Metadata size. + */ +struct firmware_timeline_metadata { + struct list_head node; + char *name; + char *data; + size_t size; +}; + +/* The shared interface area, used for communicating with firmware, is managed + * like a virtual memory zone. Reserve the virtual space from that zone + * corresponding to shared interface entry parsed from the firmware image. + * The shared_reg_rbtree should have been initialized before calling this + * function. 
+ */ +static int setup_shared_iface_static_region(struct kbase_device *kbdev) +{ + struct kbase_csf_firmware_interface *interface = + kbdev->csf.shared_interface; + struct kbase_va_region *reg; + int ret = -ENOMEM; + + if (!interface) + return -EINVAL; + + reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0, + interface->num_pages, KBASE_REG_ZONE_MCU_SHARED); + if (reg) { + ret = kbase_add_va_region_rbtree(kbdev, reg, + interface->virtual, interface->num_pages, 1); + if (ret) + kfree(reg); + else + reg->flags &= ~KBASE_REG_FREE; + } + + return ret; +} + +static int wait_mcu_status_value(struct kbase_device *kbdev, u32 val) +{ + u32 max_loops = CSF_MAX_FW_STOP_LOOPS; + + /* wait for the MCU_STATUS register to reach the given status value */ + while (--max_loops && + (kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS)) != val)) { + } + + return (max_loops == 0) ? -1 : 0; +} + +void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev) +{ + if (wait_mcu_status_value(kbdev, MCU_CNTRL_DISABLE) < 0) + dev_err(kbdev->dev, "MCU failed to get disabled"); +} + +static void wait_for_firmware_stop(struct kbase_device *kbdev) +{ + if (wait_mcu_status_value(kbdev, MCU_CNTRL_DISABLE) < 0) { + /* This error shall go away once MIDJM-2371 is closed */ + dev_err(kbdev->dev, "Firmware failed to stop"); + } +} + +static void stop_csf_firmware(struct kbase_device *kbdev) +{ + /* Stop the MCU firmware */ + kbase_csf_firmware_disable_mcu(kbdev); + + wait_for_firmware_stop(kbdev); +} + +static void wait_for_firmware_boot(struct kbase_device *kbdev) +{ + const long wait_timeout = + kbase_csf_timeout_in_jiffies(csf_firmware_boot_timeout_ms); + long remaining; + + /* Firmware will generate a global interface interrupt once booting + * is complete + */ + remaining = wait_event_timeout(kbdev->csf.event_wait, + kbdev->csf.interrupt_received == true, wait_timeout); + + if (!remaining) + dev_err(kbdev->dev, "Timed out waiting for fw boot completion"); + + 
kbdev->csf.interrupt_received = false; +} + +static void boot_csf_firmware(struct kbase_device *kbdev) +{ + kbase_csf_firmware_enable_mcu(kbdev); + + wait_for_firmware_boot(kbdev); +} + +static void wait_ready(struct kbase_device *kbdev) +{ + u32 max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; + u32 val; + + val = kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)); + + /* Wait for a while for the update command to take effect */ + while (--max_loops && (val & AS_STATUS_AS_ACTIVE)) + val = kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)); + + if (max_loops == 0) + dev_err(kbdev->dev, "AS_ACTIVE bit stuck, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n"); +} + +static void unload_mmu_tables(struct kbase_device *kbdev) +{ + unsigned long irq_flags; + + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); + if (kbdev->pm.backend.gpu_powered) + kbase_mmu_disable_as(kbdev, MCU_AS_NR); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); + mutex_unlock(&kbdev->mmu_hw_mutex); +} + +static void load_mmu_tables(struct kbase_device *kbdev) +{ + unsigned long irq_flags; + + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); + kbase_mmu_update(kbdev, &kbdev->csf.mcu_mmu, MCU_AS_NR); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); + mutex_unlock(&kbdev->mmu_hw_mutex); + + /* Wait for a while for the update command to take effect */ + wait_ready(kbdev); +} + +/** + * convert_mem_flags() - Convert firmware memory flags to GPU region flags + * + * Return: GPU memory region flags + * + * @kbdev: Instance of GPU platform device (used to determine system coherency) + * @flags: Flags of an "interface memory setup" section in a firmware image + * @cm: appropriate cache mode chosen for the "interface memory setup" + * section, which could be different from the cache mode requested by + * firmware. 
+ */ +static unsigned long convert_mem_flags(const struct kbase_device * const kbdev, + const u32 flags, u32 *cm) +{ + unsigned long mem_flags = 0; + u32 cache_mode = flags & CSF_FIRMWARE_ENTRY_CACHE_MODE; + bool is_shared = (flags & CSF_FIRMWARE_ENTRY_SHARED) ? true : false; + + /* The memory flags control the access permissions for the MCU, the + * shader cores/tiler are not expected to access this memory + */ + if (flags & CSF_FIRMWARE_ENTRY_READ) + mem_flags |= KBASE_REG_GPU_RD; + + if (flags & CSF_FIRMWARE_ENTRY_WRITE) + mem_flags |= KBASE_REG_GPU_WR; + + if ((flags & CSF_FIRMWARE_ENTRY_EXECUTE) == 0) + mem_flags |= KBASE_REG_GPU_NX; + + if (flags & CSF_FIRMWARE_ENTRY_PROTECTED) + mem_flags |= KBASE_REG_PROTECTED; + + /* Substitute uncached coherent memory for cached coherent memory if + * the system does not support ACE coherency. + */ + if ((cache_mode == CSF_FIRMWARE_CACHE_MODE_CACHED_COHERENT) && + (kbdev->system_coherency != COHERENCY_ACE)) + cache_mode = CSF_FIRMWARE_CACHE_MODE_UNCACHED_COHERENT; + + /* Substitute uncached incoherent memory for uncached coherent memory + * if the system does not support ACE-Lite coherency. 
+ */ + if ((cache_mode == CSF_FIRMWARE_CACHE_MODE_UNCACHED_COHERENT) && + (kbdev->system_coherency == COHERENCY_NONE)) + cache_mode = CSF_FIRMWARE_CACHE_MODE_NONE; + + *cm = cache_mode; + + switch (cache_mode) { + case CSF_FIRMWARE_CACHE_MODE_NONE: + mem_flags |= + KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); + break; + case CSF_FIRMWARE_CACHE_MODE_CACHED: + mem_flags |= + KBASE_REG_MEMATTR_INDEX( + AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY); + break; + case CSF_FIRMWARE_CACHE_MODE_UNCACHED_COHERENT: + case CSF_FIRMWARE_CACHE_MODE_CACHED_COHERENT: + WARN_ON(!is_shared); + mem_flags |= KBASE_REG_SHARE_BOTH | + KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED); + break; + default: + dev_err(kbdev->dev, + "Firmware contains interface with unsupported cache mode\n"); + break; + } + return mem_flags; +} + +static void load_fw_image_section(struct kbase_device *kbdev, const u8 *data, + struct tagged_addr *phys, u32 num_pages, u32 flags, + u32 data_start, u32 data_end) +{ + u32 data_pos = data_start; + u32 data_len = data_end - data_start; + u32 page_num; + u32 page_limit; + + if (flags & CSF_FIRMWARE_ENTRY_ZERO) + page_limit = num_pages; + else + page_limit = (data_len + PAGE_SIZE - 1) / PAGE_SIZE; + + for (page_num = 0; page_num < page_limit; ++page_num) { + struct page *const page = as_page(phys[page_num]); + char *const p = kmap_atomic(page); + u32 const copy_len = min_t(u32, PAGE_SIZE, data_len); + + if (copy_len > 0) { + memcpy(p, data + data_pos, copy_len); + data_pos += copy_len; + data_len -= copy_len; + } + + if (flags & CSF_FIRMWARE_ENTRY_ZERO) { + u32 const zi_len = PAGE_SIZE - copy_len; + + memset(p + copy_len, 0, zi_len); + } + + kbase_sync_single_for_device(kbdev, kbase_dma_addr(page), + PAGE_SIZE, DMA_TO_DEVICE); + kunmap_atomic(p); + } +} + +static int reload_fw_data_sections(struct kbase_device *kbdev) +{ + const u32 magic = FIRMWARE_HEADER_MAGIC; + struct kbase_csf_firmware_interface *interface; + const struct firmware *firmware; + int ret = 0; 
+ + if (request_firmware(&firmware, fw_name, kbdev->dev) != 0) { + dev_err(kbdev->dev, + "Failed to reload firmware image '%s'\n", + fw_name); + return -ENOENT; + } + + /* Do couple of basic sanity checks */ + if (firmware->size < FIRMWARE_HEADER_LENGTH) { + dev_err(kbdev->dev, "Firmware image unexpectedly too small\n"); + ret = -EINVAL; + goto out; + } + + if (memcmp(firmware->data, &magic, sizeof(magic)) != 0) { + dev_err(kbdev->dev, "Incorrect magic value, firmware image could have been corrupted\n"); + ret = -EINVAL; + goto out; + } + + list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { + /* Skip reload of text & read only data sections */ + if ((interface->flags & CSF_FIRMWARE_ENTRY_EXECUTE) || + !(interface->flags & CSF_FIRMWARE_ENTRY_WRITE)) + continue; + + load_fw_image_section(kbdev, firmware->data, interface->phys, + interface->num_pages, interface->flags, + interface->data_start, interface->data_end); + } + + kbase_csf_firmware_reload_trace_buffers_data(kbdev); + +out: + release_firmware(firmware); + return ret; +} + +/** + * parse_memory_setup_entry() - Process an "interface memory setup" section + * + * Read an "interface memory setup" section from the firmware image and create + * the necessary memory region including the MMU page tables. If successful + * the interface will be added to the kbase_device:csf.firmware_interfaces list. 
+ * + * Return: 0 if successful, negative error code on failure + * + * @kbdev: Kbase device structure + * @fw: The firmware image containing the section + * @entry: Pointer to the start of the section + * @size: Size (in bytes) of the section + */ +static int parse_memory_setup_entry(struct kbase_device *kbdev, + const struct firmware *fw, + const u32 *entry, unsigned int size) +{ + int ret = 0; + const u32 flags = entry[0]; + const u32 virtual_start = entry[1]; + const u32 virtual_end = entry[2]; + const u32 data_start = entry[3]; + const u32 data_end = entry[4]; + u32 num_pages; + char *name; + struct tagged_addr *phys = NULL; + struct kbase_csf_firmware_interface *interface = NULL; + bool allocated_pages = false, protected_mode = false; + unsigned long mem_flags = 0; + u32 cache_mode = 0; + struct protected_memory_allocation **pma = NULL; + + if (data_end < data_start) { + dev_err(kbdev->dev, "Firmware corrupt, data_end < data_start (0x%x<0x%x)\n", + data_end, data_start); + return -EINVAL; + } + if (virtual_end < virtual_start) { + dev_err(kbdev->dev, "Firmware corrupt, virtual_end < virtual_start (0x%x<0x%x)\n", + virtual_end, virtual_start); + return -EINVAL; + } + if (data_end > fw->size) { + dev_err(kbdev->dev, "Firmware corrupt, file truncated? 
data_end=0x%x > fw->size=0x%zx\n", + data_end, fw->size); + return -EINVAL; + } + + if ((virtual_start & ~PAGE_MASK) != 0 || + (virtual_end & ~PAGE_MASK) != 0) { + dev_err(kbdev->dev, "Firmware corrupt: virtual addresses not page aligned: 0x%x-0x%x\n", + virtual_start, virtual_end); + return -EINVAL; + } + + if ((flags & CSF_FIRMWARE_ENTRY_SUPPORTED_FLAGS) != flags) { + dev_err(kbdev->dev, "Firmware contains interface with unsupported flags (0x%x)\n", + flags); + return -EINVAL; + } + + if (flags & CSF_FIRMWARE_ENTRY_PROTECTED) + protected_mode = true; + + if (protected_mode && kbdev->csf.pma_dev == NULL) { + dev_err(kbdev->dev, + "Protected memory allocator not found, Firmware protected mode entry will not be supported"); + return 0; + } + + num_pages = (virtual_end - virtual_start) + >> PAGE_SHIFT; + + phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL); + if (!phys) + return -ENOMEM; + + if (protected_mode) { + pma = kbase_csf_protected_memory_alloc(kbdev, phys, num_pages); + + if (pma == NULL) { + ret = -ENOMEM; + goto out; + } + } else { + ret = kbase_mem_pool_alloc_pages( + &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], + num_pages, phys, false); + if (ret < 0) + goto out; + } + + allocated_pages = true; + load_fw_image_section(kbdev, fw->data, phys, num_pages, flags, + data_start, data_end); + + /* Allocate enough memory for the struct kbase_csf_firmware_interface and + * the name of the interface. An extra byte is allocated to place a + * NUL-terminator in. This should already be included according to the + * specification but here we add it anyway to be robust against a + * corrupt firmware image. 
+ */ + interface = kmalloc(sizeof(*interface) + + size - INTERFACE_ENTRY_NAME_OFFSET + 1, GFP_KERNEL); + if (!interface) { + ret = -ENOMEM; + goto out; + } + name = (void *)(interface + 1); + memcpy(name, entry + (INTERFACE_ENTRY_NAME_OFFSET / sizeof(*entry)), + size - INTERFACE_ENTRY_NAME_OFFSET); + name[size - INTERFACE_ENTRY_NAME_OFFSET] = 0; + + interface->name = name; + interface->phys = phys; + interface->num_pages = num_pages; + interface->virtual = virtual_start; + interface->kernel_map = NULL; + interface->flags = flags; + interface->data_start = data_start; + interface->data_end = data_end; + interface->pma = pma; + + mem_flags = convert_mem_flags(kbdev, flags, &cache_mode); + + if (flags & CSF_FIRMWARE_ENTRY_SHARED) { + struct page **page_list; + u32 i; + pgprot_t cpu_map_prot; + u32 mem_attr_index = KBASE_REG_MEMATTR_VALUE(mem_flags); + + /* Since SHARED memory type was used for mapping shared memory + * on GPU side, it can be mapped as cached on CPU side on both + * types of coherent platforms. + */ + if ((cache_mode == CSF_FIRMWARE_CACHE_MODE_CACHED_COHERENT) || + (cache_mode == CSF_FIRMWARE_CACHE_MODE_UNCACHED_COHERENT)) { + WARN_ON(mem_attr_index != + AS_MEMATTR_INDEX_SHARED); + cpu_map_prot = PAGE_KERNEL; + } else { + WARN_ON(mem_attr_index != + AS_MEMATTR_INDEX_NON_CACHEABLE); + cpu_map_prot = pgprot_writecombine(PAGE_KERNEL); + } + + page_list = kmalloc_array(num_pages, sizeof(*page_list), + GFP_KERNEL); + if (!page_list) { + ret = -ENOMEM; + goto out; + } + + for (i = 0; i < num_pages; i++) + page_list[i] = as_page(phys[i]); + + interface->kernel_map = vmap(page_list, num_pages, VM_MAP, + cpu_map_prot); + + kfree(page_list); + + if (!interface->kernel_map) { + ret = -ENOMEM; + goto out; + } + } + + /* Start location of the shared interface area is fixed and is + * specified in firmware spec, and so there shall only be a + * single entry with that start address. 
+ */ + if (virtual_start == (KBASE_REG_ZONE_MCU_SHARED_BASE << PAGE_SHIFT)) + kbdev->csf.shared_interface = interface; + + list_add(&interface->node, &kbdev->csf.firmware_interfaces); + + ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, + virtual_start >> PAGE_SHIFT, phys, num_pages, mem_flags, + KBASE_MEM_GROUP_CSF_FW); + + if (ret != 0) { + dev_err(kbdev->dev, "Failed to insert firmware pages\n"); + /* The interface has been added to the list, so cleanup will + * be handled by firmware unloading + */ + } + + dev_dbg(kbdev->dev, "Processed section '%s'", name); + + return ret; + +out: + if (allocated_pages) { + if (protected_mode) { + kbase_csf_protected_memory_free(kbdev, pma, num_pages); + } else { + kbase_mem_pool_free_pages( + &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], + num_pages, phys, false, false); + } + } + + kfree(phys); + kfree(interface); + return ret; +} + +/** + * parse_timeline_metadata_entry() - Process a "timeline metadata" section + * + * Return: 0 if successful, negative error code on failure + * + * @kbdev: Kbase device structure + * @fw: Firmware image containing the section + * @entry: Pointer to the section + * @size: Size (in bytes) of the section + */ +static int parse_timeline_metadata_entry(struct kbase_device *kbdev, + const struct firmware *fw, const u32 *entry, unsigned int size) +{ + const u32 data_start = entry[0]; + const u32 data_size = entry[1]; + const u32 data_end = data_start + data_size; + const char *name = (char *)&entry[2]; + struct firmware_timeline_metadata *metadata; + const unsigned int name_len = + size - TL_METADATA_ENTRY_NAME_OFFSET; + size_t allocation_size = sizeof(*metadata) + name_len + 1 + data_size; + + if (data_end > fw->size) { + dev_err(kbdev->dev, + "Firmware corrupt, file truncated? data_end=0x%x > fw->size=0x%zx", + data_end, fw->size); + return -EINVAL; + } + + /* Allocate enough space for firmware_timeline_metadata, + * its name and the content. 
+ */ + metadata = kmalloc(allocation_size, GFP_KERNEL); + if (!metadata) + return -ENOMEM; + + metadata->name = (char *)(metadata + 1); + metadata->data = (char *)(metadata + 1) + name_len + 1; + metadata->size = data_size; + + memcpy(metadata->name, name, name_len); + metadata->name[name_len] = 0; + + /* Copy metadata's content. */ + memcpy(metadata->data, fw->data + data_start, data_size); + + list_add(&metadata->node, &kbdev->csf.firmware_timeline_metadata); + + dev_dbg(kbdev->dev, "Timeline metadata '%s'", metadata->name); + + return 0; +} + +/** + * load_firmware_entry() - Process an entry from a firmware image + * + * Read an entry from a firmware image and do any necessary work (e.g. loading + * the data into page accessible to the MCU). + * + * Unknown entries are ignored if the 'optional' flag is set within the entry, + * otherwise the function will fail with -EINVAL + * + * Return: 0 if successful, negative error code on failure + * + * @kbdev: Kbase device + * @fw: Firmware image containing the entry + * @offset: Byte offset within the image of the entry to load + * @header: Header word of the entry + */ +static int load_firmware_entry(struct kbase_device *kbdev, + const struct firmware *fw, + u32 offset, u32 header) +{ + const unsigned int type = entry_type(header); + unsigned int size = entry_size(header); + const bool optional = entry_optional(header); + /* Update is used with configuration and tracebuffer entries to + * initiate a FIRMWARE_CONFIG_UPDATE, instead of triggering a + * silent reset. 
+ */ + const bool updatable = entry_update(header); + const u32 *entry = (void *)(fw->data + offset); + + if ((offset % sizeof(*entry)) || (size % sizeof(*entry))) { + dev_err(kbdev->dev, "Firmware entry isn't 32 bit aligned, offset=0x%x size=0x%x\n", + offset, size); + return -EINVAL; + } + + if (size < sizeof(*entry)) { + dev_err(kbdev->dev, "Size field too small: %u\n", size); + return -EINVAL; + } + + /* Remove the header */ + entry++; + size -= sizeof(*entry); + + switch (type) { + case CSF_FIRMWARE_ENTRY_TYPE_INTERFACE: + /* Interface memory setup */ + if (size < INTERFACE_ENTRY_NAME_OFFSET + sizeof(*entry)) { + dev_err(kbdev->dev, "Interface memory setup entry too short (size=%u)\n", + size); + return -EINVAL; + } + return parse_memory_setup_entry(kbdev, fw, entry, size); + case CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION: + /* Configuration option */ + if (size < CONFIGURATION_ENTRY_NAME_OFFSET + sizeof(*entry)) { + dev_err(kbdev->dev, "Configuration option entry too short (size=%u)\n", + size); + return -EINVAL; + } + return kbase_csf_firmware_cfg_option_entry_parse( + kbdev, fw, entry, size, updatable); + case CSF_FIRMWARE_ENTRY_TYPE_FUTF_TEST: +#ifndef MALI_KBASE_BUILD + /* FW UTF option */ + if (size < 2*sizeof(*entry)) { + dev_err(kbdev->dev, "FW UTF entry too short (size=%u)\n", + size); + return -EINVAL; + } + return mali_kutf_process_fw_utf_entry(kbdev, fw->data, + fw->size, entry); +#endif + break; + case CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER: + /* Trace buffer */ + if (size < TRACE_BUFFER_ENTRY_NAME_OFFSET + sizeof(*entry)) { + dev_err(kbdev->dev, "Trace Buffer entry too short (size=%u)\n", + size); + return -EINVAL; + } + return kbase_csf_firmware_parse_trace_buffer_entry( + kbdev, entry, size, updatable); + case CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA: + /* Meta data section */ + if (size < TL_METADATA_ENTRY_NAME_OFFSET + sizeof(*entry)) { + dev_err(kbdev->dev, "Timeline metadata entry too short (size=%u)\n", + size); + return -EINVAL; + } + return 
parse_timeline_metadata_entry(kbdev, fw, entry, size); + } + + if (!optional) { + dev_err(kbdev->dev, + "Unsupported non-optional entry type %u in firmware\n", + type); + return -EINVAL; + } + + return 0; +} + +static void free_global_iface(struct kbase_device *kbdev) +{ + struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface; + + if (iface->groups) { + unsigned int gid; + + for (gid = 0; gid < iface->group_num; ++gid) + kfree(iface->groups[gid].streams); + + kfree(iface->groups); + iface->groups = NULL; + } +} + +/** + * iface_gpu_va_to_cpu - Convert a GPU VA address within the shared interface + * region to a CPU address, using the existing mapping. + * @kbdev: Device pointer + * @gpu_va: GPU VA to convert + * + * Return: A CPU pointer to the location within the shared interface region, or + * NULL on failure. + */ +static inline void *iface_gpu_va_to_cpu(struct kbase_device *kbdev, u32 gpu_va) +{ + struct kbase_csf_firmware_interface *interface = + kbdev->csf.shared_interface; + u8 *kernel_base = interface->kernel_map; + + if (gpu_va < interface->virtual || + gpu_va >= interface->virtual + interface->num_pages * PAGE_SIZE) { + dev_err(kbdev->dev, + "Interface address 0x%x not within %u-page region at 0x%x", + gpu_va, interface->num_pages, + interface->virtual); + return NULL; + } + + return (void *)(kernel_base + (gpu_va - interface->virtual)); +} + +static int parse_cmd_stream_info(struct kbase_device *kbdev, + struct kbase_csf_cmd_stream_info *sinfo, + u32 *stream_base) +{ + sinfo->kbdev = kbdev; + sinfo->features = stream_base[STREAM_FEATURES/4]; + sinfo->input = iface_gpu_va_to_cpu(kbdev, + stream_base[STREAM_INPUT_VA/4]); + sinfo->output = iface_gpu_va_to_cpu(kbdev, + stream_base[STREAM_OUTPUT_VA/4]); + + if (sinfo->input == NULL || sinfo->output == NULL) + return -EINVAL; + + return 0; +} + +static int parse_cmd_stream_group_info(struct kbase_device *kbdev, + struct kbase_csf_cmd_stream_group_info *ginfo, + u32 *group_base, u32 group_stride) +{ + 
unsigned int sid; + + ginfo->kbdev = kbdev; + ginfo->features = group_base[GROUP_FEATURES/4]; + ginfo->input = iface_gpu_va_to_cpu(kbdev, + group_base[GROUP_INPUT_VA/4]); + ginfo->output = iface_gpu_va_to_cpu(kbdev, + group_base[GROUP_OUTPUT_VA/4]); + + if (ginfo->input == NULL || ginfo->output == NULL) + return -ENOMEM; + + ginfo->suspend_size = group_base[GROUP_SUSPEND_SIZE/4]; + ginfo->protm_suspend_size = group_base[GROUP_PROTM_SUSPEND_SIZE/4]; + ginfo->stream_num = group_base[GROUP_STREAM_NUM/4]; + + if (ginfo->stream_num < MIN_SUPPORTED_STREAMS_PER_GROUP || + ginfo->stream_num > MAX_SUPPORTED_STREAMS_PER_GROUP) { + dev_err(kbdev->dev, "CSG with %u CSs out of range %u-%u", + ginfo->stream_num, + MIN_SUPPORTED_STREAMS_PER_GROUP, + MAX_SUPPORTED_STREAMS_PER_GROUP); + return -EINVAL; + } + + ginfo->stream_stride = group_base[GROUP_STREAM_STRIDE/4]; + + if (ginfo->stream_num * ginfo->stream_stride > group_stride) { + dev_err(kbdev->dev, + "group stride of 0x%x exceeded by %u CSs with stride 0x%x", + group_stride, ginfo->stream_num, + ginfo->stream_stride); + return -EINVAL; + } + + ginfo->streams = kmalloc_array(ginfo->stream_num, + sizeof(*ginfo->streams), GFP_KERNEL); + + if (!ginfo->streams) + return -ENOMEM; + + for (sid = 0; sid < ginfo->stream_num; sid++) { + int err; + u32 *stream_base = group_base + (STREAM_CONTROL_0 + + ginfo->stream_stride * sid) / 4; + + err = parse_cmd_stream_info(kbdev, &ginfo->streams[sid], + stream_base); + if (err < 0) { + /* caller will free the memory for CSs array */ + return err; + } + } + + return 0; +} + +static u32 get_firmware_version(struct kbase_device *kbdev) +{ + struct kbase_csf_firmware_interface *interface = + kbdev->csf.shared_interface; + u32 *shared_info = interface->kernel_map; + + return shared_info[GLB_VERSION/4]; +} + +static int parse_capabilities(struct kbase_device *kbdev) +{ + struct kbase_csf_firmware_interface *interface = + kbdev->csf.shared_interface; + u32 *shared_info = interface->kernel_map; + 
struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface; + unsigned int gid; + + /* All offsets are in bytes, so divide by 4 for access via a u32 pointer + */ + + /* The version number of the global interface is expected to be a + * non-zero value. If it's not, the firmware may not have booted. + */ + iface->version = get_firmware_version(kbdev); + if (!iface->version) { + dev_err(kbdev->dev, "Version check failed. Firmware may have failed to boot."); + return -EINVAL; + } + + + iface->kbdev = kbdev; + iface->features = shared_info[GLB_FEATURES/4]; + iface->input = iface_gpu_va_to_cpu(kbdev, shared_info[GLB_INPUT_VA/4]); + iface->output = iface_gpu_va_to_cpu(kbdev, + shared_info[GLB_OUTPUT_VA/4]); + + if (iface->input == NULL || iface->output == NULL) + return -ENOMEM; + + iface->group_num = shared_info[GLB_GROUP_NUM/4]; + + if (iface->group_num < MIN_SUPPORTED_CSGS || + iface->group_num > MAX_SUPPORTED_CSGS) { + dev_err(kbdev->dev, + "Interface containing %u CSGs outside of range %u-%u", + iface->group_num, MIN_SUPPORTED_CSGS, + MAX_SUPPORTED_CSGS); + return -EINVAL; + } + + iface->group_stride = shared_info[GLB_GROUP_STRIDE/4]; + iface->prfcnt_size = shared_info[GLB_PRFCNT_SIZE/4]; + + if (iface->version >= kbase_csf_interface_version(1, 1, 0)) { + iface->instr_features = shared_info[GLB_INSTR_FEATURES / 4]; + } else { + iface->instr_features = 0; + } + + if ((GROUP_CONTROL_0 + + (unsigned long)iface->group_num * iface->group_stride) > + (interface->num_pages * PAGE_SIZE)) { + dev_err(kbdev->dev, + "interface size of %u pages exceeded by %u CSGs with stride 0x%x", + interface->num_pages, iface->group_num, + iface->group_stride); + return -EINVAL; + } + + WARN_ON(iface->groups); + + iface->groups = kcalloc(iface->group_num, sizeof(*iface->groups), + GFP_KERNEL); + if (!iface->groups) + return -ENOMEM; + + for (gid = 0; gid < iface->group_num; gid++) { + int err; + u32 *group_base = shared_info + (GROUP_CONTROL_0 + + iface->group_stride * gid) / 4; + + err 
= parse_cmd_stream_group_info(kbdev, &iface->groups[gid], + group_base, iface->group_stride); + if (err < 0) { + free_global_iface(kbdev); + return err; + } + } + + return 0; +} + +static inline void access_firmware_memory(struct kbase_device *kbdev, + u32 gpu_addr, u32 *value, const bool read) +{ + struct kbase_csf_firmware_interface *interface; + + list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { + if ((gpu_addr >= interface->virtual) && + (gpu_addr < interface->virtual + (interface->num_pages << PAGE_SHIFT))) { + u32 offset_bytes = gpu_addr - interface->virtual; + u32 page_num = offset_bytes >> PAGE_SHIFT; + u32 offset_in_page = offset_bytes & ~PAGE_MASK; + struct page *target_page = as_page( + interface->phys[page_num]); + u32 *cpu_addr = kmap_atomic(target_page); + + if (read) { + kbase_sync_single_for_device(kbdev, + kbase_dma_addr(target_page) + offset_in_page, + sizeof(u32), DMA_BIDIRECTIONAL); + + *value = cpu_addr[offset_in_page >> 2]; + } else { + cpu_addr[offset_in_page >> 2] = *value; + + kbase_sync_single_for_device(kbdev, + kbase_dma_addr(target_page) + offset_in_page, + sizeof(u32), DMA_BIDIRECTIONAL); + } + + kunmap_atomic(cpu_addr); + return; + } + } + dev_warn(kbdev->dev, "Invalid GPU VA %x passed\n", gpu_addr); +} + +void kbase_csf_read_firmware_memory(struct kbase_device *kbdev, + u32 gpu_addr, u32 *value) +{ + access_firmware_memory(kbdev, gpu_addr, value, true); +} + +void kbase_csf_update_firmware_memory(struct kbase_device *kbdev, + u32 gpu_addr, u32 value) +{ + access_firmware_memory(kbdev, gpu_addr, &value, false); +} + +void kbase_csf_firmware_cs_input( + const struct kbase_csf_cmd_stream_info *const info, const u32 offset, + const u32 value) +{ + const struct kbase_device * const kbdev = info->kbdev; + + dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x\n", offset, value); + input_page_write(info->input, offset, value); +} + +u32 kbase_csf_firmware_cs_input_read( + const struct kbase_csf_cmd_stream_info *const 
info, + const u32 offset) +{ + const struct kbase_device * const kbdev = info->kbdev; + u32 const val = input_page_read(info->input, offset); + + dev_dbg(kbdev->dev, "cs input r: reg %08x val %08x\n", offset, val); + return val; +} + +void kbase_csf_firmware_cs_input_mask( + const struct kbase_csf_cmd_stream_info *const info, const u32 offset, + const u32 value, const u32 mask) +{ + const struct kbase_device * const kbdev = info->kbdev; + + dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x mask %08x\n", + offset, value, mask); + input_page_partial_write(info->input, offset, value, mask); +} + +u32 kbase_csf_firmware_cs_output( + const struct kbase_csf_cmd_stream_info *const info, const u32 offset) +{ + const struct kbase_device * const kbdev = info->kbdev; + u32 const val = output_page_read(info->output, offset); + + dev_dbg(kbdev->dev, "cs output r: reg %08x val %08x\n", offset, val); + return val; +} + +void kbase_csf_firmware_csg_input( + const struct kbase_csf_cmd_stream_group_info *const info, + const u32 offset, const u32 value) +{ + const struct kbase_device * const kbdev = info->kbdev; + + dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x\n", + offset, value); + input_page_write(info->input, offset, value); +} + +u32 kbase_csf_firmware_csg_input_read( + const struct kbase_csf_cmd_stream_group_info *const info, + const u32 offset) +{ + const struct kbase_device * const kbdev = info->kbdev; + u32 const val = input_page_read(info->input, offset); + + dev_dbg(kbdev->dev, "csg input r: reg %08x val %08x\n", offset, val); + return val; +} + +void kbase_csf_firmware_csg_input_mask( + const struct kbase_csf_cmd_stream_group_info *const info, + const u32 offset, const u32 value, const u32 mask) +{ + const struct kbase_device * const kbdev = info->kbdev; + + dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x mask %08x\n", + offset, value, mask); + input_page_partial_write(info->input, offset, value, mask); +} + +u32 kbase_csf_firmware_csg_output( + const struct 
kbase_csf_cmd_stream_group_info *const info, + const u32 offset) +{ + const struct kbase_device * const kbdev = info->kbdev; + u32 const val = output_page_read(info->output, offset); + + dev_dbg(kbdev->dev, "csg output r: reg %08x val %08x\n", offset, val); + return val; +} + +void kbase_csf_firmware_global_input( + const struct kbase_csf_global_iface *const iface, const u32 offset, + const u32 value) +{ + const struct kbase_device * const kbdev = iface->kbdev; + + dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x\n", offset, value); + input_page_write(iface->input, offset, value); +} + +void kbase_csf_firmware_global_input_mask( + const struct kbase_csf_global_iface *const iface, const u32 offset, + const u32 value, const u32 mask) +{ + const struct kbase_device * const kbdev = iface->kbdev; + + dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x mask %08x\n", + offset, value, mask); + input_page_partial_write(iface->input, offset, value, mask); +} + +u32 kbase_csf_firmware_global_input_read( + const struct kbase_csf_global_iface *const iface, const u32 offset) +{ + const struct kbase_device * const kbdev = iface->kbdev; + u32 const val = input_page_read(iface->input, offset); + + dev_dbg(kbdev->dev, "glob input r: reg %08x val %08x\n", offset, val); + return val; +} + +u32 kbase_csf_firmware_global_output( + const struct kbase_csf_global_iface *const iface, const u32 offset) +{ + const struct kbase_device * const kbdev = iface->kbdev; + u32 const val = output_page_read(iface->output, offset); + + dev_dbg(kbdev->dev, "glob output r: reg %08x val %08x\n", offset, val); + return val; +} + +/** + * handle_internal_firmware_fatal - Handler for CS internal firmware fault. + * + * @kbdev: Pointer to kbase device + * + * Report group fatal error to user space for all GPU command queue groups + * in the device, terminate them and reset GPU. 
+ */ +static void handle_internal_firmware_fatal(struct kbase_device *const kbdev) +{ + int as; + + for (as = 0; as < kbdev->nr_hw_address_spaces; as++) { + unsigned long flags; + struct kbase_context *kctx; + struct kbase_fault fault; + + if (as == MCU_AS_NR) + continue; + + /* Only handle the fault for an active address space. Lock is + * taken here to atomically get reference to context in an + * active address space and retain its refcount. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kctx = kbase_ctx_sched_as_to_ctx_nolock(kbdev, as); + + if (kctx) { + kbase_ctx_sched_retain_ctx_refcount(kctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } else { + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + continue; + } + + fault = (struct kbase_fault) { + .status = GPU_EXCEPTION_TYPE_SW_FAULT_1, + }; + + kbase_csf_ctx_handle_fault(kctx, &fault); + kbase_ctx_sched_release_ctx_lock(kctx); + } + + if (kbase_prepare_to_reset_gpu(kbdev, + RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu(kbdev); +} + +/** + * firmware_error_worker - Worker function for handling firmware internal error + * + * @data: Pointer to a work_struct embedded in kbase device. 
+ * + * Handle the CS internal firmware error + */ +static void firmware_error_worker(struct work_struct *const data) +{ + struct kbase_device *const kbdev = + container_of(data, struct kbase_device, csf.fw_error_work); + + handle_internal_firmware_fatal(kbdev); +} + +static bool global_request_complete(struct kbase_device *const kbdev, + u32 const req_mask) +{ + struct kbase_csf_global_iface *global_iface = + &kbdev->csf.global_iface; + bool complete = false; + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + if ((kbase_csf_firmware_global_output(global_iface, GLB_ACK) & + req_mask) == + (kbase_csf_firmware_global_input_read(global_iface, GLB_REQ) & + req_mask)) + complete = true; + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + return complete; +} + +static int wait_for_global_request(struct kbase_device *const kbdev, + u32 const req_mask) +{ + const long wait_timeout = + kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); + long remaining; + int err = 0; + + remaining = wait_event_timeout(kbdev->csf.event_wait, + global_request_complete(kbdev, req_mask), + wait_timeout); + + if (!remaining) { + dev_warn(kbdev->dev, "Timed out waiting for global request %x to complete", + req_mask); + err = -ETIMEDOUT; + } + + return err; +} + +static void set_global_request( + const struct kbase_csf_global_iface *const global_iface, + u32 const req_mask) +{ + u32 glb_req; + + kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev); + + glb_req = kbase_csf_firmware_global_output(global_iface, GLB_ACK); + glb_req ^= req_mask; + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req, + req_mask); +} + +static void enable_endpoints_global( + const struct kbase_csf_global_iface *const global_iface, + u64 const shader_core_mask) +{ + kbase_csf_firmware_global_input(global_iface, GLB_ALLOC_EN_LO, + shader_core_mask & U32_MAX); + kbase_csf_firmware_global_input(global_iface, GLB_ALLOC_EN_HI, + shader_core_mask >> 32); + + 
set_global_request(global_iface, GLB_REQ_CFG_ALLOC_EN_MASK); +} + +static void enable_shader_poweroff_timer(struct kbase_device *const kbdev, + const struct kbase_csf_global_iface *const global_iface) +{ + u32 pwroff_reg; + + if (kbdev->csf.firmware_hctl_core_pwr) + pwroff_reg = + GLB_PWROFF_TIMER_TIMER_SOURCE_SET(DISABLE_GLB_PWROFF_TIMER, + GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); + else + pwroff_reg = kbdev->csf.mcu_core_pwroff_dur_count; + + kbase_csf_firmware_global_input(global_iface, GLB_PWROFF_TIMER, + pwroff_reg); + set_global_request(global_iface, GLB_REQ_CFG_PWROFF_TIMER_MASK); + + /* Save the programed reg value in its shadow field */ + kbdev->csf.mcu_core_pwroff_reg_shadow = pwroff_reg; + + dev_dbg(kbdev->dev, "GLB_PWROFF_TIMER set to 0x%.8x\n", pwroff_reg); +} + +static void set_timeout_global( + const struct kbase_csf_global_iface *const global_iface, + u64 const timeout) +{ + kbase_csf_firmware_global_input(global_iface, GLB_PROGRESS_TIMER, + timeout / GLB_PROGRESS_TIMER_TIMEOUT_SCALE); + + set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK); +} + +static void global_init(struct kbase_device *const kbdev, u64 core_mask) +{ + u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK | + GLB_ACK_IRQ_MASK_PING_MASK | + GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | + GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK | + GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | + GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK | + GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | + GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK; + + const struct kbase_csf_global_iface *const global_iface = + &kbdev->csf.global_iface; + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + /* Set the coherency mode for protected mode execution */ + WARN_ON(kbdev->system_coherency == COHERENCY_ACE); + kbase_csf_firmware_global_input(global_iface, GLB_PROTM_COHERENCY, + kbdev->system_coherency); + + /* Update shader core allocation enable mask */ + enable_endpoints_global(global_iface, core_mask); 
+ enable_shader_poweroff_timer(kbdev, global_iface); + + set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev)); + + /* Unmask the interrupts */ + kbase_csf_firmware_global_input(global_iface, + GLB_ACK_IRQ_MASK, ack_irq_mask); + + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + + kbase_csf_scheduler_spin_unlock(kbdev, flags); +} + +/** + * global_init_on_boot - Sends a global request to control various features. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface + * + * Currently only the request to enable endpoints and timeout for GPU progress + * timer is sent. + * + * Return: 0 on success, or negative on failure. + */ +static int global_init_on_boot(struct kbase_device *const kbdev) +{ + unsigned long flags; + u64 core_mask; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + core_mask = kbase_pm_ca_get_core_mask(kbdev); + kbdev->csf.firmware_hctl_core_pwr = + kbase_pm_no_mcu_core_pwroff(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + global_init(kbdev, core_mask); + + return wait_for_global_request(kbdev, CSF_GLB_REQ_CFG_MASK); +} + +void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev, + u64 core_mask) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbdev->csf.glb_init_request_pending = true; + kbdev->csf.firmware_hctl_core_pwr = + kbase_pm_no_mcu_core_pwroff(kbdev); + global_init(kbdev, core_mask); +} + +bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + WARN_ON(!kbdev->csf.glb_init_request_pending); + + if (global_request_complete(kbdev, CSF_GLB_REQ_CFG_MASK)) + kbdev->csf.glb_init_request_pending = false; + + return !kbdev->csf.glb_init_request_pending; +} + +void kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev, + bool update_core_pwroff_timer, bool update_core_mask, u64 core_mask) +{ + unsigned long flags; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + 
kbase_csf_scheduler_spin_lock(kbdev, &flags); + if (update_core_mask) + enable_endpoints_global(&kbdev->csf.global_iface, core_mask); + if (update_core_pwroff_timer) + enable_shader_poweroff_timer(kbdev, &kbdev->csf.global_iface); + + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + kbase_csf_scheduler_spin_unlock(kbdev, flags); +} + +bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + return global_request_complete(kbdev, GLB_REQ_CFG_ALLOC_EN_MASK | + GLB_REQ_CFG_PWROFF_TIMER_MASK); +} + +/** + * kbase_csf_firmware_reload_worker() - + * reload the fw image and re-enable the MCU + * @work: CSF Work item for reloading the firmware. + * + * This helper function will reload the firmware image and re-enable the MCU. + * It is supposed to be called after MCU(GPU) has been reset. + * Unlike the initial boot the firmware binary image is not parsed completely. + * Only the data sections, which were loaded in memory during the initial boot, + * are re-initialized either by zeroing them or copying their data from the + * firmware binary image. The memory allocation for the firmware pages and + * MMU programming is not needed for the reboot, presuming the firmware binary + * file on the filesystem would not change. 
+ */ +static void kbase_csf_firmware_reload_worker(struct work_struct *work) +{ + struct kbase_device *kbdev = container_of(work, struct kbase_device, + csf.firmware_reload_work); + int err; + + dev_info(kbdev->dev, "reloading firmware"); + + /* Reload just the data sections from firmware binary image */ + err = reload_fw_data_sections(kbdev); + if (err) + return; + + kbase_csf_tl_reader_reset(&kbdev->timeline->csf_tl_reader); + + /* Reboot the firmware */ + kbase_csf_firmware_enable_mcu(kbdev); +} + +void kbase_csf_firmware_trigger_reload(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbdev->csf.firmware_reloaded = false; + + if (kbdev->csf.firmware_reload_needed) { + kbdev->csf.firmware_reload_needed = false; + queue_work(system_wq, &kbdev->csf.firmware_reload_work); + } else { + kbase_csf_firmware_enable_mcu(kbdev); + } +} + +void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev) +{ + u32 version; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (unlikely(!kbdev->csf.firmware_inited)) + return; + + /* Check firmware rebooted properly: we do not expect + * the version number to change with a running reboot. 
+ */ + version = get_firmware_version(kbdev); + + if (version != kbdev->csf.global_iface.version) + dev_err(kbdev->dev, "Version check failed in firmware reboot."); + + KBASE_KTRACE_ADD(kbdev, FIRMWARE_REBOOT, NULL, 0u); + + /* Tell MCU state machine to transit to next state */ + kbdev->csf.firmware_reloaded = true; + kbase_pm_update_state(kbdev); +} + +static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ms) +{ +#define HYSTERESIS_VAL_UNIT_SHIFT (10) + /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ + u64 freq = arch_timer_get_cntfrq(); + u64 dur_val = dur_ms; + u32 cnt_val_u32, reg_val_u32; + bool src_system_timestamp = freq > 0; + + if (!src_system_timestamp) { + /* Get the cycle_counter source alternative */ + spin_lock(&kbdev->pm.clk_rtm.lock); + if (kbdev->pm.clk_rtm.clks[0]) + freq = kbdev->pm.clk_rtm.clks[0]->clock_val; + else + dev_warn(kbdev->dev, "No GPU clock, unexpected intregration issue!"); + spin_unlock(&kbdev->pm.clk_rtm.lock); + + dev_info(kbdev->dev, "Can't get the timestamp frequency, " + "use cycle counter format with firmware idle hysteresis!"); + } + + /* Formula for dur_val = ((dur_ms/1000) * freq_HZ) >> 10) */ + dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; + dur_val = div_u64(dur_val, 1000); + + /* Interface limits the value field to S32_MAX */ + cnt_val_u32 = (dur_val > S32_MAX) ? 
S32_MAX : (u32)dur_val; + + reg_val_u32 = GLB_IDLE_TIMER_TIMEOUT_SET(0, cnt_val_u32); + /* add the source flag */ + if (src_system_timestamp) + reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32, + GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); + else + reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32, + GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER); + + return reg_val_u32; +} + +u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) +{ + return kbdev->csf.gpu_idle_hysteresis_ms; +} + +u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur) +{ + unsigned long flags; + const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur); + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbdev->csf.gpu_idle_hysteresis_ms = dur; + kbdev->csf.gpu_idle_dur_count = hysteresis_val; + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x", + hysteresis_val); + + return hysteresis_val; +} + +/* Convert a duration in microseconds into the GLB_PWROFF_TIMER register + * encoding: a timeout count (clamped to S32_MAX) plus a timer-source flag. + * The system timestamp frequency is used when available, otherwise the + * first GPU clock's rate is used and the GPU counter is selected as source. + */ +static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us) +{ +#define PWROFF_VAL_UNIT_SHIFT (10) + /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ + u64 freq = arch_timer_get_cntfrq(); + u64 dur_val = dur_us; + u32 cnt_val_u32, reg_val_u32; + bool src_system_timestamp = freq > 0; + + if (!src_system_timestamp) { + /* Get the cycle_counter source alternative */ + spin_lock(&kbdev->pm.clk_rtm.lock); + if (kbdev->pm.clk_rtm.clks[0]) + freq = kbdev->pm.clk_rtm.clks[0]->clock_val; + else + dev_warn(kbdev->dev, "No GPU clock, unexpected integration issue!"); + spin_unlock(&kbdev->pm.clk_rtm.lock); + + dev_info(kbdev->dev, "Can't get the timestamp frequency, " + "use cycle counter with MCU Core Poweroff timer!"); + } + + /* Formula for dur_val = ((dur_us/1e6) * freq_HZ) >> 10). + * Use the shift defined for this timer rather than the one that + * belongs to the idle hysteresis conversion. + */ + dur_val = (dur_val * freq) >> PWROFF_VAL_UNIT_SHIFT; + dur_val = div_u64(dur_val, 1000000); + + /* Interface limits the 
value field to S32_MAX */ + cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val; + + reg_val_u32 = GLB_PWROFF_TIMER_TIMEOUT_SET(0, cnt_val_u32); + /* add the source flag */ + if (src_system_timestamp) + reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32, + GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); + else + reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32, + GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER); + + return reg_val_u32; +} + +u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev) +{ + return kbdev->csf.mcu_core_pwroff_dur_us; +} + +u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur) +{ + unsigned long flags; + const u32 pwroff = convert_dur_to_core_pwroff_count(kbdev, dur); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->csf.mcu_core_pwroff_dur_us = dur; + kbdev->csf.mcu_core_pwroff_dur_count = pwroff; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + dev_dbg(kbdev->dev, "MCU Core Poweroff input update: 0x%.8x", pwroff); + + return pwroff; +} + + +int kbase_csf_firmware_early_init(struct kbase_device *kbdev) +{ + init_waitqueue_head(&kbdev->csf.event_wait); + kbdev->csf.interrupt_received = false; + kbdev->csf.fw_timeout_ms = CSF_FIRMWARE_TIMEOUT_MS; + + INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces); + INIT_LIST_HEAD(&kbdev->csf.firmware_config); + INIT_LIST_HEAD(&kbdev->csf.firmware_timeline_metadata); + INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list); + INIT_WORK(&kbdev->csf.firmware_reload_work, + kbase_csf_firmware_reload_worker); + INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker); + + mutex_init(&kbdev->csf.reg_lock); + + return 0; +} + +int kbase_csf_firmware_init(struct kbase_device *kbdev) +{ + const struct firmware *firmware; + const u32 magic = FIRMWARE_HEADER_MAGIC; + u8 version_major, version_minor; + u32 version_hash; + u32 entry_end_offset; + u32 entry_offset; + int ret; + + lockdep_assert_held(&kbdev->fw_load_lock); 
+ + if (WARN_ON((kbdev->as_free & MCU_AS_BITMASK) == 0)) + return -EINVAL; + kbdev->as_free &= ~MCU_AS_BITMASK; + + ret = kbase_mmu_init(kbdev, &kbdev->csf.mcu_mmu, NULL, + BASE_MEM_GROUP_DEFAULT); + + if (ret != 0) { + /* Release the address space */ + kbdev->as_free |= MCU_AS_BITMASK; + return ret; + } + + kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS; + kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count( + kbdev, FIRMWARE_IDLE_HYSTERESIS_TIME_MS); + + kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US; + kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count( + kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US); + + ret = kbase_mcu_shared_interface_region_tracker_init(kbdev); + if (ret != 0) { + dev_err(kbdev->dev, + "Failed to setup the rb tree for managing shared interface segment\n"); + goto error; + } + + if (request_firmware(&firmware, fw_name, kbdev->dev) != 0) { + dev_err(kbdev->dev, + "Failed to load firmware image '%s'\n", + fw_name); + ret = -ENOENT; + goto error; + } + + if (firmware->size < FIRMWARE_HEADER_LENGTH) { + dev_err(kbdev->dev, "Firmware too small\n"); + ret = -EINVAL; + goto error; + } + + if (memcmp(firmware->data, &magic, sizeof(magic)) != 0) { + dev_err(kbdev->dev, "Incorrect firmware magic\n"); + ret = -EINVAL; + goto error; + } + + version_minor = firmware->data[4]; + version_major = firmware->data[5]; + + if (version_major != FIRMWARE_HEADER_VERSION) { + dev_err(kbdev->dev, + "Firmware header version %d.%d not understood\n", + version_major, version_minor); + ret = -EINVAL; + goto error; + } + + memcpy(&version_hash, &firmware->data[8], sizeof(version_hash)); + + dev_notice(kbdev->dev, "Loading Mali firmware 0x%x", version_hash); + + memcpy(&entry_end_offset, &firmware->data[0x10], + sizeof(entry_end_offset)); + + if (entry_end_offset > firmware->size) { + dev_err(kbdev->dev, "Firmware image is truncated\n"); + ret = -EINVAL; + goto error; + } + + entry_offset = FIRMWARE_HEADER_LENGTH; 
+ while (entry_offset < entry_end_offset) { + u32 header; + unsigned int size; + + memcpy(&header, &firmware->data[entry_offset], sizeof(header)); + + size = entry_size(header); + + ret = load_firmware_entry(kbdev, firmware, entry_offset, + header); + if (ret != 0) { + dev_err(kbdev->dev, "Failed to load firmware image\n"); + goto error; + } + entry_offset += size; + } + + if (!kbdev->csf.shared_interface) { + dev_err(kbdev->dev, "Shared interface region not found\n"); + ret = -EINVAL; + goto error; + } else { + ret = setup_shared_iface_static_region(kbdev); + if (ret != 0) { + dev_err(kbdev->dev, "Failed to insert a region for shared iface entry parsed from fw image\n"); + goto error; + } + } + + ret = kbase_csf_firmware_trace_buffers_init(kbdev); + if (ret != 0) { + dev_err(kbdev->dev, "Failed to initialize trace buffers\n"); + goto error; + } + + /* Make sure L2 cache is powered up */ + kbase_pm_wait_for_l2_powered(kbdev); + + /* Load the MMU tables into the selected address space */ + load_mmu_tables(kbdev); + + boot_csf_firmware(kbdev); + + ret = parse_capabilities(kbdev); + if (ret != 0) + goto error; + + ret = kbase_csf_doorbell_mapping_init(kbdev); + if (ret != 0) + goto error; + + ret = kbase_csf_scheduler_init(kbdev); + if (ret != 0) + goto error; + + ret = kbase_csf_setup_dummy_user_reg_page(kbdev); + if (ret != 0) + goto error; + + ret = kbase_csf_timeout_init(kbdev); + if (ret != 0) + goto error; + + ret = global_init_on_boot(kbdev); + if (ret != 0) + goto error; + + ret = kbase_csf_firmware_cfg_init(kbdev); + if (ret != 0) + goto error; + + + /* Firmware loaded successfully */ + release_firmware(firmware); + KBASE_KTRACE_ADD(kbdev, FIRMWARE_BOOT, NULL, + (((u64)version_hash) << 32) | + (((u64)version_major) << 8) | version_minor); + return 0; + +error: + kbase_csf_firmware_term(kbdev); + release_firmware(firmware); + return ret; +} + +void kbase_csf_firmware_term(struct kbase_device *kbdev) +{ + unsigned long flags; + int ret = 0; + + 
cancel_work_sync(&kbdev->csf.fw_error_work); + + ret = kbase_reset_gpu_wait(kbdev); + + WARN(ret, "failed to wait for GPU reset"); + + kbase_csf_firmware_cfg_term(kbdev); + + kbase_csf_timeout_term(kbdev); + + kbase_csf_free_dummy_user_reg_page(kbdev); + + kbase_csf_scheduler_term(kbdev); + + kbase_csf_doorbell_mapping_term(kbdev); + + /* Explicitly trigger the disabling of MCU through the state machine and + * wait for its completion. It may not have been disabled yet due to the + * power policy. + */ + kbdev->pm.backend.mcu_desired = false; + kbase_pm_wait_for_desired_state(kbdev); + + free_global_iface(kbdev); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->csf.firmware_inited = false; + if (WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_OFF)) { + kbdev->pm.backend.mcu_state = KBASE_MCU_OFF; + stop_csf_firmware(kbdev); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + unload_mmu_tables(kbdev); + + kbase_csf_firmware_trace_buffers_term(kbdev); + + while (!list_empty(&kbdev->csf.firmware_interfaces)) { + struct kbase_csf_firmware_interface *interface; + + interface = + list_first_entry(&kbdev->csf.firmware_interfaces, + struct kbase_csf_firmware_interface, + node); + list_del(&interface->node); + + vunmap(interface->kernel_map); + if (interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) { + kbase_csf_protected_memory_free(kbdev, interface->pma, + interface->num_pages); + } else { + kbase_mem_pool_free_pages( + &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], + interface->num_pages, interface->phys, + true, false); + } + + kfree(interface->phys); + kfree(interface); + } + + while (!list_empty(&kbdev->csf.firmware_timeline_metadata)) { + struct firmware_timeline_metadata *metadata; + + metadata = list_first_entry( + &kbdev->csf.firmware_timeline_metadata, + struct firmware_timeline_metadata, + node); + list_del(&metadata->node); + + kfree(metadata); + } + +#ifndef MALI_KBASE_BUILD + mali_kutf_fw_utf_entry_cleanup(kbdev); +#endif + + /* This 
will also free up the region allocated for the shared interface + * entry parsed from the firmware image. + */ + kbase_mcu_shared_interface_region_tracker_term(kbdev); + + mutex_destroy(&kbdev->csf.reg_lock); + + kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu); + + /* Release the address space */ + kbdev->as_free |= MCU_AS_BITMASK; +} + +void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + const u32 glb_req = + kbase_csf_firmware_global_input_read(global_iface, GLB_REQ); + + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + + /* The scheduler is assumed to only call the enable when its internal + * state indicates that the idle timer has previously been disabled. So + * on entry the expected field values are: + * 1. GLOBAL_INPUT_BLOCK.GLB_REQ.IDLE_ENABLE: 0 + * 2. GLOBAL_OUTPUT_BLOCK.GLB_ACK.IDLE_ENABLE: 0, or, on 1 -> 0 + */ + + if (glb_req & GLB_REQ_IDLE_ENABLE_MASK) + dev_err(kbdev->dev, "Incoherent scheduler state on REQ_IDLE_ENABLE!"); + + kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER, + kbdev->csf.gpu_idle_dur_count); + + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, + GLB_REQ_REQ_IDLE_ENABLE, GLB_REQ_IDLE_ENABLE_MASK); + + dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x", + kbdev->csf.gpu_idle_dur_count); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); +} + +void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, + GLB_REQ_REQ_IDLE_DISABLE, + GLB_REQ_IDLE_DISABLE_MASK); + dev_dbg(kbdev->dev, "Sending request to disable gpu idle timer"); + + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); +} + +void kbase_csf_firmware_ping(struct kbase_device *const kbdev) +{ + const struct 
kbase_csf_global_iface *const global_iface = + &kbdev->csf.global_iface; + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + set_global_request(global_iface, GLB_REQ_PING_MASK); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + kbase_csf_scheduler_spin_unlock(kbdev, flags); +} + +int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev) +{ + /* Send the PING request, then block until it is acknowledged or the + * firmware timeout expires. + */ + kbase_csf_firmware_ping(kbdev); + return wait_for_global_request(kbdev, GLB_REQ_PING_MASK); +} + +int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev, + u64 const timeout) +{ + const struct kbase_csf_global_iface *const global_iface = + &kbdev->csf.global_iface; + unsigned long flags; + int err; + + /* The 'reg_lock' is also taken and is held until the update is + * complete, to ensure that updates of the timeout value by multiple + * users get serialized. + */ + mutex_lock(&kbdev->csf.reg_lock); + kbase_csf_scheduler_spin_lock(kbdev, &flags); + set_timeout_global(global_iface, timeout); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + /* Wait outside the spinlock; only the mutex is held while sleeping */ + err = wait_for_global_request(kbdev, GLB_REQ_CFG_PROGRESS_TIMER_MASK); + mutex_unlock(&kbdev->csf.reg_lock); + + return err; +} + +void kbase_csf_enter_protected_mode(struct kbase_device *kbdev) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + unsigned long flags; + int err; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + set_global_request(global_iface, GLB_REQ_PROTM_ENTER_MASK); + dev_dbg(kbdev->dev, "Sending request to enter protected mode"); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK); + + /* Only record protected mode as entered if the request was acknowledged */ + if (!err) { + /* Separate saved IRQ state for the scheduler lock nested below */ + unsigned long irq_flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->protected_mode = true; + kbase_ipa_protection_mode_switch_event(kbdev); + kbase_ipa_control_protm_entered(kbdev); + + 
kbase_csf_scheduler_spin_lock(kbdev, &irq_flags); + kbase_hwcnt_backend_csf_protm_entered(&kbdev->hwcnt_gpu_iface); + kbase_csf_scheduler_spin_unlock(kbdev, irq_flags); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } +} + +void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + set_global_request(global_iface, GLB_REQ_HALT_MASK); + dev_dbg(kbdev->dev, "Sending request to HALT MCU"); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + kbase_csf_scheduler_spin_unlock(kbdev, flags); +} + +int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + unsigned long flags; + int err = 0; + + /* Ensure GPU is powered-up until we complete config update.*/ + kbase_csf_scheduler_pm_active(kbdev); + + /* The 'reg_lock' is also taken and is held till the update is + * complete, to ensure the config update gets serialized. + */ + mutex_lock(&kbdev->csf.reg_lock); + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + set_global_request(global_iface, GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK); + dev_dbg(kbdev->dev, "Sending request for FIRMWARE_CONFIG_UPDATE"); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + err = wait_for_global_request(kbdev, + GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK); + mutex_unlock(&kbdev->csf.reg_lock); + + kbase_csf_scheduler_pm_idle(kbdev); + return err; +} + +/** + * copy_grp_and_stm - Copy CS and/or group data + * + * @iface: Global CSF interface provided by the firmware. + * @group_data: Pointer where to store all the group data + * (sequentially). + * @max_group_num: The maximum number of groups to be read. Can be 0, in + * which case group_data is unused. 
+ * @stream_data: Pointer where to store all the CS data + * (sequentially). + * @max_total_stream_num: The maximum number of CSs to be read. + * Can be 0, in which case stream_data is unused. + * + * Return: Total number of CSs, summed across all groups. + */ +static u32 copy_grp_and_stm( + const struct kbase_csf_global_iface * const iface, + struct basep_cs_group_control * const group_data, + u32 max_group_num, + struct basep_cs_stream_control * const stream_data, + u32 max_total_stream_num) +{ + u32 i, total_stream_num = 0; + + if (WARN_ON((max_group_num > 0) && !group_data)) + max_group_num = 0; + + if (WARN_ON((max_total_stream_num > 0) && !stream_data)) + max_total_stream_num = 0; + + for (i = 0; i < iface->group_num; i++) { + u32 j; + + if (i < max_group_num) { + group_data[i].features = iface->groups[i].features; + group_data[i].stream_num = iface->groups[i].stream_num; + group_data[i].suspend_size = + iface->groups[i].suspend_size; + } + for (j = 0; j < iface->groups[i].stream_num; j++) { + if (total_stream_num < max_total_stream_num) + stream_data[total_stream_num].features = + iface->groups[i].streams[j].features; + total_stream_num++; + } + } + + return total_stream_num; +} + +u32 kbase_csf_firmware_get_glb_iface( + struct kbase_device *kbdev, + struct basep_cs_group_control *const group_data, + u32 const max_group_num, + struct basep_cs_stream_control *const stream_data, + u32 const max_total_stream_num, u32 *const glb_version, + u32 *const features, u32 *const group_num, u32 *const prfcnt_size, + u32 *instr_features) +{ + const struct kbase_csf_global_iface * const iface = + &kbdev->csf.global_iface; + + if (WARN_ON(!glb_version) || WARN_ON(!features) || + WARN_ON(!group_num) || WARN_ON(!prfcnt_size) || + WARN_ON(!instr_features)) + return 0; + + *glb_version = iface->version; + *features = iface->features; + *group_num = iface->group_num; + *prfcnt_size = iface->prfcnt_size; + *instr_features = iface->instr_features; + + return 
copy_grp_and_stm(iface, group_data, max_group_num, + stream_data, max_total_stream_num); +} + +const char *kbase_csf_firmware_get_timeline_metadata( + struct kbase_device *kbdev, const char *name, size_t *size) +{ + struct firmware_timeline_metadata *metadata; + + list_for_each_entry( + metadata, &kbdev->csf.firmware_timeline_metadata, node) { + if (!strcmp(metadata->name, name)) { + *size = metadata->size; + return metadata->data; + } + } + + *size = 0; + return NULL; +} + +int kbase_csf_firmware_mcu_shared_mapping_init( + struct kbase_device *kbdev, + unsigned int num_pages, + unsigned long cpu_map_properties, + unsigned long gpu_map_properties, + struct kbase_csf_mapping *csf_mapping) +{ + struct tagged_addr *phys; + struct kbase_va_region *va_reg; + struct page **page_list; + void *cpu_addr; + int i, ret = 0; + pgprot_t cpu_map_prot = PAGE_KERNEL; + unsigned long gpu_map_prot; + + if (cpu_map_properties & PROT_READ) + cpu_map_prot = PAGE_KERNEL_RO; + + if (kbdev->system_coherency == COHERENCY_ACE) { + gpu_map_prot = + KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE); + } else { + gpu_map_prot = + KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); + cpu_map_prot = pgprot_writecombine(cpu_map_prot); + }; + + phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL); + if (!phys) + goto out; + + page_list = kmalloc_array(num_pages, sizeof(*page_list), GFP_KERNEL); + if (!page_list) + goto page_list_alloc_error; + + ret = kbase_mem_pool_alloc_pages( + &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], + num_pages, phys, false); + if (ret <= 0) + goto phys_mem_pool_alloc_error; + + for (i = 0; i < num_pages; i++) + page_list[i] = as_page(phys[i]); + + cpu_addr = vmap(page_list, num_pages, VM_MAP, cpu_map_prot); + if (!cpu_addr) + goto vmap_error; + + va_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0, + num_pages, KBASE_REG_ZONE_MCU_SHARED); + if (!va_reg) + goto va_region_alloc_error; + + mutex_lock(&kbdev->csf.reg_lock); + ret = 
kbase_add_va_region_rbtree(kbdev, va_reg, 0, num_pages, 1); + va_reg->flags &= ~KBASE_REG_FREE; + if (ret) + goto va_region_add_error; + mutex_unlock(&kbdev->csf.reg_lock); + + gpu_map_properties &= (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR); + gpu_map_properties |= gpu_map_prot; + + ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, + va_reg->start_pfn, &phys[0], num_pages, + gpu_map_properties, KBASE_MEM_GROUP_CSF_FW); + if (ret) + goto mmu_insert_pages_error; + + kfree(page_list); + csf_mapping->phys = phys; + csf_mapping->cpu_addr = cpu_addr; + csf_mapping->va_reg = va_reg; + csf_mapping->num_pages = num_pages; + + return 0; + +mmu_insert_pages_error: + mutex_lock(&kbdev->csf.reg_lock); + kbase_remove_va_region(va_reg); +va_region_add_error: + kbase_free_alloced_region(va_reg); + mutex_unlock(&kbdev->csf.reg_lock); +va_region_alloc_error: + vunmap(cpu_addr); +vmap_error: + kbase_mem_pool_free_pages( + &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], + num_pages, phys, false, false); + +phys_mem_pool_alloc_error: + kfree(page_list); +page_list_alloc_error: + kfree(phys); +out: + /* Zero-initialize the mapping to make sure that the termination + * function doesn't try to unmap or free random addresses. 
+ */ + csf_mapping->phys = NULL; + csf_mapping->cpu_addr = NULL; + csf_mapping->va_reg = NULL; + csf_mapping->num_pages = 0; + + return -ENOMEM; +} + +void kbase_csf_firmware_mcu_shared_mapping_term( + struct kbase_device *kbdev, struct kbase_csf_mapping *csf_mapping) +{ + if (csf_mapping->va_reg) { + mutex_lock(&kbdev->csf.reg_lock); + kbase_remove_va_region(csf_mapping->va_reg); + kbase_free_alloced_region(csf_mapping->va_reg); + mutex_unlock(&kbdev->csf.reg_lock); + } + + if (csf_mapping->phys) { + kbase_mem_pool_free_pages( + &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], + csf_mapping->num_pages, csf_mapping->phys, false, + false); + } + + vunmap(csf_mapping->cpu_addr); + kfree(csf_mapping->phys); +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware.h new file mode 100644 index 0000000..60d7065 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware.h @@ -0,0 +1,811 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#ifndef _KBASE_CSF_FIRMWARE_H_ +#define _KBASE_CSF_FIRMWARE_H_ + +#include "device/mali_kbase_device.h" +#include + +/* + * PAGE_KERNEL_RO was only defined on 32bit ARM in 4.19 in: + * Commit a3266bd49c721e2e0a71f352d83713fbd60caadb + * Author: Luis R. Rodriguez + * Date: Fri Aug 17 15:46:29 2018 -0700 + * + * mm: provide a fallback for PAGE_KERNEL_RO for architectures + * + * Some architectures do not define certain PAGE_KERNEL_* flags, this is + * either because: + * + * a) The way to implement some of these flags is *not yet ported*, or + * b) The architecture *has no way* to describe them + * + * [snip] + * + * This can be removed once support of 32bit ARM kernels predating 4.19 is no + * longer required. + */ +#ifndef PAGE_KERNEL_RO +#define PAGE_KERNEL_RO PAGE_KERNEL +#endif + +/* Address space number to claim for the firmware. */ +#define MCU_AS_NR 0 +#define MCU_AS_BITMASK (1 << MCU_AS_NR) + +/* Number of available Doorbells */ +#define CSF_NUM_DOORBELL ((u8)24) + +/* Offset to the first HW doorbell page */ +#define CSF_HW_DOORBELL_PAGE_OFFSET ((u32)0x80000) + +/* Size of HW Doorbell page, used to calculate the offset to subsequent pages */ +#define CSF_HW_DOORBELL_PAGE_SIZE ((u32)0x10000) + +/* Doorbell 0 is used by the driver. */ +#define CSF_KERNEL_DOORBELL_NR ((u32)0) + +/* Offset of name inside a trace buffer entry in the firmware image */ +#define TRACE_BUFFER_ENTRY_NAME_OFFSET (0x1C) + +/* All implementations of the host interface with major version 0 must comply + * with these restrictions: + */ +/* GLB_GROUP_NUM: At least 3 CSGs, but no more than 31 */ +#define MIN_SUPPORTED_CSGS 3 +#define MAX_SUPPORTED_CSGS 31 +/* GROUP_STREAM_NUM: At least 8 CSs per CSG, but no more than 32 */ +#define MIN_SUPPORTED_STREAMS_PER_GROUP 8 +/* Maximum CSs per csg. 
*/ +#define MAX_SUPPORTED_STREAMS_PER_GROUP 32 + +/* Waiting timeout for status change acknowledgment, in milliseconds */ +#define CSF_FIRMWARE_TIMEOUT_MS (3000) /* Relaxed to 3000ms from 800ms due to Android */ + +struct kbase_device; + + +/** + * struct kbase_csf_mapping - Memory mapping for CSF memory. + * @phys: Physical memory allocation used by the mapping. + * @cpu_addr: Starting CPU address for the mapping. + * @va_reg: GPU virtual address region for the mapping. + * @num_pages: Size of the mapping, in memory pages. + */ +struct kbase_csf_mapping { + struct tagged_addr *phys; + void *cpu_addr; + struct kbase_va_region *va_reg; + unsigned int num_pages; +}; + +/** + * struct kbase_csf_trace_buffers - List and state of firmware trace buffers. + * @list: List of trace buffers descriptors. + * @mcu_rw: Metadata for the MCU shared memory mapping used for + * GPU-readable,writable/CPU-writable variables. + * @mcu_write: Metadata for the MCU shared memory mapping used for + * GPU-writable/CPU-readable variables. + */ +struct kbase_csf_trace_buffers { + struct list_head list; + struct kbase_csf_mapping mcu_rw; + struct kbase_csf_mapping mcu_write; +}; + +/** + * struct kbase_csf_cmd_stream_info - CSI provided by the firmware. + * + * @kbdev: Address of the instance of a GPU platform device that implements + * this interface. + * @features: Bit field of CS features (e.g. which types of jobs + * are supported). Bits 7:0 specify the number of work registers(-1). + * Bits 11:8 specify the number of scoreboard entries(-1). + * @input: Address of CSI input page. + * @output: Address of CSI output page. + */ +struct kbase_csf_cmd_stream_info { + struct kbase_device *kbdev; + u32 features; + void *input; + void *output; +}; + +/** + * kbase_csf_firmware_cs_input() - Set a word in a CS's input page + * + * @info: CSI provided by the firmware. + * @offset: Offset of the word to be written, in bytes. + * @value: Value to be written. 
+ */ +void kbase_csf_firmware_cs_input( + const struct kbase_csf_cmd_stream_info *info, u32 offset, u32 value); + +/** + * kbase_csf_firmware_cs_input_read() - Read a word in a CS's input page + * + * Return: Value of the word read from the CS's input page. + * + * @info: CSI provided by the firmware. + * @offset: Offset of the word to be read, in bytes. + */ +u32 kbase_csf_firmware_cs_input_read( + const struct kbase_csf_cmd_stream_info *const info, const u32 offset); + +/** + * kbase_csf_firmware_cs_input_mask() - Set part of a word in a CS's input page + * + * @info: CSI provided by the firmware. + * @offset: Offset of the word to be modified, in bytes. + * @value: Value to be written. + * @mask: Bitmask with the bits to be modified set. + */ +void kbase_csf_firmware_cs_input_mask( + const struct kbase_csf_cmd_stream_info *info, u32 offset, + u32 value, u32 mask); + +/** + * kbase_csf_firmware_cs_output() - Read a word in a CS's output page + * + * Return: Value of the word read from the CS's output page. + * + * @info: CSI provided by the firmware. + * @offset: Offset of the word to be read, in bytes. + */ +u32 kbase_csf_firmware_cs_output( + const struct kbase_csf_cmd_stream_info *info, u32 offset); +/** + * struct kbase_csf_cmd_stream_group_info - CSG interface provided by the + * firmware. + * + * @kbdev: Address of the instance of a GPU platform device that implements + * this interface. + * @features: Bit mask of features. Reserved bits should be 0, and should + * be ignored. + * @input: Address of global interface input page. + * @output: Address of global interface output page. + * @suspend_size: Size in bytes for normal suspend buffer for the CSG + * @protm_suspend_size: Size in bytes for protected mode suspend buffer + * for the CSG. + * @stream_num: Number of CSs in the CSG. + * @stream_stride: Stride in bytes in JASID0 virtual address between + * CS capability structures. + * @streams: Address of an array of CS capability structures. 
+ */ +struct kbase_csf_cmd_stream_group_info { + struct kbase_device *kbdev; + u32 features; + void *input; + void *output; + u32 suspend_size; + u32 protm_suspend_size; + u32 stream_num; + u32 stream_stride; + struct kbase_csf_cmd_stream_info *streams; +}; + +/** + * kbase_csf_firmware_csg_input() - Set a word in a CSG's input page + * + * @info: CSG interface provided by the firmware. + * @offset: Offset of the word to be written, in bytes. + * @value: Value to be written. + */ +void kbase_csf_firmware_csg_input( + const struct kbase_csf_cmd_stream_group_info *info, u32 offset, + u32 value); + +/** + * kbase_csf_firmware_csg_input_read() - Read a word in a CSG's input page + * + * Return: Value of the word read from the CSG's input page. + * + * @info: CSG interface provided by the firmware. + * @offset: Offset of the word to be read, in bytes. + */ +u32 kbase_csf_firmware_csg_input_read( + const struct kbase_csf_cmd_stream_group_info *info, u32 offset); + +/** + * kbase_csf_firmware_csg_input_mask() - Set part of a word in a CSG's + * input page + * + * @info: CSG interface provided by the firmware. + * @offset: Offset of the word to be modified, in bytes. + * @value: Value to be written. + * @mask: Bitmask with the bits to be modified set. + */ +void kbase_csf_firmware_csg_input_mask( + const struct kbase_csf_cmd_stream_group_info *info, u32 offset, + u32 value, u32 mask); + +/** + * kbase_csf_firmware_csg_output()- Read a word in a CSG's output page + * + * Return: Value of the word read from the CSG's output page. + * + * @info: CSG interface provided by the firmware. + * @offset: Offset of the word to be read, in bytes. + */ +u32 kbase_csf_firmware_csg_output( + const struct kbase_csf_cmd_stream_group_info *info, u32 offset); + +/** + * struct kbase_csf_global_iface - Global CSF interface + * provided by the firmware. + * + * @kbdev: Address of the instance of a GPU platform device that implements + * this interface. 
+ * @version: Bits 31:16 hold the major version number and 15:0 hold the minor + * version number. A higher minor version is backwards-compatible + * with a lower minor version for the same major version. + * @features: Bit mask of features (e.g. whether certain types of job can + * be suspended). Reserved bits should be 0, and should be ignored. + * @input: Address of global interface input page. + * @output: Address of global interface output page. + * @group_num: Number of CSGs supported. + * @group_stride: Stride in bytes in JASID0 virtual address between + * CSG capability structures. + * @prfcnt_size: Performance counters size. + * @instr_features: Instrumentation features. (csf >= 1.1.0) + * @groups: Address of an array of CSG capability structures. + */ +struct kbase_csf_global_iface { + struct kbase_device *kbdev; + u32 version; + u32 features; + void *input; + void *output; + u32 group_num; + u32 group_stride; + u32 prfcnt_size; + u32 instr_features; + struct kbase_csf_cmd_stream_group_info *groups; +}; + +/** + * kbase_csf_firmware_global_input() - Set a word in the global input page + * + * @iface: CSF interface provided by the firmware. + * @offset: Offset of the word to be written, in bytes. + * @value: Value to be written. + */ +void kbase_csf_firmware_global_input( + const struct kbase_csf_global_iface *iface, u32 offset, u32 value); + +/** + * kbase_csf_firmware_global_input_mask() - Set part of a word in the global + * input page + * + * @iface: CSF interface provided by the firmware. + * @offset: Offset of the word to be modified, in bytes. + * @value: Value to be written. + * @mask: Bitmask with the bits to be modified set. + */ +void kbase_csf_firmware_global_input_mask( + const struct kbase_csf_global_iface *iface, u32 offset, + u32 value, u32 mask); + +/** + * kbase_csf_firmware_global_input_read() - Read a word in a global input page + * + * Return: Value of the word read from the global input page. 
+ * + * @info: CSF interface provided by the firmware. + * @offset: Offset of the word to be read, in bytes. + */ +u32 kbase_csf_firmware_global_input_read( + const struct kbase_csf_global_iface *info, u32 offset); + +/** + * kbase_csf_firmware_global_output() - Read a word in the global output page + * + * Return: Value of the word read from the global output page. + * + * @iface: CSF interface provided by the firmware. + * @offset: Offset of the word to be read, in bytes. + */ +u32 kbase_csf_firmware_global_output( + const struct kbase_csf_global_iface *iface, u32 offset); + +/* Calculate the offset to the Hw doorbell page corresponding to the + * doorbell number. + */ +static u32 csf_doorbell_offset(int doorbell_nr) +{ + WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL); + + return CSF_HW_DOORBELL_PAGE_OFFSET + + (doorbell_nr * CSF_HW_DOORBELL_PAGE_SIZE); +} + +static inline void kbase_csf_ring_doorbell(struct kbase_device *kbdev, + int doorbell_nr) +{ + WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL); + + kbase_reg_write(kbdev, csf_doorbell_offset(doorbell_nr), (u32)1); +} + +/** + * kbase_csf_read_firmware_memory - Read a value in a GPU address + * + * This function reads a value in a GPU address that belongs to + * a private firmware memory region. The function assumes that the location + * is not permanently mapped on the CPU address space, therefore it maps it + * and then unmaps it to access it independently. + * + * @kbdev: Device pointer + * @gpu_addr: GPU address to read + * @value: output pointer to which the read value will be written. + */ +void kbase_csf_read_firmware_memory(struct kbase_device *kbdev, + u32 gpu_addr, u32 *value); + +/** + * kbase_csf_update_firmware_memory - Write a value in a GPU address + * + * This function writes a given value in a GPU address that belongs to + * a private firmware memory region.
The function assumes that the destination + * is not permanently mapped on the CPU address space, therefore it maps it + * and then unmaps it to access it independently. + * + * @kbdev: Device pointer + * @gpu_addr: GPU address to write + * @value: Value to write + */ +void kbase_csf_update_firmware_memory(struct kbase_device *kbdev, + u32 gpu_addr, u32 value); + +/** + * kbase_csf_firmware_early_init() - Early initialization for the firmware. + * @kbdev: Kbase device + * + * Initialize resources related to the firmware. Must be called at kbase probe. + * + * Return: 0 if successful, negative error code on failure + */ +int kbase_csf_firmware_early_init(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_init() - Load the firmware for the CSF MCU + * @kbdev: Kbase device + * + * Request the firmware from user space and load it into memory. + * + * Return: 0 if successful, negative error code on failure + */ +int kbase_csf_firmware_init(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_term() - Unload the firmware + * @kbdev: Kbase device + * + * Frees the memory allocated by kbase_csf_firmware_init() + */ +void kbase_csf_firmware_term(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_ping - Send the ping request to firmware. + * + * The function sends the ping request to firmware. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +void kbase_csf_firmware_ping(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_ping_wait - Send the ping request to firmware and wait. + * + * The function sends the ping request to firmware and waits to confirm it is + * alive. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_csf_firmware_ping_wait(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_set_timeout - Set a hardware endpoint progress timeout.
+ * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @timeout: The maximum number of GPU cycles that is allowed to elapse + * without forward progress before the driver terminates a GPU + * command queue group. + * + * Configures the progress timeout value used by the firmware to decide + * when to report that a task is not making progress on an endpoint. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_csf_firmware_set_timeout(struct kbase_device *kbdev, u64 timeout); + +/** + * kbase_csf_enter_protected_mode - Send the Global request to firmware to + * enter protected mode and wait for its + * completion. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +void kbase_csf_enter_protected_mode(struct kbase_device *kbdev); + +static inline bool kbase_csf_firmware_mcu_halted(struct kbase_device *kbdev) +{ + return (kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS)) == + MCU_STATUS_HALTED); +} + +/** + * kbase_csf_firmware_trigger_mcu_halt - Send the Global request to firmware to + * halt its operation and bring itself + * into a known internal state for warm + * boot later. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_enable_mcu - Send the command to enable MCU + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +static inline void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev) +{ + /* Trigger the boot of MCU firmware, Use the AUTO mode as + * otherwise on fast reset, to exit protected mode, MCU will + * not reboot by itself to enter normal mode. + */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_AUTO); +} + +/** + * kbase_csf_firmware_disable_mcu - Send the command to disable MCU + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. 
+ */ +static inline void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev) +{ + kbase_reg_write(kbdev, GPU_CONTROL_REG(MCU_CONTROL), MCU_CNTRL_DISABLE); +} + +/** + * kbase_csf_firmware_disable_mcu_wait - Wait for the MCU to reach disabled + * status. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_trigger_reload - Trigger the reboot of MCU firmware, for the + * cold boot case firmware image would be + * reloaded from filesystem into memory. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +void kbase_csf_firmware_trigger_reload(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_reload_completed - The reboot of MCU firmware has + * completed. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_global_reinit - Send the Global configuration requests + * after the reboot of MCU firmware. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @core_mask: Mask of the enabled shader cores. + */ +void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev, + u64 core_mask); + +/** + * kbase_csf_firmware_global_reinit_complete - Check the Global configuration + * requests, sent after the reboot of MCU firmware, have + * completed or not. + * + * Return: true if the Global configuration requests completed otherwise false. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_update_core_attr - Send the Global configuration request + * to update the requested core attribute + * changes. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @update_core_pwroff_timer: If true, signal the firmware needs to update + * the MCU power-off timer value. + * @update_core_mask: If true, need to do the core_mask update with + * the supplied core_mask value. + * @core_mask: New core mask value if update_core_mask is true, + * otherwise unused. + */ +void kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev, + bool update_core_pwroff_timer, bool update_core_mask, u64 core_mask); + +/** + * kbase_csf_firmware_core_attr_updated - Check the Global configuration + * request has completed or not, that was sent to update + * the core attributes. + * + * Return: true if the Global configuration request to update the core + * attributes has completed, otherwise false. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev); + +/** + * Request the global control block of CSF interface capabilities + * + * Return: Total number of CSs, summed across all groups. + * + * @kbdev: Kbase device. + * @group_data: Pointer where to store all the group data + * (sequentially). + * @max_group_num: The maximum number of groups to be read. + * Can be 0, in which case group_data is unused. + * @stream_data: Pointer where to store all the CS data + * (sequentially). + * @max_total_stream_num: The maximum number of CSs to be read. + * Can be 0, in which case stream_data is unused. + * @glb_version: Where to store the global interface version. + * @features: Where to store a bit mask of features (e.g. + * whether certain types of job can be suspended). + * @group_num: Where to store the number of CSGs + * supported. + * @prfcnt_size: Where to store the size of CSF performance counters, + * in bytes. Bits 31:16 hold the size of firmware + * performance counter data and 15:0 hold the size of + * hardware performance counter data. + * @instr_features: Instrumentation features. Bits 7:4 hold the max size + * of events. 
Bits 3:0 hold the offset update rate. + * (csf >= 1,1,0) + */ +u32 kbase_csf_firmware_get_glb_iface( + struct kbase_device *kbdev, struct basep_cs_group_control *group_data, + u32 max_group_num, struct basep_cs_stream_control *stream_data, + u32 max_total_stream_num, u32 *glb_version, u32 *features, + u32 *group_num, u32 *prfcnt_size, u32 *instr_features); + +/** + * Get CSF firmware header timeline metadata content + * + * Return: The firmware timeline metadata content which match @p name. + * + * @kbdev: Kbase device. + * @name: Name of the metadata which metadata content to be returned. + * @size: Metadata size if specified metadata found. + */ +const char *kbase_csf_firmware_get_timeline_metadata(struct kbase_device *kbdev, + const char *name, size_t *size); + +/** + * kbase_csf_firmware_mcu_shared_mapping_init - + * Allocate and map MCU shared memory. + * + * This helper function allocates memory and maps it on both the CPU + * and the GPU address spaces. Most of the properties of the mapping + * are implicit and will be automatically determined by the function, + * e.g. whether memory is cacheable. + * + * The client is only expected to specify whether the mapping is readable + * or writable in the CPU and the GPU address spaces; any other flag + * will be ignored by the function. + * + * Return: 0 if success, or an error code on failure. + * + * @kbdev: Kbase device the memory mapping shall belong to. + * @num_pages: Number of memory pages to map. + * @cpu_map_properties: Either PROT_READ or PROT_WRITE. + * @gpu_map_properties: Either KBASE_REG_GPU_RD or KBASE_REG_GPU_WR. + * @csf_mapping: Object where to write metadata for the memory mapping. + */ +int kbase_csf_firmware_mcu_shared_mapping_init( + struct kbase_device *kbdev, + unsigned int num_pages, + unsigned long cpu_map_properties, + unsigned long gpu_map_properties, + struct kbase_csf_mapping *csf_mapping); + +/** + * kbase_csf_firmware_mcu_shared_mapping_term - Unmap and free MCU shared memory. 
+ * + * @kbdev: Device pointer. + * @csf_mapping: Metadata of the memory mapping to terminate. + */ +void kbase_csf_firmware_mcu_shared_mapping_term( + struct kbase_device *kbdev, struct kbase_csf_mapping *csf_mapping); + +#ifndef MALI_KBASE_BUILD +/** + * mali_kutf_process_fw_utf_entry() - Process the "Firmware UTF tests" section + * + * Read "Firmware UTF tests" section from the firmware image and create + * necessary kutf app+suite+tests. + * + * Return: 0 if successful, negative error code on failure. In both cases + * caller will have to invoke mali_kutf_fw_utf_entry_cleanup for the cleanup + * + * @kbdev: Kbase device structure + * @fw_data: Pointer to the start of firmware binary image loaded from disk + * @fw_size: Size (in bytes) of the firmware image + * @entry: Pointer to the start of the section + */ +int mali_kutf_process_fw_utf_entry(struct kbase_device *kbdev, + const void *fw_data, size_t fw_size, const u32 *entry); + +/** + * mali_kutf_fw_utf_entry_cleanup() - Remove the Fw UTF tests debugfs entries + * + * Destroy the kutf apps+suites+tests created on parsing "Firmware UTF tests" + * section from the firmware image. + * + * @kbdev: Kbase device structure + */ +void mali_kutf_fw_utf_entry_cleanup(struct kbase_device *kbdev); +#endif + +#ifdef CONFIG_MALI_DEBUG +extern bool fw_debug; +#endif + +static inline long kbase_csf_timeout_in_jiffies(const unsigned int msecs) +{ +#ifdef CONFIG_MALI_DEBUG + return (fw_debug ? MAX_SCHEDULE_TIMEOUT : msecs_to_jiffies(msecs)); +#else + return msecs_to_jiffies(msecs); +#endif +} + +/** + * kbase_csf_firmware_enable_gpu_idle_timer() - Activate the idle hysteresis + * monitoring operation + * + * Program the firmware interface with its configured hysteresis count value + * and enable the firmware to act on it. The Caller is + * assumed to hold the kbdev->csf.scheduler.interrupt_lock. 
+ * + * @kbdev: Kbase device structure + */ +void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_disable_gpu_idle_timer() - Disable the idle time + * hysteresis monitoring operation + * + * Program the firmware interface to disable the idle hysteresis timer. The + * Caller is assumed to hold the kbdev->csf.scheduler.interrupt_lock. + * + * @kbdev: Kbase device structure + */ +void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_get_gpu_idle_hysteresis_time - Get the firmware GPU idle + * detection hysteresis duration + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * Return: the internally recorded hysteresis (nominal) value. + */ +u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_set_gpu_idle_hysteresis_time - Set the firmware GPU idle + * detection hysteresis duration + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @dur: The duration value (unit: milliseconds) for the configuring + * hysteresis field for GPU idle detection + * + * The supplied value will be recorded internally without any change. But the + * actual field value will be subject to hysteresis source frequency scaling + * and maximum value limiting. The default source will be SYSTEM_TIMESTAMP + * counter. But in case the platform is not able to supply it, the GPU + * CYCLE_COUNTER source will be used as an alternative. Bit-31 on the + * returned value is the source configuration flag, and it is set to '1' + * when CYCLE_COUNTER alternative source is used. + * + * Return: the actual internally configured hysteresis field value. 
+ */ +u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur); + +/** + * kbase_csf_firmware_get_mcu_core_pwroff_time - Get the MCU core power-off + * time value + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * Return: the internally recorded MCU core power-off (nominal) value. The unit + * of the value is in micro-seconds. + */ +u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_set_mcu_core_pwroff_time - Set the MCU core power-off + * time value + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @dur: The duration value (unit: micro-seconds) for configuring MCU + * core power-off timer, when the shader cores' power + * transitions are delegated to the MCU (normal operational + * mode) + * + * The supplied value will be recorded internally without any change. But the + * actual field value will be subject to core power-off timer source frequency + * scaling and maximum value limiting. The default source will be + * SYSTEM_TIMESTAMP counter. But in case the platform is not able to supply it, + * the GPU CYCLE_COUNTER source will be used as an alternative. Bit-31 on the + * returned value is the source configuration flag, and it is set to '1' + * when CYCLE_COUNTER alternative source is used. + * + * The configured MCU core power-off timer will only have effect when the host + * driver has delegated the shader cores' power management to MCU. + * + * Return: the actual internal core power-off timer value in register defined + * format. 
+ */ +u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur); + +/** + * kbase_csf_interface_version - Helper function to build the full firmware + * interface version in a format compatible with + * the GLB_VERSION register + * + * @major: major version of csf interface + * @minor: minor version of csf interface + * @patch: patch version of csf interface + * + * Return: firmware interface version + */ +static inline u32 kbase_csf_interface_version(u32 major, u32 minor, u32 patch) +{ + return ((major << GLB_VERSION_MAJOR_SHIFT) | + (minor << GLB_VERSION_MINOR_SHIFT) | + (patch << GLB_VERSION_PATCH_SHIFT)); +} + +/** + * kbase_csf_trigger_firmware_config_update - Send a firmware config update. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * Any change made to a firmware configuration entry or tracebuffer entry + * requires a GPU silent reset to reflect the configuration changes + * requested, but if Firmware.header.entry.bit(30) is set then we can request a + * FIRMWARE_CONFIG_UPDATE rather than doing a silent reset. + * + * Return: 0 if success, or negative error code on failure. + */ +int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev); +#endif diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware_cfg.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware_cfg.c new file mode 100644 index 0000000..f00acb1 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware_cfg.c @@ -0,0 +1,327 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include "mali_kbase_csf_firmware_cfg.h" +#include + +#if CONFIG_SYSFS +#define CSF_FIRMWARE_CFG_SYSFS_DIR_NAME "firmware_config" + +/** + * struct firmware_config - Configuration item within the MCU firmware + * + * The firmware may expose configuration options. Each option has a name, the + * address where the option is controlled and the minimum and maximum values + * that the option can take. + * + * @node: List head linking all options to + * kbase_device:csf.firmware_config + * @kbdev: Pointer to the Kbase device + * @kobj: Kobject corresponding to the sysfs sub-directory, + * inside CSF_FIRMWARE_CFG_SYSFS_DIR_NAME directory, + * representing the configuration option @name. 
+ * @kobj_inited: kobject initialization state + * @updatable: Indicates whether config items can be updated with + * FIRMWARE_CONFIG_UPDATE + * @name: NUL-terminated string naming the option + * @address: The address in the firmware image of the configuration option + * @min: The lowest legal value of the configuration option + * @max: The maximum legal value of the configuration option + * @cur_val: The current value of the configuration option + */ +struct firmware_config { + struct list_head node; + struct kbase_device *kbdev; + struct kobject kobj; + bool kobj_inited; + bool updatable; + char *name; + u32 address; + u32 min; + u32 max; + u32 cur_val; +}; + +#define FW_CFG_ATTR(_name, _mode) \ + struct attribute fw_cfg_attr_##_name = { \ + .name = __stringify(_name), \ + .mode = VERIFY_OCTAL_PERMISSIONS(_mode), \ + } + +static FW_CFG_ATTR(min, S_IRUGO); +static FW_CFG_ATTR(max, S_IRUGO); +static FW_CFG_ATTR(cur, S_IRUGO | S_IWUSR); + +static void fw_cfg_kobj_release(struct kobject *kobj) +{ + struct firmware_config *config = + container_of(kobj, struct firmware_config, kobj); + + kfree(config); +} + +static ssize_t show_fw_cfg(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct firmware_config *config = + container_of(kobj, struct firmware_config, kobj); + struct kbase_device *kbdev = config->kbdev; + u32 val = 0; + + if (!kbdev) + return -ENODEV; + + if (attr == &fw_cfg_attr_max) + val = config->max; + else if (attr == &fw_cfg_attr_min) + val = config->min; + else if (attr == &fw_cfg_attr_cur) { + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + val = config->cur_val; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } else { + dev_warn(kbdev->dev, + "Unexpected read from entry %s/%s", + config->name, attr->name); + return -EINVAL; + } + + return snprintf(buf, PAGE_SIZE, "%u\n", val); +} + +static ssize_t store_fw_cfg(struct kobject *kobj, + struct attribute *attr, + const char *buf, + size_t count) +{ + 
struct firmware_config *config = + container_of(kobj, struct firmware_config, kobj); + struct kbase_device *kbdev = config->kbdev; + + if (!kbdev) + return -ENODEV; + + if (attr == &fw_cfg_attr_cur) { + unsigned long flags; + u32 val; + int ret = kstrtouint(buf, 0, &val); + + if (ret) { + dev_err(kbdev->dev, + "Couldn't process %s/%s write operation.\n" + "Use format \n", + config->name, attr->name); + return -EINVAL; + } + + if ((val < config->min) || (val > config->max)) + return -EINVAL; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + if (config->cur_val == val) { + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return count; + } + + /* If configuration update cannot be performed with + * FIRMWARE_CONFIG_UPDATE then we need to do a + * silent reset before we update the memory. + */ + if (!config->updatable) { + /* + * If there is already a GPU reset pending then inform + * the User to retry the write. + */ + if (kbase_reset_gpu_silent(kbdev)) { + spin_unlock_irqrestore(&kbdev->hwaccess_lock, + flags); + return -EAGAIN; + } + } + + /* + * GPU reset request has been placed, now update the + * firmware image. GPU reset will take place only after + * hwaccess_lock is released. + * Update made to firmware image in memory would not + * be lost on GPU reset as configuration entries reside + * in the RONLY section of firmware image, which is not + * reloaded on firmware reboot due to GPU reset. + */ + kbase_csf_update_firmware_memory( + kbdev, config->address, val); + + config->cur_val = val; + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* If we can update the config without firmware reset then + * we need to just trigger FIRMWARE_CONFIG_UPDATE. 
+ */ + if (config->updatable) { + ret = kbase_csf_trigger_firmware_config_update(kbdev); + if (ret) + return ret; + } + + /* Wait for the config update to take effect */ + if (!config->updatable) + kbase_reset_gpu_wait(kbdev); + } else { + dev_warn(kbdev->dev, + "Unexpected write to entry %s/%s", + config->name, attr->name); + return -EINVAL; + } + + return count; +} + +static const struct sysfs_ops fw_cfg_ops = { + .show = &show_fw_cfg, + .store = &store_fw_cfg, +}; + +static struct attribute *fw_cfg_attrs[] = { + &fw_cfg_attr_min, + &fw_cfg_attr_max, + &fw_cfg_attr_cur, + NULL, +}; + +static struct kobj_type fw_cfg_kobj_type = { + .release = &fw_cfg_kobj_release, + .sysfs_ops = &fw_cfg_ops, + .default_attrs = fw_cfg_attrs, +}; + +int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev) +{ + struct firmware_config *config; + + kbdev->csf.fw_cfg_kobj = kobject_create_and_add( + CSF_FIRMWARE_CFG_SYSFS_DIR_NAME, &kbdev->dev->kobj); + if (!kbdev->csf.fw_cfg_kobj) { + kobject_put(kbdev->csf.fw_cfg_kobj); + dev_err(kbdev->dev, + "Creation of %s sysfs sub-directory failed\n", + CSF_FIRMWARE_CFG_SYSFS_DIR_NAME); + return -ENOMEM; + } + + list_for_each_entry(config, &kbdev->csf.firmware_config, node) { + int err; + + kbase_csf_read_firmware_memory(kbdev, config->address, + &config->cur_val); + + err = kobject_init_and_add(&config->kobj, &fw_cfg_kobj_type, + kbdev->csf.fw_cfg_kobj, "%s", config->name); + if (err) { + kobject_put(&config->kobj); + dev_err(kbdev->dev, + "Creation of %s sysfs sub-directory failed\n", + config->name); + return err; + } + + config->kobj_inited = true; + } + + return 0; +} + +void kbase_csf_firmware_cfg_term(struct kbase_device *kbdev) +{ + while (!list_empty(&kbdev->csf.firmware_config)) { + struct firmware_config *config; + + config = list_first_entry(&kbdev->csf.firmware_config, + struct firmware_config, node); + list_del(&config->node); + + if (config->kobj_inited) { + kobject_del(&config->kobj); + kobject_put(&config->kobj); + } else + 
kfree(config); + } + + kobject_del(kbdev->csf.fw_cfg_kobj); + kobject_put(kbdev->csf.fw_cfg_kobj); +} + +int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev, + const struct firmware *fw, + const u32 *entry, + unsigned int size, bool updatable) +{ + const char *name = (char *)&entry[3]; + struct firmware_config *config; + const unsigned int name_len = size - CONFIGURATION_ENTRY_NAME_OFFSET; + + /* Allocate enough space for struct firmware_config and the + * configuration option name (with NULL termination) + */ + config = kzalloc(sizeof(*config) + name_len + 1, GFP_KERNEL); + + if (!config) + return -ENOMEM; + + config->kbdev = kbdev; + config->updatable = updatable; + config->name = (char *)(config+1); + config->address = entry[0]; + config->min = entry[1]; + config->max = entry[2]; + + memcpy(config->name, name, name_len); + config->name[name_len] = 0; + + list_add(&config->node, &kbdev->csf.firmware_config); + + dev_dbg(kbdev->dev, "Configuration option '%s' at 0x%x range %u-%u", + config->name, config->address, + config->min, config->max); + + return 0; +} +#else +int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev) +{ + return 0; +} + +void kbase_csf_firmware_cfg_term(struct kbase_device *kbdev) +{ + /* !CONFIG_SYSFS: Nothing to do here */ +} + +int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev, + const struct firmware *fw, + const u32 *entry, unsigned int size) +{ + return 0; +} +#endif /* CONFIG_SYSFS */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware_cfg.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware_cfg.h new file mode 100644 index 0000000..080c154 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware_cfg.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_FIRMWARE_CFG_H_ +#define _KBASE_CSF_FIRMWARE_CFG_H_ + +#include +#include "mali_kbase_csf_firmware.h" +#include + +#define CONFIGURATION_ENTRY_NAME_OFFSET (0xC) + +/** + * kbase_csf_firmware_cfg_init - Create the sysfs directory for configuration + * options present in firmware image. + * + * This function would create a sysfs directory and populate it with a + * sub-directory, that would contain a file per attribute, for every + * configuration option parsed from firmware image. + * + * @kbdev: Pointer to the Kbase device + * + * Return: The initialization error code. + */ +int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_cfg_term - Delete the sysfs directory that was created + * for firmware configuration options. + * + * @kbdev: Pointer to the Kbase device + * + */ +void kbase_csf_firmware_cfg_term(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_cfg_option_entry_parse() - Process a + * "configuration option" section. + * + * Read a "configuration option" section adding it to the + * kbase_device:csf.firmware_config list. 
+ * + * Return: 0 if successful, negative error code on failure + * + * @kbdev: Kbase device structure + * @fw: Firmware image containing the section + * @entry: Pointer to the section + * @size: Size (in bytes) of the section + * @updatable: Indicates if entry can be updated with FIRMWARE_CONFIG_UPDATE + */ +int kbase_csf_firmware_cfg_option_entry_parse(struct kbase_device *kbdev, + const struct firmware *fw, + const u32 *entry, + unsigned int size, + bool updatable); +#endif /* _KBASE_CSF_FIRMWARE_CFG_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware_no_mali.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware_no_mali.c new file mode 100644 index 0000000..ae2ad33 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_firmware_no_mali.c @@ -0,0 +1,1389 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include "mali_kbase.h" +#include "mali_kbase_csf_firmware.h" +#include "mali_kbase_csf_trace_buffer.h" +#include "mali_kbase_csf_timeout.h" +#include "mali_kbase_mem.h" +#include "mali_kbase_reset_gpu.h" +#include "mali_kbase_ctx_sched.h" +#include "device/mali_kbase_device.h" +#include "backend/gpu/mali_kbase_pm_internal.h" +#include "mali_kbase_csf_scheduler.h" +#include "mmu/mali_kbase_mmu.h" +#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" + +#include +#include +#include +#include +#include +#include +#if (KERNEL_VERSION(4, 13, 0) <= LINUX_VERSION_CODE) +#include +#endif +#include + +#ifdef CONFIG_MALI_DEBUG +/* Makes Driver wait indefinitely for an acknowledgment for the different + * requests it sends to firmware. Otherwise the timeouts interfere with the + * use of debugger for source-level debugging of firmware as Driver initiates + * a GPU reset when a request times out, which always happen when a debugger + * is connected. + */ +bool fw_debug; /* Default value of 0/false */ +module_param(fw_debug, bool, 0444); +MODULE_PARM_DESC(fw_debug, + "Enables effective use of a debugger for debugging firmware code."); +#endif + +#define DUMMY_FW_PAGE_SIZE SZ_4K + +/** + * struct dummy_firmware_csi - Represents a dummy interface for MCU firmware CSs + * + * @cs_kernel_input: CS kernel input memory region + * @cs_kernel_output: CS kernel output memory region + */ +struct dummy_firmware_csi { + u8 cs_kernel_input[DUMMY_FW_PAGE_SIZE]; + u8 cs_kernel_output[DUMMY_FW_PAGE_SIZE]; +}; + +/** + * struct dummy_firmware_csg - Represents a dummy interface for MCU firmware CSGs + * + * @csg_input: CSG kernel input memory region + * @csg_output: CSG kernel output memory region + * @csi: Dummy firmware CSIs + */ +struct dummy_firmware_csg { + u8 csg_input[DUMMY_FW_PAGE_SIZE]; + u8 csg_output[DUMMY_FW_PAGE_SIZE]; + struct dummy_firmware_csi csi[8]; +} dummy_firmware_csg; + +/** + * struct dummy_firmware_interface - Represents a dummy interface in the MCU 
firmware + * + * @global_input: Global input memory region + * @global_output: Global output memory region + * @csg: Dummy firmware CSGs + * @node: Interface objects are on the kbase_device:csf.firmware_interfaces + * list using this list_head to link them + */ +struct dummy_firmware_interface { + u8 global_input[DUMMY_FW_PAGE_SIZE]; + u8 global_output[DUMMY_FW_PAGE_SIZE]; + struct dummy_firmware_csg csg[8]; + struct list_head node; +} dummy_firmware_interface; + +#define CSF_GLB_REQ_CFG_MASK \ + (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ + GLB_REQ_CFG_PWROFF_TIMER_MASK) + +static inline u32 input_page_read(const u32 *const input, const u32 offset) +{ + WARN_ON(offset % sizeof(u32)); + + return input[offset / sizeof(u32)]; +} + +static inline void input_page_write(u32 *const input, const u32 offset, + const u32 value) +{ + WARN_ON(offset % sizeof(u32)); + + input[offset / sizeof(u32)] = value; +} + +static inline void input_page_partial_write(u32 *const input, const u32 offset, + u32 value, u32 mask) +{ + WARN_ON(offset % sizeof(u32)); + + input[offset / sizeof(u32)] = + (input_page_read(input, offset) & ~mask) | (value & mask); +} + +static inline u32 output_page_read(const u32 *const output, const u32 offset) +{ + WARN_ON(offset % sizeof(u32)); + + return output[offset / sizeof(u32)]; +} + +static inline void output_page_write(u32 *const output, const u32 offset, + const u32 value) +{ + WARN_ON(offset % sizeof(u32)); + + output[offset / sizeof(u32)] = value; +} + +/** + * invent_memory_setup_entry() - Invent an "interface memory setup" section + * + * Invent an "interface memory setup" section similar to one from a firmware + * image. If successful the interface will be added to the + * kbase_device:csf.firmware_interfaces list. 
+ * + * Return: 0 if successful, negative error code on failure + * + * @kbdev: Kbase device structure + */ +static int invent_memory_setup_entry(struct kbase_device *kbdev) +{ + struct dummy_firmware_interface *interface = NULL; + + /* Allocate enough memory for the struct dummy_firmware_interface. + */ + interface = kzalloc(sizeof(*interface), GFP_KERNEL); + if (!interface) + return -ENOMEM; + + kbdev->csf.shared_interface = interface; + list_add(&interface->node, &kbdev->csf.firmware_interfaces); + + /* NO_MALI: Don't insert any firmware pages */ + return 0; +} + +static void free_global_iface(struct kbase_device *kbdev) +{ + struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface; + + if (iface->groups) { + unsigned int gid; + + for (gid = 0; gid < iface->group_num; ++gid) + kfree(iface->groups[gid].streams); + + kfree(iface->groups); + iface->groups = NULL; + } +} + +static int invent_cmd_stream_group_info(struct kbase_device *kbdev, + struct kbase_csf_cmd_stream_group_info *ginfo, + struct dummy_firmware_csg *csg) +{ + unsigned int sid; + + ginfo->input = csg->csg_input; + ginfo->output = csg->csg_output; + + ginfo->kbdev = kbdev; + ginfo->features = 0; + ginfo->suspend_size = 64; + ginfo->protm_suspend_size = 64; + ginfo->stream_num = ARRAY_SIZE(csg->csi); + ginfo->stream_stride = 0; + + ginfo->streams = kcalloc(ginfo->stream_num, sizeof(*ginfo->streams), GFP_KERNEL); + if (ginfo->streams == NULL) { + return -ENOMEM; + } + + for (sid = 0; sid < ginfo->stream_num; ++sid) { + struct kbase_csf_cmd_stream_info *stream = &ginfo->streams[sid]; + struct dummy_firmware_csi *csi = &csg->csi[sid]; + + stream->input = csi->cs_kernel_input; + stream->output = csi->cs_kernel_output; + + stream->kbdev = kbdev; + stream->features = + STREAM_FEATURES_WORK_REGISTERS_SET(0, 80) | + STREAM_FEATURES_SCOREBOARDS_SET(0, 8) | + STREAM_FEATURES_COMPUTE_SET(0, 1) | + STREAM_FEATURES_FRAGMENT_SET(0, 1) | + STREAM_FEATURES_TILER_SET(0, 1); + } + + return 0; +} + +static int 
invent_capabilities(struct kbase_device *kbdev) +{ + struct dummy_firmware_interface *interface = kbdev->csf.shared_interface; + struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface; + unsigned int gid; + + iface->input = interface->global_input; + iface->output = interface->global_output; + + iface->version = 1; + iface->kbdev = kbdev; + iface->features = 0; + iface->prfcnt_size = 64; + + if (iface->version >= kbase_csf_interface_version(1, 1, 0)) { + /* update rate=1, max event size = 1<<8 = 256 */ + iface->instr_features = 0x81; + } else { + iface->instr_features = 0; + } + + iface->group_num = ARRAY_SIZE(interface->csg); + iface->group_stride = 0; + + iface->groups = kcalloc(iface->group_num, sizeof(*iface->groups), GFP_KERNEL); + if (iface->groups == NULL) { + return -ENOMEM; + } + + for (gid = 0; gid < iface->group_num; ++gid) { + int err; + + err = invent_cmd_stream_group_info(kbdev, &iface->groups[gid], + &interface->csg[gid]); + if (err < 0) { + free_global_iface(kbdev); + return err; + } + } + + return 0; +} + +void kbase_csf_read_firmware_memory(struct kbase_device *kbdev, + u32 gpu_addr, u32 *value) +{ + /* NO_MALI: Nothing to do here */ +} + + +void kbase_csf_update_firmware_memory(struct kbase_device *kbdev, + u32 gpu_addr, u32 value) +{ + /* NO_MALI: Nothing to do here */ +} + +void kbase_csf_firmware_cs_input( + const struct kbase_csf_cmd_stream_info *const info, const u32 offset, + const u32 value) +{ + const struct kbase_device * const kbdev = info->kbdev; + + dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x\n", offset, value); + input_page_write(info->input, offset, value); + + if (offset == CS_REQ) { + /* NO_MALI: Immediately acknowledge requests */ + output_page_write(info->output, CS_ACK, value); + } +} + +u32 kbase_csf_firmware_cs_input_read( + const struct kbase_csf_cmd_stream_info *const info, + const u32 offset) +{ + const struct kbase_device * const kbdev = info->kbdev; + u32 const val = input_page_read(info->input, offset); 
+ + dev_dbg(kbdev->dev, "cs input r: reg %08x val %08x\n", offset, val); + return val; +} + +void kbase_csf_firmware_cs_input_mask( + const struct kbase_csf_cmd_stream_info *const info, const u32 offset, + const u32 value, const u32 mask) +{ + const struct kbase_device * const kbdev = info->kbdev; + + dev_dbg(kbdev->dev, "cs input w: reg %08x val %08x mask %08x\n", + offset, value, mask); + + /* NO_MALI: Go through kbase_csf_firmware_cs_input to capture writes */ + kbase_csf_firmware_cs_input(info, offset, (input_page_read(info->input, offset) & ~mask) | (value & mask)); +} + +u32 kbase_csf_firmware_cs_output( + const struct kbase_csf_cmd_stream_info *const info, const u32 offset) +{ + const struct kbase_device * const kbdev = info->kbdev; + u32 const val = output_page_read(info->output, offset); + + dev_dbg(kbdev->dev, "cs output r: reg %08x val %08x\n", offset, val); + return val; +} + +void kbase_csf_firmware_csg_input( + const struct kbase_csf_cmd_stream_group_info *const info, + const u32 offset, const u32 value) +{ + const struct kbase_device * const kbdev = info->kbdev; + + dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x\n", + offset, value); + input_page_write(info->input, offset, value); + + if (offset == CSG_REQ) { + /* NO_MALI: Immediately acknowledge requests */ + output_page_write(info->output, CSG_ACK, value); + } +} + +u32 kbase_csf_firmware_csg_input_read( + const struct kbase_csf_cmd_stream_group_info *const info, + const u32 offset) +{ + const struct kbase_device * const kbdev = info->kbdev; + u32 const val = input_page_read(info->input, offset); + + dev_dbg(kbdev->dev, "csg input r: reg %08x val %08x\n", offset, val); + return val; +} + +void kbase_csf_firmware_csg_input_mask( + const struct kbase_csf_cmd_stream_group_info *const info, + const u32 offset, const u32 value, const u32 mask) +{ + const struct kbase_device * const kbdev = info->kbdev; + + dev_dbg(kbdev->dev, "csg input w: reg %08x val %08x mask %08x\n", + offset, value, mask); + + 
/* NO_MALI: Go through kbase_csf_firmware_csg_input to capture writes */ + kbase_csf_firmware_csg_input(info, offset, (input_page_read(info->input, offset) & ~mask) | (value & mask)); +} + +u32 kbase_csf_firmware_csg_output( + const struct kbase_csf_cmd_stream_group_info *const info, + const u32 offset) +{ + const struct kbase_device * const kbdev = info->kbdev; + u32 const val = output_page_read(info->output, offset); + + dev_dbg(kbdev->dev, "csg output r: reg %08x val %08x\n", offset, val); + return val; +} + +static void +csf_firmware_prfcnt_process(const struct kbase_csf_global_iface *const iface, + const u32 glb_req) +{ + struct kbase_device *kbdev = iface->kbdev; + u32 glb_ack = output_page_read(iface->output, GLB_ACK); + /* If the value of GLB_REQ.PRFCNT_SAMPLE is different from the value of + * GLB_ACK.PRFCNT_SAMPLE, the CSF will sample the performance counters. + */ + if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_SAMPLE_MASK) { + /* NO_MALI only uses the first buffer in the ring buffer. */ + input_page_write(iface->input, GLB_PRFCNT_EXTRACT, 0); + output_page_write(iface->output, GLB_PRFCNT_INSERT, 1); + kbase_reg_write(kbdev, GPU_COMMAND, GPU_COMMAND_PRFCNT_SAMPLE); + } + + /* Propagate enable masks to model if request to enable. */ + if (glb_req & GLB_REQ_PRFCNT_ENABLE_MASK) { + u32 tiler_en, l2_en, sc_en; + + tiler_en = input_page_read(iface->input, GLB_PRFCNT_TILER_EN); + l2_en = input_page_read(iface->input, GLB_PRFCNT_MMU_L2_EN); + sc_en = input_page_read(iface->input, GLB_PRFCNT_SHADER_EN); + + /* NO_MALI platform enabled all CSHW counters by default. 
*/ + kbase_reg_write(kbdev, PRFCNT_TILER_EN, tiler_en); + kbase_reg_write(kbdev, PRFCNT_MMU_L2_EN, l2_en); + kbase_reg_write(kbdev, PRFCNT_SHADER_EN, sc_en); + } +} + +void kbase_csf_firmware_global_input( + const struct kbase_csf_global_iface *const iface, const u32 offset, + const u32 value) +{ + const struct kbase_device * const kbdev = iface->kbdev; + + dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x\n", offset, value); + input_page_write(iface->input, offset, value); + + if (offset == GLB_REQ) { + csf_firmware_prfcnt_process(iface, value); + /* NO_MALI: Immediately acknowledge requests */ + output_page_write(iface->output, GLB_ACK, value); + } +} + +void kbase_csf_firmware_global_input_mask( + const struct kbase_csf_global_iface *const iface, const u32 offset, + const u32 value, const u32 mask) +{ + const struct kbase_device * const kbdev = iface->kbdev; + + dev_dbg(kbdev->dev, "glob input w: reg %08x val %08x mask %08x\n", + offset, value, mask); + + /* NO_MALI: Go through kbase_csf_firmware_global_input to capture writes */ + kbase_csf_firmware_global_input(iface, offset, (input_page_read(iface->input, offset) & ~mask) | (value & mask)); +} + +u32 kbase_csf_firmware_global_input_read( + const struct kbase_csf_global_iface *const iface, const u32 offset) +{ + const struct kbase_device * const kbdev = iface->kbdev; + u32 const val = input_page_read(iface->input, offset); + + dev_dbg(kbdev->dev, "glob input r: reg %08x val %08x\n", offset, val); + return val; +} + +u32 kbase_csf_firmware_global_output( + const struct kbase_csf_global_iface *const iface, const u32 offset) +{ + const struct kbase_device * const kbdev = iface->kbdev; + u32 const val = output_page_read(iface->output, offset); + + dev_dbg(kbdev->dev, "glob output r: reg %08x val %08x\n", offset, val); + return val; +} + +/** + * handle_internal_firmware_fatal - Handler for CS internal firmware fault. 
+ * + * @kbdev: Pointer to kbase device + * + * Report group fatal error to user space for all GPU command queue groups + * in the device, terminate them and reset GPU. + */ +static void handle_internal_firmware_fatal(struct kbase_device *const kbdev) +{ + int as; + + for (as = 0; as < kbdev->nr_hw_address_spaces; as++) { + unsigned long flags; + struct kbase_context *kctx; + struct kbase_fault fault; + + if (as == MCU_AS_NR) + continue; + + /* Only handle the fault for an active address space. Lock is + * taken here to atomically get reference to context in an + * active address space and retain its refcount. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kctx = kbase_ctx_sched_as_to_ctx_nolock(kbdev, as); + + if (kctx) { + kbase_ctx_sched_retain_ctx_refcount(kctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } else { + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + continue; + } + + fault = (struct kbase_fault) { + .status = GPU_EXCEPTION_TYPE_SW_FAULT_1, + }; + + kbase_csf_ctx_handle_fault(kctx, &fault); + kbase_ctx_sched_release_ctx_lock(kctx); + } + + if (kbase_prepare_to_reset_gpu(kbdev, + RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu(kbdev); +} + +/** + * firmware_error_worker - Worker function for handling firmware internal error + * + * @data: Pointer to a work_struct embedded in kbase device. 
+ * + * Handle the CS internal firmware error + */ +static void firmware_error_worker(struct work_struct *const data) +{ + struct kbase_device *const kbdev = + container_of(data, struct kbase_device, csf.fw_error_work); + + handle_internal_firmware_fatal(kbdev); +} + +static bool global_request_complete(struct kbase_device *const kbdev, + u32 const req_mask) +{ + struct kbase_csf_global_iface *global_iface = + &kbdev->csf.global_iface; + bool complete = false; + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + if ((kbase_csf_firmware_global_output(global_iface, GLB_ACK) & + req_mask) == + (kbase_csf_firmware_global_input_read(global_iface, GLB_REQ) & + req_mask)) + complete = true; + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + return complete; +} + +static int wait_for_global_request(struct kbase_device *const kbdev, + u32 const req_mask) +{ + const long wait_timeout = + kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); + long remaining; + int err = 0; + + remaining = wait_event_timeout(kbdev->csf.event_wait, + global_request_complete(kbdev, req_mask), + wait_timeout); + + if (!remaining) { + dev_warn(kbdev->dev, "Timed out waiting for global request %x to complete", + req_mask); + err = -ETIMEDOUT; + } + + return err; +} + +static void set_global_request( + const struct kbase_csf_global_iface *const global_iface, + u32 const req_mask) +{ + u32 glb_req; + + kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev); + + glb_req = kbase_csf_firmware_global_output(global_iface, GLB_ACK); + glb_req ^= req_mask; + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req, + req_mask); +} + +static void enable_endpoints_global( + const struct kbase_csf_global_iface *const global_iface, + u64 const shader_core_mask) +{ + kbase_csf_firmware_global_input(global_iface, GLB_ALLOC_EN_LO, + shader_core_mask & U32_MAX); + kbase_csf_firmware_global_input(global_iface, GLB_ALLOC_EN_HI, + shader_core_mask >> 32); + + 
set_global_request(global_iface, GLB_REQ_CFG_ALLOC_EN_MASK); +} + +static void enable_shader_poweroff_timer(struct kbase_device *const kbdev, + const struct kbase_csf_global_iface *const global_iface) +{ + u32 pwroff_reg; + + if (kbdev->csf.firmware_hctl_core_pwr) + pwroff_reg = + GLB_PWROFF_TIMER_TIMER_SOURCE_SET(DISABLE_GLB_PWROFF_TIMER, + GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); + else + pwroff_reg = kbdev->csf.mcu_core_pwroff_dur_count; + + kbase_csf_firmware_global_input(global_iface, GLB_PWROFF_TIMER, + pwroff_reg); + set_global_request(global_iface, GLB_REQ_CFG_PWROFF_TIMER_MASK); + + /* Save the programed reg value in its shadow field */ + kbdev->csf.mcu_core_pwroff_reg_shadow = pwroff_reg; +} + +static void set_timeout_global( + const struct kbase_csf_global_iface *const global_iface, + u64 const timeout) +{ + kbase_csf_firmware_global_input(global_iface, GLB_PROGRESS_TIMER, + timeout / GLB_PROGRESS_TIMER_TIMEOUT_SCALE); + + set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK); +} + +static void global_init(struct kbase_device *const kbdev, u64 core_mask) +{ + u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK | + GLB_ACK_IRQ_MASK_PING_MASK | + GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | + GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK | + GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK | + GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | + GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | + GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK; + + const struct kbase_csf_global_iface *const global_iface = + &kbdev->csf.global_iface; + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + /* Update shader core allocation enable mask */ + enable_endpoints_global(global_iface, core_mask); + enable_shader_poweroff_timer(kbdev, global_iface); + + set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev)); + + /* Unmask the interrupts */ + kbase_csf_firmware_global_input(global_iface, + GLB_ACK_IRQ_MASK, ack_irq_mask); + + kbase_csf_ring_doorbell(kbdev, 
CSF_KERNEL_DOORBELL_NR); + + kbase_csf_scheduler_spin_unlock(kbdev, flags); +} + +/** + * global_init_on_boot - Sends a global request to control various features. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * Currently only the request to enable endpoints and cycle counter is sent. + * + * Return: 0 on success, or negative on failure. + */ +static int global_init_on_boot(struct kbase_device *const kbdev) +{ + unsigned long flags; + u64 core_mask; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + core_mask = kbase_pm_ca_get_core_mask(kbdev); + kbdev->csf.firmware_hctl_core_pwr = + kbase_pm_no_mcu_core_pwroff(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + global_init(kbdev, core_mask); + + return wait_for_global_request(kbdev, CSF_GLB_REQ_CFG_MASK); +} + +void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev, + u64 core_mask) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbdev->csf.glb_init_request_pending = true; + kbdev->csf.firmware_hctl_core_pwr = + kbase_pm_no_mcu_core_pwroff(kbdev); + global_init(kbdev, core_mask); +} + +bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + WARN_ON(!kbdev->csf.glb_init_request_pending); + + if (global_request_complete(kbdev, CSF_GLB_REQ_CFG_MASK)) + kbdev->csf.glb_init_request_pending = false; + + return !kbdev->csf.glb_init_request_pending; +} + +void kbase_csf_firmware_update_core_attr(struct kbase_device *kbdev, + bool update_core_pwroff_timer, bool update_core_mask, u64 core_mask) +{ + unsigned long flags; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + if (update_core_mask) + enable_endpoints_global(&kbdev->csf.global_iface, core_mask); + if (update_core_pwroff_timer) + enable_shader_poweroff_timer(kbdev, &kbdev->csf.global_iface); + + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + 
kbase_csf_scheduler_spin_unlock(kbdev, flags); +} + +bool kbase_csf_firmware_core_attr_updated(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + return global_request_complete(kbdev, GLB_REQ_CFG_ALLOC_EN_MASK | + GLB_REQ_CFG_PWROFF_TIMER_MASK); +} + +static void kbase_csf_firmware_reload_worker(struct work_struct *work) +{ + struct kbase_device *kbdev = container_of(work, struct kbase_device, + csf.firmware_reload_work); + unsigned long flags; + + /* Reboot the firmware */ + kbase_csf_firmware_enable_mcu(kbdev); + + /* Tell MCU state machine to transit to next state */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->csf.firmware_reloaded = true; + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +void kbase_csf_firmware_trigger_reload(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbdev->csf.firmware_reloaded = false; + + if (kbdev->csf.firmware_reload_needed) { + kbdev->csf.firmware_reload_needed = false; + queue_work(system_wq, &kbdev->csf.firmware_reload_work); + } else { + kbase_csf_firmware_enable_mcu(kbdev); + kbdev->csf.firmware_reloaded = true; + } +} + +void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (unlikely(!kbdev->csf.firmware_inited)) + return; + + /* Tell MCU state machine to transit to next state */ + kbdev->csf.firmware_reloaded = true; + kbase_pm_update_state(kbdev); +} + +static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ms) +{ +#define HYSTERESIS_VAL_UNIT_SHIFT (10) + /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ + u64 freq = arch_timer_get_cntfrq(); + u64 dur_val = dur_ms; + u32 cnt_val_u32, reg_val_u32; + bool src_system_timestamp = freq > 0; + + if (!src_system_timestamp) { + /* Get the cycle_counter source alternative */ + spin_lock(&kbdev->pm.clk_rtm.lock); + if (kbdev->pm.clk_rtm.clks[0]) + freq = 
kbdev->pm.clk_rtm.clks[0]->clock_val; + else + dev_warn(kbdev->dev, "No GPU clock, unexpected intregration issue!"); + spin_unlock(&kbdev->pm.clk_rtm.lock); + + dev_info(kbdev->dev, "Can't get the timestamp frequency, " + "use cycle counter format with firmware idle hysteresis!"); + } + + /* Formula for dur_val = ((dur_ms/1000) * freq_HZ) >> 10) */ + dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; + dur_val = div_u64(dur_val, 1000); + + /* Interface limits the value field to S32_MAX */ + cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val; + + reg_val_u32 = GLB_IDLE_TIMER_TIMEOUT_SET(0, cnt_val_u32); + /* add the source flag */ + if (src_system_timestamp) + reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32, + GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); + else + reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32, + GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER); + + return reg_val_u32; +} + +u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) +{ + return kbdev->csf.gpu_idle_hysteresis_ms; +} + +u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur) +{ + unsigned long flags; + const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur); + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbdev->csf.gpu_idle_hysteresis_ms = dur; + kbdev->csf.gpu_idle_dur_count = hysteresis_val; + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x", + hysteresis_val); + + return hysteresis_val; +} + +static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us) +{ +#define PWROFF_VAL_UNIT_SHIFT (10) + /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ + u64 freq = arch_timer_get_cntfrq(); + u64 dur_val = dur_us; + u32 cnt_val_u32, reg_val_u32; + bool src_system_timestamp = freq > 0; + + if (!src_system_timestamp) { + /* Get the cycle_counter source alternative */ + 
spin_lock(&kbdev->pm.clk_rtm.lock); + if (kbdev->pm.clk_rtm.clks[0]) + freq = kbdev->pm.clk_rtm.clks[0]->clock_val; + else + dev_warn(kbdev->dev, "No GPU clock, unexpected integration issue!"); + spin_unlock(&kbdev->pm.clk_rtm.lock); + + dev_info(kbdev->dev, "Can't get the timestamp frequency, " + "use cycle counter with MCU Core Poweroff timer!"); + } + + /* Formula for dur_val = ((dur_us/1e6) * freq_HZ) >> 10) */ + dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; + dur_val = div_u64(dur_val, 1000000); + + /* Interface limits the value field to S32_MAX */ + cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val; + + reg_val_u32 = GLB_PWROFF_TIMER_TIMEOUT_SET(0, cnt_val_u32); + /* add the source flag */ + if (src_system_timestamp) + reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32, + GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); + else + reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32, + GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER); + + return reg_val_u32; +} + +u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev) +{ + return kbdev->csf.mcu_core_pwroff_dur_us; +} + +u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur) +{ + unsigned long flags; + const u32 pwroff = convert_dur_to_core_pwroff_count(kbdev, dur); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->csf.mcu_core_pwroff_dur_us = dur; + kbdev->csf.mcu_core_pwroff_dur_count = pwroff; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + dev_dbg(kbdev->dev, "MCU Core Poweroff input update: 0x%.8x", pwroff); + + return pwroff; +} + +int kbase_csf_firmware_early_init(struct kbase_device *kbdev) +{ + init_waitqueue_head(&kbdev->csf.event_wait); + kbdev->csf.interrupt_received = false; + kbdev->csf.fw_timeout_ms = CSF_FIRMWARE_TIMEOUT_MS; + + INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces); + INIT_LIST_HEAD(&kbdev->csf.firmware_config); + INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list); + 
INIT_WORK(&kbdev->csf.firmware_reload_work, + kbase_csf_firmware_reload_worker); + INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker); + + mutex_init(&kbdev->csf.reg_lock); + + return 0; +} + +int kbase_csf_firmware_init(struct kbase_device *kbdev) +{ + int ret; + + lockdep_assert_held(&kbdev->fw_load_lock); + + if (WARN_ON((kbdev->as_free & MCU_AS_BITMASK) == 0)) + return -EINVAL; + kbdev->as_free &= ~MCU_AS_BITMASK; + + ret = kbase_mmu_init(kbdev, &kbdev->csf.mcu_mmu, NULL, + BASE_MEM_GROUP_DEFAULT); + + if (ret != 0) { + /* Release the address space */ + kbdev->as_free |= MCU_AS_BITMASK; + return ret; + } + + kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS; + kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count( + kbdev, FIRMWARE_IDLE_HYSTERESIS_TIME_MS); + + ret = kbase_mcu_shared_interface_region_tracker_init(kbdev); + if (ret != 0) { + dev_err(kbdev->dev, + "Failed to setup the rb tree for managing shared interface segment\n"); + goto error; + } + + ret = invent_memory_setup_entry(kbdev); + if (ret != 0) { + dev_err(kbdev->dev, "Failed to load firmware entry\n"); + goto error; + } + + /* Make sure L2 cache is powered up */ + kbase_pm_wait_for_l2_powered(kbdev); + + /* NO_MALI: Don't init trace buffers */ + + /* NO_MALI: Don't load the MMU tables or boot CSF firmware */ + + ret = invent_capabilities(kbdev); + if (ret != 0) + goto error; + + ret = kbase_csf_doorbell_mapping_init(kbdev); + if (ret != 0) + goto error; + + ret = kbase_csf_setup_dummy_user_reg_page(kbdev); + if (ret != 0) + goto error; + + ret = kbase_csf_scheduler_init(kbdev); + if (ret != 0) + goto error; + + ret = kbase_csf_timeout_init(kbdev); + if (ret != 0) + goto error; + + ret = global_init_on_boot(kbdev); + if (ret != 0) + goto error; + + return 0; + +error: + kbase_csf_firmware_term(kbdev); + return ret; +} + +void kbase_csf_firmware_term(struct kbase_device *kbdev) +{ + cancel_work_sync(&kbdev->csf.fw_error_work); + + kbase_csf_timeout_term(kbdev); + + 
/* NO_MALI: Don't stop firmware or unload MMU tables */ + + kbase_mmu_term(kbdev, &kbdev->csf.mcu_mmu); + + kbase_csf_scheduler_term(kbdev); + + kbase_csf_free_dummy_user_reg_page(kbdev); + + kbase_csf_doorbell_mapping_term(kbdev); + + free_global_iface(kbdev); + + /* Release the address space */ + kbdev->as_free |= MCU_AS_BITMASK; + + while (!list_empty(&kbdev->csf.firmware_interfaces)) { + struct dummy_firmware_interface *interface; + + interface = list_first_entry(&kbdev->csf.firmware_interfaces, + struct dummy_firmware_interface, node); + list_del(&interface->node); + + /* NO_MALI: No cleanup in dummy interface necessary */ + + kfree(interface); + } + + /* NO_MALI: No trace buffers to terminate */ + +#ifndef MALI_KBASE_BUILD + mali_kutf_fw_utf_entry_cleanup(kbdev); +#endif + + mutex_destroy(&kbdev->csf.reg_lock); + + /* This will also free up the region allocated for the shared interface + * entry parsed from the firmware image. + */ + kbase_mcu_shared_interface_region_tracker_term(kbdev); +} + +void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + u32 glb_req; + + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + + /* The scheduler is assumed to only call the enable when its internal + * state indicates that the idle timer has previously been disabled. So + * on entry the expected field values are: + * 1. GLOBAL_INPUT_BLOCK.GLB_REQ.IDLE_ENABLE: 0 + * 2. 
GLOBAL_OUTPUT_BLOCK.GLB_ACK.IDLE_ENABLE: 0, or, on 1 -> 0 + */ + + glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ); + if (glb_req & GLB_REQ_IDLE_ENABLE_MASK) + dev_err(kbdev->dev, "Incoherent scheduler state on REQ_IDLE_ENABLE!"); + + kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER, + kbdev->csf.gpu_idle_dur_count); + + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, + GLB_REQ_REQ_IDLE_ENABLE, GLB_REQ_IDLE_ENABLE_MASK); + + dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x", + kbdev->csf.gpu_idle_dur_count); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); +} + +void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, + GLB_REQ_REQ_IDLE_DISABLE, + GLB_REQ_IDLE_DISABLE_MASK); + + dev_dbg(kbdev->dev, "Sending request to disable gpu idle timer"); + + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); +} + +void kbase_csf_firmware_ping(struct kbase_device *const kbdev) +{ + const struct kbase_csf_global_iface *const global_iface = + &kbdev->csf.global_iface; + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + set_global_request(global_iface, GLB_REQ_PING_MASK); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + kbase_csf_scheduler_spin_unlock(kbdev, flags); +} + +int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev) +{ + kbase_csf_firmware_ping(kbdev); + return wait_for_global_request(kbdev, GLB_REQ_PING_MASK); +} + +int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev, + u64 const timeout) +{ + const struct kbase_csf_global_iface *const global_iface = + &kbdev->csf.global_iface; + unsigned long flags; + int err; + + /* The 'reg_lock' is also taken and is held till the update is not + * complete, to ensure the update of 
timeout value by multiple Users + * gets serialized. + */ + mutex_lock(&kbdev->csf.reg_lock); + kbase_csf_scheduler_spin_lock(kbdev, &flags); + set_timeout_global(global_iface, timeout); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + err = wait_for_global_request(kbdev, GLB_REQ_CFG_PROGRESS_TIMER_MASK); + mutex_unlock(&kbdev->csf.reg_lock); + + return err; +} + +void kbase_csf_enter_protected_mode(struct kbase_device *kbdev) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + set_global_request(global_iface, GLB_REQ_PROTM_ENTER_MASK); + dev_dbg(kbdev->dev, "Sending request to enter protected mode"); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK); +} + +void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + set_global_request(global_iface, GLB_REQ_HALT_MASK); + dev_dbg(kbdev->dev, "Sending request to HALT MCU"); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + kbase_csf_scheduler_spin_unlock(kbdev, flags); +} + +int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + unsigned long flags; + int err = 0; + + /* The 'reg_lock' is also taken and is held till the update is + * complete, to ensure the config update gets serialized. 
+ */ + mutex_lock(&kbdev->csf.reg_lock); + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + set_global_request(global_iface, GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK); + dev_dbg(kbdev->dev, "Sending request for FIRMWARE_CONFIG_UPDATE"); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + err = wait_for_global_request(kbdev, + GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK); + mutex_unlock(&kbdev->csf.reg_lock); + return err; +} + +/** + * copy_grp_and_stm - Copy CS and/or group data + * + * @iface: Global CSF interface provided by + * the firmware. + * @group_data: Pointer where to store all the group data + * (sequentially). + * @max_group_num: The maximum number of groups to be read. Can be 0, in + * which case group_data is unused. + * @stream_data: Pointer where to store all the stream data + * (sequentially). + * @max_total_stream_num: The maximum number of streams to be read. + * Can be 0, in which case stream_data is unused. + * + * Return: Total number of CSs, summed across all groups. 
+ */ +static u32 copy_grp_and_stm( + const struct kbase_csf_global_iface * const iface, + struct basep_cs_group_control * const group_data, + u32 max_group_num, + struct basep_cs_stream_control * const stream_data, + u32 max_total_stream_num) +{ + u32 i, total_stream_num = 0; + + if (WARN_ON((max_group_num > 0) && !group_data)) + max_group_num = 0; + + if (WARN_ON((max_total_stream_num > 0) && !stream_data)) + max_total_stream_num = 0; + + for (i = 0; i < iface->group_num; i++) { + u32 j; + + if (i < max_group_num) { + group_data[i].features = iface->groups[i].features; + group_data[i].stream_num = iface->groups[i].stream_num; + group_data[i].suspend_size = + iface->groups[i].suspend_size; + } + for (j = 0; j < iface->groups[i].stream_num; j++) { + if (total_stream_num < max_total_stream_num) + stream_data[total_stream_num].features = + iface->groups[i].streams[j].features; + total_stream_num++; + } + } + + return total_stream_num; +} + +u32 kbase_csf_firmware_get_glb_iface( + struct kbase_device *kbdev, + struct basep_cs_group_control *const group_data, + u32 const max_group_num, + struct basep_cs_stream_control *const stream_data, + u32 const max_total_stream_num, u32 *const glb_version, + u32 *const features, u32 *const group_num, u32 *const prfcnt_size, + u32 *const instr_features) +{ + const struct kbase_csf_global_iface * const iface = + &kbdev->csf.global_iface; + + if (WARN_ON(!glb_version) || WARN_ON(!features) || + WARN_ON(!group_num) || WARN_ON(!prfcnt_size) || + WARN_ON(!instr_features)) + return 0; + + *glb_version = iface->version; + *features = iface->features; + *group_num = iface->group_num; + *prfcnt_size = iface->prfcnt_size; + *instr_features = iface->instr_features; + + return copy_grp_and_stm(iface, group_data, max_group_num, + stream_data, max_total_stream_num); +} + +const char *kbase_csf_firmware_get_timeline_metadata( + struct kbase_device *kbdev, const char *name, size_t *size) +{ + if (WARN_ON(!kbdev) || + WARN_ON(!name) || + 
WARN_ON(!size)) { + return NULL; + } + + *size = 0; + return NULL; +} + +void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev) +{ + /* NO_MALI: Nothing to do here */ +} + +int kbase_csf_firmware_mcu_shared_mapping_init( + struct kbase_device *kbdev, + unsigned int num_pages, + unsigned long cpu_map_properties, + unsigned long gpu_map_properties, + struct kbase_csf_mapping *csf_mapping) +{ + struct tagged_addr *phys; + struct kbase_va_region *va_reg; + struct page **page_list; + void *cpu_addr; + int i, ret = 0; + pgprot_t cpu_map_prot = PAGE_KERNEL; + unsigned long gpu_map_prot; + + if (cpu_map_properties & PROT_READ) + cpu_map_prot = PAGE_KERNEL_RO; + + if (kbdev->system_coherency == COHERENCY_ACE) { + gpu_map_prot = + KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE); + } else { + gpu_map_prot = + KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); + cpu_map_prot = pgprot_writecombine(cpu_map_prot); + }; + + phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL); + if (!phys) + goto out; + + page_list = kmalloc_array(num_pages, sizeof(*page_list), GFP_KERNEL); + if (!page_list) + goto page_list_alloc_error; + + ret = kbase_mem_pool_alloc_pages( + &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], + num_pages, phys, false); + if (ret <= 0) + goto phys_mem_pool_alloc_error; + + for (i = 0; i < num_pages; i++) + page_list[i] = as_page(phys[i]); + + cpu_addr = vmap(page_list, num_pages, VM_MAP, cpu_map_prot); + if (!cpu_addr) + goto vmap_error; + + va_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0, + num_pages, KBASE_REG_ZONE_MCU_SHARED); + if (!va_reg) + goto va_region_alloc_error; + + mutex_lock(&kbdev->csf.reg_lock); + ret = kbase_add_va_region_rbtree(kbdev, va_reg, 0, num_pages, 1); + va_reg->flags &= ~KBASE_REG_FREE; + if (ret) + goto va_region_add_error; + mutex_unlock(&kbdev->csf.reg_lock); + + gpu_map_properties &= (KBASE_REG_GPU_RD | KBASE_REG_GPU_WR); + gpu_map_properties |= gpu_map_prot; + + ret = 
kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, + va_reg->start_pfn, &phys[0], num_pages, + gpu_map_properties, KBASE_MEM_GROUP_CSF_FW); + if (ret) + goto mmu_insert_pages_error; + + kfree(page_list); + csf_mapping->phys = phys; + csf_mapping->cpu_addr = cpu_addr; + csf_mapping->va_reg = va_reg; + csf_mapping->num_pages = num_pages; + + return 0; + +mmu_insert_pages_error: + mutex_lock(&kbdev->csf.reg_lock); + kbase_remove_va_region(va_reg); +va_region_add_error: + kbase_free_alloced_region(va_reg); + mutex_unlock(&kbdev->csf.reg_lock); +va_region_alloc_error: + vunmap(cpu_addr); +vmap_error: + kbase_mem_pool_free_pages( + &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], + num_pages, phys, false, false); + +phys_mem_pool_alloc_error: + kfree(page_list); +page_list_alloc_error: + kfree(phys); +out: + /* Zero-initialize the mapping to make sure that the termination + * function doesn't try to unmap or free random addresses. + */ + csf_mapping->phys = NULL; + csf_mapping->cpu_addr = NULL; + csf_mapping->va_reg = NULL; + csf_mapping->num_pages = 0; + + return -ENOMEM; +} + +void kbase_csf_firmware_mcu_shared_mapping_term( + struct kbase_device *kbdev, struct kbase_csf_mapping *csf_mapping) +{ + if (csf_mapping->va_reg) { + mutex_lock(&kbdev->csf.reg_lock); + kbase_remove_va_region(csf_mapping->va_reg); + kbase_free_alloced_region(csf_mapping->va_reg); + mutex_unlock(&kbdev->csf.reg_lock); + } + + if (csf_mapping->phys) { + kbase_mem_pool_free_pages( + &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], + csf_mapping->num_pages, csf_mapping->phys, false, + false); + } + + vunmap(csf_mapping->cpu_addr); + kfree(csf_mapping->phys); +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_heap_context_alloc.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_heap_context_alloc.c new file mode 100644 index 0000000..96746c6 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_heap_context_alloc.c @@ -0,0 +1,195 @@ +// 
SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include "mali_kbase_csf_heap_context_alloc.h" + +/* Size of one heap context structure, in bytes. */ +#define HEAP_CTX_SIZE ((size_t)32) + +/* Total size of the GPU memory region allocated for heap contexts, in bytes. */ +#define HEAP_CTX_REGION_SIZE (MAX_TILER_HEAPS * HEAP_CTX_SIZE) + +/** + * sub_alloc - Sub-allocate a heap context from a GPU memory region + * + * @ctx_alloc: Pointer to the heap context allocator. + * + * Return: GPU virtual address of the allocated heap context or 0 on failure. 
+ */
+static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc)
+{
+	struct kbase_context *const kctx = ctx_alloc->kctx;
+	int heap_nr = 0;
+	size_t ctx_offset = 0;
+	u64 heap_gpu_va = 0;
+	struct kbase_vmap_struct mapping;
+	void *ctx_ptr = NULL;
+
+	lockdep_assert_held(&ctx_alloc->lock);
+
+	/* Pick the first slot whose in_use bit is still clear. */
+	heap_nr = find_first_zero_bit(ctx_alloc->in_use, MAX_TILER_HEAPS);
+
+	if (unlikely(heap_nr >= MAX_TILER_HEAPS)) {
+		dev_err(kctx->kbdev->dev,
+			"No free tiler heap contexts in the pool\n");
+		return 0;
+	}
+
+	ctx_offset = heap_nr * HEAP_CTX_SIZE;
+	heap_gpu_va = ctx_alloc->gpu_va + ctx_offset;
+	ctx_ptr = kbase_vmap_prot(kctx, heap_gpu_va,
+		HEAP_CTX_SIZE, KBASE_REG_CPU_WR, &mapping);
+
+	if (unlikely(!ctx_ptr)) {
+		dev_err(kctx->kbdev->dev,
+			"Failed to map tiler heap context %d (0x%llX)\n",
+			heap_nr, heap_gpu_va);
+		return 0;
+	}
+
+	/* Zero the slot, then unmap via the owning context (not the CPU pointer). */
+	memset(ctx_ptr, 0, HEAP_CTX_SIZE);
+	kbase_vunmap(kctx, &mapping);
+
+	bitmap_set(ctx_alloc->in_use, heap_nr, 1); /* claimed only once zeroed */
+
+	dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %d (0x%llX)\n",
+		heap_nr, heap_gpu_va);
+
+	return heap_gpu_va;
+}
+
+/**
+ * sub_free - Free a heap context sub-allocated from a GPU memory region
+ *
+ * @ctx_alloc:   Pointer to the heap context allocator.
+ * @heap_gpu_va: The GPU virtual address of a heap context structure to free.
+ */ +static void sub_free(struct kbase_csf_heap_context_allocator *const ctx_alloc, + u64 const heap_gpu_va) +{ + struct kbase_context *const kctx = ctx_alloc->kctx; + u64 ctx_offset = 0; + unsigned int heap_nr = 0; + + lockdep_assert_held(&ctx_alloc->lock); + + if (WARN_ON(!ctx_alloc->region)) + return; + + if (WARN_ON(heap_gpu_va < ctx_alloc->gpu_va)) + return; + + ctx_offset = heap_gpu_va - ctx_alloc->gpu_va; + + if (WARN_ON(ctx_offset >= HEAP_CTX_REGION_SIZE) || + WARN_ON(ctx_offset % HEAP_CTX_SIZE)) + return; + + heap_nr = ctx_offset / HEAP_CTX_SIZE; + dev_dbg(kctx->kbdev->dev, + "Freed tiler heap context %d (0x%llX)\n", heap_nr, heap_gpu_va); + + bitmap_clear(ctx_alloc->in_use, heap_nr, 1); +} + +int kbase_csf_heap_context_allocator_init( + struct kbase_csf_heap_context_allocator *const ctx_alloc, + struct kbase_context *const kctx) +{ + /* We cannot pre-allocate GPU memory here because the + * custom VA zone may not have been created yet. + */ + ctx_alloc->kctx = kctx; + ctx_alloc->region = NULL; + ctx_alloc->gpu_va = 0; + + mutex_init(&ctx_alloc->lock); + bitmap_zero(ctx_alloc->in_use, MAX_TILER_HEAPS); + + dev_dbg(kctx->kbdev->dev, + "Initialized a tiler heap context allocator\n"); + + return 0; +} + +void kbase_csf_heap_context_allocator_term( + struct kbase_csf_heap_context_allocator *const ctx_alloc) +{ + struct kbase_context *const kctx = ctx_alloc->kctx; + + dev_dbg(kctx->kbdev->dev, + "Terminating tiler heap context allocator\n"); + + if (ctx_alloc->region) { + kbase_gpu_vm_lock(kctx); + ctx_alloc->region->flags &= ~KBASE_REG_NO_USER_FREE; + kbase_mem_free_region(kctx, ctx_alloc->region); + kbase_gpu_vm_unlock(kctx); + } + + mutex_destroy(&ctx_alloc->lock); +} + +u64 kbase_csf_heap_context_allocator_alloc( + struct kbase_csf_heap_context_allocator *const ctx_alloc) +{ + struct kbase_context *const kctx = ctx_alloc->kctx; + u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | + BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE; + u64 nr_pages = 
PFN_UP(HEAP_CTX_REGION_SIZE); + u64 heap_gpu_va = 0; + +#ifdef CONFIG_MALI_VECTOR_DUMP + flags |= BASE_MEM_PROT_CPU_RD; +#endif + + mutex_lock(&ctx_alloc->lock); + + /* If the pool of heap contexts wasn't already allocated then + * allocate it. + */ + if (!ctx_alloc->region) { + ctx_alloc->region = kbase_mem_alloc(kctx, nr_pages, nr_pages, + 0, &flags, &ctx_alloc->gpu_va); + } + + /* If the pool still isn't allocated then an error occurred. */ + if (unlikely(!ctx_alloc->region)) { + dev_err(kctx->kbdev->dev, "Failed to allocate a pool of tiler heap contexts\n"); + } else { + heap_gpu_va = sub_alloc(ctx_alloc); + } + + mutex_unlock(&ctx_alloc->lock); + + return heap_gpu_va; +} + +void kbase_csf_heap_context_allocator_free( + struct kbase_csf_heap_context_allocator *const ctx_alloc, + u64 const heap_gpu_va) +{ + mutex_lock(&ctx_alloc->lock); + sub_free(ctx_alloc, heap_gpu_va); + mutex_unlock(&ctx_alloc->lock); +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_heap_context_alloc.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_heap_context_alloc.h new file mode 100644 index 0000000..993db63 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_heap_context_alloc.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include + +#ifndef _KBASE_CSF_HEAP_CONTEXT_ALLOC_H_ +#define _KBASE_CSF_HEAP_CONTEXT_ALLOC_H_ + +/** + * kbase_csf_heap_context_allocator_init - Initialize an allocator for heap + * contexts + * @ctx_alloc: Pointer to the heap context allocator to initialize. + * @kctx: Pointer to the kbase context. + * + * Return: 0 if successful or a negative error code on failure. + */ +int kbase_csf_heap_context_allocator_init( + struct kbase_csf_heap_context_allocator *const ctx_alloc, + struct kbase_context *const kctx); + +/** + * kbase_csf_heap_context_allocator_term - Terminate an allocator for heap + * contexts + * @ctx_alloc: Pointer to the heap context allocator to terminate. + */ +void kbase_csf_heap_context_allocator_term( + struct kbase_csf_heap_context_allocator *const ctx_alloc); + +/** + * kbase_csf_heap_context_allocator_alloc - Allocate a heap context structure + * + * If this function is successful then it returns the address of a + * zero-initialized heap context structure for use by the firmware. + * + * @ctx_alloc: Pointer to the heap context allocator. + * + * Return: GPU virtual address of the allocated heap context or 0 on failure. + */ +u64 kbase_csf_heap_context_allocator_alloc( + struct kbase_csf_heap_context_allocator *const ctx_alloc); + +/** + * kbase_csf_heap_context_allocator_free - Free a heap context structure + * + * This function returns a heap context structure to the free pool of unused + * contexts for possible reuse by a future call to + * @kbase_csf_heap_context_allocator_alloc. + * + * @ctx_alloc: Pointer to the heap context allocator. + * @heap_gpu_va: The GPU virtual address of a heap context structure that + * was allocated for the firmware. 
+ */ +void kbase_csf_heap_context_allocator_free( + struct kbase_csf_heap_context_allocator *const ctx_alloc, + u64 const heap_gpu_va); + +#endif /* _KBASE_CSF_HEAP_CONTEXT_ALLOC_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu.c new file mode 100644 index 0000000..4e26a49 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu.c @@ -0,0 +1,2258 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include +#include +#include +#include "device/mali_kbase_device.h" +#include "mali_kbase_csf.h" +#include + +#if IS_ENABLED(CONFIG_SYNC_FILE) +#include "mali_kbase_fence.h" +#include "mali_kbase_sync.h" + +static DEFINE_SPINLOCK(kbase_csf_fence_lock); +#endif + +static void kcpu_queue_process(struct kbase_kcpu_command_queue *kcpu_queue, + bool ignore_waits); + +static void kcpu_queue_process_worker(struct work_struct *data); + +static int kbase_kcpu_map_import_prepare( + struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_import_info *import_info, + struct kbase_kcpu_command *current_command) +{ + struct kbase_context *const kctx = kcpu_queue->kctx; + struct kbase_va_region *reg; + int ret = 0; + + lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + + /* Take the processes mmap lock */ + down_read(kbase_mem_get_process_mmap_lock()); + kbase_gpu_vm_lock(kctx); + + reg = kbase_region_tracker_find_region_enclosing_address(kctx, + import_info->handle); + + if (kbase_is_region_invalid_or_free(reg) || + !kbase_mem_is_imported(reg->gpu_alloc->type)) { + ret = -EINVAL; + goto out; + } + + if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) { + /* Pin the physical pages backing the user buffer while + * we are in the process context and holding the mmap lock. + * The dma mapping & GPU mapping of the pages would be done + * when the MAP_IMPORT operation is executed. + * + * Though the pages would be pinned, no reference is taken + * on the physical pages tracking object. When the last + * reference to the tracking object is dropped the pages + * would be unpinned if they weren't unpinned before. 
+ */ + ret = kbase_jd_user_buf_pin_pages(kctx, reg); + if (ret) + goto out; + } + + current_command->type = BASE_KCPU_COMMAND_TYPE_MAP_IMPORT; + current_command->info.import.gpu_va = import_info->handle; + +out: + kbase_gpu_vm_unlock(kctx); + /* Release the processes mmap lock */ + up_read(kbase_mem_get_process_mmap_lock()); + + return ret; +} + +static int kbase_kcpu_unmap_import_prepare_internal( + struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_import_info *import_info, + struct kbase_kcpu_command *current_command, + enum base_kcpu_command_type type) +{ + struct kbase_context *const kctx = kcpu_queue->kctx; + struct kbase_va_region *reg; + int ret = 0; + + lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + + kbase_gpu_vm_lock(kctx); + + reg = kbase_region_tracker_find_region_enclosing_address(kctx, + import_info->handle); + + if (kbase_is_region_invalid_or_free(reg) || + !kbase_mem_is_imported(reg->gpu_alloc->type)) { + ret = -EINVAL; + goto out; + } + + if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) { + /* The pages should have been pinned when MAP_IMPORT + * was enqueued previously. 
+ */ + if (reg->gpu_alloc->nents != + reg->gpu_alloc->imported.user_buf.nr_pages) { + ret = -EINVAL; + goto out; + } + } + + current_command->type = type; + current_command->info.import.gpu_va = import_info->handle; + +out: + kbase_gpu_vm_unlock(kctx); + + return ret; +} + +static int kbase_kcpu_unmap_import_prepare( + struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_import_info *import_info, + struct kbase_kcpu_command *current_command) +{ + return kbase_kcpu_unmap_import_prepare_internal(kcpu_queue, + import_info, current_command, + BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT); +} + +static int kbase_kcpu_unmap_import_force_prepare( + struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_import_info *import_info, + struct kbase_kcpu_command *current_command) +{ + return kbase_kcpu_unmap_import_prepare_internal(kcpu_queue, + import_info, current_command, + BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE); +} + +/** + * kbase_jit_add_to_pending_alloc_list() - Pend JIT allocation + * + * @queue: The queue containing this JIT allocation + * @cmd: The JIT allocation that is blocking this queue + */ +static void kbase_jit_add_to_pending_alloc_list( + struct kbase_kcpu_command_queue *queue, + struct kbase_kcpu_command *cmd) +{ + struct kbase_context *const kctx = queue->kctx; + struct list_head *target_list_head = + &kctx->csf.kcpu_queues.jit_blocked_queues; + struct kbase_kcpu_command_queue *blocked_queue; + + lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + + list_for_each_entry(blocked_queue, + &kctx->csf.kcpu_queues.jit_blocked_queues, + jit_blocked) { + struct kbase_kcpu_command const*const jit_alloc_cmd = + &blocked_queue->commands[blocked_queue->start_offset]; + + WARN_ON(jit_alloc_cmd->type != BASE_KCPU_COMMAND_TYPE_JIT_ALLOC); + if (cmd->enqueue_ts < jit_alloc_cmd->enqueue_ts) { + target_list_head = &blocked_queue->jit_blocked; + break; + } + } + + list_add_tail(&queue->jit_blocked, target_list_head); +} + +/** + * 
kbase_kcpu_jit_allocate_process() - Process JIT allocation + * + * @queue: The queue containing this JIT allocation + * @cmd: The JIT allocation command + */ +static int kbase_kcpu_jit_allocate_process( + struct kbase_kcpu_command_queue *queue, + struct kbase_kcpu_command *cmd) +{ + struct kbase_context *const kctx = queue->kctx; + struct kbase_kcpu_command_jit_alloc_info *alloc_info = + &cmd->info.jit_alloc; + struct base_jit_alloc_info *info = alloc_info->info; + struct kbase_vmap_struct mapping; + struct kbase_va_region *reg; + u32 count = alloc_info->count; + u64 *ptr, new_addr; + u32 i; + int ret; + + lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + + if (alloc_info->blocked) { + list_del(&queue->jit_blocked); + alloc_info->blocked = false; + } + + if (WARN_ON(!info)) + return -EINVAL; + + /* Check if all JIT IDs are not in use */ + for (i = 0; i < count; i++, info++) { + /* The JIT ID is still in use so fail the allocation */ + if (kctx->jit_alloc[info->id]) { + dev_warn(kctx->kbdev->dev, "JIT ID still in use\n"); + return -EINVAL; + } + } + + /* Now start the allocation loop */ + for (i = 0, info = alloc_info->info; i < count; i++, info++) { + /* Create a JIT allocation */ + reg = kbase_jit_allocate(kctx, info, true); + if (!reg) { + bool can_block = false; + struct kbase_kcpu_command const *jit_cmd; + + list_for_each_entry(jit_cmd, &kctx->csf.kcpu_queues.jit_cmds_head, info.jit_alloc.node) { + if (jit_cmd == cmd) + break; + + if (jit_cmd->type == BASE_KCPU_COMMAND_TYPE_JIT_FREE) { + u8 const*const free_ids = jit_cmd->info.jit_free.ids; + + if (free_ids && *free_ids && kctx->jit_alloc[*free_ids]) { + /* + * A JIT free which is active + * and submitted before this + * command. + */ + can_block = true; + break; + } + } + } + + if (!can_block) { + /* + * No prior JIT_FREE command is active. Roll + * back previous allocations and fail. 
+ */ + dev_warn_ratelimited(kctx->kbdev->dev, "JIT alloc command failed: %pK\n", cmd); + ret = -ENOMEM; + goto fail; + } + + /* There are pending frees for an active allocation + * so we should wait to see whether they free the + * memory. Add to the list of atoms for which JIT + * allocation is pending. + */ + kbase_jit_add_to_pending_alloc_list(queue, cmd); + alloc_info->blocked = true; + + /* Rollback, the whole set will be re-attempted */ + while (i-- > 0) { + info--; + kbase_jit_free(kctx, kctx->jit_alloc[info->id]); + kctx->jit_alloc[info->id] = NULL; + } + + return -EAGAIN; + } + + /* Bind it to the user provided ID. */ + kctx->jit_alloc[info->id] = reg; + } + + for (i = 0, info = alloc_info->info; i < count; i++, info++) { + /* + * Write the address of the JIT allocation to the user provided + * GPU allocation. + */ + ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr), + &mapping); + if (!ptr) { + ret = -ENOMEM; + goto fail; + } + + reg = kctx->jit_alloc[info->id]; + new_addr = reg->start_pfn << PAGE_SHIFT; + *ptr = new_addr; + kbase_vunmap(kctx, &mapping); + } + + return 0; + +fail: + /* Roll back completely */ + for (i = 0, info = alloc_info->info; i < count; i++, info++) { + /* Free the allocations that were successful. + * Mark all the allocations including the failed one and the + * other un-attempted allocations in the set, so we know they + * are in use. 
+		 */
+		if (kctx->jit_alloc[info->id])
+			kbase_jit_free(kctx, kctx->jit_alloc[info->id]);
+
+		kctx->jit_alloc[info->id] = KBASE_RESERVED_REG_JIT_ALLOC;
+	}
+
+	return ret;
+}
+
+/**
+ * kbase_kcpu_jit_allocate_prepare() - Validate and stage a JIT_ALLOC command
+ *
+ * @kcpu_queue:      The queue the command is being enqueued on
+ * @alloc_info:      User-supplied pointer to, and count of, the allocation
+ *                   requests
+ * @current_command: Command slot that is filled in on success
+ *
+ * Copies the user array of allocation requests into kernel memory, validates
+ * each entry with kbasep_jit_alloc_validate(), rejects a set that contains the
+ * same JIT ID twice and, on success, links the command onto the context's
+ * jit_cmds_head list. The kernel copy is freed again on every failure path.
+ *
+ * Return: 0 on success, -EINVAL for bad arguments, a failed user copy or a
+ * duplicated ID, -ENOMEM if the kernel copy cannot be allocated, or the error
+ * returned by kbasep_jit_alloc_validate().
+ */
+static int kbase_kcpu_jit_allocate_prepare(
+		struct kbase_kcpu_command_queue *kcpu_queue,
+		struct base_kcpu_command_jit_alloc_info *alloc_info,
+		struct kbase_kcpu_command *current_command)
+{
+	struct kbase_context *const kctx = kcpu_queue->kctx;
+	void __user *data = u64_to_user_ptr(alloc_info->info);
+	struct base_jit_alloc_info *info;
+	u32 count = alloc_info->count;
+	int ret = 0;
+	u32 i;
+
+	lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+
+	if (!data || count > kcpu_queue->kctx->jit_max_allocations ||
+			count > ARRAY_SIZE(kctx->jit_alloc)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	info = kmalloc_array(count, sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	if (copy_from_user(info, data, sizeof(*info) * count) != 0) {
+		ret = -EINVAL;
+		goto out_free;
+	}
+
+	for (i = 0; i < count; i++) {
+		ret = kbasep_jit_alloc_validate(kctx, &info[i]);
+		if (ret)
+			goto out_free;
+	}
+
+	/* Search for duplicate JIT ids.
+	 * The bound is written as "i + 1 < count" because count is unsigned:
+	 * "count - 1" would wrap to UINT_MAX when count is 0 and spin the
+	 * outer loop for billions of iterations with the lock held.
+	 */
+	for (i = 0; i + 1 < count; i++) {
+		u32 j;
+
+		for (j = (i + 1); j < count; j++) {
+			if (info[i].id == info[j].id) {
+				ret = -EINVAL;
+				goto out_free;
+			}
+		}
+	}
+
+	current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_ALLOC;
+	list_add_tail(&current_command->info.jit_alloc.node,
+			&kctx->csf.kcpu_queues.jit_cmds_head);
+	current_command->info.jit_alloc.info = info;
+	current_command->info.jit_alloc.count = count;
+	current_command->info.jit_alloc.blocked = false;
+
+	return 0;
+out_free:
+	kfree(info);
+out:
+	return ret;
+}
+
+/**
+ * kbase_kcpu_jit_allocate_finish() - Finish handling the JIT_ALLOC command
+ *
+ * @queue: The queue containing this JIT allocation
+ * @cmd:  The JIT allocation command
+ */
+static void kbase_kcpu_jit_allocate_finish(
+		struct kbase_kcpu_command_queue *queue,
+		struct kbase_kcpu_command *cmd)
+{
+	
lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + + /* Remove this command from the jit_cmds_head list */ + list_del(&cmd->info.jit_alloc.node); + + /* + * If we get to this point we must have already cleared the blocked + * flag, otherwise it'd be a bug. + */ + if (WARN_ON(cmd->info.jit_alloc.blocked)) { + list_del(&queue->jit_blocked); + cmd->info.jit_alloc.blocked = false; + } + + kfree(cmd->info.jit_alloc.info); +} + +/** + * kbase_kcpu_jit_retry_pending_allocs() - Retry blocked JIT_ALLOC commands + * + * @kctx: The context containing the blocked JIT_ALLOC commands + */ +static void kbase_kcpu_jit_retry_pending_allocs(struct kbase_context *kctx) +{ + struct kbase_kcpu_command_queue *blocked_queue; + + lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + + /* + * Reschedule all queues blocked by JIT_ALLOC commands. + * NOTE: This code traverses the list of blocked queues directly. It + * only works as long as the queued works are not executed at the same + * time. This precondition is true since we're holding the + * kbase_csf_kcpu_queue_context.lock . 
+ */ + list_for_each_entry(blocked_queue, + &kctx->csf.kcpu_queues.jit_blocked_queues, jit_blocked) + queue_work(kctx->csf.kcpu_queues.wq, &blocked_queue->work); +} + +static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue, + struct kbase_kcpu_command *const cmd) +{ + struct kbase_kcpu_command_jit_free_info const *const free_info = + &cmd->info.jit_free; + u8 const *const ids = free_info->ids; + u32 const count = free_info->count; + u32 i; + int rc = 0; + struct kbase_context *kctx = queue->kctx; + + if (WARN_ON(!ids)) + return -EINVAL; + + lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + + KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END( + queue->kctx->kbdev, queue); + + for (i = 0; i < count; i++) { + u64 pages_used = 0; + int item_err = 0; + + if (!kctx->jit_alloc[ids[i]]) { + dev_warn(kctx->kbdev->dev, "invalid JIT free ID\n"); + rc = -EINVAL; + item_err = rc; + } else { + struct kbase_va_region *const reg = kctx->jit_alloc[ids[i]]; + + /* + * If the ID is valid but the allocation request failed, still + * succeed this command but don't try and free the allocation. + */ + if (reg != KBASE_RESERVED_REG_JIT_ALLOC) { + pages_used = reg->gpu_alloc->nents; + kbase_jit_free(kctx, reg); + } + + kctx->jit_alloc[ids[i]] = NULL; + } + + KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END( + queue->kctx->kbdev, queue, item_err, pages_used); + } + + /* Free the list of ids */ + kfree(ids); + + /* + * Remove this command from the jit_cmds_head list and retry pending + * allocations. 
+	 */
+	list_del(&cmd->info.jit_free.node);
+	kbase_kcpu_jit_retry_pending_allocs(kctx);
+
+	return rc;
+}
+
+static int kbase_kcpu_jit_free_prepare(
+		struct kbase_kcpu_command_queue *kcpu_queue,
+		struct base_kcpu_command_jit_free_info *free_info,
+		struct kbase_kcpu_command *current_command)
+{
+	struct kbase_context *const kctx = kcpu_queue->kctx;
+	void __user *data = u64_to_user_ptr(free_info->ids);
+	u8 *ids;
+	u32 count = free_info->count;
+	int ret;
+	u32 i;
+
+	lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+
+	/* Sanity checks */
+	if (!count || count > ARRAY_SIZE(kctx->jit_alloc)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Copy the information for safe access and future storage */
+	ids = kmalloc_array(count, sizeof(*ids), GFP_KERNEL);
+	if (!ids) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	if (!data) {
+		ret = -EINVAL;
+		goto out_free;
+	}
+
+	if (copy_from_user(ids, data, sizeof(*ids) * count)) {
+		ret = -EINVAL;
+		goto out_free;
+	}
+
+	for (i = 0; i < count; i++) {
+		/* Fail the command if ID sent is zero */
+		if (!ids[i]) {
+			ret = -EINVAL;
+			goto out_free;
+		}
+	}
+
+	/* Search for duplicate JIT ids */
+	for (i = 0; i < (count - 1); i++) {
+		u32 j;
+
+		for (j = (i + 1); j < count; j++) {
+			if (ids[i] == ids[j]) {
+				ret = -EINVAL;
+				goto out_free;
+			}
+		}
+	}
+
+	current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_FREE;
+	list_add_tail(&current_command->info.jit_free.node,
+			&kctx->csf.kcpu_queues.jit_cmds_head);
+	current_command->info.jit_free.ids = ids;
+	current_command->info.jit_free.count = count;
+
+	return 0;
+out_free:
+	kfree(ids);
+out:
+	return ret;
+}
+
+/**
+ * kbase_csf_queue_group_suspend_prepare() - Stage a GROUP_SUSPEND command
+ *
+ * @kcpu_queue:      The queue the command is being enqueued on
+ * @suspend_buf:     User-supplied buffer address, size and group handle
+ * @current_command: Command slot that is filled in on success
+ *
+ * Builds the page array covering the user buffer described by @suspend_buf.
+ * If the buffer address lies inside a valid region, that region must be
+ * SAME_VA, fully backed and large enough; its physical pages are recorded and
+ * a reference is taken on the CPU allocation. Otherwise the address is
+ * treated as a CPU VA and its pages are pinned with get_user_pages_fast().
+ *
+ * Return: 0 on success, -EINVAL if the buffer is too small, the region is
+ * unsuitable or not all pages could be pinned, -ENOMEM on allocation failure,
+ * or the error returned by the group-handle check or get_user_pages_fast().
+ */
+static int kbase_csf_queue_group_suspend_prepare(
+		struct kbase_kcpu_command_queue *kcpu_queue,
+		struct base_kcpu_command_group_suspend_info *suspend_buf,
+		struct kbase_kcpu_command *current_command)
+{
+	struct kbase_context *const kctx = kcpu_queue->kctx;
+	struct kbase_suspend_copy_buffer *sus_buf = NULL;
+	u64 addr = suspend_buf->buffer;
+	u64 page_addr = addr & PAGE_MASK;
+	u64 end_addr = addr + suspend_buf->size - 1;
+	u64 last_page_addr = end_addr & PAGE_MASK;
+	int nr_pages = (last_page_addr - page_addr) / PAGE_SIZE + 1;
+	int pinned_pages = 0, ret = 0;
+	struct kbase_va_region *reg;
+
+	lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+
+	if (suspend_buf->size <
+			kctx->kbdev->csf.global_iface.groups[0].suspend_size)
+		return -EINVAL;
+
+	ret = kbase_csf_queue_group_handle_is_valid(kctx,
+			suspend_buf->group_handle);
+	if (ret)
+		return ret;
+
+	sus_buf = kzalloc(sizeof(*sus_buf), GFP_KERNEL);
+	if (!sus_buf)
+		return -ENOMEM;
+
+	sus_buf->size = suspend_buf->size;
+	sus_buf->nr_pages = nr_pages;
+	sus_buf->offset = addr & ~PAGE_MASK;
+
+	sus_buf->pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
+	if (!sus_buf->pages) {
+		ret = -ENOMEM;
+		goto out_clean_sus_buf;
+	}
+
+	/* Check if the page_addr is a valid GPU VA from SAME_VA zone,
+	 * otherwise consider it is a CPU VA corresponding to the Host
+	 * memory allocated by userspace.
+	 */
+	kbase_gpu_vm_lock(kctx);
+	reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+			page_addr);
+
+	if (kbase_is_region_invalid_or_free(reg)) {
+		kbase_gpu_vm_unlock(kctx);
+		pinned_pages = get_user_pages_fast(page_addr, nr_pages, 1,
+				sus_buf->pages);
+		kbase_gpu_vm_lock(kctx);
+
+		if (pinned_pages < 0) {
+			ret = pinned_pages;
+			goto out_clean_pages;
+		}
+		if (pinned_pages != nr_pages) {
+			ret = -EINVAL;
+			goto out_clean_pages;
+		}
+	} else {
+		struct tagged_addr *page_array;
+		u64 start, end, i;
+
+		if (!(reg->flags & BASE_MEM_SAME_VA) ||
+				reg->nr_pages < nr_pages ||
+				kbase_reg_current_backed_size(reg) !=
+				reg->nr_pages) {
+			ret = -EINVAL;
+			goto out_clean_pages;
+		}
+
+		start = PFN_DOWN(page_addr) - reg->start_pfn;
+		end = start + nr_pages;
+
+		if (end > reg->nr_pages) {
+			ret = -EINVAL;
+			goto out_clean_pages;
+		}
+
+		sus_buf->cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+		kbase_mem_phy_alloc_kernel_mapped(reg->cpu_alloc);
+		page_array = kbase_get_cpu_phy_pages(reg);
+		page_array += start;
+
+		for (i = 0; i < nr_pages; i++, page_array++)
+			sus_buf->pages[i] = as_page(*page_array);
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+	current_command->type = BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND;
+	current_command->info.suspend_buf_copy.sus_buf = sus_buf;
+	current_command->info.suspend_buf_copy.group_handle =
+			suspend_buf->group_handle;
+	return ret;
+
+out_clean_pages:
+	kbase_gpu_vm_unlock(kctx);
+	/* get_user_pages_fast() may have pinned only part of the range; drop
+	 * those page references before discarding the array. pinned_pages is
+	 * still 0 on the region path and negative when the pin call failed,
+	 * so the loop only runs for a partial pin.
+	 */
+	while (pinned_pages > 0)
+		put_page(sus_buf->pages[--pinned_pages]);
+	kfree(sus_buf->pages);
+out_clean_sus_buf:
+	kfree(sus_buf);
+
+	return ret;
+}
+
+static int kbase_csf_queue_group_suspend_process(struct kbase_context *kctx,
+		struct kbase_suspend_copy_buffer *sus_buf,
+		u8 group_handle)
+{
+	return kbase_csf_queue_group_suspend(kctx, sus_buf, group_handle);
+}
+
+static enum kbase_csf_event_callback_action event_cqs_callback(void *param)
+{
+	struct kbase_kcpu_command_queue *kcpu_queue =
+		(struct kbase_kcpu_command_queue *)param;
+	struct kbase_context *const kctx = kcpu_queue->kctx;
+
+	queue_work(kctx->csf.kcpu_queues.wq, &kcpu_queue->work);
+
+	return KBASE_CSF_EVENT_CALLBACK_KEEP;
+}
+
+static void cleanup_cqs_wait(struct kbase_kcpu_command_queue *queue,
+		struct kbase_kcpu_command_cqs_wait_info *cqs_wait)
+{
+	WARN_ON(!cqs_wait->nr_objs);
+	WARN_ON(!cqs_wait->objs);
+	WARN_ON(!cqs_wait->signaled);
+	WARN_ON(!queue->cqs_wait_count);
+
+	if (--queue->cqs_wait_count == 0) {
+		kbase_csf_event_wait_remove(queue->kctx,
+				event_cqs_callback, queue);
+	}
+
+	kfree(cqs_wait->signaled);
+	kfree(cqs_wait->objs);
+	cqs_wait->signaled = NULL;
+	cqs_wait->objs = NULL;
+}
+
+static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev,
+		struct kbase_kcpu_command_queue *queue,
+		struct kbase_kcpu_command_cqs_wait_info *cqs_wait)
+{
+	u32 i;
+
+	lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
+
+	if (WARN_ON(!cqs_wait->objs))
+		return -EINVAL;
+
+	/* Skip the CQS waits that have already been signaled when processing */
+	for (i = find_first_zero_bit(cqs_wait->signaled, cqs_wait->nr_objs); i < 
cqs_wait->nr_objs; i++) { + if (!test_bit(i, cqs_wait->signaled)) { + struct kbase_vmap_struct *mapping; + bool sig_set; + u32 *evt = (u32 *)kbase_phy_alloc_mapping_get(queue->kctx, + cqs_wait->objs[i].addr, &mapping); + + if (!queue->command_started) { + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START( + kbdev, queue); + queue->command_started = true; + KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_WAIT_START, + queue, cqs_wait->nr_objs, 0); + } + + if (!evt) { + dev_warn(kbdev->dev, + "Sync memory %llx already freed", cqs_wait->objs[i].addr); + queue->has_error = true; + return -EINVAL; + } + + sig_set = evt[BASEP_EVENT_VAL_INDEX] > cqs_wait->objs[i].val; + if (sig_set) { + bool error = false; + + bitmap_set(cqs_wait->signaled, i, 1); + if ((cqs_wait->inherit_err_flags & (1U << i)) && + evt[BASEP_EVENT_ERR_INDEX] > 0) { + queue->has_error = true; + error = true; + } + + KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_WAIT_END, + queue, cqs_wait->objs[i].addr, + error); + + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END( + kbdev, queue, + evt[BASEP_EVENT_ERR_INDEX]); + queue->command_started = false; + } + + kbase_phy_alloc_mapping_put(queue->kctx, mapping); + + if (!sig_set) + break; + } + } + + /* For the queue to progress further, all cqs objects should get + * signaled. 
+	 */
+	return bitmap_full(cqs_wait->signaled, cqs_wait->nr_objs);
+}
+
+static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue,
+		struct base_kcpu_command_cqs_wait_info *cqs_wait_info,
+		struct kbase_kcpu_command *current_command)
+{
+	struct base_cqs_wait_info *objs;
+	unsigned int nr_objs = cqs_wait_info->nr_objs;
+
+	lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
+
+	if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
+		return -EINVAL;
+
+	if (!nr_objs)
+		return -EINVAL;
+
+	objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL);
+	if (!objs)
+		return -ENOMEM;
+
+	if (copy_from_user(objs, u64_to_user_ptr(cqs_wait_info->objs),
+			nr_objs * sizeof(*objs))) {
+		kfree(objs);
+		return -ENOMEM;
+	}
+
+	if (++queue->cqs_wait_count == 1) {
+		if (kbase_csf_event_wait_add(queue->kctx,
+				event_cqs_callback, queue)) {
+			kfree(objs);
+			queue->cqs_wait_count--;
+			return -ENOMEM;
+		}
+	}
+
+	current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_WAIT;
+	current_command->info.cqs_wait.nr_objs = nr_objs;
+	current_command->info.cqs_wait.objs = objs;
+	current_command->info.cqs_wait.inherit_err_flags =
+			cqs_wait_info->inherit_err_flags;
+
+	current_command->info.cqs_wait.signaled = kcalloc(BITS_TO_LONGS(nr_objs),
+			sizeof(*current_command->info.cqs_wait.signaled), GFP_KERNEL);
+	if (!current_command->info.cqs_wait.signaled) {
+		if (--queue->cqs_wait_count == 0) {
+			kbase_csf_event_wait_remove(queue->kctx,
+					event_cqs_callback, queue);
+		}
+
+		kfree(objs);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * kbase_kcpu_cqs_set_process() - Execute a CQS_SET command
+ *
+ * @kbdev:   The device, used for warnings and tracing
+ * @queue:   The queue containing the command
+ * @cqs_set: The list of CQS objects to signal
+ *
+ * For every object in @cqs_set the sync memory is mapped, the queue's current
+ * error state is stored in its error word and its value word is incremented.
+ * An object whose memory can no longer be mapped is reported with a warning
+ * and marks the queue as having an error. Afterwards the GPU is notified and
+ * the object array is freed.
+ */
+static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev,
+		struct kbase_kcpu_command_queue *queue,
+		struct kbase_kcpu_command_cqs_set_info *cqs_set)
+{
+	unsigned int i;
+
+	lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
+
+	if (WARN_ON(!cqs_set->objs))
+		return;
+
+	for (i = 0; i < cqs_set->nr_objs; i++) {
+		struct kbase_vmap_struct *mapping;
+		u32 *evt;
+
+		evt = (u32 *)kbase_phy_alloc_mapping_get(
+			queue->kctx, cqs_set->objs[i].addr, &mapping);
+
+		KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET(kbdev, queue,
+				evt ? 0 : 1);
+
+		if (!evt) {
+			dev_warn(kbdev->dev,
+				"Sync memory %llx already freed", cqs_set->objs[i].addr);
+			queue->has_error = true;
+		} else {
+			evt[BASEP_EVENT_ERR_INDEX] = queue->has_error;
+			/* Set to signaled */
+			evt[BASEP_EVENT_VAL_INDEX]++;
+
+			/* Trace while the mapping reference is still held:
+			 * evt must not be dereferenced once the mapping has
+			 * been put.
+			 */
+			KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_SET,
+					queue, cqs_set->objs[i].addr,
+					evt[BASEP_EVENT_ERR_INDEX]);
+
+			kbase_phy_alloc_mapping_put(queue->kctx, mapping);
+		}
+	}
+
+	kbase_csf_event_signal_notify_gpu(queue->kctx);
+
+	kfree(cqs_set->objs);
+	cqs_set->objs = NULL;
+}
+
+static int kbase_kcpu_cqs_set_prepare(
+		struct kbase_kcpu_command_queue *kcpu_queue,
+		struct base_kcpu_command_cqs_set_info *cqs_set_info,
+		struct kbase_kcpu_command *current_command)
+{
+	struct kbase_context *const kctx = kcpu_queue->kctx;
+	struct base_cqs_set *objs;
+	unsigned int nr_objs = cqs_set_info->nr_objs;
+
+	lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+
+	if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
+		return -EINVAL;
+
+	if (!nr_objs)
+		return -EINVAL;
+
+	objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL);
+	if (!objs)
+		return -ENOMEM;
+
+	if (copy_from_user(objs, u64_to_user_ptr(cqs_set_info->objs),
+			nr_objs * sizeof(*objs))) {
+		kfree(objs);
+		return -ENOMEM;
+	}
+
+	current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET;
+	current_command->info.cqs_set.nr_objs = nr_objs;
+	current_command->info.cqs_set.objs = objs;
+
+	return 0;
+}
+
+static void cleanup_cqs_wait_operation(struct kbase_kcpu_command_queue *queue,
+		struct kbase_kcpu_command_cqs_wait_operation_info *cqs_wait_operation)
+{
+	WARN_ON(!cqs_wait_operation->nr_objs);
+	WARN_ON(!cqs_wait_operation->objs);
+	WARN_ON(!cqs_wait_operation->signaled);
+	WARN_ON(!queue->cqs_wait_count);
+
+	if (--queue->cqs_wait_count == 0) {
+		kbase_csf_event_wait_remove(queue->kctx,
+				event_cqs_callback, queue);
+	}
+
+	kfree(cqs_wait_operation->signaled);
+	kfree(cqs_wait_operation->objs);
+	
cqs_wait_operation->signaled = NULL; + cqs_wait_operation->objs = NULL; +} + +static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev, + struct kbase_kcpu_command_queue *queue, + struct kbase_kcpu_command_cqs_wait_operation_info *cqs_wait_operation) +{ + u32 i; + + lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + + if (WARN_ON(!cqs_wait_operation->objs)) + return -EINVAL; + + /* Skip the CQS waits that have already been signaled when processing */ + for (i = find_first_zero_bit(cqs_wait_operation->signaled, cqs_wait_operation->nr_objs); i < cqs_wait_operation->nr_objs; i++) { + if (!test_bit(i, cqs_wait_operation->signaled)) { + struct kbase_vmap_struct *mapping; + bool sig_set; + u64 *evt = (u64 *)kbase_phy_alloc_mapping_get(queue->kctx, + cqs_wait_operation->objs[i].addr, &mapping); + + /* GPUCORE-28172 RDT to review */ + if (!queue->command_started) + queue->command_started = true; + + if (!evt) { + dev_warn(kbdev->dev, + "Sync memory %llx already freed", cqs_wait_operation->objs[i].addr); + queue->has_error = true; + return -EINVAL; + } + + switch (cqs_wait_operation->objs[i].operation) { + case BASEP_CQS_WAIT_OPERATION_LE: + sig_set = *evt <= cqs_wait_operation->objs[i].val; + break; + case BASEP_CQS_WAIT_OPERATION_GT: + sig_set = *evt > cqs_wait_operation->objs[i].val; + break; + default: + dev_warn(kbdev->dev, + "Unsupported CQS wait operation %d", cqs_wait_operation->objs[i].operation); + + kbase_phy_alloc_mapping_put(queue->kctx, mapping); + queue->has_error = true; + + return -EINVAL; + } + + /* Increment evt up to the error_state value depending on the CQS data type */ + switch (cqs_wait_operation->objs[i].data_type) { + default: + dev_warn(kbdev->dev, "Unreachable data_type=%d", cqs_wait_operation->objs[i].data_type); + /* Fallthrough - hint to compiler that there's really only 2 options at present */ + case BASEP_CQS_DATA_TYPE_U32: + evt = (u64 *)((u8 *)evt + sizeof(u32)); + break; + case BASEP_CQS_DATA_TYPE_U64: + evt = 
(u64 *)((u8 *)evt + sizeof(u64)); + break; + } + + if (sig_set) { + bitmap_set(cqs_wait_operation->signaled, i, 1); + if ((cqs_wait_operation->inherit_err_flags & (1U << i)) && + *evt > 0) { + queue->has_error = true; + } + + /* GPUCORE-28172 RDT to review */ + + queue->command_started = false; + } + + kbase_phy_alloc_mapping_put(queue->kctx, mapping); + + if (!sig_set) + break; + } + } + + /* For the queue to progress further, all cqs objects should get + * signaled. + */ + return bitmap_full(cqs_wait_operation->signaled, cqs_wait_operation->nr_objs); +} + +static int kbase_kcpu_cqs_wait_operation_prepare(struct kbase_kcpu_command_queue *queue, + struct base_kcpu_command_cqs_wait_operation_info *cqs_wait_operation_info, + struct kbase_kcpu_command *current_command) +{ + struct base_cqs_wait_operation_info *objs; + unsigned int nr_objs = cqs_wait_operation_info->nr_objs; + + lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + + if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) + return -EINVAL; + + if (!nr_objs) + return -EINVAL; + + objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL); + if (!objs) + return -ENOMEM; + + if (copy_from_user(objs, u64_to_user_ptr(cqs_wait_operation_info->objs), + nr_objs * sizeof(*objs))) { + kfree(objs); + return -ENOMEM; + } + + if (++queue->cqs_wait_count == 1) { + if (kbase_csf_event_wait_add(queue->kctx, + event_cqs_callback, queue)) { + kfree(objs); + queue->cqs_wait_count--; + return -ENOMEM; + } + } + + current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION; + current_command->info.cqs_wait_operation.nr_objs = nr_objs; + current_command->info.cqs_wait_operation.objs = objs; + current_command->info.cqs_wait_operation.inherit_err_flags = + cqs_wait_operation_info->inherit_err_flags; + + current_command->info.cqs_wait_operation.signaled = kcalloc(BITS_TO_LONGS(nr_objs), + sizeof(*current_command->info.cqs_wait_operation.signaled), GFP_KERNEL); + if (!current_command->info.cqs_wait_operation.signaled) { + if 
(--queue->cqs_wait_count == 0) { + kbase_csf_event_wait_remove(queue->kctx, + event_cqs_callback, queue); + } + + kfree(objs); + return -ENOMEM; + } + + return 0; +} + +static void kbase_kcpu_cqs_set_operation_process( + struct kbase_device *kbdev, + struct kbase_kcpu_command_queue *queue, + struct kbase_kcpu_command_cqs_set_operation_info *cqs_set_operation) +{ + unsigned int i; + + lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + + if (WARN_ON(!cqs_set_operation->objs)) + return; + + for (i = 0; i < cqs_set_operation->nr_objs; i++) { + struct kbase_vmap_struct *mapping; + u64 *evt; + + evt = (u64 *)kbase_phy_alloc_mapping_get( + queue->kctx, cqs_set_operation->objs[i].addr, &mapping); + + /* GPUCORE-28172 RDT to review */ + + if (!evt) { + dev_warn(kbdev->dev, + "Sync memory %llx already freed", cqs_set_operation->objs[i].addr); + queue->has_error = true; + } else { + switch (cqs_set_operation->objs[i].operation) { + case BASEP_CQS_SET_OPERATION_ADD: + *evt += cqs_set_operation->objs[i].val; + break; + case BASEP_CQS_SET_OPERATION_SET: + *evt = cqs_set_operation->objs[i].val; + break; + default: + dev_warn(kbdev->dev, + "Unsupported CQS set operation %d", cqs_set_operation->objs[i].operation); + queue->has_error = true; + break; + } + + /* Increment evt up to the error_state value depending on the CQS data type */ + switch (cqs_set_operation->objs[i].data_type) { + default: + dev_warn(kbdev->dev, "Unreachable data_type=%d", cqs_set_operation->objs[i].data_type); + /* Fallthrough - hint to compiler that there's really only 2 options at present */ + case BASEP_CQS_DATA_TYPE_U32: + evt = (u64 *)((u8 *)evt + sizeof(u32)); + break; + case BASEP_CQS_DATA_TYPE_U64: + evt = (u64 *)((u8 *)evt + sizeof(u64)); + break; + } + + /* GPUCORE-28172 RDT to review */ + + /* Always propagate errors */ + *evt = queue->has_error; + + kbase_phy_alloc_mapping_put(queue->kctx, mapping); + } + } + + kbase_csf_event_signal_notify_gpu(queue->kctx); + + 
kfree(cqs_set_operation->objs); + cqs_set_operation->objs = NULL; +} + +static int kbase_kcpu_cqs_set_operation_prepare( + struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_cqs_set_operation_info *cqs_set_operation_info, + struct kbase_kcpu_command *current_command) +{ + struct kbase_context *const kctx = kcpu_queue->kctx; + struct base_cqs_set_operation_info *objs; + unsigned int nr_objs = cqs_set_operation_info->nr_objs; + + lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + + if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS) + return -EINVAL; + + if (!nr_objs) + return -EINVAL; + + objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL); + if (!objs) + return -ENOMEM; + + if (copy_from_user(objs, u64_to_user_ptr(cqs_set_operation_info->objs), + nr_objs * sizeof(*objs))) { + kfree(objs); + return -ENOMEM; + } + + current_command->type = BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION; + current_command->info.cqs_set_operation.nr_objs = nr_objs; + current_command->info.cqs_set_operation.objs = objs; + + return 0; +} + +#if IS_ENABLED(CONFIG_SYNC_FILE) +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +static void kbase_csf_fence_wait_callback(struct fence *fence, + struct fence_cb *cb) +#else +static void kbase_csf_fence_wait_callback(struct dma_fence *fence, + struct dma_fence_cb *cb) +#endif +{ + struct kbase_kcpu_command_fence_info *fence_info = container_of(cb, + struct kbase_kcpu_command_fence_info, fence_cb); + struct kbase_kcpu_command_queue *kcpu_queue = fence_info->kcpu_queue; + struct kbase_context *const kctx = kcpu_queue->kctx; + + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, FENCE_WAIT_END, kcpu_queue, + fence->context, fence->seqno); + + /* Resume kcpu command queue processing. 
*/ + queue_work(kctx->csf.kcpu_queues.wq, &kcpu_queue->work); +} + +static void kbase_kcpu_fence_wait_cancel( + struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info) +{ + struct kbase_context *const kctx = kcpu_queue->kctx; + + lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + + if (WARN_ON(!fence_info->fence)) + return; + + if (kcpu_queue->fence_wait_processed) { + bool removed = dma_fence_remove_callback(fence_info->fence, + &fence_info->fence_cb); + + if (removed) + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, FENCE_WAIT_END, + kcpu_queue, fence_info->fence->context, + fence_info->fence->seqno); + } + + /* Release the reference which is kept by the kcpu_queue */ + kbase_fence_put(fence_info->fence); + kcpu_queue->fence_wait_processed = false; + + fence_info->fence = NULL; +} + +/** + * kbase_kcpu_fence_wait_process() - Process the kcpu fence wait command + * + * @kcpu_queue: The queue containing the fence wait command + * @fence_info: Reference to a fence for which the command is waiting + * + * Return: 0 if fence wait is blocked, 1 if it is unblocked, negative error if + * an error has occurred and fence should no longer be waited on. 
+ */ +static int kbase_kcpu_fence_wait_process( + struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info) +{ + int fence_status = 0; +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence; +#else + struct dma_fence *fence; +#endif + + lockdep_assert_held(&kcpu_queue->kctx->csf.kcpu_queues.lock); + + if (WARN_ON(!fence_info->fence)) + return -EINVAL; + + fence = fence_info->fence; + + if (kcpu_queue->fence_wait_processed) { + fence_status = dma_fence_get_status(fence); + } else { + int cb_err = dma_fence_add_callback(fence, + &fence_info->fence_cb, + kbase_csf_fence_wait_callback); + + KBASE_KTRACE_ADD_CSF_KCPU(kcpu_queue->kctx->kbdev, + FENCE_WAIT_START, kcpu_queue, + fence->context, fence->seqno); + fence_status = cb_err; + if (cb_err == 0) + kcpu_queue->fence_wait_processed = true; + else if (cb_err == -ENOENT) + fence_status = dma_fence_get_status(fence); + } + + /* + * At this point fence status can contain 3 types of values: + * - Value 0 to represent that fence in question is not signalled yet + * - Value 1 to represent that fence in question is signalled without + * errors + * - Negative error code to represent that some error has occurred such + * that waiting on it is no longer valid. 
+ */ + + if (fence_status) + kbase_kcpu_fence_wait_cancel(kcpu_queue, fence_info); + + return fence_status; +} + +static int kbase_kcpu_fence_wait_prepare( + struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_fence_info *fence_info, + struct kbase_kcpu_command *current_command) +{ + struct kbase_context *const kctx = kcpu_queue->kctx; +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence_in; +#else + struct dma_fence *fence_in; +#endif + struct base_fence fence; + + lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + + if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), + sizeof(fence))) + return -ENOMEM; + + fence_in = sync_file_get_fence(fence.basep.fd); + + if (!fence_in) + return -ENOENT; + + current_command->type = BASE_KCPU_COMMAND_TYPE_FENCE_WAIT; + current_command->info.fence.fence = fence_in; + current_command->info.fence.kcpu_queue = kcpu_queue; + + return 0; +} + +static int kbase_kcpu_fence_signal_process( + struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info) +{ + struct kbase_context *const kctx = kcpu_queue->kctx; + int ret; + + if (WARN_ON(!fence_info->fence)) + return -EINVAL; + + ret = dma_fence_signal(fence_info->fence); + + if (unlikely(ret < 0)) { + dev_warn(kctx->kbdev->dev, + "fence_signal() failed with %d\n", ret); + } + + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, FENCE_SIGNAL, kcpu_queue, + fence_info->fence->context, + fence_info->fence->seqno); + + dma_fence_put(fence_info->fence); + fence_info->fence = NULL; + + return ret; +} + +static int kbase_kcpu_fence_signal_prepare( + struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_fence_info *fence_info, + struct kbase_kcpu_command *current_command) +{ + struct kbase_context *const kctx = kcpu_queue->kctx; +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence_out; +#else + struct dma_fence *fence_out; +#endif + struct base_fence fence; + struct sync_file 
*sync_file; + int ret = 0; + int fd; + + lockdep_assert_held(&kctx->csf.kcpu_queues.lock); + + if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), + sizeof(fence))) + return -EFAULT; + + fence_out = kzalloc(sizeof(*fence_out), GFP_KERNEL); + if (!fence_out) + return -ENOMEM; + + dma_fence_init(fence_out, + &kbase_fence_ops, + &kbase_csf_fence_lock, + kcpu_queue->fence_context, + ++kcpu_queue->fence_seqno); + +#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE) + /* Take an extra reference to the fence on behalf of the sync file. + * This is only needded on older kernels where sync_file_create() + * does not take its own reference. This was changed in v4.9.68 + * where sync_file_create() now takes its own reference. + */ + dma_fence_get(fence_out); +#endif + + /* create a sync_file fd representing the fence */ + sync_file = sync_file_create(fence_out); + if (!sync_file) { +#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE) + dma_fence_put(fence_out); +#endif + ret = -ENOMEM; + goto file_create_fail; + } + + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) { + ret = fd; + goto fd_flags_fail; + } + + fd_install(fd, sync_file->file); + + fence.basep.fd = fd; + + current_command->type = BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL; + current_command->info.fence.fence = fence_out; + + if (copy_to_user(u64_to_user_ptr(fence_info->fence), &fence, + sizeof(fence))) { + ret = -EFAULT; + goto fd_flags_fail; + } + + return 0; + +fd_flags_fail: + fput(sync_file->file); +file_create_fail: + dma_fence_put(fence_out); + + return ret; +} +#endif /* CONFIG_SYNC_FILE */ + +static void kcpu_queue_process_worker(struct work_struct *data) +{ + struct kbase_kcpu_command_queue *queue = container_of(data, + struct kbase_kcpu_command_queue, work); + + mutex_lock(&queue->kctx->csf.kcpu_queues.lock); + + kcpu_queue_process(queue, false); + + mutex_unlock(&queue->kctx->csf.kcpu_queues.lock); +} + +static int delete_queue(struct kbase_context *kctx, u32 id) +{ + int err = 0; + + 
mutex_lock(&kctx->csf.kcpu_queues.lock); + + if ((id < KBASEP_MAX_KCPU_QUEUES) && kctx->csf.kcpu_queues.array[id]) { + struct kbase_kcpu_command_queue *queue = + kctx->csf.kcpu_queues.array[id]; + + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_DESTROY, + queue, queue->num_pending_cmds, queue->cqs_wait_count); + + /* Drain the remaining work for this queue first and go past + * all the waits. + */ + kcpu_queue_process(queue, true); + + /* All commands should have been processed */ + WARN_ON(queue->num_pending_cmds); + + /* All CQS wait commands should have been cleaned up */ + WARN_ON(queue->cqs_wait_count); + + kctx->csf.kcpu_queues.array[id] = NULL; + bitmap_clear(kctx->csf.kcpu_queues.in_use, id, 1); + + /* Fire the tracepoint with the mutex held to enforce correct + * ordering with the summary stream. + */ + KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE(kctx->kbdev, queue); + + mutex_unlock(&kctx->csf.kcpu_queues.lock); + + cancel_work_sync(&queue->work); + + kfree(queue); + } else { + dev_warn(kctx->kbdev->dev, + "Attempt to delete a non-existent KCPU queue\n"); + mutex_unlock(&kctx->csf.kcpu_queues.lock); + err = -EINVAL; + } + return err; +} + +static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO( + struct kbase_device *kbdev, + const struct kbase_kcpu_command_queue *queue, + const struct kbase_kcpu_command_jit_alloc_info *jit_alloc, + int alloc_status) +{ + u8 i; + + KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( + kbdev, queue); + for (i = 0; i < jit_alloc->count; i++) { + const u8 id = jit_alloc->info[i].id; + const struct kbase_va_region *reg = queue->kctx->jit_alloc[id]; + u64 gpu_alloc_addr = 0; + u64 mmu_flags = 0; + + if ((alloc_status == 0) && !WARN_ON(!reg) && + !WARN_ON(reg == KBASE_RESERVED_REG_JIT_ALLOC)) { +#ifdef CONFIG_MALI_VECTOR_DUMP + struct tagged_addr phy = {0}; +#endif /* CONFIG_MALI_VECTOR_DUMP */ + + gpu_alloc_addr = reg->start_pfn << PAGE_SHIFT; +#ifdef CONFIG_MALI_VECTOR_DUMP + mmu_flags = 
kbase_mmu_create_ate(kbdev, + phy, reg->flags, + MIDGARD_MMU_BOTTOMLEVEL, + queue->kctx->jit_group_id); +#endif /* CONFIG_MALI_VECTOR_DUMP */ + } + KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( + kbdev, queue, alloc_status, gpu_alloc_addr, mmu_flags); + } +} + +static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( + struct kbase_device *kbdev, + const struct kbase_kcpu_command_queue *queue) +{ + KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( + kbdev, queue); +} + +static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END( + struct kbase_device *kbdev, + const struct kbase_kcpu_command_queue *queue) +{ + KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END( + kbdev, queue); +} + +static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, + bool ignore_waits) +{ + struct kbase_device *kbdev = queue->kctx->kbdev; + bool process_next = true; + size_t i; + + lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock); + + for (i = 0; i != queue->num_pending_cmds; ++i) { + struct kbase_kcpu_command *cmd = + &queue->commands[(u8)(queue->start_offset + i)]; + int status; + + switch (cmd->type) { + case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: + if (!queue->command_started) { + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START( + kbdev, queue); + queue->command_started = true; + } + + status = 0; +#if IS_ENABLED(CONFIG_SYNC_FILE) + if (ignore_waits) { + kbase_kcpu_fence_wait_cancel(queue, + &cmd->info.fence); + } else { + status = kbase_kcpu_fence_wait_process(queue, + &cmd->info.fence); + + if (status == 0) + process_next = false; + else if (status < 0) + queue->has_error = true; + } +#else + dev_warn(kbdev->dev, + "unexpected fence wait command found\n"); + + status = -EINVAL; + queue->has_error = true; +#endif + + if (process_next) { + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END( + kbdev, queue, status < 0 ? 
status : 0); + queue->command_started = false; + } + break; + case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START( + kbdev, queue); + + status = 0; + +#if IS_ENABLED(CONFIG_SYNC_FILE) + status = kbase_kcpu_fence_signal_process( + queue, &cmd->info.fence); + + if (status < 0) + queue->has_error = true; +#else + dev_warn(kbdev->dev, + "unexpected fence signal command found\n"); + + status = -EINVAL; + queue->has_error = true; +#endif + + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END( + kbdev, queue, status); + break; + case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: + status = kbase_kcpu_cqs_wait_process(kbdev, queue, + &cmd->info.cqs_wait); + + if (!status && !ignore_waits) { + process_next = false; + } else { + /* Either all CQS objects were signaled or + * there was an error or the queue itself is + * being deleted. + * In all cases can move to the next command. + * TBD: handle the error + */ + cleanup_cqs_wait(queue, &cmd->info.cqs_wait); + } + + break; + case BASE_KCPU_COMMAND_TYPE_CQS_SET: + kbase_kcpu_cqs_set_process(kbdev, queue, + &cmd->info.cqs_set); + + break; + case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: + status = kbase_kcpu_cqs_wait_operation_process(kbdev, queue, + &cmd->info.cqs_wait_operation); + + if (!status && !ignore_waits) { + process_next = false; + } else { + /* Either all CQS objects were signaled or + * there was an error or the queue itself is + * being deleted. + * In all cases can move to the next command. 
+ * TBD: handle the error + */ + cleanup_cqs_wait_operation(queue, &cmd->info.cqs_wait_operation); + } + + break; + case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: + kbase_kcpu_cqs_set_operation_process(kbdev, queue, + &cmd->info.cqs_set_operation); + + break; + case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: + /* Clear the queue's error state */ + queue->has_error = false; + + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER( + kbdev, queue); + break; + case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: { + struct kbase_ctx_ext_res_meta *meta = NULL; + + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START( + kbdev, queue); + + kbase_gpu_vm_lock(queue->kctx); + meta = kbase_sticky_resource_acquire( + queue->kctx, cmd->info.import.gpu_va); + kbase_gpu_vm_unlock(queue->kctx); + + if (meta == NULL) { + queue->has_error = true; + dev_warn(kbdev->dev, + "failed to map an external resource\n"); + } + + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END( + kbdev, queue, meta ? 0 : 1); + break; + } + case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: { + bool ret; + + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START( + kbdev, queue); + + kbase_gpu_vm_lock(queue->kctx); + ret = kbase_sticky_resource_release( + queue->kctx, NULL, cmd->info.import.gpu_va); + kbase_gpu_vm_unlock(queue->kctx); + + if (!ret) { + queue->has_error = true; + dev_warn(kbdev->dev, + "failed to release the reference. resource not found\n"); + } + + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END( + kbdev, queue, ret ? 0 : 1); + break; + } + case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: { + bool ret; + + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START( + kbdev, queue); + + kbase_gpu_vm_lock(queue->kctx); + ret = kbase_sticky_resource_release_force( + queue->kctx, NULL, cmd->info.import.gpu_va); + kbase_gpu_vm_unlock(queue->kctx); + + if (!ret) { + queue->has_error = true; + dev_warn(kbdev->dev, + "failed to release the reference. 
resource not found\n"); + } + + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END( + kbdev, queue, ret ? 0 : 1); + break; + } + case BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: + { + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START( + kbdev, queue); + + status = kbase_kcpu_jit_allocate_process(queue, cmd); + if (status == -EAGAIN) { + process_next = false; + } else { + if (status != 0) + queue->has_error = true; + + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO( + kbdev, queue, &cmd->info.jit_alloc, + status); + + kbase_kcpu_jit_allocate_finish(queue, cmd); + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( + kbdev, queue); + } + break; + } + case BASE_KCPU_COMMAND_TYPE_JIT_FREE: + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START( + kbdev, queue); + + status = kbase_kcpu_jit_free_process(queue, cmd); + if (status) + queue->has_error = true; + + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END( + kbdev, queue); + break; + case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: { + struct kbase_suspend_copy_buffer *sus_buf = + cmd->info.suspend_buf_copy.sus_buf; + + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START( + kbdev, queue); + + status = kbase_csf_queue_group_suspend_process( + queue->kctx, sus_buf, + cmd->info.suspend_buf_copy.group_handle); + if (status) + queue->has_error = true; + + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END( + kbdev, queue, status); + + if (!sus_buf->cpu_alloc) { + int i; + + for (i = 0; i < sus_buf->nr_pages; i++) + put_page(sus_buf->pages[i]); + } else { + kbase_mem_phy_alloc_kernel_unmapped( + sus_buf->cpu_alloc); + kbase_mem_phy_alloc_put(sus_buf->cpu_alloc); + } + + kfree(sus_buf->pages); + kfree(sus_buf); + break; + } +#if MALI_UNIT_TEST + case BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME: { + u64 time = ktime_get_raw_ns(); + void *target_page = kmap(*cmd->info.sample_time.page); + + if (target_page) { + memcpy(target_page + + cmd->info.sample_time.page_offset, + &time, 
sizeof(time)); + kunmap(*cmd->info.sample_time.page); + } else { + dev_warn(kbdev->dev, + "Could not kmap target page\n"); + queue->has_error = true; + } + put_page(*cmd->info.sample_time.page); + kfree(cmd->info.sample_time.page); + break; + } +#endif /* MALI_UNIT_TEST */ + default: + dev_warn(kbdev->dev, + "Unrecognized command type\n"); + break; + } /* switch */ + + /*TBD: error handling */ + + if (!process_next) + break; + } + + if (i > 0) { + queue->start_offset += i; + queue->num_pending_cmds -= i; + + /* If an attempt to enqueue commands failed then we must raise + * an event in case the client wants to retry now that there is + * free space in the buffer. + */ + if (queue->enqueue_failed) { + queue->enqueue_failed = false; + kbase_csf_event_signal_cpu_only(queue->kctx); + } + } +} + +static size_t kcpu_queue_get_space(struct kbase_kcpu_command_queue *queue) +{ + return KBASEP_KCPU_QUEUE_SIZE - queue->num_pending_cmds; +} + +static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND( + const struct kbase_kcpu_command_queue *queue, + const struct kbase_kcpu_command *cmd) +{ + struct kbase_device *kbdev = queue->kctx->kbdev; + + switch (cmd->type) { + case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT( + kbdev, queue, cmd->info.fence.fence); + break; + case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL( + kbdev, queue, cmd->info.fence.fence); + break; + case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: + { + const struct base_cqs_wait_info *waits = + cmd->info.cqs_wait.objs; + u32 inherit_err_flags = cmd->info.cqs_wait.inherit_err_flags; + unsigned int i; + + for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) { + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT( + kbdev, queue, waits[i].addr, waits[i].val, + (inherit_err_flags & ((u32)1 << i)) ? 
1 : 0); + } + break; + } + case BASE_KCPU_COMMAND_TYPE_CQS_SET: + { + const struct base_cqs_set *sets = cmd->info.cqs_set.objs; + unsigned int i; + + for (i = 0; i < cmd->info.cqs_set.nr_objs; i++) { + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET( + kbdev, queue, sets[i].addr); + } + break; + } + case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: + { + /* GPUCORE-28172 RDT to review */ + break; + } + case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: + { + /* GPUCORE-28172 RDT to review */ + break; + } + case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER(kbdev, + queue); + break; + case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT( + kbdev, queue, cmd->info.import.gpu_va); + break; + case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT( + kbdev, queue, cmd->info.import.gpu_va); + break; + case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE( + kbdev, queue, cmd->info.import.gpu_va); + break; + case BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: + { + u8 i; + + KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC( + kbdev, queue); + for (i = 0; i < cmd->info.jit_alloc.count; i++) { + const struct base_jit_alloc_info *info = + &cmd->info.jit_alloc.info[i]; + + KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC( + kbdev, queue, info->gpu_alloc_addr, + info->va_pages, info->commit_pages, + info->extension, info->id, info->bin_id, + info->max_allocations, info->flags, + info->usage_id); + } + KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC( + kbdev, queue); + break; + } + case BASE_KCPU_COMMAND_TYPE_JIT_FREE: + { + u8 i; + + KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE( + kbdev, queue); + for (i = 0; i < cmd->info.jit_free.count; i++) { + KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE( + kbdev, queue, 
cmd->info.jit_free.ids[i]); + } + KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE( + kbdev, queue); + break; + } + case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND( + kbdev, queue, cmd->info.suspend_buf_copy.sus_buf, + cmd->info.suspend_buf_copy.group_handle); + break; +#if MALI_UNIT_TEST + case BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME: + /* + * This is test-only KCPU command, no need to have a timeline + * entry + */ + break; +#endif /* MALI_UNIT_TEST */ + } +} + +int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, + struct kbase_ioctl_kcpu_queue_enqueue *enq) +{ + struct kbase_kcpu_command_queue *queue = NULL; + void __user *user_cmds = u64_to_user_ptr(enq->addr); + int ret = 0; + u32 i; + + /* The offset to the first command that is being processed or yet to + * be processed is of u8 type, so the number of commands inside the + * queue cannot be more than 256. + */ + BUILD_BUG_ON(KBASEP_KCPU_QUEUE_SIZE > 256); + + /* Whilst the backend interface allows enqueueing multiple commands in + * a single operation, the Base interface does not expose any mechanism + * to do so. And also right now the handling is missing for the case + * where multiple commands are submitted and the enqueue of one of the + * command in the set fails after successfully enqueuing other commands + * in the set. + */ + if (enq->nr_commands != 1) { + dev_err(kctx->kbdev->dev, + "More than one commands enqueued\n"); + return -EINVAL; + } + + mutex_lock(&kctx->csf.kcpu_queues.lock); + + if (!kctx->csf.kcpu_queues.array[enq->id]) { + ret = -EINVAL; + goto out; + } + + queue = kctx->csf.kcpu_queues.array[enq->id]; + + if (kcpu_queue_get_space(queue) < enq->nr_commands) { + ret = -EBUSY; + queue->enqueue_failed = true; + goto out; + } + + /* Copy all command's info to the command buffer. 
+ * Note: it would be more efficient to process all commands in-line + * until we encounter an unresolved CQS_ / FENCE_WAIT, however, the + * interface allows multiple commands to be enqueued so we must account + * for the possibility to roll back. + */ + + for (i = 0; (i != enq->nr_commands) && !ret; ++i, ++kctx->csf.kcpu_queues.num_cmds) { + struct kbase_kcpu_command *kcpu_cmd = + &queue->commands[(u8)(queue->start_offset + queue->num_pending_cmds + i)]; + struct base_kcpu_command command; + unsigned int j; + + if (copy_from_user(&command, user_cmds, sizeof(command))) { + ret = -EFAULT; + goto out; + } + + user_cmds = (void __user *)((uintptr_t)user_cmds + + sizeof(struct base_kcpu_command)); + + for (j = 0; j < sizeof(command.padding); j++) { + if (command.padding[j] != 0) { + dev_dbg(kctx->kbdev->dev, + "base_kcpu_command padding not 0\n"); + ret = -EINVAL; + goto out; + } + } + + kcpu_cmd->enqueue_ts = kctx->csf.kcpu_queues.num_cmds; + switch (command.type) { + case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: +#if IS_ENABLED(CONFIG_SYNC_FILE) + ret = kbase_kcpu_fence_wait_prepare(queue, + &command.info.fence, kcpu_cmd); +#else + ret = -EINVAL; + dev_warn(kctx->kbdev->dev, "fence wait command unsupported\n"); +#endif + break; + case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: +#if IS_ENABLED(CONFIG_SYNC_FILE) + ret = kbase_kcpu_fence_signal_prepare(queue, + &command.info.fence, kcpu_cmd); +#else + ret = -EINVAL; + dev_warn(kctx->kbdev->dev, "fence signal command unsupported\n"); +#endif + break; + case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: + ret = kbase_kcpu_cqs_wait_prepare(queue, + &command.info.cqs_wait, kcpu_cmd); + break; + case BASE_KCPU_COMMAND_TYPE_CQS_SET: + ret = kbase_kcpu_cqs_set_prepare(queue, + &command.info.cqs_set, kcpu_cmd); + break; + case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: + ret = kbase_kcpu_cqs_wait_operation_prepare(queue, + &command.info.cqs_wait_operation, kcpu_cmd); + break; + case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: + ret = 
kbase_kcpu_cqs_set_operation_prepare(queue, + &command.info.cqs_set_operation, kcpu_cmd); + break; + case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: + kcpu_cmd->type = BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER; + ret = 0; + break; + case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: + ret = kbase_kcpu_map_import_prepare(queue, + &command.info.import, kcpu_cmd); + break; + case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: + ret = kbase_kcpu_unmap_import_prepare(queue, + &command.info.import, kcpu_cmd); + break; + case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: + ret = kbase_kcpu_unmap_import_force_prepare(queue, + &command.info.import, kcpu_cmd); + break; + case BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: + ret = kbase_kcpu_jit_allocate_prepare(queue, + &command.info.jit_alloc, kcpu_cmd); + break; + case BASE_KCPU_COMMAND_TYPE_JIT_FREE: + ret = kbase_kcpu_jit_free_prepare(queue, + &command.info.jit_free, kcpu_cmd); + break; + case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: + ret = kbase_csf_queue_group_suspend_prepare(queue, + &command.info.suspend_buf_copy, + kcpu_cmd); + break; +#if MALI_UNIT_TEST + case BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME: { + int const page_cnt = 1; + + kcpu_cmd->type = BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME; + kcpu_cmd->info.sample_time.page_addr = + command.info.sample_time.time & PAGE_MASK; + kcpu_cmd->info.sample_time.page_offset = + command.info.sample_time.time & ~PAGE_MASK; + kcpu_cmd->info.sample_time.page = kcalloc( + page_cnt, sizeof(struct page *), GFP_KERNEL); + if (!kcpu_cmd->info.sample_time.page) { + ret = -ENOMEM; + } else { + int pinned_pages = get_user_pages_fast( + kcpu_cmd->info.sample_time.page_addr, + page_cnt, 1, + kcpu_cmd->info.sample_time.page); + + if (pinned_pages < 0) { + ret = pinned_pages; + kfree(kcpu_cmd->info.sample_time.page); + } else if (pinned_pages != page_cnt) { + ret = -EINVAL; + kfree(kcpu_cmd->info.sample_time.page); + } + } + + break; + } +#endif /* MALI_UNIT_TEST */ + default: + dev_warn(queue->kctx->kbdev->dev, + "Unknown command type %u\n", 
command.type); + ret = -EINVAL; + break; + } + } + + if (!ret) { + /* We only instrument the enqueues after all commands have been + * successfully enqueued, as if we do them during the enqueue + * and there is an error, we won't be able to roll them back + * like is done for the command enqueues themselves. + */ + for (i = 0; i != enq->nr_commands; ++i) { + u8 cmd_idx = (u8)(queue->start_offset + queue->num_pending_cmds + i); + + KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND( + queue, &queue->commands[cmd_idx]); + } + + queue->num_pending_cmds += enq->nr_commands; + kcpu_queue_process(queue, false); + } else { + /* Roll back the number of enqueued commands */ + kctx->csf.kcpu_queues.num_cmds -= i; + } + +out: + mutex_unlock(&kctx->csf.kcpu_queues.lock); + + return ret; +} + +int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx) +{ + int idx; + + bitmap_zero(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES); + + for (idx = 0; idx < KBASEP_MAX_KCPU_QUEUES; ++idx) + kctx->csf.kcpu_queues.array[idx] = NULL; + + kctx->csf.kcpu_queues.wq = alloc_workqueue("mali_kbase_csf_kcpu", + WQ_UNBOUND | WQ_HIGHPRI, 0); + if (!kctx->csf.kcpu_queues.wq) + return -ENOMEM; + + mutex_init(&kctx->csf.kcpu_queues.lock); + + kctx->csf.kcpu_queues.num_cmds = 0; + + return 0; +} + +void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx) +{ + while (!bitmap_empty(kctx->csf.kcpu_queues.in_use, + KBASEP_MAX_KCPU_QUEUES)) { + int id = find_first_bit(kctx->csf.kcpu_queues.in_use, + KBASEP_MAX_KCPU_QUEUES); + + if (WARN_ON(!kctx->csf.kcpu_queues.array[id])) + clear_bit(id, kctx->csf.kcpu_queues.in_use); + else + (void)delete_queue(kctx, id); + } + + destroy_workqueue(kctx->csf.kcpu_queues.wq); + mutex_destroy(&kctx->csf.kcpu_queues.lock); +} + +int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx, + struct kbase_ioctl_kcpu_queue_delete *del) +{ + return delete_queue(kctx, (u32)del->id); +} + +int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, + struct 
kbase_ioctl_kcpu_queue_new *newq) +{ + struct kbase_kcpu_command_queue *queue; + int idx; + int ret = 0; + + /* The queue id is of u8 type and we use the index of the kcpu_queues + * array as an id, so the number of elements in the array can't be + * more than 256. + */ + BUILD_BUG_ON(KBASEP_MAX_KCPU_QUEUES > 256); + + mutex_lock(&kctx->csf.kcpu_queues.lock); + + idx = find_first_zero_bit(kctx->csf.kcpu_queues.in_use, + KBASEP_MAX_KCPU_QUEUES); + if (idx >= (int)KBASEP_MAX_KCPU_QUEUES) { + ret = -ENOMEM; + goto out; + } + + if (WARN_ON(kctx->csf.kcpu_queues.array[idx])) { + ret = -EINVAL; + goto out; + } + + queue = kzalloc(sizeof(*queue), GFP_KERNEL); + + if (!queue) { + ret = -ENOMEM; + goto out; + } + + bitmap_set(kctx->csf.kcpu_queues.in_use, idx, 1); + kctx->csf.kcpu_queues.array[idx] = queue; + queue->kctx = kctx; + queue->start_offset = 0; + queue->num_pending_cmds = 0; +#if IS_ENABLED(CONFIG_SYNC_FILE) + queue->fence_context = dma_fence_context_alloc(1); + queue->fence_seqno = 0; + queue->fence_wait_processed = false; +#endif + queue->enqueue_failed = false; + queue->command_started = false; + INIT_LIST_HEAD(&queue->jit_blocked); + queue->has_error = false; + INIT_WORK(&queue->work, kcpu_queue_process_worker); + queue->id = idx; + + newq->id = idx; + + /* Fire the tracepoint with the mutex held to enforce correct ordering + * with the summary stream. 
+ */ + KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE( + kctx->kbdev, queue, kctx->id, queue->num_pending_cmds); + + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_NEW, queue, + queue->fence_context, 0); +out: + mutex_unlock(&kctx->csf.kcpu_queues.lock); + + return ret; +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu.h new file mode 100644 index 0000000..2f6da55 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu.h @@ -0,0 +1,356 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_KCPU_H_ +#define _KBASE_CSF_KCPU_H_ + +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +#include +#else +#include +#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */ + +/* The maximum number of KCPU commands in flight, enqueueing more commands + * than this value shall block. + */ +#define KBASEP_KCPU_QUEUE_SIZE ((size_t)256) + +/** + * struct kbase_kcpu_command_import_info - Structure which holds information + * about the buffer to be imported + * + * @gpu_va: Address of the buffer to be imported. 
+ */ +struct kbase_kcpu_command_import_info { + u64 gpu_va; +}; + +/** + * struct kbase_kcpu_command_fence_info - Structure which holds information + * about the fence object enqueued in the kcpu command queue + * + * @fence_cb: Fence callback + * @fence: Fence + * @kcpu_queue: kcpu command queue + */ +struct kbase_kcpu_command_fence_info { +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence_cb fence_cb; + struct fence *fence; +#else + struct dma_fence_cb fence_cb; + struct dma_fence *fence; +#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */ + struct kbase_kcpu_command_queue *kcpu_queue; +}; + +/** + * struct kbase_kcpu_command_cqs_set_info - Structure which holds information + * about CQS objects for the kcpu CQS set command + * + * @objs: Array of structures which define CQS objects to be used by + * the kcpu command. + * @nr_objs: Number of CQS objects in the array. + */ +struct kbase_kcpu_command_cqs_set_info { + struct base_cqs_set *objs; + unsigned int nr_objs; +}; + +/** + * struct kbase_kcpu_command_cqs_wait_info - Structure which holds information + * about CQS objects for the kcpu CQS wait command + * + * @objs: Array of structures which define CQS objects to be used by + * the kcpu command. + * @signaled: Bit array used to report the status of the CQS wait objects. + * 1 is signaled, 0 otherwise. + * @nr_objs: Number of CQS objects in the array. + * @inherit_err_flags: Bit-pattern for the CQSs in the array whose error field + * is to serve as the source for importing into the + * queue's error-state. + */ +struct kbase_kcpu_command_cqs_wait_info { + struct base_cqs_wait_info *objs; + unsigned long *signaled; + unsigned int nr_objs; + u32 inherit_err_flags; +}; + +/** + * struct kbase_kcpu_command_cqs_set_operation_info - Structure which holds information + * about CQS objects for the kcpu CQS timeline set command + * + * @objs: Array of structures which define CQS timeline objects to be used by + * the kcpu command. 
+ * @nr_objs: Number of CQS objects in the array. + */ +struct kbase_kcpu_command_cqs_set_operation_info { + struct base_cqs_set_operation_info *objs; + unsigned int nr_objs; +}; + +/** + * struct kbase_kcpu_command_cqs_wait_operation_info - Structure which holds information + * about CQS objects for the kcpu CQS timeline wait command + * + * @objs: Array of structures which define CQS timeline objects to be used by + * the kcpu command. + * @signaled: Bit array used to report the status of the CQS wait objects. + * 1 is signaled, 0 otherwise. + * @nr_objs: Number of CQS objects in the array. + */ +struct kbase_kcpu_command_cqs_wait_operation_info { + struct base_cqs_wait_operation_info *objs; + unsigned long *signaled; + unsigned int nr_objs; + u32 inherit_err_flags; +}; + +/** + * struct kbase_kcpu_command_jit_alloc_info - Structure which holds information + * needed for the kcpu command for jit allocations + * + * @node: Used to keep track of all JIT free/alloc commands in submission + * order. This must be located in the front of this struct to + * match that of kbase_kcpu_command_jit_free_info. + * @info: Array of objects of the struct base_jit_alloc_info type which + * specify jit allocations to be made by the kcpu command. + * @count: Number of jit alloc objects in the array. + * @blocked: Whether this allocation has been put into the pending list to + * be retried later. + */ +struct kbase_kcpu_command_jit_alloc_info { + struct list_head node; + struct base_jit_alloc_info *info; + u8 count; + bool blocked; +}; + +/** + * struct kbase_kcpu_command_jit_free_info - Structure which holds information + * needed for the kcpu jit free command + * + * @node: Used to keep track of all JIT free/alloc commands in submission + * order. This must be located in the front of this struct to + * match that of kbase_kcpu_command_jit_alloc_info. + * @ids: Array of identifiers of jit allocations which are to be freed + * by the kcpu command. 
+ * @count: Number of elements in the array. + */ +struct kbase_kcpu_command_jit_free_info { + struct list_head node; + u8 *ids; + u8 count; +}; + +/** + * struct kbase_suspend_copy_buffer - information about the suspend buffer + * to be copied. + * + * @size: size of the suspend buffer in bytes. + * @pages: pointer to an array of pointers to the pages which contain + * the user buffer. + * @nr_pages: number of pages. + * @offset: offset into the pages + * @cpu_alloc: Reference to physical pages of suspend buffer allocation. + */ +struct kbase_suspend_copy_buffer { + size_t size; + struct page **pages; + int nr_pages; + size_t offset; + struct kbase_mem_phy_alloc *cpu_alloc; +}; + +/** + * struct kbase_kcpu_command_group_suspend_info - structure which contains + * suspend buffer data captured for a suspended queue group. + * + * @sus_buf: Pointer to the structure which contains details of the + * user buffer and its kernel pinned pages. + * @group_handle: Handle to the mapping of CSG. + */ +struct kbase_kcpu_command_group_suspend_info { + struct kbase_suspend_copy_buffer *sus_buf; + u8 group_handle; +}; + +#if MALI_UNIT_TEST +struct kbase_kcpu_command_sample_time_info { + u64 page_addr; + u64 page_offset; + struct page **page; +}; +#endif /* MALI_UNIT_TEST */ + +/** + * struct kbase_kcpu_command - Command which is to be part of the kernel + * command queue + * + * @type: Type of the command. + * @enqueue_ts: Denotes the relative time of enqueueing, a smaller value + * indicates that it has been enqueued earlier. + * @info: Structure which holds information about the command + * dependent on the command type. 
+ * @info.fence: Fence + * @info.cqs_wait: CQS wait + * @info.cqs_set: CQS set + * @info.import: import + * @info.jit_alloc: jit allocation + * @info.jit_free: jit deallocation + * @info.suspend_buf_copy: suspend buffer copy + * @info.sample_time: sample time + */ +struct kbase_kcpu_command { + enum base_kcpu_command_type type; + u64 enqueue_ts; + union { + struct kbase_kcpu_command_fence_info fence; + struct kbase_kcpu_command_cqs_wait_info cqs_wait; + struct kbase_kcpu_command_cqs_set_info cqs_set; + struct kbase_kcpu_command_cqs_wait_operation_info cqs_wait_operation; + struct kbase_kcpu_command_cqs_set_operation_info cqs_set_operation; + struct kbase_kcpu_command_import_info import; + struct kbase_kcpu_command_jit_alloc_info jit_alloc; + struct kbase_kcpu_command_jit_free_info jit_free; + struct kbase_kcpu_command_group_suspend_info suspend_buf_copy; +#if MALI_UNIT_TEST + struct kbase_kcpu_command_sample_time_info sample_time; +#endif /* MALI_UNIT_TEST */ + } info; +}; + +/** + * struct kbase_kcpu_command_queue - a command queue executed by the kernel + * + * @kctx: The context to which this command queue belongs. + * @commands: Array of commands which have been successfully + * enqueued to this command queue. + * @work: struct work_struct which contains a pointer to + * the function which handles processing of kcpu + * commands enqueued into a kcpu command queue; + * part of kernel API for processing workqueues + * @start_offset: Index of the command to be executed next + * @id: KCPU command queue ID. + * @num_pending_cmds: The number of commands enqueued but not yet + * executed or pending + * @cqs_wait_count: Tracks the number of CQS wait commands enqueued + * @fence_context: The dma-buf fence context number for this kcpu + * queue. A unique context number is allocated for + * each kcpu queue. + * @fence_seqno: The dma-buf fence sequence number for the fence + * that is returned on the enqueue of fence signal + * command. 
This is increased every time the + * fence signal command is queued. + * @fence_wait_processed: Used to avoid reprocessing of the fence wait + * command which has blocked the processing of + * commands that follow it. + * @enqueue_failed: Indicates that no space has become available in + * the buffer since an enqueue operation failed + * because of insufficient free space. + * @command_started: Indicates that the command at the front of the + * queue has been started in a previous queue + * process, but was not completed due to some + * unmet dependencies. Ensures that instrumentation + * of the execution start of these commands is only + * fired exactly once. + * @has_error: Indicates that the kcpu queue is in error mode + * or without errors since last cleaned. + * @jit_blocked: Used to keep track of command queues blocked + * by a pending JIT allocation command. + */ +struct kbase_kcpu_command_queue { + struct kbase_context *kctx; + struct kbase_kcpu_command commands[KBASEP_KCPU_QUEUE_SIZE]; + struct work_struct work; + u8 start_offset; + u8 id; + u16 num_pending_cmds; + u32 cqs_wait_count; + u64 fence_context; + unsigned int fence_seqno; + bool fence_wait_processed; + bool enqueue_failed; + bool command_started; + struct list_head jit_blocked; + bool has_error; +}; + +/** + * kbase_csf_kcpu_queue_new - Create new KCPU command queue. + * + * @kctx: Pointer to the kbase context within which the KCPU command + * queue will be created. + * @newq: Pointer to the structure which contains information about + * the new KCPU command queue to be created. + */ +int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, + struct kbase_ioctl_kcpu_queue_new *newq); + +/** + * kbase_csf_kcpu_queue_delete - Delete KCPU command queue. + * + * Return: 0 if successful, -EINVAL if the queue ID is invalid. + * + * @kctx: Pointer to the kbase context from which the KCPU command + * queue is to be deleted. 
+ * @del: Pointer to the structure which specifies the KCPU command + * queue to be deleted. + */ +int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx, + struct kbase_ioctl_kcpu_queue_delete *del); + +/** + * kbase_csf_kcpu_queue_enqueue - Enqueue a KCPU command into a KCPU command + * queue. + * + * @kctx: Pointer to the kbase context within which the KCPU command + * is to be enqueued into the KCPU command queue. + * @enq: Pointer to the structure which specifies the KCPU command + * as well as the KCPU command queue into which the command + * is to be enqueued. + */ +int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx, + struct kbase_ioctl_kcpu_queue_enqueue *enq); + +/** + * kbase_csf_kcpu_queue_context_init - Initialize the kernel CPU queues context + * for a GPU address space + * + * @kctx: Pointer to the kbase context being initialized. + * + * Return: 0 if successful or a negative error code on failure. + */ +int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx); + +/** + * kbase_csf_kcpu_queue_context_term - Terminate the kernel CPU queues context + * for a GPU address space + * + * This function deletes any kernel CPU queues that weren't deleted before + * context termination. + * + * @kctx: Pointer to the kbase context being terminated. + */ +void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx); + +#endif /* _KBASE_CSF_KCPU_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu_debugfs.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu_debugfs.c new file mode 100644 index 0000000..0a2cde0 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu_debugfs.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "mali_kbase_csf_kcpu_debugfs.h"
+#include
+#include
+
+#if IS_ENABLED(CONFIG_SYNC_FILE)
+#include "mali_kbase_sync.h"
+#endif
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+
+/**
+ * kbasep_csf_kcpu_debugfs_print_cqs_waits() - Print additional info for KCPU
+ * queues blocked on CQS wait commands; stops at the first unmappable object.
+ *
+ * @file: The seq_file to print to
+ * @kctx: The context of the KCPU queue
+ * @waits: Pointer to the KCPU CQS wait command info
+ */
+static void kbasep_csf_kcpu_debugfs_print_cqs_waits(struct seq_file *file,
+		struct kbase_context *kctx,
+		struct kbase_kcpu_command_cqs_wait_info *waits)
+{
+	unsigned int i;
+
+	for (i = 0; i < waits->nr_objs; i++) {
+		struct kbase_vmap_struct *mapping;
+		u32 val;
+		char const *msg;
+		u32 *const cpu_ptr = (u32 *)kbase_phy_alloc_mapping_get(kctx,
+					waits->objs[i].addr, &mapping);
+
+		if (!cpu_ptr)
+			return;
+
+		val = *cpu_ptr;
+
+		kbase_phy_alloc_mapping_put(kctx, mapping);
+
+		msg = (waits->inherit_err_flags & (1U << i)) ? "true" :
+						"false";
+		seq_printf(file, " %llx(%u > %u, inherit_err: %s), ",
+			   waits->objs[i].addr, val, waits->objs[i].val, msg);
+	}
+}
+
+/**
+ * kbasep_csf_kcpu_debugfs_print_queue() - Print debug data for a KCPU queue
+ *
+ * @file: The seq_file to print to
+ * @kctx: The context of the KCPU queue
+ * @queue: Pointer to the KCPU queue
+ */
+static void kbasep_csf_kcpu_debugfs_print_queue(struct seq_file *file,
+		struct kbase_context *kctx,
+		struct kbase_kcpu_command_queue *queue)
+{
+	if (WARN_ON(!queue))
+		return;
+
+	lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+
+	seq_printf(file, "%16u, %11u, %7u, %13llu %8u",
+			queue->num_pending_cmds, queue->enqueue_failed,
+			queue->command_started ? 1 : 0,
+			queue->fence_context, queue->fence_seqno);
+
+	if (queue->command_started) {
+		struct kbase_kcpu_command *cmd =
+				&queue->commands[queue->start_offset];
+		switch (cmd->type) {
+#if IS_ENABLED(CONFIG_SYNC_FILE)
+		case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT:
+		{
+			struct kbase_sync_fence_info info;
+
+			kbase_sync_fence_info_get(cmd->info.fence.fence, &info);
+			seq_printf(file, ", Fence %pK %s %s",
+					info.fence, info.name,
+					kbase_sync_status_string(info.status));
+			break;
+		}
+#endif
+		case BASE_KCPU_COMMAND_TYPE_CQS_WAIT:
+			seq_puts(file, ", CQS ");
+			kbasep_csf_kcpu_debugfs_print_cqs_waits(file, kctx,
+					&cmd->info.cqs_wait);
+			break;
+		default:
+			seq_puts(file, ", U, Unknown blocking command");
+			break;
+		}
+	}
+
+	seq_puts(file, "\n");
+}
+
+/**
+ * kbasep_csf_kcpu_debugfs_show() - Print the KCPU queues debug information
+ *
+ * @file: The seq_file for printing to
+ * @data: The debugfs dentry private data, a pointer to kbase_context
+ *
+ * Return: Negative error code or 0 on success.
+ */ +static int kbasep_csf_kcpu_debugfs_show(struct seq_file *file, void *data) +{ + struct kbase_context *kctx = file->private; + unsigned long idx; + + seq_printf(file, "MALI_CSF_KCPU_DEBUGFS_VERSION: v%u\n", MALI_CSF_KCPU_DEBUGFS_VERSION); + seq_puts(file, "Queue Idx(err-mode), Pending Commands, Enqueue err, Blocked, Fence context & seqno, (Wait Type, Additional info)\n"); + mutex_lock(&kctx->csf.kcpu_queues.lock); + + idx = find_first_bit(kctx->csf.kcpu_queues.in_use, + KBASEP_MAX_KCPU_QUEUES); + + while (idx < KBASEP_MAX_KCPU_QUEUES) { + struct kbase_kcpu_command_queue *queue = + kctx->csf.kcpu_queues.array[idx]; + + seq_printf(file, "%9lu( %s ), ", idx, + queue->has_error ? "InErr" : "NoErr"); + kbasep_csf_kcpu_debugfs_print_queue(file, kctx, + kctx->csf.kcpu_queues.array[idx]); + + idx = find_next_bit(kctx->csf.kcpu_queues.in_use, + KBASEP_MAX_KCPU_QUEUES, idx + 1); + } + + mutex_unlock(&kctx->csf.kcpu_queues.lock); + return 0; +} + +static int kbasep_csf_kcpu_debugfs_open(struct inode *in, struct file *file) +{ + return single_open(file, kbasep_csf_kcpu_debugfs_show, in->i_private); +} + +static const struct file_operations kbasep_csf_kcpu_debugfs_fops = { + .open = kbasep_csf_kcpu_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void kbase_csf_kcpu_debugfs_init(struct kbase_context *kctx) +{ + struct dentry *file; +#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) + const mode_t mode = 0444; +#else + const mode_t mode = 0400; +#endif + + if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) + return; + + file = debugfs_create_file("kcpu_queues", mode, kctx->kctx_dentry, + kctx, &kbasep_csf_kcpu_debugfs_fops); + + if (IS_ERR_OR_NULL(file)) { + dev_warn(kctx->kbdev->dev, + "Unable to create KCPU debugfs entry"); + } +} + + +#else +/* + * Stub functions for when debugfs is disabled + */ +void kbase_csf_kcpu_debugfs_init(struct kbase_context *kctx) +{ +} + +#endif /* CONFIG_DEBUG_FS */ diff --git 
a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu_debugfs.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu_debugfs.h new file mode 100644 index 0000000..08f2fda --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_kcpu_debugfs.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ *
+ */
+
+#ifndef _KBASE_CSF_KCPU_DEBUGFS_H_
+#define _KBASE_CSF_KCPU_DEBUGFS_H_
+
+/* Forward declaration */
+struct kbase_context;
+
+#define MALI_CSF_KCPU_DEBUGFS_VERSION 0
+
+/**
+ * kbase_csf_kcpu_debugfs_init() - Create a debugfs entry for KCPU queues
+ *
+ * @kctx: The kbase_context for which to create the debugfs entry
+ */
+void kbase_csf_kcpu_debugfs_init(struct kbase_context *kctx);
+
+#endif /* _KBASE_CSF_KCPU_DEBUGFS_H_ */
diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_protected_memory.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_protected_memory.c
new file mode 100644
index 0000000..5997483
--- /dev/null
+++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_protected_memory.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "mali_kbase_csf_protected_memory.h"
+#include
+
+#if IS_ENABLED(CONFIG_OF)
+#include
+#endif
+
+int kbase_csf_protected_memory_init(struct kbase_device *const kbdev)
+{
+	int err = 0;
+
+#if IS_ENABLED(CONFIG_OF)
+	struct device_node *pma_node = of_parse_phandle(kbdev->dev->of_node,
+					"protected-memory-allocator", 0);
+	if (!pma_node) {
+		dev_info(kbdev->dev, "Protected memory allocator not available\n");
+	} else {
+		struct platform_device *const pdev =
+				of_find_device_by_node(pma_node);
+
+		kbdev->csf.pma_dev = NULL;
+		if (!pdev) {
+			dev_err(kbdev->dev, "Platform device for Protected memory allocator not found\n");
+		} else {
+			kbdev->csf.pma_dev = platform_get_drvdata(pdev);
+			if (!kbdev->csf.pma_dev) {
+				dev_info(kbdev->dev, "Protected memory allocator is not ready\n");
+				err = -EPROBE_DEFER;
+			} else if (!try_module_get(kbdev->csf.pma_dev->owner)) {
+				dev_err(kbdev->dev, "Failed to get Protected memory allocator module\n");
+				/* No module reference was taken, so make sure
+				 * kbase_csf_protected_memory_term() does not
+				 * try to drop one.
+				 */
+				kbdev->csf.pma_dev = NULL;
+				err = -ENODEV;
+			} else {
+				dev_info(kbdev->dev, "Protected memory allocator successfully loaded\n");
+			}
+		}
+		of_node_put(pma_node);
+	}
+#endif
+
+	return err;
+}
+
+void kbase_csf_protected_memory_term(struct kbase_device *const kbdev)
+{
+	if (kbdev->csf.pma_dev)
+		module_put(kbdev->csf.pma_dev->owner);
+}
+
+struct protected_memory_allocation **
+		kbase_csf_protected_memory_alloc(
+		struct kbase_device *const kbdev,
+		struct tagged_addr *phys,
+		size_t num_pages)
+{
+	size_t i;
+	struct protected_memory_allocator_device *pma_dev =
+		kbdev->csf.pma_dev;
+	struct protected_memory_allocation **pma;
+
+	/* Validate the arguments before allocating, so that an early return
+	 * cannot leak the pma array.
+	 */
+	if (WARN_ON(!pma_dev) || WARN_ON(!phys))
+		return NULL;
+
+	pma = kmalloc_array(num_pages, sizeof(*pma), GFP_KERNEL);
+	if (!pma)
+		return NULL;
+
+	for (i = 0; i < num_pages; i++) {
+		pma[i] = pma_dev->ops.pma_alloc_page(pma_dev,
+				KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER);
+		if (!pma[i])
+			break;
+
+		phys[i] = as_tagged(pma_dev->ops.pma_get_phys_addr(pma_dev,
+					pma[i]));
+	}
+
+	if (i != num_pages) {
+		/* Releases the i pages obtained so far and the array itself */
+		kbase_csf_protected_memory_free(kbdev, pma, i);
+		return NULL;
+	}
+
+	return pma;
+}
+
+void kbase_csf_protected_memory_free(
+		struct kbase_device *const kbdev,
+		struct protected_memory_allocation **pma,
+		size_t num_pages)
+{
+	size_t i;
+	struct protected_memory_allocator_device *pma_dev =
+		kbdev->csf.pma_dev;
+
+	if (WARN_ON(!pma_dev) || WARN_ON(!pma))
+		return;
+
+	for (i = 0; i < num_pages; i++)
+		pma_dev->ops.pma_free_page(pma_dev, pma[i]);
+
+	kfree(pma);
+}
diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_protected_memory.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_protected_memory.h
new file mode 100644
index 0000000..4c0609e
--- /dev/null
+++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_protected_memory.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_CSF_PROTECTED_MEMORY_H_
+#define _KBASE_CSF_PROTECTED_MEMORY_H_
+
+#include "mali_kbase.h"
+/**
+ * kbase_csf_protected_memory_init - Initialise protected memory allocator.
+ *
+ * @kbdev: Device pointer.
+ *
+ * Return: 0 if success, or an error code on failure.
+ */ +int kbase_csf_protected_memory_init(struct kbase_device *const kbdev); + +/** + * kbase_csf_protected_memory_term - Terminate protected memory allocator. + * + * @kbdev: Device pointer. + */ +void kbase_csf_protected_memory_term(struct kbase_device *const kbdev); + +/** + * kbase_csf_protected_memory_alloc - Allocate protected memory pages. + * + * @kbdev: Device pointer. + * @phys: Array of physical addresses to be filled in by the protected + * memory allocator. + * @num_pages: Number of pages requested to be allocated. + * + * Return: Pointer to an array of protected memory allocations on success, + * or NULL on failure. + */ +struct protected_memory_allocation ** + kbase_csf_protected_memory_alloc( + struct kbase_device *const kbdev, + struct tagged_addr *phys, + size_t num_pages); + +/** + * kbase_csf_protected_memory_free - Free the allocated + * protected memory pages + * + * @kbdev: Device pointer. + * @pma: Array of pointers to protected memory allocations. + * @num_pages: Number of pages to be freed. + */ +void kbase_csf_protected_memory_free( + struct kbase_device *const kbdev, + struct protected_memory_allocation **pma, + size_t num_pages); +#endif diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_reset_gpu.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_reset_gpu.c new file mode 100644 index 0000000..f6d61d7 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_reset_gpu.c @@ -0,0 +1,629 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Waiting timeout for GPU reset to complete */ +#define GPU_RESET_TIMEOUT_MS (5000) /* 5 seconds */ +#define DUMP_DWORDS_PER_LINE (4) +/* 16 characters needed for a 8 byte value in hex & 1 character for space */ +#define DUMP_HEX_CHARS_PER_DWORD ((2 * 8) + 1) +#define DUMP_HEX_CHARS_PER_LINE \ + (DUMP_DWORDS_PER_LINE * DUMP_HEX_CHARS_PER_DWORD) + +static inline bool +kbase_csf_reset_state_is_silent(enum kbase_csf_reset_gpu_state state) +{ + return (state == KBASE_CSF_RESET_GPU_COMMITTED_SILENT); +} + +static inline bool +kbase_csf_reset_state_is_committed(enum kbase_csf_reset_gpu_state state) +{ + return (state == KBASE_CSF_RESET_GPU_COMMITTED || + state == KBASE_CSF_RESET_GPU_COMMITTED_SILENT); +} + +static inline bool +kbase_csf_reset_state_is_active(enum kbase_csf_reset_gpu_state state) +{ + return (state == KBASE_CSF_RESET_GPU_HAPPENING); +} + +/** + * DOC: Mechanism for coherent access to the HW with respect to GPU reset + * + * Access to the HW from non-atomic context outside of the reset thread must + * use kbase_reset_gpu_prevent_and_wait() / kbase_reset_gpu_try_prevent(). + * + * This currently works by taking the &kbase_device's csf.reset.sem, for + * 'write' access by the GPU reset thread and 'read' access by every other + * thread. 
The use of this rw_semaphore means: + * + * - there will be mutual exclusion (and thus waiting) between the thread doing + * reset ('writer') and threads trying to access the GPU for 'normal' + * operations ('readers') + * + * - multiple threads may prevent reset from happening without serializing each + * other prematurely. Note that at present the wait for reset to finish has + * to be done higher up in the driver than actual GPU access, at a point + * where it won't cause lock ordering issues. At such a point, some paths may + * actually lead to no GPU access, but we would prefer to avoid serializing + * at that level + * + * - lockdep (if enabled in the kernel) will check such uses for deadlock + * + * If instead &kbase_device's csf.reset.wait &wait_queue_head_t were used on + * its own, we'd also need to add a &lockdep_map and appropriate lockdep calls + * to make use of lockdep checking in all places where the &wait_queue_head_t + * is waited upon or signaled. + * + * Indeed places where we wait on &kbase_device's csf.reset.wait (such as + * kbase_reset_gpu_wait()) are the only places where we need extra call(s) to + * lockdep, and they are made on the existing rw_semaphore. + * + * For atomic access, the &kbase_device's csf.reset.state member should be + * checked instead, such as by using kbase_reset_gpu_is_active(). + * + * Ideally the &rw_semaphore should be replaced in future with a single mutex + * that protects any access to the GPU, via reset or otherwise.
+ */
+
+int kbase_reset_gpu_prevent_and_wait(struct kbase_device *kbdev)
+{
+	down_read(&kbdev->csf.reset.sem);
+
+	if (atomic_read(&kbdev->csf.reset.state) ==
+	    KBASE_CSF_RESET_GPU_FAILED) {
+		up_read(&kbdev->csf.reset.sem);
+		return -ENOMEM;
+	}
+
+	if (WARN_ON(kbase_reset_gpu_is_active(kbdev))) {
+		up_read(&kbdev->csf.reset.sem);
+		return -EFAULT;
+	}
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_reset_gpu_prevent_and_wait);
+
+int kbase_reset_gpu_try_prevent(struct kbase_device *kbdev)
+{
+	if (!down_read_trylock(&kbdev->csf.reset.sem))
+		return -EAGAIN;
+
+	if (atomic_read(&kbdev->csf.reset.state) ==
+	    KBASE_CSF_RESET_GPU_FAILED) {
+		up_read(&kbdev->csf.reset.sem);
+		return -ENOMEM;
+	}
+
+	if (WARN_ON(kbase_reset_gpu_is_active(kbdev))) {
+		up_read(&kbdev->csf.reset.sem);
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+void kbase_reset_gpu_allow(struct kbase_device *kbdev)
+{
+	up_read(&kbdev->csf.reset.sem);
+}
+KBASE_EXPORT_TEST_API(kbase_reset_gpu_allow);
+
+void kbase_reset_gpu_assert_prevented(struct kbase_device *kbdev)
+{
+#if KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE
+	lockdep_assert_held_read(&kbdev->csf.reset.sem);
+#else
+	lockdep_assert_held(&kbdev->csf.reset.sem);
+#endif
+	WARN_ON(kbase_reset_gpu_is_active(kbdev));
+}
+
+void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev)
+{
+	if (atomic_read(&kbdev->csf.reset.state) == KBASE_CSF_RESET_GPU_FAILED)
+		return;
+
+#if KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE
+	lockdep_assert_held_read(&kbdev->csf.reset.sem);
+#else
+	lockdep_assert_held(&kbdev->csf.reset.sem);
+#endif
+	WARN_ON(kbase_reset_gpu_is_active(kbdev));
+}
+
+/* Mark the reset as now happening, and synchronize with other threads that
+ * might be trying to access the GPU
+ */
+static void kbase_csf_reset_begin_hw_access_sync(
+	struct kbase_device *kbdev,
+	enum kbase_csf_reset_gpu_state initial_reset_state)
+{
+	unsigned long hwaccess_lock_flags;
+	unsigned long scheduler_spin_lock_flags;
+
+	/* Note this is a WARN/atomic_set because it is a software issue for a
+	 * race to be occurring here
+	 */
+	WARN_ON(!kbase_csf_reset_state_is_committed(initial_reset_state));
+
+	down_write(&kbdev->csf.reset.sem);
+
+	/* Threads in atomic context accessing the HW will hold one of these
+	 * locks, so synchronize with them too.
+	 */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_lock_flags);
+	kbase_csf_scheduler_spin_lock(kbdev, &scheduler_spin_lock_flags);
+	atomic_set(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_HAPPENING);
+	kbase_csf_scheduler_spin_unlock(kbdev, scheduler_spin_lock_flags);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_lock_flags);
+}
+
+/* Mark the reset as finished and allow other threads to once more access the
+ * GPU
+ */
+static void kbase_csf_reset_end_hw_access(struct kbase_device *kbdev,
+					  int err_during_reset,
+					  bool firmware_inited)
+{
+	unsigned long hwaccess_lock_flags;
+	unsigned long scheduler_spin_lock_flags;
+
+	WARN_ON(!kbase_csf_reset_state_is_active(
+		atomic_read(&kbdev->csf.reset.state)));
+
+	/* Once again, we synchronize with atomic context threads accessing the
+	 * HW, as otherwise any actions they defer could get lost
+	 */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_lock_flags);
+	kbase_csf_scheduler_spin_lock(kbdev, &scheduler_spin_lock_flags);
+
+	if (!err_during_reset) {
+		atomic_set(&kbdev->csf.reset.state,
+			   KBASE_CSF_RESET_GPU_NOT_PENDING);
+	} else {
+		dev_err(kbdev->dev, "Reset failed to complete");
+		atomic_set(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_FAILED);
+	}
+
+	kbase_csf_scheduler_spin_unlock(kbdev, scheduler_spin_lock_flags);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_lock_flags);
+
+	/* Invoke the scheduling tick after formally finishing the reset,
+	 * otherwise the tick might start too soon and notice that reset
+	 * is still in progress.
+ */ + up_write(&kbdev->csf.reset.sem); + wake_up(&kbdev->csf.reset.wait); + + if (!err_during_reset && likely(firmware_inited)) + kbase_csf_scheduler_enable_tick_timer(kbdev); +} + +static void kbase_csf_debug_dump_registers(struct kbase_device *kbdev) +{ + kbase_io_history_dump(kbdev); + + dev_err(kbdev->dev, "Register state:"); + dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x MCU_STATUS=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)), + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)), + kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS))); + dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", + kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)), + kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)), + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS))); + dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)), + kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)), + kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK))); + dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)), + kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1))); + dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x TILER_CONFIG=0x%08x", + kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG)), + kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG)), + kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG))); +} + +static void kbase_csf_dump_firmware_trace_buffer(struct kbase_device *kbdev) +{ + u8 *buf, *line_str; + unsigned int read_size; + struct firmware_trace_buffer *tb = + kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME); + + if (tb == NULL) { + dev_dbg(kbdev->dev, "Can't get the trace buffer, firmware trace dump skipped"); + return; + } + + buf = kmalloc(PAGE_SIZE + DUMP_HEX_CHARS_PER_LINE + 1, GFP_KERNEL); + if (buf == NULL) { + 
dev_err(kbdev->dev, "Short of memory, firmware trace dump skipped"); + return; + } + line_str = &buf[PAGE_SIZE]; + + dev_err(kbdev->dev, "Firmware trace buffer dump:"); + while ((read_size = kbase_csf_firmware_trace_buffer_read_data(tb, buf, + PAGE_SIZE))) { + u64 *ptr = (u64 *)buf; + u32 num_dwords; + + for (num_dwords = read_size / sizeof(u64); + num_dwords >= DUMP_DWORDS_PER_LINE; + num_dwords -= DUMP_DWORDS_PER_LINE) { + dev_err(kbdev->dev, "%016llx %016llx %016llx %016llx", + ptr[0], ptr[1], ptr[2], ptr[3]); + ptr += DUMP_DWORDS_PER_LINE; + } + + if (num_dwords) { + int pos = 0; + + while (num_dwords--) { + pos += snprintf(line_str + pos, + DUMP_HEX_CHARS_PER_DWORD + 1, + "%016llx ", ptr[0]); + ptr++; + } + + dev_err(kbdev->dev, "%s", line_str); + } + } + + kfree(buf); +} + +/** + * kbase_csf_hwcnt_on_reset_error() - Sets HWCNT to appropriate state in the + * event of an error during GPU reset. + * @kbdev: Pointer to KBase device + */ +static void kbase_csf_hwcnt_on_reset_error(struct kbase_device *kbdev) +{ + unsigned long flags; + + /* Treat this as an unrecoverable error for HWCNT */ + kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface); + + /* Re-enable counters to ensure matching enable/disable pair. + * This might reduce the hwcnt disable count to 0, and therefore + * trigger actual re-enabling of hwcnt. + * However, as the backend is now in the unrecoverable error state, + * re-enabling will immediately fail and put the context into the error + * state, preventing the hardware from being touched (which could have + * risked a hang). 
+ */ + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + kbase_csf_scheduler_spin_unlock(kbdev, flags); +} + +static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev, + bool firmware_inited, bool silent) +{ + unsigned long flags; + int err; + + WARN_ON(kbdev->irq_reset_flush); + /* The reset must now be happening otherwise other threads will not + * have been synchronized with to stop their access to the HW + */ +#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE + lockdep_assert_held_write(&kbdev->csf.reset.sem); +#elif KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE + lockdep_assert_held_exclusive(&kbdev->csf.reset.sem); +#else + lockdep_assert_held(&kbdev->csf.reset.sem); +#endif + WARN_ON(!kbase_reset_gpu_is_active(kbdev)); + + /* Reset the scheduler state before disabling the interrupts as suspend + * of active CSG slots would also be done as a part of reset. + */ + if (likely(firmware_inited)) + kbase_csf_scheduler_reset(kbdev); + cancel_work_sync(&kbdev->csf.firmware_reload_work); + + dev_dbg(kbdev->dev, "Disable GPU hardware counters.\n"); + /* This call will block until counters are disabled. + */ + kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock(&kbdev->mmu_mask_change); + kbase_pm_reset_start_locked(kbdev); + + dev_dbg(kbdev->dev, + "We're about to flush out the IRQs and their bottom halves\n"); + kbdev->irq_reset_flush = true; + + /* Disable IRQ to avoid IRQ handlers to kick in after releasing the + * spinlock; this also clears any outstanding interrupts + */ + kbase_pm_disable_interrupts_nolock(kbdev); + + spin_unlock(&kbdev->mmu_mask_change); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + dev_dbg(kbdev->dev, "Ensure that any IRQ handlers have finished\n"); + /* Must be done without any locks IRQ handlers will take. 
+ */ + kbase_synchronize_irqs(kbdev); + + dev_dbg(kbdev->dev, "Flush out any in-flight work items\n"); + kbase_flush_mmu_wqs(kbdev); + + dev_dbg(kbdev->dev, + "The flush has completed so reset the active indicator\n"); + kbdev->irq_reset_flush = false; + + mutex_lock(&kbdev->pm.lock); + if (!silent) + dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", + RESET_TIMEOUT); + + /* Output the state of some interesting registers to help in the + * debugging of GPU resets, and dump the firmware trace buffer + */ + if (!silent) { + kbase_csf_debug_dump_registers(kbdev); + if (likely(firmware_inited)) + kbase_csf_dump_firmware_trace_buffer(kbdev); + } + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_ipa_control_handle_gpu_reset_pre(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* Tell hardware counters a reset is about to occur. + * If the backend is in an unrecoverable error state (e.g. due to + * firmware being unresponsive) this will transition the backend out of + * it, on the assumption a reset will fix whatever problem there was. 
+ */ + kbase_hwcnt_backend_csf_on_before_reset(&kbdev->hwcnt_gpu_iface); + + /* Reset the GPU */ + err = kbase_pm_init_hw(kbdev, 0); + + mutex_unlock(&kbdev->pm.lock); + + if (WARN_ON(err)) { + kbase_csf_hwcnt_on_reset_error(kbdev); + return err; + } + + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_ctx_sched_restore_all_as(kbdev); + kbase_ipa_control_handle_gpu_reset_post(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); + + kbase_pm_enable_interrupts(kbdev); + + mutex_lock(&kbdev->pm.lock); + kbase_pm_reset_complete(kbdev); + /* Synchronously wait for the reload of firmware to complete */ + err = kbase_pm_wait_for_desired_state(kbdev); + mutex_unlock(&kbdev->pm.lock); + + if (WARN_ON(err)) { + kbase_csf_hwcnt_on_reset_error(kbdev); + return err; + } + + /* Re-enable GPU hardware counters */ + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + if (!silent) + dev_err(kbdev->dev, "Reset complete"); + + return 0; +} + +static void kbase_csf_reset_gpu_worker(struct work_struct *data) +{ + struct kbase_device *kbdev = container_of(data, struct kbase_device, + csf.reset.work); + bool firmware_inited; + unsigned long flags; + int err = 0; + const enum kbase_csf_reset_gpu_state initial_reset_state = + atomic_read(&kbdev->csf.reset.state); + + /* Ensure any threads (e.g. 
executing the CSF scheduler) have finished
+	 * using the HW
+	 */
+	kbase_csf_reset_begin_hw_access_sync(kbdev, initial_reset_state);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	firmware_inited = kbdev->csf.firmware_inited;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (!kbase_pm_context_active_handle_suspend(kbdev,
+			KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+		bool silent =
+			kbase_csf_reset_state_is_silent(initial_reset_state);
+
+		err = kbase_csf_reset_gpu_now(kbdev, firmware_inited, silent);
+		kbase_pm_context_idle(kbdev);
+	}
+
+	kbase_disjoint_state_down(kbdev);
+
+	/* Allow other threads to once again use the GPU */
+	kbase_csf_reset_end_hw_access(kbdev, err, firmware_inited);
+}
+
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags)
+{
+	if (flags & RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)
+		kbase_hwcnt_backend_csf_on_unrecoverable_error(
+			&kbdev->hwcnt_gpu_iface);
+
+	if (atomic_cmpxchg(&kbdev->csf.reset.state,
+			KBASE_CSF_RESET_GPU_NOT_PENDING,
+			KBASE_CSF_RESET_GPU_PREPARED) !=
+			KBASE_CSF_RESET_GPU_NOT_PENDING)
+		/* Some other thread is already resetting the GPU */
+		return false;
+
+	return true;
+}
+KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu);
+
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev,
+				       unsigned int flags)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	return kbase_prepare_to_reset_gpu(kbdev, flags);
+}
+
+void kbase_reset_gpu(struct kbase_device *kbdev)
+{
+	/* Note this is a WARN/atomic_set because it is a software issue for
+	 * a race to be occurring here
+	 */
+	if (WARN_ON(atomic_read(&kbdev->csf.reset.state) !=
+		    KBASE_CSF_RESET_GPU_PREPARED))
+		return;
+
+	atomic_set(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_COMMITTED);
+	dev_err(kbdev->dev, "Preparing to soft-reset GPU\n");
+
+	kbase_disjoint_state_up(kbdev);
+
+	queue_work(kbdev->csf.reset.workq, &kbdev->csf.reset.work);
+}
+KBASE_EXPORT_TEST_API(kbase_reset_gpu);
+
+void kbase_reset_gpu_locked(struct
kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbase_reset_gpu(kbdev); +} + +int kbase_reset_gpu_silent(struct kbase_device *kbdev) +{ + if (atomic_cmpxchg(&kbdev->csf.reset.state, + KBASE_CSF_RESET_GPU_NOT_PENDING, + KBASE_CSF_RESET_GPU_COMMITTED_SILENT) != + KBASE_CSF_RESET_GPU_NOT_PENDING) { + /* Some other thread is already resetting the GPU */ + return -EAGAIN; + } + + kbase_disjoint_state_up(kbdev); + + queue_work(kbdev->csf.reset.workq, &kbdev->csf.reset.work); + + return 0; +} + +bool kbase_reset_gpu_is_active(struct kbase_device *kbdev) +{ + enum kbase_csf_reset_gpu_state reset_state = + atomic_read(&kbdev->csf.reset.state); + + /* For CSF, the reset is considered active only when the reset worker + * is actually executing and other threads would have to wait for it to + * complete + */ + return kbase_csf_reset_state_is_active(reset_state); +} + +int kbase_reset_gpu_wait(struct kbase_device *kbdev) +{ + const long wait_timeout = + kbase_csf_timeout_in_jiffies(GPU_RESET_TIMEOUT_MS); + long remaining; + + /* Inform lockdep we might be trying to wait on a reset (as + * would've been done with down_read() - which has no 'timeout' + * variant), then use wait_event_timeout() to implement the timed + * wait. 
+ * + * in CONFIG_PROVE_LOCKING builds, this should catch potential 'time + * bound' deadlocks such as: + * - incorrect lock order with respect to others locks + * - current thread has prevented reset + * - current thread is executing the reset worker + */ + might_lock_read(&kbdev->csf.reset.sem); + + remaining = wait_event_timeout( + kbdev->csf.reset.wait, + (atomic_read(&kbdev->csf.reset.state) == + KBASE_CSF_RESET_GPU_NOT_PENDING) || + (atomic_read(&kbdev->csf.reset.state) == + KBASE_CSF_RESET_GPU_FAILED), + wait_timeout); + + if (!remaining) { + dev_warn(kbdev->dev, "Timed out waiting for the GPU reset to complete"); + return -ETIMEDOUT; + } else if (atomic_read(&kbdev->csf.reset.state) == + KBASE_CSF_RESET_GPU_FAILED) { + return -ENOMEM; + } + + return 0; +} +KBASE_EXPORT_TEST_API(kbase_reset_gpu_wait); + +int kbase_reset_gpu_init(struct kbase_device *kbdev) +{ + kbdev->csf.reset.workq = alloc_workqueue("Mali reset workqueue", 0, 1); + if (kbdev->csf.reset.workq == NULL) + return -ENOMEM; + + INIT_WORK(&kbdev->csf.reset.work, kbase_csf_reset_gpu_worker); + + init_waitqueue_head(&kbdev->csf.reset.wait); + init_rwsem(&kbdev->csf.reset.sem); + + return 0; +} + +void kbase_reset_gpu_term(struct kbase_device *kbdev) +{ + destroy_workqueue(kbdev->csf.reset.workq); +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_scheduler.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_scheduler.c new file mode 100644 index 0000000..5b795d6 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_scheduler.c @@ -0,0 +1,5063 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include "mali_kbase_config_defaults.h" +#include +#include +#include +#include "mali_kbase_csf.h" +#include +#include +#include +#include +#include + +/* Value to indicate that a queue group is not groups_to_schedule list */ +#define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX) + +/* Waiting timeout for scheduler state change for descheduling a CSG */ +#define CSG_SCHED_STOP_TIMEOUT_MS (50) + +#define CSG_SUSPEND_ON_RESET_WAIT_TIMEOUT_MS DEFAULT_RESET_TIMEOUT_MS + +/* Maximum number of endpoints which may run tiler jobs. */ +#define CSG_TILER_MAX ((u8)1) + +/* Maximum dynamic CSG slot priority value */ +#define MAX_CSG_SLOT_PRIORITY ((u8)15) + +/* CSF scheduler time slice value */ +#define CSF_SCHEDULER_TIME_TICK_MS (100) /* 100 milliseconds */ + +/* + * CSF scheduler time threshold for converting "tock" requests into "tick" if + * they come too close to the end of a tick interval. This avoids scheduling + * twice in a row. + */ +#define CSF_SCHEDULER_TIME_TICK_THRESHOLD_MS \ + CSF_SCHEDULER_TIME_TICK_MS + +#define CSF_SCHEDULER_TIME_TICK_THRESHOLD_JIFFIES \ + msecs_to_jiffies(CSF_SCHEDULER_TIME_TICK_THRESHOLD_MS) + +/* Nanoseconds per millisecond */ +#define NS_PER_MS ((u64)1000 * 1000) + +/* + * CSF minimum time to reschedule for a new "tock" request. Bursts of "tock" + * requests are not serviced immediately, but shall wait for a minimum time in + * order to reduce load on the CSF scheduler thread. 
+ */ +#define CSF_SCHEDULER_TIME_TOCK_JIFFIES 1 /* 1 jiffies-time */ + +/* CS suspended and is idle (empty ring buffer) */ +#define CS_IDLE_FLAG (1 << 0) + +/* CS suspended and is waiting for a CQS condition */ +#define CS_WAIT_SYNC_FLAG (1 << 1) + +/* 2 GPU address space slots are reserved for MCU and privileged context for HW + * counter dumping. TODO remove the slot reserved for latter in GPUCORE-26293. + */ +#define NUM_RESERVED_AS_SLOTS (2) + +static int scheduler_group_schedule(struct kbase_queue_group *group); +static void remove_group_from_idle_wait(struct kbase_queue_group *const group); +static +void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, + struct kbase_queue_group *const group, + enum kbase_csf_group_state run_state); +static struct kbase_queue_group *scheduler_get_protm_enter_async_group( + struct kbase_device *const kbdev, + struct kbase_queue_group *const group); +static struct kbase_queue_group *get_tock_top_group( + struct kbase_csf_scheduler *const scheduler); +static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev); +static int suspend_active_queue_groups(struct kbase_device *kbdev, + unsigned long *slot_mask); +static void schedule_in_cycle(struct kbase_queue_group *group, bool force); + +#define kctx_as_enabled(kctx) (!kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) + +/** + * tick_timer_callback() - Callback function for the scheduling tick hrtimer + * + * @timer: Pointer to the scheduling tick hrtimer embedded in the device + * + * This function will enqueue the scheduling tick work item for immediate + * execution, if it has not been queued already. + * + * Return: enum value to indicate that timer should not be restarted.
+ */ +static enum hrtimer_restart tick_timer_callback(struct hrtimer *timer) +{ + struct kbase_device *kbdev = container_of(timer, struct kbase_device, + csf.scheduler.tick_timer); + + kbase_csf_scheduler_advance_tick(kbdev); + return HRTIMER_NORESTART; +} + +/** + * start_tick_timer() - Start the scheduling tick hrtimer. + * + * @kbdev: Pointer to the device + * + * This function will start the scheduling tick hrtimer and is supposed to + * be called only from the tick work item function. The tick hrtimer should + * not be active already. + */ +static void start_tick_timer(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + unsigned long flags; + + lockdep_assert_held(&scheduler->lock); + + spin_lock_irqsave(&scheduler->interrupt_lock, flags); + WARN_ON(scheduler->tick_timer_active); + if (likely(!work_pending(&scheduler->tick_work))) { + scheduler->tick_timer_active = true; + + hrtimer_start(&scheduler->tick_timer, + HR_TIMER_DELAY_MSEC(scheduler->csg_scheduling_period_ms), + HRTIMER_MODE_REL); + } + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); +} + +/** + * cancel_tick_timer() - Cancel the scheduling tick hrtimer + * + * @kbdev: Pointer to the device + */ +static void cancel_tick_timer(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + unsigned long flags; + + spin_lock_irqsave(&scheduler->interrupt_lock, flags); + scheduler->tick_timer_active = false; + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + hrtimer_cancel(&scheduler->tick_timer); +} + +/** + * enqueue_tick_work() - Enqueue the scheduling tick work item + * + * @kbdev: Pointer to the device + * + * This function will queue the scheduling tick work item for immediate + * execution. This shall only be called when both the tick hrtimer and tick + * work item are not active/pending.
+ */ +static void enqueue_tick_work(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + unsigned long flags; + + lockdep_assert_held(&scheduler->lock); + + spin_lock_irqsave(&scheduler->interrupt_lock, flags); + WARN_ON(scheduler->tick_timer_active); + queue_work(scheduler->wq, &scheduler->tick_work); + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); +} + +static void release_doorbell(struct kbase_device *kbdev, int doorbell_nr) +{ + WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL); + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + clear_bit(doorbell_nr, kbdev->csf.scheduler.doorbell_inuse_bitmap); +} + +static int acquire_doorbell(struct kbase_device *kbdev) +{ + int doorbell_nr; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + doorbell_nr = find_first_zero_bit( + kbdev->csf.scheduler.doorbell_inuse_bitmap, + CSF_NUM_DOORBELL); + + if (doorbell_nr >= CSF_NUM_DOORBELL) + return KBASEP_USER_DB_NR_INVALID; + + set_bit(doorbell_nr, kbdev->csf.scheduler.doorbell_inuse_bitmap); + + return doorbell_nr; +} + +static void unassign_user_doorbell_from_group(struct kbase_device *kbdev, + struct kbase_queue_group *group) +{ + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (group->doorbell_nr != KBASEP_USER_DB_NR_INVALID) { + release_doorbell(kbdev, group->doorbell_nr); + group->doorbell_nr = KBASEP_USER_DB_NR_INVALID; + } +} + +static void unassign_user_doorbell_from_queue(struct kbase_device *kbdev, + struct kbase_queue *queue) +{ + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + mutex_lock(&kbdev->csf.reg_lock); + + if (queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID) { + queue->doorbell_nr = KBASEP_USER_DB_NR_INVALID; + /* After this the dummy page would be mapped in */ + unmap_mapping_range(kbdev->csf.db_filp->f_inode->i_mapping, + queue->db_file_offset << PAGE_SHIFT, PAGE_SIZE, 1); + } + + mutex_unlock(&kbdev->csf.reg_lock); +} + +static void assign_user_doorbell_to_group(struct kbase_device 
*kbdev, + struct kbase_queue_group *group) +{ + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (group->doorbell_nr == KBASEP_USER_DB_NR_INVALID) + group->doorbell_nr = acquire_doorbell(kbdev); +} + +static void assign_user_doorbell_to_queue(struct kbase_device *kbdev, + struct kbase_queue *const queue) +{ + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + mutex_lock(&kbdev->csf.reg_lock); + + /* If bind operation for the queue hasn't completed yet, then the + * CSI can't be programmed for the queue + * (even in stopped state) and so the doorbell also can't be assigned + * to it. + */ + if ((queue->bind_state == KBASE_CSF_QUEUE_BOUND) && + (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)) { + WARN_ON(queue->group->doorbell_nr == KBASEP_USER_DB_NR_INVALID); + queue->doorbell_nr = queue->group->doorbell_nr; + + /* After this the real Hw doorbell page would be mapped in */ + unmap_mapping_range( + kbdev->csf.db_filp->f_inode->i_mapping, + queue->db_file_offset << PAGE_SHIFT, + PAGE_SIZE, 1); + } + + mutex_unlock(&kbdev->csf.reg_lock); +} + +static void scheduler_doorbell_init(struct kbase_device *kbdev) +{ + int doorbell_nr; + + bitmap_zero(kbdev->csf.scheduler.doorbell_inuse_bitmap, + CSF_NUM_DOORBELL); + + mutex_lock(&kbdev->csf.scheduler.lock); + /* Reserve doorbell 0 for use by kernel driver */ + doorbell_nr = acquire_doorbell(kbdev); + mutex_unlock(&kbdev->csf.scheduler.lock); + + WARN_ON(doorbell_nr != CSF_KERNEL_DOORBELL_NR); +} + +static u32 get_nr_active_csgs(struct kbase_device *kbdev) +{ + u32 nr_active_csgs; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + nr_active_csgs = bitmap_weight(kbdev->csf.scheduler.csg_inuse_bitmap, + kbdev->csf.global_iface.group_num); + + return nr_active_csgs; +} + +/** + * csgs_active - returns true if any of CSG slots are in use + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * Return: true if at least one CSG slot is in use, false otherwise.
+ */ +static bool csgs_active(struct kbase_device *kbdev) +{ + u32 nr_active_csgs; + + mutex_lock(&kbdev->csf.scheduler.lock); + nr_active_csgs = get_nr_active_csgs(kbdev); + mutex_unlock(&kbdev->csf.scheduler.lock); + + /* Right now if any of the CSG interfaces are in use + * then we need to assume that there is some work pending. + * In future when we have IDLE notifications from firmware implemented + * then we would have a better idea of the pending work. + */ + return (nr_active_csgs != 0); +} + +/** + * csg_slot_in_use - returns true if a queue group has been programmed on a + * given CSG slot. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @slot: Index/number of the CSG slot in question. + * + * Return: the interface is actively engaged flag. + * + * Note: Caller must hold the scheduler lock. + */ +static inline bool csg_slot_in_use(struct kbase_device *kbdev, int slot) +{ + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + return (kbdev->csf.scheduler.csg_slots[slot].resident_group != NULL); +} + +static bool queue_group_suspended_locked(struct kbase_queue_group *group) +{ + lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); + + return (group->run_state == KBASE_CSF_GROUP_SUSPENDED || + group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE || + group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC); +} + +static bool queue_group_idle_locked(struct kbase_queue_group *group) +{ + lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); + + return (group->run_state == KBASE_CSF_GROUP_IDLE || + group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE); +} + +static bool queue_group_scheduled(struct kbase_queue_group *group) +{ + return (group->run_state != KBASE_CSF_GROUP_INACTIVE && + group->run_state != KBASE_CSF_GROUP_TERMINATED && + group->run_state != KBASE_CSF_GROUP_FAULT_EVICTED); +} + +static bool queue_group_scheduled_locked(struct kbase_queue_group *group) +{ + 
lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); + + return queue_group_scheduled(group); +} + +/** + * scheduler_wait_protm_quit() - Wait for GPU to exit protected mode. + * + * @kbdev: Pointer to the GPU device + * + * This function waits for the GPU to exit protected mode which is confirmed + * when active_protm_grp is set to NULL. + */ +static void scheduler_wait_protm_quit(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + long wt = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); + long remaining; + + lockdep_assert_held(&scheduler->lock); + + KBASE_KTRACE_ADD(kbdev, SCHEDULER_WAIT_PROTM_QUIT, NULL, + jiffies_to_msecs(wt)); + + remaining = wait_event_timeout(kbdev->csf.event_wait, + !kbase_csf_scheduler_protected_mode_in_use(kbdev), wt); + + if (!remaining) + dev_warn(kbdev->dev, "Timeout, protm_quit wait skipped"); + + KBASE_KTRACE_ADD(kbdev, SCHEDULER_WAIT_PROTM_QUIT_DONE, NULL, + jiffies_to_msecs(remaining)); +} + +/** + * scheduler_force_protm_exit() - Force GPU to exit protected mode. + * + * @kbdev: Pointer to the GPU device + * + * This function sends a ping request to the firmware and waits for the GPU + * to exit protected mode. + */ +static void scheduler_force_protm_exit(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + kbase_csf_firmware_ping(kbdev); + scheduler_wait_protm_quit(kbdev); +} + +/** + * scheduler_timer_is_enabled_nolock() - Check if the scheduler wakes up + * automatically for periodic tasks. + * + * @kbdev: Pointer to the device + * + * This is a variant of kbase_csf_scheduler_timer_is_enabled() that assumes the + * CSF scheduler lock to already have been held. 
+ * + * Return: true if the scheduler is configured to wake up periodically + */ +static bool scheduler_timer_is_enabled_nolock(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + return kbdev->csf.scheduler.timer_enabled; +} + +static void enable_gpu_idle_fw_timer(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + unsigned long flags; + + lockdep_assert_held(&scheduler->lock); + + if (scheduler->gpu_idle_fw_timer_enabled) + return; + + spin_lock_irqsave(&scheduler->interrupt_lock, flags); + + /* Update the timer_enabled flag requires holding interrupt_lock */ + scheduler->gpu_idle_fw_timer_enabled = true; + kbase_csf_firmware_enable_gpu_idle_timer(kbdev); + + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); +} + +static void disable_gpu_idle_fw_timer_locked(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + + lockdep_assert_held(&scheduler->lock); + lockdep_assert_held(&scheduler->interrupt_lock); + + /* Update of the timer_enabled flag requires holding interrupt_lock */ + if (scheduler->gpu_idle_fw_timer_enabled) { + scheduler->gpu_idle_fw_timer_enabled = false; + kbase_csf_firmware_disable_gpu_idle_timer(kbdev); + } +} + +static void disable_gpu_idle_fw_timer(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + unsigned long flags; + + lockdep_assert_held(&scheduler->lock); + + if (!scheduler->gpu_idle_fw_timer_enabled) + return; + + spin_lock_irqsave(&scheduler->interrupt_lock, flags); + disable_gpu_idle_fw_timer_locked(kbdev); + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); +} + +static void scheduler_wakeup(struct kbase_device *kbdev, bool kick) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + + lockdep_assert_held(&scheduler->lock); + + if (scheduler->state == SCHED_SUSPENDED) { + dev_dbg(kbdev->dev, "Re-activating the 
Scheduler"); + kbase_csf_scheduler_pm_active(kbdev); + scheduler->state = SCHED_INACTIVE; + + if (kick) + scheduler_enable_tick_timer_nolock(kbdev); + } +} + +static void scheduler_suspend(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + + lockdep_assert_held(&scheduler->lock); + + if (!WARN_ON(scheduler->state == SCHED_SUSPENDED)) { + dev_dbg(kbdev->dev, "Suspending the Scheduler"); + kbase_csf_scheduler_pm_idle(kbdev); + scheduler->state = SCHED_SUSPENDED; + } +} + +/** + * update_idle_suspended_group_state() - Move the queue group to a non-idle + * suspended state. + * @group: Pointer to the queue group. + * + * This function is called to change the state of queue group to non-idle + * suspended state, if the group was suspended when all the queues bound to it + * became empty or when some queues got blocked on a sync wait & others became + * empty. The group is also moved to the runnable list from idle wait list in + * the latter case. + * So the function gets called when a queue is kicked or sync wait condition + * gets satisfied. + */ +static void update_idle_suspended_group_state(struct kbase_queue_group *group) +{ + struct kbase_csf_scheduler *scheduler = + &group->kctx->kbdev->csf.scheduler; + int new_val; + + lockdep_assert_held(&scheduler->lock); + + if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) { + remove_group_from_idle_wait(group); + insert_group_to_runnable(scheduler, group, + KBASE_CSF_GROUP_SUSPENDED); + } else if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE) { + group->run_state = KBASE_CSF_GROUP_SUSPENDED; + + /* If scheduler is not suspended and the given group's + * static priority (reflected by the scan_seq_num) is inside + * the current tick slot-range, schedules an async tock. 
+ */ + if (scheduler->state != SCHED_SUSPENDED && + group->scan_seq_num < scheduler->num_csg_slots_for_tick) + schedule_in_cycle(group, true); + } else + return; + + new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps); + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, + group, new_val); +} + +int kbase_csf_scheduler_group_get_slot_locked(struct kbase_queue_group *group) +{ + struct kbase_csf_scheduler *scheduler = + &group->kctx->kbdev->csf.scheduler; + int slot_num = group->csg_nr; + + lockdep_assert_held(&scheduler->interrupt_lock); + + if (slot_num >= 0) { + if (WARN_ON(scheduler->csg_slots[slot_num].resident_group != + group)) + return -1; + } + + return slot_num; +} + +int kbase_csf_scheduler_group_get_slot(struct kbase_queue_group *group) +{ + struct kbase_csf_scheduler *scheduler = + &group->kctx->kbdev->csf.scheduler; + unsigned long flags; + int slot_num; + + spin_lock_irqsave(&scheduler->interrupt_lock, flags); + slot_num = kbase_csf_scheduler_group_get_slot_locked(group); + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + + return slot_num; +} + +static bool kbasep_csf_scheduler_group_is_on_slot_locked( + struct kbase_queue_group *group) +{ + struct kbase_csf_scheduler *scheduler = + &group->kctx->kbdev->csf.scheduler; + int slot_num = group->csg_nr; + + lockdep_assert_held(&scheduler->lock); + + if (slot_num >= 0) { + if (!WARN_ON(scheduler->csg_slots[slot_num].resident_group != + group)) + return true; + } + + return false; +} + +bool kbase_csf_scheduler_group_events_enabled(struct kbase_device *kbdev, + struct kbase_queue_group *group) +{ + struct kbase_csf_scheduler *scheduler = + &group->kctx->kbdev->csf.scheduler; + int slot_num = group->csg_nr; + + lockdep_assert_held(&scheduler->interrupt_lock); + + if (WARN_ON(slot_num < 0)) + return false; + + return test_bit(slot_num, scheduler->csgs_events_enable_mask); +} + +struct kbase_queue_group *kbase_csf_scheduler_get_group_on_slot( + struct 
kbase_device *kbdev, int slot) +{ + lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock); + + return kbdev->csf.scheduler.csg_slots[slot].resident_group; +} + +static int halt_stream_sync(struct kbase_queue *queue) +{ + struct kbase_queue_group *group = queue->group; + struct kbase_device *kbdev = queue->kctx->kbdev; + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + struct kbase_csf_cmd_stream_group_info *ginfo; + struct kbase_csf_cmd_stream_info *stream; + int csi_index = queue->csi_index; + long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); + + if (WARN_ON(!group) || + WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) + return -EINVAL; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + ginfo = &global_iface->groups[group->csg_nr]; + stream = &ginfo->streams[csi_index]; + + if (CS_REQ_STATE_GET(kbase_csf_firmware_cs_input_read(stream, CS_REQ)) == + CS_REQ_STATE_START) { + + remaining = wait_event_timeout(kbdev->csf.event_wait, + (CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK)) + == CS_ACK_STATE_START), remaining); + + if (!remaining) { + dev_warn(kbdev->dev, "Timed out waiting for queue to start on csi %d bound to group %d on slot %d", + csi_index, group->handle, group->csg_nr); + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) + kbase_reset_gpu(kbdev); + + return -ETIMEDOUT; + } + + remaining = + kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); + } + + /* Set state to STOP */ + kbase_csf_firmware_cs_input_mask(stream, CS_REQ, CS_REQ_STATE_STOP, + CS_REQ_STATE_MASK); + + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQUESTED, group, queue, 0u); + kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, true); + + /* Timed wait */ + remaining = wait_event_timeout(kbdev->csf.event_wait, + (CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK)) + == CS_ACK_STATE_STOP), remaining); + + if (!remaining) { + dev_warn(kbdev->dev, "Timed out waiting for 
queue to stop on csi %d bound to group %d on slot %d", + queue->csi_index, group->handle, group->csg_nr); + + /* TODO GPUCORE-25328: The CSG can't be terminated, the GPU + * will be reset as a work-around. + */ + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) + kbase_reset_gpu(kbdev); + } + return (remaining) ? 0 : -ETIMEDOUT; +} + +static bool can_halt_stream(struct kbase_device *kbdev, + struct kbase_queue_group *group) +{ + struct kbase_csf_csg_slot *const csg_slot = + kbdev->csf.scheduler.csg_slots; + unsigned long flags; + bool can_halt; + int slot; + + if (!queue_group_scheduled(group)) + return true; + + spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); + slot = kbase_csf_scheduler_group_get_slot_locked(group); + can_halt = (slot >= 0) && + (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING); + spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, + flags); + + return can_halt; +} + +/** + * sched_halt_stream() - Stop a GPU queue when its queue group is not running + * on a CSG slot. + * @queue: Pointer to the GPU queue to stop. + * + * This function handles stopping gpu queues for groups that are either not on + * a CSG slot or are on the slot but undergoing transition to + * resume or suspend states. + * It waits until the queue group is scheduled on a slot and starts running, + * which is needed as groups that were suspended may need to resume all queues + * that were enabled and running at the time of suspension. + * + * Return: 0 on success, or negative on failure. 
+ */ +static int sched_halt_stream(struct kbase_queue *queue) +{ + struct kbase_queue_group *group = queue->group; + struct kbase_device *kbdev = queue->kctx->kbdev; + struct kbase_csf_scheduler *const scheduler = + &kbdev->csf.scheduler; + struct kbase_csf_csg_slot *const csg_slot = + kbdev->csf.scheduler.csg_slots; + bool retry_needed = false; + bool retried = false; + long remaining; + int slot; + int err = 0; + + if (WARN_ON(!group)) + return -EINVAL; + + lockdep_assert_held(&queue->kctx->csf.lock); + lockdep_assert_held(&scheduler->lock); + + slot = kbase_csf_scheduler_group_get_slot(group); + + if (slot >= 0) { + WARN_ON(atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING); + + if (atomic_read(&csg_slot[slot].state) == CSG_SLOT_READY2RUN) { + dev_dbg(kbdev->dev, "Stopping a queue on csi %d when Group-%d is in under transition to running state", + queue->csi_index, group->handle); + retry_needed = true; + } + } +retry: + /* Update the group state so that it can get scheduled soon */ + update_idle_suspended_group_state(group); + + mutex_unlock(&scheduler->lock); + + /* This function is called when the queue group is either not on a CSG + * slot or is on the slot but undergoing transition. + * + * To stop the queue, the function needs to wait either for the queue + * group to be assigned a CSG slot (and that slot has to reach the + * running state) or for the eviction of the queue group from the + * scheduler's list. + * + * In order to evaluate the latter condition, the function doesn't + * really need to lock the scheduler, as any update to the run_state + * of the queue group by sched_evict_group() would be visible due + * to implicit barriers provided by the kernel waitqueue macros. + * + * The group pointer cannot disappear meanwhile, as the high level + * CSF context is locked. Therefore, the scheduler would be + * the only one to update the run_state of the group. 
+ */ + remaining = wait_event_timeout( + kbdev->csf.event_wait, can_halt_stream(kbdev, group), + kbase_csf_timeout_in_jiffies( + 20 * kbdev->csf.scheduler.csg_scheduling_period_ms)); + + mutex_lock(&scheduler->lock); + + if (remaining && queue_group_scheduled_locked(group)) { + slot = kbase_csf_scheduler_group_get_slot(group); + + /* If the group is still on slot and slot is in running state + * then explicitly stop the CSI of the + * queue. Otherwise there are different cases to consider + * + * - If the queue group was already undergoing transition to + * resume/start state when this function was entered then it + * would not have disabled the CSI of the + * queue being stopped and the previous wait would have ended + * once the slot was in a running state with CS + * interface still enabled. + * Now the group is going through another transition either + * to a suspend state or to a resume state (it could have + * been suspended before the scheduler lock was grabbed). + * In both scenarios need to wait again for the group to + * come on a slot and that slot to reach the running state, + * as that would guarantee that firmware will observe the + * CSI as disabled. + * + * - If the queue group was either off the slot or was + * undergoing transition to suspend state on entering this + * function, then the group would have been resumed with the + * queue's CSI in disabled state. + * So now if the group is undergoing another transition + * (after the resume) then just need to wait for the state + * bits in the ACK register of CSI to be + * set to STOP value. It is expected that firmware will + * process the stop/disable request of the CS + * interface after resuming the group before it processes + * another state change request of the group. 
+ */ + if ((slot >= 0) && + (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING)) { + err = halt_stream_sync(queue); + } else if (retry_needed && !retried) { + retried = true; + goto retry; + } else if (slot >= 0) { + struct kbase_csf_global_iface *global_iface = + &kbdev->csf.global_iface; + struct kbase_csf_cmd_stream_group_info *ginfo = + &global_iface->groups[slot]; + struct kbase_csf_cmd_stream_info *stream = + &ginfo->streams[queue->csi_index]; + u32 cs_req = + kbase_csf_firmware_cs_input_read(stream, CS_REQ); + + if (!WARN_ON(CS_REQ_STATE_GET(cs_req) != + CS_REQ_STATE_STOP)) { + /* Timed wait; wait_event_timeout() takes its timeout in jiffies */ + remaining = wait_event_timeout( + kbdev->csf.event_wait, + (CS_ACK_STATE_GET( + kbase_csf_firmware_cs_output( + stream, CS_ACK)) == + CS_ACK_STATE_STOP), + kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms)); + + if (!remaining) { + dev_warn(kbdev->dev, + "Timed out waiting for queue stop ack on csi %d bound to group %d on slot %d", + queue->csi_index, + group->handle, group->csg_nr); + err = -ETIMEDOUT; + } + } + } + } else if (!remaining) { + dev_warn(kbdev->dev, "Group-%d failed to get a slot for stopping the queue on csi %d", + group->handle, queue->csi_index); + err = -ETIMEDOUT; + } + + return err; +} + +int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue) +{ + struct kbase_device *kbdev = queue->kctx->kbdev; + struct kbase_queue_group *group = queue->group; + bool const cs_enabled = queue->enabled; + int err = 0; + + if (WARN_ON(!group)) + return -EINVAL; + + kbase_reset_gpu_assert_failed_or_prevented(kbdev); + lockdep_assert_held(&queue->kctx->csf.lock); + mutex_lock(&kbdev->csf.scheduler.lock); + + queue->enabled = false; + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP, group, queue, cs_enabled); + + if (cs_enabled && queue_group_scheduled_locked(group)) { + struct kbase_csf_csg_slot *const csg_slot = + kbdev->csf.scheduler.csg_slots; + int slot = kbase_csf_scheduler_group_get_slot(group); + + /* Since the group needs to be resumed in order to stop the queue, + * check if
GPU needs to be powered up. + */ + scheduler_wakeup(kbdev, true); + + if ((slot >= 0) && + (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING)) + err = halt_stream_sync(queue); + else + err = sched_halt_stream(queue); + + unassign_user_doorbell_from_queue(kbdev, queue); + } + + mutex_unlock(&kbdev->csf.scheduler.lock); + return err; +} + +static void update_hw_active(struct kbase_queue *queue, bool active) +{ + CSTD_UNUSED(queue); + CSTD_UNUSED(active); +} + +static void program_cs_extract_init(struct kbase_queue *queue) +{ + u64 *input_addr = (u64 *)queue->user_io_addr; + u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE); + + input_addr[CS_EXTRACT_INIT_LO / sizeof(u64)] = + output_addr[CS_EXTRACT_LO / sizeof(u64)]; +} + +static void program_cs_trace_cfg(struct kbase_csf_cmd_stream_info *stream, + struct kbase_queue *queue) +{ + struct kbase_device *kbdev = queue->kctx->kbdev; + u32 const glb_version = kbdev->csf.global_iface.version; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + /* If cs_trace_command not supported, nothing to program */ + if (glb_version < kbase_csf_interface_version(1, 1, 0)) + return; + + /* Program for cs_trace if enabled. In the current arrangement, it is + * possible for the context to enable the cs_trace after some queues + * has been registered in cs_trace in disabled state. This is tracked by + * the queue's trace buffer base address, which had been validated at the + * queue's register_ex call. 
+ */ + if (kbase_csf_scheduler_queue_has_trace(queue)) { + u32 cs_cfg = CS_INSTR_CONFIG_JASID_SET( + queue->trace_cfg, queue->kctx->as_nr); + + kbase_csf_firmware_cs_input(stream, CS_INSTR_CONFIG, cs_cfg); + kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_SIZE, + queue->trace_buffer_size); + + kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_BASE_LO, + queue->trace_buffer_base & U32_MAX); + kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_BASE_HI, + queue->trace_buffer_base >> 32); + + kbase_csf_firmware_cs_input( + stream, CS_INSTR_BUFFER_OFFSET_POINTER_LO, + queue->trace_offset_ptr & U32_MAX); + kbase_csf_firmware_cs_input( + stream, CS_INSTR_BUFFER_OFFSET_POINTER_HI, + queue->trace_offset_ptr >> 32); + } else { + /* Place the configuration to the disabled condition */ + kbase_csf_firmware_cs_input(stream, CS_INSTR_CONFIG, 0); + kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_SIZE, 0); + } +} + +static void program_cs(struct kbase_device *kbdev, + struct kbase_queue *queue, bool ring_csg_doorbell) +{ + struct kbase_queue_group *group = queue->group; + struct kbase_csf_cmd_stream_group_info *ginfo; + struct kbase_csf_cmd_stream_info *stream; + int csi_index = queue->csi_index; + u64 user_input; + u64 user_output; + + if (WARN_ON(!group)) + return; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) + return; + + ginfo = &kbdev->csf.global_iface.groups[group->csg_nr]; + + if (WARN_ON(csi_index < 0) || + WARN_ON(csi_index >= ginfo->stream_num)) + return; + + assign_user_doorbell_to_queue(kbdev, queue); + if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID) + return; + + WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr); + + if (queue->enabled && queue_group_suspended_locked(group)) + program_cs_extract_init(queue); + + stream = &ginfo->streams[csi_index]; + + kbase_csf_firmware_cs_input(stream, CS_BASE_LO, + queue->base_addr & 0xFFFFFFFF); + 
kbase_csf_firmware_cs_input(stream, CS_BASE_HI, + queue->base_addr >> 32); + kbase_csf_firmware_cs_input(stream, CS_SIZE, + queue->size); + + user_input = (queue->reg->start_pfn << PAGE_SHIFT); + kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO, + user_input & 0xFFFFFFFF); + kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI, + user_input >> 32); + + user_output = ((queue->reg->start_pfn + 1) << PAGE_SHIFT); + kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO, + user_output & 0xFFFFFFFF); + kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI, + user_output >> 32); + + kbase_csf_firmware_cs_input(stream, CS_CONFIG, + (queue->doorbell_nr << 8) | (queue->priority & 0xF)); + + /* Program the queue's cs_trace configuration */ + program_cs_trace_cfg(stream, queue); + + /* Enable all interrupts for now */ + kbase_csf_firmware_cs_input(stream, CS_ACK_IRQ_MASK, ~((u32)0)); + + /* + * Enable the CSG idle notification once the CS's ringbuffer + * becomes empty or the CS becomes sync_idle, waiting sync update + * or protected mode switch. + */ + kbase_csf_firmware_cs_input_mask(stream, CS_REQ, + CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK, + CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK); + + /* Set state to START/STOP */ + kbase_csf_firmware_cs_input_mask(stream, CS_REQ, + queue->enabled ? 
CS_REQ_STATE_START : CS_REQ_STATE_STOP, + CS_REQ_STATE_MASK); + + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_START, group, queue, queue->enabled); + + kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, + ring_csg_doorbell); + update_hw_active(queue, true); +} + +int kbase_csf_scheduler_queue_start(struct kbase_queue *queue) +{ + struct kbase_queue_group *group = queue->group; + struct kbase_device *kbdev = queue->kctx->kbdev; + bool const cs_enabled = queue->enabled; + int err = 0; + bool evicted = false; + + kbase_reset_gpu_assert_prevented(kbdev); + lockdep_assert_held(&queue->kctx->csf.lock); + + if (WARN_ON(!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND)) + return -EINVAL; + + mutex_lock(&kbdev->csf.scheduler.lock); + + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_START, group, queue, + group->run_state); + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_STATUS_WAIT, queue->group, + queue, queue->status_wait); + + if (group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED) { + err = -EIO; + evicted = true; + } else if ((group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) + && CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) { + dev_dbg(kbdev->dev, "blocked queue(csi_index=%d) of group %d was kicked", + queue->csi_index, group->handle); + } else { + err = scheduler_group_schedule(group); + + if (!err) { + queue->enabled = true; + if (kbasep_csf_scheduler_group_is_on_slot_locked(group)) { + if (cs_enabled) { + /* In normal situation, when a queue is + * already running, the queue update + * would be a doorbell kick on user + * side. However, if such a kick is + * shortly following a start or resume, + * the queue may actually be in transition + * hence the said kick would enter the + * kernel as the hw_active flag is yet + * to be set. The scheduler needs to + * give a kick to the corresponding + * user door-bell on such a case. 
+ */ + kbase_csf_ring_cs_user_doorbell(kbdev, queue); + } else + program_cs(kbdev, queue, true); + } + queue_delayed_work(system_long_wq, + &kbdev->csf.scheduler.ping_work, + msecs_to_jiffies(FIRMWARE_PING_INTERVAL_MS)); + } + } + + mutex_unlock(&kbdev->csf.scheduler.lock); + + if (evicted) + kbase_csf_term_descheduled_queue_group(group); + + return err; +} + +static enum kbase_csf_csg_slot_state update_csg_slot_status( + struct kbase_device *kbdev, s8 slot) +{ + struct kbase_csf_csg_slot *csg_slot = + &kbdev->csf.scheduler.csg_slots[slot]; + struct kbase_csf_cmd_stream_group_info *ginfo = + &kbdev->csf.global_iface.groups[slot]; + u32 state; + enum kbase_csf_csg_slot_state slot_state; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, + CSG_ACK)); + slot_state = atomic_read(&csg_slot->state); + + switch (slot_state) { + case CSG_SLOT_READY2RUN: + if ((state == CSG_ACK_STATE_START) || + (state == CSG_ACK_STATE_RESUME)) { + slot_state = CSG_SLOT_RUNNING; + atomic_set(&csg_slot->state, slot_state); + csg_slot->trigger_jiffies = jiffies; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STARTED, csg_slot->resident_group, state); + dev_dbg(kbdev->dev, "Group %u running on slot %d\n", + csg_slot->resident_group->handle, slot); + } + break; + case CSG_SLOT_DOWN2STOP: + if ((state == CSG_ACK_STATE_SUSPEND) || + (state == CSG_ACK_STATE_TERMINATE)) { + slot_state = CSG_SLOT_STOPPED; + atomic_set(&csg_slot->state, slot_state); + csg_slot->trigger_jiffies = jiffies; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, csg_slot->resident_group, state); + dev_dbg(kbdev->dev, "Group %u stopped on slot %d\n", + csg_slot->resident_group->handle, slot); + } + break; + case CSG_SLOT_DOWN2STOP_TIMEDOUT: + case CSG_SLOT_READY2RUN_TIMEDOUT: + case CSG_SLOT_READY: + case CSG_SLOT_RUNNING: + case CSG_SLOT_STOPPED: + break; + default: + dev_warn(kbdev->dev, "Unknown CSG slot state %d", slot_state); + break; + } + + return 
slot_state; +} + +static bool csg_slot_running(struct kbase_device *kbdev, s8 slot) +{ + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + return (update_csg_slot_status(kbdev, slot) == CSG_SLOT_RUNNING); +} + +static bool csg_slot_stopped_locked(struct kbase_device *kbdev, s8 slot) +{ + enum kbase_csf_csg_slot_state slot_state; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + slot_state = update_csg_slot_status(kbdev, slot); + + return (slot_state == CSG_SLOT_STOPPED || + slot_state == CSG_SLOT_READY); +} + +static bool csg_slot_stopped_raw(struct kbase_device *kbdev, s8 slot) +{ + struct kbase_csf_cmd_stream_group_info *ginfo = + &kbdev->csf.global_iface.groups[slot]; + u32 state; + + state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, + CSG_ACK)); + + if (state == CSG_ACK_STATE_SUSPEND || state == CSG_ACK_STATE_TERMINATE) { + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, kbdev->csf.scheduler.csg_slots[slot].resident_group, state); + dev_dbg(kbdev->dev, "(raw status) slot %d stopped\n", slot); + return true; + } + + return false; +} + +static void halt_csg_slot(struct kbase_queue_group *group, bool suspend) +{ + struct kbase_device *kbdev = group->kctx->kbdev; + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + struct kbase_csf_csg_slot *csg_slot = + kbdev->csf.scheduler.csg_slots; + s8 slot; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) + return; + + slot = group->csg_nr; + + /* When in transition, wait for it to complete */ + if (atomic_read(&csg_slot[slot].state) == CSG_SLOT_READY2RUN) { + long remaining = + kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); + + dev_dbg(kbdev->dev, "slot %d wait for up-running\n", slot); + remaining = wait_event_timeout(kbdev->csf.event_wait, + csg_slot_running(kbdev, slot), remaining); + if (!remaining) + dev_warn(kbdev->dev, + "slot %d timed out on up-running\n", slot); + } + + if 
(csg_slot_running(kbdev, slot)) { + unsigned long flags; + struct kbase_csf_cmd_stream_group_info *ginfo = + &global_iface->groups[slot]; + u32 halt_cmd = suspend ? CSG_REQ_STATE_SUSPEND : + CSG_REQ_STATE_TERMINATE; + + dev_dbg(kbdev->dev, "Halting(suspend=%d) group %d of context %d_%d on slot %d", + suspend, group->handle, group->kctx->tgid, group->kctx->id, slot); + + spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); + /* Set state to SUSPEND/TERMINATE */ + kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, halt_cmd, + CSG_REQ_STATE_MASK); + spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, + flags); + atomic_set(&csg_slot[slot].state, CSG_SLOT_DOWN2STOP); + csg_slot[slot].trigger_jiffies = jiffies; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP, group, halt_cmd); + + kbase_csf_ring_csg_doorbell(kbdev, slot); + } +} + +static void term_csg_slot(struct kbase_queue_group *group) +{ + halt_csg_slot(group, false); +} + +static void suspend_csg_slot(struct kbase_queue_group *group) +{ + halt_csg_slot(group, true); +} + +/** + * evaluate_sync_update() - Evaluate the sync wait condition the GPU command + * queue has been blocked on. + * + * @queue: Pointer to the GPU command queue + * + * Return: true if sync wait condition is satisfied. 
+ */ +static bool evaluate_sync_update(struct kbase_queue *queue) +{ + struct kbase_vmap_struct *mapping; + bool updated = false; + u32 *sync_ptr; + u32 sync_wait_cond; + u32 sync_current_val; + struct kbase_device *kbdev; + + if (WARN_ON(!queue)) + return false; + + kbdev = queue->kctx->kbdev; + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr, + &mapping); + + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE, queue->group, + queue, queue->sync_ptr); + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_BLOCKED_REASON, + queue->group, queue, queue->blocked_reason); + + if (!sync_ptr) { + dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX already freed", + queue->sync_ptr); + goto out; + } + + sync_wait_cond = + CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(queue->status_wait); + + WARN_ON((sync_wait_cond != CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) && + (sync_wait_cond != CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE)); + + sync_current_val = READ_ONCE(*sync_ptr); + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_CURRENT_VAL, queue->group, + queue, sync_current_val); + + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_TEST_VAL, queue->group, + queue, queue->sync_value); + + if (((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) && + (sync_current_val > queue->sync_value)) || + ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE) && + (sync_current_val <= queue->sync_value))) { + /* The sync wait condition is satisfied so the group to which + * queue is bound can be re-scheduled. + */ + updated = true; + } else { + dev_dbg(queue->kctx->kbdev->dev, + "sync memory not updated yet(%u)", sync_current_val); + } + + kbase_phy_alloc_mapping_put(queue->kctx, mapping); +out: + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVALUATED, + queue->group, queue, updated); + return updated; +} + +/** + * save_slot_cs() - Save the state for blocked GPU command queue. 
+ * + * @ginfo: Pointer to the CSG interface used by the group + * the queue is bound to. + * @queue: Pointer to the GPU command queue. + * + * This function will check if GPU command queue is blocked on a sync wait and + * evaluate the wait condition. If the wait condition isn't satisfied it would + * save the state needed to reevaluate the condition in future. + * The group to which queue is bound shall be in idle state. + * + * Return: true if the queue is blocked on a sync wait operation. + */ +static +bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo, + struct kbase_queue *queue) +{ + struct kbase_csf_cmd_stream_info *const stream = + &ginfo->streams[queue->csi_index]; + u32 status = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT); + bool is_waiting = false; + + KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_STATUS_WAIT, + queue->group, queue, status); + + if (CS_STATUS_WAIT_SYNC_WAIT_GET(status)) { + queue->status_wait = status; + queue->sync_ptr = kbase_csf_firmware_cs_output(stream, + CS_STATUS_WAIT_SYNC_POINTER_LO); + queue->sync_ptr |= (u64)kbase_csf_firmware_cs_output(stream, + CS_STATUS_WAIT_SYNC_POINTER_HI) << 32; + queue->sync_value = kbase_csf_firmware_cs_output(stream, + CS_STATUS_WAIT_SYNC_VALUE); + + queue->sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET( + kbase_csf_firmware_cs_output(stream, + CS_STATUS_SCOREBOARDS)); + queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_GET( + kbase_csf_firmware_cs_output(stream, + CS_STATUS_BLOCKED_REASON)); + + if (!evaluate_sync_update(queue)) { + is_waiting = true; + } else { + /* Sync object already got updated & met the condition + * thus it doesn't need to be reevaluated and so can + * clear the 'status_wait' here. + */ + queue->status_wait = 0; + } + } else { + /* Invalidate wait status info that would have been recorded if + * this queue was blocked when the group (in idle state) was + * suspended previously. 
After that the group could have been + * unblocked due to the kicking of another queue bound to it & + * so the wait status info would have stuck with this queue. + */ + queue->status_wait = 0; + } + + return is_waiting; +} + +/** + * get_schedule_delay() - Calculate how far ahead an event should be scheduled. + * + * The objective of this function is making sure that a minimum period of + * time is guaranteed between handling two consecutive events. + * + * This function guarantees a minimum period of time between two consecutive + * events: given the minimum period and the distance between the current time + * and the last event, the function returns the difference between the two. + * However, if more time than the minimum period has already elapsed + * since the last event, the function will return 0 to schedule work to handle + * the event with the lowest latency possible. + * + * @last_event: Timestamp of the last event, in jiffies. + * @time_now: Timestamp of the new event to handle, in jiffies. + * Must be successive to last_event. + * @period: Minimum period between two events, in jiffies. + * + * Return: Time to delay work to handle the current event, in jiffies + */ +static unsigned long get_schedule_delay(unsigned long last_event, + unsigned long time_now, + unsigned long period) +{ + const unsigned long t_distance = time_now - last_event; + const unsigned long delay_t = (t_distance < period) ? 
+ (period - t_distance) : 0; + + return delay_t; +} + +static void schedule_in_cycle(struct kbase_queue_group *group, bool force) +{ + struct kbase_context *kctx = group->kctx; + struct kbase_device *kbdev = kctx->kbdev; + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + + lockdep_assert_held(&scheduler->lock); + + /* Only try to schedule work for this event if no requests are pending, + * otherwise the function will end up canceling previous work requests, + * and scheduler is configured to wake up periodically (or the schedule + * of work needs to be enforced in situation such as entering into + * protected mode). + */ + if ((likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) && + !scheduler->tock_pending_request) { + const unsigned long delay = + get_schedule_delay(scheduler->last_schedule, jiffies, + CSF_SCHEDULER_TIME_TOCK_JIFFIES); + scheduler->tock_pending_request = true; + dev_dbg(kbdev->dev, "Kicking async for group %d\n", + group->handle); + mod_delayed_work(scheduler->wq, &scheduler->tock_work, delay); + } +} + +static +void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, + struct kbase_queue_group *const group, + enum kbase_csf_group_state run_state) +{ + struct kbase_context *const kctx = group->kctx; + struct kbase_device *const kbdev = kctx->kbdev; + + lockdep_assert_held(&scheduler->lock); + + WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE); + + if (WARN_ON(group->priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT)) + return; + + group->run_state = run_state; + + if (run_state == KBASE_CSF_GROUP_RUNNABLE) + group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID; + + list_add_tail(&group->link, + &kctx->csf.sched.runnable_groups[group->priority]); + kctx->csf.sched.num_runnable_grps++; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_INSERT_RUNNABLE, group, + kctx->csf.sched.num_runnable_grps); + + /* Add the kctx if not yet in runnable kctxs */ + if (kctx->csf.sched.num_runnable_grps == 1) { + /* First 
runnable csg, adds to the runnable_kctxs */ + INIT_LIST_HEAD(&kctx->csf.link); + list_add_tail(&kctx->csf.link, &scheduler->runnable_kctxs); + KBASE_KTRACE_ADD(kbdev, SCHEDULER_INSERT_RUNNABLE, kctx, 0u); + } + + scheduler->total_runnable_grps++; + + if (likely(scheduler_timer_is_enabled_nolock(kbdev)) && + (scheduler->total_runnable_grps == 1 || + scheduler->state == SCHED_SUSPENDED)) { + dev_dbg(kbdev->dev, "Kicking scheduler on first runnable group\n"); + /* Fire a scheduling to start the time-slice */ + enqueue_tick_work(kbdev); + } else + schedule_in_cycle(group, false); + + /* Since a new group has become runnable, check if GPU needs to be + * powered up. + */ + scheduler_wakeup(kbdev, false); +} + +/* Take @group off its context's runnable list, set its run state to + * @run_state and update the per-context and scheduler-wide runnable counts. + * Must be called with the scheduler lock held. + */ +static +void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler, + struct kbase_queue_group *group, + enum kbase_csf_group_state run_state) +{ + struct kbase_context *kctx = group->kctx; + struct kbase_queue_group *new_head_grp; + struct kbase_queue_group *protm_grp; + struct list_head *list = + &kctx->csf.sched.runnable_groups[group->priority]; + + lockdep_assert_held(&scheduler->lock); + + WARN_ON(!queue_group_scheduled_locked(group)); + + group->run_state = run_state; + list_del_init(&group->link); + + if (scheduler->top_grp == group) { + /* + * Note: this disables explicit rotation in the next scheduling + * cycle. However, removing the top_grp is the same as an + * implicit rotation (e.g. if we instead rotated the top_ctx + * and then remove top_grp) + * + * This implicit rotation is assumed by the scheduler rotate + * functions. + */ + scheduler->top_grp = NULL; + + /* + * Trigger a scheduling tock for a CSG containing protected + * content in case there has been any in order to minimise + * latency. 
+ * + * A dedicated pointer is used so that 'group' keeps + * identifying the removed group in the traces below. + */ + protm_grp = scheduler_get_protm_enter_async_group(kctx->kbdev, + NULL); + if (protm_grp) + schedule_in_cycle(protm_grp, true); + } + + kctx->csf.sched.num_runnable_grps--; + KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_REMOVE_RUNNABLE, group, + kctx->csf.sched.num_runnable_grps); + new_head_grp = (!list_empty(list)) ? + list_first_entry(list, struct kbase_queue_group, link) : + NULL; + KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_HEAD_RUNNABLE, new_head_grp, + 0u); + + if (kctx->csf.sched.num_runnable_grps == 0) { + struct kbase_context *new_head_kctx; + struct list_head *kctx_list = &scheduler->runnable_kctxs; + /* drop the kctx */ + list_del_init(&kctx->csf.link); + if (scheduler->top_ctx == kctx) + scheduler->top_ctx = NULL; + KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_REMOVE_RUNNABLE, kctx, + 0u); + new_head_kctx = (!list_empty(kctx_list)) ? + list_first_entry(kctx_list, struct kbase_context, csf.link) : + NULL; + KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_HEAD_RUNNABLE, + new_head_kctx, 0u); + } + + WARN_ON(scheduler->total_runnable_grps == 0); + scheduler->total_runnable_grps--; + if (!scheduler->total_runnable_grps) { + dev_dbg(kctx->kbdev->dev, "Scheduler idle has no runnable groups"); + cancel_tick_timer(kctx->kbdev); + WARN_ON(atomic_read(&scheduler->non_idle_offslot_grps)); + if (scheduler->state != SCHED_SUSPENDED) + queue_work(system_wq, &scheduler->gpu_idle_work); + } + KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp, + scheduler->num_active_address_spaces | + (((u64)scheduler->total_runnable_grps) << 32)); +} + +static void insert_group_to_idle_wait(struct kbase_queue_group *const group) +{ + struct kbase_context *kctx = group->kctx; + + lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock); + + WARN_ON(group->run_state != KBASE_CSF_GROUP_IDLE); + + list_add_tail(&group->link, &kctx->csf.sched.idle_wait_groups); + kctx->csf.sched.num_idle_wait_grps++; + KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_INSERT_IDLE_WAIT, group, + 
kctx->csf.sched.num_idle_wait_grps); + group->run_state = KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC; + dev_dbg(kctx->kbdev->dev, + "Group-%d suspended on sync_wait, total wait_groups: %u\n", + group->handle, kctx->csf.sched.num_idle_wait_grps); +} + +static void remove_group_from_idle_wait(struct kbase_queue_group *const group) +{ + struct kbase_context *kctx = group->kctx; + struct list_head *list = &kctx->csf.sched.idle_wait_groups; + struct kbase_queue_group *new_head_grp; + + lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock); + + WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC); + + list_del_init(&group->link); + WARN_ON(kctx->csf.sched.num_idle_wait_grps == 0); + kctx->csf.sched.num_idle_wait_grps--; + KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_REMOVE_IDLE_WAIT, group, + kctx->csf.sched.num_idle_wait_grps); + new_head_grp = (!list_empty(list)) ? + list_first_entry(list, struct kbase_queue_group, link) : + NULL; + KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_HEAD_IDLE_WAIT, + new_head_grp, 0u); + group->run_state = KBASE_CSF_GROUP_INACTIVE; +} + +static void deschedule_idle_wait_group(struct kbase_csf_scheduler *scheduler, + struct kbase_queue_group *group) +{ + lockdep_assert_held(&scheduler->lock); + + if (WARN_ON(!group)) + return; + + remove_group_from_runnable(scheduler, group, KBASE_CSF_GROUP_IDLE); + insert_group_to_idle_wait(group); +} + +static void update_offslot_non_idle_cnt_for_faulty_grp(struct kbase_queue_group *group) +{ + struct kbase_device *kbdev = group->kctx->kbdev; + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + + lockdep_assert_held(&scheduler->lock); + + if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) { + int new_val = + atomic_dec_return(&scheduler->non_idle_offslot_grps); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC, + group, new_val); + } +} + +static void update_offslot_non_idle_cnt_for_onslot_grp(struct kbase_queue_group *group) +{ + struct kbase_device 
*kbdev = group->kctx->kbdev; + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + + lockdep_assert_held(&scheduler->lock); + + WARN_ON(group->csg_nr < 0); + + if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) { + int new_val = + atomic_dec_return(&scheduler->non_idle_offslot_grps); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC, + group, new_val); + } +} + +static void update_offslot_non_idle_cnt_on_grp_suspend( + struct kbase_queue_group *group) +{ + struct kbase_device *kbdev = group->kctx->kbdev; + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + + lockdep_assert_held(&scheduler->lock); + + if (scheduler->state == SCHED_BUSY) { + /* active phase or, async entering the protected mode */ + if (group->prepared_seq_num >= + scheduler->non_idle_scanout_grps) { + /* At scanout, it was tagged as on-slot idle */ + if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) { + int new_val = atomic_inc_return( + &scheduler->non_idle_offslot_grps); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, + group, new_val); + } + } else { + if (group->run_state != KBASE_CSF_GROUP_SUSPENDED) { + int new_val = atomic_dec_return( + &scheduler->non_idle_offslot_grps); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC, + group, new_val); + } + } + } else { + /* async phases */ + if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) { + int new_val = atomic_inc_return( + &scheduler->non_idle_offslot_grps); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, + group, new_val); + } + } +} + +static bool confirm_cmd_buf_empty(struct kbase_queue *queue) +{ + bool cs_empty; + bool cs_idle; + u32 sb_status = 0; + + struct kbase_device const *const kbdev = queue->group->kctx->kbdev; + struct kbase_csf_global_iface const *const iface = + &kbdev->csf.global_iface; + + u32 glb_version = iface->version; + + u64 *input_addr = (u64 *)queue->user_io_addr; + u64 *output_addr = (u64 
*)(queue->user_io_addr + PAGE_SIZE); + + if (glb_version >= kbase_csf_interface_version(1, 0, 0)) { + /* CS_STATUS_SCOREBOARD supported from CSF 1.0 */ + struct kbase_csf_cmd_stream_group_info const *const ginfo = + &kbdev->csf.global_iface.groups[queue->group->csg_nr]; + struct kbase_csf_cmd_stream_info const *const stream = + &ginfo->streams[queue->csi_index]; + + sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET( + kbase_csf_firmware_cs_output(stream, + CS_STATUS_SCOREBOARDS)); + } + + cs_empty = (input_addr[CS_INSERT_LO / sizeof(u64)] == + output_addr[CS_EXTRACT_LO / sizeof(u64)]); + cs_idle = cs_empty && (!sb_status); + + return cs_idle; +} + +static void save_csg_slot(struct kbase_queue_group *group) +{ + struct kbase_device *kbdev = group->kctx->kbdev; + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + struct kbase_csf_cmd_stream_group_info *ginfo; + u32 state; + + lockdep_assert_held(&scheduler->lock); + + if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) + return; + + ginfo = &kbdev->csf.global_iface.groups[group->csg_nr]; + + state = + CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, CSG_ACK)); + + if (!WARN_ON((state != CSG_ACK_STATE_SUSPEND) && + (state != CSG_ACK_STATE_TERMINATE))) { + u32 max_streams = ginfo->stream_num; + u32 i; + bool sync_wait = false; + bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) & + CSG_STATUS_STATE_IDLE_MASK; + for (i = 0; idle && i < max_streams; i++) { + struct kbase_queue *const queue = + group->bound_queues[i]; + + if (!queue || !queue->enabled) + continue; + + if (save_slot_cs(ginfo, queue)) + sync_wait = true; + else { + /* Need to confirm if ringbuffer of the GPU + * queue is empty or not. A race can arise + * between the flush of GPU queue and suspend + * of CSG. If a queue is flushed after FW has + * set the IDLE bit in CSG_STATUS_STATE, then + * Scheduler will incorrectly consider CSG + * as idle. 
And there may not be any further + * flush call for the GPU queue, which would + * have de-idled the CSG. + */ + idle = confirm_cmd_buf_empty(queue); + } + } + + if (idle) { + /* Take the suspended group out of the runnable_groups + * list of the context and move it to the + * idle_wait_groups list. + */ + if (sync_wait) + deschedule_idle_wait_group(scheduler, group); + else { + group->run_state = + KBASE_CSF_GROUP_SUSPENDED_ON_IDLE; + dev_dbg(kbdev->dev, "Group-%d suspended: idle", + group->handle); + } + } else { + group->run_state = KBASE_CSF_GROUP_SUSPENDED; + } + + update_offslot_non_idle_cnt_on_grp_suspend(group); + } +} + +/* Cleanup_csg_slot after it has been vacated, ready for next csg run. + * Return whether there is a kctx address fault associated with the group + * for which the clean-up is done. + */ +static bool cleanup_csg_slot(struct kbase_queue_group *group) +{ + struct kbase_context *kctx = group->kctx; + struct kbase_device *kbdev = kctx->kbdev; + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + struct kbase_csf_cmd_stream_group_info *ginfo; + s8 slot; + struct kbase_csf_csg_slot *csg_slot; + unsigned long flags; + u32 i; + bool as_fault = false; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) + return as_fault; + + slot = group->csg_nr; + csg_slot = &kbdev->csf.scheduler.csg_slots[slot]; + ginfo = &global_iface->groups[slot]; + + /* Now loop through all the bound CSs, and clean them via a stop */ + for (i = 0; i < ginfo->stream_num; i++) { + struct kbase_csf_cmd_stream_info *stream = &ginfo->streams[i]; + + if (group->bound_queues[i]) { + if (group->bound_queues[i]->enabled) { + kbase_csf_firmware_cs_input_mask(stream, + CS_REQ, CS_REQ_STATE_STOP, + CS_REQ_STATE_MASK); + } + + unassign_user_doorbell_from_queue(kbdev, + group->bound_queues[i]); + } + } + + unassign_user_doorbell_from_group(kbdev, group); + + /* The csg does not need cleanup other 
than drop its AS */ + spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); + as_fault = kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT); + kbase_ctx_sched_release_ctx(kctx); + if (unlikely(group->faulted)) + as_fault = true; + spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); + + /* now marking the slot is vacant */ + spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); + kbdev->csf.scheduler.csg_slots[slot].resident_group = NULL; + clear_bit(slot, kbdev->csf.scheduler.csg_slots_idle_mask); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group, + kbdev->csf.scheduler.csg_slots_idle_mask[0]); + + group->csg_nr = KBASEP_CSG_NR_INVALID; + set_bit(slot, kbdev->csf.scheduler.csgs_events_enable_mask); + clear_bit(slot, kbdev->csf.scheduler.csg_inuse_bitmap); + spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); + + csg_slot->trigger_jiffies = jiffies; + atomic_set(&csg_slot->state, CSG_SLOT_READY); + + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_CLEANED, group, slot); + dev_dbg(kbdev->dev, "Cleanup done for group %d on slot %d\n", + group->handle, slot); + + KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG(kbdev, + kbdev->gpu_props.props.raw_props.gpu_id, slot); + + return as_fault; +} + +static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio) +{ + struct kbase_device *kbdev = group->kctx->kbdev; + struct kbase_csf_csg_slot *csg_slot; + struct kbase_csf_cmd_stream_group_info *ginfo; + s8 slot; + u8 prev_prio; + u32 ep_cfg; + u32 csg_req; + unsigned long flags; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group))) + return; + + slot = group->csg_nr; + csg_slot = &kbdev->csf.scheduler.csg_slots[slot]; + ginfo = &kbdev->csf.global_iface.groups[slot]; + + /* CSGs remaining on-slot can be either idle or runnable. + * This also applies in protected mode. 
+ */ + WARN_ON(!((group->run_state == KBASE_CSF_GROUP_RUNNABLE) || + (group->run_state == KBASE_CSF_GROUP_IDLE))); + + /* Update consumes a group from scanout */ + update_offslot_non_idle_cnt_for_onslot_grp(group); + + if (csg_slot->priority == prio) + return; + + /* Read the csg_ep_cfg back for updating the priority field */ + ep_cfg = kbase_csf_firmware_csg_input_read(ginfo, CSG_EP_REQ); + prev_prio = CSG_EP_REQ_PRIORITY_GET(ep_cfg); + ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio); + kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg); + + spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); + csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); + csg_req ^= CSG_REQ_EP_CFG_MASK; + kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req, + CSG_REQ_EP_CFG_MASK); + spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); + + csg_slot->priority = prio; + + dev_dbg(kbdev->dev, "Priority for group %d of context %d_%d on slot %d to be updated from %u to %u\n", + group->handle, group->kctx->tgid, group->kctx->id, slot, + prev_prio, prio); + + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_PRIO_UPDATE, group, prev_prio); + + kbase_csf_ring_csg_doorbell(kbdev, slot); + set_bit(slot, kbdev->csf.scheduler.csg_slots_prio_update); +} + +static void program_csg_slot(struct kbase_queue_group *group, s8 slot, + u8 prio) +{ + struct kbase_context *kctx = group->kctx; + struct kbase_device *kbdev = kctx->kbdev; + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + const u64 shader_core_mask = + kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER); + const u64 tiler_core_mask = + kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_TILER); + const u64 compute_mask = shader_core_mask & group->compute_mask; + const u64 fragment_mask = shader_core_mask & group->fragment_mask; + const u64 tiler_mask = tiler_core_mask & group->tiler_mask; + const u8 num_cores = kbdev->gpu_props.num_cores; + const u8 compute_max = min(num_cores, 
group->compute_max); + const u8 fragment_max = min(num_cores, group->fragment_max); + const u8 tiler_max = min(CSG_TILER_MAX, group->tiler_max); + struct kbase_csf_cmd_stream_group_info *ginfo; + u32 ep_cfg = 0; + u32 csg_req; + u32 state; + int i; + unsigned long flags; + const u64 normal_suspend_buf = + group->normal_suspend_buf.reg->start_pfn << PAGE_SHIFT; + struct kbase_csf_csg_slot *csg_slot = + &kbdev->csf.scheduler.csg_slots[slot]; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + /* Bail out if the slot index violates either bound */ + if (WARN_ON(slot < 0) || + WARN_ON(slot >= global_iface->group_num)) + return; + + WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_READY); + + ginfo = &global_iface->groups[slot]; + + /* Pick an available address space for this context */ + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_ctx_sched_retain_ctx(kctx); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); + + if (kctx->as_nr == KBASEP_AS_NR_INVALID) { + dev_warn(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n", + group->handle, kctx->tgid, kctx->id, slot); + return; + } + + spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); + set_bit(slot, kbdev->csf.scheduler.csg_inuse_bitmap); + kbdev->csf.scheduler.csg_slots[slot].resident_group = group; + group->csg_nr = slot; + spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); + + assign_user_doorbell_to_group(kbdev, group); + + /* Now loop through all the bound & kicked CSs, and program them */ + for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) { + struct kbase_queue *queue = group->bound_queues[i]; + + if (queue) + program_cs(kbdev, queue, false); + } + + + /* Endpoint programming for CSG */ + kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_COMPUTE_LO, + compute_mask & U32_MAX); + kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_COMPUTE_HI, + compute_mask >> 32); + kbase_csf_firmware_csg_input(ginfo, 
CSG_ALLOW_FRAGMENT_LO, + fragment_mask & U32_MAX); + kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_HI, + fragment_mask >> 32); + kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_OTHER, + tiler_mask & U32_MAX); + + + ep_cfg = CSG_EP_REQ_COMPUTE_EP_SET(ep_cfg, compute_max); + ep_cfg = CSG_EP_REQ_FRAGMENT_EP_SET(ep_cfg, fragment_max); + ep_cfg = CSG_EP_REQ_TILER_EP_SET(ep_cfg, tiler_max); + ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio); + kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg); + + /* Program the address space number assigned to the context */ + kbase_csf_firmware_csg_input(ginfo, CSG_CONFIG, kctx->as_nr); + + kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_LO, + normal_suspend_buf & U32_MAX); + kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_HI, + normal_suspend_buf >> 32); + + if (group->protected_suspend_buf.reg) { + const u64 protm_suspend_buf = + group->protected_suspend_buf.reg->start_pfn << + PAGE_SHIFT; + kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO, + protm_suspend_buf & U32_MAX); + kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI, + protm_suspend_buf >> 32); + } + + /* Enable all interrupts for now */ + kbase_csf_firmware_csg_input(ginfo, CSG_ACK_IRQ_MASK, ~((u32)0)); + + spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); + csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); + csg_req ^= CSG_REQ_EP_CFG_MASK; + kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req, + CSG_REQ_EP_CFG_MASK); + + /* Set state to START/RESUME */ + if (queue_group_suspended_locked(group)) { + state = CSG_REQ_STATE_RESUME; + } else { + WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE); + state = CSG_REQ_STATE_START; + } + + kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, + state, CSG_REQ_STATE_MASK); + spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); + + /* Update status before rings the door-bell, marking ready => run */ + atomic_set(&csg_slot->state, CSG_SLOT_READY2RUN); + 
csg_slot->trigger_jiffies = jiffies; + csg_slot->priority = prio; + + /* Trace the programming of the CSG on the slot */ + KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG(kbdev, + kbdev->gpu_props.props.raw_props.gpu_id, group->handle, slot); + + dev_dbg(kbdev->dev, "Starting group %d of context %d_%d on slot %d with priority %u\n", + group->handle, kctx->tgid, kctx->id, slot, prio); + + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_START, group, + (((u64)ep_cfg) << 32) | + ((((u32)kctx->as_nr) & 0xF) << 16) | + (state & (CSG_REQ_STATE_MASK >> CS_REQ_STATE_SHIFT))); + + kbase_csf_ring_csg_doorbell(kbdev, slot); + + /* Programming a slot consumes a group from scanout */ + update_offslot_non_idle_cnt_for_onslot_grp(group); +} + +static void remove_scheduled_group(struct kbase_device *kbdev, + struct kbase_queue_group *group) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + + lockdep_assert_held(&scheduler->lock); + + WARN_ON(group->prepared_seq_num == + KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID); + WARN_ON(list_empty(&group->link_to_schedule)); + + list_del_init(&group->link_to_schedule); + scheduler->ngrp_to_schedule--; + group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID; + group->kctx->csf.sched.ngrp_to_schedule--; +} + +static void sched_evict_group(struct kbase_queue_group *group, bool fault, + bool update_non_idle_offslot_grps_cnt) +{ + struct kbase_context *kctx = group->kctx; + struct kbase_device *kbdev = kctx->kbdev; + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (queue_group_scheduled_locked(group)) { + u32 i; + + if (update_non_idle_offslot_grps_cnt && + (group->run_state == KBASE_CSF_GROUP_SUSPENDED || + group->run_state == KBASE_CSF_GROUP_RUNNABLE)) { + int new_val = atomic_dec_return( + &scheduler->non_idle_offslot_grps); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC, + group, new_val); + } + + for (i = 0; i < 
MAX_SUPPORTED_STREAMS_PER_GROUP; i++) { + if (group->bound_queues[i]) + group->bound_queues[i]->enabled = false; + } + + if (group->prepared_seq_num != + KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID) + remove_scheduled_group(kbdev, group); + + if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) + remove_group_from_idle_wait(group); + else { + remove_group_from_runnable(scheduler, group, + KBASE_CSF_GROUP_INACTIVE); + } + + WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE); + + if (fault) + group->run_state = KBASE_CSF_GROUP_FAULT_EVICTED; + + KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_EVICT_SCHED, group, + (((u64)scheduler->total_runnable_grps) << 32) | + ((u32)group->run_state)); + dev_dbg(kbdev->dev, "group %d exited scheduler, num_runnable_grps %d\n", + group->handle, scheduler->total_runnable_grps); + /* Notify a group has been evicted */ + wake_up_all(&kbdev->csf.event_wait); + } +} + +static int term_group_sync(struct kbase_queue_group *group) +{ + struct kbase_device *kbdev = group->kctx->kbdev; + long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); + int err = 0; + + term_csg_slot(group); + + remaining = wait_event_timeout(kbdev->csf.event_wait, + csg_slot_stopped_locked(kbdev, group->csg_nr), remaining); + + if (!remaining) { + dev_warn(kbdev->dev, "term request timed out for group %d of context %d_%d on slot %d", + group->handle, group->kctx->tgid, + group->kctx->id, group->csg_nr); + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) + kbase_reset_gpu(kbdev); + err = -ETIMEDOUT; + } + + return err; +} + +void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group) +{ + struct kbase_device *kbdev = group->kctx->kbdev; + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + long remaining = + kbase_csf_timeout_in_jiffies(CSG_SCHED_STOP_TIMEOUT_MS); + bool force = false; + + kbase_reset_gpu_assert_failed_or_prevented(kbdev); + lockdep_assert_held(&group->kctx->csf.lock); + mutex_lock(&scheduler->lock); 
+ + KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_DESCHEDULE, group, group->run_state); + while (queue_group_scheduled_locked(group)) { + u32 saved_state = scheduler->state; + + if (!kbasep_csf_scheduler_group_is_on_slot_locked(group)) { + sched_evict_group(group, false, true); + } else if (saved_state == SCHED_INACTIVE || force) { + bool as_faulty; + + term_group_sync(group); + /* Treat the csg been terminated */ + as_faulty = cleanup_csg_slot(group); + /* remove from the scheduler list */ + sched_evict_group(group, as_faulty, false); + } + + /* waiting scheduler state to change */ + if (queue_group_scheduled_locked(group)) { + mutex_unlock(&scheduler->lock); + remaining = wait_event_timeout( + kbdev->csf.event_wait, + saved_state != scheduler->state, + remaining); + if (!remaining) { + dev_warn(kbdev->dev, "Scheduler state change wait timed out for group %d on slot %d", + group->handle, group->csg_nr); + force = true; + } + mutex_lock(&scheduler->lock); + } + } + + mutex_unlock(&scheduler->lock); +} + +/** + * scheduler_group_schedule() - Schedule a GPU command queue group on firmware + * + * @group: Pointer to the queue group to be scheduled. + * + * This function would enable the scheduling of GPU command queue group on + * firmware. + * + * Return: 0 on success, or negative on failure. 
+ */
+static int scheduler_group_schedule(struct kbase_queue_group *group)
+{
+	struct kbase_context *kctx = group->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+
+	lockdep_assert_held(&kctx->csf.lock);
+	lockdep_assert_held(&scheduler->lock);
+
+	KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SCHEDULE, group, group->run_state);
+	if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC)
+		update_idle_suspended_group_state(group);
+	else if (queue_group_idle_locked(group)) {
+		WARN_ON(kctx->csf.sched.num_runnable_grps == 0);
+		WARN_ON(kbdev->csf.scheduler.total_runnable_grps == 0);
+
+		if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE)
+			update_idle_suspended_group_state(group);
+		else {
+			struct kbase_queue_group *protm_grp;
+			unsigned long flags;
+
+			WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(
+				group));
+
+			group->run_state = KBASE_CSF_GROUP_RUNNABLE;
+
+			/* A normal mode CSG could be idle on-slot during
+			 * protected mode. In this case clear the
+			 * appropriate bit in csg_slots_idle_mask.
+			 * active_protm_grp is sampled under interrupt_lock.
+			 */
+			spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+			protm_grp = scheduler->active_protm_grp;
+			if (protm_grp && protm_grp != group) {
+				clear_bit((unsigned int)group->csg_nr,
+					  scheduler->csg_slots_idle_mask);
+				KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
+					scheduler->csg_slots_idle_mask[0]);
+			}
+			spin_unlock_irqrestore(&scheduler->interrupt_lock,
+					       flags);
+
+			/* If the GPU is in protected mode then any doorbell
+			 * rung would have no effect. Check if the GPU is in
+			 * protected mode and if this group has a higher
+			 * priority (i.e. a lower scan_seq_num) than the
+			 * active protected mode group. If so prompt the FW
+			 * to exit protected mode.
+			 */
+			if (protm_grp &&
+			    group->scan_seq_num < protm_grp->scan_seq_num) {
+				/* Prompt the FW to exit protected mode */
+				scheduler_force_protm_exit(kbdev);
+			}
+		}
+	} else if (!queue_group_scheduled_locked(group)) {
+		int new_val;
+		insert_group_to_runnable(&kbdev->csf.scheduler, group,
+			KBASE_CSF_GROUP_RUNNABLE);
+		/* A new group enters the scheduler: it is off-slot and
+		 * non-idle, so account for it in non_idle_offslot_grps.
+		 */
+		new_val = atomic_inc_return(
+			&kbdev->csf.scheduler.non_idle_offslot_grps);
+		KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
+					group, new_val);
+	}
+
+	/* Since a group has become active now, check if GPU needs to be
+	 * powered up. Also rekick the Scheduler.
+	 */
+	scheduler_wakeup(kbdev, true);
+
+	return 0; /* there is currently no failure path in this function */
+}
+
+/**
+ * set_max_csg_slots() - Set the number of available CSG slots
+ *
+ * @kbdev: Pointer of the GPU device.
+ *
+ * This function would set/limit the number of CSG slots that
+ * can be used in the given tick/tock. It would be less than the total CSG
+ * slots supported by firmware if the number of GPU address space slots
+ * required to utilize all the CSG slots is more than the available
+ * address space slots.
+ *
+ * Note that num_csg_slots_for_tick is only written here when the number of
+ * active address spaces fits in the available address space slots (the
+ * hardware address spaces minus NUM_RESERVED_AS_SLOTS); otherwise the value
+ * already stored in num_csg_slots_for_tick is left untouched.
+ */
+static inline void set_max_csg_slots(struct kbase_device *kbdev)
+{
+	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+	unsigned int total_csg_slots = kbdev->csf.global_iface.group_num;
+	unsigned int max_address_space_slots =
+		kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS;
+
+	WARN_ON(scheduler->num_active_address_spaces > total_csg_slots);
+
+	if (likely(scheduler->num_active_address_spaces <=
+		   max_address_space_slots))
+		scheduler->num_csg_slots_for_tick = total_csg_slots;
+}
+
+/**
+ * count_active_address_space() - Count the number of GPU address space slots
+ *
+ * @kbdev: Pointer of the GPU device.
+ * @kctx: Pointer of the Kbase context.
+ *
+ * This function would update the counter that is tracking the number of GPU
+ * address space slots that would be required to program the CS
+ * group slots from the groups at the head of groups_to_schedule list.
+ */
+static inline void count_active_address_space(struct kbase_device *kbdev,
+		struct kbase_context *kctx)
+{
+	struct kbase_csf_scheduler *const sched = &kbdev->csf.scheduler;
+	const unsigned int nr_csg_slots = kbdev->csf.global_iface.group_num;
+	const unsigned int nr_usable_as =
+		kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS;
+
+	/* Nothing is counted once more groups have been queued for
+	 * scheduling than there are CSG slots.
+	 */
+	if (sched->ngrp_to_schedule > nr_csg_slots)
+		return;
+
+	/* A per-context count of exactly one presumably means this is the
+	 * first group of the context to be queued, so one more address
+	 * space is needed (NOTE(review): relies on the caller having bumped
+	 * the per-context count beforehand - confirm against the caller).
+	 */
+	if (kctx->csf.sched.ngrp_to_schedule == 1)
+		sched->num_active_address_spaces++;
+
+	/* Grant a slot for the tick only while address spaces are left */
+	if (sched->num_active_address_spaces <= nr_usable_as)
+		sched->num_csg_slots_for_tick++;
+}
+
+/* Two schemes are used in assigning the priority to CSG slots for a given
+ * CSG from the 'groups_to_schedule' list.
+ * This is needed as an idle on-slot group is deprioritized by moving it to
+ * the tail of 'groups_to_schedule' list. As a result it can either get
+ * evicted from the CSG slot in current tick/tock dealing, or its position
+ * can be after the lower priority non-idle groups in the 'groups_to_schedule'
+ * list. The latter case can result in the on-slot subset containing both
+ * non-idle and idle CSGs, and is handled through the 2nd scheme described
+ * below.
+ *
+ * First scheme :- If all the slots are going to be occupied by the non-idle or
+ * idle groups, then a simple assignment of the priority is done as per the
+ * position of a group in the 'groups_to_schedule' list. So maximum priority
+ * gets assigned to the slot of a group which is at the head of the list.
+ * Here the 'groups_to_schedule' list would effectively be ordered as per the
+ * static priority of groups.
+ * + * Second scheme :- If the slots are going to be occupied by a mix of idle and + * non-idle groups then the priority assignment needs to ensure that the + * priority of a slot belonging to a higher priority idle group will always be + * greater than the priority of a slot belonging to a lower priority non-idle + * group, reflecting the original position of a group in the scan order (i.e + * static priority) 'scan_seq_num', which is set during the prepare phase of a + * tick/tock before the group is moved to 'idle_groups_to_schedule' list if it + * is idle. + * The priority range [MAX_CSG_SLOT_PRIORITY, 0] is partitioned with the first + * 'slots_for_tick' groups in the original scan order are assigned a priority in + * the subrange [MAX_CSG_SLOT_PRIORITY, MAX_CSG_SLOT_PRIORITY - slots_for_tick), + * whereas rest of the groups are assigned the priority in the subrange + * [MAX_CSG_SLOT_PRIORITY - slots_for_tick, 0]. This way even if an idle higher + * priority group ends up after the non-idle lower priority groups in the + * 'groups_to_schedule' list, it will get a higher slot priority. And this will + * enable the FW to quickly start the execution of higher priority group when it + * gets de-idled. + */ +static u8 get_slot_priority(struct kbase_queue_group *group) +{ + struct kbase_csf_scheduler *scheduler = + &group->kctx->kbdev->csf.scheduler; + u8 slot_prio; + u32 slots_for_tick = scheduler->num_csg_slots_for_tick; + u32 used_slots = slots_for_tick - scheduler->remaining_tick_slots; + /* Check if all the slots are going to be occupied by the non-idle or + * idle groups. + */ + if (scheduler->non_idle_scanout_grps >= slots_for_tick || + !scheduler->non_idle_scanout_grps) { + slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - used_slots); + } else { + /* There will be a mix of idle and non-idle groups. 
*/ + if (group->scan_seq_num < slots_for_tick) + slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - + group->scan_seq_num); + else if (MAX_CSG_SLOT_PRIORITY > (slots_for_tick + used_slots)) + slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - (slots_for_tick + used_slots)); + else + slot_prio = 0; + } + return slot_prio; +} + +/** + * update_resident_groups_priority() - Update the priority of resident groups + * + * @kbdev: The GPU device. + * + * This function will update the priority of all resident queue groups + * that are at the head of groups_to_schedule list, preceding the first + * non-resident group. + * + * This function will also adjust kbase_csf_scheduler.remaining_tick_slots on + * the priority update. + */ +static void update_resident_groups_priority(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + u32 num_groups = scheduler->num_csg_slots_for_tick; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + while (!list_empty(&scheduler->groups_to_schedule)) { + struct kbase_queue_group *group = + list_first_entry(&scheduler->groups_to_schedule, + struct kbase_queue_group, + link_to_schedule); + bool resident = + kbasep_csf_scheduler_group_is_on_slot_locked(group); + + if ((group->prepared_seq_num >= num_groups) || !resident) + break; + + update_csg_slot_priority(group, + get_slot_priority(group)); + + /* Drop the head group from the list */ + remove_scheduled_group(kbdev, group); + scheduler->remaining_tick_slots--; + } +} + +/** + * program_group_on_vacant_csg_slot() - Program a non-resident group on the + * given vacant CSG slot. + * @kbdev: Pointer to the GPU device. + * @slot: Vacant CSG slot number. + * + * This function will program a non-resident group at the head of + * kbase_csf_scheduler.groups_to_schedule list on the given vacant + * CSG slot, provided the initial position of the non-resident + * group in the list is less than the number of CSG slots and there is + * an available GPU address space slot. 
+ * kbase_csf_scheduler.remaining_tick_slots would also be adjusted after
+ * programming the slot.
+ */
+static void program_group_on_vacant_csg_slot(struct kbase_device *kbdev,
+		s8 slot)
+{
+	struct kbase_csf_scheduler *const sched = &kbdev->csf.scheduler;
+	struct kbase_queue_group *head_grp = NULL;
+	u32 slots_for_tick;
+
+	if (!list_empty(&sched->groups_to_schedule))
+		head_grp = list_first_entry(&sched->groups_to_schedule,
+					    struct kbase_queue_group,
+					    link_to_schedule);
+	slots_for_tick = sched->num_csg_slots_for_tick;
+
+	lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+	/* Nothing to program: empty list, or the head group lies beyond the
+	 * slots usable in this tick.
+	 */
+	if (!head_grp || (head_grp->prepared_seq_num >= slots_for_tick))
+		return;
+
+	/* The head group is expected to be off-slot at this point */
+	if (WARN_ON(kbasep_csf_scheduler_group_is_on_slot_locked(head_grp)))
+		return;
+
+	if (!kctx_as_enabled(head_grp->kctx) || head_grp->faulted) {
+		/* The group cannot be run, just drop it from the list */
+		update_offslot_non_idle_cnt_for_faulty_grp(head_grp);
+		remove_scheduled_group(kbdev, head_grp);
+		return;
+	}
+
+	program_csg_slot(head_grp, slot, get_slot_priority(head_grp));
+
+	/* The group is only consumed if the slot really got programmed */
+	if (likely(csg_slot_in_use(kbdev, slot))) {
+		remove_scheduled_group(kbdev, head_grp);
+		sched->remaining_tick_slots--;
+	}
+}
+
+/**
+ * program_vacant_csg_slot() - Program the vacant CSG slot with a non-resident
+ *                             group and update the priority of resident groups.
+ *
+ * @kbdev:  Pointer to the GPU device.
+ * @slot:   Vacant CSG slot number.
+ *
+ * This function will first update the priority of all resident queue groups
+ * that are at the head of groups_to_schedule list, preceding the first
+ * non-resident group, it will then try to program the given CS
+ * group slot with the non-resident group. Finally update the priority of all
+ * resident queue groups following the non-resident group.
+ *
+ * kbase_csf_scheduler.remaining_tick_slots would also be adjusted.
+ */ +static void program_vacant_csg_slot(struct kbase_device *kbdev, s8 slot) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + struct kbase_csf_csg_slot *const csg_slot = + scheduler->csg_slots; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + WARN_ON(atomic_read(&csg_slot[slot].state) != CSG_SLOT_READY); + + /* First update priority for already resident groups (if any) + * before the non-resident group + */ + update_resident_groups_priority(kbdev); + + /* Now consume the vacant slot for the non-resident group */ + program_group_on_vacant_csg_slot(kbdev, slot); + + /* Now update priority for already resident groups (if any) + * following the non-resident group + */ + update_resident_groups_priority(kbdev); +} + +static bool slots_state_changed(struct kbase_device *kbdev, + unsigned long *slots_mask, + bool (*state_check_func)(struct kbase_device *, s8)) +{ + u32 num_groups = kbdev->csf.global_iface.group_num; + DECLARE_BITMAP(changed_slots, MAX_SUPPORTED_CSGS) = {0}; + bool changed = false; + u32 i; + + for_each_set_bit(i, slots_mask, num_groups) { + if (state_check_func(kbdev, (s8)i)) { + set_bit(i, changed_slots); + changed = true; + } + } + + if (changed) + bitmap_copy(slots_mask, changed_slots, MAX_SUPPORTED_CSGS); + + return changed; +} + +/** + * program_suspending_csg_slots() - Program the CSG slots vacated on suspension + * of queue groups running on them. + * + * @kbdev: Pointer to the GPU device. + * + * This function will first wait for the ongoing suspension to complete on a + * CSG slot and will then program the vacant slot with the + * non-resident queue group inside the groups_to_schedule list. + * The programming of the non-resident queue group on the vacant slot could + * fail due to unavailability of free GPU address space slot and so the + * programming is re-attempted after the ongoing suspension has completed + * for all the CSG slots. 
+ * The priority of resident groups before and after the non-resident group
+ * in the groups_to_schedule list would also be updated.
+ * This would be repeated for all the slots undergoing suspension.
+ * GPU reset would be initiated if the wait for suspend times out.
+ */
+static void program_suspending_csg_slots(struct kbase_device *kbdev)
+{
+	u32 num_groups = kbdev->csf.global_iface.group_num;
+	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+	DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS);
+	DECLARE_BITMAP(evicted_mask, MAX_SUPPORTED_CSGS) = {0};
+	bool suspend_wait_failed = false;
+	long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
+
+	lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+	/* In the current implementation, csgs_events_enable_mask would be used
+	 * only to indicate suspending CSGs: a cleared bit marks a slot that
+	 * is being suspended, hence the complement taken here.
+	 */
+	bitmap_complement(slot_mask, scheduler->csgs_events_enable_mask,
+		MAX_SUPPORTED_CSGS);
+
+	while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) {
+		DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS);
+
+		bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS);
+
+		remaining = wait_event_timeout(kbdev->csf.event_wait,
+			slots_state_changed(kbdev, changed,
+				csg_slot_stopped_raw),
+			remaining);
+
+		if (remaining) {
+			u32 i;
+
+			for_each_set_bit(i, changed, num_groups) {
+				struct kbase_queue_group *group =
+					scheduler->csg_slots[i].resident_group;
+
+				if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i))) {
+					continue;
+				}
+				/* The on-slot CSG is now stopped */
+				clear_bit(i, slot_mask);
+
+				if (likely(group)) {
+					bool as_fault;
+					/* Only do save/cleanup if the
+					 * group is not terminated during
+					 * the sleep.
+					 */
+					save_csg_slot(group);
+					as_fault = cleanup_csg_slot(group);
+					/* If AS fault detected, evict it */
+					if (as_fault) {
+						sched_evict_group(group, true, true);
+						set_bit(i, evicted_mask);
+					}
+				}
+
+				program_vacant_csg_slot(kbdev, (s8)i);
+			}
+		} else {
+			u32 i;
+
+			/* Groups that have failed to suspend in time shall
+			 * raise a fatal error as they could no longer be
+			 * safely resumed.
+			 */
+			for_each_set_bit(i, slot_mask, num_groups) {
+				struct kbase_queue_group *const group =
+					scheduler->csg_slots[i].resident_group;
+
+				struct base_gpu_queue_group_error const
+					err_payload = { .error_type =
+						BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
+						.payload = {
+							.fatal_group = {
+								.status =
+									GPU_EXCEPTION_TYPE_SW_FAULT_2,
+							} } };
+
+				if (unlikely(group == NULL))
+					continue; /* NOTE(review): the bit stays set in slot_mask on this path - confirm the outer loop still terminates */
+
+				kbase_csf_add_group_fatal_error(group,
+					&err_payload);
+				kbase_event_wakeup(group->kctx);
+
+				/* TODO GPUCORE-25328: The CSG can't be
+				 * terminated, the GPU will be reset as a
+				 * work-around.
+				 */
+				dev_warn(
+					kbdev->dev,
+					"Group %d of context %d_%d on slot %u failed to suspend",
+					group->handle, group->kctx->tgid,
+					group->kctx->id, i);
+
+				/* The group has failed suspension, stop
+				 * further examination.
+				 */
+				clear_bit(i, slot_mask);
+				set_bit(i, scheduler->csgs_events_enable_mask);
+				update_offslot_non_idle_cnt_for_onslot_grp(
+					group);
+			}
+
+			suspend_wait_failed = true;
+		}
+	}
+
+	if (!bitmap_empty(evicted_mask, MAX_SUPPORTED_CSGS))
+		dev_info(kbdev->dev, "Scheduler evicting slots: 0x%*pb\n",
+			 num_groups, evicted_mask);
+
+	if (likely(!suspend_wait_failed)) {
+		u32 i;
+
+		/* Re-attempt the programming on any slot that is still
+		 * vacant, for as long as there are groups left to schedule
+		 * and slots left for this tick.
+		 */
+		while (scheduler->ngrp_to_schedule &&
+		       scheduler->remaining_tick_slots) {
+			i = find_first_zero_bit(scheduler->csg_inuse_bitmap,
+					num_groups);
+			if (WARN_ON(i == num_groups))
+				break;
+			program_vacant_csg_slot(kbdev, (s8)i);
+			if (!csg_slot_in_use(kbdev, (int)i)) {
+				dev_warn(kbdev->dev, "Couldn't use CSG slot %d despite being vacant", i);
+				break;
+			}
+		}
+	} else {
+		if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
+			kbase_reset_gpu(kbdev);
+	}
+}
+/*
+ * suspend_queue_group() - Request the suspension, or the termination if an
+ * address space fault was detected, of the CSG slot of the given group.
+ * The bit of the slot in csgs_events_enable_mask is cleared first, under
+ * interrupt_lock, to flag the slot as undergoing suspension.
+ */
+static void suspend_queue_group(struct kbase_queue_group *group)
+{
+	unsigned long flags;
+	struct kbase_csf_scheduler *const scheduler =
+		&group->kctx->kbdev->csf.scheduler;
+
+	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+	/* This shall be used in program_suspending_csg_slots() where we
+	 * assume that whilst CSGs are being suspended, this bitmask is not
+	 * used by anything else i.e., it indicates only the CSGs going
+	 * through suspension.
+	 */
+	clear_bit(group->csg_nr, scheduler->csgs_events_enable_mask);
+	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+
+	/* If AS fault detected, terminate the group */
+	if (!kctx_as_enabled(group->kctx) || group->faulted)
+		term_csg_slot(group);
+	else
+		suspend_csg_slot(group);
+}
+/*
+ * wait_csg_slots_start() - Wait for every CSG slot found in the
+ * CSG_SLOT_READY2RUN state to be reported as running, marking the resident
+ * group of each such slot as KBASE_CSF_GROUP_RUNNABLE. The total wait is
+ * bounded by csf.fw_timeout_ms; a GPU reset is initiated on time-out.
+ * The caller must hold the scheduler lock.
+ */
+static void wait_csg_slots_start(struct kbase_device *kbdev)
+{
+	u32 num_groups = kbdev->csf.global_iface.group_num;
+	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+	long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
+	DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0};
+	u32 i;
+
+	lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+	/* Collect the slots that have been started but not yet confirmed */
+	for (i = 0; i < num_groups; i++) {
+		if (atomic_read(&scheduler->csg_slots[i].state) ==
+		    CSG_SLOT_READY2RUN)
+			set_bit(i, slot_mask);
+	}
+
+	while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) {
+		DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS);
+
+		bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS);
+
+		remaining = wait_event_timeout(kbdev->csf.event_wait,
+			slots_state_changed(kbdev, changed, csg_slot_running),
+			remaining);
+
+		if (remaining) {
+			for_each_set_bit(i, changed, num_groups) {
+				struct kbase_queue_group *group =
+					scheduler->csg_slots[i].resident_group;
+
+				/* The on-slot CSG is now running */
+				clear_bit(i, slot_mask);
+				group->run_state = KBASE_CSF_GROUP_RUNNABLE;
+			}
+		} else {
+			dev_warn(kbdev->dev, "Timed out waiting for CSG slots to start, slots: 0x%*pb\n",
+				 num_groups, slot_mask);
+
+			if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
+				kbase_reset_gpu(kbdev);
+			break;
+		}
+	}
+}
+
+/**
+ * group_on_slot_is_idle() - Check if the given slot has a CSG-idle state
+ *                           flagged after the completion of a CSG status
+ *                           update command
+ *
+ * This function is called at the start of scheduling tick to check the
+ * idle status of a queue group resident on a CSG slot.
+ * The caller must make sure the corresponding status update command has + * been called and completed before checking this status. + * + * @kbdev: Pointer to the GPU device. + * @slot: The given slot for checking an occupying resident group's idle + * state. + * + * Return: true if the group resident on slot is idle, otherwise false. + */ +static bool group_on_slot_is_idle(struct kbase_device *kbdev, + unsigned long slot) +{ + struct kbase_csf_cmd_stream_group_info *ginfo = + &kbdev->csf.global_iface.groups[slot]; + bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) & + CSG_STATUS_STATE_IDLE_MASK; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + return idle; +} + +/** + * slots_update_state_changed() - Check the handshake state of a subset of + * command group slots. + * + * Checks the state of a subset of slots selected through the slots_mask + * bit_map. Records which slots' handshake completed and send it back in the + * slots_done bit_map. + * + * @kbdev: The GPU device. + * @field_mask: The field mask for checking the state in the csg_req/ack. + * @slots_mask: A bit_map specifying the slots to check. + * @slots_done: A cleared bit_map for returning the slots that + * have finished update. + * + * Return: true if the slots_done is set for at least one slot. + * Otherwise false. 
+ */
+static
+bool slots_update_state_changed(struct kbase_device *kbdev, u32 field_mask,
+		const unsigned long *slots_mask, unsigned long *slots_done)
+{
+	const u32 nr_slots = kbdev->csf.global_iface.group_num;
+	bool any_done = false;
+	u32 slot;
+
+	lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+	for_each_set_bit(slot, slots_mask, nr_slots) {
+		struct kbase_csf_cmd_stream_group_info const *const slot_info =
+			&kbdev->csf.global_iface.groups[slot];
+		const u32 req =
+			kbase_csf_firmware_csg_input_read(slot_info, CSG_REQ);
+		const u32 ack =
+			kbase_csf_firmware_csg_output(slot_info, CSG_ACK);
+
+		/* The handshake is still pending while the request and the
+		 * acknowledgment differ in any of the bits of interest.
+		 */
+		if ((req ^ ack) & field_mask)
+			continue;
+
+		set_bit(slot, slots_done);
+		any_done = true;
+	}
+
+	return any_done;
+}
+
+/**
+ * wait_csg_slots_handshake_ack - Wait the req/ack handshakes to complete on
+ *                                the specified groups.
+ *
+ * This function waits for the acknowledgement of the request that have
+ * already been placed for the CSG slots by the caller. Currently used for
+ * the CSG priority update and status update requests.
+ *
+ * @kbdev:           Pointer to the GPU device.
+ * @field_mask:      The field mask for checking the state in the csg_req/ack.
+ * @slot_mask:       Bitmap reflecting the slots, the function will modify
+ *                   the acknowledged slots by clearing their corresponding
+ *                   bits.
+ * @wait_in_jiffies: Wait duration in jiffies, controlling the time-out.
+ *
+ * Return: 0 on all specified slots acknowledged; otherwise -ETIMEDOUT. For
+ *         timed out condition with unacknowledged slots, their bits remain
+ *         set in the slot_mask.
+ */
+static int wait_csg_slots_handshake_ack(struct kbase_device *kbdev,
+		u32 field_mask, unsigned long *slot_mask, long wait_in_jiffies)
+{
+	const u32 num_groups = kbdev->csf.global_iface.group_num;
+	long remaining = wait_in_jiffies;
+
+	lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+	while (!bitmap_empty(slot_mask, num_groups) &&
+	       !kbase_reset_gpu_is_active(kbdev)) {
+		DECLARE_BITMAP(dones, MAX_SUPPORTED_CSGS) = { 0 };
+
+		remaining = wait_event_timeout(kbdev->csf.event_wait,
+				slots_update_state_changed(kbdev, field_mask,
+						   slot_mask, dones),
+				remaining);
+
+		if (remaining)
+			bitmap_andnot(slot_mask, slot_mask, dones, num_groups); /* drop the acknowledged slots */
+		else
+			/* Timed-out on the wait */
+			return -ETIMEDOUT;
+	}
+
+	return 0; /* also reached when a GPU reset became active, in which case slot_mask may still have bits set */
+}
+/*
+ * wait_csg_slots_finish_prio_update() - Wait, for up to csf.fw_timeout_ms,
+ * for the firmware to acknowledge the EP_CFG requests placed on the slots
+ * recorded in csg_slots_prio_update. A time-out is only warned about.
+ */
+static void wait_csg_slots_finish_prio_update(struct kbase_device *kbdev)
+{
+	unsigned long *slot_mask =
+			kbdev->csf.scheduler.csg_slots_prio_update;
+	long wait_time = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
+	int ret = wait_csg_slots_handshake_ack(kbdev, CSG_REQ_EP_CFG_MASK,
+					       slot_mask, wait_time);
+
+	lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+	if (ret != 0) {
+		/* The update timeout is not regarded as a serious
+		 * issue, no major consequences are expected as a
+		 * result, so just warn about it.
+		 */
+		dev_warn(
+			kbdev->dev,
+			"Timeout on CSG_REQ:EP_CFG, skipping the update wait: slot mask=0x%lx",
+			slot_mask[0]);
+	}
+}
+/*
+ * Terminate and evict every group of the given context that is resident on
+ * a CSG slot, appending each evicted group to the evicted_groups list.
+ * The caller must hold the csf.lock of the context; the scheduler lock is
+ * taken and released by this function.
+ */
+void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct list_head *evicted_groups)
+{
+	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+	struct kbase_queue_group *group;
+	u32 num_groups = kbdev->csf.global_iface.group_num;
+	u32 slot;
+	DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0};
+
+	lockdep_assert_held(&kctx->csf.lock);
+	mutex_lock(&scheduler->lock);
+
+	/* This code is only called during reset, so we don't wait for the CSG
+	 * slots to be stopped
+	 */
+	WARN_ON(!kbase_reset_gpu_is_active(kbdev));
+
+	KBASE_KTRACE_ADD(kbdev, EVICT_CTX_SLOTS, kctx, 0u);
+	for (slot = 0; slot < num_groups; slot++) {
+		group = kbdev->csf.scheduler.csg_slots[slot].resident_group;
+		if (group && group->kctx == kctx) {
+			bool as_fault;
+
+			term_csg_slot(group);
+			as_fault = cleanup_csg_slot(group);
+			/* remove the group from the scheduler list */
+			sched_evict_group(group, as_fault, false);
+			/* return the evicted group to the caller */
+			list_add_tail(&group->link, evicted_groups);
+			set_bit(slot, slot_mask); /* only used for the log message below */
+		}
+	}
+
+	dev_info(kbdev->dev, "Evicting context %d_%d slots: 0x%*pb\n",
+			kctx->tgid, kctx->id, num_groups, slot_mask);
+
+	mutex_unlock(&scheduler->lock);
+}
+
+/**
+ * scheduler_slot_protm_ack - Acknowledging the protected region requests
+ * from the resident group on a given slot.
+ *
+ * The function assumes that the given slot is in stable running state and
+ * has already been judged by the caller on that any pending protected region
+ * requests of the resident group should be acknowledged.
+ *
+ * @kbdev:  Pointer to the GPU device.
+ * @group:  Pointer to the resident group on the given slot.
+ * @slot:   The slot that the given group is actively operating on.
+ *
+ * Return:  true if the group has pending protm request(s) and is acknowledged.
+ * The caller should arrange to enter the protected mode for servicing + * it. Otherwise return false, indicating the group has no pending protm + * request. + */ +static bool scheduler_slot_protm_ack(struct kbase_device *const kbdev, + struct kbase_queue_group *const group, + const int slot) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + bool protm_ack = false; + struct kbase_csf_cmd_stream_group_info *ginfo = + &kbdev->csf.global_iface.groups[slot]; + u32 max_csi; + int i; + + if (WARN_ON(scheduler->csg_slots[slot].resident_group != group)) + return protm_ack; + + lockdep_assert_held(&scheduler->lock); + lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.interrupt_lock); + + max_csi = ginfo->stream_num; + for (i = find_first_bit(group->protm_pending_bitmap, max_csi); + i < max_csi; + i = find_next_bit(group->protm_pending_bitmap, max_csi, i + 1)) { + struct kbase_queue *queue = group->bound_queues[i]; + + clear_bit(i, group->protm_pending_bitmap); + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, PROTM_PENDING_CLEAR, group, + queue, group->protm_pending_bitmap[0]); + + if (!WARN_ON(!queue) && queue->enabled) { + struct kbase_csf_cmd_stream_info *stream = + &ginfo->streams[i]; + u32 cs_protm_ack = kbase_csf_firmware_cs_output( + stream, CS_ACK) & + CS_ACK_PROTM_PEND_MASK; + u32 cs_protm_req = kbase_csf_firmware_cs_input_read( + stream, CS_REQ) & + CS_REQ_PROTM_PEND_MASK; + + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_ACK, group, + queue, cs_protm_ack ^ cs_protm_req); + + if (cs_protm_ack == cs_protm_req) { + dev_dbg(kbdev->dev, + "PROTM-ack already done for queue-%d group-%d slot-%d", + queue->csi_index, group->handle, slot); + continue; + } + + kbase_csf_firmware_cs_input_mask(stream, CS_REQ, + cs_protm_ack, + CS_ACK_PROTM_PEND_MASK); + protm_ack = true; + dev_dbg(kbdev->dev, + "PROTM-ack for queue-%d, group-%d slot-%d", + queue->csi_index, group->handle, slot); + } + } + + return protm_ack; +} + +/** + * scheduler_group_check_protm_enter - 
Request the given group to be evaluated + * for triggering the protected mode. + * + * The function assumes the given group is either an active running group or + * the scheduler internally maintained field scheduler->top_grp. + * + * If the GPU is not already running in protected mode and the input group + * has protected region requests from its bound queues, the requests are + * acknowledged and the GPU is instructed to enter the protected mode. + * + * @kbdev: Pointer to the GPU device. + * @input_grp: Pointer to the GPU queue group. + */ +static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, + struct kbase_queue_group *const input_grp) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + unsigned long flags; + bool protm_in_use; + + lockdep_assert_held(&scheduler->lock); + + spin_lock_irqsave(&scheduler->interrupt_lock, flags); + + protm_in_use = kbase_csf_scheduler_protected_mode_in_use(kbdev); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_CHECK_PROTM_ENTER, input_grp, + protm_in_use); + + /* Firmware samples the PROTM_PEND ACK bit for CSs when + * Host sends PROTM_ENTER global request. So if PROTM_PEND ACK bit + * is set for a CS after Host has sent the PROTM_ENTER + * Global request, then there is no guarantee that firmware will + * notice that prior to switching to protected mode. And firmware + * may not again raise the PROTM_PEND interrupt for that CS + * later on. To avoid that uncertainty PROTM_PEND ACK bit + * is not set for a CS if the request to enter protected + * mode has already been sent. It will be set later (after the exit + * from protected mode has taken place) when the group to which + * CS is bound becomes the top group. + * + * The actual decision to enter protected mode hinges on the + * input group being the top priority group, or, in case the previous + * top-group is evicted from the scheduler during the tick, its + * would-be replacement, and that it is currently in a stable state (i.e. 
the + * slot state is running). + */ + if (!protm_in_use && !WARN_ON(!input_grp)) { + const int slot = + kbase_csf_scheduler_group_get_slot_locked(input_grp); + + /* check the input_grp is running and requesting protected mode + */ + if (slot >= 0 && + atomic_read(&scheduler->csg_slots[slot].state) == + CSG_SLOT_RUNNING) { + if (kctx_as_enabled(input_grp->kctx) && + scheduler_slot_protm_ack(kbdev, input_grp, slot)) { + /* Option of acknowledging to multiple + * CSGs from the same kctx is dropped, + * after consulting with the + * architecture team. See the comment in + * GPUCORE-21394. + */ + + /* Disable the idle timer */ + disable_gpu_idle_fw_timer_locked(kbdev); + + /* Switch to protected mode */ + scheduler->active_protm_grp = input_grp; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_ENTER_PROTM, + input_grp, 0u); + + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + kbase_csf_enter_protected_mode(kbdev); + return; + } + } + } + + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); +} + +static void scheduler_apply(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + const u32 total_csg_slots = kbdev->csf.global_iface.group_num; + const u32 available_csg_slots = scheduler->num_csg_slots_for_tick; + u32 suspend_cnt = 0; + u32 remain_cnt = 0; + u32 resident_cnt = 0; + struct kbase_queue_group *group; + u32 i; + u32 spare; + + lockdep_assert_held(&scheduler->lock); + + /* Suspend those resident groups not in the run list */ + for (i = 0; i < total_csg_slots; i++) { + group = scheduler->csg_slots[i].resident_group; + if (group) { + resident_cnt++; + if (group->prepared_seq_num >= available_csg_slots) { + suspend_queue_group(group); + suspend_cnt++; + } else + remain_cnt++; + } + } + + /* Initialize the remaining available csg slots for the tick/tock */ + scheduler->remaining_tick_slots = available_csg_slots; + + /* If there are spare slots, apply heads in the list */ + spare = (available_csg_slots > 
resident_cnt) ? + (available_csg_slots - resident_cnt) : 0; + while (!list_empty(&scheduler->groups_to_schedule)) { + group = list_first_entry(&scheduler->groups_to_schedule, + struct kbase_queue_group, + link_to_schedule); + + if (kbasep_csf_scheduler_group_is_on_slot_locked(group) && + group->prepared_seq_num < available_csg_slots) { + /* One of the resident remainders */ + update_csg_slot_priority(group, + get_slot_priority(group)); + } else if (spare != 0) { + s8 slot = (s8)find_first_zero_bit( + kbdev->csf.scheduler.csg_inuse_bitmap, + total_csg_slots); + + if (WARN_ON(slot >= (s8)total_csg_slots)) + break; + + if (!kctx_as_enabled(group->kctx) || group->faulted) { + /* Drop the head group and continue */ + update_offslot_non_idle_cnt_for_faulty_grp( + group); + remove_scheduled_group(kbdev, group); + continue; + } + program_csg_slot(group, slot, + get_slot_priority(group)); + if (unlikely(!csg_slot_in_use(kbdev, slot))) + break; + + spare--; + } else + break; + + /* Drop the head csg from the list */ + remove_scheduled_group(kbdev, group); + if (!WARN_ON(!scheduler->remaining_tick_slots)) + scheduler->remaining_tick_slots--; + } + + /* Dealing with groups currently going through suspend */ + program_suspending_csg_slots(kbdev); +} + +static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, + struct kbase_context *kctx, int priority) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + struct kbase_queue_group *group; + + lockdep_assert_held(&scheduler->lock); + if (WARN_ON(priority < 0) || + WARN_ON(priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT)) + return; + + if (!kctx_as_enabled(kctx)) + return; + + list_for_each_entry(group, &kctx->csf.sched.runnable_groups[priority], + link) { + if (WARN_ON(!list_empty(&group->link_to_schedule))) + /* This would be a bug */ + list_del_init(&group->link_to_schedule); + + if (unlikely(group->faulted)) + continue; + + /* Set the scanout sequence number, starting from 0 */ + group->scan_seq_num = 
scheduler->csg_scan_count_for_tick++; + + if (queue_group_idle_locked(group)) { + list_add_tail(&group->link_to_schedule, + &scheduler->idle_groups_to_schedule); + continue; + } + + if (!scheduler->ngrp_to_schedule) { + /* keep the top csg's origin */ + scheduler->top_ctx = kctx; + scheduler->top_grp = group; + } + + list_add_tail(&group->link_to_schedule, + &scheduler->groups_to_schedule); + group->prepared_seq_num = scheduler->ngrp_to_schedule++; + + kctx->csf.sched.ngrp_to_schedule++; + count_active_address_space(kbdev, kctx); + } +} + +/** + * scheduler_rotate_groups() - Rotate the runnable queue groups to provide + * fairness of scheduling within a single + * kbase_context. + * + * Since only kbase_csf_scheduler's top_grp (i.e. the queue group assigned + * the highest slot priority) is guaranteed to get the resources that it + * needs we only rotate the kbase_context corresponding to it - + * kbase_csf_scheduler's top_ctx. + * + * The priority level chosen for rotation is the one containing the previous + * scheduling cycle's kbase_csf_scheduler's top_grp. + * + * In a 'fresh-slice-cycle' this always corresponds to the highest group + * priority in use by kbase_csf_scheduler's top_ctx. That is, it's the priority + * level of the previous scheduling cycle's first runnable kbase_context. + * + * We choose this priority level because when higher priority work is + * scheduled, we should always cause the scheduler to run and do a scan. The + * scan always enumerates the highest priority work first (whether that be + * based on process priority or group priority), and thus + * kbase_csf_scheduler's top_grp will point to the first of those high priority + * groups, which necessarily must be the highest priority group in + * kbase_csf_scheduler's top_ctx. The fresh-slice-cycle will run later and pick + * up that group appropriately. 
+ * + * If kbase_csf_scheduler's top_grp was instead evicted (and thus is NULL), + * then no explicit rotation occurs on the next fresh-slice-cycle schedule, but + * will set up kbase_csf_scheduler's top_ctx again for the next scheduling + * cycle. Implicitly, a rotation had already occurred by removing + * the kbase_csf_scheduler's top_grp + * + * If kbase_csf_scheduler's top_grp became idle and all other groups belonging + * to kbase_csf_scheduler's top_grp's priority level in kbase_csf_scheduler's + * top_ctx are also idle, then the effect of this will be to rotate idle + * groups, which might not actually become resident in the next + * scheduling slice. However this is acceptable since a queue group becoming + * idle is implicitly a rotation (as above with evicted queue groups), as it + * automatically allows a new queue group to take the maximum slot priority + * whilst the idle kbase_csf_scheduler's top_grp ends up near the back of + * the kbase_csf_scheduler's groups_to_schedule list. In this example, it will + * be for a group in the next lowest priority level or in absence of those the + * next kbase_context's queue groups. + * + * @kbdev: Pointer to the GPU device. + */ +static void scheduler_rotate_groups(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + struct kbase_context *const top_ctx = scheduler->top_ctx; + struct kbase_queue_group *const top_grp = scheduler->top_grp; + + lockdep_assert_held(&scheduler->lock); + if (top_ctx && top_grp) { + struct list_head *list = + &top_ctx->csf.sched.runnable_groups[top_grp->priority]; + + WARN_ON(top_grp->kctx != top_ctx); + if (!WARN_ON(list_empty(list))) { + struct kbase_queue_group *new_head_grp; + list_move_tail(&top_grp->link, list); + new_head_grp = (!list_empty(list)) ? 
+ list_first_entry(list, struct kbase_queue_group, link) : + NULL; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_ROTATE_RUNNABLE, + top_grp, top_ctx->csf.sched.num_runnable_grps); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_HEAD_RUNNABLE, + new_head_grp, 0u); + dev_dbg(kbdev->dev, + "groups rotated for a context, num_runnable_groups: %u\n", + scheduler->top_ctx->csf.sched.num_runnable_grps); + } + } +} + +static void scheduler_rotate_ctxs(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + struct list_head *list = &scheduler->runnable_kctxs; + + lockdep_assert_held(&scheduler->lock); + if (scheduler->top_ctx) { + if (!WARN_ON(list_empty(list))) { + struct kbase_context *pos; + bool found = false; + + /* Locate the ctx on the list */ + list_for_each_entry(pos, list, csf.link) { + if (scheduler->top_ctx == pos) { + found = true; + break; + } + } + + if (!WARN_ON(!found)) { + struct kbase_context *new_head_kctx; + list_move_tail(&pos->csf.link, list); + KBASE_KTRACE_ADD(kbdev, SCHEDULER_ROTATE_RUNNABLE, pos, + 0u); + new_head_kctx = (!list_empty(list)) ? + list_first_entry(list, struct kbase_context, csf.link) : + NULL; + KBASE_KTRACE_ADD(kbdev, SCHEDULER_HEAD_RUNNABLE, + new_head_kctx, 0u); + dev_dbg(kbdev->dev, "contexts rotated\n"); + } + } + } +} + +/** + * scheduler_update_idle_slots_status() - Get the status update for the CSG + * slots for which the IDLE notification was received + * previously. + * + * This function sends a CSG status update request for all the CSG slots + * present in the bitmap scheduler->csg_slots_idle_mask and wait for the + * request to complete. + * The bits set in the scheduler->csg_slots_idle_mask bitmap are cleared by + * this function. + * + * @kbdev: Pointer to the GPU device. + * @csg_bitmap: Bitmap of the CSG slots for which + * the status update request completed successfully. + * @failed_csg_bitmap: Bitmap of the CSG slots for which + * the status update request timedout. 
+ */ +static void scheduler_update_idle_slots_status(struct kbase_device *kbdev, + unsigned long *csg_bitmap, unsigned long *failed_csg_bitmap) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + const u32 num_groups = kbdev->csf.global_iface.group_num; + struct kbase_csf_global_iface *const global_iface = + &kbdev->csf.global_iface; + unsigned long flags, i; + + lockdep_assert_held(&scheduler->lock); + + spin_lock_irqsave(&scheduler->interrupt_lock, flags); + for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) { + struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i]; + struct kbase_queue_group *group = csg_slot->resident_group; + struct kbase_csf_cmd_stream_group_info *const ginfo = + &global_iface->groups[i]; + u32 csg_req; + + clear_bit(i, scheduler->csg_slots_idle_mask); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group, + scheduler->csg_slots_idle_mask[0]); + if (WARN_ON(!group)) + continue; + + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STATUS_UPDATE, group, + i); + + csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); + csg_req ^= CSG_REQ_STATUS_UPDATE_MASK; + kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req, + CSG_REQ_STATUS_UPDATE_MASK); + + set_bit(i, csg_bitmap); + } + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + + /* The groups are aggregated into a single kernel doorbell request */ + if (!bitmap_empty(csg_bitmap, num_groups)) { + long wt = + kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); + u32 db_slots = (u32)csg_bitmap[0]; + + kbase_csf_ring_csg_slots_doorbell(kbdev, db_slots); + + if (wait_csg_slots_handshake_ack(kbdev, + CSG_REQ_STATUS_UPDATE_MASK, csg_bitmap, wt)) { + dev_warn( + kbdev->dev, + "Timeout on CSG_REQ:STATUS_UPDATE, treat groups as not idle: slot mask=0x%lx", + csg_bitmap[0]); + + /* Store the bitmap of timed out slots */ + bitmap_copy(failed_csg_bitmap, csg_bitmap, num_groups); + csg_bitmap[0] = ~csg_bitmap[0] & db_slots; + } else { + 
KBASE_KTRACE_ADD(kbdev, SLOTS_STATUS_UPDATE_ACK, NULL, + db_slots); + csg_bitmap[0] = db_slots; + } + } +} + +/** + * scheduler_handle_idle_slots() - Update the idle status of queue groups + * resident on CSG slots for which the + * IDLE notification was received previously. + * + * This function is called at the start of scheduling tick/tock to reconfirm + * the idle status of queue groups resident on CSG slots for + * which idle notification was received previously, i.e. all the CSG slots + * present in the bitmap scheduler->csg_slots_idle_mask. + * The confirmation is done by sending the CSG status update request to the + * firmware. On completion, the firmware will mark the idleness at the + * slot's interface CSG_STATUS_STATE register accordingly. + * + * The run state of the groups resident on still idle CSG slots is changed to + * KBASE_CSF_GROUP_IDLE and the bitmap scheduler->csg_slots_idle_mask is + * updated accordingly. + * The bits corresponding to slots for which the status update request timed out + * remain set in scheduler->csg_slots_idle_mask. + * + * @kbdev: Pointer to the GPU device. 
+ */ +static void scheduler_handle_idle_slots(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + u32 num_groups = kbdev->csf.global_iface.group_num; + unsigned long flags, i; + DECLARE_BITMAP(csg_bitmap, MAX_SUPPORTED_CSGS) = { 0 }; + DECLARE_BITMAP(failed_csg_bitmap, MAX_SUPPORTED_CSGS) = { 0 }; + + lockdep_assert_held(&scheduler->lock); + + scheduler_update_idle_slots_status(kbdev, csg_bitmap, + failed_csg_bitmap); + + spin_lock_irqsave(&scheduler->interrupt_lock, flags); + for_each_set_bit(i, csg_bitmap, num_groups) { + struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i]; + struct kbase_queue_group *group = csg_slot->resident_group; + + if (WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_RUNNING)) + continue; + if (WARN_ON(!group)) + continue; + if (WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE && + group->run_state != KBASE_CSF_GROUP_IDLE)) + continue; + if (WARN_ON(group->priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT)) + continue; + + if (group_on_slot_is_idle(kbdev, i)) { + group->run_state = KBASE_CSF_GROUP_IDLE; + set_bit(i, scheduler->csg_slots_idle_mask); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, + group, scheduler->csg_slots_idle_mask[0]); + } else + group->run_state = KBASE_CSF_GROUP_RUNNABLE; + } + + bitmap_or(scheduler->csg_slots_idle_mask, + scheduler->csg_slots_idle_mask, + failed_csg_bitmap, num_groups); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, NULL, + scheduler->csg_slots_idle_mask[0]); + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); +} + +static void scheduler_scan_idle_groups(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + struct kbase_queue_group *group, *n; + + list_for_each_entry_safe(group, n, &scheduler->idle_groups_to_schedule, + link_to_schedule) { + + WARN_ON(!queue_group_idle_locked(group)); + + if (!scheduler->ngrp_to_schedule) { + /* keep the top csg's origin */ + scheduler->top_ctx = 
group->kctx; + scheduler->top_grp = group; + } + + group->prepared_seq_num = scheduler->ngrp_to_schedule++; + list_move_tail(&group->link_to_schedule, + &scheduler->groups_to_schedule); + + group->kctx->csf.sched.ngrp_to_schedule++; + count_active_address_space(kbdev, group->kctx); + } +} + +static void scheduler_rotate(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + + lockdep_assert_held(&scheduler->lock); + + /* Dealing with rotation */ + scheduler_rotate_groups(kbdev); + scheduler_rotate_ctxs(kbdev); +} + +static struct kbase_queue_group *get_tock_top_group( + struct kbase_csf_scheduler *const scheduler) +{ + struct kbase_context *kctx; + int i; + + lockdep_assert_held(&scheduler->lock); + for (i = 0; i < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) { + list_for_each_entry(kctx, + &scheduler->runnable_kctxs, csf.link) { + struct kbase_queue_group *group; + + list_for_each_entry(group, + &kctx->csf.sched.runnable_groups[i], + link) { + if (queue_group_idle_locked(group)) + continue; + + return group; + } + } + } + + return NULL; +} + +static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, + bool is_suspend) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 }; + + int ret = suspend_active_queue_groups(kbdev, slot_mask); + + if (ret) { + /* The suspend of CSGs failed, trigger the GPU reset and wait + * for it to complete to be in a deterministic state. 
+ */ + dev_warn(kbdev->dev, "Timed out waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n", + kbdev->csf.global_iface.group_num, slot_mask); + + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) + kbase_reset_gpu(kbdev); + + if (is_suspend) { + mutex_unlock(&scheduler->lock); + kbase_reset_gpu_wait(kbdev); + mutex_lock(&scheduler->lock); + } + return -1; + } + + /* Check if the groups became active whilst the suspend was ongoing, + * but only for the case where the system suspend is not in progress + */ + if (!is_suspend && atomic_read(&scheduler->non_idle_offslot_grps)) + return -1; + + return 0; +} + +static bool scheduler_idle_suspendable(struct kbase_device *kbdev) +{ + bool suspend; + unsigned long flags; + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + + lockdep_assert_held(&scheduler->lock); + + if (scheduler->state == SCHED_SUSPENDED) + return false; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + if (scheduler->total_runnable_grps) { + spin_lock(&scheduler->interrupt_lock); + + /* Check both on-slots and off-slots groups idle status */ + suspend = kbase_csf_scheduler_all_csgs_idle(kbdev) && + !atomic_read(&scheduler->non_idle_offslot_grps) && + kbase_pm_idle_groups_sched_suspendable(kbdev); + + spin_unlock(&scheduler->interrupt_lock); + } else + suspend = kbase_pm_no_runnables_sched_suspendable(kbdev); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return suspend; +} + +static void gpu_idle_worker(struct work_struct *work) +{ + struct kbase_device *kbdev = container_of( + work, struct kbase_device, csf.scheduler.gpu_idle_work); + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + bool reset_active = false; + bool scheduler_is_idle_suspendable = false; + bool all_groups_suspended = false; + + KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_BEGIN, NULL, 0u); + +#define __ENCODE_KTRACE_INFO(reset, idle, all_suspend) \ + (((u32)reset) | (((u32)idle) << 4) | (((u32)all_suspend) << 8)) 
+ + if (kbase_reset_gpu_try_prevent(kbdev)) { + dev_warn(kbdev->dev, "Quit idle for failing to prevent gpu reset.\n"); + KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_END, NULL, + __ENCODE_KTRACE_INFO(true, false, false)); + return; + } + mutex_lock(&scheduler->lock); + + /* Cycle completed, disable the firmware idle timer */ + disable_gpu_idle_fw_timer(kbdev); + scheduler_is_idle_suspendable = scheduler_idle_suspendable(kbdev); + reset_active = kbase_reset_gpu_is_active(kbdev); + if (scheduler_is_idle_suspendable && !reset_active) { + all_groups_suspended = + !suspend_active_groups_on_powerdown(kbdev, false); + + if (all_groups_suspended) { + dev_dbg(kbdev->dev, "Scheduler becomes idle suspended now"); + scheduler_suspend(kbdev); + cancel_tick_timer(kbdev); + } else { + dev_dbg(kbdev->dev, "Aborting suspend scheduler (grps: %d)", + atomic_read(&scheduler->non_idle_offslot_grps)); + /* Bring forward the next tick */ + kbase_csf_scheduler_advance_tick(kbdev); + } + } + + mutex_unlock(&scheduler->lock); + kbase_reset_gpu_allow(kbdev); + KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_END, NULL, + __ENCODE_KTRACE_INFO(reset_active, scheduler_is_idle_suspendable, all_groups_suspended)); +#undef __ENCODE_KTRACE_INFO +} + +static int scheduler_prepare(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + int i; + + lockdep_assert_held(&scheduler->lock); + + /* Empty the groups_to_schedule */ + while (!list_empty(&scheduler->groups_to_schedule)) { + struct kbase_queue_group *grp = + list_first_entry(&scheduler->groups_to_schedule, + struct kbase_queue_group, + link_to_schedule); + + remove_scheduled_group(kbdev, grp); + } + + /* Pre-scan init scheduler fields */ + if (WARN_ON(scheduler->ngrp_to_schedule != 0)) + scheduler->ngrp_to_schedule = 0; + scheduler->top_ctx = NULL; + scheduler->top_grp = NULL; + scheduler->csg_scan_count_for_tick = 0; + WARN_ON(!list_empty(&scheduler->idle_groups_to_schedule)); + scheduler->num_active_address_spaces = 0; + 
scheduler->num_csg_slots_for_tick = 0; + bitmap_zero(scheduler->csg_slots_prio_update, MAX_SUPPORTED_CSGS); + + /* Scan out to run groups */ + for (i = 0; i < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) { + struct kbase_context *kctx; + + list_for_each_entry(kctx, &scheduler->runnable_kctxs, csf.link) + scheduler_ctx_scan_groups(kbdev, kctx, i); + } + + /* Update this tick's non-idle groups */ + scheduler->non_idle_scanout_grps = scheduler->ngrp_to_schedule; + + /* Initial number of non-idle off-slot groups, before the scheduler's + * scheduler_apply() operation. This gives a sensible start point view + * of the tick. It will be subject to up/downs during the scheduler + * active phase. + */ + atomic_set(&scheduler->non_idle_offslot_grps, + scheduler->non_idle_scanout_grps); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, NULL, + scheduler->non_idle_scanout_grps); + + /* Adds those idle but runnable groups to the scanout list */ + scheduler_scan_idle_groups(kbdev); + + /* After adding the idle CSGs, the two counts should be the same */ + WARN_ON(scheduler->csg_scan_count_for_tick != scheduler->ngrp_to_schedule); + + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp, + scheduler->num_active_address_spaces | + (((u64)scheduler->ngrp_to_schedule) << 32)); + set_max_csg_slots(kbdev); + dev_dbg(kbdev->dev, "prepared groups length: %u, num_active_address_spaces: %u\n", + scheduler->ngrp_to_schedule, scheduler->num_active_address_spaces); + return 0; +} + +static void scheduler_handle_idle_timer_onoff(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + + lockdep_assert_held(&scheduler->lock); + + /* After the scheduler apply operation, the internal variable + * scheduler->non_idle_offslot_grps reflects the end-point view + * of the count at the end of the active phase. 
+ * + * Any changes that follow (after the scheduler has dropped the + * scheduler->lock) reflect async operations on the scheduler, + * such as a group getting killed (evicted), a new group being inserted, + * a cqs wait-sync triggered state transition, etc. + * + * The condition for enabling the idle timer is that there are no + * non-idle groups off-slot. If there is a non-idle group off-slot, + * the timer should be disabled. + */ + if (atomic_read(&scheduler->non_idle_offslot_grps)) + disable_gpu_idle_fw_timer(kbdev); + else + enable_gpu_idle_fw_timer(kbdev); +} + +static void schedule_actions(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + unsigned long flags; + struct kbase_queue_group *protm_grp; + int ret; + bool skip_idle_slots_update; + bool new_protm_top_grp = false; + + kbase_reset_gpu_assert_prevented(kbdev); + lockdep_assert_held(&scheduler->lock); + + ret = kbase_pm_wait_for_desired_state(kbdev); + if (ret) { + dev_err(kbdev->dev, "Wait for MCU power on failed"); + return; + } + + spin_lock_irqsave(&scheduler->interrupt_lock, flags); + skip_idle_slots_update = kbase_csf_scheduler_protected_mode_in_use(kbdev); + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + + /* Skip updating on-slot idle CSGs if GPU is in protected mode. */ + if (!skip_idle_slots_update) + scheduler_handle_idle_slots(kbdev); + + scheduler_prepare(kbdev); + spin_lock_irqsave(&scheduler->interrupt_lock, flags); + protm_grp = scheduler->active_protm_grp; + + /* Avoid update if the top-group remains unchanged and in protected + * mode. For the said case, all the slots update is effectively + * competing against the active protected mode group (typically the + * top-group). If we update other slots, even on leaving the + * top-group slot untouched, the firmware would exit the protected mode + * for interacting with the host-driver. 
After it, as the top-group + * would again raise the request for entering protected mode, we would + * be actively doing the switching over twice without progressing the + * queue jobs. + */ + if (protm_grp && scheduler->top_grp == protm_grp) { + int new_val; + dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d", + protm_grp->handle); + new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC, + protm_grp, new_val); + } else if (scheduler->top_grp) { + if (protm_grp) + dev_dbg(kbdev->dev, "Scheduler drop protm exec: group-%d", + protm_grp->handle); + + if (!bitmap_empty(scheduler->top_grp->protm_pending_bitmap, + kbdev->csf.global_iface.groups[0].stream_num)) { + dev_dbg(kbdev->dev, "Scheduler prepare protm exec: group-%d of context %d_%d", + scheduler->top_grp->handle, + scheduler->top_grp->kctx->tgid, + scheduler->top_grp->kctx->id); + + /* When entering protected mode all CSG slots can be occupied + * but only the protected mode CSG will be running. Any event + * that would trigger the execution of an on-slot idle CSG will + * need to be handled by the host during protected mode. + */ + new_protm_top_grp = true; + } + + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + + scheduler_apply(kbdev); + + /* Post-apply, all the committed groups in this tick are on + * slots, time to arrange the idle timer on/off decision. + */ + scheduler_handle_idle_timer_onoff(kbdev); + + /* Scheduler is dropping the exec of the previous protm_grp, + * Until the protm quit completes, the GPU is effectively + * locked in the secure mode. 
+ */ + if (protm_grp) + scheduler_force_protm_exit(kbdev); + + wait_csg_slots_start(kbdev); + wait_csg_slots_finish_prio_update(kbdev); + + if (new_protm_top_grp) { + scheduler_group_check_protm_enter(kbdev, + scheduler->top_grp); + } + + return; + } + + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + return; +} + +static void schedule_on_tock(struct work_struct *work) +{ + struct kbase_device *kbdev = container_of(work, struct kbase_device, + csf.scheduler.tock_work.work); + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + + int err = kbase_reset_gpu_try_prevent(kbdev); + /* Regardless of whether reset failed or is currently happening, exit + * early + */ + if (err) + return; + + mutex_lock(&scheduler->lock); + if (scheduler->state == SCHED_SUSPENDED) + goto exit_no_schedule_unlock; + + WARN_ON(!(scheduler->state == SCHED_INACTIVE)); + scheduler->state = SCHED_BUSY; + + /* Undertaking schedule action steps */ + KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK, NULL, 0u); + schedule_actions(kbdev); + + /* Record time information */ + scheduler->last_schedule = jiffies; + + /* Tock is serviced */ + scheduler->tock_pending_request = false; + + scheduler->state = SCHED_INACTIVE; + mutex_unlock(&scheduler->lock); + kbase_reset_gpu_allow(kbdev); + + dev_dbg(kbdev->dev, + "Waking up for event after schedule-on-tock completes."); + wake_up_all(&kbdev->csf.event_wait); + KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_END, NULL, 0u); + return; + +exit_no_schedule_unlock: + mutex_unlock(&scheduler->lock); + kbase_reset_gpu_allow(kbdev); +} + +static void schedule_on_tick(struct work_struct *work) +{ + struct kbase_device *kbdev = container_of(work, struct kbase_device, + csf.scheduler.tick_work); + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + + int err = kbase_reset_gpu_try_prevent(kbdev); + /* Regardless of whether reset failed or is currently happening, exit + * early + */ + if (err) + return; + + mutex_lock(&scheduler->lock); + + 
WARN_ON(scheduler->tick_timer_active); + if (scheduler->state == SCHED_SUSPENDED) + goto exit_no_schedule_unlock; + + scheduler->state = SCHED_BUSY; + /* Do scheduling stuff */ + scheduler_rotate(kbdev); + + /* Undertaking schedule action steps */ + KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK, NULL, + scheduler->total_runnable_grps); + schedule_actions(kbdev); + + /* Record time information */ + scheduler->last_schedule = jiffies; + + /* Kicking next scheduling if needed */ + if (likely(scheduler_timer_is_enabled_nolock(kbdev)) && + (scheduler->total_runnable_grps > 0)) { + start_tick_timer(kbdev); + dev_dbg(kbdev->dev, + "scheduling for next tick, num_runnable_groups:%u\n", + scheduler->total_runnable_grps); + } + + scheduler->state = SCHED_INACTIVE; + mutex_unlock(&scheduler->lock); + kbase_reset_gpu_allow(kbdev); + + dev_dbg(kbdev->dev, "Waking up for event after schedule-on-tick completes."); + wake_up_all(&kbdev->csf.event_wait); + KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_END, NULL, + scheduler->total_runnable_grps); + return; + +exit_no_schedule_unlock: + mutex_unlock(&scheduler->lock); + kbase_reset_gpu_allow(kbdev); +} + +static int wait_csg_slots_suspend(struct kbase_device *kbdev, + const unsigned long *slot_mask, + unsigned int timeout_ms) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + long remaining = kbase_csf_timeout_in_jiffies(timeout_ms); + u32 num_groups = kbdev->csf.global_iface.group_num; + int err = 0; + DECLARE_BITMAP(slot_mask_local, MAX_SUPPORTED_CSGS); + + lockdep_assert_held(&scheduler->lock); + + bitmap_copy(slot_mask_local, slot_mask, MAX_SUPPORTED_CSGS); + + while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS) + && remaining) { + DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS); + + bitmap_copy(changed, slot_mask_local, MAX_SUPPORTED_CSGS); + + remaining = wait_event_timeout(kbdev->csf.event_wait, + slots_state_changed(kbdev, changed, + csg_slot_stopped_locked), + remaining); + + if (remaining) { + u32 i; + + 
for_each_set_bit(i, changed, num_groups) { + struct kbase_queue_group *group; + + if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i))) + continue; + + /* The on slot csg is now stopped */ + clear_bit(i, slot_mask_local); + + group = scheduler->csg_slots[i].resident_group; + if (likely(group)) { + /* Only do save/cleanup if the + * group is not terminated during + * the sleep. + */ + save_csg_slot(group); + if (cleanup_csg_slot(group)) + sched_evict_group(group, true, true); + } + } + } else { + dev_warn(kbdev->dev, "Timed out waiting for CSG slots to suspend, slot_mask: 0x%*pb\n", + num_groups, slot_mask_local); + err = -ETIMEDOUT; + } + } + + return err; +} + +static int suspend_active_queue_groups(struct kbase_device *kbdev, + unsigned long *slot_mask) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + u32 num_groups = kbdev->csf.global_iface.group_num; + u32 slot_num; + int ret; + + lockdep_assert_held(&scheduler->lock); + + for (slot_num = 0; slot_num < num_groups; slot_num++) { + struct kbase_queue_group *group = + scheduler->csg_slots[slot_num].resident_group; + + if (group) { + suspend_queue_group(group); + set_bit(slot_num, slot_mask); + } + } + + ret = wait_csg_slots_suspend(kbdev, slot_mask, kbdev->reset_timeout_ms); + return ret; +} + +static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 }; + int ret; + int ret2; + + mutex_lock(&scheduler->lock); + + ret = suspend_active_queue_groups(kbdev, slot_mask); + + if (ret) { + dev_warn(kbdev->dev, "Timed out waiting for CSG slots to suspend before reset, slot_mask: 0x%*pb\n", + kbdev->csf.global_iface.group_num, slot_mask); + } + + /* Need to flush the GPU cache to ensure suspend buffer + * contents are not lost on reset of GPU. + * Do this even if suspend operation had timed out for some of + * the CSG slots. 
+ * In case the scheduler already in suspended state, the + * cache clean is required as the async reset request from + * the debugfs may race against the scheduler suspend operation + * due to the extra context ref-count, which prevents the + * L2 powering down cache clean operation in the non racing + * case. + */ + kbase_gpu_start_cache_clean(kbdev); + ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev, + kbdev->reset_timeout_ms); + if (ret2) { + dev_warn(kbdev->dev, "Timed out waiting for cache clean to complete before reset"); + if (!ret) + ret = ret2; + } + + mutex_unlock(&scheduler->lock); + + return ret; +} + +/** + * scheduler_handle_reset_in_protected_mode() - Update the state of normal mode + * groups when reset is done during + * protected mode execution. + * + * @kbdev: Pointer to the device. + * + * This function is called at the time of GPU reset, before the suspension of + * queue groups, to handle the case when the reset is getting performed whilst + * GPU is in protected mode. + * On entry to protected mode all the groups, except the top group that executes + * in protected mode, are implicitly suspended by the FW. Thus this function + * simply marks the normal mode groups as suspended (and cleans up the + * corresponding CSG slots) to prevent their potential forceful eviction from + * the Scheduler. So if GPU was in protected mode and there was no fault, then + * only the protected mode group would be suspended in the regular way post exit + * from this function. And if GPU was in normal mode, then all on-slot groups + * will get suspended in the regular way. + * + * Return: true if the groups remaining on the CSG slots need to be suspended in + * the regular way by sending CSG SUSPEND reqs to FW, otherwise false. 
+ */ +static bool scheduler_handle_reset_in_protected_mode(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + u32 const num_groups = kbdev->csf.global_iface.group_num; + struct kbase_queue_group *protm_grp; + bool suspend_on_slot_groups; + unsigned long flags; + u32 csg_nr; + + mutex_lock(&scheduler->lock); + + spin_lock_irqsave(&scheduler->interrupt_lock, flags); + protm_grp = scheduler->active_protm_grp; + + /* If GPU wasn't in protected mode or had exited it before the GPU reset + * then all the on-slot groups can be suspended in the regular way by + * sending CSG SUSPEND requests to FW. + * If there wasn't a fault for protected mode group, then it would + * also need to be suspended in the regular way before the reset. + */ + suspend_on_slot_groups = !(protm_grp && protm_grp->faulted); + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + + if (!protm_grp) + goto unlock; + + /* GPU is in protected mode, so all the on-slot groups barring + * the protected mode group can be marked as suspended right away. + */ + for (csg_nr = 0; csg_nr < num_groups; csg_nr++) { + struct kbase_queue_group *const group = + kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; + int new_val; + + if (!group || (group == protm_grp)) + continue; + + cleanup_csg_slot(group); + group->run_state = KBASE_CSF_GROUP_SUSPENDED; + + /* Simply treat the normal mode groups as non-idle. The tick + * scheduled after the reset will re-initialize the counter + * anyways. 
+ */ + new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, + group, new_val); + } + +unlock: + mutex_unlock(&scheduler->lock); + return suspend_on_slot_groups; +} + +static void scheduler_inner_reset(struct kbase_device *kbdev) +{ + u32 const num_groups = kbdev->csf.global_iface.group_num; + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + unsigned long flags; + + WARN_ON(csgs_active(kbdev)); + + /* Cancel any potential queued delayed work(s) */ + cancel_work_sync(&kbdev->csf.scheduler.gpu_idle_work); + cancel_tick_timer(kbdev); + cancel_work_sync(&scheduler->tick_work); + cancel_delayed_work_sync(&scheduler->tock_work); + cancel_delayed_work_sync(&scheduler->ping_work); + + mutex_lock(&scheduler->lock); + + spin_lock_irqsave(&scheduler->interrupt_lock, flags); + bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS); + if (scheduler->active_protm_grp) + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_EXIT_PROTM, + scheduler->active_protm_grp, 0u); + scheduler->active_protm_grp = NULL; + memset(kbdev->csf.scheduler.csg_slots, 0, + num_groups * sizeof(struct kbase_csf_csg_slot)); + bitmap_zero(kbdev->csf.scheduler.csg_inuse_bitmap, num_groups); + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + + scheduler->top_ctx = NULL; + scheduler->top_grp = NULL; + + KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp, + scheduler->num_active_address_spaces | + (((u64)scheduler->total_runnable_grps) << 32)); + + mutex_unlock(&scheduler->lock); +} + +void kbase_csf_scheduler_reset(struct kbase_device *kbdev) +{ + struct kbase_context *kctx; + + WARN_ON(!kbase_reset_gpu_is_active(kbdev)); + + KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET, NULL, 0u); + + if (scheduler_handle_reset_in_protected_mode(kbdev) && + !suspend_active_queue_groups_on_reset(kbdev)) { + /* As all groups have been successfully evicted from the CSG + * slots, clear out the scheduler data 
fields and return + */ + scheduler_inner_reset(kbdev); + return; + } + + mutex_lock(&kbdev->kctx_list_lock); + + /* The loop to iterate over the kbase contexts is present due to lock + * ordering issue between kctx->csf.lock & kbdev->csf.scheduler.lock. + * CSF ioctls first take kctx->csf.lock which is context-specific and + * then take kbdev->csf.scheduler.lock for global actions like assigning + * a CSG slot. + * If the lock ordering constraint was not there then could have + * directly looped over the active queue groups. + */ + list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { + /* Firmware reload would reinitialize the CSG & CS interface IO + * pages, so just need to internally mark the currently active + * queue groups as terminated (similar to the unexpected OoM + * event case). + * No further work can now get executed for the active groups + * (new groups would have to be created to execute work) and + * in near future Clients would be duly informed of this + * reset. The resources (like User IO pages, GPU queue memory) + * allocated for the associated queues would be freed when the + * Clients do the teardown when they become aware of the reset. + */ + kbase_csf_active_queue_groups_reset(kbdev, kctx); + } + + mutex_unlock(&kbdev->kctx_list_lock); + + /* After queue groups reset, the scheduler data fields clear out */ + scheduler_inner_reset(kbdev); +} + +static void firmware_aliveness_monitor(struct work_struct *work) +{ + struct kbase_device *kbdev = container_of(work, struct kbase_device, + csf.scheduler.ping_work.work); + int err; + + /* Ensure that reset will not be occurring while this function is being + * executed as otherwise calling kbase_reset_gpu when reset is already + * occurring is a programming error. + * + * We must use the 'try' variant as the Reset worker can try to flush + * this workqueue, which would otherwise deadlock here if we tried to + * wait for the reset (and thus ourselves) to complete. 
+ */ + err = kbase_reset_gpu_try_prevent(kbdev); + if (err) { + /* It doesn't matter whether the value was -EAGAIN or a fatal + * error, just stop processing. In case of -EAGAIN, the Reset + * worker will restart the scheduler later to resume ping + */ + return; + } + + mutex_lock(&kbdev->csf.scheduler.lock); + +#ifdef CONFIG_MALI_DEBUG + if (fw_debug) { + /* ping requests cause distraction in firmware debugging */ + goto exit; + } +#endif + + if (kbdev->csf.scheduler.state == SCHED_SUSPENDED) + goto exit; + + if (get_nr_active_csgs(kbdev) != 1) + goto exit; + + if (kbase_csf_scheduler_protected_mode_in_use(kbdev)) + goto exit; + + if (kbase_pm_context_active_handle_suspend(kbdev, + KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { + /* Suspend pending - no real need to ping */ + goto exit; + } + + kbase_pm_wait_for_desired_state(kbdev); + + err = kbase_csf_firmware_ping_wait(kbdev); + + if (err) { + /* It is acceptable to enqueue a reset whilst we've prevented + * them, it will happen after we've allowed them again + */ + if (kbase_prepare_to_reset_gpu( + kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu(kbdev); + } else if (get_nr_active_csgs(kbdev) == 1) { + queue_delayed_work(system_long_wq, + &kbdev->csf.scheduler.ping_work, + msecs_to_jiffies(FIRMWARE_PING_INTERVAL_MS)); + } + + kbase_pm_context_idle(kbdev); +exit: + mutex_unlock(&kbdev->csf.scheduler.lock); + kbase_reset_gpu_allow(kbdev); + return; +} + +int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, + struct kbase_suspend_copy_buffer *sus_buf) +{ + struct kbase_context *const kctx = group->kctx; + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + int err = 0; + + kbase_reset_gpu_assert_prevented(kbdev); + lockdep_assert_held(&kctx->csf.lock); + mutex_lock(&scheduler->lock); + + if (kbasep_csf_scheduler_group_is_on_slot_locked(group)) { + DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0}; + + 
set_bit(kbase_csf_scheduler_group_get_slot(group), slot_mask); + + if (!WARN_ON(scheduler->state == SCHED_SUSPENDED)) + suspend_queue_group(group); + err = wait_csg_slots_suspend(kbdev, slot_mask, + kbdev->csf.fw_timeout_ms); + if (err) { + dev_warn(kbdev->dev, "Timed out waiting for the group %d to suspend on slot %d", + group->handle, group->csg_nr); + goto exit; + } + } + + if (queue_group_suspended_locked(group)) { + unsigned int target_page_nr = 0, i = 0; + u64 offset = sus_buf->offset; + size_t to_copy = sus_buf->size; + + if (scheduler->state != SCHED_SUSPENDED) { + /* Similar to the case of HW counters, need to flush + * the GPU cache before reading from the suspend buffer + * pages as they are mapped and cached on GPU side. + */ + kbase_gpu_start_cache_clean(kbdev); + kbase_gpu_wait_cache_clean(kbdev); + } else { + /* Make sure power down transitions have completed, + * i.e. L2 has been powered off as that would ensure + * its contents are flushed to memory. + * This is needed as Scheduler doesn't wait for the + * power down to finish. + */ + kbase_pm_wait_for_desired_state(kbdev); + } + + for (i = 0; i < PFN_UP(sus_buf->size) && + target_page_nr < sus_buf->nr_pages; i++) { + struct page *pg = + as_page(group->normal_suspend_buf.phy[i]); + void *sus_page = kmap(pg); + + if (sus_page) { + kbase_sync_single_for_cpu(kbdev, + kbase_dma_addr(pg), + PAGE_SIZE, DMA_BIDIRECTIONAL); + + err = kbase_mem_copy_to_pinned_user_pages( + sus_buf->pages, sus_page, + &to_copy, sus_buf->nr_pages, + &target_page_nr, offset); + kunmap(pg); + if (err) + break; + } else { + err = -ENOMEM; + break; + } + } + schedule_in_cycle(group, false); + } else { + /* If addr-space fault, the group may have been evicted */ + err = -EIO; + } + +exit: + mutex_unlock(&scheduler->lock); + return err; +} + +KBASE_EXPORT_TEST_API(kbase_csf_scheduler_group_copy_suspend_buf); + +/** + * group_sync_updated() - Evaluate sync wait condition of all blocked command + * queues of the group. 
+ * + * @group: Pointer to the command queue group that has blocked command queue(s) + * bound to it. + * + * Return: true if sync wait condition is satisfied for at least one blocked + * queue of the group. + */ +static bool group_sync_updated(struct kbase_queue_group *group) +{ + bool updated = false; + int stream; + + /* Groups can also be blocked on-slot during protected mode. */ + WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC && + group->run_state != KBASE_CSF_GROUP_IDLE); + + for (stream = 0; stream < MAX_SUPPORTED_STREAMS_PER_GROUP; ++stream) { + struct kbase_queue *const queue = group->bound_queues[stream]; + + /* To check the necessity of sync-wait evaluation, + * we rely on the cached 'status_wait' instead of reading it + * directly from shared memory as the CSG has been already + * evicted from the CSG slot, thus this CSG doesn't have + * valid information in the shared memory. + */ + if (queue && queue->enabled && + CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) + if (evaluate_sync_update(queue)) { + updated = true; + queue->status_wait = 0; + } + } + + return updated; +} + +/** + * scheduler_get_protm_enter_async_group() - Check if the GPU queue group + * can be now allowed to execute in protected mode. + * + * @kbdev: Pointer to the GPU device. + * @group: Pointer to the GPU queue group. + * + * This function is called outside the scheduling tick/tock to determine + * if the given GPU queue group can now execute in protected mode or not. + * If the group pointer passed is NULL then the evaluation is done for the + * highest priority group on the scheduler maintained group lists without + * tick associated rotation actions. This is referred as the 'top-group' + * in a tock action sense. + * + * It returns the same group pointer, that was passed as an argument, if that + * group matches the highest priority group and has pending protected region + * requests otherwise NULL is returned. 
+ * + * If the group pointer passed is NULL then the internally evaluated highest + * priority group is returned if that has pending protected region requests + * otherwise NULL is returned. + * + * The evaluated highest priority group may not necessarily be the same as the + * scheduler->top_grp. This can happen if there is dynamic de-idle update + * during the tick interval for some on-slots groups that were idle during the + * scheduler normal scheduling action, where the scheduler->top_grp was set. + * The recorded scheduler->top_grp is untouched by this evaluation, so will not + * affect the scheduler context/priority list rotation arrangement. + * + * Return: the pointer to queue group that can currently execute in protected + * mode or NULL. + */ +static struct kbase_queue_group *scheduler_get_protm_enter_async_group( + struct kbase_device *const kbdev, + struct kbase_queue_group *const group) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + struct kbase_queue_group *match_grp, *input_grp; + + lockdep_assert_held(&scheduler->lock); + + if (scheduler->state != SCHED_INACTIVE) + return NULL; + + match_grp = get_tock_top_group(scheduler); + input_grp = group ? 
group : match_grp; + + if (input_grp && (input_grp == match_grp)) { + struct kbase_csf_cmd_stream_group_info *ginfo = + &kbdev->csf.global_iface.groups[0]; + unsigned long *pending = + input_grp->protm_pending_bitmap; + unsigned long flags; + + spin_lock_irqsave(&scheduler->interrupt_lock, flags); + + if (kbase_csf_scheduler_protected_mode_in_use(kbdev) || + bitmap_empty(pending, ginfo->stream_num)) + input_grp = NULL; + + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + } else { + input_grp = NULL; + } + + return input_grp; +} + +void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group) +{ + struct kbase_device *const kbdev = group->kctx->kbdev; + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + + int err = kbase_reset_gpu_try_prevent(kbdev); + /* Regardless of whether reset failed or is currently happening, exit + * early + */ + if (err) + return; + + mutex_lock(&scheduler->lock); + + /* Check if the group is now eligible for execution in protected mode. */ + if (scheduler_get_protm_enter_async_group(kbdev, group)) + scheduler_group_check_protm_enter(kbdev, group); + + mutex_unlock(&scheduler->lock); + kbase_reset_gpu_allow(kbdev); +} + +/** + * check_sync_update_for_idle_group_protm() - Check the sync wait condition + * for all the queues bound to + * the given group. + * + * @group: Pointer to the group that requires evaluation. + * + * This function is called if the GPU is in protected mode and there are on + * slot idle groups with higher priority than the active protected mode group. + * This function will evaluate the sync condition, if any, of all the queues + * bound to the given group. + * + * Return true if the sync condition of at least one queue has been satisfied. 
+ */ +static bool check_sync_update_for_idle_group_protm( + struct kbase_queue_group *group) +{ + struct kbase_device *const kbdev = group->kctx->kbdev; + struct kbase_csf_scheduler *const scheduler = + &kbdev->csf.scheduler; + bool sync_update_done = false; + int i; + + lockdep_assert_held(&scheduler->lock); + + for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) { + struct kbase_queue *queue = group->bound_queues[i]; + + if (queue && queue->enabled && !sync_update_done) { + struct kbase_csf_cmd_stream_group_info *const ginfo = + &kbdev->csf.global_iface.groups[group->csg_nr]; + struct kbase_csf_cmd_stream_info *const stream = + &ginfo->streams[queue->csi_index]; + u32 status = kbase_csf_firmware_cs_output( + stream, CS_STATUS_WAIT); + unsigned long flags; + + KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_STATUS_WAIT, + queue->group, queue, status); + + if (!CS_STATUS_WAIT_SYNC_WAIT_GET(status)) + continue; + + /* Save the information of sync object of the command + * queue so the callback function, 'group_sync_updated' + * can evaluate the sync object when it gets updated + * later. + */ + queue->status_wait = status; + queue->sync_ptr = kbase_csf_firmware_cs_output( + stream, CS_STATUS_WAIT_SYNC_POINTER_LO); + queue->sync_ptr |= (u64)kbase_csf_firmware_cs_output( + stream, CS_STATUS_WAIT_SYNC_POINTER_HI) << 32; + queue->sync_value = kbase_csf_firmware_cs_output( + stream, CS_STATUS_WAIT_SYNC_VALUE); + queue->blocked_reason = + CS_STATUS_BLOCKED_REASON_REASON_GET( + kbase_csf_firmware_cs_output( + stream, + CS_STATUS_BLOCKED_REASON)); + + if (!evaluate_sync_update(queue)) + continue; + + /* Update csg_slots_idle_mask and group's run_state */ + if (group->run_state != KBASE_CSF_GROUP_RUNNABLE) { + /* Only clear the group's idle flag if it has been dealt + * with by the scheduler's tick/tock action, otherwise + * leave it untouched. 
+ */ + spin_lock_irqsave(&scheduler->interrupt_lock, + flags); + clear_bit((unsigned int)group->csg_nr, + scheduler->csg_slots_idle_mask); + KBASE_KTRACE_ADD_CSF_GRP( + kbdev, CSG_SLOT_IDLE_CLEAR, group, + scheduler->csg_slots_idle_mask[0]); + spin_unlock_irqrestore( + &scheduler->interrupt_lock, flags); + group->run_state = KBASE_CSF_GROUP_RUNNABLE; + } + + KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u); + sync_update_done = true; + } + } + + return sync_update_done; +} + +/** + * check_sync_update_for_idle_groups_protm() - Check the sync wait condition + * for the idle groups on slot + * during protected mode. + * + * @kbdev: Pointer to the GPU device + * + * This function checks the gpu queues of all the idle groups on slot during + * protected mode that has a higher priority than the active protected mode + * group. + * + * Return true if the sync condition of at least one queue in a group has been + * satisfied. + */ +static bool check_sync_update_for_idle_groups_protm(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + struct kbase_queue_group *protm_grp; + bool exit_protm = false; + unsigned long flags; + u32 num_groups; + u32 i; + + lockdep_assert_held(&scheduler->lock); + + spin_lock_irqsave(&scheduler->interrupt_lock, flags); + protm_grp = scheduler->active_protm_grp; + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + + if (!protm_grp) + return exit_protm; + + num_groups = kbdev->csf.global_iface.group_num; + + for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) { + struct kbase_csf_csg_slot *csg_slot = + &scheduler->csg_slots[i]; + struct kbase_queue_group *group = csg_slot->resident_group; + + if (group->scan_seq_num < protm_grp->scan_seq_num) { + /* If sync update has been performed for the group that + * has a higher priority than the protm group, then we + * need to exit protected mode. 
+ */ + if (check_sync_update_for_idle_group_protm(group)) + exit_protm = true; + } + } + + return exit_protm; +} + +/** + * check_group_sync_update_worker() - Check the sync wait condition for all the + * blocked queue groups + * + * @work: Pointer to the context-specific work item for evaluating the wait + * condition for all the queue groups in idle_wait_groups list. + * + * This function checks the gpu queues of all the groups present in both + * idle_wait_groups list of a context and all on slot idle groups (if GPU + * is in protected mode). + * If the sync wait condition for at least one queue bound to the group has + * been satisfied then the group is moved to the per context list of + * runnable groups so that Scheduler can consider scheduling the group + * in next tick or exit protected mode. + */ +static void check_group_sync_update_worker(struct work_struct *work) +{ + struct kbase_context *const kctx = container_of(work, + struct kbase_context, csf.sched.sync_update_work); + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + + mutex_lock(&scheduler->lock); + + KBASE_KTRACE_ADD(kbdev, GROUP_SYNC_UPDATE_WORKER_BEGIN, kctx, 0u); + if (kctx->csf.sched.num_idle_wait_grps != 0) { + struct kbase_queue_group *group, *temp; + + list_for_each_entry_safe(group, temp, + &kctx->csf.sched.idle_wait_groups, link) { + if (group_sync_updated(group)) { + /* Move this group back in to the runnable + * groups list of the context. 
+ */ + update_idle_suspended_group_state(group); + KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u); + } + } + } else { + WARN_ON(!list_empty(&kctx->csf.sched.idle_wait_groups)); + } + + if (check_sync_update_for_idle_groups_protm(kbdev)) + scheduler_force_protm_exit(kbdev); + KBASE_KTRACE_ADD(kbdev, GROUP_SYNC_UPDATE_WORKER_END, kctx, 0u); + + mutex_unlock(&scheduler->lock); +} + +static +enum kbase_csf_event_callback_action check_group_sync_update_cb(void *param) +{ + struct kbase_context *const kctx = param; + + KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT, kctx, 0u); + queue_work(kctx->csf.sched.sync_update_wq, + &kctx->csf.sched.sync_update_work); + + return KBASE_CSF_EVENT_CALLBACK_KEEP; +} + +int kbase_csf_scheduler_context_init(struct kbase_context *kctx) +{ + int priority; + int err; + + for (priority = 0; priority < KBASE_QUEUE_GROUP_PRIORITY_COUNT; + ++priority) { + INIT_LIST_HEAD(&kctx->csf.sched.runnable_groups[priority]); + } + + kctx->csf.sched.num_runnable_grps = 0; + INIT_LIST_HEAD(&kctx->csf.sched.idle_wait_groups); + kctx->csf.sched.num_idle_wait_grps = 0; + kctx->csf.sched.ngrp_to_schedule = 0; + + kctx->csf.sched.sync_update_wq = + alloc_ordered_workqueue("mali_kbase_csf_sync_update_wq", + WQ_HIGHPRI); + if (!kctx->csf.sched.sync_update_wq) { + dev_err(kctx->kbdev->dev, + "Failed to initialize scheduler context workqueue"); + return -ENOMEM; + } + + INIT_WORK(&kctx->csf.sched.sync_update_work, + check_group_sync_update_worker); + + err = kbase_csf_event_wait_add(kctx, check_group_sync_update_cb, kctx); + + if (err) { + dev_err(kctx->kbdev->dev, + "Failed to register a sync update callback"); + destroy_workqueue(kctx->csf.sched.sync_update_wq); + } + + return err; +} + +void kbase_csf_scheduler_context_term(struct kbase_context *kctx) +{ + kbase_csf_event_wait_remove(kctx, check_group_sync_update_cb, kctx); + cancel_work_sync(&kctx->csf.sched.sync_update_work); + destroy_workqueue(kctx->csf.sched.sync_update_wq); +} + +int 
kbase_csf_scheduler_init(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + u32 num_groups = kbdev->csf.global_iface.group_num; + + bitmap_zero(scheduler->csg_inuse_bitmap, num_groups); + bitmap_zero(scheduler->csg_slots_idle_mask, num_groups); + + scheduler->csg_slots = kcalloc(num_groups, + sizeof(*scheduler->csg_slots), GFP_KERNEL); + if (!scheduler->csg_slots) { + dev_err(kbdev->dev, + "Failed to allocate memory for csg slot status array\n"); + return -ENOMEM; + } + + return 0; +} + +int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + + scheduler->timer_enabled = true; + + scheduler->wq = alloc_ordered_workqueue("csf_scheduler_wq", WQ_HIGHPRI); + if (!scheduler->wq) { + dev_err(kbdev->dev, "Failed to allocate scheduler workqueue\n"); + return -ENOMEM; + } + + INIT_WORK(&scheduler->tick_work, schedule_on_tick); + INIT_DEFERRABLE_WORK(&scheduler->tock_work, schedule_on_tock); + + INIT_DEFERRABLE_WORK(&scheduler->ping_work, firmware_aliveness_monitor); + BUILD_BUG_ON(CSF_FIRMWARE_TIMEOUT_MS >= FIRMWARE_PING_INTERVAL_MS); + + mutex_init(&scheduler->lock); + spin_lock_init(&scheduler->interrupt_lock); + + /* Internal lists */ + INIT_LIST_HEAD(&scheduler->runnable_kctxs); + INIT_LIST_HEAD(&scheduler->groups_to_schedule); + INIT_LIST_HEAD(&scheduler->idle_groups_to_schedule); + + BUILD_BUG_ON(MAX_SUPPORTED_CSGS > + (sizeof(scheduler->csgs_events_enable_mask) * BITS_PER_BYTE)); + bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS); + scheduler->state = SCHED_SUSPENDED; + scheduler->pm_active_count = 0; + scheduler->ngrp_to_schedule = 0; + scheduler->total_runnable_grps = 0; + scheduler->top_ctx = NULL; + scheduler->top_grp = NULL; + scheduler->last_schedule = 0; + scheduler->tock_pending_request = false; + scheduler->active_protm_grp = NULL; + scheduler->gpu_idle_fw_timer_enabled = false; + scheduler->csg_scheduling_period_ms = 
CSF_SCHEDULER_TIME_TICK_MS; + scheduler_doorbell_init(kbdev); + + INIT_WORK(&scheduler->gpu_idle_work, gpu_idle_worker); + atomic_set(&scheduler->non_idle_offslot_grps, 0); + + hrtimer_init(&scheduler->tick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + scheduler->tick_timer.function = tick_timer_callback; + scheduler->tick_timer_active = false; + + return 0; +} + +void kbase_csf_scheduler_term(struct kbase_device *kbdev) +{ + if (kbdev->csf.scheduler.csg_slots) { + WARN_ON(atomic_read(&kbdev->csf.scheduler.non_idle_offslot_grps)); + WARN_ON(csgs_active(kbdev)); + flush_work(&kbdev->csf.scheduler.gpu_idle_work); + mutex_lock(&kbdev->csf.scheduler.lock); + if (WARN_ON(kbdev->csf.scheduler.state != SCHED_SUSPENDED)) + scheduler_suspend(kbdev); + mutex_unlock(&kbdev->csf.scheduler.lock); + cancel_delayed_work_sync(&kbdev->csf.scheduler.ping_work); + cancel_tick_timer(kbdev); + cancel_work_sync(&kbdev->csf.scheduler.tick_work); + cancel_delayed_work_sync(&kbdev->csf.scheduler.tock_work); + mutex_destroy(&kbdev->csf.scheduler.lock); + kfree(kbdev->csf.scheduler.csg_slots); + kbdev->csf.scheduler.csg_slots = NULL; + } +} + +void kbase_csf_scheduler_early_term(struct kbase_device *kbdev) +{ + if (kbdev->csf.scheduler.wq) + destroy_workqueue(kbdev->csf.scheduler.wq); +} + +/** + * scheduler_enable_tick_timer_nolock - Enable the scheduler tick timer. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This function will restart the scheduler tick so that regular scheduling can + * be resumed without any explicit trigger (like kicking of GPU queues). This + * is a variant of kbase_csf_scheduler_enable_tick_timer() that assumes the + * CSF scheduler lock to already have been held. 
+ */ +static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + + if (unlikely(!scheduler_timer_is_enabled_nolock(kbdev))) + return; + + WARN_ON((scheduler->state != SCHED_INACTIVE) && + (scheduler->state != SCHED_SUSPENDED)); + + if (scheduler->total_runnable_grps > 0) { + enqueue_tick_work(kbdev); + dev_dbg(kbdev->dev, "Re-enabling the scheduler timer\n"); + } else if (scheduler->state != SCHED_SUSPENDED) { + queue_work(system_wq, &scheduler->gpu_idle_work); + } +} + +void kbase_csf_scheduler_enable_tick_timer(struct kbase_device *kbdev) +{ + mutex_lock(&kbdev->csf.scheduler.lock); + scheduler_enable_tick_timer_nolock(kbdev); + mutex_unlock(&kbdev->csf.scheduler.lock); +} + +bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + bool enabled; + + mutex_lock(&scheduler->lock); + enabled = scheduler_timer_is_enabled_nolock(kbdev); + mutex_unlock(&scheduler->lock); + + return enabled; +} + +void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev, + bool enable) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + bool currently_enabled; + + mutex_lock(&scheduler->lock); + + currently_enabled = scheduler_timer_is_enabled_nolock(kbdev); + if (currently_enabled && !enable) { + scheduler->timer_enabled = false; + cancel_tick_timer(kbdev); + cancel_delayed_work(&scheduler->tock_work); + mutex_unlock(&scheduler->lock); + /* The non-sync version to cancel the normal work item is not + * available, so need to drop the lock before cancellation. 
+ */ + cancel_work_sync(&scheduler->tick_work); + } else if (!currently_enabled && enable) { + scheduler->timer_enabled = true; + + scheduler_enable_tick_timer_nolock(kbdev); + mutex_unlock(&scheduler->lock); + } +} + +void kbase_csf_scheduler_kick(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + + mutex_lock(&scheduler->lock); + + if (unlikely(scheduler_timer_is_enabled_nolock(kbdev))) + goto out; + + if (scheduler->total_runnable_grps > 0) { + enqueue_tick_work(kbdev); + dev_dbg(kbdev->dev, "Kicking the scheduler manually\n"); + } + +out: + mutex_unlock(&scheduler->lock); +} + +void kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + + /* Cancel any potential queued delayed work(s) */ + cancel_work_sync(&scheduler->tick_work); + cancel_delayed_work_sync(&scheduler->tock_work); + + if (kbase_reset_gpu_prevent_and_wait(kbdev)) { + dev_warn(kbdev->dev, + "Stop PM suspending for failing to prevent gpu reset.\n"); + return; + } + + mutex_lock(&scheduler->lock); + + disable_gpu_idle_fw_timer(kbdev); + + if (scheduler->state != SCHED_SUSPENDED) { + suspend_active_groups_on_powerdown(kbdev, true); + dev_info(kbdev->dev, "Scheduler PM suspend"); + scheduler_suspend(kbdev); + cancel_tick_timer(kbdev); + } + mutex_unlock(&scheduler->lock); + + kbase_reset_gpu_allow(kbdev); +} +KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_suspend); + +void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + + mutex_lock(&scheduler->lock); + + if (scheduler->total_runnable_grps > 0) { + WARN_ON(scheduler->state != SCHED_SUSPENDED); + dev_info(kbdev->dev, "Scheduler PM resume"); + scheduler_wakeup(kbdev, true); + } + mutex_unlock(&scheduler->lock); +} +KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_resume); + +void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev) +{ + unsigned long flags; 
+ u32 prev_count; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + prev_count = kbdev->csf.scheduler.pm_active_count++; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* On 0 => 1, make a pm_ctx_active request */ + if (!prev_count) + kbase_pm_context_active(kbdev); + else + WARN_ON(prev_count == U32_MAX); +} +KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_active); + +void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev) +{ + unsigned long flags; + u32 prev_count; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + prev_count = kbdev->csf.scheduler.pm_active_count--; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (prev_count == 1) + kbase_pm_context_idle(kbdev); + else + WARN_ON(prev_count == 0); +} +KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_idle); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_scheduler.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_scheduler.h new file mode 100644 index 0000000..428ecbe --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_scheduler.h @@ -0,0 +1,494 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#ifndef _KBASE_CSF_SCHEDULER_H_ +#define _KBASE_CSF_SCHEDULER_H_ + +#include "mali_kbase_csf.h" + +/** + * kbase_csf_scheduler_queue_start() - Enable the running of GPU command queue + * on firmware. + * + * @queue: Pointer to the GPU command queue to be started. + * + * This function would enable the start of a CSI, within a + * CSG, to which the @queue was bound. + * If the CSG is already scheduled and resident, the CSI will be started + * right away, otherwise once the group is made resident. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_csf_scheduler_queue_start(struct kbase_queue *queue); + +/** + * kbase_csf_scheduler_queue_stop() - Disable the running of GPU command queue + * on firmware. + * + * @queue: Pointer to the GPU command queue to be stopped. + * + * This function would stop the CSI, within a CSG, to which @queue was bound. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue); + +/** + * kbase_csf_scheduler_group_protm_enter - Handle the protm enter event for the + * GPU command queue group. + * + * @group: The command queue group. + * + * This function could request the firmware to enter the protected mode + * and allow the execution of protected region instructions for all the + * bound queues of the group that have protm pending bit set in their + * respective CS_ACK register. + */ +void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group); + +/** + * kbase_csf_scheduler_group_get_slot() - Checks if a queue group is + * programmed on a firmware CSG slot + * and returns the slot number. + * + * @group: The command queue group. + * + * Return: The slot number, if the group is programmed on a slot. + * Otherwise returns a negative number. + * + * Note: This function should not be used if the interrupt_lock is held. Use + * kbase_csf_scheduler_group_get_slot_locked() instead. 
+ */ +int kbase_csf_scheduler_group_get_slot(struct kbase_queue_group *group); + +/** + * kbase_csf_scheduler_group_get_slot_locked() - Checks if a queue group is + * programmed on a firmware CSG slot + * and returns the slot number. + * + * @group: The command queue group. + * + * Return: The slot number, if the group is programmed on a slot. + * Otherwise returns a negative number. + * + * Note: Caller must hold the interrupt_lock. + */ +int kbase_csf_scheduler_group_get_slot_locked(struct kbase_queue_group *group); + +/** + * kbase_csf_scheduler_group_events_enabled() - Checks if interrupt events + * should be handled for a queue group. + * + * @kbdev: The device of the group. + * @group: The queue group. + * + * Return: true if interrupt events should be handled. + * + * Note: Caller must hold the interrupt_lock. + */ +bool kbase_csf_scheduler_group_events_enabled(struct kbase_device *kbdev, + struct kbase_queue_group *group); + +/** + * kbase_csf_scheduler_get_group_on_slot()- Gets the queue group that has been + * programmed to a firmware CSG slot. + * + * @kbdev: The GPU device. + * @slot: The slot for which to get the queue group. + * + * Return: Pointer to the programmed queue group. + * + * Note: Caller must hold the interrupt_lock. + */ +struct kbase_queue_group *kbase_csf_scheduler_get_group_on_slot( + struct kbase_device *kbdev, int slot); + +/** + * kbase_csf_scheduler_group_deschedule() - Deschedule a GPU command queue + * group from the firmware. + * + * @group: Pointer to the queue group to be descheduled. + * + * This function would disable the scheduling of GPU command queue group on + * firmware. + */ +void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group); + +/** + * kbase_csf_scheduler_evict_ctx_slots() - Evict all GPU command queue groups + * of a given context that are active + * running from the firmware. + * + * @kbdev: The GPU device. + * @kctx: Kbase context for the evict operation. 
+ * @evicted_groups: List_head for returning evicted active queue groups. + * + * This function would disable the scheduling of GPU command queue groups active + * on firmware slots from the given Kbase context. The affected groups are + * added to the supplied list_head argument. + */ +void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev, + struct kbase_context *kctx, struct list_head *evicted_groups); + +/** + * kbase_csf_scheduler_context_init() - Initialize the context-specific part + * for CSF scheduler. + * + * @kctx: Pointer to kbase context that is being created. + * + * This function must be called during Kbase context creation. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_csf_scheduler_context_init(struct kbase_context *kctx); + +/** + * kbase_csf_scheduler_init - Initialize the CSF scheduler + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * The scheduler does the arbitration for the CSG slots + * provided by the firmware between the GPU command queue groups created + * by the Clients. + * This function must be called after loading firmware and parsing its capabilities. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_csf_scheduler_init(struct kbase_device *kbdev); + +/** + * kbase_csf_scheduler_early_init - Early initialization for the CSF scheduler + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * Initialize necessary resources such as locks, workqueue for CSF scheduler. + * This must be called at kbase probe. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_csf_scheduler_early_init(struct kbase_device *kbdev); + +/** + * kbase_csf_scheduler_context_term() - Terminate the context-specific part + * for CSF scheduler. + * + * @kctx: Pointer to kbase context that is being terminated. + * + * This function must be called during Kbase context termination. 
+ */ +void kbase_csf_scheduler_context_term(struct kbase_context *kctx); + +/** + * kbase_csf_scheduler_term - Terminate the CSF scheduler. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This should be called when unload of firmware is done on device + * termination. + */ +void kbase_csf_scheduler_term(struct kbase_device *kbdev); + +/** + * kbase_csf_scheduler_early_term - Early termination of the CSF scheduler. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This should be called only when kbase probe fails or gets rmmoded. + */ +void kbase_csf_scheduler_early_term(struct kbase_device *kbdev); + +/** + * kbase_csf_scheduler_reset - Reset the state of all active GPU command + * queue groups. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This function will first iterate through all the active/scheduled GPU + * command queue groups and suspend them (to avoid losing work for groups + * that are not stuck). The groups that could not get suspended would be + * descheduled and marked as terminated (which will then lead to unbinding + * of all the queues bound to them) and also no more work would be allowed + * to execute for them. + * + * This is similar to the action taken in response to an unexpected OoM event. + * No explicit re-initialization is done for CSG & CS interface I/O pages; + * instead, that happens implicitly on firmware reload. + * + * Should be called only after initiating the GPU reset. + */ +void kbase_csf_scheduler_reset(struct kbase_device *kbdev); + +/** + * kbase_csf_scheduler_enable_tick_timer - Enable the scheduler tick timer. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This function will restart the scheduler tick so that regular scheduling can + * be resumed without any explicit trigger (like kicking of GPU queues). 
+ */ +void kbase_csf_scheduler_enable_tick_timer(struct kbase_device *kbdev); + +/** + * kbase_csf_scheduler_group_copy_suspend_buf - Suspend a queue + * group and copy suspend buffer. + * + * This function is called to suspend a queue group and copy the suspend_buffer + * contents to the input buffer provided. + * + * @group: Pointer to the queue group to be suspended. + * @sus_buf: Pointer to the structure which contains details of the + * user buffer and its kernel pinned pages to which we need to copy + * the group suspend buffer. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, + struct kbase_suspend_copy_buffer *sus_buf); + +/** + * kbase_csf_scheduler_lock - Acquire the global Scheduler lock. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This function will take the global scheduler lock, in order to serialize + * against the Scheduler actions, for access to CS IO pages. + */ +static inline void kbase_csf_scheduler_lock(struct kbase_device *kbdev) +{ + mutex_lock(&kbdev->csf.scheduler.lock); +} + +/** + * kbase_csf_scheduler_unlock - Release the global Scheduler lock. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +static inline void kbase_csf_scheduler_unlock(struct kbase_device *kbdev) +{ + mutex_unlock(&kbdev->csf.scheduler.lock); +} + +/** + * kbase_csf_scheduler_spin_lock - Acquire Scheduler interrupt spinlock. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @flags: Pointer to the memory location that would store the previous + * interrupt state. + * + * This function will take the Scheduler interrupt spinlock, in order to + * serialize against the Scheduler actions, for access to CS IO pages.
+ */ +static inline void kbase_csf_scheduler_spin_lock(struct kbase_device *kbdev, + unsigned long *flags) +{ + spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, *flags); +} + +/** + * kbase_csf_scheduler_spin_unlock - Release Scheduler interrupt spinlock. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @flags: Previously stored interrupt state when Scheduler interrupt + * spinlock was acquired. + */ +static inline void kbase_csf_scheduler_spin_unlock(struct kbase_device *kbdev, + unsigned long flags) +{ + spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags); +} + +/** + * kbase_csf_scheduler_spin_lock_assert_held - Assert if the Scheduler + * interrupt spinlock is held. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +static inline void +kbase_csf_scheduler_spin_lock_assert_held(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock); +} + +/** + * kbase_csf_scheduler_timer_is_enabled() - Check if the scheduler wakes up + * automatically for periodic tasks. + * + * @kbdev: Pointer to the device + * + * Return: true if the scheduler is configured to wake up periodically + */ +bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev); + +/** + * kbase_csf_scheduler_timer_set_enabled() - Enable/disable periodic + * scheduler tasks. + * + * @kbdev: Pointer to the device + * @enable: Whether to enable periodic scheduler tasks + */ +void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev, + bool enable); + +/** + * kbase_csf_scheduler_kick - Perform pending scheduling tasks once. + * + * Note: This function is only effective if the scheduling timer is disabled. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. 
+ */ +void kbase_csf_scheduler_kick(struct kbase_device *kbdev); + +/** + * kbase_csf_scheduler_protected_mode_in_use() - Check if the scheduler is + * running with protected mode tasks. + * + * @kbdev: Pointer to the device + * + * Return: true if the scheduler is running with protected mode tasks + */ +static inline bool kbase_csf_scheduler_protected_mode_in_use( + struct kbase_device *kbdev) +{ + return (kbdev->csf.scheduler.active_protm_grp != NULL); +} + +/** + * kbase_csf_scheduler_pm_active - Perform scheduler power active operation + * + * Note: This function will increase the scheduler's internal pm_active_count + * value, ensuring that both GPU and MCU are powered for access. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev); + +/** + * kbase_csf_scheduler_pm_idle - Perform the scheduler power idle operation + * + * Note: This function will decrease the scheduler's internal pm_active_count + * value. On reaching 0, the MCU and GPU could be powered off. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev); + +/** + * kbase_csf_scheduler_pm_resume - Reactivate the scheduler on system resume + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This function will make the scheduler resume the scheduling of queue groups + * and take the power management reference, if there are any runnable groups. + */ +void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev); + +/** + * kbase_csf_scheduler_pm_suspend - Idle the scheduler on system suspend + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * This function will make the scheduler suspend all the running queue groups + * and drop its power management reference.
+ */ +void kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev); + +/** + * kbase_csf_scheduler_all_csgs_idle() - Check if the scheduler internal + * runtime used slots are all tagged as idle command queue groups. + * + * @kbdev: Pointer to the device + * + * Return: true if all the used slots are tagged as idle CSGs. + */ +static inline bool kbase_csf_scheduler_all_csgs_idle(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock); + return bitmap_equal(kbdev->csf.scheduler.csg_slots_idle_mask, + kbdev->csf.scheduler.csg_inuse_bitmap, + kbdev->csf.global_iface.group_num); +} + +/** + * kbase_csf_scheduler_advance_tick_nolock() - Advance the scheduling tick + * + * @kbdev: Pointer to the device + * + * This function advances the scheduling tick by enqueuing the tick work item for + * immediate execution, but only if the tick hrtimer is active. If the timer + * is inactive then the tick work item is already in flight. + * The caller must hold the interrupt lock. + */ +static inline void +kbase_csf_scheduler_advance_tick_nolock(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + + lockdep_assert_held(&scheduler->interrupt_lock); + + if (scheduler->tick_timer_active) { + KBASE_KTRACE_ADD(kbdev, SCHEDULER_ADVANCE_TICK, NULL, 0u); + scheduler->tick_timer_active = false; + queue_work(scheduler->wq, &scheduler->tick_work); + } else { + KBASE_KTRACE_ADD(kbdev, SCHEDULER_NOADVANCE_TICK, NULL, 0u); + } +} + +/** + * kbase_csf_scheduler_advance_tick() - Advance the scheduling tick + * + * @kbdev: Pointer to the device + * + * This function advances the scheduling tick by enqueuing the tick work item for + * immediate execution, but only if the tick hrtimer is active. If the timer + * is inactive then the tick work item is already in flight.
+ */ +static inline void kbase_csf_scheduler_advance_tick(struct kbase_device *kbdev) +{ + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + unsigned long flags; + + spin_lock_irqsave(&scheduler->interrupt_lock, flags); + kbase_csf_scheduler_advance_tick_nolock(kbdev); + spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); +} + +/** + * kbase_csf_scheduler_queue_has_trace() - report whether the queue has been + * configured to operate with the + * cs_trace feature. + * + * @queue: Pointer to the queue. + * + * Return: True if the gpu queue is configured to operate with the cs_trace + * feature, otherwise false. + */ +static inline bool kbase_csf_scheduler_queue_has_trace(struct kbase_queue *queue) +{ + lockdep_assert_held(&queue->kctx->kbdev->csf.scheduler.lock); + /* In the current arrangement, it is possible for the context to enable + * the cs_trace after some queues have been registered with cs_trace in + * disabled state. So each queue has its own enabled/disabled condition. + */ + return (queue->trace_buffer_size && queue->trace_buffer_base); +} + +#endif /* _KBASE_CSF_SCHEDULER_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap.c new file mode 100644 index 0000000..8ecf235 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap.c @@ -0,0 +1,611 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include + +#include "mali_kbase_csf_tiler_heap.h" +#include "mali_kbase_csf_tiler_heap_def.h" +#include "mali_kbase_csf_heap_context_alloc.h" + +/** + * encode_chunk_ptr - Encode the address and size of a chunk as an integer. + * + * The size and address of the next chunk in a list are packed into a single + * 64-bit value for storage in a chunk's header. This function returns that + * value. + * + * @chunk_size: Size of a tiler heap chunk, in bytes. + * @chunk_addr: GPU virtual address of the same tiler heap chunk. + * + * Return: Next chunk pointer suitable for writing into a chunk header. + */ +static u64 encode_chunk_ptr(u32 const chunk_size, u64 const chunk_addr) +{ + u64 encoded_size, encoded_addr; + + WARN_ON(chunk_size & ~CHUNK_SIZE_MASK); + WARN_ON(chunk_addr & ~CHUNK_ADDR_MASK); + + encoded_size = + (u64)(chunk_size >> CHUNK_HDR_NEXT_SIZE_ENCODE_SHIFT) << + CHUNK_HDR_NEXT_SIZE_POS; + + encoded_addr = + (chunk_addr >> CHUNK_HDR_NEXT_ADDR_ENCODE_SHIFT) << + CHUNK_HDR_NEXT_ADDR_POS; + + return (encoded_size & CHUNK_HDR_NEXT_SIZE_MASK) | + (encoded_addr & CHUNK_HDR_NEXT_ADDR_MASK); +} + +/** + * get_last_chunk - Get the last chunk of a tiler heap + * + * @heap: Pointer to the tiler heap. + * + * Return: The address of the most recently-linked chunk, or NULL if none. 
+ */ +static struct kbase_csf_tiler_heap_chunk *get_last_chunk( + struct kbase_csf_tiler_heap *const heap) +{ + lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); + + if (list_empty(&heap->chunks_list)) + return NULL; + + return list_last_entry(&heap->chunks_list, + struct kbase_csf_tiler_heap_chunk, link); +} + +/** + * link_chunk - Link a chunk into a tiler heap + * + * Unless the @chunk is the first in the kernel's list of chunks belonging to + * a given tiler heap, this function stores the size and address of the @chunk + * in the header of the preceding chunk. This requires the GPU memory region + * containing the header to be mapped temporarily, which can fail. + * + * @heap: Pointer to the tiler heap. + * @chunk: Pointer to the heap chunk to be linked. + * + * Return: 0 if successful or a negative error code on failure. + */ +static int link_chunk(struct kbase_csf_tiler_heap *const heap, + struct kbase_csf_tiler_heap_chunk *const chunk) +{ + struct kbase_csf_tiler_heap_chunk *const prev = get_last_chunk(heap); + + if (prev) { + struct kbase_context *const kctx = heap->kctx; + struct kbase_vmap_struct map; + u64 *const prev_hdr = kbase_vmap_prot(kctx, prev->gpu_va, + sizeof(*prev_hdr), KBASE_REG_CPU_WR, &map); + + if (unlikely(!prev_hdr)) { + dev_err(kctx->kbdev->dev, + "Failed to map tiler heap chunk 0x%llX\n", + prev->gpu_va); + return -ENOMEM; + } + + *prev_hdr = encode_chunk_ptr(heap->chunk_size, chunk->gpu_va); + kbase_vunmap(kctx, &map); + + dev_dbg(kctx->kbdev->dev, + "Linked tiler heap chunks, 0x%llX -> 0x%llX\n", + prev->gpu_va, chunk->gpu_va); + } + + return 0; +} + +/** + * init_chunk - Initialize and link a tiler heap chunk + * + * Zero-initialize a new chunk's header (including its pointer to the next + * chunk, which doesn't exist yet) and then update the previous chunk's + * header to link the new chunk into the chunk list. + * + * @heap: Pointer to the tiler heap. + * @chunk: Pointer to the heap chunk to be initialized and linked.
+ * @link_with_prev: Flag to indicate if the new chunk needs to be linked with + * the previously allocated chunk. + * + * Return: 0 if successful or a negative error code on failure. + */ +static int init_chunk(struct kbase_csf_tiler_heap *const heap, + struct kbase_csf_tiler_heap_chunk *const chunk, bool link_with_prev) +{ + struct kbase_vmap_struct map; + struct u64 *chunk_hdr = NULL; + struct kbase_context *const kctx = heap->kctx; + + if (unlikely(chunk->gpu_va & ~CHUNK_ADDR_MASK)) { + dev_err(kctx->kbdev->dev, + "Tiler heap chunk address is unusable\n"); + return -EINVAL; + } + + chunk_hdr = kbase_vmap_prot(kctx, + chunk->gpu_va, CHUNK_HDR_SIZE, KBASE_REG_CPU_WR, &map); + + if (unlikely(!chunk_hdr)) { + dev_err(kctx->kbdev->dev, + "Failed to map a tiler heap chunk header\n"); + return -ENOMEM; + } + + memset(chunk_hdr, 0, CHUNK_HDR_SIZE); + kbase_vunmap(kctx, &map); + + if (link_with_prev) + return link_chunk(heap, chunk); + else + return 0; +} + +/** + * create_chunk - Create a tiler heap chunk + * + * This function allocates a chunk of memory for a tiler heap and adds it to + * the end of the list of chunks associated with that heap. The size of the + * chunk is not a parameter because it is configured per-heap not per-chunk. + * + * @heap: Pointer to the tiler heap for which to allocate memory. + * @link_with_prev: Flag to indicate if the chunk to be allocated needs to be + * linked with the previously allocated chunk. + * + * Return: 0 if successful or a negative error code on failure. 
+ */ +static int create_chunk(struct kbase_csf_tiler_heap *const heap, + bool link_with_prev) +{ + int err = 0; + struct kbase_context *const kctx = heap->kctx; + u64 nr_pages = PFN_UP(heap->chunk_size); + u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | + BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE | + BASE_MEM_COHERENT_LOCAL; + struct kbase_csf_tiler_heap_chunk *chunk = NULL; + + flags |= base_mem_group_id_set(kctx->jit_group_id); + +#if defined(CONFIG_MALI_DEBUG) || defined(CONFIG_MALI_VECTOR_DUMP) + flags |= BASE_MEM_PROT_CPU_RD; +#endif + + lockdep_assert_held(&kctx->csf.tiler_heaps.lock); + + chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); + if (unlikely(!chunk)) { + dev_err(kctx->kbdev->dev, + "No kernel memory for a new tiler heap chunk\n"); + return -ENOMEM; + } + + /* Allocate GPU memory for the new chunk. */ + INIT_LIST_HEAD(&chunk->link); + chunk->region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, + &flags, &chunk->gpu_va); + + if (unlikely(!chunk->region)) { + dev_err(kctx->kbdev->dev, + "Failed to allocate a tiler heap chunk\n"); + err = -ENOMEM; + } else { + err = init_chunk(heap, chunk, link_with_prev); + if (unlikely(err)) { + kbase_gpu_vm_lock(kctx); + chunk->region->flags &= ~KBASE_REG_NO_USER_FREE; + kbase_mem_free_region(kctx, chunk->region); + kbase_gpu_vm_unlock(kctx); + } + } + + if (unlikely(err)) { + kfree(chunk); + } else { + list_add_tail(&chunk->link, &heap->chunks_list); + heap->chunk_count++; + + dev_dbg(kctx->kbdev->dev, "Created tiler heap chunk 0x%llX\n", + chunk->gpu_va); + } + + return err; +} + +/** + * delete_chunk - Delete a tiler heap chunk + * + * This function frees a tiler heap chunk previously allocated by @create_chunk + * and removes it from the list of chunks associated with the heap. + * + * WARNING: The deleted chunk is not unlinked from the list of chunks used by + * the GPU, therefore it is only safe to use this function when + * deleting a heap. 
+ * + * @heap: Pointer to the tiler heap for which @chunk was allocated. + * @chunk: Pointer to a chunk to be deleted. + */ +static void delete_chunk(struct kbase_csf_tiler_heap *const heap, + struct kbase_csf_tiler_heap_chunk *const chunk) +{ + struct kbase_context *const kctx = heap->kctx; + + lockdep_assert_held(&kctx->csf.tiler_heaps.lock); + + kbase_gpu_vm_lock(kctx); + chunk->region->flags &= ~KBASE_REG_NO_USER_FREE; + kbase_mem_free_region(kctx, chunk->region); + kbase_gpu_vm_unlock(kctx); + list_del(&chunk->link); + heap->chunk_count--; + kfree(chunk); +} + +/** + * delete_all_chunks - Delete all chunks belonging to a tiler heap + * + * This function empties the list of chunks associated with a tiler heap by + * freeing all chunks previously allocated by @create_chunk. + * + * @heap: Pointer to a tiler heap. + */ +static void delete_all_chunks(struct kbase_csf_tiler_heap *heap) +{ + struct list_head *entry = NULL, *tmp = NULL; + struct kbase_context *const kctx = heap->kctx; + + lockdep_assert_held(&kctx->csf.tiler_heaps.lock); + + list_for_each_safe(entry, tmp, &heap->chunks_list) { + struct kbase_csf_tiler_heap_chunk *chunk = list_entry( + entry, struct kbase_csf_tiler_heap_chunk, link); + + delete_chunk(heap, chunk); + } +} + +/** + * create_initial_chunks - Create the initial list of chunks for a tiler heap + * + * This function allocates a given number of chunks for a tiler heap and + * adds them to the list of chunks associated with that heap. + * + * @heap: Pointer to the tiler heap for which to allocate memory. + * @nchunks: Number of chunks to create. + * + * Return: 0 if successful or a negative error code on failure. 
+ */ +static int create_initial_chunks(struct kbase_csf_tiler_heap *const heap, + u32 const nchunks) +{ + int err = 0; + u32 i; + + for (i = 0; (i < nchunks) && likely(!err); i++) + err = create_chunk(heap, true); + + if (unlikely(err)) + delete_all_chunks(heap); + + return err; +} + +/** + * delete_heap - Delete a tiler heap + * + * This function frees any chunks allocated for a tiler heap previously + * initialized by @kbase_csf_tiler_heap_init and removes it from the list of + * heaps associated with the kbase context. The heap context structure used by + * the firmware is also freed. + * + * @heap: Pointer to a tiler heap to be deleted. + */ +static void delete_heap(struct kbase_csf_tiler_heap *heap) +{ + struct kbase_context *const kctx = heap->kctx; + + dev_dbg(kctx->kbdev->dev, "Deleting tiler heap 0x%llX\n", heap->gpu_va); + + lockdep_assert_held(&kctx->csf.tiler_heaps.lock); + + delete_all_chunks(heap); + + /* We could optimize context destruction by not freeing leaked heap + * contexts but it doesn't seem worth the extra complexity. + */ + kbase_csf_heap_context_allocator_free(&kctx->csf.tiler_heaps.ctx_alloc, + heap->gpu_va); + + list_del(&heap->link); + + WARN_ON(heap->chunk_count); + KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, + heap->heap_id, 0, 0, heap->max_chunks, heap->chunk_size, 0, + heap->target_in_flight, 0); + + kfree(heap); +} + +/** + * find_tiler_heap - Find a tiler heap from the address of its heap context + * + * Each tiler heap managed by the kernel has an associated heap context + * structure used by the firmware. This function finds a tiler heap object from + * the GPU virtual address of its associated heap context. The heap context + * should have been allocated by @kbase_csf_heap_context_allocator_alloc in the + * same @kctx. + * + * @kctx: Pointer to the kbase context to search for a tiler heap. + * @heap_gpu_va: GPU virtual address of a heap context structure. 
+ * + * Return: pointer to the tiler heap object, or NULL if not found. + */ +static struct kbase_csf_tiler_heap *find_tiler_heap( + struct kbase_context *const kctx, u64 const heap_gpu_va) +{ + struct kbase_csf_tiler_heap *heap = NULL; + + lockdep_assert_held(&kctx->csf.tiler_heaps.lock); + + list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link) { + if (heap_gpu_va == heap->gpu_va) + return heap; + } + + dev_dbg(kctx->kbdev->dev, "Tiler heap 0x%llX was not found\n", + heap_gpu_va); + + return NULL; +} + +int kbase_csf_tiler_heap_context_init(struct kbase_context *const kctx) +{ + int err = kbase_csf_heap_context_allocator_init( + &kctx->csf.tiler_heaps.ctx_alloc, kctx); + + if (unlikely(err)) + return err; + + INIT_LIST_HEAD(&kctx->csf.tiler_heaps.list); + mutex_init(&kctx->csf.tiler_heaps.lock); + + dev_dbg(kctx->kbdev->dev, "Initialized a context for tiler heaps\n"); + + return 0; +} + +void kbase_csf_tiler_heap_context_term(struct kbase_context *const kctx) +{ + struct list_head *entry = NULL, *tmp = NULL; + + dev_dbg(kctx->kbdev->dev, "Terminating a context for tiler heaps\n"); + + mutex_lock(&kctx->csf.tiler_heaps.lock); + + list_for_each_safe(entry, tmp, &kctx->csf.tiler_heaps.list) { + struct kbase_csf_tiler_heap *heap = list_entry( + entry, struct kbase_csf_tiler_heap, link); + delete_heap(heap); + } + + mutex_unlock(&kctx->csf.tiler_heaps.lock); + mutex_destroy(&kctx->csf.tiler_heaps.lock); + + kbase_csf_heap_context_allocator_term(&kctx->csf.tiler_heaps.ctx_alloc); +} + +int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, + u32 const chunk_size, u32 const initial_chunks, u32 const max_chunks, + u16 const target_in_flight, u64 *const heap_gpu_va, + u64 *const first_chunk_va) +{ + int err = 0; + struct kbase_csf_tiler_heap *heap = NULL; + struct kbase_csf_heap_context_allocator *const ctx_alloc = + &kctx->csf.tiler_heaps.ctx_alloc; + + dev_dbg(kctx->kbdev->dev, + "Creating a tiler heap with %u chunks (limit: %u) of size %u\n", + 
initial_chunks, max_chunks, chunk_size); + + if (chunk_size == 0) + return -EINVAL; + + if (chunk_size & ~CHUNK_SIZE_MASK) + return -EINVAL; + + if (initial_chunks == 0) + return -EINVAL; + + if (initial_chunks > max_chunks) + return -EINVAL; + + if (target_in_flight == 0) + return -EINVAL; + + heap = kzalloc(sizeof(*heap), GFP_KERNEL); + if (unlikely(!heap)) { + dev_err(kctx->kbdev->dev, + "No kernel memory for a new tiler heap\n"); + return -ENOMEM; + } + + heap->kctx = kctx; + heap->chunk_size = chunk_size; + heap->max_chunks = max_chunks; + heap->target_in_flight = target_in_flight; + INIT_LIST_HEAD(&heap->chunks_list); + + heap->gpu_va = kbase_csf_heap_context_allocator_alloc(ctx_alloc); + + mutex_lock(&kctx->csf.tiler_heaps.lock); + + if (unlikely(!heap->gpu_va)) { + dev_err(kctx->kbdev->dev, + "Failed to allocate a tiler heap context\n"); + err = -ENOMEM; + } else { + err = create_initial_chunks(heap, initial_chunks); + if (unlikely(err)) { + kbase_csf_heap_context_allocator_free(ctx_alloc, + heap->gpu_va); + } + } + + if (unlikely(err)) { + kfree(heap); + } else { + struct kbase_csf_tiler_heap_chunk const *first_chunk = + list_first_entry(&heap->chunks_list, + struct kbase_csf_tiler_heap_chunk, link); + + kctx->csf.tiler_heaps.nr_of_heaps++; + heap->heap_id = kctx->csf.tiler_heaps.nr_of_heaps; + list_add(&heap->link, &kctx->csf.tiler_heaps.list); + + *heap_gpu_va = heap->gpu_va; + *first_chunk_va = first_chunk->gpu_va; + + KBASE_TLSTREAM_AUX_TILER_HEAP_STATS( + kctx->kbdev, kctx->id, heap->heap_id, + PFN_UP(heap->chunk_size * heap->max_chunks), + PFN_UP(heap->chunk_size * heap->chunk_count), + heap->max_chunks, heap->chunk_size, heap->chunk_count, + heap->target_in_flight, 0); + + dev_dbg(kctx->kbdev->dev, "Created tiler heap 0x%llX\n", + heap->gpu_va); + } + + mutex_unlock(&kctx->csf.tiler_heaps.lock); + + return err; +} + +int kbase_csf_tiler_heap_term(struct kbase_context *const kctx, + u64 const heap_gpu_va) +{ + int err = 0; + struct 
kbase_csf_tiler_heap *heap = NULL; + + mutex_lock(&kctx->csf.tiler_heaps.lock); + + heap = find_tiler_heap(kctx, heap_gpu_va); + if (likely(heap)) + delete_heap(heap); + else + err = -EINVAL; + + mutex_unlock(&kctx->csf.tiler_heaps.lock); + + return err; +} + +/** + * alloc_new_chunk - Allocate a new chunk for the tiler heap. + * + * This function will allocate a new chunk for the chunked tiler heap depending + * on the settings provided by userspace when the heap was created and the + * heap's statistics (like number of render passes in-flight). + * + * @heap: Pointer to the tiler heap. + * @nr_in_flight: Number of render passes that are in-flight, must not be zero. + * @pending_frag_count: Number of render passes in-flight with completed vertex/tiler stage. + * The minimum value is zero but it must be less or equal to + * the total number of render passes in flight + * @new_chunk_ptr: Where to store the GPU virtual address & size of the new + * chunk allocated for the heap. + * + * Return: 0 if a new chunk was allocated otherwise an appropriate negative + * error code. + */ +static int alloc_new_chunk(struct kbase_csf_tiler_heap *heap, + u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr) +{ + int err = -ENOMEM; + + lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock); + + if (WARN_ON(!nr_in_flight) || + WARN_ON(pending_frag_count > nr_in_flight)) + return -EINVAL; + + if (nr_in_flight <= heap->target_in_flight) { + if (heap->chunk_count < heap->max_chunks) { + /* Not exceeded the target number of render passes yet so be + * generous with memory. 
+ */ + err = create_chunk(heap, false); + + if (likely(!err)) { + struct kbase_csf_tiler_heap_chunk *new_chunk = + get_last_chunk(heap); + if (!WARN_ON(!new_chunk)) { + *new_chunk_ptr = + encode_chunk_ptr(heap->chunk_size, + new_chunk->gpu_va); + return 0; + } + } + } else if (pending_frag_count > 0) { + err = -EBUSY; + } else { + err = -ENOMEM; + } + } else { + /* Reached target number of render passes in flight. + * Wait for some of them to finish + */ + err = -EBUSY; + } + + return err; +} + +int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx, + u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr) +{ + struct kbase_csf_tiler_heap *heap; + int err = -EINVAL; + + mutex_lock(&kctx->csf.tiler_heaps.lock); + + heap = find_tiler_heap(kctx, gpu_heap_va); + + if (likely(heap)) { + err = alloc_new_chunk(heap, nr_in_flight, pending_frag_count, + new_chunk_ptr); + + KBASE_TLSTREAM_AUX_TILER_HEAP_STATS( + kctx->kbdev, kctx->id, heap->heap_id, + PFN_UP(heap->chunk_size * heap->max_chunks), + PFN_UP(heap->chunk_size * heap->chunk_count), + heap->max_chunks, heap->chunk_size, heap->chunk_count, + heap->target_in_flight, nr_in_flight); + } + + mutex_unlock(&kctx->csf.tiler_heaps.lock); + + return err; +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap.h new file mode 100644 index 0000000..04c27f7 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_TILER_HEAP_H_ +#define _KBASE_CSF_TILER_HEAP_H_ + +#include + +/** + * kbase_csf_tiler_heap_context_init - Initialize the tiler heaps context for a + * GPU address space + * + * @kctx: Pointer to the kbase context being initialized. + * + * Return: 0 if successful or a negative error code on failure. + */ +int kbase_csf_tiler_heap_context_init(struct kbase_context *kctx); + +/** + * kbase_csf_tiler_heap_context_term - Terminate the tiler heaps context for a + * GPU address space + * + * This function deletes any chunked tiler heaps that weren't deleted before + * context termination. + * + * @kctx: Pointer to the kbase context being terminated. + */ +void kbase_csf_tiler_heap_context_term(struct kbase_context *kctx); + +/** + * kbase_csf_tiler_heap_init - Initialize a chunked tiler memory heap. + * + * @kctx: Pointer to the kbase context in which to allocate resources for the + * tiler heap. + * @chunk_size: Size of each chunk, in bytes. Must be page-aligned. + * @initial_chunks: The initial number of chunks to allocate. Must not be + * zero or greater than @max_chunks. + * @max_chunks: The maximum number of chunks that the heap should be allowed + * to use. Must not be less than @initial_chunks. + * @target_in_flight: Number of render-passes that the driver should attempt to + * keep in flight for which allocation of new chunks is + * allowed. Must not be zero. + * @gpu_heap_va: Where to store the GPU virtual address of the context that was + * set up for the tiler heap. 
+ * @first_chunk_va: Where to store the GPU virtual address of the first chunk + * allocated for the heap. This points to the header of the + * heap chunk and not to the low address of free memory in it. + * + * Return: 0 if successful or a negative error code on failure. + */ +int kbase_csf_tiler_heap_init(struct kbase_context *kctx, + u32 chunk_size, u32 initial_chunks, u32 max_chunks, + u16 target_in_flight, u64 *gpu_heap_va, + u64 *first_chunk_va); + +/** + * kbase_csf_tiler_heap_term - Terminate a chunked tiler memory heap. + * + * This function will terminate a chunked tiler heap and cause all the chunks + * (initial and those added during out-of-memory processing) to be freed. + * It is the caller's responsibility to ensure no further operations on this + * heap will happen before calling this function. + * + * @kctx: Pointer to the kbase context in which the tiler heap was initialized. + * @gpu_heap_va: The GPU virtual address of the context that was set up for the + * tiler heap. + * + * Return: 0 if successful or a negative error code on failure. + */ +int kbase_csf_tiler_heap_term(struct kbase_context *kctx, u64 gpu_heap_va); + +/** + * kbase_csf_tiler_heap_alloc_new_chunk - Allocate a new chunk for tiler heap. + * + * This function will allocate a new chunk for the chunked tiler heap depending + * on the settings provided by userspace when the heap was created and the + * heap's statistics (like number of render passes in-flight). + * It would return an appropriate error code if a new chunk couldn't be + * allocated. + * + * @kctx: Pointer to the kbase context in which the tiler heap was initialized. + * @gpu_heap_va: GPU virtual address of the heap context. + * @nr_in_flight: Number of render passes that are in-flight, must not be zero. + * @pending_frag_count: Number of render passes in-flight with completed vertex/tiler stage.
+ * The minimum value is zero but it must be less than or equal to + * the total number of render passes in flight + * @new_chunk_ptr: Where to store the GPU virtual address & size of the new + * chunk allocated for the heap. + * + * Return: 0 if a new chunk was allocated otherwise an appropriate negative + * error code (like -EBUSY when a free chunk is expected to be + * available upon completion of a render pass and -EINVAL when + * an invalid value was passed for one of the arguments). + */ +int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx, + u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr); +#endif diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap_debugfs.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap_debugfs.c new file mode 100644 index 0000000..f46beed --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap_debugfs.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html.
+ * + */ + +#include "mali_kbase_csf_tiler_heap_debugfs.h" +#include "mali_kbase_csf_tiler_heap_def.h" +#include +#include + +#if IS_ENABLED(CONFIG_DEBUG_FS) + +/** + * kbasep_csf_tiler_heap_debugfs_show() - Print tiler heap information for per context + * + * @file: The seq_file for printing to + * @data: The debugfs dentry private data, a pointer to kbase_context + * + * Return: Negative error code or 0 on success. + */ +static int kbasep_csf_tiler_heap_debugfs_show(struct seq_file *file, void *data) +{ + struct kbase_context *kctx = file->private; + struct kbase_csf_tiler_heap_context *tiler_heaps_p = &kctx->csf.tiler_heaps; + struct kbase_csf_tiler_heap *heap; + struct kbase_csf_tiler_heap_chunk *chunk; + + seq_printf(file, "MALI_CSF_TILER_HEAP_DEBUGFS_VERSION: v%u\n", MALI_CSF_TILER_HEAP_DEBUGFS_VERSION); + + mutex_lock(&tiler_heaps_p->lock); + + list_for_each_entry(heap, &tiler_heaps_p->list, link) { + if (heap->kctx != kctx) + continue; + + seq_printf(file, "HEAP(gpu_va = 0x%llx):\n", heap->gpu_va); + seq_printf(file, "\tchunk_size = %u\n", heap->chunk_size); + seq_printf(file, "\tchunk_count = %u\n", heap->chunk_count); + seq_printf(file, "\tmax_chunks = %u\n", heap->max_chunks); + seq_printf(file, "\ttarget_in_flight = %u\n", heap->target_in_flight); + + list_for_each_entry(chunk, &heap->chunks_list, link) + seq_printf(file, "\t\tchunk gpu_va = 0x%llx\n", + chunk->gpu_va); + } + + mutex_unlock(&tiler_heaps_p->lock); + + return 0; +} + +static int kbasep_csf_tiler_heap_debugfs_open(struct inode *in, struct file *file) +{ + return single_open(file, kbasep_csf_tiler_heap_debugfs_show, in->i_private); +} + +static const struct file_operations kbasep_csf_tiler_heap_debugfs_fops = { + .open = kbasep_csf_tiler_heap_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx) +{ + struct dentry *file; + + if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry))) + 
return; + + file = debugfs_create_file("tiler_heaps", 0444, kctx->kctx_dentry, + kctx, &kbasep_csf_tiler_heap_debugfs_fops); + + if (IS_ERR_OR_NULL(file)) { + dev_warn(kctx->kbdev->dev, + "Unable to create tiler heap debugfs entry"); + } +} + + +#else +/* + * Stub functions for when debugfs is disabled + */ +void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx) +{ +} + +#endif /* CONFIG_DEBUG_FS */ + diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap_debugfs.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap_debugfs.h new file mode 100644 index 0000000..92ae91a --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap_debugfs.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#ifndef _KBASE_CSF_TILER_HEAP_DEBUGFS_H_ +#define _KBASE_CSF_TILER_HEAP_DEBUGFS_H_ + +/* Forward declaration */ +struct kbase_context; + +#define MALI_CSF_TILER_HEAP_DEBUGFS_VERSION 0 + +/** + * kbase_csf_tiler_heap_debugfs_init() - Create a debugfs entry for per context tiler heap + * + * @kctx: The kbase_context for which to create the debugfs entry + */ +void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx); + +#endif /* _KBASE_CSF_TILER_HEAP_DEBUGFS_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap_def.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap_def.h new file mode 100644 index 0000000..fb439cf --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tiler_heap_def.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_TILER_HEAP_DEF_H_ +#define _KBASE_CSF_TILER_HEAP_DEF_H_ + +#include + +/* Size of a tiler heap chunk header, in bytes. */ +#define CHUNK_HDR_SIZE ((size_t)64) + +/* Bit-position of the next chunk's size when stored in a chunk header. 
*/ +#define CHUNK_HDR_NEXT_SIZE_POS (0) + +/* Bit-position of the next chunk's address when stored in a chunk header. */ +#define CHUNK_HDR_NEXT_ADDR_POS (12) + +/* Bitmask of the next chunk's size when stored in a chunk header. */ +#define CHUNK_HDR_NEXT_SIZE_MASK (((u64)1 << CHUNK_HDR_NEXT_ADDR_POS) - 1u) + +/* Bitmask of the address of the next chunk when stored in a chunk header. */ +#define CHUNK_HDR_NEXT_ADDR_MASK (~CHUNK_HDR_NEXT_SIZE_MASK) + +/* Right-shift before storing the next chunk's size in a chunk header. */ +#define CHUNK_HDR_NEXT_SIZE_ENCODE_SHIFT (12) + +/* Right-shift before storing the next chunk's address in a chunk header. */ +#define CHUNK_HDR_NEXT_ADDR_ENCODE_SHIFT (12) + +/* Bitmask of valid chunk sizes. This is also the maximum chunk size, in bytes. + */ +#define CHUNK_SIZE_MASK \ + ((CHUNK_HDR_NEXT_SIZE_MASK >> CHUNK_HDR_NEXT_SIZE_POS) << \ + CHUNK_HDR_NEXT_SIZE_ENCODE_SHIFT) + +/* Bitmask of valid chunk addresses. This is also the highest address. */ +#define CHUNK_ADDR_MASK \ + ((CHUNK_HDR_NEXT_ADDR_MASK >> CHUNK_HDR_NEXT_ADDR_POS) << \ + CHUNK_HDR_NEXT_ADDR_ENCODE_SHIFT) + +/** + * struct kbase_csf_tiler_heap_chunk - A tiler heap chunk managed by the kernel + * + * Chunks are allocated upon initialization of a tiler heap or in response to + * out-of-memory events from the firmware. Chunks are always fully backed by + * physical memory to avoid the overhead of processing GPU page faults. The + * allocated GPU memory regions are linked together independent of the list of + * kernel objects of this type. + * + * @link: Link to this chunk in a list of chunks belonging to a + * @kbase_csf_tiler_heap. + * @region: Pointer to the GPU memory region allocated for the chunk. + * @gpu_va: GPU virtual address of the start of the memory region. + * This points to the header of the chunk and not to the low address + * of free memory within it. 
+ */ +struct kbase_csf_tiler_heap_chunk { + struct list_head link; + struct kbase_va_region *region; + u64 gpu_va; +}; + +/** + * struct kbase_csf_tiler_heap - A tiler heap managed by the kernel + * + * @kctx: Pointer to the kbase context with which this heap is + * associated. + * @link: Link to this heap in a list of tiler heaps belonging to + * the @kbase_csf_tiler_heap_context. + * @chunk_size: Size of each chunk, in bytes. Must be page-aligned. + * @chunk_count: The number of chunks currently allocated. Must not be + * zero or greater than @max_chunks. + * @max_chunks: The maximum number of chunks that the heap should be + * allowed to use. Must not be less than @chunk_count. + * @target_in_flight: Number of render-passes that the driver should attempt + * to keep in flight for which allocation of new chunks is + * allowed. Must not be zero. + * @gpu_va: The GPU virtual address of the heap context structure that + * was allocated for the firmware. This is also used to + * uniquely identify the heap. + * @heap_id: Unique id representing the heap, assigned during heap + * initialization. + * @chunks_list: Linked list of allocated chunks. + */ +struct kbase_csf_tiler_heap { + struct kbase_context *kctx; + struct list_head link; + u32 chunk_size; + u32 chunk_count; + u32 max_chunks; + u16 target_in_flight; + u64 gpu_va; + u64 heap_id; + struct list_head chunks_list; +}; +#endif /* !_KBASE_CSF_TILER_HEAP_DEF_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_timeout.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_timeout.c new file mode 100644 index 0000000..4d93fe5 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_timeout.c @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include +#include +#include +#include + +#include "mali_kbase.h" +#include "mali_kbase_config_defaults.h" +#include "mali_kbase_csf_firmware.h" +#include "mali_kbase_csf_timeout.h" +#include "mali_kbase_reset_gpu.h" +#include "backend/gpu/mali_kbase_pm_internal.h" + +/** + * set_timeout - set a new global progress timeout. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @timeout: the maximum number of GPU cycles without forward progress to allow + * to elapse before terminating a GPU command queue group. + * + * Return: 0 on success, or negative on failure + * (e.g. -ERANGE if the requested timeout is too large). + */ +static int set_timeout(struct kbase_device *const kbdev, u64 const timeout) +{ + if (timeout > GLB_PROGRESS_TIMER_TIMEOUT_MAX) { + dev_err(kbdev->dev, "Timeout %llu is too large.\n", timeout); + return -ERANGE; + } + + dev_dbg(kbdev->dev, "New progress timeout: %llu cycles\n", timeout); + + atomic64_set(&kbdev->csf.progress_timeout, timeout); + + return 0; +} + +/** + * progress_timeout_store - Store the progress_timeout device attribute. + * @dev: The device that has the attribute. + * @attr: The attributes of the sysfs file. + * @buf: The value written to the sysfs file. 
+ * @count: The number of bytes written to the sysfs file. + * + * This function is called when the progress_timeout sysfs file is written to. + * It checks the data written, and if valid updates the progress timeout value. + * The function also checks gpu reset status, if the gpu is in reset process, + * the function will return an error code (-EBUSY), and no change for timeout + * value. + * + * Return: @count if the function succeeded. An error code on failure. + */ +static ssize_t progress_timeout_store(struct device * const dev, + struct device_attribute * const attr, const char * const buf, + size_t const count) +{ + struct kbase_device *const kbdev = dev_get_drvdata(dev); + int err; + u64 timeout; + + if (!kbdev) + return -ENODEV; + + err = kbase_reset_gpu_try_prevent(kbdev); + if (err) { + dev_warn(kbdev->dev, + "Couldn't process progress_timeout write operation for GPU reset.\n"); + return -EBUSY; + } + + err = kstrtou64(buf, 0, &timeout); + if (err) + dev_err(kbdev->dev, + "Couldn't process progress_timeout write operation.\n" + "Use format \n"); + else + err = set_timeout(kbdev, timeout); + + if (!err) { + kbase_csf_scheduler_pm_active(kbdev); + + err = kbase_pm_wait_for_desired_state(kbdev); + if (!err) + err = kbase_csf_firmware_set_timeout(kbdev, timeout); + + kbase_csf_scheduler_pm_idle(kbdev); + } + + kbase_reset_gpu_allow(kbdev); + if (err) + return err; + + return count; +} + +/** + * progress_timeout_show - Show the progress_timeout device attribute. + * @dev: The device that has the attribute. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the global timeout. + * + * This function is called to get the progress timeout value. + * + * Return: The number of bytes output to @buf. 
+ */ +static ssize_t progress_timeout_show(struct device * const dev, + struct device_attribute * const attr, char * const buf) +{ + struct kbase_device *const kbdev = dev_get_drvdata(dev); + int err; + + if (!kbdev) + return -ENODEV; + + err = scnprintf(buf, PAGE_SIZE, "%llu\n", kbase_csf_timeout_get(kbdev)); + + return err; + +} + +static DEVICE_ATTR(progress_timeout, 0644, progress_timeout_show, + progress_timeout_store); + +int kbase_csf_timeout_init(struct kbase_device *const kbdev) +{ + u64 timeout = DEFAULT_PROGRESS_TIMEOUT; + int err; + +#if IS_ENABLED(CONFIG_OF) + err = of_property_read_u64(kbdev->dev->of_node, + "progress_timeout", &timeout); + if (!err) + dev_info(kbdev->dev, "Found progress_timeout = %llu in Devicetree\n", + timeout); +#endif + + err = set_timeout(kbdev, timeout); + if (err) + return err; + + err = sysfs_create_file(&kbdev->dev->kobj, + &dev_attr_progress_timeout.attr); + if (err) + dev_err(kbdev->dev, "SysFS file creation failed\n"); + + return err; +} + +void kbase_csf_timeout_term(struct kbase_device * const kbdev) +{ + sysfs_remove_file(&kbdev->dev->kobj, &dev_attr_progress_timeout.attr); +} + +u64 kbase_csf_timeout_get(struct kbase_device *const kbdev) +{ + return atomic64_read(&kbdev->csf.progress_timeout); +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_timeout.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_timeout.h new file mode 100644 index 0000000..b406eaa --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_timeout.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_TIMEOUT_H_ +#define _KBASE_CSF_TIMEOUT_H_ + +struct kbase_device; + +/** + * kbase_csf_timeout_init - Initialize the progress timeout. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * Must be zero-initialized. + * + * The progress timeout is the number of GPU clock cycles allowed to elapse + * before the driver terminates a GPU command queue group in which a task is + * making no forward progress on an endpoint (e.g. a shader core). This function + * determines the initial value and also creates a sysfs file to allow the + * timeout to be reconfigured later. + * + * Reconfigures the global firmware interface to enable the current timeout. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_csf_timeout_init(struct kbase_device *kbdev); + +/** + * kbase_csf_timeout_term - Terminate the progress timeout. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * Removes the sysfs file which allowed the timeout to be reconfigured. + * Does nothing if called on a zero-initialized object. + */ +void kbase_csf_timeout_term(struct kbase_device *kbdev); + +/** + * kbase_csf_timeout_get - get the current global progress timeout. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * Return: the maximum number of GPU cycles that is allowed to elapse without + * forward progress before the driver terminates a GPU command queue + * group. 
+ */ +u64 kbase_csf_timeout_get(struct kbase_device *const kbdev); + +#endif /* _KBASE_CSF_TIMEOUT_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tl_reader.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tl_reader.c new file mode 100644 index 0000000..1824c2d --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tl_reader.c @@ -0,0 +1,534 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include "mali_kbase_csf_tl_reader.h" + +#include "mali_kbase_csf_trace_buffer.h" +#include "mali_kbase_reset_gpu.h" + +#include "tl/mali_kbase_tlstream.h" +#include "tl/mali_kbase_tl_serialize.h" +#include "tl/mali_kbase_tracepoints.h" + +#include "mali_kbase_pm.h" +#include "mali_kbase_hwaccess_time.h" + +#include +#include +#include + +#if IS_ENABLED(CONFIG_DEBUG_FS) +#include "tl/mali_kbase_timeline_priv.h" +#include + +#if (KERNEL_VERSION(4, 7, 0) > LINUX_VERSION_CODE) +#define DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SIMPLE_ATTRIBUTE +#endif +#endif + +/* Name of the CSFFW timeline tracebuffer. 
*/ +#define KBASE_CSFFW_TRACEBUFFER_NAME "timeline" +/* Name of the timeline header metadata */ +#define KBASE_CSFFW_TIMELINE_HEADER_NAME "timeline_header" + +/** + * struct kbase_csffw_tl_message - CSFFW timeline message. + * + * @msg_id: Message ID. + * @timestamp: Timestamp of the event. + * @cycle_counter: Cycle number of the event. + * + * Contains fields that are common for all CSFFW timeline messages. + */ +struct kbase_csffw_tl_message { + u32 msg_id; + u64 timestamp; + u64 cycle_counter; +} __packed __aligned(4); + +#if IS_ENABLED(CONFIG_DEBUG_FS) +static int kbase_csf_tl_debugfs_poll_interval_read(void *data, u64 *val) +{ + struct kbase_device *kbdev = (struct kbase_device *)data; + struct kbase_csf_tl_reader *self = &kbdev->timeline->csf_tl_reader; + + *val = self->timer_interval; + + return 0; +} + +static int kbase_csf_tl_debugfs_poll_interval_write(void *data, u64 val) +{ + struct kbase_device *kbdev = (struct kbase_device *)data; + struct kbase_csf_tl_reader *self = &kbdev->timeline->csf_tl_reader; + + if (val > KBASE_CSF_TL_READ_INTERVAL_MAX || val < KBASE_CSF_TL_READ_INTERVAL_MIN) { + return -EINVAL; + } + + self->timer_interval = (u32)val; + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_tl_poll_interval_fops, + kbase_csf_tl_debugfs_poll_interval_read, + kbase_csf_tl_debugfs_poll_interval_write, "%llu\n"); + + +void kbase_csf_tl_reader_debugfs_init(struct kbase_device *kbdev) +{ + debugfs_create_file("csf_tl_poll_interval_in_ms", S_IRUGO | S_IWUSR, + kbdev->debugfs_instr_directory, kbdev, + &kbase_csf_tl_poll_interval_fops); + +} +#endif + +/** + * get_cpu_gpu_time() - Get current CPU and GPU timestamps. + * + * @kbdev: Kbase device. + * @cpu_ts: Output CPU timestamp. + * @gpu_ts: Output GPU timestamp. + * @gpu_cycle: Output GPU cycle counts.
+ */ +static void get_cpu_gpu_time( + struct kbase_device *kbdev, + u64 *cpu_ts, + u64 *gpu_ts, + u64 *gpu_cycle) +{ + struct timespec64 ts; + + kbase_pm_context_active(kbdev); + kbase_backend_get_gpu_time(kbdev, gpu_cycle, gpu_ts, &ts); + kbase_pm_context_idle(kbdev); + + if (cpu_ts) + *cpu_ts = ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; +} + + +/** + * kbase_ts_converter_init() - Initialize system timestamp converter. + * + * @self: System Timestamp Converter instance. + * @kbdev: Kbase device pointer + * + * Return: Zero on success, -1 otherwise. + */ +static int kbase_ts_converter_init( + struct kbase_ts_converter *self, + struct kbase_device *kbdev) +{ + u64 cpu_ts = 0; + u64 gpu_ts = 0; + u64 freq; + u64 common_factor; + + get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL); + freq = arch_timer_get_cntfrq(); + + if (!freq) { + dev_warn(kbdev->dev, "arch_timer_get_rate() is zero!"); + return -1; + } + + common_factor = gcd(NSEC_PER_SEC, freq); + + self->multiplier = div64_u64(NSEC_PER_SEC, common_factor); + self->divisor = div64_u64(freq, common_factor); + self->offset = + cpu_ts - div64_u64(gpu_ts * self->multiplier, self->divisor); + + return 0; +} + +/** + * kbase_ts_converter_convert() - Convert GPU timestamp to CPU timestamp. + * + * @self: System Timestamp Converter instance. + * @gpu_ts: System timestamp value to convert, updated in place. + * + * Return: None; the CPU timestamp is written back through @gpu_ts. + */ +static void kbase_ts_converter_convert( + const struct kbase_ts_converter *self, + u64 *gpu_ts) +{ + u64 old_gpu_ts = *gpu_ts; + *gpu_ts = div64_u64(old_gpu_ts * self->multiplier, + self->divisor) + self->offset; +} + +/** + * tl_reader_overflow_notify() - Emit stream overflow tracepoint. + * + * @self: CSFFW TL Reader instance. + * @msg_buf_start: Start of the message. + * @msg_buf_end: End of the message buffer.
+ */ +static void tl_reader_overflow_notify( + const struct kbase_csf_tl_reader *self, + u8 *const msg_buf_start, + u8 *const msg_buf_end) +{ + struct kbase_device *kbdev = self->kbdev; + struct kbase_csffw_tl_message message = {0}; + + /* Reuse the timestamp and cycle count from current event if possible */ + if (msg_buf_start + sizeof(message) <= msg_buf_end) + memcpy(&message, msg_buf_start, sizeof(message)); + + KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW( + kbdev, message.timestamp, message.cycle_counter); +} + +/** + * tl_reader_overflow_check() - Check if an overflow has happened + * + * @self: CSFFW TL Reader instance. + * @event_id: Incoming event id. + * + * Return: True, if an overflow has happened, False otherwise. + */ +static bool tl_reader_overflow_check( + struct kbase_csf_tl_reader *self, + u16 event_id) +{ + struct kbase_device *kbdev = self->kbdev; + bool has_overflow = false; + + /* 0 is a special event_id and reserved for the very first tracepoint + * after reset, we should skip overflow check when reset happened. + */ + if (event_id != 0) { + has_overflow = self->got_first_event + && self->expected_event_id != event_id; + + if (has_overflow) + dev_warn(kbdev->dev, + "CSFFW overflow, event_id: %u, expected: %u.", + event_id, self->expected_event_id); + } + + self->got_first_event = true; + self->expected_event_id = event_id + 1; + /* When event_id reaches its max value, it skips 0 and wraps to 1. */ + if (self->expected_event_id == 0) + self->expected_event_id++; + + return has_overflow; +} + +/** + * tl_reader_reset() - Reset timeline tracebuffer reader state machine. + * + * @self: CSFFW TL Reader instance. + * + * Reset the reader to the default state, i.e. set all the + * mutable fields to zero. 
+ */ +static void tl_reader_reset(struct kbase_csf_tl_reader *self) +{ + self->got_first_event = false; + self->is_active = false; + self->expected_event_id = 0; + self->tl_header.btc = 0; +} + +int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self) +{ + int ret = 0; + struct kbase_device *kbdev = self->kbdev; + struct kbase_tlstream *stream = self->stream; + + u8 *read_buffer = self->read_buffer; + const size_t read_buffer_size = sizeof(self->read_buffer); + + u32 bytes_read; + u8 *csffw_data_begin; + u8 *csffw_data_end; + u8 *csffw_data_it; + + unsigned long flags; + + spin_lock_irqsave(&self->read_lock, flags); + + /* If not running, early exit. */ + if (!self->is_active) { + spin_unlock_irqrestore(&self->read_lock, flags); + return -EBUSY; + } + + /* Copying the whole buffer in a single shot. We assume + * that the buffer will not contain partially written messages. + */ + bytes_read = kbase_csf_firmware_trace_buffer_read_data( + self->trace_buffer, read_buffer, read_buffer_size); + csffw_data_begin = read_buffer; + csffw_data_end = read_buffer + bytes_read; + + for (csffw_data_it = csffw_data_begin; + csffw_data_it < csffw_data_end;) { + u32 event_header; + u16 event_id; + u16 event_size; + unsigned long acq_flags; + char *buffer; + + /* Can we safely read event_id? */ + if (csffw_data_it + sizeof(event_header) > csffw_data_end) { + dev_warn( + kbdev->dev, + "Unable to parse CSFFW tracebuffer event header."); + ret = -EBUSY; + break; + } + + /* Read and parse the event header. */ + memcpy(&event_header, csffw_data_it, sizeof(event_header)); + event_id = (event_header >> 0) & 0xFFFF; + event_size = (event_header >> 16) & 0xFFFF; + csffw_data_it += sizeof(event_header); + + /* Detect if an overflow has happened. */ + if (tl_reader_overflow_check(self, event_id)) + tl_reader_overflow_notify(self, + csffw_data_it, + csffw_data_end); + + /* Can we safely read the message body? 
*/ + if (csffw_data_it + event_size > csffw_data_end) { + dev_warn(kbdev->dev, + "event_id: %u, can't read with event_size: %u.", + event_id, event_size); + ret = -EBUSY; + break; + } + + /* Convert GPU timestamp to CPU timestamp. */ + { + struct kbase_csffw_tl_message *msg = + (struct kbase_csffw_tl_message *) csffw_data_it; + kbase_ts_converter_convert( + &self->ts_converter, + &msg->timestamp); + } + + /* Copy the message out to the tl_stream. */ + buffer = kbase_tlstream_msgbuf_acquire( + stream, event_size, &acq_flags); + kbasep_serialize_bytes(buffer, 0, csffw_data_it, event_size); + kbase_tlstream_msgbuf_release(stream, acq_flags); + csffw_data_it += event_size; + } + + spin_unlock_irqrestore(&self->read_lock, flags); + return ret; +} + +static void kbasep_csf_tl_reader_read_callback(struct timer_list *timer) +{ + struct kbase_csf_tl_reader *self = + container_of(timer, struct kbase_csf_tl_reader, read_timer); + + int rcode; + + kbase_csf_tl_reader_flush_buffer(self); + + rcode = mod_timer(&self->read_timer, + jiffies + msecs_to_jiffies(self->timer_interval)); + + CSTD_UNUSED(rcode); +} + +/** + * tl_reader_init_late() - Late CSFFW TL Reader initialization. + * + * @self: CSFFW TL Reader instance. + * @kbdev: Kbase device. + * + * Late initialization is done once at kbase_csf_tl_reader_start() time. + * This is because the firmware image is not parsed + * by the kbase_csf_tl_reader_init() time. + * + * Return: Zero on success, -1 otherwise. 
+ */ +static int tl_reader_init_late( + struct kbase_csf_tl_reader *self, + struct kbase_device *kbdev) +{ + struct firmware_trace_buffer *tb; + size_t hdr_size = 0; + const char *hdr = NULL; + + if (self->kbdev) + return 0; + + tb = kbase_csf_firmware_get_trace_buffer( + kbdev, KBASE_CSFFW_TRACEBUFFER_NAME); + hdr = kbase_csf_firmware_get_timeline_metadata( + kbdev, KBASE_CSFFW_TIMELINE_HEADER_NAME, &hdr_size); + + if (!tb) { + dev_warn( + kbdev->dev, + "'%s' tracebuffer is not present in the firmware image.", + KBASE_CSFFW_TRACEBUFFER_NAME); + return -1; + } + + if (!hdr) { + dev_warn( + kbdev->dev, + "'%s' timeline metadata is not present in the firmware image.", + KBASE_CSFFW_TIMELINE_HEADER_NAME); + return -1; + } + + if (kbase_ts_converter_init(&self->ts_converter, kbdev)) { + return -1; + } + + self->kbdev = kbdev; + self->trace_buffer = tb; + self->tl_header.data = hdr; + self->tl_header.size = hdr_size; + + return 0; +} + +/** + * tl_reader_update_enable_bit() - Update the first bit of a CSFFW tracebuffer. + * + * @self: CSFFW TL Reader instance. + * @value: The value to set. + * + * Update the first bit of a CSFFW tracebufer and then reset the GPU. + * This is to make these changes visible to the MCU. + * + * Return: 0 on success, or negative error code for failure. 
+ */ +static int tl_reader_update_enable_bit( + struct kbase_csf_tl_reader *self, + bool value) +{ + int err = 0; + + err = kbase_csf_firmware_trace_buffer_update_trace_enable_bit( + self->trace_buffer, 0, value); + + return err; +} + +void kbase_csf_tl_reader_init(struct kbase_csf_tl_reader *self, + struct kbase_tlstream *stream) +{ + self->timer_interval = KBASE_CSF_TL_READ_INTERVAL_DEFAULT; + + kbase_timer_setup(&self->read_timer, + kbasep_csf_tl_reader_read_callback); + + self->stream = stream; + + /* This will be initialized by tl_reader_init_late() */ + self->kbdev = NULL; + self->trace_buffer = NULL; + self->tl_header.data = NULL; + self->tl_header.size = 0; + + spin_lock_init(&self->read_lock); + + tl_reader_reset(self); +} + +void kbase_csf_tl_reader_term(struct kbase_csf_tl_reader *self) +{ + del_timer_sync(&self->read_timer); +} + +int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self, + struct kbase_device *kbdev) +{ + int rcode; + + /* If already running, early exit. */ + if (self->is_active) + return 0; + + if (tl_reader_init_late(self, kbdev)) { + return -EINVAL; + } + + tl_reader_reset(self); + + self->is_active = true; + /* Set bytes to copy to the header size. This is to trigger copying + * of the header to the user space. + */ + self->tl_header.btc = self->tl_header.size; + + /* Enable the tracebuffer on the CSFFW side. */ + rcode = tl_reader_update_enable_bit(self, true); + if (rcode != 0) + return rcode; + + rcode = mod_timer(&self->read_timer, + jiffies + msecs_to_jiffies(self->timer_interval)); + + return 0; +} + +void kbase_csf_tl_reader_stop(struct kbase_csf_tl_reader *self) +{ + unsigned long flags; + + /* If is not running, early exit. */ + if (!self->is_active) + return; + + /* Disable the tracebuffer on the CSFFW side. 
*/ + tl_reader_update_enable_bit(self, false); + + del_timer_sync(&self->read_timer); + + spin_lock_irqsave(&self->read_lock, flags); + + tl_reader_reset(self); + + spin_unlock_irqrestore(&self->read_lock, flags); +} + +void kbase_csf_tl_reader_reset(struct kbase_csf_tl_reader *self) +{ + u64 gpu_cycle = 0; + struct kbase_device *kbdev = self->kbdev; + + if (!kbdev) + return; + + kbase_csf_tl_reader_flush_buffer(self); + + get_cpu_gpu_time(kbdev, NULL, NULL, &gpu_cycle); + KBASE_TLSTREAM_TL_KBASE_CSFFW_RESET(kbdev, gpu_cycle); +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tl_reader.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tl_reader.h new file mode 100644 index 0000000..1b0fcd7 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_tl_reader.h @@ -0,0 +1,185 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSFFW_TL_READER_H_ +#define _KBASE_CSFFW_TL_READER_H_ + +#include +#include +#include + +/* The number of pages used for CSFFW trace buffer. Can be tweaked. */ +#define KBASE_CSF_TL_BUFFER_NR_PAGES 128 +/* CSFFW Timeline read polling minimum period in milliseconds. 
/**
 * struct kbase_ts_converter -
 * System timestamp to CPU timestamp converter state.
 *
 * @multiplier:	Numerator of the converter's fraction.
 * @divisor:	Denominator of the converter's fraction.
 * @offset:	Converter's offset term.
 *
 * According to Generic timer spec, system timer:
 * - Increments at a fixed frequency
 * - Starts operating from zero
 *
 * Hence CPU time is a linear function of System Time.
 *
 * CPU_ts = alpha * SYS_ts + beta
 *
 * Where
 * - alpha = 10^9/SYS_ts_freq
 * - beta is calculated from two timer samples taken at the same time:
 *   beta = CPU_ts_s - SYS_ts_s * alpha
 *
 * Since alpha is a rational number, we minimize possible
 * rounding error by simplifying the ratio. Thus alpha is stored
 * as a simple `multiplier / divisor` ratio.
 *
 */
struct kbase_ts_converter {
	u64 multiplier;
	u64 divisor;
	s64 offset;
};

/**
 * struct kbase_csf_tl_reader - CSFFW timeline reader state.
 *
 * @read_timer:        Timer used for periodical tracebuffer reading.
 * @timer_interval:    Timer polling period in milliseconds.
 * @stream:            Timeline stream to which the tracebuffer content
 *                     is copied.
 * @kbdev:             KBase device.
 * @trace_buffer:      CSF Firmware timeline tracebuffer.
 * @tl_header:         CSFFW Timeline header
 * @tl_header.data:    CSFFW Timeline header content.
 * @tl_header.size:    CSFFW Timeline header size.
 * @tl_header.btc:     CSFFW Timeline header remaining bytes to copy to
 *                     the user space.
 * @ts_converter:      Timestamp converter state.
 * @got_first_event:   True, if a CSFFW timeline session has been enabled
 *                     and the first event was received.
 * @is_active:         True, if a CSFFW timeline session has been enabled.
 * @expected_event_id: The 16 bit event ID expected from CSFFW next, i.e.
 *                     the last received ID plus one, skipping zero on
 *                     wrap-around. It is only valid when got_first_event
 *                     is true.
 * @read_buffer:       Temporary buffer used for CSFFW timeline data
 *                     reading from the tracebuffer.
 * @read_lock:         CSFFW timeline reader lock.
 */
struct kbase_csf_tl_reader {
	struct timer_list read_timer;
	u32 timer_interval;
	struct kbase_tlstream *stream;

	struct kbase_device *kbdev;
	struct firmware_trace_buffer *trace_buffer;
	struct {
		const char *data;
		size_t size;
		size_t btc;
	} tl_header;
	struct kbase_ts_converter ts_converter;

	bool got_first_event;
	bool is_active;
	u16 expected_event_id;

	/* Same size as the firmware timeline buffer
	 * (KBASE_CSF_TL_BUFFER_NR_PAGES pages).
	 */
	u8 read_buffer[PAGE_SIZE * KBASE_CSF_TL_BUFFER_NR_PAGES];
	spinlock_t read_lock;
};
+ * + * @self: CSFFW TL Reader instance. + */ +void kbase_csf_tl_reader_stop(struct kbase_csf_tl_reader *self); + +#if IS_ENABLED(CONFIG_DEBUG_FS) +/** + * kbase_csf_tl_reader_debugfs_init() - + * Initialize debugfs for CSFFW Timelime Stream Reader. + * + * @kbdev: Kbase device. + */ +void kbase_csf_tl_reader_debugfs_init(struct kbase_device *kbdev); +#endif + +/** + * kbase_csf_tl_reader_reset() - + * Reset CSFFW timeline reader, it should be called before reset CSFFW. + * + * @self: CSFFW TL Reader instance. + */ +void kbase_csf_tl_reader_reset(struct kbase_csf_tl_reader *self); + +#endif /* _KBASE_CSFFW_TL_READER_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_trace_buffer.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_trace_buffer.c new file mode 100644 index 0000000..a6343c8 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_trace_buffer.c @@ -0,0 +1,688 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
/**
 * struct firmware_trace_buffer - Trace Buffer within the MCU firmware
 *
 * The firmware relays information to the host by writing on memory buffers
 * which are allocated and partially configured by the host. These buffers
 * are called Trace Buffers: each of them has a specific purpose and is
 * identified by a name and a set of memory addresses where the host can
 * set pointers to host-allocated structures.
 *
 * @kbdev:        Pointer to the Kbase device.
 * @node:         List head linking all trace buffers to
 *                kbase_device:csf.firmware_trace_buffers
 * @data_mapping: MCU shared memory mapping used for the data buffer.
 * @updatable:    Indicates whether config items can be updated with
 *                FIRMWARE_CONFIG_UPDATE
 * @type:         The type of the trace buffer.
 * @trace_enable_entry_count: Number of Trace Enable bits.
 * @gpu_va:       Structure containing all the Firmware addresses
 *                that are accessed by the MCU.
 * @gpu_va.size_address:    The address where the MCU shall read the size of
 *                          the data buffer.
 * @gpu_va.insert_address:  The address that shall be dereferenced by the MCU
 *                          to write the Insert offset.
 * @gpu_va.extract_address: The address that shall be dereferenced by the MCU
 *                          to read the Extract offset.
 * @gpu_va.data_address:    The address that shall be dereferenced by the MCU
 *                          to write the Trace Buffer.
 * @gpu_va.trace_enable:    The address where the MCU shall read the array of
 *                          Trace Enable bits describing which trace points
 *                          and features shall be enabled.
 * @cpu_va:       Structure containing CPU addresses of variables
 *                which are permanently mapped on the CPU address
 *                space.
 * @cpu_va.insert_cpu_va:   CPU virtual address of the Insert variable.
 * @cpu_va.extract_cpu_va:  CPU virtual address of the Extract variable.
 * @num_pages: Size of the data buffer, in pages.
 * @trace_enable_init_mask: Initial value for the trace enable bit mask.
 *                          Each u32 holds 32 enable bits, lowest bit first.
 * @name:  NULL terminated string which contains the name of the trace buffer.
 *         The structure is allocated with extra trailing bytes for the name,
 *         so the declared length of 1 is only a placeholder.
 */
struct firmware_trace_buffer {
	struct kbase_device *kbdev;
	struct list_head node;
	struct kbase_csf_mapping data_mapping;
	bool updatable;
	u32 type;
	u32 trace_enable_entry_count;
	struct gpu_va {
		u32 size_address;
		u32 insert_address;
		u32 extract_address;
		u32 data_address;
		u32 trace_enable;
	} gpu_va;
	struct cpu_va {
		u32 *insert_cpu_va;
		u32 *extract_cpu_va;
	} cpu_va;
	u32 num_pages;
	u32 trace_enable_init_mask[CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX];
	char name[1]; /* this field must be last */
};

/**
 * struct firmware_trace_buffer_data - Configuration data for trace buffers
 *
 * Describe how to set up a trace buffer interface.
 * Trace buffers are identified by name and they require a data buffer and
 * an initial mask of values for the trace enable bits.
 *
 * @name: Name identifier of the trace buffer
 * @trace_enable_init_mask: Initial value to assign to the trace enable bits
 * @size: Size of the data buffer to allocate for the trace buffer, in pages.
 *        The size of a data buffer must always be a power of 2.
 */
struct firmware_trace_buffer_data {
	char name[64];
	u32 trace_enable_init_mask[CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX];
	size_t size;
};
+ */ +static const struct firmware_trace_buffer_data +trace_buffer_data[] = { +#ifndef MALI_KBASE_BUILD + { "fwutf", {0}, 1 }, +#endif + { FW_TRACE_BUF_NAME, {0}, 4 }, + { "benchmark", {0}, 2 }, + { "timeline", {0}, KBASE_CSF_TL_BUFFER_NR_PAGES }, +}; + +int kbase_csf_firmware_trace_buffers_init(struct kbase_device *kbdev) +{ + struct firmware_trace_buffer *trace_buffer; + int ret = 0; + u32 mcu_rw_offset = 0, mcu_write_offset = 0; + const u32 cache_line_alignment = kbase_get_cache_line_alignment(kbdev); + + if (list_empty(&kbdev->csf.firmware_trace_buffers.list)) { + dev_dbg(kbdev->dev, "No trace buffers to initialise\n"); + return 0; + } + + /* GPU-readable,writable memory used for Extract variables */ + ret = kbase_csf_firmware_mcu_shared_mapping_init( + kbdev, 1, PROT_WRITE, + KBASE_REG_GPU_RD | KBASE_REG_GPU_WR, + &kbdev->csf.firmware_trace_buffers.mcu_rw); + if (ret != 0) { + dev_err(kbdev->dev, "Failed to map GPU-rw MCU shared memory\n"); + goto out; + } + + /* GPU-writable memory used for Insert variables */ + ret = kbase_csf_firmware_mcu_shared_mapping_init( + kbdev, 1, PROT_READ, KBASE_REG_GPU_WR, + &kbdev->csf.firmware_trace_buffers.mcu_write); + if (ret != 0) { + dev_err(kbdev->dev, "Failed to map GPU-writable MCU shared memory\n"); + goto out; + } + + list_for_each_entry(trace_buffer, &kbdev->csf.firmware_trace_buffers.list, node) { + u32 extract_gpu_va, insert_gpu_va, data_buffer_gpu_va, + trace_enable_size_dwords; + u32 *extract_cpu_va, *insert_cpu_va; + unsigned int i; + + /* GPU-writable data buffer for the individual trace buffer */ + ret = kbase_csf_firmware_mcu_shared_mapping_init( + kbdev, trace_buffer->num_pages, PROT_READ, KBASE_REG_GPU_WR, + &trace_buffer->data_mapping); + if (ret) { + dev_err(kbdev->dev, "Failed to map GPU-writable MCU shared memory for a trace buffer\n"); + goto out; + } + + extract_gpu_va = + (kbdev->csf.firmware_trace_buffers.mcu_rw.va_reg->start_pfn << PAGE_SHIFT) + + mcu_rw_offset; + extract_cpu_va = (u32*)( + 
kbdev->csf.firmware_trace_buffers.mcu_rw.cpu_addr + + mcu_rw_offset); + insert_gpu_va = + (kbdev->csf.firmware_trace_buffers.mcu_write.va_reg->start_pfn << PAGE_SHIFT) + + mcu_write_offset; + insert_cpu_va = (u32*)( + kbdev->csf.firmware_trace_buffers.mcu_write.cpu_addr + + mcu_write_offset); + data_buffer_gpu_va = + (trace_buffer->data_mapping.va_reg->start_pfn << PAGE_SHIFT); + + /* Initialize the Extract variable */ + *extract_cpu_va = 0; + + /* Each FW address shall be mapped and set individually, as we can't + * assume anything about their location in the memory address space. + */ + kbase_csf_update_firmware_memory( + kbdev, trace_buffer->gpu_va.data_address, data_buffer_gpu_va); + kbase_csf_update_firmware_memory( + kbdev, trace_buffer->gpu_va.insert_address, insert_gpu_va); + kbase_csf_update_firmware_memory( + kbdev, trace_buffer->gpu_va.extract_address, extract_gpu_va); + kbase_csf_update_firmware_memory( + kbdev, trace_buffer->gpu_va.size_address, + trace_buffer->num_pages << PAGE_SHIFT); + + trace_enable_size_dwords = + (trace_buffer->trace_enable_entry_count + 31) >> 5; + + for (i = 0; i < trace_enable_size_dwords; i++) { + kbase_csf_update_firmware_memory( + kbdev, trace_buffer->gpu_va.trace_enable + i*4, + trace_buffer->trace_enable_init_mask[i]); + } + + /* Store CPU virtual addresses for permanently mapped variables */ + trace_buffer->cpu_va.insert_cpu_va = insert_cpu_va; + trace_buffer->cpu_va.extract_cpu_va = extract_cpu_va; + + /* Update offsets */ + mcu_write_offset += cache_line_alignment; + mcu_rw_offset += cache_line_alignment; + } + +out: + return ret; +} + +void kbase_csf_firmware_trace_buffers_term(struct kbase_device *kbdev) +{ + if (list_empty(&kbdev->csf.firmware_trace_buffers.list)) + return; + + while (!list_empty(&kbdev->csf.firmware_trace_buffers.list)) { + struct firmware_trace_buffer *trace_buffer; + + trace_buffer = list_first_entry(&kbdev->csf.firmware_trace_buffers.list, + struct firmware_trace_buffer, node); + 
kbase_csf_firmware_mcu_shared_mapping_term(kbdev, &trace_buffer->data_mapping); + list_del(&trace_buffer->node); + + kfree(trace_buffer); + } + + kbase_csf_firmware_mcu_shared_mapping_term( + kbdev, &kbdev->csf.firmware_trace_buffers.mcu_rw); + kbase_csf_firmware_mcu_shared_mapping_term( + kbdev, &kbdev->csf.firmware_trace_buffers.mcu_write); +} + +int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev, + const u32 *entry, + unsigned int size, + bool updatable) +{ + const char *name = (char *)&entry[7]; + const unsigned int name_len = size - TRACE_BUFFER_ENTRY_NAME_OFFSET; + struct firmware_trace_buffer *trace_buffer; + unsigned int i; + + /* Allocate enough space for struct firmware_trace_buffer and the + * trace buffer name (with NULL termination). + */ + trace_buffer = + kmalloc(sizeof(*trace_buffer) + name_len + 1, GFP_KERNEL); + + if (!trace_buffer) + return -ENOMEM; + + memcpy(&trace_buffer->name, name, name_len); + trace_buffer->name[name_len] = '\0'; + + for (i = 0; i < ARRAY_SIZE(trace_buffer_data); i++) { + if (!strcmp(trace_buffer_data[i].name, trace_buffer->name)) { + unsigned int j; + + trace_buffer->kbdev = kbdev; + trace_buffer->updatable = updatable; + trace_buffer->type = entry[0]; + trace_buffer->gpu_va.size_address = entry[1]; + trace_buffer->gpu_va.insert_address = entry[2]; + trace_buffer->gpu_va.extract_address = entry[3]; + trace_buffer->gpu_va.data_address = entry[4]; + trace_buffer->gpu_va.trace_enable = entry[5]; + trace_buffer->trace_enable_entry_count = entry[6]; + trace_buffer->num_pages = trace_buffer_data[i].size; + + for (j = 0; j < CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX; j++) { + trace_buffer->trace_enable_init_mask[j] = + trace_buffer_data[i].trace_enable_init_mask[j]; + } + break; + } + } + + if (i < ARRAY_SIZE(trace_buffer_data)) { + list_add(&trace_buffer->node, &kbdev->csf.firmware_trace_buffers.list); + dev_dbg(kbdev->dev, "Trace buffer '%s'", trace_buffer->name); + } else { + dev_dbg(kbdev->dev, "Unknown 
trace buffer '%s'", trace_buffer->name); + kfree(trace_buffer); + } + + return 0; +} + +void kbase_csf_firmware_reload_trace_buffers_data(struct kbase_device *kbdev) +{ + struct firmware_trace_buffer *trace_buffer; + u32 mcu_rw_offset = 0, mcu_write_offset = 0; + const u32 cache_line_alignment = kbase_get_cache_line_alignment(kbdev); + + list_for_each_entry(trace_buffer, &kbdev->csf.firmware_trace_buffers.list, node) { + u32 extract_gpu_va, insert_gpu_va, data_buffer_gpu_va, + trace_enable_size_dwords; + u32 *extract_cpu_va, *insert_cpu_va; + unsigned int i; + + /* Rely on the fact that all required mappings already exist */ + extract_gpu_va = + (kbdev->csf.firmware_trace_buffers.mcu_rw.va_reg->start_pfn << PAGE_SHIFT) + + mcu_rw_offset; + extract_cpu_va = (u32*)( + kbdev->csf.firmware_trace_buffers.mcu_rw.cpu_addr + + mcu_rw_offset); + insert_gpu_va = + (kbdev->csf.firmware_trace_buffers.mcu_write.va_reg->start_pfn << PAGE_SHIFT) + + mcu_write_offset; + insert_cpu_va = (u32*)( + kbdev->csf.firmware_trace_buffers.mcu_write.cpu_addr + + mcu_write_offset); + data_buffer_gpu_va = + (trace_buffer->data_mapping.va_reg->start_pfn << PAGE_SHIFT); + + /* Notice that the function only re-updates firmware memory locations + * with information that allows access to the trace buffers without + * really resetting their state. For instance, the Insert offset will + * not change and, as a consequence, the Extract offset is not going + * to be reset to keep consistency. + */ + + /* Each FW address shall be mapped and set individually, as we can't + * assume anything about their location in the memory address space. 
+ */ + kbase_csf_update_firmware_memory( + kbdev, trace_buffer->gpu_va.data_address, data_buffer_gpu_va); + kbase_csf_update_firmware_memory( + kbdev, trace_buffer->gpu_va.insert_address, insert_gpu_va); + kbase_csf_update_firmware_memory( + kbdev, trace_buffer->gpu_va.extract_address, extract_gpu_va); + kbase_csf_update_firmware_memory( + kbdev, trace_buffer->gpu_va.size_address, + trace_buffer->num_pages << PAGE_SHIFT); + + trace_enable_size_dwords = + (trace_buffer->trace_enable_entry_count + 31) >> 5; + + for (i = 0; i < trace_enable_size_dwords; i++) { + kbase_csf_update_firmware_memory( + kbdev, trace_buffer->gpu_va.trace_enable + i*4, + trace_buffer->trace_enable_init_mask[i]); + } + + /* Store CPU virtual addresses for permanently mapped variables, + * as they might have slightly changed. + */ + trace_buffer->cpu_va.insert_cpu_va = insert_cpu_va; + trace_buffer->cpu_va.extract_cpu_va = extract_cpu_va; + + /* Update offsets */ + mcu_write_offset += cache_line_alignment; + mcu_rw_offset += cache_line_alignment; + } +} + +struct firmware_trace_buffer *kbase_csf_firmware_get_trace_buffer( + struct kbase_device *kbdev, const char *name) +{ + struct firmware_trace_buffer *trace_buffer; + + list_for_each_entry(trace_buffer, &kbdev->csf.firmware_trace_buffers.list, node) { + if (!strcmp(trace_buffer->name, name)) + return trace_buffer; + } + + return NULL; +} +EXPORT_SYMBOL(kbase_csf_firmware_get_trace_buffer); + +unsigned int kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count( + const struct firmware_trace_buffer *trace_buffer) +{ + return trace_buffer->trace_enable_entry_count; +} +EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count); + +static void kbasep_csf_firmware_trace_buffer_update_trace_enable_bit( + struct firmware_trace_buffer *tb, unsigned int bit, bool value) +{ + struct kbase_device *kbdev = tb->kbdev; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (bit < tb->trace_enable_entry_count) { + unsigned int 
trace_enable_reg_offset = bit >> 5; + u32 trace_enable_bit_mask = 1u << (bit & 0x1F); + + if (value) { + tb->trace_enable_init_mask[trace_enable_reg_offset] |= + trace_enable_bit_mask; + } else { + tb->trace_enable_init_mask[trace_enable_reg_offset] &= + ~trace_enable_bit_mask; + } + + /* This is not strictly needed as the caller is supposed to + * reload the firmware image (through GPU reset) after updating + * the bitmask. Otherwise there is no guarantee that firmware + * will take into account the updated bitmask for all types of + * trace buffers, since firmware could continue to use the + * value of bitmask it cached after the boot. + */ + kbase_csf_update_firmware_memory( + kbdev, + tb->gpu_va.trace_enable + trace_enable_reg_offset * 4, + tb->trace_enable_init_mask[trace_enable_reg_offset]); + } +} + +int kbase_csf_firmware_trace_buffer_update_trace_enable_bit( + struct firmware_trace_buffer *tb, unsigned int bit, bool value) +{ + struct kbase_device *kbdev = tb->kbdev; + int err = 0; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + /* If trace buffer update cannot be performed with + * FIRMWARE_CONFIG_UPDATE then we need to do a + * silent reset before we update the memory. + */ + if (!tb->updatable) { + /* If there is already a GPU reset pending then inform + * the User to retry the update. 
+ */ + if (kbase_reset_gpu_silent(kbdev)) { + dev_warn( + kbdev->dev, + "GPU reset already in progress when enabling firmware timeline."); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return -EAGAIN; + } + } + + kbasep_csf_firmware_trace_buffer_update_trace_enable_bit(tb, bit, + value); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (tb->updatable) + err = kbase_csf_trigger_firmware_config_update(kbdev); + + return err; +} +EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_update_trace_enable_bit); + +bool kbase_csf_firmware_trace_buffer_is_empty( + const struct firmware_trace_buffer *trace_buffer) +{ + return *(trace_buffer->cpu_va.insert_cpu_va) == + *(trace_buffer->cpu_va.extract_cpu_va); +} +EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_is_empty); + +unsigned int kbase_csf_firmware_trace_buffer_read_data( + struct firmware_trace_buffer *trace_buffer, u8 *data, unsigned int num_bytes) +{ + unsigned int bytes_copied; + u8 *data_cpu_va = trace_buffer->data_mapping.cpu_addr; + u32 extract_offset = *(trace_buffer->cpu_va.extract_cpu_va); + u32 insert_offset = *(trace_buffer->cpu_va.insert_cpu_va); + u32 buffer_size = trace_buffer->num_pages << PAGE_SHIFT; + + if (insert_offset >= extract_offset) { + bytes_copied = min_t(unsigned int, num_bytes, + (insert_offset - extract_offset)); + memcpy(data, &data_cpu_va[extract_offset], bytes_copied); + extract_offset += bytes_copied; + } else { + unsigned int bytes_copied_head, bytes_copied_tail; + + bytes_copied_tail = min_t(unsigned int, num_bytes, + (buffer_size - extract_offset)); + memcpy(data, &data_cpu_va[extract_offset], bytes_copied_tail); + + bytes_copied_head = min_t(unsigned int, + (num_bytes - bytes_copied_tail), insert_offset); + memcpy(&data[bytes_copied_tail], data_cpu_va, bytes_copied_head); + + bytes_copied = bytes_copied_head + bytes_copied_tail; + extract_offset += bytes_copied; + if (extract_offset >= buffer_size) + extract_offset = bytes_copied_head; + } + + 
*(trace_buffer->cpu_va.extract_cpu_va) = extract_offset; + + return bytes_copied; +} +EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_read_data); + +#if IS_ENABLED(CONFIG_DEBUG_FS) + +#define U32_BITS 32 +static u64 get_trace_buffer_active_mask64(struct firmware_trace_buffer *tb) +{ + u64 active_mask = tb->trace_enable_init_mask[0]; + + if (tb->trace_enable_entry_count > U32_BITS) + active_mask |= (u64)tb->trace_enable_init_mask[1] << U32_BITS; + + return active_mask; +} + +static void update_trace_buffer_active_mask64(struct firmware_trace_buffer *tb, + u64 mask) +{ + unsigned int i; + + for (i = 0; i < tb->trace_enable_entry_count; i++) + kbasep_csf_firmware_trace_buffer_update_trace_enable_bit( + tb, i, (mask >> i) & 1); +} + +static int set_trace_buffer_active_mask64(struct firmware_trace_buffer *tb, + u64 mask) +{ + struct kbase_device *kbdev = tb->kbdev; + unsigned long flags; + int err = 0; + + if (!tb->updatable) { + /* If there is already a GPU reset pending, need a retry */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + if (kbase_reset_gpu_silent(kbdev)) + err = -EAGAIN; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + + if (!err) { + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + update_trace_buffer_active_mask64(tb, mask); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* if we can update the config we need to just trigger + * FIRMWARE_CONFIG_UPDATE. 
+ */ + if (tb->updatable) + err = kbase_csf_trigger_firmware_config_update(kbdev); + } + + return err; +} + +static int kbase_csf_firmware_trace_enable_mask_read(void *data, u64 *val) +{ + struct kbase_device *kbdev = (struct kbase_device *)data; + struct firmware_trace_buffer *tb = + kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME); + + if (tb == NULL) { + dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); + return -EIO; + } + /* The enabled traces limited to u64 here, regarded practical */ + *val = get_trace_buffer_active_mask64(tb); + return 0; +} + +static int kbase_csf_firmware_trace_enable_mask_write(void *data, u64 val) +{ + struct kbase_device *kbdev = (struct kbase_device *)data; + struct firmware_trace_buffer *tb = + kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME); + u64 new_mask; + unsigned int enable_bits_count; + + if (tb == NULL) { + dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); + return -EIO; + } + + /* Keep only supported bits; a 64-bit shift by >= 64 is undefined */ + enable_bits_count = + kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count(tb); + new_mask = val; + if (enable_bits_count < 64) + new_mask &= (1ULL << enable_bits_count) - 1; + else if (enable_bits_count > 64) + dev_dbg(kbdev->dev, "Limit enabled bits count from %u to 64", + enable_bits_count); + + if (new_mask != get_trace_buffer_active_mask64(tb)) + return set_trace_buffer_active_mask64(tb, new_mask); + else + return 0; +} + +static int kbasep_csf_firmware_trace_debugfs_open(struct inode *in, + struct file *file) +{ + struct kbase_device *kbdev = in->i_private; + + file->private_data = kbdev; + dev_dbg(kbdev->dev, "Opened firmware trace buffer dump debugfs file"); + + return 0; +} + +static ssize_t kbasep_csf_firmware_trace_debugfs_read(struct file *file, + char __user *buf, size_t size, loff_t *ppos) +{ + struct kbase_device *kbdev = file->private_data; + u8 *pbyte; + unsigned int n_read; + unsigned long not_copied; + /* Limit the kernel buffer to no more than two pages */ 
+ size_t mem = MIN(size, 2 * PAGE_SIZE); + unsigned long flags; + + struct firmware_trace_buffer *tb = + kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME); + + if (tb == NULL) { + dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); + return -EIO; + } + + pbyte = kmalloc(mem, GFP_KERNEL); + if (pbyte == NULL) { + dev_err(kbdev->dev, "Couldn't allocate memory for trace buffer dump"); + return -ENOMEM; + } + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + n_read = kbase_csf_firmware_trace_buffer_read_data(tb, pbyte, mem); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* Do the copy, if we have obtained some trace data */ + not_copied = (n_read) ? copy_to_user(buf, pbyte, n_read) : 0; + kfree(pbyte); + + if (!not_copied) { + *ppos += n_read; + return n_read; + } + + dev_err(kbdev->dev, "Couldn't copy trace buffer data to user space buffer"); + return -EFAULT; +} + + +DEFINE_SIMPLE_ATTRIBUTE(kbase_csf_firmware_trace_enable_mask_fops, + kbase_csf_firmware_trace_enable_mask_read, + kbase_csf_firmware_trace_enable_mask_write, "%llx\n"); + +static const struct file_operations kbasep_csf_firmware_trace_debugfs_fops = { + .owner = THIS_MODULE, + .open = kbasep_csf_firmware_trace_debugfs_open, + .read = kbasep_csf_firmware_trace_debugfs_read, + .llseek = no_llseek, +}; + +void kbase_csf_firmware_trace_buffer_debugfs_init(struct kbase_device *kbdev) +{ + debugfs_create_file("fw_trace_enable_mask", 0644, + kbdev->mali_debugfs_directory, kbdev, + &kbase_csf_firmware_trace_enable_mask_fops); + + debugfs_create_file("fw_traces", 0444, + kbdev->mali_debugfs_directory, kbdev, + &kbasep_csf_firmware_trace_debugfs_fops); +} +#endif /* CONFIG_DEBUG_FS */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_trace_buffer.h b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_trace_buffer.h new file mode 100644 index 0000000..b9f481d --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf_trace_buffer.h 
@@ -0,0 +1,182 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_TRACE_BUFFER_H_ +#define _KBASE_CSF_TRACE_BUFFER_H_ + +#include + +#define CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX (4) +#define FW_TRACE_BUF_NAME "fwlog" + +/* Forward declarations */ +struct firmware_trace_buffer; +struct kbase_device; + +/** + * kbase_csf_firmware_trace_buffers_init - Initialize trace buffers + * + * Allocate resources for trace buffers. In particular: + * - One memory page of GPU-readable, CPU-writable memory is used for + * the Extract variables of all trace buffers. + * - One memory page of GPU-writable, CPU-readable memory is used for + * the Insert variables of all trace buffers. + * - A data buffer of GPU-writable, CPU-readable memory is allocated + * for each trace buffer. + * + * After that, firmware addresses are written with pointers to the + * insert, extract and data buffer variables. The size and the trace + * enable bits are not dereferenced by the GPU and shall be written + * in the firmware addresses directly. 
+ * + * This function relies on the assumption that the list of + * firmware_trace_buffer elements in the device has already been + * populated with data from the firmware image parsing. + * + * Return: 0 if success, or an error code on failure. + * + * @kbdev: Device pointer + */ +int kbase_csf_firmware_trace_buffers_init(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_trace_buffers_term - Terminate trace buffers + * + * @kbdev: Device pointer + */ +void kbase_csf_firmware_trace_buffers_term(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_parse_trace_buffer_entry - Process a "trace buffer" section + * + * Read a "trace buffer" section adding metadata for the related trace buffer + * to the kbase_device:csf.firmware_trace_buffers list. + * + * Unexpected trace buffers will not be parsed and, as a consequence, + * will not be initialized. + * + * Return: 0 if successful, negative error code on failure. + * + * @kbdev: Kbase device structure + * @entry: Pointer to the section + * @size: Size (in bytes) of the section + * @updatable: Indicates whether config items can be updated with FIRMWARE_CONFIG_UPDATE + */ +int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev, + const u32 *entry, + unsigned int size, + bool updatable); + +/** + * kbase_csf_firmware_reload_trace_buffers_data - + * Reload trace buffers data for firmware reboot + * + * Helper function used when rebooting the firmware to reload the initial setup + * for all the trace buffers which have been previously parsed and initialized. + * + * Almost all of the operations done in the initialization process are + * replicated, with the difference that they might be done in a different order + * and that the variables of a given trace buffer may be mapped to different + * offsets within the same existing mappings. + * + * In other words, the re-initialization done by this function will be + * equivalent but not necessarily identical to the original initialization. 
+ * + * @kbdev: Device pointer + */ +void kbase_csf_firmware_reload_trace_buffers_data(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_get_trace_buffer - Get a trace buffer + * + * Return: handle to a trace buffer, given the name, or NULL if a trace buffer + * with that name couldn't be found. + * + * @kbdev: Device pointer + * @name: Name of the trace buffer to find + */ +struct firmware_trace_buffer *kbase_csf_firmware_get_trace_buffer( + struct kbase_device *kbdev, const char *name); + +/** + * kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count - + * Get number of trace enable bits for a trace buffer + * + * Return: Number of trace enable bits in a trace buffer. + * + * @trace_buffer: Trace buffer handle + */ +unsigned int kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count( + const struct firmware_trace_buffer *trace_buffer); + +/** + * kbase_csf_firmware_trace_buffer_update_trace_enable_bit - + * Update a trace enable bit + * + * Update the value of a given trace enable bit. + * + * @trace_buffer: Trace buffer handle + * @bit: Bit to update + * @value: New value for the given bit + * + * Return: 0 if successful, negative error code on failure. + */ +int kbase_csf_firmware_trace_buffer_update_trace_enable_bit( + struct firmware_trace_buffer *trace_buffer, unsigned int bit, + bool value); + +/** + * kbase_csf_firmware_trace_buffer_is_empty - Empty trace buffer predicate + * + * Return: True if the trace buffer is empty, or false otherwise. + * + * @trace_buffer: Trace buffer handle + */ +bool kbase_csf_firmware_trace_buffer_is_empty( + const struct firmware_trace_buffer *trace_buffer); + +/** + * kbase_csf_firmware_trace_buffer_read_data - Read data from a trace buffer + * + * Read available data from a trace buffer. The client provides a data buffer + * of a given size and the maximum number of bytes to read. + * + * Return: Number of bytes read from the trace buffer. 
+ * + * @trace_buffer: Trace buffer handle + * @data: Pointer to a client-allocated buffer where data shall be written. + * @num_bytes: Maximum number of bytes to read from the trace buffer. + */ +unsigned int kbase_csf_firmware_trace_buffer_read_data( + struct firmware_trace_buffer *trace_buffer, u8 *data, unsigned int num_bytes); + +#if IS_ENABLED(CONFIG_DEBUG_FS) +/** + * kbase_csf_firmware_trace_buffer_debugfs_init() - Add debugfs entries for setting + * enable mask and dumping the binary + * firmware trace buffer + * + * @kbdev: Pointer to the device + */ +void kbase_csf_firmware_trace_buffer_debugfs_init(struct kbase_device *kbdev); +#endif /* CONFIG_DEBUG_FS */ + +#endif /* _KBASE_CSF_TRACE_BUFFER_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/debug/Kbuild new file mode 100644 index 0000000..1682c0f --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/Kbuild @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. 
+# +# + +mali_kbase-y += debug/mali_kbase_debug_ktrace.o + +ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) + mali_kbase-y += debug/backend/mali_kbase_debug_ktrace_csf.o +else + mali_kbase-y += debug/backend/mali_kbase_debug_ktrace_jm.o +endif diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_codes_csf.h b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_codes_csf.h new file mode 100644 index 0000000..d05f802 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_codes_csf.h @@ -0,0 +1,278 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +/* + * ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE ***** + * ***** DO NOT INCLUDE DIRECTLY ***** + * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** + */ + +/* + * The purpose of this header file is just to contain a list of trace code + * identifiers + * + * When updating this file, also remember to update + * mali_kbase_debug_linux_ktrace_csf.h + * + * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THAT + * DESCRIBED IN mali_kbase_debug_ktrace_codes.h + */ + +#if 0 /* Dummy section to avoid breaking formatting */ +int dummy_array[] = { +#endif + /* + * Generic CSF events + */ + KBASE_KTRACE_CODE_MAKE_CODE(EVICT_CTX_SLOTS), + /* info_val[7:0] == fw version_minor + * info_val[15:8] == fw version_major + * info_val[63:32] == fw version_hash + */ + KBASE_KTRACE_CODE_MAKE_CODE(FIRMWARE_BOOT), + KBASE_KTRACE_CODE_MAKE_CODE(FIRMWARE_REBOOT), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOCK_END), + /* info_val == total number of runnable groups across all kctxs */ + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TICK_END), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_RESET), + /* info_val = timeout in ms */ + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_WAIT_PROTM_QUIT), + /* info_val = remaining ms timeout, or 0 if timed out */ + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_WAIT_PROTM_QUIT_DONE), + KBASE_KTRACE_CODE_MAKE_CODE(SYNC_UPDATE_EVENT), + KBASE_KTRACE_CODE_MAKE_CODE(SYNC_UPDATE_EVENT_NOTIFY_GPU), + + /* info_val = JOB_IRQ_STATUS */ + KBASE_KTRACE_CODE_MAKE_CODE(CSF_INTERRUPT), + /* info_val = JOB_IRQ_STATUS */ + KBASE_KTRACE_CODE_MAKE_CODE(CSF_INTERRUPT_END), + /* info_val = JOB_IRQ_STATUS */ + KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_PROCESS), + /* info_val = GLB_REQ ^ GLB_ACQ */ + KBASE_KTRACE_CODE_MAKE_CODE(GLB_REQ_ACQ), + /* info_val[31:0] = num non idle offslot groups + * info_val[32] = scheduler can suspend on idle + */ + 
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_CAN_IDLE), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_ADVANCE_TICK), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NOADVANCE_TICK), + /* kctx is added to the back of the list */ + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_INSERT_RUNNABLE), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_REMOVE_RUNNABLE), + /* kctx is moved to the back of the list */ + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_ROTATE_RUNNABLE), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_HEAD_RUNNABLE), + + KBASE_KTRACE_CODE_MAKE_CODE(IDLE_WORKER_BEGIN), + /* 4-bit encoding of boolean values (ease of reading as hex values) + * + * info_val[3:0] = was reset active/failed to be prevented + * info_val[7:4] = whether scheduler was both idle and suspendable + * info_val[11:8] = whether all groups were suspended + */ + KBASE_KTRACE_CODE_MAKE_CODE(IDLE_WORKER_END), + KBASE_KTRACE_CODE_MAKE_CODE(GROUP_SYNC_UPDATE_WORKER_BEGIN), + KBASE_KTRACE_CODE_MAKE_CODE(GROUP_SYNC_UPDATE_WORKER_END), + + /* info_val = bitmask of slots that gave an ACK for STATUS_UPDATE */ + KBASE_KTRACE_CODE_MAKE_CODE(SLOTS_STATUS_UPDATE_ACK), + + /* + * Group events + */ + /* info_val[2:0] == CSG_REQ state issued + * info_val[19:16] == as_nr + * info_val[63:32] == endpoint config (max number of endpoints allowed) + */ + KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_START), + /* info_val == CSG_REQ state issued */ + KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_STOP), + /* info_val == CSG_ACK state */ + KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_STARTED), + /* info_val == CSG_ACK state */ + KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_STOPPED), + /* info_val == slot cleaned */ + KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_CLEANED), + /* info_val = slot requesting STATUS_UPDATE */ + KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_STATUS_UPDATE), + /* info_val = scheduler's new csg_slots_idle_mask[0] + * group->csg_nr indicates which bit was set + */ + KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_IDLE_SET), + /* info_val = scheduler's new csg_slots_idle_mask[0] + * group->csg_nr 
indicates which bit was cleared + * + * in case of no group, multiple bits may have been updated + */ + KBASE_KTRACE_CODE_MAKE_CODE(CSG_SLOT_IDLE_CLEAR), + /* info_val == previous priority */ + KBASE_KTRACE_CODE_MAKE_CODE(CSG_PRIO_UPDATE), + /* info_val == CSG_REQ ^ CSG_ACK */ + KBASE_KTRACE_CODE_MAKE_CODE(CSG_SYNC_UPDATE_INTERRUPT), + /* info_val == CSG_REQ ^ CSG_ACK */ + KBASE_KTRACE_CODE_MAKE_CODE(CSG_IDLE_INTERRUPT), + /* info_val == CSG_REQ ^ CSG_ACK */ + KBASE_KTRACE_CODE_MAKE_CODE(CSG_PROGRESS_TIMER_INTERRUPT), + /* info_val[31:0] == CSG_REQ ^ CSG_ACK + * info_val[63:32] == CSG_IRQ_REQ ^ CSG_IRQ_ACK + */ + KBASE_KTRACE_CODE_MAKE_CODE(CSG_INTERRUPT_PROCESS_END), + KBASE_KTRACE_CODE_MAKE_CODE(GROUP_SYNC_UPDATE_DONE), + /* info_val == run state of the group */ + KBASE_KTRACE_CODE_MAKE_CODE(GROUP_DESCHEDULE), + /* info_val == run state of the group */ + KBASE_KTRACE_CODE_MAKE_CODE(GROUP_SCHEDULE), + /* info_val[31:0] == new run state of the evicted group + * info_val[63:32] == number of runnable groups + */ + KBASE_KTRACE_CODE_MAKE_CODE(GROUP_EVICT_SCHED), + + /* info_val == new num_runnable_grps + * group is added to the back of the list for its priority level + */ + KBASE_KTRACE_CODE_MAKE_CODE(GROUP_INSERT_RUNNABLE), + /* info_val == new num_runnable_grps + */ + KBASE_KTRACE_CODE_MAKE_CODE(GROUP_REMOVE_RUNNABLE), + /* info_val == num_runnable_grps + * group is moved to the back of the list for its priority level + */ + KBASE_KTRACE_CODE_MAKE_CODE(GROUP_ROTATE_RUNNABLE), + KBASE_KTRACE_CODE_MAKE_CODE(GROUP_HEAD_RUNNABLE), + /* info_val == new num_idle_wait_grps + * group is added to the back of the list + */ + KBASE_KTRACE_CODE_MAKE_CODE(GROUP_INSERT_IDLE_WAIT), + /* info_val == new num_idle_wait_grps + * group is removed from the list + */ + KBASE_KTRACE_CODE_MAKE_CODE(GROUP_REMOVE_IDLE_WAIT), + KBASE_KTRACE_CODE_MAKE_CODE(GROUP_HEAD_IDLE_WAIT), + + /* info_val == is scheduler running with protected mode tasks */ + 
KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_CHECK_PROTM_ENTER), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_ENTER_PROTM), + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_EXIT_PROTM), + /* info_val[31:0] == number of GPU address space slots in use + * info_val[63:32] == number of runnable groups + */ + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_TOP_GRP), + /* info_val == new count of off-slot non-idle groups + * no group indicates it was set rather than incremented + */ + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_INC), + /* info_val == new count of off-slot non-idle groups */ + KBASE_KTRACE_CODE_MAKE_CODE(SCHEDULER_NONIDLE_OFFSLOT_DEC), + + KBASE_KTRACE_CODE_MAKE_CODE(PROTM_EVENT_WORKER_BEGIN), + KBASE_KTRACE_CODE_MAKE_CODE(PROTM_EVENT_WORKER_END), + + /* + * Group + Queue events + */ + /* info_val == queue->enabled */ + KBASE_KTRACE_CODE_MAKE_CODE(CSI_START), + /* info_val == queue->enabled before stop */ + KBASE_KTRACE_CODE_MAKE_CODE(CSI_STOP), + KBASE_KTRACE_CODE_MAKE_CODE(CSI_STOP_REQUESTED), + /* info_val == CS_REQ ^ CS_ACK that were not processed due to the group + * being suspended + */ + KBASE_KTRACE_CODE_MAKE_CODE(CSI_IGNORED_INTERRUPTS_GROUP_SUSPEND), + /* info_val == CS_REQ ^ CS_ACK */ + KBASE_KTRACE_CODE_MAKE_CODE(CSI_FAULT_INTERRUPT), + /* info_val == CS_REQ ^ CS_ACK */ + KBASE_KTRACE_CODE_MAKE_CODE(CSI_TILER_OOM_INTERRUPT), + /* info_val == CS_REQ ^ CS_ACK */ + KBASE_KTRACE_CODE_MAKE_CODE(CSI_PROTM_PEND_INTERRUPT), + /* info_val == CS_ACK_PROTM_PEND ^ CS_REQ_PROTM_PEND */ + KBASE_KTRACE_CODE_MAKE_CODE(CSI_PROTM_ACK), + /* info_val == group->run_state (for group the queue is bound to) */ + KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_START), + KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_STOP), + /* info_val == contents of CS_STATUS_WAIT_SYNC_POINTER */ + KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE), + /* info_val == bool for result of the evaluation */ + KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_UPDATE_EVALUATED), + /* info_val == contents of CS_STATUS_WAIT */ + 
KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_STATUS_WAIT), + /* info_val == current sync value pointed to by queue->sync_ptr */ + KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_CURRENT_VAL), + /* info_val == current value of CS_STATUS_WAIT_SYNC_VALUE */ + KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_TEST_VAL), + /* info_val == current value of CS_STATUS_BLOCKED_REASON */ + KBASE_KTRACE_CODE_MAKE_CODE(QUEUE_SYNC_BLOCKED_REASON), + /* info_val = group's new protm_pending_bitmap[0] + * queue->csi_index indicates which bit was set + */ + KBASE_KTRACE_CODE_MAKE_CODE(PROTM_PENDING_SET), + /* info_val = group's new protm_pending_bitmap[0] + * queue->csi_index indicates which bit was cleared + */ + KBASE_KTRACE_CODE_MAKE_CODE(PROTM_PENDING_CLEAR), + + /* + * KCPU queue events + */ + /* KTrace info_val == KCPU queue fence context + * KCPU extra_info_val == N/A. + */ + KBASE_KTRACE_CODE_MAKE_CODE(KCPU_QUEUE_NEW), + /* KTrace info_val == Number of pending commands in KCPU queue when + * it is destroyed. + * KCPU extra_info_val == Number of CQS wait operations present in + * the KCPU queue when it is destroyed. + */ + KBASE_KTRACE_CODE_MAKE_CODE(KCPU_QUEUE_DESTROY), + /* KTrace info_val == CQS event memory address + * KCPU extra_info_val == Upper 32 bits of event memory, i.e. contents + * of error field. + */ + KBASE_KTRACE_CODE_MAKE_CODE(CQS_SET), + /* KTrace info_val == Number of CQS objects to be waited upon + * KCPU extra_info_val == N/A. + */ + KBASE_KTRACE_CODE_MAKE_CODE(CQS_WAIT_START), + /* KTrace info_val == CQS event memory address + * KCPU extra_info_val == 1 if CQS was signaled with an error and queue + * inherited the error, otherwise 0. + */ + KBASE_KTRACE_CODE_MAKE_CODE(CQS_WAIT_END), + /* KTrace info_val == Fence context + * KCPU extra_info_val == Fence seqno. + */ + KBASE_KTRACE_CODE_MAKE_CODE(FENCE_SIGNAL), + /* KTrace info_val == Fence context + * KCPU extra_info_val == Fence seqno. 
+ */ + KBASE_KTRACE_CODE_MAKE_CODE(FENCE_WAIT_START), + /* KTrace info_val == Fence context + * KCPU extra_info_val == Fence seqno. + */ + KBASE_KTRACE_CODE_MAKE_CODE(FENCE_WAIT_END), + +#if 0 /* Dummy section to avoid breaking formatting */ +}; +#endif + +/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_codes_jm.h b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_codes_jm.h index d534f30..f419f70 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_codes_jm.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_codes_jm.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2015,2018-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2015, 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ /* @@ -30,6 +29,9 @@ * The purpose of this header file is just to contain a list of trace code * identifiers * + * When updating this file, also remember to update + * mali_kbase_debug_linux_ktrace_jm.h + * * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THAT * DESCRIBED IN mali_kbase_debug_ktrace_codes.h */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_csf.c b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_csf.c new file mode 100644 index 0000000..824ca4b --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_csf.c @@ -0,0 +1,193 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include +#include "debug/mali_kbase_debug_ktrace_internal.h" +#include "debug/backend/mali_kbase_debug_ktrace_csf.h" + +#if KBASE_KTRACE_TARGET_RBUF + +void kbasep_ktrace_backend_format_header(char *buffer, int sz, s32 *written) +{ + *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), + "group,slot,prio,csi,kcpu"), 0); +} + +void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, + char *buffer, int sz, s32 *written) +{ + const union kbase_ktrace_backend * const be_msg = &trace_msg->backend; + /* At present, no need to check for KBASE_KTRACE_FLAG_BACKEND, as the + * other backend-specific flags currently imply this anyway + */ + + /* group parts */ + if (be_msg->gpu.flags & KBASE_KTRACE_FLAG_CSF_GROUP) { + const s8 slot = be_msg->gpu.csg_nr; + /* group,slot, */ + *written += MAX(snprintf(buffer + *written, + MAX(sz - *written, 0), + "%u,%d,", be_msg->gpu.group_handle, slot), 0); + + /* prio */ + if (slot >= 0) + *written += MAX(snprintf(buffer + *written, + MAX(sz - *written, 0), + "%u", be_msg->gpu.slot_prio), 0); + + /* , */ + *written += MAX(snprintf(buffer + *written, + MAX(sz - *written, 0), + ","), 0); + } else { + /* No group,slot,prio fields, but ensure ending with "," */ + *written += MAX(snprintf(buffer + *written, + MAX(sz - *written, 0), + ",,,"), 0); + } + + /* queue parts: csi */ + if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_CSF_QUEUE) + *written += MAX(snprintf(buffer + *written, + MAX(sz - *written, 0), + "%d", be_msg->gpu.csi_index), 0); + + /* , */ + *written += MAX(snprintf(buffer + *written, + MAX(sz - *written, 0), + ","), 0); + + if (be_msg->gpu.flags & KBASE_KTRACE_FLAG_CSF_KCPU) { + /* kcpu data */ + *written += MAX(snprintf(buffer + *written, + MAX(sz - *written, 0), + "kcpu %d (0x%llx)", + be_msg->kcpu.id, + be_msg->kcpu.extra_info_val), 0); + } + + /* Don't end with a trailing "," - this is a 'standalone' formatted + * msg, caller will handle the delimiters + */ +} + +void 
kbasep_ktrace_add_csf(struct kbase_device *kbdev, + enum kbase_ktrace_code code, struct kbase_queue_group *group, + struct kbase_queue *queue, kbase_ktrace_flag_t flags, + u64 info_val) +{ + unsigned long irqflags; + struct kbase_ktrace_msg *trace_msg; + struct kbase_context *kctx = NULL; + + spin_lock_irqsave(&kbdev->ktrace.lock, irqflags); + + /* Reserve and update indices */ + trace_msg = kbasep_ktrace_reserve(&kbdev->ktrace); + + /* Determine the kctx */ + if (group) + kctx = group->kctx; + else if (queue) + kctx = queue->kctx; + + /* Fill the common part of the message (including backend.gpu.flags) */ + kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, flags, + info_val); + + /* Indicate to the common code that backend-specific parts will be + * valid + */ + trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_BACKEND; + + /* Fill the CSF-specific parts of the message + * + * Generally, no need to use default initializers when queue/group not + * present - can usually check the flags instead. 
+ */ + + if (queue) { + trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_CSF_QUEUE; + trace_msg->backend.gpu.csi_index = queue->csi_index; + } + + if (group) { + const s8 slot = group->csg_nr; + + trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_CSF_GROUP; + + trace_msg->backend.gpu.csg_nr = slot; + + if (slot >= 0) { + struct kbase_csf_csg_slot *csg_slot = + &kbdev->csf.scheduler.csg_slots[slot]; + + trace_msg->backend.gpu.slot_prio = + csg_slot->priority; + } + /* slot >=0 indicates whether slot_prio valid, so no need to + * initialize in the case where it's invalid + */ + + trace_msg->backend.gpu.group_handle = group->handle; + } + + WARN_ON((trace_msg->backend.gpu.flags & ~KBASE_KTRACE_FLAG_ALL)); + + /* Done */ + spin_unlock_irqrestore(&kbdev->ktrace.lock, irqflags); +} + +void kbasep_ktrace_add_csf_kcpu(struct kbase_device *kbdev, + enum kbase_ktrace_code code, + struct kbase_kcpu_command_queue *queue, + u64 info_val1, u64 info_val2) +{ + unsigned long irqflags; + struct kbase_ktrace_msg *trace_msg; + struct kbase_context *kctx = queue->kctx; + + spin_lock_irqsave(&kbdev->ktrace.lock, irqflags); + + /* Reserve and update indices */ + trace_msg = kbasep_ktrace_reserve(&kbdev->ktrace); + + /* Fill the common part of the message */ + kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, 0, + info_val1); + + /* Indicate to the common code that backend-specific parts will be + * valid + */ + trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_BACKEND; + + /* Fill the KCPU-specific parts of the message */ + trace_msg->backend.kcpu.id = queue->id; + trace_msg->backend.kcpu.extra_info_val = info_val2; + trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_CSF_KCPU; + + WARN_ON((trace_msg->backend.gpu.flags & ~KBASE_KTRACE_FLAG_ALL)); + + /* Done */ + spin_unlock_irqrestore(&kbdev->ktrace.lock, irqflags); +} + +#endif /* KBASE_KTRACE_TARGET_RBUF */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_csf.h 
b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_csf.h new file mode 100644 index 0000000..0593c30 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_csf.h @@ -0,0 +1,203 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_DEBUG_KTRACE_CSF_H_ +#define _KBASE_DEBUG_KTRACE_CSF_H_ + +/* + * KTrace target for internal ringbuffer + */ +#if KBASE_KTRACE_TARGET_RBUF +/** + * kbasep_ktrace_add_csf - internal function to add trace about CSF + * @kbdev: kbase device + * @code: trace code + * @group: queue group, or NULL if no queue group + * @queue: queue, or NULL if no queue + * @flags: flags about the message + * @info_val: generic information about @code to add to the trace + * + * PRIVATE: do not use directly. Use KBASE_KTRACE_ADD_CSF() instead. + */ + +void kbasep_ktrace_add_csf(struct kbase_device *kbdev, + enum kbase_ktrace_code code, struct kbase_queue_group *group, + struct kbase_queue *queue, kbase_ktrace_flag_t flags, + u64 info_val); + +/** + * kbasep_ktrace_add_csf_kcpu - internal function to add trace about the CSF + * KCPU queues. 
+ * @kbdev: kbase device + * @code: trace code + * @queue: KCPU command queue; must not be NULL (it is dereferenced) + * @info_val1: Main infoval variable with information based on the KCPU + * ktrace call. Refer to mali_kbase_debug_ktrace_codes_csf.h + * for information on the infoval values. + * @info_val2: Extra infoval variable with information based on the KCPU + * ktrace call. Refer to mali_kbase_debug_ktrace_codes_csf.h + * for information on the infoval values. + * + * PRIVATE: do not use directly. Use KBASE_KTRACE_ADD_CSF_KCPU() instead. + */ +void kbasep_ktrace_add_csf_kcpu(struct kbase_device *kbdev, + enum kbase_ktrace_code code, + struct kbase_kcpu_command_queue *queue, + u64 info_val1, u64 info_val2); + +#define KBASE_KTRACE_RBUF_ADD_CSF(kbdev, code, group, queue, flags, info_val) \ + kbasep_ktrace_add_csf(kbdev, KBASE_KTRACE_CODE(code), group, queue, \ + flags, info_val) + +#define KBASE_KTRACE_RBUF_ADD_CSF_KCPU(kbdev, code, queue, info_val1, \ + info_val2) kbasep_ktrace_add_csf_kcpu(kbdev, KBASE_KTRACE_CODE(code), \ + queue, info_val1, info_val2) + +#else /* KBASE_KTRACE_TARGET_RBUF */ + +#define KBASE_KTRACE_RBUF_ADD_CSF(kbdev, code, group, queue, flags, info_val) \ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(group);\ + CSTD_UNUSED(queue);\ + CSTD_UNUSED(flags);\ + CSTD_UNUSED(info_val);\ + CSTD_NOP(0);\ + } while (0) + +#define KBASE_KTRACE_RBUF_ADD_CSF_KCPU(kbdev, code, queue, info_val1, info_val2) \ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(queue);\ + CSTD_UNUSED(info_val1);\ + CSTD_UNUSED(info_val2);\ + } while (0) + +#endif /* KBASE_KTRACE_TARGET_RBUF */ + +/* + * KTrace target for Linux's ftrace + * + * Note: the header file(s) that define the trace_mali_<...> tracepoints are + * included by the parent header file + */ +#if KBASE_KTRACE_TARGET_FTRACE + +#define KBASE_KTRACE_FTRACE_ADD_CSF(kbdev, code, group, queue, info_val) \ + trace_mali_##code(kbdev, group, queue, info_val) + +#define KBASE_KTRACE_FTRACE_ADD_KCPU(code, queue,
info_val1, info_val2) \ + trace_mali_##code(queue, info_val1, info_val2) + +#else /* KBASE_KTRACE_TARGET_FTRACE */ + +#define KBASE_KTRACE_FTRACE_ADD_CSF(kbdev, code, group, queue, info_val) \ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(group);\ + CSTD_UNUSED(queue);\ + CSTD_UNUSED(info_val);\ + CSTD_NOP(0);\ + } while (0) + +#define KBASE_KTRACE_FTRACE_ADD_KCPU(code, queue, info_val1, info_val2) \ + do {\ + CSTD_NOP(code);\ + CSTD_UNUSED(queue);\ + CSTD_UNUSED(info_val1);\ + CSTD_UNUSED(info_val2);\ + } while (0) + +#endif /* KBASE_KTRACE_TARGET_FTRACE */ + +/* + * Master set of macros to route KTrace to any of the targets + */ + +/** + * KBASE_KTRACE_ADD_CSF_GRP - Add trace values about a group, with info + * @kbdev: kbase device + * @code: trace code + * @group: queue group, or NULL if no queue group + * @info_val: generic information about @code to add to the trace + * + * Note: Any functions called through this macro will still be evaluated in + * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when + * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied + * to this macro must: + * a) be static or static inline, and + * b) just return 0 and have no other statements present in the body. 
+ */ +#define KBASE_KTRACE_ADD_CSF_GRP(kbdev, code, group, info_val) \ + do { \ + /* capture values that could come from non-pure fn calls */ \ + struct kbase_queue_group *__group = group; \ + u64 __info_val = info_val; \ + KBASE_KTRACE_RBUF_ADD_CSF(kbdev, code, __group, NULL, 0u, \ + __info_val); \ + KBASE_KTRACE_FTRACE_ADD_CSF(kbdev, code, __group, NULL, \ + __info_val); \ + } while (0) + +/** + * KBASE_KTRACE_ADD_CSF_GRP_Q - Add trace values about a group, queue, with info + * @kbdev: kbase device + * @code: trace code + * @group: queue group, or NULL if no queue group + * @queue: queue, or NULL if no queue + * @info_val: generic information about @code to add to the trace + * + * Note: Any functions called through this macro will still be evaluated in + * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when + * KBASE_KTRACE_ENABLE == 0 any functions called to get the parameters supplied + * to this macro must: + * a) be static or static inline, and + * b) just return 0 and have no other statements present in the body. 
+ */ +#define KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, code, group, queue, info_val) \ + do { \ + /* capture values that could come from non-pure fn calls */ \ + struct kbase_queue_group *__group = group; \ + struct kbase_queue *__queue = queue; \ + u64 __info_val = info_val; \ + KBASE_KTRACE_RBUF_ADD_CSF(kbdev, code, __group, __queue, 0u, \ + __info_val); \ + KBASE_KTRACE_FTRACE_ADD_CSF(kbdev, code, __group, \ + __queue, __info_val); \ + } while (0) + + +#define KBASE_KTRACE_ADD_CSF_KCPU(kbdev, code, queue, info_val1, info_val2) \ + do { \ + /* capture values that could come from non-pure fn calls */ \ + struct kbase_kcpu_command_queue *__queue = queue; \ + u64 __info_val1 = info_val1; \ + u64 __info_val2 = info_val2; \ + KBASE_KTRACE_RBUF_ADD_CSF_KCPU(kbdev, code, __queue, \ + __info_val1, __info_val2); \ + KBASE_KTRACE_FTRACE_ADD_KCPU(code, __queue, \ + __info_val1, __info_val2); \ + } while (0) + +#endif /* _KBASE_DEBUG_KTRACE_CSF_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_defs_csf.h b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_defs_csf.h new file mode 100644 index 0000000..7f32cd2 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_defs_csf.h @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_DEBUG_KTRACE_DEFS_CSF_H_ +#define _KBASE_DEBUG_KTRACE_DEFS_CSF_H_ + +#if KBASE_KTRACE_TARGET_RBUF +/** + * DOC: KTrace version history, CSF variant + * + * 1.0: + * First version, with version information in the header. + * + * 1.1: + * kctx field is no longer a pointer, and is now an ID of the format %d_%u as + * used by kctx directories in mali debugfs entries: (tgid creating the kctx), + * (unique kctx id). + * + * ftrace backend now outputs kctx field (as %d_%u format). + * + * Add fields group, slot, prio, csi into backend-specific part. + * + * 1.2: + * There is a new class of KCPU traces; with this, a new KCPU column in the + * ringbuffer RBUF (mali_trace) between csi and info_val, which is empty + * for non-kcpu related traces, and usually displays the KCPU Queue ID and + * an extra information value. ftrace also displays these KCPU traces. + * + * 1.3: + * Add a lot of extra new traces. Tweak some existing scheduler related traces + * to contain extra information/happen at slightly different times. + * SCHEDULER_EXIT_PROTM now has group information. + */ +#define KBASE_KTRACE_VERSION_MAJOR 1 +#define KBASE_KTRACE_VERSION_MINOR 3 + +/* indicates if the trace message has valid queue-group related info. */ +#define KBASE_KTRACE_FLAG_CSF_GROUP (((kbase_ktrace_flag_t)1) << 0) + +/* indicates if the trace message has valid queue related info. */ +#define KBASE_KTRACE_FLAG_CSF_QUEUE (((kbase_ktrace_flag_t)1) << 1) + +/* indicates if the trace message has valid KCPU-queue related info.
*/ +#define KBASE_KTRACE_FLAG_CSF_KCPU (((kbase_ktrace_flag_t)1) << 2) + +/* Collect all the flags together for debug checking */ +#define KBASE_KTRACE_FLAG_BACKEND_ALL \ + (KBASE_KTRACE_FLAG_CSF_GROUP | KBASE_KTRACE_FLAG_CSF_QUEUE | \ + KBASE_KTRACE_FLAG_CSF_KCPU) + +/** + * union kbase_ktrace_backend - backend specific part of a trace message + * @kcpu: kcpu union member + * @kcpu.code: Identifies the event, refer to enum kbase_ktrace_code. + * @kcpu.flags: indicates information about the trace message itself. Used + * during dumping of the message. + * @kcpu.id: ID of the KCPU queue. + * @kcpu.extra_info_val: value specific to the type of KCPU event being traced. + * Refer to the KCPU specific code in enum kbase_ktrace_code in + * mali_kbase_debug_ktrace_codes_csf.h + * @gpu: gpu union member + * @gpu.code: Identifies the event, refer to enum kbase_ktrace_code. + * @gpu.flags: indicates information about the trace message itself. Used + * during dumping of the message. + * @gpu.group_handle: Handle identifying the associated queue group. Only valid + * when @flags contains KBASE_KTRACE_FLAG_CSF_GROUP. + * @gpu.csg_nr: Number/index of the associated queue group's CS group to + * which it is mapped, or negative if none associated. Only + * valid when @flags contains KBASE_KTRACE_FLAG_CSF_GROUP. + * @gpu.slot_prio: The priority of the slot for the associated group, if it + * was scheduled. Hence, only valid when @csg_nr >=0 and + * @flags contains KBASE_KTRACE_FLAG_CSF_GROUP. + * @gpu.csi_index: ID of the associated queue's CS HW interface. + * Only valid when @flags contains KBASE_KTRACE_FLAG_CSF_QUEUE.
+ */ + +union kbase_ktrace_backend { + /* Place 64 and 32-bit members together */ + /* Pack smaller members together */ + struct { + kbase_ktrace_code_t code; + kbase_ktrace_flag_t flags; + u8 id; + u64 extra_info_val; + } kcpu; + + struct { + kbase_ktrace_code_t code; + kbase_ktrace_flag_t flags; + u8 group_handle; + s8 csg_nr; + u8 slot_prio; + s8 csi_index; + } gpu; +}; + +#endif /* KBASE_KTRACE_TARGET_RBUF */ +#endif /* _KBASE_DEBUG_KTRACE_DEFS_CSF_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_defs_jm.h b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_defs_jm.h index 55b66ad..c01f930 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_defs_jm.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_defs_jm.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,25 +17,44 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KBASE_DEBUG_KTRACE_DEFS_JM_H_ #define _KBASE_DEBUG_KTRACE_DEFS_JM_H_ +#if KBASE_KTRACE_TARGET_RBUF /** * DOC: KTrace version history, JM variant + * * 1.0: - * - Original version (implicit, header did not carry version information) + * Original version (implicit, header did not carry version information). 
+ * + * 2.0: - * - Introduced version information into the header - * - some changes of parameter names in header - * - trace now uses all 64-bits of info_val - * - Non-JM specific parts moved to using info_val instead of refcount/gpu_addr + * Introduced version information into the header. + * + * Some changes of parameter names in header. + * + * Trace now uses all 64-bits of info_val. + * + * Non-JM specific parts moved to using info_val instead of refcount/gpu_addr. + * + * 2.1: + * kctx field is no longer a pointer, and is now an ID of the format %d_%u as + * used by kctx directories in mali debugfs entries: (tgid creating the kctx), + * (unique kctx id). + * + * ftrace backend now outputs kctx field (as %d_%u format). + * */ #define KBASE_KTRACE_VERSION_MAJOR 2 -#define KBASE_KTRACE_VERSION_MINOR 0 +#define KBASE_KTRACE_VERSION_MINOR 1 +#endif /* KBASE_KTRACE_TARGET_RBUF */ + +/* + * Note: mali_kbase_debug_ktrace_jm.h needs these values even if the RBUF target + * is disabled (they get discarded with CSTD_UNUSED(), but they're still + * referenced) + */ /* indicates if the trace message has a valid refcount member */ #define KBASE_KTRACE_FLAG_JM_REFCOUNT (((kbase_ktrace_flag_t)1) << 0) @@ -43,33 +63,47 @@ /* indicates if the trace message has valid atom related info. */ #define KBASE_KTRACE_FLAG_JM_ATOM (((kbase_ktrace_flag_t)1) << 2) +#if KBASE_KTRACE_TARGET_RBUF +/* Collect all the flags together for debug checking */ +#define KBASE_KTRACE_FLAG_BACKEND_ALL \ + (KBASE_KTRACE_FLAG_JM_REFCOUNT | KBASE_KTRACE_FLAG_JM_JOBSLOT \ + | KBASE_KTRACE_FLAG_JM_ATOM) /** - * struct kbase_ktrace_backend - backend specific part of a trace message + * union kbase_ktrace_backend - backend specific part of a trace message + * Contains only a struct but is a union such that it is compatible with + * generic JM and CSF KTrace calls. * - * @atom_udata: Copy of the user data sent for the atom in base_jd_submit.
- * Only valid if KBASE_KTRACE_FLAG_JM_ATOM is set in @flags - * @gpu_addr: GPU address, usually of the job-chain represented by an atom. - * @atom_number: id of the atom for which trace message was added. Only valid - * if KBASE_KTRACE_FLAG_JM_ATOM is set in @flags - * @code: Identifies the event, refer to enum kbase_ktrace_code. - * @flags: indicates information about the trace message itself. Used - * during dumping of the message. - * @jobslot: job-slot for which trace message was added, valid only for - * job-slot management events. - * @refcount: reference count for the context, valid for certain events - * related to scheduler core and policy. + * @gpu: gpu union member + * @gpu.atom_udata: Copy of the user data sent for the atom in base_jd_submit. + * Only valid if KBASE_KTRACE_FLAG_JM_ATOM is set in @flags + * @gpu.gpu_addr: GPU address, usually of the job-chain represented by an + * atom. + * @gpu.atom_number: id of the atom for which trace message was added. Only + * valid if KBASE_KTRACE_FLAG_JM_ATOM is set in @flags + * @gpu.code: Identifies the event, refer to enum kbase_ktrace_code. + * @gpu.flags: indicates information about the trace message itself. Used + * during dumping of the message. + * @gpu.jobslot: job-slot for which trace message was added, valid only for + * job-slot management events. + * @gpu.refcount: reference count for the context, valid for certain events + * related to scheduler core and policy. 
*/ -struct kbase_ktrace_backend { - /* Place 64 and 32-bit members together */ - u64 atom_udata[2]; /* Only valid for KBASE_KTRACE_FLAG_JM_ATOM */ - u64 gpu_addr; - int atom_number; /* Only valid for KBASE_KTRACE_FLAG_JM_ATOM */ - /* Pack smaller members together */ - kbase_ktrace_code_t code; - kbase_ktrace_flag_t flags; - u8 jobslot; - u8 refcount; +union kbase_ktrace_backend { + struct { + /* Place 64 and 32-bit members together */ + u64 atom_udata[2]; /* Only valid for + * KBASE_KTRACE_FLAG_JM_ATOM + */ + u64 gpu_addr; + int atom_number; /* Only valid for KBASE_KTRACE_FLAG_JM_ATOM */ + /* Pack smaller members together */ + kbase_ktrace_code_t code; + kbase_ktrace_flag_t flags; + u8 jobslot; + u8 refcount; + } gpu; }; +#endif /* KBASE_KTRACE_TARGET_RBUF */ #endif /* _KBASE_DEBUG_KTRACE_DEFS_JM_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_jm.c b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_jm.c index e651a09..fed9c1f 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_jm.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_jm.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,9 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ + #include #include "debug/mali_kbase_debug_ktrace_internal.h" #include "debug/backend/mali_kbase_debug_ktrace_jm.h" @@ -35,38 +35,39 @@ void kbasep_ktrace_backend_format_msg(struct kbase_ktrace_msg *trace_msg, char *buffer, int sz, s32 *written) { /* katom */ - if (trace_msg->backend.flags & KBASE_KTRACE_FLAG_JM_ATOM) + if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_JM_ATOM) *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), "atom %d (ud: 0x%llx 0x%llx)", - trace_msg->backend.atom_number, - trace_msg->backend.atom_udata[0], - trace_msg->backend.atom_udata[1]), 0); + trace_msg->backend.gpu.atom_number, + trace_msg->backend.gpu.atom_udata[0], + trace_msg->backend.gpu.atom_udata[1]), 0); /* gpu_addr */ - if (trace_msg->backend.flags & KBASE_KTRACE_FLAG_BACKEND) + if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_BACKEND) *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), - ",%.8llx,", trace_msg->backend.gpu_addr), 0); + ",%.8llx,", trace_msg->backend.gpu.gpu_addr), + 0); else *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), ",,"), 0); /* jobslot */ - if (trace_msg->backend.flags & KBASE_KTRACE_FLAG_JM_JOBSLOT) + if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_JM_JOBSLOT) *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), - "%d", trace_msg->backend.jobslot), 0); + "%d", trace_msg->backend.gpu.jobslot), 0); *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), ","), 0); /* refcount */ - if (trace_msg->backend.flags & KBASE_KTRACE_FLAG_JM_REFCOUNT) + if (trace_msg->backend.gpu.flags & KBASE_KTRACE_FLAG_JM_REFCOUNT) *written += MAX(snprintf(buffer + *written, MAX(sz - *written, 0), - "%d", trace_msg->backend.refcount), 0); + "%d", trace_msg->backend.gpu.refcount), 0); } void kbasep_ktrace_add_jm(struct kbase_device *kbdev, @@ -83,28 +84,31 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev, /* Reserve and update indices */ 
trace_msg = kbasep_ktrace_reserve(&kbdev->ktrace); - /* Fill the common part of the message (including backend.flags) */ + /* Fill the common part of the message (including backend.gpu.flags) */ kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, flags, info_val); /* Indicate to the common code that backend-specific parts will be * valid */ - trace_msg->backend.flags |= KBASE_KTRACE_FLAG_BACKEND; + trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_BACKEND; /* Fill the JM-specific parts of the message */ if (katom) { - trace_msg->backend.flags |= KBASE_KTRACE_FLAG_JM_ATOM; + trace_msg->backend.gpu.flags |= KBASE_KTRACE_FLAG_JM_ATOM; - trace_msg->backend.atom_number = kbase_jd_atom_id(katom->kctx, katom); - trace_msg->backend.atom_udata[0] = katom->udata.blob[0]; - trace_msg->backend.atom_udata[1] = katom->udata.blob[1]; + trace_msg->backend.gpu.atom_number = + kbase_jd_atom_id(katom->kctx, katom); + trace_msg->backend.gpu.atom_udata[0] = katom->udata.blob[0]; + trace_msg->backend.gpu.atom_udata[1] = katom->udata.blob[1]; } - trace_msg->backend.gpu_addr = gpu_addr; - trace_msg->backend.jobslot = jobslot; + trace_msg->backend.gpu.gpu_addr = gpu_addr; + trace_msg->backend.gpu.jobslot = jobslot; /* Clamp refcount */ - trace_msg->backend.refcount = MIN((unsigned int)refcount, 0xFF); + trace_msg->backend.gpu.refcount = MIN((unsigned int)refcount, 0xFF); + + WARN_ON((trace_msg->backend.gpu.flags & ~KBASE_KTRACE_FLAG_ALL)); /* Done */ spin_unlock_irqrestore(&kbdev->ktrace.lock, irqflags); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_jm.h b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_jm.h index c1bacf9..8b09d05 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_jm.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_ktrace_jm.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) 
COPYRIGHT 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KBASE_DEBUG_KTRACE_JM_H_ @@ -47,89 +46,24 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev, kbase_ktrace_flag_t flags, int refcount, int jobslot, u64 info_val); -#define KBASE_KTRACE_RBUF_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ - jobslot) \ - kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \ - gpu_addr, KBASE_KTRACE_FLAG_JM_JOBSLOT, 0, jobslot, 0) - -#define KBASE_KTRACE_RBUF_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, gpu_addr, \ - jobslot, info_val) \ +#define KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, gpu_addr, flags, \ + refcount, jobslot, info_val) \ kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \ - gpu_addr, KBASE_KTRACE_FLAG_JM_JOBSLOT, 0, jobslot, \ - info_val) - -#define KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, gpu_addr, \ - refcount) \ - kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \ - gpu_addr, KBASE_KTRACE_FLAG_JM_REFCOUNT, refcount, 0, 0) -#define KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \ - gpu_addr, refcount, info_val) \ - kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \ - gpu_addr, KBASE_KTRACE_FLAG_JM_REFCOUNT, refcount, 0, \ - info_val) - -#define KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, gpu_addr, info_val) \ 
- kbasep_ktrace_add_jm(kbdev, KBASE_KTRACE_CODE(code), kctx, katom, \ - gpu_addr, 0, 0, 0, info_val) + gpu_addr, flags, refcount, jobslot, info_val) #else /* KBASE_KTRACE_TARGET_RBUF */ -#define KBASE_KTRACE_RBUF_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ - jobslot) \ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(code);\ - CSTD_UNUSED(kctx);\ - CSTD_UNUSED(katom);\ - CSTD_UNUSED(gpu_addr);\ - CSTD_UNUSED(jobslot);\ - CSTD_NOP(0);\ - } while (0) - -#define KBASE_KTRACE_RBUF_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, gpu_addr, \ - jobslot, info_val) \ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(code);\ - CSTD_UNUSED(kctx);\ - CSTD_UNUSED(katom);\ - CSTD_UNUSED(gpu_addr);\ - CSTD_UNUSED(jobslot);\ - CSTD_UNUSED(info_val);\ - CSTD_NOP(0);\ - } while (0) -#define KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, gpu_addr, \ - refcount) \ +#define KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, gpu_addr, flags, \ + refcount, jobslot, info_val) \ do {\ CSTD_UNUSED(kbdev);\ CSTD_NOP(code);\ CSTD_UNUSED(kctx);\ CSTD_UNUSED(katom);\ CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(flags);\ CSTD_UNUSED(refcount);\ - CSTD_NOP(0);\ - } while (0) - -#define KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \ - gpu_addr, refcount, info_val) \ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(code);\ - CSTD_UNUSED(kctx);\ - CSTD_UNUSED(katom);\ - CSTD_UNUSED(gpu_addr);\ - CSTD_UNUSED(info_val);\ - CSTD_NOP(0);\ - } while (0) - -#define KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, gpu_addr, \ - info_val)\ - do {\ - CSTD_UNUSED(kbdev);\ - CSTD_NOP(code);\ - CSTD_UNUSED(kctx);\ - CSTD_UNUSED(katom);\ - CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(jobslot);\ CSTD_UNUSED(info_val);\ CSTD_NOP(0);\ } while (0) @@ -137,27 +71,30 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev, /* * KTrace target for Linux's ftrace + * + * Note: the header file(s) that define the trace_mali_<...> tracepoints are + * included by the parent header file */ #if KBASE_KTRACE_TARGET_FTRACE 
#define KBASE_KTRACE_FTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ jobslot) \ - trace_mali_##code(jobslot, 0) + trace_mali_##code(kctx, jobslot, 0) #define KBASE_KTRACE_FTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, \ gpu_addr, jobslot, info_val) \ - trace_mali_##code(jobslot, info_val) + trace_mali_##code(kctx, jobslot, info_val) #define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, \ gpu_addr, refcount) \ - trace_mali_##code(refcount, 0) + trace_mali_##code(kctx, refcount, 0) #define KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT_INFO(kbdev, code, kctx, katom, \ gpu_addr, refcount, info_val) \ - trace_mali_##code(refcount, info_val) + trace_mali_##code(kctx, refcount, info_val) #define KBASE_KTRACE_FTRACE_ADD_JM(kbdev, code, kctx, katom, gpu_addr, \ info_val) \ - trace_mali_##code(gpu_addr, info_val) + trace_mali_##code(kctx, gpu_addr, info_val) #else /* KBASE_KTRACE_TARGET_FTRACE */ #define KBASE_KTRACE_FTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, gpu_addr, \ jobslot) \ @@ -247,7 +184,9 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev, /* capture values that could come from non-pure function calls */ \ u64 __gpu_addr = gpu_addr; \ int __jobslot = jobslot; \ - KBASE_KTRACE_RBUF_ADD_JM_SLOT(kbdev, code, kctx, katom, __gpu_addr, __jobslot); \ + KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ + KBASE_KTRACE_FLAG_JM_JOBSLOT, 0, __jobslot, \ + 0); \ KBASE_KTRACE_FTRACE_ADD_JM_SLOT(kbdev, code, kctx, katom, __gpu_addr, __jobslot); \ } while (0) @@ -275,7 +214,9 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev, u64 __gpu_addr = gpu_addr; \ int __jobslot = jobslot; \ u64 __info_val = info_val; \ - KBASE_KTRACE_RBUF_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, __gpu_addr, __jobslot, __info_val); \ + KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ + KBASE_KTRACE_FLAG_JM_JOBSLOT, 0, __jobslot, \ + __info_val); \ KBASE_KTRACE_FTRACE_ADD_JM_SLOT_INFO(kbdev, code, kctx, katom, __gpu_addr, __jobslot, __info_val); 
\ } while (0) @@ -301,7 +242,9 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev, /* capture values that could come from non-pure function calls */ \ u64 __gpu_addr = gpu_addr; \ int __refcount = refcount; \ - KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, __gpu_addr, __refcount); \ + KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ + KBASE_KTRACE_FLAG_JM_REFCOUNT, __refcount, 0, \ + 0u); \ KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, __gpu_addr, __refcount); \ } while (0) @@ -330,7 +273,9 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev, u64 __gpu_addr = gpu_addr; \ int __refcount = refcount; \ u64 __info_val = info_val; \ - KBASE_KTRACE_RBUF_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, __gpu_addr, __refcount, __info_val); \ + KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ + KBASE_KTRACE_FLAG_JM_REFCOUNT, __refcount, 0, \ + __info_val); \ KBASE_KTRACE_FTRACE_ADD_JM_REFCOUNT(kbdev, code, kctx, katom, __gpu_addr, __refcount, __info_val); \ } while (0) @@ -355,7 +300,8 @@ void kbasep_ktrace_add_jm(struct kbase_device *kbdev, /* capture values that could come from non-pure function calls */ \ u64 __gpu_addr = gpu_addr; \ u64 __info_val = info_val; \ - KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, __info_val); \ + KBASE_KTRACE_RBUF_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, \ + 0u, 0, 0, __info_val); \ KBASE_KTRACE_FTRACE_ADD_JM(kbdev, code, kctx, katom, __gpu_addr, __info_val); \ } while (0) diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_linux_ktrace_csf.h b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_linux_ktrace_csf.h new file mode 100644 index 0000000..4b23fc9 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_linux_ktrace_csf.h @@ -0,0 +1,241 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * NOTE: This must **only** be included through mali_linux_trace.h, + * otherwise it will fail to setup tracepoints correctly + */ + +#if !defined(_KBASE_DEBUG_LINUX_KTRACE_CSF_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _KBASE_DEBUG_LINUX_KTRACE_CSF_H_ + +/* + * Generic CSF events - using the common DEFINE_MALI_ADD_EVENT + */ +DEFINE_MALI_ADD_EVENT(EVICT_CTX_SLOTS); +DEFINE_MALI_ADD_EVENT(FIRMWARE_BOOT); +DEFINE_MALI_ADD_EVENT(FIRMWARE_REBOOT); +DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK); +DEFINE_MALI_ADD_EVENT(SCHEDULER_TOCK_END); +DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK); +DEFINE_MALI_ADD_EVENT(SCHEDULER_TICK_END); +DEFINE_MALI_ADD_EVENT(SCHEDULER_RESET); +DEFINE_MALI_ADD_EVENT(SCHEDULER_WAIT_PROTM_QUIT); +DEFINE_MALI_ADD_EVENT(SCHEDULER_WAIT_PROTM_QUIT_DONE); +DEFINE_MALI_ADD_EVENT(SYNC_UPDATE_EVENT); +DEFINE_MALI_ADD_EVENT(SYNC_UPDATE_EVENT_NOTIFY_GPU); +DEFINE_MALI_ADD_EVENT(CSF_INTERRUPT); +DEFINE_MALI_ADD_EVENT(CSF_INTERRUPT_END); +DEFINE_MALI_ADD_EVENT(CSG_INTERRUPT_PROCESS); +DEFINE_MALI_ADD_EVENT(GLB_REQ_ACQ); +DEFINE_MALI_ADD_EVENT(SCHEDULER_CAN_IDLE); +DEFINE_MALI_ADD_EVENT(SCHEDULER_ADVANCE_TICK); +DEFINE_MALI_ADD_EVENT(SCHEDULER_NOADVANCE_TICK); +DEFINE_MALI_ADD_EVENT(SCHEDULER_INSERT_RUNNABLE); +DEFINE_MALI_ADD_EVENT(SCHEDULER_REMOVE_RUNNABLE); 
+DEFINE_MALI_ADD_EVENT(SCHEDULER_ROTATE_RUNNABLE); +DEFINE_MALI_ADD_EVENT(SCHEDULER_HEAD_RUNNABLE); +DEFINE_MALI_ADD_EVENT(IDLE_WORKER_BEGIN); +DEFINE_MALI_ADD_EVENT(IDLE_WORKER_END); +DEFINE_MALI_ADD_EVENT(GROUP_SYNC_UPDATE_WORKER_BEGIN); +DEFINE_MALI_ADD_EVENT(GROUP_SYNC_UPDATE_WORKER_END); +DEFINE_MALI_ADD_EVENT(SLOTS_STATUS_UPDATE_ACK); + +DECLARE_EVENT_CLASS(mali_csf_grp_q_template, + TP_PROTO(struct kbase_device *kbdev, struct kbase_queue_group *group, + struct kbase_queue *queue, u64 info_val), + TP_ARGS(kbdev, group, queue, info_val), + TP_STRUCT__entry( + __field(u64, info_val) + __field(pid_t, kctx_tgid) + __field(u32, kctx_id) + __field(u8, group_handle) + __field(s8, csg_nr) + __field(u8, slot_prio) + __field(s8, csi_index) + ), + TP_fast_assign( + { + struct kbase_context *kctx = NULL; + + __entry->info_val = info_val; + /* Note: if required in future, we could record some + * flags in __entry about whether the group/queue parts + * are valid, and add that to the trace message e.g. + * by using __print_flags()/__print_symbolic() + */ + if (queue) { + /* Note: kctx overridden by group->kctx later if group is valid */ + kctx = queue->kctx; + __entry->csi_index = queue->csi_index; + } else { + __entry->csi_index = -1; + } + + if (group) { + kctx = group->kctx; + __entry->group_handle = group->handle; + __entry->csg_nr = group->csg_nr; + if (group->csg_nr >= 0) + __entry->slot_prio = kbdev->csf.scheduler.csg_slots[group->csg_nr].priority; + else + __entry->slot_prio = 0u; + } else { + __entry->group_handle = 0u; + __entry->csg_nr = -1; + __entry->slot_prio = 0u; + } + __entry->kctx_id = (kctx) ? kctx->id : 0u; + __entry->kctx_tgid = (kctx) ? 
kctx->tgid : 0; + } + + ), + TP_printk("kctx=%d_%u group=%u slot=%d prio=%u csi=%d info=0x%llx", + __entry->kctx_tgid, __entry->kctx_id, + __entry->group_handle, __entry->csg_nr, + __entry->slot_prio, __entry->csi_index, + __entry->info_val) +); + +/* + * Group events + */ +#define DEFINE_MALI_CSF_GRP_EVENT(name) \ + DEFINE_EVENT_PRINT(mali_csf_grp_q_template, mali_##name, \ + TP_PROTO(struct kbase_device *kbdev, struct kbase_queue_group *group, \ + struct kbase_queue *queue, u64 info_val), \ + TP_ARGS(kbdev, group, queue, info_val), \ + TP_printk("kctx=%d_%u group=%u slot=%d prio=%u info=0x%llx", \ + __entry->kctx_tgid, __entry->kctx_id, __entry->group_handle, \ + __entry->csg_nr, __entry->slot_prio, __entry->info_val)) + +DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_START); +DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STOP); +DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STARTED); +DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STOPPED); +DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_CLEANED); +DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_STATUS_UPDATE); +DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_IDLE_SET); +DEFINE_MALI_CSF_GRP_EVENT(CSG_SLOT_IDLE_CLEAR); +DEFINE_MALI_CSF_GRP_EVENT(CSG_PRIO_UPDATE); +DEFINE_MALI_CSF_GRP_EVENT(CSG_SYNC_UPDATE_INTERRUPT); +DEFINE_MALI_CSF_GRP_EVENT(CSG_IDLE_INTERRUPT); +DEFINE_MALI_CSF_GRP_EVENT(CSG_PROGRESS_TIMER_INTERRUPT); +DEFINE_MALI_CSF_GRP_EVENT(CSG_INTERRUPT_PROCESS_END); +DEFINE_MALI_CSF_GRP_EVENT(GROUP_SYNC_UPDATE_DONE); +DEFINE_MALI_CSF_GRP_EVENT(GROUP_DESCHEDULE); +DEFINE_MALI_CSF_GRP_EVENT(GROUP_SCHEDULE); +DEFINE_MALI_CSF_GRP_EVENT(GROUP_EVICT_SCHED); +DEFINE_MALI_CSF_GRP_EVENT(GROUP_INSERT_RUNNABLE); +DEFINE_MALI_CSF_GRP_EVENT(GROUP_REMOVE_RUNNABLE); +DEFINE_MALI_CSF_GRP_EVENT(GROUP_ROTATE_RUNNABLE); +DEFINE_MALI_CSF_GRP_EVENT(GROUP_HEAD_RUNNABLE); +DEFINE_MALI_CSF_GRP_EVENT(GROUP_INSERT_IDLE_WAIT); +DEFINE_MALI_CSF_GRP_EVENT(GROUP_REMOVE_IDLE_WAIT); +DEFINE_MALI_CSF_GRP_EVENT(GROUP_HEAD_IDLE_WAIT); +DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_CHECK_PROTM_ENTER); 
+DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_ENTER_PROTM); +DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_EXIT_PROTM); +DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_TOP_GRP); +DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_INC); +DEFINE_MALI_CSF_GRP_EVENT(SCHEDULER_NONIDLE_OFFSLOT_DEC); +DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_BEGIN); +DEFINE_MALI_CSF_GRP_EVENT(PROTM_EVENT_WORKER_END); + +#undef DEFINE_MALI_CSF_GRP_EVENT + +/* + * Group + Queue events + */ +#define DEFINE_MALI_CSF_GRP_Q_EVENT(name) \ + DEFINE_EVENT(mali_csf_grp_q_template, mali_##name, \ + TP_PROTO(struct kbase_device *kbdev, struct kbase_queue_group *group, \ + struct kbase_queue *queue, u64 info_val), \ + TP_ARGS(kbdev, group, queue, info_val)) + +DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_START); +DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_STOP); +DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_STOP_REQUESTED); +DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_IGNORED_INTERRUPTS_GROUP_SUSPEND); +DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_FAULT_INTERRUPT); +DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_TILER_OOM_INTERRUPT); +DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_PROTM_PEND_INTERRUPT); +DEFINE_MALI_CSF_GRP_Q_EVENT(CSI_PROTM_ACK); +DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_START); +DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_STOP); +DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE); +DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_UPDATE_EVALUATED); +DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_STATUS_WAIT); +DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_CURRENT_VAL); +DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_TEST_VAL); +DEFINE_MALI_CSF_GRP_Q_EVENT(QUEUE_SYNC_BLOCKED_REASON); +DEFINE_MALI_CSF_GRP_Q_EVENT(PROTM_PENDING_SET); +DEFINE_MALI_CSF_GRP_Q_EVENT(PROTM_PENDING_CLEAR); + +#undef DEFINE_MALI_CSF_GRP_Q_EVENT + +/* + * KCPU queue events + */ +DECLARE_EVENT_CLASS(mali_csf_kcpu_queue_template, + TP_PROTO(struct kbase_kcpu_command_queue *queue, + u64 info_val1, u64 info_val2), + TP_ARGS(queue, info_val1, info_val2), + TP_STRUCT__entry( + __field(u64, info_val1) + __field(u64, info_val2) + __field(pid_t, kctx_tgid) + __field(u32, kctx_id) + 
__field(u8, id) + ), + TP_fast_assign( + { + __entry->info_val1 = info_val1; + __entry->info_val2 = info_val2; + __entry->kctx_id = queue->kctx->id; + __entry->kctx_tgid = queue->kctx->tgid; + __entry->id = queue->id; + } + + ), + TP_printk("kctx=%d_%u id=%u info_val1=0x%llx info_val2=0x%llx", + __entry->kctx_tgid, __entry->kctx_id, __entry->id, + __entry->info_val1, __entry->info_val2) +); + +#define DEFINE_MALI_CSF_KCPU_EVENT(name) \ + DEFINE_EVENT(mali_csf_kcpu_queue_template, mali_##name, \ + TP_PROTO(struct kbase_kcpu_command_queue *queue, \ + u64 info_val1, u64 info_val2), \ + TP_ARGS(queue, info_val1, info_val2)) + +DEFINE_MALI_CSF_KCPU_EVENT(KCPU_QUEUE_NEW); +DEFINE_MALI_CSF_KCPU_EVENT(KCPU_QUEUE_DESTROY); +DEFINE_MALI_CSF_KCPU_EVENT(CQS_SET); +DEFINE_MALI_CSF_KCPU_EVENT(CQS_WAIT_START); +DEFINE_MALI_CSF_KCPU_EVENT(CQS_WAIT_END); +DEFINE_MALI_CSF_KCPU_EVENT(FENCE_SIGNAL); +DEFINE_MALI_CSF_KCPU_EVENT(FENCE_WAIT_START); +DEFINE_MALI_CSF_KCPU_EVENT(FENCE_WAIT_END); + +#undef DEFINE_MALI_CSF_KCPU_EVENT + +#endif /* !defined(_KBASE_DEBUG_LINUX_KTRACE_CSF_H_) || defined(TRACE_HEADER_MULTI_READ) */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_linux_ktrace_jm.h b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_linux_ktrace_jm.h index d964e5a..2e88e69 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_linux_ktrace_jm.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/backend/mali_kbase_debug_linux_ktrace_jm.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014,2018,2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2018, 2020-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /* @@ -29,23 +28,28 @@ #define _KBASE_DEBUG_LINUX_KTRACE_JM_H_ DECLARE_EVENT_CLASS(mali_jm_slot_template, - TP_PROTO(int jobslot, u64 info_val), - TP_ARGS(jobslot, info_val), + TP_PROTO(struct kbase_context *kctx, int jobslot, u64 info_val), + TP_ARGS(kctx, jobslot, info_val), TP_STRUCT__entry( + __field(pid_t, kctx_tgid) + __field(u32, kctx_id) __field(unsigned int, jobslot) __field(u64, info_val) ), TP_fast_assign( + __entry->kctx_id = (kctx) ? kctx->id : 0u; + __entry->kctx_tgid = (kctx) ? 
kctx->tgid : 0; __entry->jobslot = jobslot; __entry->info_val = info_val; ), - TP_printk("jobslot=%u info=0x%llx", __entry->jobslot, __entry->info_val) + TP_printk("kctx=%d_%u jobslot=%u info=0x%llx", __entry->kctx_tgid, + __entry->kctx_id, __entry->jobslot, __entry->info_val) ); #define DEFINE_MALI_JM_SLOT_EVENT(name) \ DEFINE_EVENT(mali_jm_slot_template, mali_##name, \ - TP_PROTO(int jobslot, u64 info_val), \ - TP_ARGS(jobslot, info_val)) + TP_PROTO(struct kbase_context *kctx, int jobslot, u64 info_val), \ + TP_ARGS(kctx, jobslot, info_val)) DEFINE_MALI_JM_SLOT_EVENT(JM_SUBMIT); DEFINE_MALI_JM_SLOT_EVENT(JM_JOB_DONE); DEFINE_MALI_JM_SLOT_EVENT(JM_UPDATE_HEAD); @@ -75,23 +79,28 @@ DEFINE_MALI_JM_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB_IRQ); #undef DEFINE_MALI_JM_SLOT_EVENT DECLARE_EVENT_CLASS(mali_jm_refcount_template, - TP_PROTO(int refcount, u64 info_val), - TP_ARGS(refcount, info_val), + TP_PROTO(struct kbase_context *kctx, int refcount, u64 info_val), + TP_ARGS(kctx, refcount, info_val), TP_STRUCT__entry( + __field(pid_t, kctx_tgid) + __field(u32, kctx_id) __field(unsigned int, refcount) __field(u64, info_val) ), TP_fast_assign( + __entry->kctx_id = (kctx) ? kctx->id : 0u; + __entry->kctx_tgid = (kctx) ? 
kctx->tgid : 0; __entry->refcount = refcount; __entry->info_val = info_val; ), - TP_printk("refcount=%u info=0x%llx", __entry->refcount, __entry->info_val) + TP_printk("kctx=%d_%u refcount=%u info=0x%llx", __entry->kctx_tgid, + __entry->kctx_id, __entry->refcount, __entry->info_val) ); #define DEFINE_MALI_JM_REFCOUNT_EVENT(name) \ DEFINE_EVENT(mali_jm_refcount_template, mali_##name, \ - TP_PROTO(int refcount, u64 info_val), \ - TP_ARGS(refcount, info_val)) + TP_PROTO(struct kbase_context *kctx, int refcount, u64 info_val), \ + TP_ARGS(kctx, refcount, info_val)) DEFINE_MALI_JM_REFCOUNT_EVENT(JS_ADD_JOB); DEFINE_MALI_JM_REFCOUNT_EVENT(JS_REMOVE_JOB); DEFINE_MALI_JM_REFCOUNT_EVENT(JS_TRY_SCHEDULE_HEAD_CTX); @@ -106,23 +115,28 @@ DEFINE_MALI_JM_REFCOUNT_EVENT(JS_POLICY_FOREACH_CTX_JOBS); #undef DEFINE_MALI_JM_REFCOUNT_EVENT DECLARE_EVENT_CLASS(mali_jm_add_template, - TP_PROTO(u64 gpu_addr, u64 info_val), - TP_ARGS(gpu_addr, info_val), + TP_PROTO(struct kbase_context *kctx, u64 gpu_addr, u64 info_val), + TP_ARGS(kctx, gpu_addr, info_val), TP_STRUCT__entry( + __field(pid_t, kctx_tgid) + __field(u32, kctx_id) __field(u64, gpu_addr) __field(u64, info_val) ), TP_fast_assign( + __entry->kctx_id = (kctx) ? kctx->id : 0u; + __entry->kctx_tgid = (kctx) ? 
kctx->tgid : 0; __entry->gpu_addr = gpu_addr; __entry->info_val = info_val; ), - TP_printk("gpu_addr=0x%llx info=0x%llx", __entry->gpu_addr, __entry->info_val) + TP_printk("kctx=%d_%u gpu_addr=0x%llx info=0x%llx", __entry->kctx_tgid, + __entry->kctx_id, __entry->gpu_addr, __entry->info_val) ); #define DEFINE_MALI_JM_ADD_EVENT(name) \ DEFINE_EVENT(mali_jm_add_template, mali_##name, \ - TP_PROTO(u64 gpu_addr, u64 info_val), \ - TP_ARGS(gpu_addr, info_val)) + TP_PROTO(struct kbase_context *kctx, u64 gpu_addr, u64 info_val), \ + TP_ARGS(kctx, gpu_addr, info_val)) DEFINE_MALI_JM_ADD_EVENT(JD_DONE_WORKER); DEFINE_MALI_JM_ADD_EVENT(JD_DONE_WORKER_END); DEFINE_MALI_JM_ADD_EVENT(JD_CANCEL_WORKER); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace.c b/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace.c index 6322abb..9bf8610 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,9 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ + #include #include "debug/mali_kbase_debug_ktrace_internal.h" @@ -27,11 +27,6 @@ int kbase_ktrace_init(struct kbase_device *kbdev) #if KBASE_KTRACE_TARGET_RBUF struct kbase_ktrace_msg *rbuf; - /* See also documentation of enum kbase_ktrace_code */ - compiletime_assert(sizeof(kbase_ktrace_code_t) == sizeof(unsigned long long) || - KBASE_KTRACE_CODE_COUNT <= (1ull << (sizeof(kbase_ktrace_code_t) * BITS_PER_BYTE)), - "kbase_ktrace_code_t not wide enough for KBASE_KTRACE_CODE_COUNT"); - rbuf = kmalloc_array(KBASE_KTRACE_SIZE, sizeof(*rbuf), GFP_KERNEL); if (!rbuf) @@ -91,15 +86,25 @@ static void kbasep_ktrace_format_msg(struct kbase_ktrace_msg *trace_msg, /* Initial part of message: * - * secs,thread_id,cpu,code,kctx, + * secs,thread_id,cpu,code, */ written += MAX(snprintf(buffer + written, MAX(sz - written, 0), - "%d.%.6d,%d,%d,%s,%p,", + "%d.%.6d,%d,%d,%s,", (int)trace_msg->timestamp.tv_sec, (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, trace_msg->cpu, - kbasep_ktrace_code_string[trace_msg->backend.code], - trace_msg->kctx), 0); + kbasep_ktrace_code_string[trace_msg->backend.gpu.code]), + 0); + + /* kctx part: */ + if (trace_msg->kctx_tgid) { + written += MAX(snprintf(buffer + written, MAX(sz - written, 0), + "%d_%u", + trace_msg->kctx_tgid, trace_msg->kctx_id), 0); + } + /* Trailing comma */ + written += MAX(snprintf(buffer + written, MAX(sz - written, 0), + ","), 0); /* Backend parts */ kbasep_ktrace_backend_format_msg(trace_msg, buffer, sz, @@ -156,11 +161,19 @@ void kbasep_ktrace_msg_init(struct kbase_ktrace *ktrace, ktime_get_real_ts64(&trace_msg->timestamp); - trace_msg->kctx = kctx; - + /* No need to store a flag about whether there was a kctx, tgid==0 is + * sufficient + */ + if (kctx) { + trace_msg->kctx_tgid = kctx->tgid; + trace_msg->kctx_id = kctx->id; + } else { + trace_msg->kctx_tgid = 0; + trace_msg->kctx_id = 0; + } trace_msg->info_val = info_val; - trace_msg->backend.code = code; - 
trace_msg->backend.flags = flags; + trace_msg->backend.gpu.code = code; + trace_msg->backend.gpu.flags = flags; } void kbasep_ktrace_add(struct kbase_device *kbdev, enum kbase_ktrace_code code, @@ -170,12 +183,14 @@ void kbasep_ktrace_add(struct kbase_device *kbdev, enum kbase_ktrace_code code, unsigned long irqflags; struct kbase_ktrace_msg *trace_msg; + WARN_ON((flags & ~KBASE_KTRACE_FLAG_COMMON_ALL)); + spin_lock_irqsave(&kbdev->ktrace.lock, irqflags); /* Reserve and update indices */ trace_msg = kbasep_ktrace_reserve(&kbdev->ktrace); - /* Fill the common part of the message (including backend.flags) */ + /* Fill the common part of the message (including backend.gpu.flags) */ kbasep_ktrace_msg_init(&kbdev->ktrace, trace_msg, code, kctx, flags, info_val); @@ -225,7 +240,7 @@ void kbasep_ktrace_dump(struct kbase_device *kbdev) spin_unlock_irqrestore(&kbdev->ktrace.lock, flags); } -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) struct trace_seq_state { struct kbase_ktrace_msg trace_buf[KBASE_KTRACE_SIZE]; u32 start; @@ -333,7 +348,7 @@ void kbase_ktrace_debugfs_init(struct kbase_device *kbdev) #else /* KBASE_KTRACE_TARGET_RBUF */ -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) void kbase_ktrace_debugfs_init(struct kbase_device *kbdev) { CSTD_UNUSED(kbdev); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace.h b/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace.h index 0dd8b7a..f943696 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /* @@ -37,7 +36,15 @@ #ifndef _KBASE_DEBUG_KTRACE_H_ #define _KBASE_DEBUG_KTRACE_H_ +#if KBASE_KTRACE_TARGET_FTRACE +#include "mali_linux_trace.h" +#endif + +#if MALI_USE_CSF +#include "debug/backend/mali_kbase_debug_ktrace_csf.h" +#else #include "debug/backend/mali_kbase_debug_ktrace_jm.h" +#endif /** * kbase_ktrace_init - initialize kbase ktrace. @@ -58,7 +65,7 @@ void kbase_ktrace_term(struct kbase_device *kbdev); */ void kbase_ktrace_hook_wrapper(void *param); -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) /** * kbase_ktrace_debugfs_init - initialize kbase ktrace for debugfs usage, if * the selected targets support it. 
@@ -140,10 +147,9 @@ void kbasep_ktrace_dump(struct kbase_device *kbdev); * KTrace target for Linux's ftrace */ #if KBASE_KTRACE_TARGET_FTRACE -#include "mali_linux_trace.h" #define KBASE_KTRACE_FTRACE_ADD(kbdev, code, kctx, info_val) \ - trace_mali_##code(info_val) + trace_mali_##code(kctx, info_val) #else /* KBASE_KTRACE_TARGET_FTRACE */ #define KBASE_KTRACE_FTRACE_ADD(kbdev, code, kctx, info_val) \ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_codes.h b/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_codes.h index 364ed60..3309834 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_codes.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_codes.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2015,2018-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2015, 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ /* @@ -30,6 +29,9 @@ * The purpose of this header file is just to contain a list of trace code * identifiers * + * When updating this file, also remember to update + * mali_kbase_debug_linux_ktrace.h + * * Each identifier is wrapped in a macro, so that its string form and enum form * can be created * @@ -112,6 +114,7 @@ int dummy_array[] = { KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER), KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE), KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER), + KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_L2), KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE), KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER), KBASE_KTRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED), @@ -142,9 +145,20 @@ int dummy_array[] = { KBASE_KTRACE_CODE_MAKE_CODE(SCHED_RETAIN_CTX_NOLOCK), /* info_val == kctx->refcount */ KBASE_KTRACE_CODE_MAKE_CODE(SCHED_RELEASE_CTX), +#ifdef CONFIG_MALI_ARBITER_SUPPORT + /* + * Arbitration events + */ + KBASE_KTRACE_CODE_MAKE_CODE(ARB_GPU_LOST), + KBASE_KTRACE_CODE_MAKE_CODE(ARB_VM_STATE), + KBASE_KTRACE_CODE_MAKE_CODE(ARB_VM_EVT), +#endif - +#if MALI_USE_CSF +#include "debug/backend/mali_kbase_debug_ktrace_codes_csf.h" +#else #include "debug/backend/mali_kbase_debug_ktrace_codes_jm.h" +#endif /* * Unused code just to make it easier to not have a comma at the end. * All other codes MUST come before this diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_defs.h b/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_defs.h index d6baaf1..4694b78 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_defs.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_defs.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2021 ARM Limited. 
All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KBASE_DEBUG_KTRACE_DEFS_H_ @@ -58,6 +57,16 @@ #define KBASE_KTRACE_TARGET_RBUF 0 #endif /* KBASE_KTRACE_ENABLE */ +/* + * Note: Some backends define flags in this type even if the RBUF target is + * disabled (they get discarded with CSTD_UNUSED(), but they're still + * referenced) + */ +typedef u8 kbase_ktrace_flag_t; + +#if KBASE_KTRACE_TARGET_RBUF +typedef u8 kbase_ktrace_code_t; + /* * NOTE: KBASE_KTRACE_VERSION_MAJOR, KBASE_KTRACE_VERSION_MINOR are kept in * the backend, since updates can be made to one backend in a way that doesn't @@ -67,20 +76,28 @@ * updated. */ -#if KBASE_KTRACE_TARGET_RBUF -typedef u8 kbase_ktrace_flag_t; -typedef u8 kbase_ktrace_code_t; - /* - * struct kbase_ktrace_backend - backend specific part of a trace message - * - * At the very least, this must contain a kbase_ktrace_code_t 'code' member and - * a kbase_ktrace_flag_t 'flags' member + * union kbase_ktrace_backend - backend specific part of a trace message. + * At the very least, this must contain a kbase_ktrace_code_t 'code' member + * and a kbase_ktrace_flag_t 'flags' inside a "gpu" sub-struct. Should a + * backend need several sub structs in its union to optimize the data storage + * for different message types, then it can use a "common initial sequence" to + * allow 'flags' and 'code' to pack optimally without corrupting them. 
+ * Different backends need not share common initial sequences between them; they + * only need to ensure they have gpu.flags and gpu.code members. It + * is then up to the backend how to order these. */ -struct kbase_ktrace_backend; +union kbase_ktrace_backend; + +#endif /* KBASE_KTRACE_TARGET_RBUF */ +#if MALI_USE_CSF +#include "debug/backend/mali_kbase_debug_ktrace_defs_csf.h" +#else #include "debug/backend/mali_kbase_debug_ktrace_defs_jm.h" +#endif +#if KBASE_KTRACE_TARGET_RBUF /* Indicates if the trace message has backend related info. * * If not set, consider the &kbase_ktrace_backend part of a &kbase_ktrace_msg @@ -90,7 +107,14 @@ struct kbase_ktrace_backend; */ #define KBASE_KTRACE_FLAG_BACKEND (((kbase_ktrace_flag_t)1) << 7) -#define KBASE_KTRACE_SHIFT 8 /* 256 entries */ +/* Collect all the common flags together for debug checking */ +#define KBASE_KTRACE_FLAG_COMMON_ALL \ + (KBASE_KTRACE_FLAG_BACKEND) + +#define KBASE_KTRACE_FLAG_ALL \ + (KBASE_KTRACE_FLAG_COMMON_ALL | KBASE_KTRACE_FLAG_BACKEND_ALL) + +#define KBASE_KTRACE_SHIFT (9) /* 512 entries */ #define KBASE_KTRACE_SIZE (1 << KBASE_KTRACE_SHIFT) #define KBASE_KTRACE_MASK ((1 << KBASE_KTRACE_SHIFT)-1) @@ -121,24 +145,23 @@ enum kbase_ktrace_code { * added. * @cpu: indicates which CPU the @thread_id was scheduled on when the * trace message was added. - * @kctx: Pointer to the kbase context for which the trace message was - * added. Will be NULL for certain trace messages associated with - * the &kbase_device itself, such as power management events. - * Will point to the appropriate context corresponding to - * backend-specific events. + * @kctx_tgid: Thread group ID of the &kbase_context associated with the + * message, or 0 if none associated. + * @kctx_id: Unique identifier of the &kbase_context associated with the + * message. Only valid if @kctx_tgid != 0. * @info_val: value specific to the type of event being traced.
Refer to the - * specific code in enum kbase_ktrace_code + * specific code in enum kbase_ktrace_code. * @backend: backend-specific trace information. All backends must implement - * a minimum common set of members + * a minimum common set of members. */ struct kbase_ktrace_msg { struct timespec64 timestamp; u32 thread_id; u32 cpu; - void *kctx; + pid_t kctx_tgid; + u32 kctx_id; u64 info_val; - - struct kbase_ktrace_backend backend; + union kbase_ktrace_backend backend; }; struct kbase_ktrace { @@ -148,5 +171,17 @@ struct kbase_ktrace { struct kbase_ktrace_msg *rbuf; }; + +static inline void kbase_ktrace_compiletime_asserts(void) +{ + /* See also documentation of enum kbase_ktrace_code */ + compiletime_assert(sizeof(kbase_ktrace_code_t) == sizeof(unsigned long long) || + KBASE_KTRACE_CODE_COUNT <= (1ull << (sizeof(kbase_ktrace_code_t) * BITS_PER_BYTE)), + "kbase_ktrace_code_t not wide enough for KBASE_KTRACE_CODE_COUNT"); + compiletime_assert((KBASE_KTRACE_FLAG_BACKEND_ALL & KBASE_KTRACE_FLAG_COMMON_ALL) == 0, + "KTrace backend flags intersect with KTrace common flags"); + +} + #endif /* KBASE_KTRACE_TARGET_RBUF */ #endif /* _KBASE_DEBUG_KTRACE_DEFS_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_internal.h b/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_internal.h index e450760..d9bd351 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_internal.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_ktrace_internal.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KBASE_DEBUG_KTRACE_INTERNAL_H_ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_linux_ktrace.h b/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_linux_ktrace.h index 18e4f7c..b56dec4 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_linux_ktrace.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/debug/mali_kbase_debug_linux_ktrace.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014,2018,2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ /* @@ -31,21 +30,29 @@ #if KBASE_KTRACE_TARGET_FTRACE DECLARE_EVENT_CLASS(mali_add_template, - TP_PROTO(u64 info_val), - TP_ARGS(info_val), + TP_PROTO(struct kbase_context *kctx, u64 info_val), + TP_ARGS(kctx, info_val), TP_STRUCT__entry( + __field(pid_t, kctx_tgid) + __field(u32, kctx_id) __field(u64, info_val) ), TP_fast_assign( + __entry->kctx_id = (kctx) ? kctx->id : 0u; + __entry->kctx_tgid = (kctx) ? kctx->tgid : 0; __entry->info_val = info_val; ), - TP_printk("info=0x%llx", __entry->info_val) + TP_printk("kctx=%d_%u info=0x%llx", __entry->kctx_tgid, + __entry->kctx_id, __entry->info_val) ); +/* DEFINE_MALI_ADD_EVENT is available also to backends for backend-specific + * simple trace codes + */ #define DEFINE_MALI_ADD_EVENT(name) \ DEFINE_EVENT(mali_add_template, mali_##name, \ - TP_PROTO(u64 info_val), \ - TP_ARGS(info_val)) + TP_PROTO(struct kbase_context *kctx, u64 info_val), \ + TP_ARGS(kctx, info_val)) DEFINE_MALI_ADD_EVENT(CORE_CTX_DESTROY); DEFINE_MALI_ADD_EVENT(CORE_CTX_HWINSTR_TERM); DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ); @@ -78,6 +85,7 @@ DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE); DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER); DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE); DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER); +DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_L2); DEFINE_MALI_ADD_EVENT(PM_GPU_ON); DEFINE_MALI_ADD_EVENT(PM_GPU_OFF); DEFINE_MALI_ADD_EVENT(PM_SET_POLICY); @@ -89,10 +97,20 @@ DEFINE_MALI_ADD_EVENT(PM_CONTEXT_IDLE); DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS); DEFINE_MALI_ADD_EVENT(SCHED_RETAIN_CTX_NOLOCK); DEFINE_MALI_ADD_EVENT(SCHED_RELEASE_CTX); +#ifdef CONFIG_MALI_ARBITER_SUPPORT -#undef DEFINE_MALI_ADD_EVENT +DEFINE_MALI_ADD_EVENT(ARB_GPU_LOST); +DEFINE_MALI_ADD_EVENT(ARB_VM_STATE); +DEFINE_MALI_ADD_EVENT(ARB_VM_EVT); -#include "mali_kbase_debug_linux_ktrace_jm.h" +#endif +#if MALI_USE_CSF +#include "backend/mali_kbase_debug_linux_ktrace_csf.h" +#else +#include 
"backend/mali_kbase_debug_linux_ktrace_jm.h" +#endif + +#undef DEFINE_MALI_ADD_EVENT #endif /* KBASE_KTRACE_TARGET_FTRACE */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/device/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/device/Kbuild new file mode 100644 index 0000000..90e7024 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/device/Kbuild @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +mali_kbase-y += \ + device/mali_kbase_device.o \ + device/mali_kbase_device_hw.o + +ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) + mali_kbase-y += \ + device/backend/mali_kbase_device_csf.o \ + device/backend/mali_kbase_device_hw_csf.o +else + mali_kbase-y += \ + device/backend/mali_kbase_device_jm.o \ + device/backend/mali_kbase_device_hw_jm.o +endif diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_csf.c b/dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_csf.c new file mode 100644 index 0000000..0c5052b --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_csf.c @@ -0,0 +1,464 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include + +#include +#include +#include +#include +#include +#include + + +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * kbase_device_firmware_hwcnt_term - Terminate CSF firmware and HWC + * + * @kbdev: An instance of the GPU platform device, allocated from the probe + * method of the driver. + * + * When a kbase driver is removed, terminate CSF firmware and hardware counter + * components. + */ +static void kbase_device_firmware_hwcnt_term(struct kbase_device *kbdev) +{ + if (kbdev->csf.firmware_inited) { + kbase_vinstr_term(kbdev->vinstr_ctx); + kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt); + kbase_hwcnt_backend_csf_metadata_term(&kbdev->hwcnt_gpu_iface); + kbase_csf_firmware_term(kbdev); + } +} + +/** + * kbase_backend_late_init - Perform any backend-specific initialization. + * @kbdev: Device pointer + * + * Return: 0 on success, or an error code on failure. 
+ */ +static int kbase_backend_late_init(struct kbase_device *kbdev) +{ + int err; + + err = kbase_hwaccess_pm_init(kbdev); + if (err) + return err; + + err = kbase_reset_gpu_init(kbdev); + if (err) + goto fail_reset_gpu_init; + + err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT); + if (err) + goto fail_pm_powerup; + + err = kbase_backend_timer_init(kbdev); + if (err) + goto fail_timer; + +#ifdef CONFIG_MALI_DEBUG +#ifndef CONFIG_MALI_NO_MALI + if (kbasep_common_test_interrupt_handlers(kbdev) != 0) { + dev_err(kbdev->dev, "Interrupt assignment check failed.\n"); + err = -EINVAL; + goto fail_interrupt_test; + } +#endif /* !CONFIG_MALI_NO_MALI */ +#endif /* CONFIG_MALI_DEBUG */ + + kbase_ipa_control_init(kbdev); + + /* Initialise the metrics subsystem, it couldn't be initialized earlier + * due to dependency on kbase_ipa_control. + */ + err = kbasep_pm_metrics_init(kbdev); + if (err) + goto fail_pm_metrics_init; + + /* Do the initialisation of devfreq. + * Devfreq needs backend_timer_init() for completion of its + * initialisation and it also needs to catch the first callback + * occurrence of the runtime_suspend event for maintaining state + * coherence with the backend power management, hence needs to be + * placed before the kbase_pm_context_idle(). 
+ */ + err = kbase_backend_devfreq_init(kbdev); + if (err) + goto fail_devfreq_init; + + /* Update gpuprops with L2_FEATURES if applicable */ + err = kbase_gpuprops_update_l2_features(kbdev); + if (err) + goto fail_update_l2_features; + + init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait); + + kbase_pm_context_idle(kbdev); + + mutex_init(&kbdev->fw_load_lock); + + return 0; + +fail_update_l2_features: + kbase_backend_devfreq_term(kbdev); +fail_devfreq_init: + kbasep_pm_metrics_term(kbdev); +fail_pm_metrics_init: + kbase_ipa_control_term(kbdev); + +#ifdef CONFIG_MALI_DEBUG +#ifndef CONFIG_MALI_NO_MALI +fail_interrupt_test: +#endif /* !CONFIG_MALI_NO_MALI */ +#endif /* CONFIG_MALI_DEBUG */ + + kbase_backend_timer_term(kbdev); +fail_timer: + kbase_pm_context_idle(kbdev); + kbase_hwaccess_pm_halt(kbdev); +fail_pm_powerup: + kbase_reset_gpu_term(kbdev); +fail_reset_gpu_init: + kbase_hwaccess_pm_term(kbdev); + + return err; +} + +/** + * kbase_backend_late_term - Perform any backend-specific termination. + * @kbdev: Device pointer + */ +static void kbase_backend_late_term(struct kbase_device *kbdev) +{ + kbase_backend_devfreq_term(kbdev); + kbasep_pm_metrics_term(kbdev); + kbase_ipa_control_term(kbdev); + kbase_hwaccess_pm_halt(kbdev); + kbase_reset_gpu_term(kbdev); + kbase_hwaccess_pm_term(kbdev); +} + +/** + * kbase_csf_early_init - Early initialization for firmware & scheduler. + * @kbdev: Device pointer + * + * Return: 0 on success, error code otherwise. + */ +static int kbase_csf_early_init(struct kbase_device *kbdev) +{ + int err = kbase_csf_firmware_early_init(kbdev); + + if (err) + return err; + + err = kbase_csf_scheduler_early_init(kbdev); + + return err; +} + +/** + * kbase_csf_early_term - Early termination for firmware & scheduler.
+ * @kbdev: Device pointer + */ +static void kbase_csf_early_term(struct kbase_device *kbdev) +{ + kbase_csf_scheduler_early_term(kbdev); +} + +/** + * kbase_device_hwcnt_backend_csf_if_init - Create hardware counter backend + * firmware interface. + * @kbdev: Device pointer + */ +static int kbase_device_hwcnt_backend_csf_if_init(struct kbase_device *kbdev) +{ + return kbase_hwcnt_backend_csf_if_fw_create( + kbdev, &kbdev->hwcnt_backend_csf_if_fw); +} + +/** + * kbase_device_hwcnt_backend_csf_if_term - Terminate hardware counter backend + * firmware interface. + * @kbdev: Device pointer + */ +static void kbase_device_hwcnt_backend_csf_if_term(struct kbase_device *kbdev) +{ + kbase_hwcnt_backend_csf_if_fw_destroy(&kbdev->hwcnt_backend_csf_if_fw); +} + +/** + * kbase_device_hwcnt_backend_csf_init - Create hardware counter backend. + * @kbdev: Device pointer + */ + +static int kbase_device_hwcnt_backend_csf_init(struct kbase_device *kbdev) +{ + return kbase_hwcnt_backend_csf_create( + &kbdev->hwcnt_backend_csf_if_fw, + KBASE_HWCNT_BACKEND_CSF_RING_BUFFER_COUNT, + &kbdev->hwcnt_gpu_iface); +} + +/** + * kbase_device_hwcnt_backend_csf_term - Terminate hardware counter backend. 
+ * @kbdev: Device pointer + */ +static void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev) +{ + kbase_hwcnt_backend_csf_destroy(&kbdev->hwcnt_gpu_iface); +} + +static const struct kbase_device_init dev_init[] = { + { assign_irqs, NULL, "IRQ search failed" }, + { registers_map, registers_unmap, "Register map failed" }, + { power_control_init, power_control_term, + "Power control initialization failed" }, + { kbase_device_io_history_init, kbase_device_io_history_term, + "Register access history initialization failed" }, + { kbase_device_early_init, kbase_device_early_term, + "Early device initialization failed" }, + { kbase_device_populate_max_freq, NULL, + "Populating max frequency failed" }, + { kbase_device_misc_init, kbase_device_misc_term, + "Miscellaneous device initialization failed" }, + { kbase_device_pcm_dev_init, kbase_device_pcm_dev_term, + "Priority control manager initialization failed" }, + { kbase_ctx_sched_init, kbase_ctx_sched_term, + "Context scheduler initialization failed" }, + { kbase_mem_init, kbase_mem_term, + "Memory subsystem initialization failed" }, + { kbase_csf_protected_memory_init, kbase_csf_protected_memory_term, + "Protected memory allocator initialization failed" }, + { kbase_device_coherency_init, NULL, "Device coherency init failed" }, + { kbase_protected_mode_init, kbase_protected_mode_term, + "Protected mode subsystem initialization failed" }, + { kbase_device_list_init, kbase_device_list_term, + "Device list setup failed" }, + { kbase_device_timeline_init, kbase_device_timeline_term, + "Timeline stream initialization failed" }, + { kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term, + "Clock rate trace manager initialization failed" }, + { kbase_device_hwcnt_backend_csf_if_init, + kbase_device_hwcnt_backend_csf_if_term, + "GPU hwcnt backend CSF interface creation failed" }, + { kbase_device_hwcnt_backend_csf_init, + kbase_device_hwcnt_backend_csf_term, + "GPU hwcnt backend creation failed" }, 
+ { kbase_device_hwcnt_context_init, kbase_device_hwcnt_context_term, + "GPU hwcnt context initialization failed" }, + { kbase_backend_late_init, kbase_backend_late_term, + "Late backend initialization failed" }, + { kbase_csf_early_init, kbase_csf_early_term, + "Early CSF initialization failed" }, + { NULL, kbase_device_firmware_hwcnt_term, NULL }, +#ifdef MALI_KBASE_BUILD + { kbase_device_debugfs_init, kbase_device_debugfs_term, + "DebugFS initialization failed" }, + /* Sysfs init needs to happen before registering the device with + * misc_register(), otherwise it causes a race condition between + * registering the device and a uevent event being generated for + * userspace, causing udev rules to run which might expect certain + * sysfs attributes present. As a result of the race condition + * we avoid, some Mali sysfs entries may have appeared to udev + * to not exist. + * For more information, see + * https://www.kernel.org/doc/Documentation/driver-model/device.txt, the + * paragraph that starts with "Word of warning", currently the + * second-last paragraph. 
+ */ + { kbase_sysfs_init, kbase_sysfs_term, "SysFS group creation failed" }, + { kbase_device_misc_register, kbase_device_misc_deregister, + "Misc device registration failed" }, + { kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer, + "GPU property population failed" }, + { kbase_device_late_init, kbase_device_late_term, + "Late device initialization failed" }, +#endif +}; + +static void kbase_device_term_partial(struct kbase_device *kbdev, + unsigned int i) +{ + while (i-- > 0) { + if (dev_init[i].term) + dev_init[i].term(kbdev); + } +} + +void kbase_device_term(struct kbase_device *kbdev) +{ + kbdev->csf.mali_file_inode = NULL; + kbase_device_term_partial(kbdev, ARRAY_SIZE(dev_init)); + kbase_mem_halt(kbdev); +} + +int kbase_device_init(struct kbase_device *kbdev) +{ + int err = 0; + unsigned int i = 0; + + dev_info(kbdev->dev, "Kernel DDK version %s", MALI_RELEASE_NAME); + + kbase_device_id_init(kbdev); + kbase_disjoint_init(kbdev); + + for (i = 0; i < ARRAY_SIZE(dev_init); i++) { + if (dev_init[i].init) { + err = dev_init[i].init(kbdev); + if (err) { + dev_err(kbdev->dev, "%s error = %d\n", + dev_init[i].err_mes, err); + kbase_device_term_partial(kbdev, i); + break; + } + } + } + + return err; +} + +/** + * kbase_device_hwcnt_csf_deferred_init - Initialize CSF deferred HWC components + * + * @kbdev: An instance of the GPU platform device, allocated from the probe + * method of the driver. + * + * Hardware counter components depending on firmware are initialized after CSF + * firmware is loaded. + * + * Return: 0 on success, or an error code on failure.
+ */ +static int kbase_device_hwcnt_csf_deferred_init(struct kbase_device *kbdev) +{ + int ret = 0; + + /* For CSF GPUs, HWC metadata needs to query information from CSF + * firmware, so the initialization of HWC metadata can only be called + * after the firmware is initialized, but firmware initialization depends on + * HWC backend initialization, so we need to separate HWC backend + * metadata initialization from HWC backend initialization. + */ + ret = kbase_hwcnt_backend_csf_metadata_init(&kbdev->hwcnt_gpu_iface); + if (ret) { + dev_err(kbdev->dev, + "GPU hwcnt backend metadata creation failed"); + return ret; + } + + ret = kbase_hwcnt_virtualizer_init( + kbdev->hwcnt_gpu_ctx, + KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS, + &kbdev->hwcnt_gpu_virt); + if (ret) { + dev_err(kbdev->dev, + "GPU hwcnt virtualizer initialization failed"); + goto virt_fail; + } + + ret = kbase_vinstr_init(kbdev->hwcnt_gpu_virt, &kbdev->vinstr_ctx); + if (ret) { + dev_err(kbdev->dev, + "Virtual instrumentation initialization failed"); + goto vinstr_fail; + } + + return ret; + +vinstr_fail: + kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt); + +virt_fail: + kbase_hwcnt_backend_csf_metadata_term(&kbdev->hwcnt_gpu_iface); + return ret; +} + +/** + * kbase_csf_firmware_deferred_init - Load and initialize CSF firmware + * + * @kbdev: An instance of the GPU platform device, allocated from the probe + * method of the driver. + * + * Called when a device file is opened for the first time. + * To meet Android GKI vendor guideline, firmware load is deferred until + * the first time kbase_open() is called. + * + * Return: 0 on success, or an error code on failure.
+ */ +static int kbase_csf_firmware_deferred_init(struct kbase_device *kbdev) +{ + int err = 0; + + lockdep_assert_held(&kbdev->fw_load_lock); + + kbase_pm_context_active(kbdev); + + err = kbase_csf_firmware_init(kbdev); + if (!err) { + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->pm.backend.mcu_state = KBASE_MCU_ON; + kbdev->csf.firmware_inited = true; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } else { + dev_err(kbdev->dev, "Firmware initialization failed"); + } + + kbase_pm_context_idle(kbdev); + + return err; +} + +int kbase_device_firmware_init_once(struct kbase_device *kbdev) +{ + int ret = 0; + + mutex_lock(&kbdev->fw_load_lock); + + if (!kbdev->csf.firmware_inited) { + ret = kbase_csf_firmware_deferred_init(kbdev); + if (ret) + goto out; + + ret = kbase_device_hwcnt_csf_deferred_init(kbdev); + if (ret) { + kbase_csf_firmware_term(kbdev); + goto out; + } + + kbase_csf_debugfs_init(kbdev); + } + +out: + mutex_unlock(&kbdev->fw_load_lock); + + return ret; +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_hw_csf.c b/dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_hw_csf.c new file mode 100644 index 0000000..3fce637 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_hw_csf.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * kbase_report_gpu_fault - Report a GPU fault of the device. + * + * @kbdev: Kbase device pointer + * @status: Fault status + * @as_nr: Faulty address space + * @as_valid: true if address space is valid + * + * This function is called from the interrupt handler when a GPU fault occurs. + */ +static void kbase_report_gpu_fault(struct kbase_device *kbdev, u32 status, + u32 as_nr, bool as_valid) +{ + u64 address = (u64) kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_FAULTADDRESS_HI)) << 32; + + address |= kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_FAULTADDRESS_LO)); + + /* Report GPU fault for all contexts in case either + * the address space is invalid or it's MCU address space. + */ + meson_gpu_fault ++; + kbase_mmu_gpu_fault_interrupt(kbdev, status, as_nr, address, as_valid); +} + +static void kbase_gpu_fault_interrupt(struct kbase_device *kbdev) +{ + const u32 status = kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_FAULTSTATUS)); + const bool as_valid = status & GPU_FAULTSTATUS_JASID_VALID_FLAG; + const u32 as_nr = (status & GPU_FAULTSTATUS_JASID_MASK) >> + GPU_FAULTSTATUS_JASID_SHIFT; + bool bus_fault = (status & GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK) == + GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_BUS_FAULT; + + if (bus_fault) { + /* If as_valid, reset gpu when ASID is for MCU. 
*/ + if (!as_valid || (as_nr == MCU_AS_NR)) { + kbase_report_gpu_fault(kbdev, status, as_nr, as_valid); + + dev_err(kbdev->dev, "GPU bus fault triggering gpu-reset ...\n"); + if (kbase_prepare_to_reset_gpu( + kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu(kbdev); + } else { + /* Handle Bus fault */ + if (kbase_mmu_bus_fault_interrupt(kbdev, status, as_nr)) + dev_warn(kbdev->dev, + "fail to handle GPU bus fault ...\n"); + } + } else + kbase_report_gpu_fault(kbdev, status, as_nr, as_valid); +} + +void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) +{ + KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, val); + if (val & GPU_FAULT) + kbase_gpu_fault_interrupt(kbdev); + + if (val & GPU_PROTECTED_FAULT) { + struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + unsigned long flags; + + dev_err_ratelimited(kbdev->dev, "GPU fault in protected mode"); + + /* Mask the protected fault interrupt to avoid the potential + * deluge of such interrupts. It will be unmasked on GPU reset. 
+ */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), + GPU_IRQ_REG_ALL & ~GPU_PROTECTED_FAULT); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + if (!WARN_ON(!kbase_csf_scheduler_protected_mode_in_use( + kbdev))) { + struct base_gpu_queue_group_error const + err_payload = { .error_type = + BASE_GPU_QUEUE_GROUP_ERROR_FATAL, + .payload = { + .fatal_group = { + .status = + GPU_EXCEPTION_TYPE_SW_FAULT_0, + } } }; + + scheduler->active_protm_grp->faulted = true; + kbase_csf_add_group_fatal_error( + scheduler->active_protm_grp, &err_payload); + kbase_event_wakeup(scheduler->active_protm_grp->kctx); + } + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + if (kbase_prepare_to_reset_gpu( + kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu(kbdev); + } + + if (val & RESET_COMPLETED) + kbase_pm_reset_done(kbdev); + + KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val); + + /* kbase_pm_check_transitions (called by kbase_pm_power_changed) must + * be called after the IRQ has been cleared. This is because it might + * trigger further power transitions and we don't want to miss the + * interrupt raised to notify us that these further transitions have + * finished. The same applies to kbase_clean_caches_done() - if another + * clean was queued, it might trigger another clean, which might + * generate another interrupt which shouldn't be missed. + */ + + if (val & CLEAN_CACHES_COMPLETED) + kbase_clean_caches_done(kbdev); + + if (val & (POWER_CHANGED_ALL | MCU_STATUS_GPU_IRQ)) { + kbase_pm_power_changed(kbdev); + } else if (val & CLEAN_CACHES_COMPLETED) { + /* If cache line evict messages can be lost when shader cores + * power down then we need to flush the L2 cache before powering + * down cores. 
When the flush completes, the shaders' state + * machine needs to be re-invoked to proceed with powering down + * cores. + */ + if (kbdev->pm.backend.l2_always_on || + kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) + kbase_pm_power_changed(kbdev); + } + + KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, val); +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_hw_jm.c b/dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_hw_jm.c new file mode 100644 index 0000000..384e385 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_hw_jm.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +/** + * kbase_report_gpu_fault - Report a GPU fault. + * @kbdev: Kbase device pointer + * @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS + * was also set + * + * This function is called from the interrupt handler when a GPU fault occurs. + * It reports the details of the fault using dev_warn(). 
+ */ +static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple) +{ + u32 status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS)); + u64 address = (u64) kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_FAULTADDRESS_HI)) << 32; + + address |= kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_FAULTADDRESS_LO)); + meson_gpu_fault ++; + dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx", + status, + kbase_gpu_exception_name(status & 0xFF), + address); + if (multiple) + dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n"); +} + +void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) +{ + KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, val); + if (val & GPU_FAULT) + kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS); + + if (val & RESET_COMPLETED) + kbase_pm_reset_done(kbdev); + + if (val & PRFCNT_SAMPLE_COMPLETED) + kbase_instr_hwcnt_sample_done(kbdev); + + KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val); + + /* kbase_pm_check_transitions (called by kbase_pm_power_changed) must + * be called after the IRQ has been cleared. This is because it might + * trigger further power transitions and we don't want to miss the + * interrupt raised to notify us that these further transitions have + * finished. The same applies to kbase_clean_caches_done() - if another + * clean was queued, it might trigger another clean, which might + * generate another interrupt which shouldn't be missed. + */ + + if (val & CLEAN_CACHES_COMPLETED) + kbase_clean_caches_done(kbdev); + + if (val & POWER_CHANGED_ALL) { + kbase_pm_power_changed(kbdev); + } else if (val & CLEAN_CACHES_COMPLETED) { + /* If cache line evict messages can be lost when shader cores + * power down then we need to flush the L2 cache before powering + * down cores. When the flush completes, the shaders' state + * machine needs to be re-invoked to proceed with powering down + * cores. 
+ */ + if (kbdev->pm.backend.l2_always_on || + kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) + kbase_pm_power_changed(kbdev); + } + + KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, val); +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_jm.c b/dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_jm.c index fbba2e7..6a6ab60 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_jm.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/device/backend/mali_kbase_device_jm.c @@ -1,12 +1,12 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -17,21 +17,17 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ -#include "../mali_kbase_device_internal.h" -#include "../mali_kbase_device.h" +#include +#include +#include #include #include #include #include -#ifdef CONFIG_MALI_NO_MALI -#include -#endif #ifdef CONFIG_MALI_ARBITER_SUPPORT #include @@ -43,6 +39,7 @@ #include #include #include +#include /** * kbase_backend_late_init - Perform any backend-specific initialization. 
@@ -95,9 +92,6 @@ static int kbase_backend_late_init(struct kbase_device *kbdev) if (err) goto fail_devfreq_init; - /* Idle the GPU and/or cores, if the policy wants it to */ - kbase_pm_context_idle(kbdev); - /* Update gpuprops with L2_FEATURES if applicable */ err = kbase_gpuprops_update_l2_features(kbdev); if (err) @@ -105,9 +99,15 @@ static int kbase_backend_late_init(struct kbase_device *kbdev) init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait); + /* Idle the GPU and/or cores, if the policy wants it to */ + kbase_pm_context_idle(kbdev); + + mutex_init(&kbdev->fw_load_lock); + return 0; fail_update_l2_features: + kbase_backend_devfreq_term(kbdev); fail_devfreq_init: kbase_job_slot_term(kbdev); fail_job_slot: @@ -120,6 +120,7 @@ fail_interrupt_test: kbase_backend_timer_term(kbdev); fail_timer: + kbase_pm_context_idle(kbdev); kbase_hwaccess_pm_halt(kbdev); fail_pm_powerup: kbase_reset_gpu_term(kbdev); @@ -144,57 +145,65 @@ static void kbase_backend_late_term(struct kbase_device *kbdev) kbase_hwaccess_pm_term(kbdev); } +static int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev) +{ + return kbase_hwcnt_backend_jm_create(kbdev, &kbdev->hwcnt_gpu_iface); +} + +static void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev) +{ + kbase_hwcnt_backend_jm_destroy(&kbdev->hwcnt_gpu_iface); +} + static const struct kbase_device_init dev_init[] = { -#ifdef CONFIG_MALI_NO_MALI - {kbase_gpu_device_create, kbase_gpu_device_destroy, - "Dummy model initialization failed"}, -#else - {assign_irqs, NULL, - "IRQ search failed"}, - {registers_map, registers_unmap, - "Register map failed"}, -#endif - {kbase_device_io_history_init, kbase_device_io_history_term, - "Register access history initialization failed"}, - {kbase_device_pm_init, kbase_device_pm_term, - "Power management initialization failed"}, - {kbase_device_early_init, kbase_device_early_term, - "Early device initialization failed"}, - {kbase_device_populate_max_freq, NULL, - "Populating max 
frequency failed"}, - {kbase_device_misc_init, kbase_device_misc_term, - "Miscellaneous device initialization failed"}, - {kbase_ctx_sched_init, kbase_ctx_sched_term, - "Context scheduler initialization failed"}, - {kbase_mem_init, kbase_mem_term, - "Memory subsystem initialization failed"}, - {kbase_device_coherency_init, NULL, - "Device coherency init failed"}, - {kbase_protected_mode_init, kbase_protected_mode_term, - "Protected mode subsystem initialization failed"}, - {kbase_device_list_init, kbase_device_list_term, - "Device list setup failed"}, - {kbasep_js_devdata_init, kbasep_js_devdata_term, - "Job JS devdata initialization failed"}, - {kbase_device_timeline_init, kbase_device_timeline_term, - "Timeline stream initialization failed"}, - {kbase_device_hwcnt_backend_gpu_init, - kbase_device_hwcnt_backend_gpu_term, - "GPU hwcnt backend creation failed"}, - {kbase_device_hwcnt_context_init, kbase_device_hwcnt_context_term, - "GPU hwcnt context initialization failed"}, - {kbase_device_hwcnt_virtualizer_init, - kbase_device_hwcnt_virtualizer_term, - "GPU hwcnt virtualizer initialization failed"}, - {kbase_device_vinstr_init, kbase_device_vinstr_term, - "Virtual instrumentation initialization failed"}, - {kbase_backend_late_init, kbase_backend_late_term, - "Late backend initialization failed"}, + { assign_irqs, NULL, "IRQ search failed" }, + { registers_map, registers_unmap, "Register map failed" }, + { kbase_device_io_history_init, kbase_device_io_history_term, + "Register access history initialization failed" }, + { kbase_device_pm_init, kbase_device_pm_term, + "Power management initialization failed" }, + { kbase_device_early_init, kbase_device_early_term, + "Early device initialization failed" }, + { kbase_device_populate_max_freq, NULL, + "Populating max frequency failed" }, + { kbase_device_misc_init, kbase_device_misc_term, + "Miscellaneous device initialization failed" }, + { kbase_device_pcm_dev_init, kbase_device_pcm_dev_term, + "Priority control 
manager initialization failed" }, + { kbase_ctx_sched_init, kbase_ctx_sched_term, + "Context scheduler initialization failed" }, + { kbase_mem_init, kbase_mem_term, + "Memory subsystem initialization failed" }, + { kbase_device_coherency_init, NULL, "Device coherency init failed" }, + { kbase_protected_mode_init, kbase_protected_mode_term, + "Protected mode subsystem initialization failed" }, + { kbase_device_list_init, kbase_device_list_term, + "Device list setup failed" }, + { kbasep_js_devdata_init, kbasep_js_devdata_term, + "Job JS devdata initialization failed" }, + { kbase_device_timeline_init, kbase_device_timeline_term, + "Timeline stream initialization failed" }, + { kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term, + "Clock rate trace manager initialization failed" }, + { kbase_instr_backend_init, kbase_instr_backend_term, + "Instrumentation backend initialization failed" }, + { kbase_device_hwcnt_backend_jm_init, + kbase_device_hwcnt_backend_jm_term, + "GPU hwcnt backend creation failed" }, + { kbase_device_hwcnt_context_init, kbase_device_hwcnt_context_term, + "GPU hwcnt context initialization failed" }, + { kbase_device_hwcnt_virtualizer_init, + kbase_device_hwcnt_virtualizer_term, + "GPU hwcnt virtualizer initialization failed" }, + { kbase_device_vinstr_init, kbase_device_vinstr_term, + "Virtual instrumentation initialization failed" }, + { kbase_backend_late_init, kbase_backend_late_term, + "Late backend initialization failed" }, #ifdef MALI_KBASE_BUILD - {kbase_debug_job_fault_dev_init, kbase_debug_job_fault_dev_term, - "Job fault debug initialization failed"}, - {kbase_device_debugfs_init, kbase_device_debugfs_term, - "DebugFS initialization failed"}, + { kbase_debug_job_fault_dev_init, kbase_debug_job_fault_dev_term, + "Job fault debug initialization failed" }, + { kbase_device_debugfs_init, kbase_device_debugfs_term, + "DebugFS initialization failed" }, /* Sysfs init needs to happen before registering the device with * 
misc_register(), otherwise it causes a race condition between * registering the device and a uevent event being generated for @@ -207,17 +216,15 @@ static const struct kbase_device_init dev_init[] = { * paragraph that starts with "Word of warning", currently the * second-last paragraph. */ - {kbase_sysfs_init, kbase_sysfs_term, "SysFS group creation failed"}, - {kbase_device_misc_register, kbase_device_misc_deregister, - "Misc device registration failed"}, -#ifdef CONFIG_MALI_BUSLOG - {buslog_init, buslog_term, "Bus log client registration failed"}, + { kbase_sysfs_init, kbase_sysfs_term, "SysFS group creation failed" }, + { kbase_device_misc_register, kbase_device_misc_deregister, + "Misc device registration failed" }, + { kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer, + "GPU property population failed" }, #endif - {kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer, - "GPU property population failed"}, -#endif - {kbase_dummy_job_wa_load, kbase_dummy_job_wa_cleanup, - "Dummy job workaround load failed"}, + { NULL, kbase_dummy_job_wa_cleanup, NULL }, + { kbase_device_late_init, kbase_device_late_term, + "Late device initialization failed" }, }; static void kbase_device_term_partial(struct kbase_device *kbdev, @@ -247,14 +254,34 @@ int kbase_device_init(struct kbase_device *kbdev) kbase_disjoint_init(kbdev); for (i = 0; i < ARRAY_SIZE(dev_init); i++) { - err = dev_init[i].init(kbdev); - if (err) { - dev_err(kbdev->dev, "%s error = %d\n", + if (dev_init[i].init) { + err = dev_init[i].init(kbdev); + if (err) { + if (err != -EPROBE_DEFER) + dev_err(kbdev->dev, "%s error = %d\n", dev_init[i].err_mes, err); - kbase_device_term_partial(kbdev, i); - break; + kbase_device_term_partial(kbdev, i); + break; + } } } return err; } + +int kbase_device_firmware_init_once(struct kbase_device *kbdev) +{ + int ret = 0; + + mutex_lock(&kbdev->fw_load_lock); + + if (!kbdev->dummy_job_wa_loaded) { + ret = kbase_dummy_job_wa_load(kbdev); + if 
(!ret) + kbdev->dummy_job_wa_loaded = true; + } + + mutex_unlock(&kbdev->fw_load_lock); + + return ret; +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device.c b/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device.c index 76f14e5..1ebd8aa 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device.c @@ -1,12 +1,12 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -17,12 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - - /* * Base kernel device APIs */ @@ -34,12 +30,14 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include "mali_kbase_vinstr.h" @@ -50,6 +48,7 @@ #include "mali_kbase_device_internal.h" #include "backend/gpu/mali_kbase_pm_internal.h" #include "backend/gpu/mali_kbase_irq_internal.h" +#include "mali_kbase_regs_history_debugfs.h" #ifdef CONFIG_MALI_ARBITER_SUPPORT #include "arbiter/mali_kbase_arbiter_pm.h" @@ -75,64 +74,152 @@ struct kbase_device *kbase_device_alloc(void) return kzalloc(sizeof(struct kbase_device), GFP_KERNEL); } -static int kbase_device_as_init(struct kbase_device *kbdev, int i) +/** + * kbase_device_all_as_init() - Initialise address space objects of the device. 
+ * + * @kbdev: Pointer to kbase device. + * + * Return: 0 on success otherwise non-zero. + */ +static int kbase_device_all_as_init(struct kbase_device *kbdev) { - kbdev->as[i].number = i; - kbdev->as[i].bf_data.addr = 0ULL; - kbdev->as[i].pf_data.addr = 0ULL; + int i, err = 0; - kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", 0, 1, i); - if (!kbdev->as[i].pf_wq) - return -EINVAL; + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { + err = kbase_mmu_as_init(kbdev, i); + if (err) + break; + } - INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker); - INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker); + if (err) { + while (i-- > 0) + kbase_mmu_as_term(kbdev, i); + } - return 0; + return err; } -static void kbase_device_as_term(struct kbase_device *kbdev, int i) +static void kbase_device_all_as_term(struct kbase_device *kbdev) { - destroy_workqueue(kbdev->as[i].pf_wq); + int i; + + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) + kbase_mmu_as_term(kbdev, i); } -static int kbase_device_all_as_init(struct kbase_device *kbdev) +int kbase_device_pcm_dev_init(struct kbase_device *const kbdev) { - int i, err; + int err = 0; - for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { - err = kbase_device_as_init(kbdev, i); - if (err) - goto free_workqs; - } +#if IS_ENABLED(CONFIG_OF) + struct device_node *prio_ctrl_node; - return 0; - -free_workqs: - for (; i > 0; i--) - kbase_device_as_term(kbdev, i); + /* Check to see whether or not a platform specific priority control manager + * is available. 
+ */ + prio_ctrl_node = of_parse_phandle(kbdev->dev->of_node, + "priority-control-manager", 0); + if (!prio_ctrl_node) { + dev_info(kbdev->dev, + "No priority control manager is configured"); + } else { + struct platform_device *const pdev = + of_find_device_by_node(prio_ctrl_node); + + if (!pdev) { + dev_err(kbdev->dev, + "The configured priority control manager was not found"); + } else { + struct priority_control_manager_device *pcm_dev = + platform_get_drvdata(pdev); + if (!pcm_dev) { + dev_info(kbdev->dev, "Priority control manager is not ready"); + err = -EPROBE_DEFER; + } else if (!try_module_get(pcm_dev->owner)) { + dev_err(kbdev->dev, "Failed to get priority control manager module"); + err = -ENODEV; + } else { + dev_info(kbdev->dev, "Priority control manager successfully loaded"); + kbdev->pcm_dev = pcm_dev; + } + } + of_node_put(prio_ctrl_node); + } +#endif /* CONFIG_OF */ return err; } -static void kbase_device_all_as_term(struct kbase_device *kbdev) +void kbase_device_pcm_dev_term(struct kbase_device *const kbdev) { - int i; + if (kbdev->pcm_dev) + module_put(kbdev->pcm_dev->owner); +} - for (i = 0; i < kbdev->nr_hw_address_spaces; i++) - kbase_device_as_term(kbdev, i); +#define KBASE_PAGES_TO_KIB(pages) (((unsigned int)pages) << (PAGE_SHIFT - 10)) + +/** + * mali_oom_notifier_handler - Mali driver out-of-memory handler + * + * @nb - notifier block - used to retrieve kbdev pointer + * @action - action (unused) + * @data - data pointer (unused) + * This function simply lists memory usage by the Mali driver, per GPU device, + * for diagnostic purposes. 
+ */ +static int mali_oom_notifier_handler(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct kbase_device *kbdev; + struct kbase_context *kctx = NULL; + unsigned long kbdev_alloc_total; + + if (WARN_ON(nb == NULL)) + return NOTIFY_BAD; + + kbdev = container_of(nb, struct kbase_device, oom_notifier_block); + + kbdev_alloc_total = + KBASE_PAGES_TO_KIB(atomic_read(&(kbdev->memdev.used_pages))); + + dev_err(kbdev->dev, "OOM notifier: dev %s %lu kB\n", kbdev->devname, + kbdev_alloc_total); + + mutex_lock(&kbdev->kctx_list_lock); + + list_for_each_entry (kctx, &kbdev->kctx_list, kctx_list_link) { + struct pid *pid_struct; + struct task_struct *task; + unsigned long task_alloc_total = + KBASE_PAGES_TO_KIB(atomic_read(&(kctx->used_pages))); + + rcu_read_lock(); + pid_struct = find_get_pid(kctx->pid); + task = pid_task(pid_struct, PIDTYPE_PID); + + dev_err(kbdev->dev, + "OOM notifier: tsk %s tgid (%u) pid (%u) %lu kB\n", + task ? task->comm : "[null task]", kctx->tgid, + kctx->pid, task_alloc_total); + + put_pid(pid_struct); + rcu_read_unlock(); + } + + mutex_unlock(&kbdev->kctx_list_lock); + return NOTIFY_OK; } int kbase_device_misc_init(struct kbase_device * const kbdev) { int err; -#ifdef CONFIG_ARM64 +#if IS_ENABLED(CONFIG_ARM64) struct device_node *np = NULL; #endif /* CONFIG_ARM64 */ spin_lock_init(&kbdev->mmu_mask_change); mutex_init(&kbdev->mmu_hw_mutex); -#ifdef CONFIG_ARM64 +#if IS_ENABLED(CONFIG_ARM64) kbdev->cci_snoop_enabled = false; np = kbdev->dev->of_node; if (np != NULL) { @@ -153,6 +240,7 @@ int kbase_device_misc_init(struct kbase_device * const kbdev) } } #endif /* CONFIG_ARM64 */ + /* Get the list of workarounds for issues on the current HW * (identified by the GPU_ID register) */ @@ -169,11 +257,6 @@ int kbase_device_misc_init(struct kbase_device * const kbdev) if (err) goto fail; - /* On Linux 4.0+, dma coherency is determined from device tree */ -#if defined(CONFIG_ARM64) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) - 
set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops); -#endif - /* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our * device structure was created by device-tree */ @@ -194,9 +277,7 @@ int kbase_device_misc_init(struct kbase_device * const kbdev) err = kbase_device_all_as_init(kbdev); if (err) - goto as_init_failed; - - spin_lock_init(&kbdev->hwcnt.lock); + goto dma_set_mask_failed; err = kbase_ktrace_init(kbdev); if (err) @@ -208,30 +289,28 @@ int kbase_device_misc_init(struct kbase_device * const kbdev) atomic_set(&kbdev->ctx_num, 0); - err = kbase_instr_backend_init(kbdev); - if (err) - goto term_trace; - kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD; kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS; - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - kbdev->mmu_mode = kbase_mmu_mode_get_aarch64(); - else - kbdev->mmu_mode = kbase_mmu_mode_get_lpae(); + kbdev->mmu_mode = kbase_mmu_mode_get_aarch64(); mutex_init(&kbdev->kctx_list_lock); INIT_LIST_HEAD(&kbdev->kctx_list); - spin_lock_init(&kbdev->hwaccess_lock); + dev_dbg(kbdev->dev, "Registering mali_oom_notifier_handlern"); + kbdev->oom_notifier_block.notifier_call = mali_oom_notifier_handler; + err = register_oom_notifier(&kbdev->oom_notifier_block); + if (err) { + dev_err(kbdev->dev, + "Unable to register OOM notifier for Mali - but will continue\n"); + kbdev->oom_notifier_block.notifier_call = NULL; + } return 0; -term_trace: - kbase_ktrace_term(kbdev); + term_as: kbase_device_all_as_term(kbdev); -as_init_failed: dma_set_mask_failed: fail: return err; @@ -247,11 +326,12 @@ void kbase_device_misc_term(struct kbase_device *kbdev) kbase_debug_assert_register_hook(NULL, NULL); #endif - kbase_instr_backend_term(kbdev); - kbase_ktrace_term(kbdev); kbase_device_all_as_term(kbdev); + + if (kbdev->oom_notifier_block.notifier_call) + unregister_oom_notifier(&kbdev->oom_notifier_block); } void kbase_device_free(struct kbase_device *kbdev) @@ -271,16 +351,6 @@ void 
kbase_increment_device_id(void) kbase_dev_nr++; } -int kbase_device_hwcnt_backend_gpu_init(struct kbase_device *kbdev) -{ - return kbase_hwcnt_backend_gpu_create(kbdev, &kbdev->hwcnt_gpu_iface); -} - -void kbase_device_hwcnt_backend_gpu_term(struct kbase_device *kbdev) -{ - kbase_hwcnt_backend_gpu_destroy(&kbdev->hwcnt_gpu_iface); -} - int kbase_device_hwcnt_context_init(struct kbase_device *kbdev) { return kbase_hwcnt_context_init(&kbdev->hwcnt_gpu_iface, @@ -400,7 +470,18 @@ int kbase_device_early_init(struct kbase_device *kbdev) /* We're done accessing the GPU registers for now. */ kbase_pm_register_access_disable(kbdev); + /* This spinlock has to be initialized before installing interrupt + * handlers that require to hold it to process interrupts. + */ + spin_lock_init(&kbdev->hwaccess_lock); +#ifdef CONFIG_MALI_ARBITER_SUPPORT + if (kbdev->arb.arb_if) + err = kbase_arbiter_pm_install_interrupts(kbdev); + else + err = kbase_install_interrupts(kbdev); +#else err = kbase_install_interrupts(kbdev); +#endif if (err) goto fail_interrupts; @@ -427,3 +508,17 @@ void kbase_device_early_term(struct kbase_device *kbdev) kbase_pm_runtime_term(kbdev); kbasep_platform_device_term(kbdev); } + +int kbase_device_late_init(struct kbase_device *kbdev) +{ + int err; + + err = kbasep_platform_device_late_init(kbdev); + + return err; +} + +void kbase_device_late_term(struct kbase_device *kbdev) +{ + kbasep_platform_device_late_term(kbdev); +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device.h b/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device.h index 16f1d70..517c16b 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #include @@ -47,6 +46,19 @@ void kbase_device_put_list(const struct list_head *dev_list); */ void kbase_increment_device_id(void); +/** + * kbase_device_firmware_init_once - Initialize firmware and HWC + * + * @kbdev: An instance of the GPU platform device, allocated from the probe + * method of the driver. + * + * When a device file is opened for the first time, + * load firmware and initialize hardware counter components. + * + * @return 0 on success. An error code on failure. + */ +int kbase_device_firmware_init_once(struct kbase_device *kbdev); + /** * kbase_device_init - Device initialisation. * @@ -69,3 +81,109 @@ int kbase_device_init(struct kbase_device *kbdev); * */ void kbase_device_term(struct kbase_device *kbdev); + +/** + * kbase_reg_write - write to GPU register + * @kbdev: Kbase device pointer + * @offset: Offset of register + * @value: Value to write + * + * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). + */ +void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value); + +/** + * kbase_reg_read - read from GPU register + * @kbdev: Kbase device pointer + * @offset: Offset of register + * + * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). + * + * Return: Value in desired register + */ +u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset); + +/** + * kbase_is_gpu_removed() - Has the GPU been removed. 
+ * @kbdev: Kbase device pointer + * + * When Kbase takes too long to give up the GPU, the Arbiter + * can remove it. This will then be followed by a GPU lost event. + * This function will return true if the GPU has been removed. + * When this happens register reads will be zero. A zero GPU_ID is + * invalid so this is used to detect when GPU is removed. + * + * Return: True if GPU removed + */ +bool kbase_is_gpu_removed(struct kbase_device *kbdev); + +/** + * kbase_gpu_start_cache_clean - Start a cache clean + * @kbdev: Kbase device + * + * Issue a cache clean and invalidate command to hardware. This function will + * take hwaccess_lock. + */ +void kbase_gpu_start_cache_clean(struct kbase_device *kbdev); + +/** + * kbase_gpu_start_cache_clean_nolock - Start a cache clean + * @kbdev: Kbase device + * + * Issue a cache clean and invalidate command to hardware. hwaccess_lock + * must be held by the caller. + */ +void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev); + +/** + * kbase_gpu_wait_cache_clean - Wait for cache cleaning to finish + * @kbdev: Kbase device + * + * This function will take hwaccess_lock, and may sleep. + */ +void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev); + +/** + * kbase_gpu_wait_cache_clean_timeout - Wait for certain time for cache + * cleaning to finish + * @kbdev: Kbase device + * @wait_timeout_ms: Time in milliseconds, to wait for cache clean to complete. + * + * This function will take hwaccess_lock, and may sleep. This is supposed to be + * called from paths (like GPU reset) where an indefinite wait for the + * completion of cache clean operation can cause deadlock, as the operation may + * never complete. + * + * Return: 0 if successful or a negative error code on failure. + */ +int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev, + unsigned int wait_timeout_ms); + +/** + * kbase_gpu_cache_clean_wait_complete - Called after the cache cleaning is + * finished. 
Would also be called after + * the GPU reset. + * @kbdev: Kbase device + * + * Caller must hold the hwaccess_lock. + */ +void kbase_gpu_cache_clean_wait_complete(struct kbase_device *kbdev); + +/** + * kbase_clean_caches_done - Issue preiously queued cache clean request or + * wake up the requester that issued cache clean. + * @kbdev: Kbase device + * + * Caller must hold the hwaccess_lock. + */ +void kbase_clean_caches_done(struct kbase_device *kbdev); + +/** + * kbase_gpu_interrupt - GPU interrupt handler + * @kbdev: Kbase device pointer + * @val: The value of the GPU IRQ status register which triggered the call + * + * This function is called from the interrupt handler when a GPU irq is to be + * handled. + */ +void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device_hw.c b/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device_hw.c new file mode 100644 index 0000000..e80559a --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device_hw.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2014-2016, 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include + +#if !IS_ENABLED(CONFIG_MALI_NO_MALI) +void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value) +{ + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); + KBASE_DEBUG_ASSERT(kbdev->dev != NULL); + + writel(value, kbdev->reg + offset); + +#if IS_ENABLED(CONFIG_DEBUG_FS) + if (unlikely(kbdev->io_history.enabled)) + kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, + value, 1); +#endif /* CONFIG_DEBUG_FS */ + dev_dbg(kbdev->dev, "w: reg %08x val %08x", offset, value); +} + +KBASE_EXPORT_TEST_API(kbase_reg_write); + +u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) +{ + u32 val; + + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); + KBASE_DEBUG_ASSERT(kbdev->dev != NULL); + + val = readl(kbdev->reg + offset); + +#if IS_ENABLED(CONFIG_DEBUG_FS) + if (unlikely(kbdev->io_history.enabled)) + kbase_io_history_add(&kbdev->io_history, kbdev->reg + offset, + val, 0); +#endif /* CONFIG_DEBUG_FS */ + dev_dbg(kbdev->dev, "r: reg %08x val %08x", offset, val); + + return val; +} + +KBASE_EXPORT_TEST_API(kbase_reg_read); + +bool kbase_is_gpu_removed(struct kbase_device *kbdev) +{ + u32 val; + + val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)); + + return val == 0; +} +#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */ + +void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev) +{ + u32 irq_mask; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (kbdev->cache_clean_in_progress) { + /* If this is called while another clean is in progress, we + * can't rely on the current one to flush any new changes in + * the cache. Instead, trigger another cache clean immediately + * after this one finishes. 
+ */ + kbdev->cache_clean_queued = true; + return; + } + + /* Enable interrupt */ + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), + irq_mask | CLEAN_CACHES_COMPLETED); + + KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CLEAN_INV_CACHES); + + kbdev->cache_clean_in_progress = true; +} + +void kbase_gpu_start_cache_clean(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_gpu_start_cache_clean_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +void kbase_gpu_cache_clean_wait_complete(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbdev->cache_clean_queued = false; + kbdev->cache_clean_in_progress = false; + wake_up(&kbdev->cache_clean_wait); +} + +void kbase_clean_caches_done(struct kbase_device *kbdev) +{ + u32 irq_mask; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if (kbdev->cache_clean_queued) { + kbdev->cache_clean_queued = false; + + KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CLEAN_INV_CACHES); + } else { + /* Disable interrupt */ + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), + irq_mask & ~CLEAN_CACHES_COMPLETED); + + kbase_gpu_cache_clean_wait_complete(kbdev); + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +static inline bool get_cache_clean_flag(struct kbase_device *kbdev) +{ + bool cache_clean_in_progress; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + cache_clean_in_progress = kbdev->cache_clean_in_progress; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return cache_clean_in_progress; +} + +void 
kbase_gpu_wait_cache_clean(struct kbase_device *kbdev) +{ + while (get_cache_clean_flag(kbdev)) { + wait_event_interruptible(kbdev->cache_clean_wait, + !kbdev->cache_clean_in_progress); + } +} + +int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev, + unsigned int wait_timeout_ms) +{ + long remaining = msecs_to_jiffies(wait_timeout_ms); + + while (remaining && get_cache_clean_flag(kbdev)) { + remaining = wait_event_timeout(kbdev->cache_clean_wait, + !kbdev->cache_clean_in_progress, + remaining); + } + + return (remaining ? 0 : -ETIMEDOUT); +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device_internal.h b/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device_internal.h index 9f96db0..d422407 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device_internal.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/device/mali_kbase_device_internal.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #include @@ -43,9 +42,6 @@ void kbase_device_vinstr_term(struct kbase_device *kbdev); int kbase_device_timeline_init(struct kbase_device *kbdev); void kbase_device_timeline_term(struct kbase_device *kbdev); -int kbase_device_hwcnt_backend_gpu_init(struct kbase_device *kbdev); -void kbase_device_hwcnt_backend_gpu_term(struct kbase_device *kbdev); - int kbase_device_hwcnt_context_init(struct kbase_device *kbdev); void kbase_device_hwcnt_context_term(struct kbase_device *kbdev); @@ -76,3 +72,17 @@ int kbase_device_early_init(struct kbase_device *kbdev); * @kbdev: Device pointer */ void kbase_device_early_term(struct kbase_device *kbdev); + +/** + * kbase_device_late_init - Complete any device-specific initialization. + * @kbdev: Device pointer + * + * Return: 0 on success, or an error code on failure. + */ +int kbase_device_late_init(struct kbase_device *kbdev); + +/** + * kbase_device_late_term - Complete any device-specific termination. + * @kbdev: Device pointer + */ +void kbase_device_late_term(struct kbase_device *kbdev); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/gpu/Kbuild new file mode 100644 index 0000000..c3ab811 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/gpu/Kbuild @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2012, 2020-2021 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +mali_kbase-y += gpu/mali_kbase_gpu.o + +ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) + mali_kbase-y += gpu/backend/mali_kbase_gpu_fault_csf.o +else + mali_kbase-y += gpu/backend/mali_kbase_gpu_fault_jm.o +endif diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_fault_csf.c b/dvalin/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_fault_csf.c new file mode 100644 index 0000000..f9d4c14 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_fault_csf.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include +#include +#include + +const char *kbase_gpu_exception_name(u32 const exception_code) +{ + const char *e; + + switch (exception_code) { + /* CS exceptions */ + case CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED: + e = "CS_RESOURCE_TERMINATED"; + break; + case CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT: + e = "CS_INHERIT_FAULT"; + break; + /* CS fatal exceptions */ + case CS_FATAL_EXCEPTION_TYPE_CS_CONFIG_FAULT: + e = "CS_CONFIG_FAULT"; + break; + case CS_FATAL_EXCEPTION_TYPE_CS_ENDPOINT_FAULT: + e = "FATAL_CS_ENDPOINT_FAULT"; + break; + case CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT: + e = "FATAL_CS_BUS_FAULT"; + break; + case CS_FATAL_EXCEPTION_TYPE_CS_INVALID_INSTRUCTION: + e = "FATAL_CS_INVALID_INSTRUCTION"; + break; + case CS_FATAL_EXCEPTION_TYPE_CS_CALL_STACK_OVERFLOW: + e = "FATAL_CS_CALL_STACK_OVERFLOW"; + break; + /* Shader exceptions */ + case CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_PC: + e = "INSTR_INVALID_PC"; + break; + case CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_ENC: + e = "INSTR_INVALID_ENC"; + break; + case CS_FAULT_EXCEPTION_TYPE_INSTR_BARRIER_FAULT: + e = "INSTR_BARRIER_FAULT"; + break; + /* Misc exceptions */ + case CS_FAULT_EXCEPTION_TYPE_DATA_INVALID_FAULT: + e = "DATA_INVALID_FAULT"; + break; + case CS_FAULT_EXCEPTION_TYPE_TILE_RANGE_FAULT: + e = "TILE_RANGE_FAULT"; + break; + case CS_FAULT_EXCEPTION_TYPE_ADDR_RANGE_FAULT: + e = "ADDR_RANGE_FAULT"; + break; + case CS_FAULT_EXCEPTION_TYPE_IMPRECISE_FAULT: + e = "IMPRECISE_FAULT"; + break; + /* FW exceptions */ + case CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR: + e = "FIRMWARE_INTERNAL_ERROR"; + break; + case CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT: + e = "RESOURCE_EVICTION_TIMEOUT"; + break; + /* GPU Fault */ + case GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_BUS_FAULT: + e = "GPU_BUS_FAULT"; + break; + case GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_SHAREABILITY_FAULT: + e = "GPU_SHAREABILITY_FAULT"; + break; + case GPU_FAULTSTATUS_EXCEPTION_TYPE_SYSTEM_SHAREABILITY_FAULT: + e = 
"SYSTEM_SHAREABILITY_FAULT"; + break; + case GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_CACHEABILITY_FAULT: + e = "GPU_CACHEABILITY_FAULT"; + break; + /* Any other exception code is unknown */ + default: + e = "UNKNOWN"; + break; + } + + return e; +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_fault_jm.c b/dvalin/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_fault_jm.c index 63132dc..37015cc 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_fault_jm.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_fault_jm.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,13 +17,11 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #include -#include "../mali_kbase_gpu_fault.h" +#include const char *kbase_gpu_exception_name(u32 const exception_code) { @@ -119,8 +118,6 @@ const char *kbase_gpu_exception_name(u32 const exception_code) e = "TRANSLATION_FAULT"; break; case 0xC8: - e = "PERMISSION_FAULT"; - break; case 0xC9: case 0xCA: case 0xCB: @@ -141,8 +138,6 @@ const char *kbase_gpu_exception_name(u32 const exception_code) e = "TRANSTAB_BUS_FAULT"; break; case 0xD8: - e = "ACCESS_FLAG"; - break; case 0xD9: case 0xDA: case 0xDB: diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu.c b/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu.c index 3128db4..8a84ef5 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,11 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #include #include +#include const char *kbase_gpu_access_type_name(u32 fault_status) { diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_fault.h b/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_fault.h index b59b9d1..d1e9f77 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_fault.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_fault.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,15 +17,14 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KBASE_GPU_FAULT_H_ #define _KBASE_GPU_FAULT_H_ -/** Returns the name associated with a Mali exception code - * +/** + * kbase_gpu_exception_name() - + * Returns the name associated with a Mali exception code * @exception_code: exception code * * This function is called from the interrupt handler when a GPU fault occurs. @@ -33,17 +33,6 @@ */ const char *kbase_gpu_exception_name(u32 exception_code); -/** Returns the name associated with a Mali fatal exception code - * - * @fatal_exception_code: fatal exception code - * - * This function is called from the interrupt handler when a GPU fatal - * exception occurs. 
- * - * Return: name associated with the fatal exception code - */ -const char *kbase_gpu_fatal_exception_name(u32 const fatal_exception_code); - /** * kbase_gpu_access_type_name - Convert MMU_AS_CONTROL.FAULTSTATUS.ACCESS_TYPE * into string. diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h b/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h index 759f30d..47e7781 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,422 +17,17 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KBASE_GPU_REGMAP_H_ #define _KBASE_GPU_REGMAP_H_ -#include "mali_kbase_gpu_coherency.h" -#include "mali_kbase_gpu_id.h" -#include "backend/mali_kbase_gpu_regmap_jm.h" - -/* Begin Register Offsets */ -/* GPU control registers */ - -#define GPU_CONTROL_BASE 0x0000 -#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r)) -#define GPU_ID 0x000 /* (RO) GPU and revision identifier */ -#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */ -#define TILER_FEATURES 0x00C /* (RO) Tiler Features */ -#define MEM_FEATURES 0x010 /* (RO) Memory system features */ -#define MMU_FEATURES 0x014 /* (RO) MMU features */ -#define AS_PRESENT 0x018 /* (RO) Address space slots present */ -#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */ -#define GPU_IRQ_CLEAR 0x024 /* (WO) */ -#define GPU_IRQ_MASK 0x028 /* (RW) */ -#define GPU_IRQ_STATUS 0x02C /* (RO) */ - -#define GPU_COMMAND 0x030 /* (WO) */ -#define GPU_STATUS 0x034 /* (RO) */ - -#define GPU_DBGEN (1 << 8) /* DBGEN wire status */ - -#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */ -#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */ -#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */ - -#define L2_CONFIG 0x048 /* (RW) Level 2 cache configuration */ - -#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ -#define SUPER_L2_COHERENT (1 << 1) /* Shader cores within a core - * supergroup are l2 coherent - */ - -#define PWR_KEY 0x050 /* (WO) Power manager key register */ -#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */ -#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */ - -#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */ -#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */ -#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */ -#define TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, high word 
*/ - -#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */ -#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */ -#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */ -#define THREAD_FEATURES 0x0AC /* (RO) Thread features */ -#define THREAD_TLS_ALLOC 0x310 /* (RO) Number of threads per core that TLS must be allocated for */ - -#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */ -#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */ -#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */ -#define TEXTURE_FEATURES_3 0x0BC /* (RO) Support flags for texture order */ - -#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2)) - -#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ -#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */ - -#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */ -#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */ - -#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ -#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ - -#define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */ -#define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */ - -#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */ -#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */ - -#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */ -#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */ - -#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */ -#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */ - -#define STACK_READY_LO 0xE10 /* 
(RO) Core stack ready bitmap, low word */ -#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */ - -#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */ -#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */ - -#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */ -#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */ - -#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */ -#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */ - -#define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */ -#define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */ - -#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */ -#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */ - -#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */ -#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */ - -#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */ -#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */ - -#define STACK_PWROFF_LO 0xE30 /* (RO) Core stack power off bitmap, low word */ -#define STACK_PWROFF_HI 0xE34 /* (RO) Core stack power off bitmap, high word */ - -#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */ -#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */ - -#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */ -#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */ - -#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */ -#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */ - -#define STACK_PWRTRANS_LO 0xE40 /* 
(RO) Core stack power transition bitmap, low word */ -#define STACK_PWRTRANS_HI 0xE44 /* (RO) Core stack power transition bitmap, high word */ - -#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */ -#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */ - -#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */ -#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */ - -#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */ -#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */ - -#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ -#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */ - -#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration (implementation-specific) */ -#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration (implementation-specific) */ -#define L2_MMU_CONFIG 0xF0C /* (RW) L2 cache and MMU configuration (implementation-specific) */ - -/* Job control registers */ - -#define JOB_CONTROL_BASE 0x1000 - -#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r)) - -#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */ -#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */ -#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */ -#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */ - -/* MMU control registers */ - -#define MEMORY_MANAGEMENT_BASE 0x2000 -#define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r)) - -#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */ -#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */ -#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */ -#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */ - -#define MMU_AS0 0x400 /* Configuration registers for address space 0 */ -#define MMU_AS1 0x440 /* Configuration registers for address space 1 */ -#define MMU_AS2 0x480 /* 
Configuration registers for address space 2 */ -#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */ -#define MMU_AS4 0x500 /* Configuration registers for address space 4 */ -#define MMU_AS5 0x540 /* Configuration registers for address space 5 */ -#define MMU_AS6 0x580 /* Configuration registers for address space 6 */ -#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */ -#define MMU_AS8 0x600 /* Configuration registers for address space 8 */ -#define MMU_AS9 0x640 /* Configuration registers for address space 9 */ -#define MMU_AS10 0x680 /* Configuration registers for address space 10 */ -#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */ -#define MMU_AS12 0x700 /* Configuration registers for address space 12 */ -#define MMU_AS13 0x740 /* Configuration registers for address space 13 */ -#define MMU_AS14 0x780 /* Configuration registers for address space 14 */ -#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */ - -/* MMU address space control registers */ - -#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r)) - -#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */ -#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */ -#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */ -#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. 
*/ -#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */ -#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */ -#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */ -#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */ -#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */ -#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */ -#define AS_STATUS 0x28 /* (RO) Status flags for address space n */ - -/* (RW) Translation table configuration for address space n, low word */ -#define AS_TRANSCFG_LO 0x30 -/* (RW) Translation table configuration for address space n, high word */ -#define AS_TRANSCFG_HI 0x34 -/* (RO) Secondary fault address for address space n, low word */ -#define AS_FAULTEXTRA_LO 0x38 -/* (RO) Secondary fault address for address space n, high word */ -#define AS_FAULTEXTRA_HI 0x3C - -/* End Register Offsets */ - -/* IRQ flags */ -#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ -#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */ -#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */ -#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */ -#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */ - -#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */ -#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ - -/* Include POWER_CHANGED_SINGLE in debug builds for use in irq latency test. - */ -#define GPU_IRQ_REG_COMMON (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \ - | POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED) +#include +/* Include POWER_CHANGED_SINGLE in debug builds for use in irq latency test. 
*/ #ifdef CONFIG_MALI_DEBUG +#undef GPU_IRQ_REG_ALL #define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE) -#else /* CONFIG_MALI_DEBUG */ -#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON) #endif /* CONFIG_MALI_DEBUG */ -/* - * MMU_IRQ_RAWSTAT register values. Values are valid also for - * MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers. - */ - -#define MMU_PAGE_FAULT_FLAGS 16 - -/* Macros returning a bitmask to retrieve page fault or bus error flags from - * MMU registers */ -#define MMU_PAGE_FAULT(n) (1UL << (n)) -#define MMU_BUS_ERROR(n) (1UL << ((n) + MMU_PAGE_FAULT_FLAGS)) - -/* - * Begin LPAE MMU TRANSTAB register values - */ -#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK 0xfffff000 -#define AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED (0u << 0) -#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY (1u << 1) -#define AS_TRANSTAB_LPAE_ADRMODE_TABLE (3u << 0) -#define AS_TRANSTAB_LPAE_READ_INNER (1u << 2) -#define AS_TRANSTAB_LPAE_SHARE_OUTER (1u << 4) - -#define AS_TRANSTAB_LPAE_ADRMODE_MASK 0x00000003 - -/* - * Begin AARCH64 MMU TRANSTAB register values - */ -#define MMU_HW_OUTA_BITS 40 -#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4)) - -/* - * Begin MMU STATUS register values - */ -#define AS_STATUS_AS_ACTIVE 0x01 - -#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK (0x7<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT (0x0<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT (0x1<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3) -#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3) - -#define AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0 -#define AS_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFF << AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) -#define AS_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \ - (((reg_val)&AS_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) - -#define 
AS_FAULTSTATUS_ACCESS_TYPE_SHIFT 8 -#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) -#define AS_FAULTSTATUS_ACCESS_TYPE_GET(reg_val) \ - (((reg_val)&AS_FAULTSTATUS_ACCESS_TYPE_MASK) >> AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) - -#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0) -#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1) -#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2) -#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3) - -#define AS_FAULTSTATUS_SOURCE_ID_SHIFT 16 -#define AS_FAULTSTATUS_SOURCE_ID_MASK (0xFFFF << AS_FAULTSTATUS_SOURCE_ID_SHIFT) -#define AS_FAULTSTATUS_SOURCE_ID_GET(reg_val) \ - (((reg_val)&AS_FAULTSTATUS_SOURCE_ID_MASK) >> AS_FAULTSTATUS_SOURCE_ID_SHIFT) - -/* - * Begin MMU TRANSCFG register values - */ -#define AS_TRANSCFG_ADRMODE_LEGACY 0 -#define AS_TRANSCFG_ADRMODE_UNMAPPED 1 -#define AS_TRANSCFG_ADRMODE_IDENTITY 2 -#define AS_TRANSCFG_ADRMODE_AARCH64_4K 6 -#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8 - -#define AS_TRANSCFG_ADRMODE_MASK 0xF - -/* - * Begin TRANSCFG register values - */ -#define AS_TRANSCFG_PTW_MEMATTR_MASK (3ull << 24) -#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1ull << 24) -#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2ull << 24) - -#define AS_TRANSCFG_PTW_SH_MASK ((3ull << 28)) -#define AS_TRANSCFG_PTW_SH_OS (2ull << 28) -#define AS_TRANSCFG_PTW_SH_IS (3ull << 28) -#define AS_TRANSCFG_R_ALLOCATE (1ull << 30) - -/* - * Begin Command Values - */ - -/* AS_COMMAND register commands */ -#define AS_COMMAND_NOP 0x00 /* NOP Operation */ -#define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */ -#define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */ -#define AS_COMMAND_UNLOCK 0x03 /* Issue a flush region command to all MMUs */ -#define AS_COMMAND_FLUSH 0x04 /* Flush all L2 caches then issue a flush region command to all MMUs - (deprecated - only for use with T60x) */ -#define AS_COMMAND_FLUSH_PT 0x04 /* Flush all L2 caches then issue a flush region 
command to all MMUs */ -#define AS_COMMAND_FLUSH_MEM 0x05 /* Wait for memory accesses to complete, flush all the L1s cache then - flush all L2 caches then issue a flush region command to all MMUs */ - -/* GPU_STATUS values */ -#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ -#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */ - -/* PRFCNT_CONFIG register values */ -#define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */ -#define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. */ -#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */ - -/* The performance counters are disabled. */ -#define PRFCNT_CONFIG_MODE_OFF 0 -/* The performance counters are enabled, but are only written out when a - * PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. - */ -#define PRFCNT_CONFIG_MODE_MANUAL 1 -/* The performance counters are enabled, and are written out each time a tile - * finishes rendering. - */ -#define PRFCNT_CONFIG_MODE_TILE 2 - -/* AS_MEMATTR values from MMU_MEMATTR_STAGE1: */ -/* Use GPU implementation-defined caching policy. */ -#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull -/* The attribute set to force all resources to be cached. */ -#define AS_MEMATTR_FORCE_TO_CACHE_ALL 0x8Full -/* Inner write-alloc cache setup, no outer caching */ -#define AS_MEMATTR_WRITE_ALLOC 0x8Dull - -/* Use GPU implementation-defined caching policy. */ -#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull -/* The attribute set to force all resources to be cached. 
*/ -#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL 0x4Full -/* Inner write-alloc cache setup, no outer caching */ -#define AS_MEMATTR_LPAE_WRITE_ALLOC 0x4Dull -/* Set to implementation defined, outer caching */ -#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF 0x88ull -/* Set to write back memory, outer caching */ -#define AS_MEMATTR_LPAE_OUTER_WA 0x8Dull -/* There is no LPAE support for non-cacheable, since the memory type is always - * write-back. - * Marking this setting as reserved for LPAE - */ -#define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED - -/* L2_MMU_CONFIG register */ -#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT (23) -#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT) - -/* End L2_MMU_CONFIG register */ - -/* THREAD_* registers */ - -/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */ -#define IMPLEMENTATION_UNSPECIFIED 0 -#define IMPLEMENTATION_SILICON 1 -#define IMPLEMENTATION_FPGA 2 -#define IMPLEMENTATION_MODEL 3 - -/* Default values when registers are not supported by the implemented hardware */ -#define THREAD_MT_DEFAULT 256 -#define THREAD_MWS_DEFAULT 256 -#define THREAD_MBS_DEFAULT 256 -#define THREAD_MR_DEFAULT 1024 -#define THREAD_MTQ_DEFAULT 4 -#define THREAD_MTGS_DEFAULT 10 - -/* End THREAD_* registers */ - -/* SHADER_CONFIG register */ -#define SC_LS_ALLOW_ATTR_TYPES (1ul << 16) -#define SC_TLS_HASH_ENABLE (1ul << 17) -#define SC_LS_ATTR_CHECK_DISABLE (1ul << 18) -#define SC_VAR_ALGORITHM (1ul << 29) -/* End SHADER_CONFIG register */ - -/* TILER_CONFIG register */ -#define TC_CLOCK_GATE_OVERRIDE (1ul << 0) -/* End TILER_CONFIG register */ - -/* L2_CONFIG register */ -#define L2_CONFIG_SIZE_SHIFT 16 -#define L2_CONFIG_SIZE_MASK (0xFFul << L2_CONFIG_SIZE_SHIFT) -#define L2_CONFIG_HASH_SHIFT 24 -#define L2_CONFIG_HASH_MASK (0xFFul << L2_CONFIG_HASH_SHIFT) -/* End L2_CONFIG register */ - -/* IDVS_GROUP register */ -#define IDVS_GROUP_SIZE_SHIFT (16) -#define IDVS_GROUP_MAX_SIZE (0x3F) - #endif /* 
_KBASE_GPU_REGMAP_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/Kbuild index 3d9cf80..96977e9 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/Kbuild +++ b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/Kbuild @@ -1,10 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2016-2018, 2020-2021 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software # Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. +# of such GNU license. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -15,14 +16,20 @@ # along with this program; if not, you can access it online at # http://www.gnu.org/licenses/gpl-2.0.html. 
# -# SPDX-License-Identifier: GPL-2.0 -# # mali_kbase-y += \ - ipa/mali_kbase_ipa_simple.o \ - ipa/mali_kbase_ipa.o \ - ipa/mali_kbase_ipa_vinstr_g7x.o \ - ipa/mali_kbase_ipa_vinstr_common.o + ipa/mali_kbase_ipa_simple.o \ + ipa/mali_kbase_ipa.o + +mali_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o -mali_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o \ No newline at end of file +ifeq ($(MALI_USE_CSF),1) + mali_kbase-y += \ + ipa/backend/mali_kbase_ipa_counter_csf.o \ + ipa/backend/mali_kbase_ipa_counter_common_csf.o +else + mali_kbase-y += \ + ipa/backend/mali_kbase_ipa_counter_jm.o \ + ipa/backend/mali_kbase_ipa_counter_common_jm.o +endif diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_csf.c b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_csf.c new file mode 100644 index 0000000..81dc56b --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_csf.c @@ -0,0 +1,457 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ *
+ */
+
+#include "mali_kbase_ipa_counter_common_csf.h"
+#include "ipa/mali_kbase_ipa_debugfs.h"
+
+#define DEFAULT_SCALING_FACTOR 5
+
+/* If the value of GPU_ACTIVE is below this, use the simple model
+ * instead, to avoid extrapolating small amounts of counter data across
+ * large sample periods.
+ */
+#define DEFAULT_MIN_SAMPLE_CYCLES 10000
+
+/* Typical value for the sampling interval is expected to be less than 100ms,
+ * So 5 seconds is a reasonable upper limit for the time gap between the
+ * 2 samples.
+ */
+#define MAX_SAMPLE_INTERVAL_MS ((s64)5000)
+
+/* Maximum increment that is expected for a counter value during a sampling
+ * interval is derived assuming
+ * - max sampling interval of 1 second.
+ * - max GPU frequency of 2 GHz.
+ * - max number of cores as 32.
+ * - max increment of 4 in per core counter value at every clock cycle.
+ *
+ * So max increment = 2 * 10^9 * 32 * 4 = ~2^38.
+ * If a counter increases by an amount greater than this value, then an error
+ * will be returned and the simple power model will be used.
+ */
+#define MAX_COUNTER_INCREMENT (((u64)1 << 38) - 1)
+
+/**
+ * kbase_ipa_add_saturate() - Add two signed 64-bit values with saturation
+ * @a: First addend
+ * @b: Second addend
+ *
+ * The overflow checks are done before the addition, so the sum itself is
+ * only evaluated when it fits in an s64.
+ *
+ * Return: a + b, or S64_MAX / S64_MIN if the sum would exceed the positive /
+ *         negative range of s64.
+ */
+static inline s64 kbase_ipa_add_saturate(s64 a, s64 b)
+{
+	s64 rtn;
+
+	if (a > 0 && (S64_MAX - a) < b)
+		rtn = S64_MAX;
+	else if (a < 0 && (S64_MIN - a) > b)
+		rtn = S64_MIN;
+	else
+		rtn = a + b;
+
+	return rtn;
+}
+
+/**
+ * kbase_ipa_group_energy() - Weight one counter value by its coefficient
+ * @coeff:         Coefficient of the counter (may be negative)
+ * @counter_value: Accumulated value of the counter
+ *
+ * Return: The product of @counter_value and @coeff as a signed 64-bit value.
+ */
+static s64 kbase_ipa_group_energy(s32 coeff, u64 counter_value)
+{
+	/* Range: 0 < counter_value < 2^38 */
+
+	/* Range: -2^59 < ret < 2^59 (as -2^21 < coeff < 2^21) */
+	return counter_value * (s64)coeff;
+}
+
+/**
+ * kbase_ipa_attach_ipa_control() - register with kbase_ipa_control
+ * @model_data: Pointer to counter model data
+ *
+ * Register IPA counter model as a client of kbase_ipa_control, which
+ * provides an interface to retrieve the accumulated value of hardware
+ * counters to calculate energy consumption.
+ *
+ * Return: 0 on success, or an error code.
+ */
+static int
+kbase_ipa_attach_ipa_control(struct kbase_ipa_counter_model_data *model_data)
+{
+	struct kbase_device *kbdev = model_data->kbdev;
+	struct kbase_ipa_control_perf_counter *perf_counters;
+	u32 cnt_idx = 0;
+	int err;
+	size_t i;
+
+	/* Value for GPU_ACTIVE counter also needs to be queried. It is required
+	 * for the normalization of top-level and shader core counters.
+	 *
+	 * NOTE(review): num_counters is not checked here against the capacity
+	 * of the counter_values / counter_coeffs buffers - confirm that the
+	 * callers stay within KBASE_IPA_MAX_COUNTER_DEF_NUM.
+	 */
+	model_data->num_counters = 1 + model_data->num_top_level_cntrs +
+				   model_data->num_shader_cores_cntrs;
+
+	/* Temporary description array; it is freed again before returning,
+	 * whether or not the registration succeeds.
+	 */
+	perf_counters = kcalloc(model_data->num_counters,
+				sizeof(*perf_counters), GFP_KERNEL);
+
+	if (!perf_counters) {
+		dev_err(kbdev->dev,
+			"Failed to allocate memory for perf_counters array");
+		return -ENOMEM;
+	}
+
+	/* Fill in the description for GPU_ACTIVE counter which is always
+	 * needed, as mentioned above, regardless of the energy model used
+	 * by the CSF GPUs.
+	 */
+	perf_counters[cnt_idx].type = KBASE_IPA_CORE_TYPE_CSHW;
+	perf_counters[cnt_idx].idx = GPU_ACTIVE_CNT_IDX;
+	perf_counters[cnt_idx].gpu_norm = false;
+	perf_counters[cnt_idx].scaling_factor = 1;
+	cnt_idx++;
+
+	/* Top-level counters follow GPU_ACTIVE, in definition order. */
+	for (i = 0; i < model_data->num_top_level_cntrs; ++i) {
+		const struct kbase_ipa_counter *counter =
+			&model_data->top_level_cntrs_def[i];
+
+		perf_counters[cnt_idx].type = counter->counter_block_type;
+		perf_counters[cnt_idx].idx = counter->counter_block_offset;
+		perf_counters[cnt_idx].gpu_norm = false;
+		perf_counters[cnt_idx].scaling_factor = 1;
+		cnt_idx++;
+	}
+
+	/* Shader core counters come last, in definition order. */
+	for (i = 0; i < model_data->num_shader_cores_cntrs; ++i) {
+		const struct kbase_ipa_counter *counter =
+			&model_data->shader_cores_cntrs_def[i];
+
+		perf_counters[cnt_idx].type = counter->counter_block_type;
+		perf_counters[cnt_idx].idx = counter->counter_block_offset;
+		perf_counters[cnt_idx].gpu_norm = false;
+		perf_counters[cnt_idx].scaling_factor = 1;
+		cnt_idx++;
+	}
+
+	err = kbase_ipa_control_register(kbdev, perf_counters,
+					 model_data->num_counters,
+					 &model_data->ipa_control_client);
+	if (err)
+		dev_err(kbdev->dev,
+			"Failed to register IPA with kbase_ipa_control");
+
+	kfree(perf_counters);
+	return err;
+}
+
+/**
+ * kbase_ipa_detach_ipa_control() - De-register from kbase_ipa_control.
+ * @model_data: Pointer to counter model data
+ *
+ * Does nothing if no client handle is stored. The handle is cleared after
+ * unregistering, so calling this function again is harmless.
+ */
+static void
+kbase_ipa_detach_ipa_control(struct kbase_ipa_counter_model_data *model_data)
+{
+	if (model_data->ipa_control_client) {
+		kbase_ipa_control_unregister(model_data->kbdev,
+					     model_data->ipa_control_client);
+		model_data->ipa_control_client = NULL;
+	}
+}
+
+/**
+ * calculate_coeff() - Compute the dynamic power coefficient of a counter group
+ * @model_data:     Pointer to counter model data
+ * @cnt_defs:       Definitions of the counters in the group (used for the
+ *                  counter name in the warning message)
+ * @num_counters:   Number of counters in the group
+ * @counter_coeffs: Coefficients of the counters in the group
+ * @counter_values: Accumulated values of the counters in the group
+ * @active_cycles:  Value of the GPU_ACTIVE counter, must be >= 1
+ * @coeffp:         Location where the resulting coefficient is stored
+ *
+ * Sums coefficient * counter value over the group with saturation, treats a
+ * negative total as zero, normalizes by @active_cycles and by the square of
+ * the reference voltage, applies the user scaling factor and clamps the
+ * result to [0, 2^16].
+ *
+ * Return: 0 on success, -ERANGE if a counter value is larger than
+ *         MAX_COUNTER_INCREMENT (in which case @coeffp is not written).
+ */
+static int calculate_coeff(struct kbase_ipa_counter_model_data *model_data,
+			   const struct kbase_ipa_counter *const cnt_defs,
+			   size_t num_counters, s32 *counter_coeffs,
+			   u64 *counter_values, u32 active_cycles, u32 *coeffp)
+{
+	u64 coeff = 0, coeff_mul = 0;
+	s64 total_energy = 0;
+	size_t i;
+
+	/* Range for the 'counter_value' is [0, 2^38)
+	 * Range for the 'coeff' is [-2^21, 2^21]
+	 * So range for the 'group_energy' is [-2^59, 2^59) and range for the
+	 * 'total_energy' is +/- 2^59 * number of IPA groups (~16), i.e.
+	 * [-2^63, 2^63).
+	 */
+	for (i = 0; i < num_counters; i++) {
+		/* Named differently from the outer 'coeff' to avoid
+		 * shadowing it.
+		 */
+		s32 counter_coeff = counter_coeffs[i];
+		u64 counter_value = counter_values[i];
+		s64 group_energy;
+
+		/* Validate the counter value before it is used, so that the
+		 * multiplication below only sees values in the documented
+		 * range.
+		 */
+		if (counter_value > MAX_COUNTER_INCREMENT) {
+			dev_warn(model_data->kbdev->dev,
+				 "Increment in counter %s more than expected",
+				 cnt_defs[i].name);
+			return -ERANGE;
+		}
+
+		group_energy =
+			kbase_ipa_group_energy(counter_coeff, counter_value);
+
+		total_energy =
+			kbase_ipa_add_saturate(total_energy, group_energy);
+	}
+
+	/* Range: 0 <= coeff < 2^63 */
+	if (total_energy >= 0)
+		coeff = total_energy;
+	else
+		dev_dbg(model_data->kbdev->dev,
+			"Energy value came negative as %lld", total_energy);
+
+	/* Range: 0 <= coeff < 2^63 (because active_cycles >= 1). However, this
+	 * can be constrained further: the value of counters that are being
+	 * used for dynamic power estimation can only increment by about 128
+	 * maximum per clock cycle. This is because max number of shader
+	 * cores is expected to be 32 (max number of L2 slices is expected to
+	 * be 8) and some counters (per shader core) like SC_BEATS_RD_TEX_EXT &
+	 * SC_EXEC_STARVE_ARITH can increment by 4 every clock cycle.
+	 * Each "beat" is defined as 128 bits and each shader core can
+	 * (currently) do 512 bits read and 512 bits write to/from the L2
+	 * cache per cycle, so the SC_BEATS_RD_TEX_EXT counter can increment
+	 * [0, 4] per shader core per cycle.
+	 * We can thus write the range of 'coeff' in terms of active_cycles:
+	 *
+	 * coeff = SUM(coeffN * counterN * num_cores_for_counterN)
+	 * coeff <= SUM(coeffN * counterN) * max_cores
+	 * coeff <= num_IPA_groups * max_coeff * max_counter * max_cores
+	 *       (substitute max_counter = 2^2 * active_cycles)
+	 * coeff <= num_IPA_groups * max_coeff * 2^2 * active_cycles * max_cores
+	 * coeff <=      2^4       *   2^21    * 2^2 * active_cycles * 2^5
+	 * coeff <= 2^32 * active_cycles
+	 *
+	 * So after the division: 0 <= coeff <= 2^32
+	 */
+	coeff = div_u64(coeff, active_cycles);
+
+	/* Not all models were derived at the same reference voltage. Voltage
+	 * scaling is done by multiplying by V^2, so we need to *divide* by
+	 * Vref^2 here.
+	 * Range: 0 <= coeff <= 2^35
+	 */
+	coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1));
+	/* Range: 0 <= coeff <= 2^38 */
+	coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1));
+
+	/* Scale by user-specified integer factor.
+	 * Range: 0 <= coeff_mul < 2^43
+	 */
+	coeff_mul = coeff * model_data->scaling_factor;
+
+	/* The power models have results with units
+	 * mW/(MHz V^2), i.e. nW/(Hz V^2). With precision of 1/1000000, this
+	 * becomes fW/(Hz V^2), which are the units of coeff_mul. However,
+	 * kbase_scale_dynamic_power() expects units of pW/(Hz V^2), so divide
+	 * by 1000.
+	 * Range: 0 <= coeff_mul < 2^33
+	 */
+	coeff_mul = div_u64(coeff_mul, 1000u);
+
+	/* Clamp to a sensible range - 2^16 gives about 14W at 400MHz/750mV */
+	*coeffp = clamp(coeff_mul, (u64)0, (u64)1 << 16);
+
+	return 0;
+}
+
+/**
+ * kbase_ipa_counter_dynamic_coeff() - Compute the dynamic power coefficients
+ * @model:  Pointer to the IPA model; its model_data is the counter model data
+ * @coeffp: Array receiving the coefficients, indexed by
+ *          KBASE_IPA_BLOCK_TYPE_TOP_LEVEL and
+ *          KBASE_IPA_BLOCK_TYPE_SHADER_CORES
+ *
+ * Must be called with kbdev->ipa.lock held. The time of this call is recorded
+ * in kbdev->ipa.last_sample_time once the counter query has succeeded.
+ *
+ * Return: 0 on success, otherwise an error code:
+ *         the error returned by kbase_ipa_control_query() if the query fails,
+ *         -EOVERFLOW if more than MAX_SAMPLE_INTERVAL_MS elapsed since the
+ *         previous sample,
+ *         -ERANGE if GPU_ACTIVE or another counter increased by more than
+ *         expected,
+ *         -ENODATA if GPU_ACTIVE is below the min_sample_cycles threshold.
+ */
+int kbase_ipa_counter_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp)
+{
+	struct kbase_ipa_counter_model_data *model_data =
+		(struct kbase_ipa_counter_model_data *)model->model_data;
+	struct kbase_device *kbdev = model->kbdev;
+	s32 *counter_coeffs_p = model_data->counter_coeffs;
+	u64 *cnt_values_p = model_data->counter_values;
+	const u64 num_counters = model_data->num_counters;
+	u32 active_cycles;
+	ktime_t now, diff;
+	s64 diff_ms;
+	int ret;
+
+	lockdep_assert_held(&kbdev->ipa.lock);
+
+	/* The last argument is supposed to be a pointer to the location that
+	 * will store the time for which GPU has been in protected mode since
+	 * last query. This can be passed as NULL as counter model itself will
+	 * not be used when GPU enters protected mode, as IPA is supposed to
+	 * switch to the simple power model.
+	 */
+	ret = kbase_ipa_control_query(kbdev,
+				      model_data->ipa_control_client,
+				      cnt_values_p, num_counters, NULL);
+	if (WARN_ON(ret))
+		return ret;
+
+	now = ktime_get();
+	diff = ktime_sub(now, kbdev->ipa.last_sample_time);
+	diff_ms = ktime_to_ms(diff);
+
+	kbdev->ipa.last_sample_time = now;
+
+	/* The counter values cannot be relied upon if the sampling interval was
+	 * too long. Typically this will happen when the polling is started
+	 * after the temperature has risen above a certain trip point. After
+	 * that regular calls every 25-100 ms interval are expected.
+	 */
+	if (diff_ms > MAX_SAMPLE_INTERVAL_MS) {
+		dev_dbg(kbdev->dev,
+			"Last sample was taken %lld milli seconds ago",
+			diff_ms);
+		return -EOVERFLOW;
+	}
+
+	/* Range: 0 (GPU not used at all), to the max sampling interval, say
+	 * 1 seconds, * max GPU frequency (GPU 100% utilized).
+	 * 0 <= active_cycles <= 1 * ~2GHz
+	 * 0 <= active_cycles < 2^31
+	 */
+	if (*cnt_values_p > U32_MAX) {
+		dev_warn(kbdev->dev,
+			 "Increment in GPU_ACTIVE counter more than expected");
+		return -ERANGE;
+	}
+
+	/* The first queried value is GPU_ACTIVE, as it is the first counter
+	 * described when registering with kbase_ipa_control.
+	 */
+	active_cycles = (u32)*cnt_values_p;
+
+	/* If the value of the active_cycles is less than the threshold, then
+	 * return an error so that IPA framework can approximate using the
+	 * cached simple model results instead. This may be more accurate
+	 * than extrapolating using a very small counter dump.
+	 */
+	if (active_cycles < (u32)max(model_data->min_sample_cycles, 0))
+		return -ENODATA;
+
+	/* Range: 1 <= active_cycles < 2^31 */
+	active_cycles = max(1u, active_cycles);
+
+	/* Skip GPU_ACTIVE: the top-level counter values come next. The
+	 * coefficient buffer has no GPU_ACTIVE entry, so it is not advanced.
+	 */
+	cnt_values_p++;
+	ret = calculate_coeff(model_data, model_data->top_level_cntrs_def,
+			      model_data->num_top_level_cntrs,
+			      counter_coeffs_p, cnt_values_p, active_cycles,
+			      &coeffp[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]);
+	if (ret)
+		return ret;
+
+	/* Shader core values and coefficients follow the top-level ones. */
+	cnt_values_p += model_data->num_top_level_cntrs;
+	counter_coeffs_p += model_data->num_top_level_cntrs;
+	ret = calculate_coeff(model_data, model_data->shader_cores_cntrs_def,
+			      model_data->num_shader_cores_cntrs,
+			      counter_coeffs_p, cnt_values_p, active_cycles,
+			      &coeffp[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]);
+
+	return ret;
+}
+
+/**
+ * kbase_ipa_counter_reset_data() - Re-query the counters and drop the result
+ * @model: Pointer to the IPA model; its model_data is the counter model data
+ *
+ * Must be called with kbdev->ipa.lock held. The queried values are written to
+ * the counter_values buffer but are not used here; a failing query only
+ * triggers a warning.
+ * NOTE(review): presumably the query restarts the accumulation inside
+ * kbase_ipa_control - confirm against kbase_ipa_control_query().
+ */
+void kbase_ipa_counter_reset_data(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_counter_model_data *model_data =
+		(struct kbase_ipa_counter_model_data *)model->model_data;
+	u64 *cnt_values_p = model_data->counter_values;
+	const u64 num_counters = model_data->num_counters;
+	int ret;
+
+	lockdep_assert_held(&model->kbdev->ipa.lock);
+
+	ret = kbase_ipa_control_query(model->kbdev,
+				      model_data->ipa_control_client,
+				      cnt_values_p, num_counters, NULL);
+	WARN_ON(ret);
+}
+
+/**
+ * kbase_ipa_counter_common_model_init() - Initialize a counter based model
+ * @model:                  Pointer to the IPA model to initialize
+ * @top_level_cntrs_def:    Definitions of the top-level counters
+ * @num_top_level_cntrs:    Number of elements in @top_level_cntrs_def
+ * @shader_cores_cntrs_def: Definitions of the shader core counters
+ * @num_shader_cores_cntrs: Number of elements in @shader_cores_cntrs_def
+ * @reference_voltage:      Default value of the "reference_voltage" parameter
+ *
+ * Allocates the counter model data, stores it in @model->model_data, adds one
+ * s32 model parameter per counter coefficient plus the "scale",
+ * "min_sample_cycles" and "reference_voltage" parameters, and registers with
+ * kbase_ipa_control.
+ *
+ * On failure all added parameters and the model data are freed and
+ * @model->model_data is reset to NULL.
+ *
+ * Return: 0 on success, -EINVAL on a NULL or empty argument, -ENOMEM if the
+ *         model data cannot be allocated, or the error code of the failing
+ *         step.
+ */
+int kbase_ipa_counter_common_model_init(struct kbase_ipa_model *model,
+		const struct kbase_ipa_counter *top_level_cntrs_def,
+		size_t num_top_level_cntrs,
+		const struct kbase_ipa_counter *shader_cores_cntrs_def,
+		size_t num_shader_cores_cntrs,
+		s32 reference_voltage)
+{
+	struct kbase_ipa_counter_model_data *model_data;
+	s32 *counter_coeffs_p;
+	int err = 0;
+	size_t i;
+
+	if (!model || !top_level_cntrs_def || !shader_cores_cntrs_def ||
+	    !num_top_level_cntrs || !num_shader_cores_cntrs)
+		return -EINVAL;
+
+	model_data = kzalloc(sizeof(*model_data), GFP_KERNEL);
+	if (!model_data)
+		return -ENOMEM;
+
+	model_data->kbdev = model->kbdev;
+
+	model_data->top_level_cntrs_def = top_level_cntrs_def;
+	model_data->num_top_level_cntrs = num_top_level_cntrs;
+
+	model_data->shader_cores_cntrs_def = shader_cores_cntrs_def;
+	model_data->num_shader_cores_cntrs = num_shader_cores_cntrs;
+
+	model->model_data = (void *)model_data;
+
+	/* Coefficients are stored top-level first, then shader cores, which
+	 * is the order kbase_ipa_counter_dynamic_coeff() reads them in.
+	 */
+	counter_coeffs_p = model_data->counter_coeffs;
+
+	for (i = 0; i < model_data->num_top_level_cntrs; ++i) {
+		const struct kbase_ipa_counter *counter =
+			&model_data->top_level_cntrs_def[i];
+
+		*counter_coeffs_p = counter->coeff_default_value;
+
+		err = kbase_ipa_model_add_param_s32(
+			model, counter->name, counter_coeffs_p, 1, false);
+		if (err)
+			goto exit;
+
+		counter_coeffs_p++;
+	}
+
+	for (i = 0; i < model_data->num_shader_cores_cntrs; ++i) {
+		const struct kbase_ipa_counter *counter =
+			&model_data->shader_cores_cntrs_def[i];
+
+		*counter_coeffs_p = counter->coeff_default_value;
+
+		err = kbase_ipa_model_add_param_s32(
+			model, counter->name, counter_coeffs_p, 1, false);
+		if (err)
+			goto exit;
+
+		counter_coeffs_p++;
+	}
+
+	model_data->scaling_factor = DEFAULT_SCALING_FACTOR;
+	err = kbase_ipa_model_add_param_s32(
+		model, "scale", &model_data->scaling_factor, 1, false);
+	if (err)
+		goto exit;
+
+	model_data->min_sample_cycles = DEFAULT_MIN_SAMPLE_CYCLES;
+	err = kbase_ipa_model_add_param_s32(model, "min_sample_cycles",
+					    &model_data->min_sample_cycles, 1,
+					    false);
+	if (err)
+		goto exit;
+
+	model_data->reference_voltage = reference_voltage;
+	err = kbase_ipa_model_add_param_s32(model, "reference_voltage",
+					    &model_data->reference_voltage, 1,
+					    false);
+	if (err)
+		goto exit;
+
+	err = kbase_ipa_attach_ipa_control(model_data);
+
+exit:
+	if (err) {
+		kbase_ipa_model_param_free_all(model);
+		kfree(model_data);
+		/* Do not leave a dangling pointer to the freed model data. */
+		model->model_data = NULL;
+	}
+	return err;
+}
+
+/**
+ * kbase_ipa_counter_common_model_term() - Terminate a counter based model
+ * @model: Pointer to the IPA model; its model_data is the counter model data
+ *
+ * De-registers from kbase_ipa_control and frees the counter model data.
+ */
+void kbase_ipa_counter_common_model_term(struct kbase_ipa_model *model)
+{
+	struct kbase_ipa_counter_model_data *model_data =
+		(struct kbase_ipa_counter_model_data *)model->model_data;
+
+	kbase_ipa_detach_ipa_control(model_data);
+	kfree(model_data);
+}
diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_csf.h b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_csf.h
new file mode 100644
index 0000000..37d2efc
--- /dev/null
+++ b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_csf.h
@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_IPA_COUNTER_COMMON_CSF_H_
+#define _KBASE_IPA_COUNTER_COMMON_CSF_H_
+
+#include "mali_kbase.h"
+#include "csf/ipa_control/mali_kbase_csf_ipa_control.h"
+
+/* Maximum number of HW counters used by the IPA counter model.
*/ +#define KBASE_IPA_MAX_COUNTER_DEF_NUM 24 + +struct kbase_ipa_counter_model_data; + +/** + * struct kbase_ipa_counter_model_data - IPA counter model context per device + * @kbdev: Pointer to kbase device + * @ipa_control_client: Handle returned on registering IPA counter model as a + * client of kbase_ipa_control. + * @top_level_cntrs_def: Array of description of HW counters used by the IPA + * counter model for top-level. + * @num_top_level_cntrs: Number of elements in @top_level_cntrs_def array. + * @shader_cores_cntrs_def: Array of description of HW counters used by the IPA + * counter model for shader cores. + * @num_shader_cores_cntrs: Number of elements in @shader_cores_cntrs_def array. + * @counter_coeffs: Buffer to store coefficient value used for HW counters + * @counter_values: Buffer to store the accumulated value of HW counters + * retrieved from kbase_ipa_control. + * @num_counters: Number of counters queried from kbase_ipa_control. + * @reference_voltage: voltage, in mV, of the operating point used when + * deriving the power model coefficients. Range approx + * 0.1V - 5V (~= 8V): 2^7 <= reference_voltage <= 2^13 + * @scaling_factor: User-specified power scaling factor. This is an + * integer, which is multiplied by the power coefficient + * just before OPP scaling. + * Range approx 0-32: 0 < scaling_factor < 2^5 + * @min_sample_cycles: If the value of the GPU_ACTIVE counter (the number of + * cycles the GPU was working) is less than + * min_sample_cycles, the counter model will return an + * error, causing the IPA framework to approximate using + * the cached simple model results instead. This may be + * more accurate than extrapolating using a very small + * counter dump.
+ */ +struct kbase_ipa_counter_model_data { + struct kbase_device *kbdev; + void *ipa_control_client; + const struct kbase_ipa_counter *top_level_cntrs_def; + size_t num_top_level_cntrs; + const struct kbase_ipa_counter *shader_cores_cntrs_def; + size_t num_shader_cores_cntrs; + s32 counter_coeffs[KBASE_IPA_MAX_COUNTER_DEF_NUM]; + u64 counter_values[KBASE_IPA_MAX_COUNTER_DEF_NUM]; + u64 num_counters; + s32 reference_voltage; + s32 scaling_factor; + s32 min_sample_cycles; +}; + +/** + * struct kbase_ipa_counter - represents a single HW counter used by IPA model + * @name: Name of the HW counter used by IPA counter model + * for energy estimation. + * @coeff_default_value: Default value of coefficient for the counter. + * Coefficients are interpreted as fractions where the + * denominator is 1000000. + * @counter_block_offset: Index to the counter within the counter block of + * type @counter_block_type. + * @counter_block_type: Type of the counter block. + */ +struct kbase_ipa_counter { + const char *name; + s32 coeff_default_value; + u32 counter_block_offset; + enum kbase_ipa_core_type counter_block_type; +}; + +/** + * kbase_ipa_counter_dynamic_coeff() - calculate dynamic power based on HW counters + * @model: pointer to instantiated model + * @coeffp: pointer to location where calculated power, in + * pW/(Hz V^2), is stored for top level and shader cores. + * + * This is a GPU-agnostic implementation of the get_dynamic_coeff() + * function of an IPA model. It relies on the model being populated + * with GPU-specific attributes at initialization time. + * + * Return: 0 on success, or an error code. 
+ */ +int kbase_ipa_counter_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp); + +/** + * kbase_ipa_counter_reset_data() - Reset the counters data used for dynamic + * power estimation + * @model: pointer to instantiated model + * + * Retrieve the accumulated value of HW counters from the kbase_ipa_control + * component, without doing any processing, which is effectively a reset as the + * next call to kbase_ipa_counter_dynamic_coeff() will see the increment in + * counter values from this point onwards. + */ +void kbase_ipa_counter_reset_data(struct kbase_ipa_model *model); + +/** + * kbase_ipa_counter_common_model_init() - initialize ipa power model + * @model: Pointer to the ipa power model to initialize + * @top_level_cntrs_def: Array corresponding to the HW counters used in the + * top level counter model, contains the counter index, + * default value of the coefficient. + * @num_top_level_cntrs: Number of elements in the array @top_level_cntrs_def + * @shader_cores_cntrs_def: Array corresponding to the HW counters used in the + * shader cores counter model, contains the counter index, + * default value of the coefficient. + * @num_shader_cores_cntrs: Number of elements in the array + * @shader_cores_cntrs_def. + * @reference_voltage: voltage, in mV, of the operating point used when + * deriving the power model coefficients. + * + * This function performs initialization steps common for ipa counter based + * model of all CSF GPUs. The set of counters and their respective weights + * could be different for each GPU. The tuple of counter index and weight + * is passed via @top_level_cntrs_def and @shader_cores_cntrs_def array. 
+ * + * Return: 0 on success, error code otherwise + */ +int kbase_ipa_counter_common_model_init(struct kbase_ipa_model *model, + const struct kbase_ipa_counter *top_level_cntrs_def, + size_t num_top_level_cntrs, + const struct kbase_ipa_counter *shader_cores_cntrs_def, + size_t num_shader_cores_cntrs, + s32 reference_voltage); +/** + * kbase_ipa_counter_common_model_term() - terminate ipa power model + * @model: ipa power model to terminate + * + * This function performs all necessary steps to terminate ipa power model + * including clean up of resources allocated to hold model data. + */ +void kbase_ipa_counter_common_model_term(struct kbase_ipa_model *model); + +#endif /* _KBASE_IPA_COUNTER_COMMON_CSF_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_jm.c b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_jm.c new file mode 100644 index 0000000..4737b0e --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_jm.c @@ -0,0 +1,354 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2017-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include "mali_kbase_ipa_counter_common_jm.h" +#include "ipa/mali_kbase_ipa_debugfs.h" + +#define DEFAULT_SCALING_FACTOR 5 + +/* If the value of GPU_ACTIVE is below this, use the simple model + * instead, to avoid extrapolating small amounts of counter data across + * large sample periods. + */ +#define DEFAULT_MIN_SAMPLE_CYCLES 10000 + +/** + * kbase_ipa_read_hwcnt() - read a counter value + * @model_data: pointer to model data + * @offset: offset, in bytes, into vinstr buffer + * + * Return: A 32-bit counter value. Range: 0 < value < 2^27 (worst case would be + * incrementing every cycle over a ~100ms sample period at a high frequency, + * e.g. 1 GHz: 2^30 * 0.1seconds ~= 2^27). + */ +static inline u32 kbase_ipa_read_hwcnt( + struct kbase_ipa_model_vinstr_data *model_data, + u32 offset) +{ + u8 *p = (u8 *)model_data->dump_buf.dump_buf; + + return *(u32 *)&p[offset]; +} + +static inline s64 kbase_ipa_add_saturate(s64 a, s64 b) +{ + s64 rtn; + + if (a > 0 && (S64_MAX - a) < b) + rtn = S64_MAX; + else if (a < 0 && (S64_MIN - a) > b) + rtn = S64_MIN; + else + rtn = a + b; + + return rtn; +} + +s64 kbase_ipa_sum_all_shader_cores( + struct kbase_ipa_model_vinstr_data *model_data, + s32 coeff, u32 counter) +{ + struct kbase_device *kbdev = model_data->kbdev; + u64 core_mask; + u32 base = 0; + s64 ret = 0; + + core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask; + while (core_mask != 0ull) { + if ((core_mask & 1ull) != 0ull) { + /* 0 < counter_value < 2^27 */ + u32 counter_value = kbase_ipa_read_hwcnt(model_data, + base + counter); + + /* 0 < ret < 2^27 * max_num_cores = 2^32 */ + ret = kbase_ipa_add_saturate(ret, counter_value); + } + base += KBASE_IPA_NR_BYTES_PER_BLOCK; + core_mask >>= 1; + } + + /* Range: -2^54 < ret * coeff < 2^54 */ + return ret * coeff; +} + +s64 kbase_ipa_sum_all_memsys_blocks( + struct kbase_ipa_model_vinstr_data *model_data, + s32 coeff, u32 counter) +{ + struct kbase_device *kbdev = model_data->kbdev; + const u32 num_blocks =
kbdev->gpu_props.props.l2_props.num_l2_slices; + u32 base = 0; + s64 ret = 0; + u32 i; + + for (i = 0; i < num_blocks; i++) { + /* 0 < counter_value < 2^27 */ + u32 counter_value = kbase_ipa_read_hwcnt(model_data, + base + counter); + + /* 0 < ret < 2^27 * max_num_memsys_blocks = 2^29 */ + ret = kbase_ipa_add_saturate(ret, counter_value); + base += KBASE_IPA_NR_BYTES_PER_BLOCK; + } + + /* Range: -2^51 < ret * coeff < 2^51 */ + return ret * coeff; +} + +s64 kbase_ipa_single_counter( + struct kbase_ipa_model_vinstr_data *model_data, + s32 coeff, u32 counter) +{ + /* Range: 0 < counter_value < 2^27 */ + const u32 counter_value = kbase_ipa_read_hwcnt(model_data, counter); + + /* Range: -2^49 < ret < 2^49 */ + return counter_value * (s64) coeff; +} + +int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data) +{ + int errcode; + struct kbase_device *kbdev = model_data->kbdev; + struct kbase_hwcnt_virtualizer *hvirt = kbdev->hwcnt_gpu_virt; + struct kbase_hwcnt_enable_map enable_map; + const struct kbase_hwcnt_metadata *metadata = + kbase_hwcnt_virtualizer_metadata(hvirt); + + if (!metadata) + return -1; + + errcode = kbase_hwcnt_enable_map_alloc(metadata, &enable_map); + if (errcode) { + dev_err(kbdev->dev, "Failed to allocate IPA enable map"); + return errcode; + } + + kbase_hwcnt_enable_map_enable_all(&enable_map); + + /* Disable cycle counter only. 
*/ + enable_map.clk_enable_map = 0; + + errcode = kbase_hwcnt_virtualizer_client_create( + hvirt, &enable_map, &model_data->hvirt_cli); + kbase_hwcnt_enable_map_free(&enable_map); + if (errcode) { + dev_err(kbdev->dev, "Failed to register IPA with virtualizer"); + model_data->hvirt_cli = NULL; + return errcode; + } + + errcode = kbase_hwcnt_dump_buffer_alloc( + metadata, &model_data->dump_buf); + if (errcode) { + dev_err(kbdev->dev, "Failed to allocate IPA dump buffer"); + kbase_hwcnt_virtualizer_client_destroy(model_data->hvirt_cli); + model_data->hvirt_cli = NULL; + return errcode; + } + + return 0; +} + +void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data) +{ + if (model_data->hvirt_cli) { + kbase_hwcnt_virtualizer_client_destroy(model_data->hvirt_cli); + kbase_hwcnt_dump_buffer_free(&model_data->dump_buf); + model_data->hvirt_cli = NULL; + } +} + +int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) +{ + struct kbase_ipa_model_vinstr_data *model_data = + (struct kbase_ipa_model_vinstr_data *)model->model_data; + s64 energy = 0; + size_t i; + u64 coeff = 0, coeff_mul = 0; + u64 start_ts_ns, end_ts_ns; + u32 active_cycles; + int err = 0; + + err = kbase_hwcnt_virtualizer_client_dump(model_data->hvirt_cli, + &start_ts_ns, &end_ts_ns, &model_data->dump_buf); + if (err) + goto err0; + + /* Range: 0 (GPU not used at all), to the max sampling interval, say + * 1s, * max GPU frequency (GPU 100% utilized). 
+ * 0 <= active_cycles <= 1 * ~2GHz + * 0 <= active_cycles < 2^31 + */ + active_cycles = model_data->get_active_cycles(model_data); + + if (active_cycles < (u32) max(model_data->min_sample_cycles, 0)) { + err = -ENODATA; + goto err0; + } + + /* Range: 1 <= active_cycles < 2^31 */ + active_cycles = max(1u, active_cycles); + + /* Range of 'energy' is +/- 2^54 * number of IPA groups (~8), so around + * -2^57 < energy < 2^57 + */ + for (i = 0; i < model_data->groups_def_num; i++) { + const struct kbase_ipa_group *group = &model_data->groups_def[i]; + s32 coeff = model_data->group_values[i]; + s64 group_energy = group->op(model_data, coeff, + group->counter_block_offset); + + energy = kbase_ipa_add_saturate(energy, group_energy); + } + + /* Range: 0 <= coeff < 2^57 */ + if (energy > 0) + coeff = energy; + + /* Range: 0 <= coeff < 2^57 (because active_cycles >= 1). However, this + * can be constrained further: Counter values can only be increased by + * a theoretical maximum of about 64k per clock cycle. Beyond this, + * we'd have to sample every 1ms to avoid them overflowing at the + * lowest clock frequency (say 100MHz). Therefore, we can write the + * range of 'coeff' in terms of active_cycles: + * + * coeff = SUM(coeffN * counterN * num_cores_for_counterN) + * coeff <= SUM(coeffN * counterN) * max_num_cores + * coeff <= num_IPA_groups * max_coeff * max_counter * max_num_cores + * (substitute max_counter = 2^16 * active_cycles) + * coeff <= num_IPA_groups * max_coeff * 2^16 * active_cycles * max_num_cores + * coeff <= 2^3 * 2^22 * 2^16 * active_cycles * 2^5 + * coeff <= 2^46 * active_cycles + * + * So after the division: 0 <= coeff <= 2^46 + */ + coeff = div_u64(coeff, active_cycles); + + /* Not all models were derived at the same reference voltage. Voltage + * scaling is done by multiplying by V^2, so we need to *divide* by + * Vref^2 here. 
+ * Range: 0 <= coeff <= 2^49 + */ + coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1)); + /* Range: 0 <= coeff <= 2^52 */ + coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1)); + + /* Scale by user-specified integer factor. + * Range: 0 <= coeff_mul < 2^57 + */ + coeff_mul = coeff * model_data->scaling_factor; + + /* The power models have results with units + * mW/(MHz V^2), i.e. nW/(Hz V^2). With precision of 1/1000000, this + * becomes fW/(Hz V^2), which are the units of coeff_mul. However, + * kbase_scale_dynamic_power() expects units of pW/(Hz V^2), so divide + * by 1000. + * Range: 0 <= coeff_mul < 2^47 + */ + coeff_mul = div_u64(coeff_mul, 1000u); + +err0: + /* Clamp to a sensible range - 2^16 gives about 14W at 400MHz/750mV */ + *coeffp = clamp(coeff_mul, (u64) 0, (u64) 1 << 16); + return err; +} + +void kbase_ipa_vinstr_reset_data(struct kbase_ipa_model *model) +{ + /* Currently not implemented */ + WARN_ON_ONCE(1); +} + +int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model, + const struct kbase_ipa_group *ipa_groups_def, + size_t ipa_group_size, + kbase_ipa_get_active_cycles_callback get_active_cycles, + s32 reference_voltage) +{ + int err = 0; + size_t i; + struct kbase_ipa_model_vinstr_data *model_data; + + if (!model || !ipa_groups_def || !ipa_group_size || !get_active_cycles) + return -EINVAL; + + model_data = kzalloc(sizeof(*model_data), GFP_KERNEL); + if (!model_data) + return -ENOMEM; + + model_data->kbdev = model->kbdev; + model_data->groups_def = ipa_groups_def; + model_data->groups_def_num = ipa_group_size; + model_data->get_active_cycles = get_active_cycles; + + model->model_data = (void *) model_data; + + for (i = 0; i < model_data->groups_def_num; ++i) { + const struct kbase_ipa_group *group = &model_data->groups_def[i]; + + model_data->group_values[i] = group->default_value; + err = kbase_ipa_model_add_param_s32(model, group->name, + &model_data->group_values[i], + 1, false); + if (err) + 
goto exit; + } + + model_data->scaling_factor = DEFAULT_SCALING_FACTOR; + err = kbase_ipa_model_add_param_s32(model, "scale", + &model_data->scaling_factor, + 1, false); + if (err) + goto exit; + + model_data->min_sample_cycles = DEFAULT_MIN_SAMPLE_CYCLES; + err = kbase_ipa_model_add_param_s32(model, "min_sample_cycles", + &model_data->min_sample_cycles, + 1, false); + if (err) + goto exit; + + model_data->reference_voltage = reference_voltage; + err = kbase_ipa_model_add_param_s32(model, "reference_voltage", + &model_data->reference_voltage, + 1, false); + if (err) + goto exit; + + err = kbase_ipa_attach_vinstr(model_data); + +exit: + if (err) { + kbase_ipa_model_param_free_all(model); + kfree(model_data); + } + return err; +} + +void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model) +{ + struct kbase_ipa_model_vinstr_data *model_data = + (struct kbase_ipa_model_vinstr_data *)model->model_data; + + kbase_ipa_detach_vinstr(model_data); + kfree(model_data); +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_jm.h b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_jm.h new file mode 100644 index 0000000..3486a9b --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_common_jm.h @@ -0,0 +1,231 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2017-2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_IPA_COUNTER_COMMON_JM_H_ +#define _KBASE_IPA_COUNTER_COMMON_JM_H_ + +#include "mali_kbase.h" +#include "mali_kbase_hwcnt_virtualizer.h" +#include "mali_kbase_hwcnt_types.h" + +/* Maximum number of IPA groups for an IPA model. */ +#define KBASE_IPA_MAX_GROUP_DEF_NUM 16 + +/* Number of bytes per hardware counter in a vinstr_buffer. */ +#define KBASE_IPA_NR_BYTES_PER_CNT 4 + +/* Number of hardware counters per block in a vinstr_buffer. */ +#define KBASE_IPA_NR_CNT_PER_BLOCK 64 + +/* Number of bytes per block in a vinstr_buffer. */ +#define KBASE_IPA_NR_BYTES_PER_BLOCK \ + (KBASE_IPA_NR_CNT_PER_BLOCK * KBASE_IPA_NR_BYTES_PER_CNT) + +struct kbase_ipa_model_vinstr_data; + +typedef u32 +kbase_ipa_get_active_cycles_callback(struct kbase_ipa_model_vinstr_data *); + +/** + * struct kbase_ipa_model_vinstr_data - IPA context per device + * @kbdev: pointer to kbase device + * @group_values: values of coefficients for IPA groups + * @groups_def: Array of IPA groups. + * @groups_def_num: Number of elements in the array of IPA groups. + * @get_active_cycles: Callback to return number of active cycles during + * counter sample period + * @hvirt_cli: hardware counter virtualizer client handle + * @dump_buf: buffer to dump hardware counters onto + * @reference_voltage: voltage, in mV, of the operating point used when + * deriving the power model coefficients. Range approx + * 0.1V - 5V (~= 8V): 2^7 <= reference_voltage <= 2^13 + * @scaling_factor: User-specified power scaling factor. This is an + * integer, which is multiplied by the power coefficient + * just before OPP scaling. 
+ * Range approx 0-32: 0 < scaling_factor < 2^5 + * @min_sample_cycles: If the value of the GPU_ACTIVE counter (the number of + * cycles the GPU was working) is less than + * min_sample_cycles, the counter model will return an + * error, causing the IPA framework to approximate using + * the cached simple model results instead. This may be + * more accurate than extrapolating using a very small + * counter dump. + */ +struct kbase_ipa_model_vinstr_data { + struct kbase_device *kbdev; + s32 group_values[KBASE_IPA_MAX_GROUP_DEF_NUM]; + const struct kbase_ipa_group *groups_def; + size_t groups_def_num; + kbase_ipa_get_active_cycles_callback *get_active_cycles; + struct kbase_hwcnt_virtualizer_client *hvirt_cli; + struct kbase_hwcnt_dump_buffer dump_buf; + s32 reference_voltage; + s32 scaling_factor; + s32 min_sample_cycles; +}; + +/** + * struct kbase_ipa_group - represents a single IPA group + * @name: name of the IPA group + * @default_value: default value of coefficient for IPA group. + * Coefficients are interpreted as fractions where the + * denominator is 1000000. + * @op: which operation to be performed on the counter values + * @counter_block_offset: block offset in bytes of the counter used to calculate energy for IPA group + */ +struct kbase_ipa_group { + const char *name; + s32 default_value; + s64 (*op)(struct kbase_ipa_model_vinstr_data *, s32, u32); + u32 counter_block_offset; +}; + +/** + * kbase_ipa_sum_all_shader_cores() - sum a counter over all cores + * @model_data: pointer to model data + * @coeff: model coefficient. Unity is ~2^20, so range approx + * +/- 4.0: -2^22 < coeff < 2^22 + * @counter: offset in bytes of the counter used to calculate energy + * for IPA group + * + * Calculate energy estimation based on hardware counter `counter' + * across all shader cores. + * + * Return: Sum of counter values.
Range: -2^54 < ret < 2^54 + */ +s64 kbase_ipa_sum_all_shader_cores( + struct kbase_ipa_model_vinstr_data *model_data, + s32 coeff, u32 counter); + +/** + * kbase_ipa_sum_all_memsys_blocks() - sum a counter over all mem system blocks + * @model_data: pointer to model data + * @coeff: model coefficient. Unity is ~2^20, so range approx + * +/- 4.0: -2^22 < coeff < 2^22 + * @counter: offset in bytes of the counter used to calculate energy + * for IPA group + * + * Calculate energy estimation based on hardware counter `counter' across all + * memory system blocks. + * + * Return: Sum of counter values. Range: -2^51 < ret < 2^51 + */ +s64 kbase_ipa_sum_all_memsys_blocks( + struct kbase_ipa_model_vinstr_data *model_data, + s32 coeff, u32 counter); + +/** + * kbase_ipa_single_counter() - sum a single counter + * @model_data: pointer to model data + * @coeff: model coefficient. Unity is ~2^20, so range approx + * +/- 4.0: -2^22 < coeff < 2^22 + * @counter: offset in bytes of the counter used to calculate energy + * for IPA group + * + * Calculate energy estimation based on hardware counter `counter'. + * + * Return: Counter value. Range: -2^49 < ret < 2^49 + */ +s64 kbase_ipa_single_counter( + struct kbase_ipa_model_vinstr_data *model_data, + s32 coeff, u32 counter); + +/** + * kbase_ipa_attach_vinstr() - attach a vinstr_buffer to an IPA model. + * @model_data: pointer to model data + * + * Attach a vinstr_buffer to an IPA model. The vinstr_buffer + * allows access to the hardware counters used to calculate + * energy consumption. + * + * Return: 0 on success, or an error code. + */ +int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data); + +/** + * kbase_ipa_detach_vinstr() - detach a vinstr_buffer from an IPA model. + * @model_data: pointer to model data + * + * Detach a vinstr_buffer from an IPA model.
+ */ +void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data); + +/** + * kbase_ipa_vinstr_dynamic_coeff() - calculate dynamic power based on HW counters + * @model: pointer to instantiated model + * @coeffp: pointer to location where calculated power, in + * pW/(Hz V^2), is stored. + * + * This is a GPU-agnostic implementation of the get_dynamic_coeff() + * function of an IPA model. It relies on the model being populated + * with GPU-specific attributes at initialization time. + * + * Return: 0 on success, or an error code. + */ +int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp); + +/** + * kbase_ipa_vinstr_reset_data() - Reset the counters data used for dynamic + * power estimation + * @model: pointer to instantiated model + * + * Currently it is not implemented for JM GPUs. + * When implemented it is expected to retrieve the accumulated value of HW + * counters from the Vinstr component, without doing any processing, which is + * effectively a reset as the next call to kbase_ipa_vinstr_dynamic_coeff() + * will see the increment in counter values from this point onwards. + */ +void kbase_ipa_vinstr_reset_data(struct kbase_ipa_model *model); + +/** + * kbase_ipa_vinstr_common_model_init() - initialize ipa power model + * @model: ipa power model to initialize + * @ipa_groups_def: array of ipa groups which sets coefficients for + * the corresponding counters used in the ipa model + * @ipa_group_size: number of elements in the array @ipa_groups_def + * @get_active_cycles: callback to return the number of cycles the GPU was + * active during the counter sample period. + * @reference_voltage: voltage, in mV, of the operating point used when + * deriving the power model coefficients. + * + * This initialization function performs initialization steps common + * for ipa models based on counter values. In each call, the model + * passes its specific coefficient values per ipa counter group via + * @ipa_groups_def array.
+ * + * Return: 0 on success, error code otherwise + */ +int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model, + const struct kbase_ipa_group *ipa_groups_def, + size_t ipa_group_size, + kbase_ipa_get_active_cycles_callback *get_active_cycles, + s32 reference_voltage); + +/** + * kbase_ipa_vinstr_common_model_term() - terminate ipa power model + * @model: ipa power model to terminate + * + * This function performs all necessary steps to terminate ipa power model + * including clean up of resources allocated to hold model data. + */ +void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model); + +#endif /* _KBASE_IPA_COUNTER_COMMON_JM_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_csf.c b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_csf.c new file mode 100644 index 0000000..1852c3c --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_csf.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include "mali_kbase_ipa_counter_common_csf.h" +#include "mali_kbase.h" + +/* MEMSYS counter block offsets */ +#define L2_RD_MSG_IN (16) +#define L2_WR_MSG_IN (18) +#define L2_READ_LOOKUP (26) +#define L2_EXT_WRITE_NOSNP_FULL (43) + +/* SC counter block offsets */ +#define FRAG_QUADS_EZS_UPDATE (13) +#define FULL_QUAD_WARPS (21) +#define EXEC_INSTR_FMA (27) +#define EXEC_INSTR_CVT (28) +#define TEX_FILT_NUM_OPS (39) +#define LS_MEM_READ_SHORT (45) +#define LS_MEM_WRITE_SHORT (47) +#define VARY_SLOT_16 (51) + +/* Tiler counter block offsets */ +#define IDVS_POS_SHAD_STALL (23) +#define PREFETCH_STALL (25) +#define VFETCH_POS_READ_WAIT (29) +#define VFETCH_VERTEX_WAIT (30) +#define IDVS_VAR_SHAD_STALL (38) + +#define COUNTER_DEF(cnt_name, coeff, cnt_idx, block_type) \ + { \ + .name = cnt_name, \ + .coeff_default_value = coeff, \ + .counter_block_offset = cnt_idx, \ + .counter_block_type = block_type, \ + } + +#define CSHW_COUNTER_DEF(cnt_name, coeff, cnt_idx) \ + COUNTER_DEF(cnt_name, coeff, cnt_idx, KBASE_IPA_CORE_TYPE_CSHW) + +#define MEMSYS_COUNTER_DEF(cnt_name, coeff, cnt_idx) \ + COUNTER_DEF(cnt_name, coeff, cnt_idx, KBASE_IPA_CORE_TYPE_MEMSYS) + +#define SC_COUNTER_DEF(cnt_name, coeff, cnt_idx) \ + COUNTER_DEF(cnt_name, coeff, cnt_idx, KBASE_IPA_CORE_TYPE_SHADER) + +#define TILER_COUNTER_DEF(cnt_name, coeff, cnt_idx) \ + COUNTER_DEF(cnt_name, coeff, cnt_idx, KBASE_IPA_CORE_TYPE_TILER) + +/* Tables of description of HW counters used by IPA counter model. + * + * These tables provide a description of each performance counter + * used by the top level counter model for energy estimation. 
+ */ +static const struct kbase_ipa_counter ipa_top_level_cntrs_def_todx[] = { + MEMSYS_COUNTER_DEF("l2_rd_msg_in", 295631, L2_RD_MSG_IN), + MEMSYS_COUNTER_DEF("l2_ext_write_nosnp_ull", 325168, L2_EXT_WRITE_NOSNP_FULL), + + TILER_COUNTER_DEF("prefetch_stall", 145435, PREFETCH_STALL), + TILER_COUNTER_DEF("idvs_var_shad_stall", -171917, IDVS_VAR_SHAD_STALL), + TILER_COUNTER_DEF("idvs_pos_shad_stall", 109980, IDVS_POS_SHAD_STALL), + TILER_COUNTER_DEF("vfetch_pos_read_wait", -119118, VFETCH_POS_READ_WAIT), +}; + + +/* These tables provide a description of each performance counter + * used by the shader cores counter model for energy estimation. + */ +static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_todx[] = { + SC_COUNTER_DEF("exec_instr_fma", 505449, EXEC_INSTR_FMA), + SC_COUNTER_DEF("tex_filt_num_operations", 574869, TEX_FILT_NUM_OPS), + SC_COUNTER_DEF("ls_mem_read_short", 60917, LS_MEM_READ_SHORT), + SC_COUNTER_DEF("frag_quads_ezs_update", 694555, FRAG_QUADS_EZS_UPDATE), + SC_COUNTER_DEF("ls_mem_write_short", 698290, LS_MEM_WRITE_SHORT), + SC_COUNTER_DEF("vary_slot_16", 181069, VARY_SLOT_16), +}; + + +#define IPA_POWER_MODEL_OPS(gpu, init_token) \ + const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \ + .name = "mali-" #gpu "-power-model", \ + .init = kbase_ ## init_token ## _power_model_init, \ + .term = kbase_ipa_counter_common_model_term, \ + .get_dynamic_coeff = kbase_ipa_counter_dynamic_coeff, \ + .reset_counter_data = kbase_ipa_counter_reset_data, \ + }; \ + KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops) + +#define STANDARD_POWER_MODEL(gpu, reference_voltage) \ + static int kbase_ ## gpu ## _power_model_init(\ + struct kbase_ipa_model *model) \ + { \ + BUILD_BUG_ON((1 + \ + ARRAY_SIZE(ipa_top_level_cntrs_def_ ## gpu) +\ + ARRAY_SIZE(ipa_shader_core_cntrs_def_ ## gpu)) > \ + KBASE_IPA_MAX_COUNTER_DEF_NUM); \ + return kbase_ipa_counter_common_model_init(model, \ + ipa_top_level_cntrs_def_ ## gpu, \ + 
ARRAY_SIZE(ipa_top_level_cntrs_def_ ## gpu), \ + ipa_shader_core_cntrs_def_ ## gpu, \ + ARRAY_SIZE(ipa_shader_core_cntrs_def_ ## gpu), \ + (reference_voltage)); \ + } \ + IPA_POWER_MODEL_OPS(gpu, gpu) + + +#define ALIAS_POWER_MODEL(gpu, as_gpu) \ + IPA_POWER_MODEL_OPS(gpu, as_gpu) + +/* Reference voltage value is 750 mV. + */ +STANDARD_POWER_MODEL(todx, 750); + + +/* Assuming LODX is an alias of TODX for IPA */ +ALIAS_POWER_MODEL(lodx, todx); + +static const struct kbase_ipa_model_ops *ipa_counter_model_ops[] = { + &kbase_todx_ipa_model_ops, &kbase_lodx_ipa_model_ops, +}; + +const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find( + struct kbase_device *kbdev, const char *name) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ipa_counter_model_ops); ++i) { + const struct kbase_ipa_model_ops *ops = + ipa_counter_model_ops[i]; + + if (!strcmp(ops->name, name)) + return ops; + } + + dev_err(kbdev->dev, "power model \'%s\' not found\n", name); + + return NULL; +} + +const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id) +{ + const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> + GPU_ID_VERSION_PRODUCT_ID_SHIFT; + + switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) { + case GPU_ID2_PRODUCT_TODX: + return "mali-todx-power-model"; + case GPU_ID2_PRODUCT_LODX: + return "mali-lodx-power-model"; + default: + return NULL; + } +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_jm.c b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_jm.c new file mode 100644 index 0000000..2f4c9d9 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/backend/mali_kbase_ipa_counter_jm.c @@ -0,0 +1,555 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2016-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include + +#include "mali_kbase_ipa_counter_common_jm.h" +#include "mali_kbase.h" + + +/* Performance counter blocks base offsets */ +#define JM_BASE (0 * KBASE_IPA_NR_BYTES_PER_BLOCK) +#define TILER_BASE (1 * KBASE_IPA_NR_BYTES_PER_BLOCK) +#define MEMSYS_BASE (2 * KBASE_IPA_NR_BYTES_PER_BLOCK) + +/* JM counter block offsets */ +#define JM_GPU_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 6) + +/* Tiler counter block offsets */ +#define TILER_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 45) + +/* MEMSYS counter block offsets */ +#define MEMSYS_L2_ANY_LOOKUP (KBASE_IPA_NR_BYTES_PER_CNT * 25) + +/* SC counter block offsets */ +#define SC_FRAG_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 4) +#define SC_EXEC_CORE_ACTIVE (KBASE_IPA_NR_BYTES_PER_CNT * 26) +#define SC_EXEC_INSTR_FMA (KBASE_IPA_NR_BYTES_PER_CNT * 27) +#define SC_EXEC_INSTR_COUNT (KBASE_IPA_NR_BYTES_PER_CNT * 28) +#define SC_EXEC_INSTR_MSG (KBASE_IPA_NR_BYTES_PER_CNT * 30) +#define SC_TEX_FILT_NUM_OPERATIONS (KBASE_IPA_NR_BYTES_PER_CNT * 39) +#define SC_TEX_COORD_ISSUE (KBASE_IPA_NR_BYTES_PER_CNT * 40) +#define SC_TEX_TFCH_NUM_OPERATIONS (KBASE_IPA_NR_BYTES_PER_CNT * 42) +#define SC_VARY_INSTR (KBASE_IPA_NR_BYTES_PER_CNT * 49) +#define SC_VARY_SLOT_32 (KBASE_IPA_NR_BYTES_PER_CNT * 50) +#define SC_VARY_SLOT_16 
(KBASE_IPA_NR_BYTES_PER_CNT * 51) +#define SC_BEATS_RD_LSC (KBASE_IPA_NR_BYTES_PER_CNT * 56) +#define SC_BEATS_WR_LSC (KBASE_IPA_NR_BYTES_PER_CNT * 61) +#define SC_BEATS_WR_TIB (KBASE_IPA_NR_BYTES_PER_CNT * 62) + +/** + * kbase_g7x_power_model_get_jm_counter() - get performance counter offset inside the Job Manager block + * @model_data: pointer to GPU model data. + * @counter_block_offset: offset in bytes of the performance counter inside the Job Manager block. + * + * Return: Block offset in bytes of the required performance counter. + */ +static u32 kbase_g7x_power_model_get_jm_counter(struct kbase_ipa_model_vinstr_data *model_data, + u32 counter_block_offset) +{ + return JM_BASE + counter_block_offset; +} + +/** + * kbase_g7x_power_model_get_memsys_counter() - get performance counter offset inside the Memory System block + * @model_data: pointer to GPU model data. + * @counter_block_offset: offset in bytes of the performance counter inside the (first) Memory System block. + * + * Return: Block offset in bytes of the required performance counter. + */ +static u32 kbase_g7x_power_model_get_memsys_counter(struct kbase_ipa_model_vinstr_data *model_data, + u32 counter_block_offset) +{ + /* The base address of Memory System performance counters is always the same, although their number + * may vary based on the number of cores. For the moment it's ok to return a constant. + */ + return MEMSYS_BASE + counter_block_offset; +} + +/** + * kbase_g7x_power_model_get_sc_counter() - get performance counter offset inside the Shader Cores block + * @model_data: pointer to GPU model data. + * @counter_block_offset: offset in bytes of the performance counter inside the (first) Shader Cores block. + * + * Return: Block offset in bytes of the required performance counter.
+ */ +static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_data *model_data, + u32 counter_block_offset) +{ + const u32 sc_base = MEMSYS_BASE + + (model_data->kbdev->gpu_props.props.l2_props.num_l2_slices * + KBASE_IPA_NR_BYTES_PER_BLOCK); + return sc_base + counter_block_offset; +} + +/** + * kbase_g7x_sum_all_memsys_blocks() - calculate energy for a single Memory System performance counter. + * @model_data: pointer to GPU model data. + * @coeff: default value of coefficient for IPA group. + * @offset: offset in bytes of the counter inside the block it belongs to. + * + * Return: Energy estimation for a single Memory System performance counter. + */ +static s64 kbase_g7x_sum_all_memsys_blocks( + struct kbase_ipa_model_vinstr_data *model_data, + s32 coeff, + u32 offset) +{ + u32 counter; + + counter = kbase_g7x_power_model_get_memsys_counter(model_data, offset); + return kbase_ipa_sum_all_memsys_blocks(model_data, coeff, counter); +} + +/** + * kbase_g7x_sum_all_shader_cores() - calculate energy for a Shader Cores performance counter for all cores. + * @model_data: pointer to GPU model data. + * @coeff: default value of coefficient for IPA group. + * @counter_block_offset: offset in bytes of the counter inside the block it belongs to. + * + * Return: Energy estimation for a Shader Cores performance counter for all cores. + */ +static s64 kbase_g7x_sum_all_shader_cores( + struct kbase_ipa_model_vinstr_data *model_data, + s32 coeff, + u32 counter_block_offset) +{ + u32 counter; + + counter = kbase_g7x_power_model_get_sc_counter(model_data, + counter_block_offset); + return kbase_ipa_sum_all_shader_cores(model_data, coeff, counter); +} + +/** + * kbase_g7x_jm_single_counter() - calculate energy for a single Job Manager performance counter. + * @model_data: pointer to GPU model data. + * @coeff: default value of coefficient for IPA group. + * @counter_block_offset: offset in bytes of the counter inside the block it belongs to.
+ * + * Return: Energy estimation for a single Job Manager performance counter. + */ +static s64 kbase_g7x_jm_single_counter( + struct kbase_ipa_model_vinstr_data *model_data, + s32 coeff, + u32 counter_block_offset) +{ + u32 counter; + + counter = kbase_g7x_power_model_get_jm_counter(model_data, + counter_block_offset); + return kbase_ipa_single_counter(model_data, coeff, counter); +} + +/** + * kbase_g7x_get_active_cycles() - return the GPU_ACTIVE counter + * @model_data: pointer to GPU model data. + * + * Return: the number of cycles the GPU was active during the counter sampling + * period. + */ +static u32 kbase_g7x_get_active_cycles( + struct kbase_ipa_model_vinstr_data *model_data) +{ + u32 counter = kbase_g7x_power_model_get_jm_counter(model_data, JM_GPU_ACTIVE); + + /* Counters are only 32-bit, so we can safely multiply by 1 then cast + * the 64-bit result back to a u32. + */ + return kbase_ipa_single_counter(model_data, 1, counter); +} + +/* Table of IPA group definitions. + * + * For each IPA group, this table defines a function to access the given performance block counter (or counters, + * if the operation needs to be iterated on multiple blocks) and calculate energy estimation.
+ */ + +static const struct kbase_ipa_group ipa_groups_def_g71[] = { + { + .name = "l2_access", + .default_value = 526300, + .op = kbase_g7x_sum_all_memsys_blocks, + .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, + }, + { + .name = "exec_instr_count", + .default_value = 301100, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_EXEC_INSTR_COUNT, + }, + { + .name = "tex_issue", + .default_value = 197400, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_TEX_COORD_ISSUE, + }, + { + .name = "tile_wb", + .default_value = -156400, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_BEATS_WR_TIB, + }, + { + .name = "gpu_active", + .default_value = 115800, + .op = kbase_g7x_jm_single_counter, + .counter_block_offset = JM_GPU_ACTIVE, + }, +}; + +static const struct kbase_ipa_group ipa_groups_def_g72[] = { + { + .name = "l2_access", + .default_value = 393000, + .op = kbase_g7x_sum_all_memsys_blocks, + .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, + }, + { + .name = "exec_instr_count", + .default_value = 227000, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_EXEC_INSTR_COUNT, + }, + { + .name = "tex_issue", + .default_value = 181900, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_TEX_COORD_ISSUE, + }, + { + .name = "tile_wb", + .default_value = -120200, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_BEATS_WR_TIB, + }, + { + .name = "gpu_active", + .default_value = 133100, + .op = kbase_g7x_jm_single_counter, + .counter_block_offset = JM_GPU_ACTIVE, + }, +}; + +static const struct kbase_ipa_group ipa_groups_def_g76[] = { + { + .name = "gpu_active", + .default_value = 122000, + .op = kbase_g7x_jm_single_counter, + .counter_block_offset = JM_GPU_ACTIVE, + }, + { + .name = "exec_instr_count", + .default_value = 488900, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_EXEC_INSTR_COUNT, + }, + { + .name = "vary_instr", + .default_value = 
212100, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_VARY_INSTR, + }, + { + .name = "tex_tfch_num_operations", + .default_value = 288000, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS, + }, + { + .name = "l2_access", + .default_value = 378100, + .op = kbase_g7x_sum_all_memsys_blocks, + .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, + }, +}; + +static const struct kbase_ipa_group ipa_groups_def_g52_r1[] = { + { + .name = "gpu_active", + .default_value = 224200, + .op = kbase_g7x_jm_single_counter, + .counter_block_offset = JM_GPU_ACTIVE, + }, + { + .name = "exec_instr_count", + .default_value = 384700, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_EXEC_INSTR_COUNT, + }, + { + .name = "vary_instr", + .default_value = 271900, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_VARY_INSTR, + }, + { + .name = "tex_tfch_num_operations", + .default_value = 477700, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS, + }, + { + .name = "l2_access", + .default_value = 551400, + .op = kbase_g7x_sum_all_memsys_blocks, + .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, + }, +}; + +static const struct kbase_ipa_group ipa_groups_def_g51[] = { + { + .name = "gpu_active", + .default_value = 201400, + .op = kbase_g7x_jm_single_counter, + .counter_block_offset = JM_GPU_ACTIVE, + }, + { + .name = "exec_instr_count", + .default_value = 392700, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_EXEC_INSTR_COUNT, + }, + { + .name = "vary_instr", + .default_value = 274000, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_VARY_INSTR, + }, + { + .name = "tex_tfch_num_operations", + .default_value = 528000, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS, + }, + { + .name = "l2_access", + .default_value = 506400, + .op = kbase_g7x_sum_all_memsys_blocks, + 
.counter_block_offset = MEMSYS_L2_ANY_LOOKUP, + }, +}; + +static const struct kbase_ipa_group ipa_groups_def_g77[] = { + { + .name = "l2_access", + .default_value = 710800, + .op = kbase_g7x_sum_all_memsys_blocks, + .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, + }, + { + .name = "exec_instr_msg", + .default_value = 2375300, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_EXEC_INSTR_MSG, + }, + { + .name = "exec_instr_fma", + .default_value = 656100, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_EXEC_INSTR_FMA, + }, + { + .name = "tex_filt_num_operations", + .default_value = 318800, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_TEX_FILT_NUM_OPERATIONS, + }, + { + .name = "gpu_active", + .default_value = 172800, + .op = kbase_g7x_jm_single_counter, + .counter_block_offset = JM_GPU_ACTIVE, + }, +}; + +static const struct kbase_ipa_group ipa_groups_def_tbex[] = { + { + .name = "l2_access", + .default_value = 599800, + .op = kbase_g7x_sum_all_memsys_blocks, + .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, + }, + { + .name = "exec_instr_msg", + .default_value = 1830200, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_EXEC_INSTR_MSG, + }, + { + .name = "exec_instr_fma", + .default_value = 407300, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_EXEC_INSTR_FMA, + }, + { + .name = "tex_filt_num_operations", + .default_value = 224500, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_TEX_FILT_NUM_OPERATIONS, + }, + { + .name = "gpu_active", + .default_value = 153800, + .op = kbase_g7x_jm_single_counter, + .counter_block_offset = JM_GPU_ACTIVE, + }, +}; + +static const struct kbase_ipa_group ipa_groups_def_tbax[] = { + { + .name = "l2_access", + .default_value = 599800, + .op = kbase_g7x_sum_all_memsys_blocks, + .counter_block_offset = MEMSYS_L2_ANY_LOOKUP, + }, + { + .name = "exec_instr_msg", + .default_value = 1830200, + .op = 
kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_EXEC_INSTR_MSG, + }, + { + .name = "exec_instr_fma", + .default_value = 407300, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_EXEC_INSTR_FMA, + }, + { + .name = "tex_filt_num_operations", + .default_value = 224500, + .op = kbase_g7x_sum_all_shader_cores, + .counter_block_offset = SC_TEX_FILT_NUM_OPERATIONS, + }, + { + .name = "gpu_active", + .default_value = 153800, + .op = kbase_g7x_jm_single_counter, + .counter_block_offset = JM_GPU_ACTIVE, + }, +}; + + +#define IPA_POWER_MODEL_OPS(gpu, init_token) \ + const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \ + .name = "mali-" #gpu "-power-model", \ + .init = kbase_ ## init_token ## _power_model_init, \ + .term = kbase_ipa_vinstr_common_model_term, \ + .get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \ + .reset_counter_data = kbase_ipa_vinstr_reset_data, \ + }; \ + KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops) + +#define STANDARD_POWER_MODEL(gpu, reference_voltage) \ + static int kbase_ ## gpu ## _power_model_init(\ + struct kbase_ipa_model *model) \ + { \ + BUILD_BUG_ON(ARRAY_SIZE(ipa_groups_def_ ## gpu) > \ + KBASE_IPA_MAX_GROUP_DEF_NUM); \ + return kbase_ipa_vinstr_common_model_init(model, \ + ipa_groups_def_ ## gpu, \ + ARRAY_SIZE(ipa_groups_def_ ## gpu), \ + kbase_g7x_get_active_cycles, \ + (reference_voltage)); \ + } \ + IPA_POWER_MODEL_OPS(gpu, gpu) + +#define ALIAS_POWER_MODEL(gpu, as_gpu) \ + IPA_POWER_MODEL_OPS(gpu, as_gpu) + +STANDARD_POWER_MODEL(g71, 800); +STANDARD_POWER_MODEL(g72, 800); +STANDARD_POWER_MODEL(g76, 800); +STANDARD_POWER_MODEL(g52_r1, 1000); +STANDARD_POWER_MODEL(g51, 1000); +STANDARD_POWER_MODEL(g77, 1000); +STANDARD_POWER_MODEL(tbex, 1000); +STANDARD_POWER_MODEL(tbax, 1000); + +/* g52 is an alias of g76 (TNOX) for IPA */ +ALIAS_POWER_MODEL(g52, g76); +/* tnax is an alias of g77 (TTRX) for IPA */ +ALIAS_POWER_MODEL(tnax, g77); + +static const struct kbase_ipa_model_ops 
*ipa_counter_model_ops[] = { + &kbase_g71_ipa_model_ops, + &kbase_g72_ipa_model_ops, + &kbase_g76_ipa_model_ops, + &kbase_g52_ipa_model_ops, + &kbase_g52_r1_ipa_model_ops, + &kbase_g51_ipa_model_ops, + &kbase_g77_ipa_model_ops, + &kbase_tnax_ipa_model_ops, + &kbase_tbex_ipa_model_ops, + &kbase_tbax_ipa_model_ops +}; + +const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find( + struct kbase_device *kbdev, const char *name) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ipa_counter_model_ops); ++i) { + const struct kbase_ipa_model_ops *ops = + ipa_counter_model_ops[i]; + + if (!strcmp(ops->name, name)) + return ops; + } + + dev_err(kbdev->dev, "power model \'%s\' not found\n", name); + + return NULL; +} + +const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id) +{ + const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> + GPU_ID_VERSION_PRODUCT_ID_SHIFT; + + switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) { + case GPU_ID2_PRODUCT_TMIX: + return "mali-g71-power-model"; + case GPU_ID2_PRODUCT_THEX: + return "mali-g72-power-model"; + case GPU_ID2_PRODUCT_TNOX: + return "mali-g76-power-model"; + case GPU_ID2_PRODUCT_TSIX: + return "mali-g51-power-model"; + case GPU_ID2_PRODUCT_TGOX: + if ((gpu_id & GPU_ID2_VERSION_MAJOR) == + (0 << GPU_ID2_VERSION_MAJOR_SHIFT)) + /* g52 aliased to g76 power-model's ops */ + return "mali-g52-power-model"; + else + return "mali-g52_r1-power-model"; + case GPU_ID2_PRODUCT_TNAX: + return "mali-tnax-power-model"; + case GPU_ID2_PRODUCT_TTRX: + return "mali-g77-power-model"; + case GPU_ID2_PRODUCT_TBEX: + return "mali-tbex-power-model"; + case GPU_ID2_PRODUCT_TBAX: + return "mali-tbax-power-model"; + default: + return NULL; + } +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c old mode 100644 new mode 100755 index d663ccb..24d7b06 --- a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c +++ 
b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,9 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ + #include #include #include @@ -27,30 +27,19 @@ #include "mali_kbase_ipa_debugfs.h" #include "mali_kbase_ipa_simple.h" #include "backend/gpu/mali_kbase_pm_internal.h" - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) +#include "backend/gpu/mali_kbase_devfreq.h" #include -#else -#include -#define dev_pm_opp_find_freq_exact opp_find_freq_exact -#define dev_pm_opp_get_voltage opp_get_voltage -#define dev_pm_opp opp -#endif #define KBASE_IPA_FALLBACK_MODEL_NAME "mali-simple-power-model" -static const struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = { - &kbase_simple_ipa_model_ops, - &kbase_g71_ipa_model_ops, - &kbase_g72_ipa_model_ops, - &kbase_g76_ipa_model_ops, - &kbase_g52_ipa_model_ops, - &kbase_g52_r1_ipa_model_ops, - &kbase_g51_ipa_model_ops, - &kbase_g77_ipa_model_ops, - &kbase_tnax_ipa_model_ops, - &kbase_tbex_ipa_model_ops -}; +/* Polling by thermal governor starts when the temperature exceeds the certain + * trip point. In order to have meaningful value for the counters, when the + * polling starts and first call to kbase_get_real_power() is made, it is + * required to reset the counter values every now and then. 
+ * It is reasonable to do the reset every second if no polling is being done, + * the counter model implementation also assumes max sampling interval of 1 sec. + */ +#define RESET_INTERVAL_MS ((s64)1000) int kbase_ipa_model_recalculate(struct kbase_ipa_model *model) { @@ -71,53 +60,24 @@ int kbase_ipa_model_recalculate(struct kbase_ipa_model *model) } const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev, - const char *name) + const char *name) { - int i; + if (!strcmp(name, kbase_simple_ipa_model_ops.name)) + return &kbase_simple_ipa_model_ops; - for (i = 0; i < ARRAY_SIZE(kbase_ipa_all_model_ops); ++i) { - const struct kbase_ipa_model_ops *ops = kbase_ipa_all_model_ops[i]; - - if (!strcmp(ops->name, name)) - return ops; - } - - dev_err(kbdev->dev, "power model \'%s\' not found\n", name); - - return NULL; + return kbase_ipa_counter_model_ops_find(kbdev, name); } KBASE_EXPORT_TEST_API(kbase_ipa_model_ops_find); const char *kbase_ipa_model_name_from_id(u32 gpu_id) { - const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> - GPU_ID_VERSION_PRODUCT_ID_SHIFT; - - switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) { - case GPU_ID2_PRODUCT_TMIX: - return "mali-g71-power-model"; - case GPU_ID2_PRODUCT_THEX: - return "mali-g72-power-model"; - case GPU_ID2_PRODUCT_TNOX: - return "mali-g76-power-model"; - case GPU_ID2_PRODUCT_TSIX: - return "mali-g51-power-model"; - case GPU_ID2_PRODUCT_TGOX: - if ((gpu_id & GPU_ID2_VERSION_MAJOR) == - (0 << GPU_ID2_VERSION_MAJOR_SHIFT)) - /* g52 aliased to g76 power-model's ops */ - return "mali-g52-power-model"; - else - return "mali-g52_r1-power-model"; - case GPU_ID2_PRODUCT_TNAX: - return "mali-tnax-power-model"; - case GPU_ID2_PRODUCT_TTRX: - return "mali-g77-power-model"; - case GPU_ID2_PRODUCT_TBEX: - return "mali-tbex-power-model"; - default: + const char* model_name = + kbase_ipa_counter_model_name_from_id(gpu_id); + + if (!model_name) return KBASE_IPA_FALLBACK_MODEL_NAME; - } + else + return 
model_name; } KBASE_EXPORT_TEST_API(kbase_ipa_model_name_from_id); @@ -364,6 +324,8 @@ int kbase_ipa_init(struct kbase_device *kbdev) kbdev->ipa.configured_model = default_model; } + kbdev->ipa.last_sample_time = ktime_get(); + end: if (err) kbase_ipa_term_locked(kbdev); @@ -418,7 +380,8 @@ static u32 kbase_scale_dynamic_power(const u32 c, const u32 freq, const u32 v2f = v2f_big / 1000; /* Range (working backwards from next line): 0 < v2fc < 2^23 uW. - * Must be < 2^42 to avoid overflowing the return value. */ + * Must be < 2^42 to avoid overflowing the return value. + */ const u64 v2fc = (u64) c * (u64) v2f; /* Range: 0 < v2fc / 1000 < 2^13 mW */ @@ -514,8 +477,9 @@ static u32 get_static_power_locked(struct kbase_device *kbdev, return power; } -#if defined(CONFIG_MALI_PWRSOFT_765) || \ - LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) +#if KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE +#if defined(CONFIG_MALI_PWRSOFT_765) || \ + KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE static unsigned long kbase_get_static_power(struct devfreq *df, unsigned long voltage) #else @@ -524,8 +488,8 @@ static unsigned long kbase_get_static_power(unsigned long voltage) { struct kbase_ipa_model *model; u32 power = 0; -#if defined(CONFIG_MALI_PWRSOFT_765) || \ - LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) +#if defined(CONFIG_MALI_PWRSOFT_765) || \ + KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE struct kbase_device *kbdev = dev_get_drvdata(&df->dev); #else struct kbase_device *kbdev = kbase_find_device(-1); @@ -541,16 +505,55 @@ static unsigned long kbase_get_static_power(unsigned long voltage) mutex_unlock(&kbdev->ipa.lock); -#if !(defined(CONFIG_MALI_PWRSOFT_765) || \ - LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)) +#if !(defined(CONFIG_MALI_PWRSOFT_765) || \ + KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) kbase_release_device(kbdev); #endif return power; } +#endif /* KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE */ + +/** + * opp_translate_freq_voltage() - Translate nominal 
OPP frequency from + * devicetree into the real frequency for + * top-level and shader cores. + * @kbdev: Device pointer + * @nominal_freq: Nominal frequency in Hz. + * @nominal_voltage: Nominal voltage, in mV. + * @freqs: Pointer to array of real frequency values. + * @volts: Pointer to array of voltages. + * + * If there are 2 clock domains, then top-level and shader cores can operate + * at different frequency and voltage level. The nominal frequency ("opp-hz") + * used by devfreq from the devicetree may not be same as the real frequency + * at which top-level and shader cores are operating, so a translation is + * needed. + * Nominal voltage shall always be same as the real voltage for top-level. + */ +static void opp_translate_freq_voltage(struct kbase_device *kbdev, + unsigned long nominal_freq, + unsigned long nominal_voltage, + unsigned long *freqs, + unsigned long *volts) +{ + u64 core_mask; + + kbase_devfreq_opp_translate(kbdev, nominal_freq, &core_mask, + freqs, volts); + CSTD_UNUSED(core_mask); + + if (kbdev->nr_clocks == 1) { + freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = + freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]; + volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = + volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]; + } +} -#if defined(CONFIG_MALI_PWRSOFT_765) || \ - LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) +#if KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE +#if defined(CONFIG_MALI_PWRSOFT_765) || \ + KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE static unsigned long kbase_get_dynamic_power(struct devfreq *df, unsigned long freq, unsigned long voltage) @@ -560,10 +563,13 @@ static unsigned long kbase_get_dynamic_power(unsigned long freq, #endif { struct kbase_ipa_model *model; - u32 power_coeff = 0, power = 0; + unsigned long freqs[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; + unsigned long volts[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; + u32 power_coeffs[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; + u32 power = 0; int err = 0; -#if defined(CONFIG_MALI_PWRSOFT_765) || \ - LINUX_VERSION_CODE >= 
KERNEL_VERSION(4, 10, 0) +#if defined(CONFIG_MALI_PWRSOFT_765) || \ + KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE struct kbase_device *kbdev = dev_get_drvdata(&df->dev); #else struct kbase_device *kbdev = kbase_find_device(-1); @@ -576,34 +582,53 @@ static unsigned long kbase_get_dynamic_power(unsigned long freq, model = kbdev->ipa.fallback_model; - err = model->ops->get_dynamic_coeff(model, &power_coeff); - - if (!err) - power = kbase_scale_dynamic_power(power_coeff, freq, voltage); - else + err = model->ops->get_dynamic_coeff(model, power_coeffs); + + if (!err) { + opp_translate_freq_voltage(kbdev, freq, voltage, freqs, volts); + + power = kbase_scale_dynamic_power( + power_coeffs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL], + freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL], + volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]); + + /* Here unlike kbase_get_real_power(), shader core frequency is + * used for the scaling as simple power model is used to obtain + * the value of dynamic coefficient (which is a fixed value + * retrieved from the device tree).
+ */ + power += kbase_scale_dynamic_power( + power_coeffs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES], + freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES], + volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]); + } else dev_err_ratelimited(kbdev->dev, "Model %s returned error code %d\n", model->ops->name, err); mutex_unlock(&kbdev->ipa.lock); -#if !(defined(CONFIG_MALI_PWRSOFT_765) || \ - LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)) +#if !(defined(CONFIG_MALI_PWRSOFT_765) || \ + KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) kbase_release_device(kbdev); #endif return power; } +#endif /* KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE */ int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power, unsigned long freq, unsigned long voltage) { struct kbase_ipa_model *model; - u32 power_coeff = 0; - int err = 0; + unsigned long freqs[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; + unsigned long volts[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; + u32 power_coeffs[KBASE_IPA_BLOCK_TYPE_NUM] = {0}; struct kbasep_pm_metrics diff; u64 total_time; + bool skip_utilization_scaling = false; + int err = 0; lockdep_assert_held(&kbdev->ipa.lock); @@ -611,30 +636,62 @@ int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power, model = get_current_model(kbdev); - err = model->ops->get_dynamic_coeff(model, &power_coeff); + err = model->ops->get_dynamic_coeff(model, power_coeffs); /* If the counter model returns an error (e.g. switching back to * protected mode and failing to read counters, or a counter sample * with too few cycles), revert to the fallback model. */ if (err && model != kbdev->ipa.fallback_model) { + /* No meaningful scaling for GPU utilization can be done if + * the sampling interval was too long. This is equivalent to + * assuming GPU was busy throughout (similar to what is done + * during protected mode). 
+ */ + if (err == -EOVERFLOW) + skip_utilization_scaling = true; + model = kbdev->ipa.fallback_model; - err = model->ops->get_dynamic_coeff(model, &power_coeff); + err = model->ops->get_dynamic_coeff(model, power_coeffs); } - if (err) + if (WARN_ON(err)) return err; - *power = kbase_scale_dynamic_power(power_coeff, freq, voltage); + opp_translate_freq_voltage(kbdev, freq, voltage, freqs, volts); - /* time_busy / total_time cannot be >1, so assigning the 64-bit - * result of div_u64 to *power cannot overflow. - */ - total_time = diff.time_busy + (u64) diff.time_idle; - *power = div_u64(*power * (u64) diff.time_busy, - max(total_time, 1ull)); + *power = kbase_scale_dynamic_power( + power_coeffs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL], + freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL], + volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]); + + if (power_coeffs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]) { + unsigned long freq = freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]; - *power += get_static_power_locked(kbdev, model, voltage); + /* As per the HW team, the top-level frequency needs to be used + * for the scaling if the counter based model was used as + * counter values are normalized with the GPU_ACTIVE counter + * value, which increments at the rate of top-level frequency. + */ + if (model != kbdev->ipa.fallback_model) + freq = freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]; + + *power += kbase_scale_dynamic_power( + power_coeffs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES], + freq, volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]); + } + + if (!skip_utilization_scaling) { + /* time_busy / total_time cannot be >1, so assigning the 64-bit + * result of div_u64 to *power cannot overflow. 
+ */ + total_time = diff.time_busy + (u64) diff.time_idle; + *power = div_u64(*power * (u64) diff.time_busy, + max(total_time, 1ull)); + } + + *power += get_static_power_locked(kbdev, model, + volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]); return err; } @@ -658,18 +715,42 @@ int kbase_get_real_power(struct devfreq *df, u32 *power, } KBASE_EXPORT_TEST_API(kbase_get_real_power); -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) -struct devfreq_cooling_ops kbase_ipa_power_model_ops = { -#else struct devfreq_cooling_power kbase_ipa_power_model_ops = { -#endif -#ifdef CONFIG_MALI_DEVFREQ +#if KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE .get_static_power = &kbase_get_static_power, .get_dynamic_power = &kbase_get_dynamic_power, -#endif -#if defined(CONFIG_MALI_PWRSOFT_765) || \ - LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) +#endif /* KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE */ +#if defined(CONFIG_MALI_PWRSOFT_765) || \ + KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE .get_real_power = &kbase_get_real_power, #endif }; KBASE_EXPORT_TEST_API(kbase_ipa_power_model_ops); + +void kbase_ipa_reset_data(struct kbase_device *kbdev) +{ + ktime_t now, diff; + s64 elapsed_time; + + mutex_lock(&kbdev->ipa.lock); + + now = ktime_get(); + diff = ktime_sub(now, kbdev->ipa.last_sample_time); + elapsed_time = ktime_to_ms(diff); + + if (elapsed_time > RESET_INTERVAL_MS) { + struct kbasep_pm_metrics diff; + struct kbase_ipa_model *model; + + kbase_pm_get_dvfs_metrics( + kbdev, &kbdev->ipa.last_metrics, &diff); + + model = get_current_model(kbdev); + if (model != kbdev->ipa.fallback_model) + model->ops->reset_counter_data(model); + + kbdev->ipa.last_sample_time = ktime_get(); + } + + mutex_unlock(&kbdev->ipa.lock); +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h index 92aace9..c668af9 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h +++ 
b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KBASE_IPA_H_ @@ -27,6 +26,20 @@ struct devfreq; +/** + * enum kbase_ipa_block_type - Type of block for which power estimation is done. + * + * @KBASE_IPA_BLOCK_TYPE_TOP_LEVEL: Top-level block, that covers CSHW, + * MEMSYS, Tiler. + * @KBASE_IPA_BLOCK_TYPE_SHADER_CORES: All Shader cores. + * @KBASE_IPA_BLOCK_TYPE_NUM: Number of blocks. + */ +enum kbase_ipa_block_type { + KBASE_IPA_BLOCK_TYPE_TOP_LEVEL, + KBASE_IPA_BLOCK_TYPE_SHADER_CORES, + KBASE_IPA_BLOCK_TYPE_NUM +}; + /** * struct kbase_ipa_model - Object describing a particular IPA model. * @kbdev: pointer to kbase device @@ -89,7 +102,8 @@ struct kbase_ipa_model_ops { int (*init)(struct kbase_ipa_model *model); /* Called immediately after init(), or when a parameter is changed, so * that any coefficients derived from model parameters can be - * recalculated. */ + * recalculated + */ int (*recalculate)(struct kbase_ipa_model *model); void (*term)(struct kbase_ipa_model *model); /* @@ -101,7 +115,9 @@ struct kbase_ipa_model_ops { * is then scaled by the IPA framework according to the current OPP's * frequency and voltage. * - * Return: 0 on success, or an error code. 
+ * Return: 0 on success, or an error code. -EOVERFLOW error code will + * indicate that sampling interval was too large and no meaningful + * scaling for GPU utilization can be done. */ int (*get_dynamic_coeff)(struct kbase_ipa_model *model, u32 *coeffp); /* @@ -115,6 +131,18 @@ struct kbase_ipa_model_ops { * Return: 0 on success, or an error code. */ int (*get_static_coeff)(struct kbase_ipa_model *model, u32 *coeffp); + + /* + * reset_counter_data() - Reset the HW counter data used for calculating + * dynamic power coefficient + * @model: pointer to model + * + * This method is currently applicable only to the counter based model. + * The next call to get_dynamic_coeff() will have to calculate the + * dynamic power coefficient based on the HW counter data generated + * from this point onwards. + */ + void (*reset_counter_data)(struct kbase_ipa_model *model); }; /** @@ -163,6 +191,17 @@ int kbase_ipa_model_recalculate(struct kbase_ipa_model *model); const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev, const char *name); +/** + * kbase_ipa_counter_model_ops_find - Lookup an IPA counter model using its name + * @kbdev: pointer to kbase device + * @name: name of counter model to lookup + * + * Return: Pointer to counter model's 'ops' structure, or NULL if the lookup + * failed.
+ */ +const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find( + struct kbase_device *kbdev, const char *name); + /** * kbase_ipa_model_name_from_id - Find the best model for a given GPU ID * @gpu_id: GPU ID of GPU the model will be used for @@ -172,6 +211,16 @@ const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device * */ const char *kbase_ipa_model_name_from_id(u32 gpu_id); +/** + * kbase_ipa_counter_model_name_from_id - Find the best counter model for a + * given GPU ID + * @gpu_id: GPU ID of GPU the counter model will be used for + * + * Return: The name of the appropriate counter-based model, or NULL if the + * no counter model exists. + */ +const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id); + /** * kbase_ipa_init_model - Initilaize the particular IPA model * @kbdev: pointer to kbase device @@ -183,7 +232,7 @@ const char *kbase_ipa_model_name_from_id(u32 gpu_id); * Return: pointer to kbase_ipa_model on success, NULL on error */ struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev, - const struct kbase_ipa_model_ops *ops); + const struct kbase_ipa_model_ops *ops); /** * kbase_ipa_term_model - Terminate the particular IPA model * @model: pointer to the IPA model object, already initialized @@ -202,16 +251,6 @@ void kbase_ipa_term_model(struct kbase_ipa_model *model); */ void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev); -extern const struct kbase_ipa_model_ops kbase_g71_ipa_model_ops; -extern const struct kbase_ipa_model_ops kbase_g72_ipa_model_ops; -extern const struct kbase_ipa_model_ops kbase_g76_ipa_model_ops; -extern const struct kbase_ipa_model_ops kbase_g52_ipa_model_ops; -extern const struct kbase_ipa_model_ops kbase_g52_r1_ipa_model_ops; -extern const struct kbase_ipa_model_ops kbase_g51_ipa_model_ops; -extern const struct kbase_ipa_model_ops kbase_g77_ipa_model_ops; -extern const struct kbase_ipa_model_ops kbase_tnax_ipa_model_ops; -extern const struct 
kbase_ipa_model_ops kbase_tbex_ipa_model_ops; - /** * kbase_get_real_power() - get the real power consumption of the GPU * @df: dynamic voltage and frequency scaling information for the GPU. @@ -237,11 +276,20 @@ int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power, unsigned long voltage); #endif /* MALI_UNIT_TEST */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) -extern struct devfreq_cooling_ops kbase_ipa_power_model_ops; -#else extern struct devfreq_cooling_power kbase_ipa_power_model_ops; -#endif + +/** + * kbase_ipa_reset_data() - Reset the data required for power estimation. + * @kbdev: Pointer to kbase device. + * + * This function is called to ensure a meaningful baseline for + * kbase_get_real_power(), when thermal governor starts the polling, and + * that is achieved by updating the GPU utilization metrics and retrieving + * the accumulated value of HW counters. + * Basically this function collects all the data required for power estimation + * but does not process it. + */ +void kbase_ipa_reset_data(struct kbase_device *kbdev); #else /* !(defined(CONFIG_MALI_DEVFREQ) && defined(CONFIG_DEVFREQ_THERMAL)) */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c index 30a3b7d..5976389 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2017-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #include @@ -28,7 +27,7 @@ #include "mali_kbase_ipa.h" #include "mali_kbase_ipa_debugfs.h" -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)) +#if (KERNEL_VERSION(4, 7, 0) > LINUX_VERSION_CODE) #define DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SIMPLE_ATTRIBUTE #endif @@ -160,7 +159,8 @@ int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name, return -ENOMEM; /* 'name' is stack-allocated for array elements, so copy it into - * heap-allocated storage */ + * heap-allocated storage + */ param->name = kstrdup(name, GFP_KERNEL); if (!param->name) { diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h index a983d9c..f690367 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KBASE_IPA_DEBUGFS_H_ @@ -28,7 +27,7 @@ enum kbase_ipa_model_param_type { PARAM_TYPE_STRING, }; -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) void kbase_ipa_debugfs_init(struct kbase_device *kbdev); int kbase_ipa_model_param_add(struct kbase_ipa_model *model, const char *name, @@ -63,6 +62,9 @@ static inline int kbase_ipa_model_param_add(struct kbase_ipa_model *model, static inline void kbase_ipa_model_param_free_all(struct kbase_ipa_model *model) { } +static inline void kbase_ipa_model_param_set_s32(struct kbase_ipa_model *model, + const char *name, s32 val) +{ } #endif /* CONFIG_DEBUG_FS */ #endif /* _KBASE_IPA_DEBUGFS_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c index 852559e..55f1d1c 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,13 +17,11 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #include #include -#ifdef CONFIG_DEVFREQ_THERMAL +#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) #include #endif #include @@ -34,20 +33,18 @@ #include "mali_kbase_ipa_simple.h" #include "mali_kbase_ipa_debugfs.h" -#if MALI_UNIT_TEST - -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) -static unsigned long dummy_temp; +/* This is used if the dynamic power for top-level is estimated separately + * through the counter model. To roughly match the contribution of top-level + * power in the total dynamic power, when calculated through counter model, + * this scalar is used for the dynamic coefficient specified in the device tree + * for simple power model. This value was provided by the HW team after + * taking all the power data collected and dividing top level power by shader + * core power and then averaging it across all samples. + */ +#define TOP_LEVEL_DYN_COEFF_SCALER (3) -static int kbase_simple_power_model_get_dummy_temp( - struct thermal_zone_device *tz, - unsigned long *temp) -{ - *temp = READ_ONCE(dummy_temp); - return 0; -} +#if MALI_UNIT_TEST -#else static int dummy_temp; static int kbase_simple_power_model_get_dummy_temp( @@ -57,7 +54,6 @@ static int kbase_simple_power_model_get_dummy_temp( *temp = READ_ONCE(dummy_temp); return 0; } -#endif /* Intercept calls to the kernel function using a macro */ #ifdef thermal_zone_get_temp @@ -143,16 +139,13 @@ static u32 calculate_temp_scaling_factor(s32 ts[4], s64 t) /* We can't call thermal_zone_get_temp() directly in model_static_coeff(), * because we don't know if tz->lock is held in the same thread. So poll it in - * a separate thread to get around this. */ + * a separate thread to get around this. 
+ */ static int poll_temperature(void *data) { struct kbase_ipa_model_simple_data *model_data = (struct kbase_ipa_model_simple_data *) data; -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) - unsigned long temp; -#else int temp; -#endif while (!kthread_should_stop()) { struct thermal_zone_device *tz = READ_ONCE(model_data->gpu_tz); @@ -208,7 +201,21 @@ static int model_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp) struct kbase_ipa_model_simple_data *model_data = (struct kbase_ipa_model_simple_data *) model->model_data; +#if MALI_USE_CSF + /* On CSF GPUs, the dynamic power for top-level and shader cores is + * estimated separately. Currently there is a single dynamic + * coefficient value provided in the device tree for simple model. + * As per the discussion with HW team the coefficient value needs to + * be scaled down for top-level to limit its contribution in the + * total dynamic power. + */ + coeffp[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL] = + model_data->dynamic_coefficient / TOP_LEVEL_DYN_COEFF_SCALER; + coeffp[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = + model_data->dynamic_coefficient; +#else *coeffp = model_data->dynamic_coefficient; +#endif return 0; } diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.h b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.h index fed67d5..fb174e2 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license.
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KBASE_IPA_SIMPLE_H_ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_defs.h b/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_defs.h index aac561b..c490f1c 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_defs.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_defs.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - - /* * Definitions (types, defines, etcs) specific to Job Manager Kbase. * They are placed here to allow the hierarchy of header files to work. 
@@ -129,7 +126,7 @@ /* Reset the GPU after each atom completion */ #define KBASE_SERIALIZE_RESET (1 << 2) -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) /** * struct base_job_fault_event - keeps track of the atom which faulted or which * completed after the faulty atom but before the @@ -409,6 +406,16 @@ struct kbase_ext_res { * sync through soft jobs and for the implicit * synchronization required on access to external * resources. + * @dma_fence.fence_in: Input fence + * @dma_fence.fence: Points to the dma-buf output fence for this atom. + * @dma_fence.context: The dma-buf fence context number for this atom. A + * unique context number is allocated to each katom in + * the context on context creation. + * @dma_fence.seqno: The dma-buf fence sequence number for this atom. This + * is increased every time this katom uses dma-buf fence + * @dma_fence.callbacks: List of all callbacks set up to wait on other fences + * @dma_fence.dep_count: Atomic counter of number of outstanding dma-buf fence + * dependencies for this atom. * @event_code: Event code for the job chain represented by the atom, * both HW and low-level SW events are represented by * event codes. @@ -443,6 +450,8 @@ struct kbase_ext_res { * @blocked: flag indicating that atom's resubmission to GPU is * blocked till the work item is scheduled to return the * atom to JS. + * @seq_nr: user-space sequence number, to order atoms in some + * temporal order * @pre_dep: Pointer to atom that this atom has same-slot * dependency on * @post_dep: Pointer to atom that has same-slot dependency on @@ -477,11 +486,19 @@ struct kbase_ext_res { * when transitioning into or out of protected mode. * Atom will be either entering or exiting the * protected mode. + * @protected_state.enter: entering the protected mode. + * @protected_state.exit: exiting the protected mode. * @runnable_tree_node: The node added to context's job slot specific rb tree * when the atom becomes runnable.
* @age: Age of atom relative to other atoms in the context, * is snapshot of the age_count counter in kbase * context. + * @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified. + * @renderpass_id:Renderpass identifier used to associate an atom that has + * BASE_JD_REQ_START_RENDERPASS set in its core requirements + * with an atom that has BASE_JD_REQ_END_RENDERPASS set. + * @jc_fragment: Set of GPU fragment job chains + * @retry_count: TODO: Not used,to be removed */ struct kbase_jd_atom { struct work_struct work; @@ -496,9 +513,9 @@ struct kbase_jd_atom { struct list_head jd_item; bool in_jd_list; -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE u8 jit_ids[2]; -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ u16 nr_extres; struct kbase_ext_res *extres; @@ -516,7 +533,6 @@ struct kbase_jd_atom { * when working with this sub struct */ #if defined(CONFIG_SYNC_FILE) - /* Input fence */ #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence_in; #else @@ -539,14 +555,7 @@ struct kbase_jd_atom { #else struct dma_fence *fence; #endif - /* The dma-buf fence context number for this atom. A unique - * context number is allocated to each katom in the context on - * context creation. - */ unsigned int context; - /* The dma-buf fence sequence number for this atom. This is - * increased every time this katom uses dma-buf fence. - */ atomic_t seqno; /* This contains a list of all callbacks set up to wait on * other fences. 
This atom must be held back from JS until all @@ -593,7 +602,7 @@ struct kbase_jd_atom { wait_queue_head_t completed; enum kbase_jd_atom_state status; -#ifdef CONFIG_GPU_TRACEPOINTS +#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) int work_id; #endif int slot_nr; @@ -608,6 +617,8 @@ struct kbase_jd_atom { atomic_t blocked; + u64 seq_nr; + struct kbase_jd_atom *pre_dep; struct kbase_jd_atom *post_dep; @@ -616,7 +627,7 @@ struct kbase_jd_atom { u32 flush_id; -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) struct base_job_fault_event fault_event; #endif struct list_head queue; @@ -778,6 +789,7 @@ struct kbase_jd_renderpass { * @jit_pending_alloc: A list of just-in-time memory allocation * soft-jobs which will be reattempted after the * impending free of other active allocations. + * @max_priority: Max priority level allowed for this context. */ struct kbase_jd_context { struct mutex lock; @@ -792,12 +804,13 @@ struct kbase_jd_context { u32 job_nr; size_t tb_wrap_offset; -#ifdef CONFIG_GPU_TRACEPOINTS +#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) atomic_t work_id; #endif struct list_head jit_atoms_head; struct list_head jit_pending_alloc; + int max_priority; }; /** @@ -815,4 +828,27 @@ struct jsctx_queue { struct list_head x_dep_head; }; +/** + * struct kbase_as - Object representing an address space of GPU. + * @number: Index at which this address space structure is present + * in an array of address space structures embedded inside + * the &struct kbase_device. + * @pf_wq: Workqueue for processing work items related to + * Page fault and Bus fault handling. + * @work_pagefault: Work item for the Page fault handling. + * @work_busfault: Work item for the Bus fault handling. + * @pf_data: Data relating to Page fault. + * @bf_data: Data relating to Bus fault. + * @current_setup: Stores the MMU configuration for this address space. 
+ */ +struct kbase_as { + int number; + struct workqueue_struct *pf_wq; + struct work_struct work_pagefault; + struct work_struct work_busfault; + struct kbase_fault pf_data; + struct kbase_fault bf_data; + struct kbase_mmu_setup current_setup; +}; + #endif /* _KBASE_JM_DEFS_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_js.h b/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_js.h index 6c222ce..5e0c4bc 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_js.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_jm_js.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /* @@ -32,6 +31,7 @@ /** * kbasep_js_devdata_init - Initialize the Job Scheduler + * @kbdev: The kbase_device to operate on * * The struct kbasep_js_device_data sub-structure of kbdev must be zero * initialized before passing to the kbasep_js_devdata_init() function. This is @@ -41,6 +41,7 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev); /** * kbasep_js_devdata_halt - Halt the Job Scheduler. 
+ * @kbdev: The kbase_device to operate on * * It is safe to call this on kbdev even if it the kbasep_js_device_data * sub-structure was never initialized/failed initialization, to give efficient @@ -58,6 +59,7 @@ void kbasep_js_devdata_halt(struct kbase_device *kbdev); /** * kbasep_js_devdata_term - Terminate the Job Scheduler + * @kbdev: The kbase_device to operate on * * It is safe to call this on kbdev even if it the kbasep_js_device_data * sub-structure was never initialized/failed initialization, to give efficient @@ -75,6 +77,7 @@ void kbasep_js_devdata_term(struct kbase_device *kbdev); /** * kbasep_js_kctx_init - Initialize the Scheduling Component of a * struct kbase_context on the Job Scheduler. + * @kctx: The kbase_context to operate on * * This effectively registers a struct kbase_context with a Job Scheduler. * @@ -89,6 +92,7 @@ int kbasep_js_kctx_init(struct kbase_context *const kctx); /** * kbasep_js_kctx_term - Terminate the Scheduling Component of a * struct kbase_context on the Job Scheduler + * @kctx: The kbase_context to operate on * * This effectively de-registers a struct kbase_context from its Job Scheduler * @@ -108,6 +112,8 @@ void kbasep_js_kctx_term(struct kbase_context *kctx); * kbasep_js_add_job - Add a job chain to the Job Scheduler, * and take necessary actions to * schedule the context/run the job. + * @kctx: The kbase_context to operate on + * @atom: Atom to add * * This atomically does the following: * * Update the numbers of jobs information @@ -151,7 +157,10 @@ bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom); /** * kbasep_js_remove_job - Remove a job chain from the Job Scheduler, * except for its 'retained state'. 
- * + * @kbdev: The kbase_device to operate on + * @kctx: The kbase_context to operate on + * @atom: Atom to remove +* * Completely removing a job requires several calls: * * kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of * the atom @@ -185,6 +194,9 @@ void kbasep_js_remove_job(struct kbase_device *kbdev, * kbasep_js_remove_cancelled_job - Completely remove a job chain from the * Job Scheduler, in the case * where the job chain was cancelled. + * @kbdev: The kbase_device to operate on + * @kctx: The kbase_context to operate on + * @katom: Atom to remove * * This is a variant of kbasep_js_remove_job() that takes care of removing all * of the retained state too. This is generally useful for cancelled atoms, @@ -215,6 +227,9 @@ bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, * kbasep_js_runpool_requeue_or_kill_ctx - Handling the requeuing/killing of a * context that was evicted from the * policy queue or runpool. + * @kbdev: The kbase_device to operate on + * @kctx: The kbase_context to operate on + * @has_pm_ref: tells whether to release Power Manager active reference * * This should be used whenever handing off a context that has been evicted * from the policy queue or the runpool: @@ -242,6 +257,8 @@ void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, /** * kbasep_js_runpool_release_ctx - Release a refcount of a context being busy, * allowing it to be scheduled out. + * @kbdev: The kbase_device to operate on + * @kctx: The kbase_context to operate on * * When the refcount reaches zero and the context might be scheduled out * (depending on whether the Scheduling Policy has deemed it so, or if it has @@ -296,6 +313,9 @@ void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, * kbasep_js_runpool_release_ctx_and_katom_retained_state - Variant of * kbasep_js_runpool_release_ctx() that handles additional * actions from completing an atom. 
+ * @kbdev: KBase device + * @kctx: KBase context + * @katom_retained_state: Retained state from the atom * * This is usually called as part of completing an atom and releasing the * refcount on the context held by the atom. @@ -315,8 +335,12 @@ void kbasep_js_runpool_release_ctx_and_katom_retained_state( struct kbasep_js_atom_retained_state *katom_retained_state); /** - * kbasep_js_runpool_release_ctx_nolock - Variant of - * kbase_js_runpool_release_ctx() that assumes that + * kbasep_js_runpool_release_ctx_nolock - + * Variant of kbase_js_runpool_release_ctx() w/out locks + * @kbdev: KBase device + * @kctx: KBase context + * + * Variant of kbase_js_runpool_release_ctx() that assumes that * kbasep_js_device_data::runpool_mutex and * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not * attempt to schedule new contexts. @@ -326,6 +350,8 @@ void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev, /** * kbasep_js_schedule_privileged_ctx - Schedule in a privileged context + * @kbdev: KBase device + * @kctx: KBase context * * This schedules a context in regardless of the context priority. * If the runpool is full, a context will be forced out of the runpool and the @@ -351,6 +377,8 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, /** * kbasep_js_release_privileged_ctx - Release a privileged context, * allowing it to be scheduled out. + * @kbdev: KBase device + * @kctx: KBase context * * See kbasep_js_runpool_release_ctx for potential side effects. * @@ -368,6 +396,7 @@ void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, /** * kbase_js_try_run_jobs - Try to submit the next job on each slot + * @kbdev: KBase device * * The following locks may be used: * * kbasep_js_device_data::runpool_mutex @@ -378,6 +407,7 @@ void kbase_js_try_run_jobs(struct kbase_device *kbdev); /** * kbasep_js_suspend - Suspend the job scheduler during a Power Management * Suspend event. 
+ * @kbdev: KBase device * * Causes all contexts to be removed from the runpool, and prevents any * contexts from (re)entering the runpool. @@ -401,6 +431,7 @@ void kbasep_js_suspend(struct kbase_device *kbdev); /** * kbasep_js_resume - Resume the Job Scheduler after a Power Management * Resume event. + * @kbdev: KBase device * * This restores the actions from kbasep_js_suspend(): * * Schedules contexts back into the runpool @@ -412,7 +443,7 @@ void kbasep_js_resume(struct kbase_device *kbdev); * kbase_js_dep_resolved_submit - Submit an atom to the job scheduler. * * @kctx: Context pointer - * @atom: Pointer to the atom to submit + * @katom: Pointer to the atom to submit * * The atom is enqueued on the context's ringbuffer. The caller must have * ensured that all dependencies can be represented in the ringbuffer. @@ -457,7 +488,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js); * kbase_js_unpull - Return an atom to the job scheduler ringbuffer. * * @kctx: Context pointer - * @atom: Pointer to the atom to unpull + * @katom: Pointer to the atom to unpull * * An atom is 'unpulled' if execution is stopped but intended to be returned to * later. The most common reason for this is that the atom has been @@ -584,7 +615,6 @@ void kbase_js_set_timeouts(struct kbase_device *kbdev); */ void kbase_js_set_ctx_priority(struct kbase_context *kctx, int new_priority); - /** * kbase_js_update_ctx_priority - update the context priority * @@ -603,6 +633,8 @@ void kbase_js_update_ctx_priority(struct kbase_context *kctx); /** * kbasep_js_is_submit_allowed - Check that a context is allowed to submit * jobs on this policy + * @js_devdata: KBase Job Scheduler Device Data + * @kctx: KBase context * * The purpose of this abstraction is to hide the underlying data size, * and wrap up the long repeated line of code. 
@@ -625,13 +657,15 @@ static inline bool kbasep_js_is_submit_allowed( test_bit = (u16) (1u << kctx->as_nr); is_allowed = (bool) (js_devdata->runpool_irq.submit_allowed & test_bit); - dev_dbg(kctx->kbdev->dev, "JS: submit %s allowed on %p (as=%d)", + dev_dbg(kctx->kbdev->dev, "JS: submit %s allowed on %pK (as=%d)", is_allowed ? "is" : "isn't", (void *)kctx, kctx->as_nr); return is_allowed; } /** * kbasep_js_set_submit_allowed - Allow a context to submit jobs on this policy + * @js_devdata: KBase Job Scheduler Device Data + * @kctx: KBase context * * The purpose of this abstraction is to hide the underlying data size, * and wrap up the long repeated line of code. @@ -650,7 +684,7 @@ static inline void kbasep_js_set_submit_allowed( set_bit = (u16) (1u << kctx->as_nr); - dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", + dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %pK (as=%d)", kctx, kctx->as_nr); js_devdata->runpool_irq.submit_allowed |= set_bit; @@ -659,6 +693,8 @@ static inline void kbasep_js_set_submit_allowed( /** * kbasep_js_clear_submit_allowed - Prevent a context from submitting more * jobs on this policy + * @js_devdata: KBase Job Scheduler Device Data + * @kctx: KBase context * * The purpose of this abstraction is to hide the underlying data size, * and wrap up the long repeated line of code. 
@@ -679,13 +715,17 @@ static inline void kbasep_js_clear_submit_allowed( clear_bit = (u16) (1u << kctx->as_nr); clear_mask = ~clear_bit; - dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", + dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %pK (as=%d)", kctx, kctx->as_nr); js_devdata->runpool_irq.submit_allowed &= clear_mask; } /** + * kbasep_js_atom_retained_state_init_invalid - + * Create an initial 'invalid' atom retained state + * @retained_state: pointer where to create and initialize the state + * * Create an initial 'invalid' atom retained state, that requires no * atom-related work to be done on releasing with * kbasep_js_runpool_release_ctx_and_katom_retained_state() @@ -699,6 +739,10 @@ static inline void kbasep_js_atom_retained_state_init_invalid( } /** + * kbasep_js_atom_retained_state_copy() - Copy atom state + * @retained_state: where to copy + * @katom: where to copy from + * * Copy atom state that can be made available after jd_done_nolock() is called * on that atom. */ @@ -743,7 +787,7 @@ static inline bool kbasep_js_has_atom_finished( * kbasep_js_atom_retained_state_is_valid - Determine whether a struct * kbasep_js_atom_retained_state * is valid - * @katom_retained_state the atom's retained state to check + * @katom_retained_state: the atom's retained state to check * * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates * that the code should just ignore it. @@ -759,6 +803,8 @@ static inline bool kbasep_js_atom_retained_state_is_valid( /** * kbase_js_runpool_inc_context_count - Increment number of running contexts. + * @kbdev: KBase device + * @kctx: KBase context * * The following locking conditions are made on the caller: * * The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. @@ -795,6 +841,8 @@ static inline void kbase_js_runpool_inc_context_count( /** * kbase_js_runpool_dec_context_count - decrement number of running contexts. 
* + * @kbdev: KBase device + * @kctx: KBase context * The following locking conditions are made on the caller: * * The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex. * * The caller must hold the kbasep_js_device_data::runpool_mutex @@ -889,4 +937,17 @@ static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio) return kbasep_js_relative_priority_to_atom[prio_idx]; } +/** + * kbase_js_priority_check - Check the priority requested + * + * @kbdev: Device pointer + * @priority: Requested priority + * + * This will determine whether the requested priority can be satisfied. + * + * Return: The same or lower priority than requested. + */ + +base_jd_prio kbase_js_priority_check(struct kbase_device *kbdev, base_jd_prio priority); + #endif /* _KBASE_JM_JS_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_js_defs.h b/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_js_defs.h index 0b48615..75152fb 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_js_defs.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/jm/mali_kbase_js_defs.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2018, 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,33 +17,15 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ - /** - * @file mali_kbase_js.h - * Job Scheduler Type Definitions + * DOC: Job Scheduler Type Definitions */ #ifndef _KBASE_JS_DEFS_H_ #define _KBASE_JS_DEFS_H_ -/** - * @addtogroup base_api - * @{ - */ - -/** - * @addtogroup base_kbase_api - * @{ - */ - -/** - * @addtogroup kbase_js - * @{ - */ /* Forward decls */ struct kbase_device; struct kbase_jd_atom; @@ -50,11 +33,14 @@ struct kbase_jd_atom; typedef u32 kbase_context_flags; -/** Callback function run on all of a context's jobs registered with the Job - * Scheduler */ -typedef void (*kbasep_js_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom); +/* + * typedef kbasep_js_ctx_job_cb - Callback function run on all of a context's + * jobs registered with the Job Scheduler + */ +typedef void kbasep_js_ctx_job_cb(struct kbase_device *kbdev, + struct kbase_jd_atom *katom); -/** +/* * @brief Maximum number of jobs that can be submitted to a job slot whilst * inside the IRQ handler. * @@ -65,7 +51,15 @@ typedef void (*kbasep_js_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd #define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2 /** - * @brief Context attributes + * enum kbasep_js_ctx_attr - Context attributes + * @KBASEP_JS_CTX_ATTR_COMPUTE: Attribute indicating a context that contains + * Compute jobs. + * @KBASEP_JS_CTX_ATTR_NON_COMPUTE: Attribute indicating a context that contains + * Non-Compute jobs. + * @KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES: Attribute indicating that a context + * contains compute-job atoms that aren't restricted to a coherent group, + * and can run on all cores. 
+ * @KBASEP_JS_CTX_ATTR_COUNT: Must be the last in the enum * * Each context attribute can be thought of as a boolean value that caches some * state information about either the runpool, or the context: @@ -82,61 +76,70 @@ typedef void (*kbasep_js_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd * - The runpool holds a refcount of how many contexts in the runpool have this * attribute. * - The context holds a refcount of how many atoms have this attribute. + * + * KBASEP_JS_CTX_ATTR_COMPUTE: + * Attribute indicating a context that contains Compute jobs. That is, + * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE + * + * @note A context can be both 'Compute' and 'Non Compute' if it contains + * both types of jobs. + * + * KBASEP_JS_CTX_ATTR_NON_COMPUTE: + * Attribute indicating a context that contains Non-Compute jobs. That is, + * the context has some jobs that are \b not of type @ref + * BASE_JD_REQ_ONLY_COMPUTE. + * + * @note A context can be both 'Compute' and 'Non Compute' if it contains + * both types of jobs. + * + * KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES: + * Attribute indicating that a context contains compute-job atoms that + * aren't restricted to a coherent group, and can run on all cores. + * + * Specifically, this is when the atom's \a core_req satisfy: + * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2 + * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups + * + * Such atoms could be blocked from running if one of the coherent groups + * is being used by another job slot, so tracking this context attribute + * allows us to prevent such situations. + * + * @note This doesn't take into account the 1-coregroup case, where all + * compute atoms would effectively be able to run on 'all cores', but + * contexts will still not always get marked with this attribute. 
Instead, + * it is the caller's responsibility to take into account the number of + * coregroups when interpreting this attribute. + * + * @note Whilst Tiler atoms are normally combined with + * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without + * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy + * enough to handle anyway. + * + * */ enum kbasep_js_ctx_attr { - /** Attribute indicating a context that contains Compute jobs. That is, - * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE - * - * @note A context can be both 'Compute' and 'Non Compute' if it contains - * both types of jobs. - */ KBASEP_JS_CTX_ATTR_COMPUTE, - - /** Attribute indicating a context that contains Non-Compute jobs. That is, - * the context has some jobs that are \b not of type @ref - * BASE_JD_REQ_ONLY_COMPUTE. - * - * @note A context can be both 'Compute' and 'Non Compute' if it contains - * both types of jobs. - */ KBASEP_JS_CTX_ATTR_NON_COMPUTE, - - /** Attribute indicating that a context contains compute-job atoms that - * aren't restricted to a coherent group, and can run on all cores. - * - * Specifically, this is when the atom's \a core_req satisfy: - * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2 - * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups - * - * Such atoms could be blocked from running if one of the coherent groups - * is being used by another job slot, so tracking this context attribute - * allows us to prevent such situations. - * - * @note This doesn't take into account the 1-coregroup case, where all - * compute atoms would effectively be able to run on 'all cores', but - * contexts will still not always get marked with this attribute. Instead, - * it is the caller's responsibility to take into account the number of - * coregroups when interpreting this attribute. 
- * - * @note Whilst Tiler atoms are normally combined with - * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without - * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy - * enough to handle anyway. - */ KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, - - /** Must be the last in the enum */ KBASEP_JS_CTX_ATTR_COUNT }; enum { - /** Bit indicating that new atom should be started because this atom completed */ + /* + * Bit indicating that new atom should be started because this atom + * completed + */ KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0), - /** Bit indicating that the atom was evicted from the JS_NEXT registers */ + /* + * Bit indicating that the atom was evicted from the JS_NEXT registers + */ KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1) }; -/** Combination of KBASE_JS_ATOM_DONE_<...> bits */ +/** + * typedef kbasep_js_atom_done_code - Combination of KBASE_JS_ATOM_DONE_<...> + * bits + */ typedef u32 kbasep_js_atom_done_code; /* @@ -168,7 +171,9 @@ enum { * Internal atom priority defines for kbase_jd_atom::sched_prio */ enum { - KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0, + KBASE_JS_ATOM_SCHED_PRIO_FIRST = 0, + KBASE_JS_ATOM_SCHED_PRIO_REALTIME = KBASE_JS_ATOM_SCHED_PRIO_FIRST, + KBASE_JS_ATOM_SCHED_PRIO_HIGH, KBASE_JS_ATOM_SCHED_PRIO_MED, KBASE_JS_ATOM_SCHED_PRIO_LOW, KBASE_JS_ATOM_SCHED_PRIO_COUNT, @@ -183,7 +188,70 @@ enum { #define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED /** - * @brief KBase Device Data Job Scheduler sub-structure + * struct kbasep_js_device_data - KBase Device Data Job Scheduler sub-structure + * @runpool_irq: Sub-structure to collect together Job Scheduling data used in + * IRQ context. The hwaccess_lock must be held when accessing. + * @runpool_irq.submit_allowed: Bitvector indicating whether a currently + * scheduled context is allowed to submit jobs. When bit 'N' is set in + * this, it indicates whether the context bound to address space 'N' is + * allowed to submit jobs. 
+ * @runpool_irq.ctx_attr_ref_count: Array of Context Attributes Ref_counters: + * Each is large enough to hold a refcount of the number of contexts + * that can fit into the runpool. This is currently BASE_MAX_NR_AS. + * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store + * the refcount. Hence, it's not worthwhile reducing this to + * bit-manipulation on u32s to save space (where in contrast, 4 bit + * sub-fields would be easy to do and would save space). + * Whilst this must not become negative, the sign bit is used for: + * - error detection in debug builds + * - Optimization: it is undefined for a signed int to overflow, and so + * the compiler can optimize for that never happening (thus, no masking + * is required on updating the variable) + * @runpool_irq.slot_affinities: Affinity management and tracking. Bitvector + * to aid affinity checking. Element 'n' bit 'i' indicates that slot 'n' + * is using core i (i.e. slot_affinity_refcount[n][i] > 0) + * @runpool_irq.slot_affinity_refcount: Array of refcounts for each core owned + * by each slot. Used to generate the slot_affinities array of bitvectors. + * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS, + * because it is refcounted only when a job is definitely about to be + * submitted to a slot, and is de-refcounted immediately after a job + * finishes + * @schedule_sem: Scheduling semaphore. This must be held when calling + * kbase_jm_kick() + * @ctx_list_pullable: List of contexts that can currently be pulled from + * @ctx_list_unpullable: List of contexts that can not currently be pulled + * from, but have jobs currently running.
+ * @nr_user_contexts_running: Number of currently scheduled user contexts + * (excluding ones that are not submitting jobs) + * @nr_all_contexts_running: Number of currently scheduled contexts (including + * ones that are not submitting jobs) + * @js_reqs: Core Requirements to match up with base_js_atom's core_req member + * @note This is a write-once member, and so no locking is required to + * read + * @scheduling_period_ns: Value for JS_SCHEDULING_PERIOD_NS + * @soft_stop_ticks: Value for JS_SOFT_STOP_TICKS + * @soft_stop_ticks_cl: Value for JS_SOFT_STOP_TICKS_CL + * @hard_stop_ticks_ss: Value for JS_HARD_STOP_TICKS_SS + * @hard_stop_ticks_cl: Value for JS_HARD_STOP_TICKS_CL + * @hard_stop_ticks_dumping: Value for JS_HARD_STOP_TICKS_DUMPING + * @gpu_reset_ticks_ss: Value for JS_RESET_TICKS_SS + * @gpu_reset_ticks_cl: Value for JS_RESET_TICKS_CL + * @gpu_reset_ticks_dumping: Value for JS_RESET_TICKS_DUMPING + * @ctx_timeslice_ns: Value for JS_CTX_TIMESLICE_NS + * @suspended_soft_jobs_list: List of suspended soft jobs + * @softstop_always: Support soft-stop on a single context + * @init_status:The initialized-flag is placed at the end, to avoid + * cache-pollution (we should only be using this during init/term paths). + * @note This is a write-once member, and so no locking is required to + * read + * @nr_contexts_pullable:Number of contexts that can currently be pulled from + * @nr_contexts_runnable:Number of contexts that can either be pulled from or + * are currently running + * @soft_job_timeout_ms:Value for JS_SOFT_JOB_TIMEOUT + * @queue_mutex: Queue Lock, used to access the Policy's queue of contexts + * independently of the Run Pool. + * Of course, you don't need the Run Pool lock to access this. + * @runpool_mutex: Run Pool mutex, for managing contexts within the runpool. * * This encapsulates the current context of the Job Scheduler on a particular * device.
This context is global to the device, and is not tied to any @@ -191,121 +259,49 @@ enum { * * nr_contexts_running and as_free are optimized for packing together (by making * them smaller types than u32). The operations on them should rarely involve - * masking. The use of signed types for arithmetic indicates to the compiler that - * the value will not rollover (which would be undefined behavior), and so under - * the Total License model, it is free to make optimizations based on that (i.e. - * to remove masking). + * masking. The use of signed types for arithmetic indicates to the compiler + * that the value will not rollover (which would be undefined behavior), and so + * under the Total License model, it is free to make optimizations based on + * that (i.e. to remove masking). */ struct kbasep_js_device_data { - /* Sub-structure to collect together Job Scheduling data used in IRQ - * context. The hwaccess_lock must be held when accessing. */ struct runpool_irq { - /** Bitvector indicating whether a currently scheduled context is allowed to submit jobs. - * When bit 'N' is set in this, it indicates whether the context bound to address space - * 'N' is allowed to submit jobs. - */ u16 submit_allowed; - - /** Context Attributes: - * Each is large enough to hold a refcount of the number of contexts - * that can fit into the runpool. This is currently BASE_MAX_NR_AS - * - * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store - * the refcount. Hence, it's not worthwhile reducing this to - * bit-manipulation on u32s to save space (where in contrast, 4 bit - * sub-fields would be easy to do and would save space). 
- * - * Whilst this must not become negative, the sign bit is used for: - * - error detection in debug builds - * - Optimization: it is undefined for a signed int to overflow, and so - * the compiler can optimize for that never happening (thus, no masking - * is required on updating the variable) */ s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT]; - - /* - * Affinity management and tracking - */ - /** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates - * that slot 'n' is using core i (i.e. slot_affinity_refcount[n][i] > 0) */ u64 slot_affinities[BASE_JM_MAX_NR_SLOTS]; - /** Refcount for each core owned by each slot. Used to generate the - * slot_affinities array of bitvectors - * - * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS, - * because it is refcounted only when a job is definitely about to be - * submitted to a slot, and is de-refcounted immediately after a job - * finishes */ s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64]; } runpool_irq; - - /** - * Scheduling semaphore. This must be held when calling - * kbase_jm_kick() - */ struct semaphore schedule_sem; - - /** - * List of contexts that can currently be pulled from - */ - struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT]; - /** - * List of contexts that can not currently be pulled from, but have - * jobs currently running. 
- */ - struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS][KBASE_JS_ATOM_SCHED_PRIO_COUNT]; - - /** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */ + struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS] + [KBASE_JS_ATOM_SCHED_PRIO_COUNT]; + struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS] + [KBASE_JS_ATOM_SCHED_PRIO_COUNT]; s8 nr_user_contexts_running; - /** Number of currently scheduled contexts (including ones that are not submitting jobs) */ s8 nr_all_contexts_running; - - /** Core Requirements to match up with base_js_atom's core_req memeber - * @note This is a write-once member, and so no locking is required to read */ base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS]; - u32 scheduling_period_ns; /*< Value for JS_SCHEDULING_PERIOD_NS */ - u32 soft_stop_ticks; /*< Value for JS_SOFT_STOP_TICKS */ - u32 soft_stop_ticks_cl; /*< Value for JS_SOFT_STOP_TICKS_CL */ - u32 hard_stop_ticks_ss; /*< Value for JS_HARD_STOP_TICKS_SS */ - u32 hard_stop_ticks_cl; /*< Value for JS_HARD_STOP_TICKS_CL */ - u32 hard_stop_ticks_dumping; /*< Value for JS_HARD_STOP_TICKS_DUMPING */ - u32 gpu_reset_ticks_ss; /*< Value for JS_RESET_TICKS_SS */ - u32 gpu_reset_ticks_cl; /*< Value for JS_RESET_TICKS_CL */ - u32 gpu_reset_ticks_dumping; /*< Value for JS_RESET_TICKS_DUMPING */ - u32 ctx_timeslice_ns; /**< Value for JS_CTX_TIMESLICE_NS */ + u32 scheduling_period_ns; + u32 soft_stop_ticks; + u32 soft_stop_ticks_cl; + u32 hard_stop_ticks_ss; + u32 hard_stop_ticks_cl; + u32 hard_stop_ticks_dumping; + u32 gpu_reset_ticks_ss; + u32 gpu_reset_ticks_cl; + u32 gpu_reset_ticks_dumping; + u32 ctx_timeslice_ns; - /** List of suspended soft jobs */ struct list_head suspended_soft_jobs_list; #ifdef CONFIG_MALI_DEBUG - /* Support soft-stop on a single context */ bool softstop_always; #endif /* CONFIG_MALI_DEBUG */ - - /** The initalized-flag is placed at the end, to avoid cache-pollution (we should - * only be using this during init/term 
paths). - * @note This is a write-once member, and so no locking is required to read */ int init_status; - - /* Number of contexts that can currently be pulled from */ u32 nr_contexts_pullable; - - /* Number of contexts that can either be pulled from or are currently - * running */ atomic_t nr_contexts_runnable; - - /** Value for JS_SOFT_JOB_TIMEOUT */ atomic_t soft_job_timeout_ms; - - /** - * Queue Lock, used to access the Policy's queue of contexts - * independently of the Run Pool. - * - * Of course, you don't need the Run Pool lock to access this. - */ struct mutex queue_mutex; - - /** + /* * Run Pool mutex, for managing contexts within the runpool. * Unless otherwise specified, you must hold this lock whilst accessing * any members that follow @@ -317,61 +313,59 @@ struct kbasep_js_device_data { }; /** - * @brief KBase Context Job Scheduling information structure + * struct kbasep_js_kctx_info - KBase Context Job Scheduling information + * structure + * @ctx: Job Scheduler Context information sub-structure. Its members are + * accessed regardless of whether the context is: + * - In the Policy's Run Pool + * - In the Policy's Queue + * - Not queued nor in the Run Pool. + * You must obtain the @ctx.jsctx_mutex before accessing any other members + * of this substructure. + * You may not access any of its members from IRQ context. + * @ctx.jsctx_mutex: Job Scheduler Context lock + * @ctx.nr_jobs: Number of jobs ready to run - does \em not include + * the jobs waiting in the dispatcher, and dependency-only + * jobs. See kbase_jd_context::job_nr for such jobs + * @ctx.ctx_attr_ref_count: Context Attributes ref count. Each is large enough + * to hold a refcount of the number of atoms on the context. + * @ctx.is_scheduled_wait: Wait queue to wait for KCTX_SHEDULED flag state + * changes. + * @ctx.ctx_list_entry: Link implementing JS queues. Context can be present on + * one list per job slot.
+ * @init_status: The initialized-flag is placed at the end, to avoid + * cache-pollution (we should only be using this during init/term paths) * * This is a substructure in the struct kbase_context that encapsulates all the * scheduling information. */ struct kbasep_js_kctx_info { - - /** - * Job Scheduler Context information sub-structure. These members are - * accessed regardless of whether the context is: - * - In the Policy's Run Pool - * - In the Policy's Queue - * - Not queued nor in the Run Pool. - * - * You must obtain the jsctx_mutex before accessing any other members of - * this substructure. - * - * You may not access any of these members from IRQ context. - */ struct kbase_jsctx { - struct mutex jsctx_mutex; /**< Job Scheduler Context lock */ + struct mutex jsctx_mutex; - /** Number of jobs ready to run - does \em not include the jobs waiting in - * the dispatcher, and dependency-only jobs. See kbase_jd_context::job_nr - * for such jobs*/ u32 nr_jobs; - - /** Context Attributes: - * Each is large enough to hold a refcount of the number of atoms on - * the context. **/ u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT]; - - /** - * Wait queue to wait for KCTX_SHEDULED flag state changes. - * */ wait_queue_head_t is_scheduled_wait; - - /** Link implementing JS queues. Context can be present on one - * list per job slot - */ struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS]; } ctx; - - /* The initalized-flag is placed at the end, to avoid cache-pollution (we should - * only be using this during init/term paths) */ int init_status; }; -/** Subset of atom state that can be available after jd_done_nolock() is called +/** + * struct kbasep_js_atom_retained_state - Subset of atom state. + * @event_code: to determine whether the atom has finished + * @core_req: core requirements + * @sched_priority: priority + * @device_nr: Core group atom was executed on + * + * Subset of atom state that can be available after jd_done_nolock() is called * on that atom.
A copy must be taken via kbasep_js_atom_retained_state_copy(), - * because the original atom could disappear. */ + * because the original atom could disappear. + */ struct kbasep_js_atom_retained_state { - /** Event code - to determine whether the atom has finished */ + /* Event code - to determine whether the atom has finished */ enum base_jd_event_code event_code; - /** core requirements */ + /* core requirements */ base_jd_core_req core_req; /* priority */ int sched_priority; @@ -380,30 +374,23 @@ struct kbasep_js_atom_retained_state { }; -/** +/* * Value signifying 'no retry on a slot required' for: * - kbase_js_atom_retained_state::retry_submit_on_slot * - kbase_jd_atom::retry_submit_on_slot */ #define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1) -/** - * base_jd_core_req value signifying 'invalid' for a kbase_jd_atom_retained_state. - * - * @see kbase_atom_retained_state_is_valid() +/* + * base_jd_core_req value signifying 'invalid' for a + * kbase_jd_atom_retained_state. See kbase_atom_retained_state_is_valid() */ #define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP -/** - * @brief The JS timer resolution, in microseconds - * +/* + * The JS timer resolution, in microseconds * Any non-zero difference in time will be at least this size. */ #define KBASEP_JS_TICK_RESOLUTION_US 1 - - /** @} *//* end group kbase_js */ - /** @} *//* end group base_kbase_api */ - /** @} *//* end group base_api */ - -#endif /* _KBASE_JS_DEFS_H_ */ +#endif /* _KBASE_JS_DEFS_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h index 6885f8d..93cd05f 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. 
+ * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features, @@ -50,12 +49,12 @@ enum base_hw_feature { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_TLS_HASHING, BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_L2_CONFIG, + BASE_HW_FEATURE_ASN_HASH, BASE_HW_FEATURE_END }; @@ -85,7 +84,6 @@ static const enum base_hw_feature base_hw_features_tMIx[] = { BASE_HW_FEATURE_THREAD_GROUP_SPLIT, BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_END }; @@ -112,7 +110,6 @@ static const enum base_hw_feature base_hw_features_tHEx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_END }; @@ -139,7 +136,6 @@ static const enum base_hw_feature base_hw_features_tSIx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_END }; @@ -166,7 +162,6 @@ static const enum base_hw_feature base_hw_features_tDVx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - 
BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_END }; @@ -193,7 +188,6 @@ static const enum base_hw_feature base_hw_features_tNOx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_TLS_HASHING, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_END @@ -222,7 +216,6 @@ static const enum base_hw_feature base_hw_features_tGOx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_TLS_HASHING, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_END @@ -250,7 +243,6 @@ static const enum base_hw_feature base_hw_features_tTRx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_END @@ -278,7 +270,6 @@ static const enum base_hw_feature base_hw_features_tNAx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_END @@ -306,14 +297,13 @@ static const enum base_hw_feature base_hw_features_tBEx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_tDUx[] = { +static const enum base_hw_feature base_hw_features_tBAx[] = { BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, BASE_HW_FEATURE_XAFFINITY, @@ -335,14 +325,13 @@ static const enum base_hw_feature base_hw_features_tDUx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, 
BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_tODx[] = { +static const enum base_hw_feature base_hw_features_tDUx[] = { BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, BASE_HW_FEATURE_XAFFINITY, @@ -364,13 +353,13 @@ static const enum base_hw_feature base_hw_features_tODx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, + BASE_HW_FEATURE_IDVS_GROUP_SIZE, BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_tGRx[] = { +static const enum base_hw_feature base_hw_features_tODx[] = { BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, BASE_HW_FEATURE_XAFFINITY, @@ -392,13 +381,12 @@ static const enum base_hw_feature base_hw_features_tGRx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_tVAx[] = { +static const enum base_hw_feature base_hw_features_tGRx[] = { BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, BASE_HW_FEATURE_XAFFINITY, @@ -420,13 +408,12 @@ static const enum base_hw_feature base_hw_features_tVAx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_tTUx[] = { +static const enum base_hw_feature base_hw_features_tVAx[] = { BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, 
BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, BASE_HW_FEATURE_XAFFINITY, @@ -448,39 +435,10 @@ static const enum base_hw_feature base_hw_features_tTUx[] = { BASE_HW_FEATURE_FLUSH_REDUCTION, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, BASE_HW_FEATURE_L2_CONFIG, BASE_HW_FEATURE_CLEAN_ONLY_SAFE, BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_tE2x[] = { - BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, - BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, - BASE_HW_FEATURE_WARPING, - BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, - BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, - BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - BASE_HW_FEATURE_BRNDOUT_CC, - BASE_HW_FEATURE_BRNDOUT_KILL, - BASE_HW_FEATURE_LD_ST_LEA_TEX, - BASE_HW_FEATURE_LD_ST_TILEBUFFER, - BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, - BASE_HW_FEATURE_MRT, - BASE_HW_FEATURE_MSAA_16X, - BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, - BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, - BASE_HW_FEATURE_T7XX_PAIRING_RULES, - BASE_HW_FEATURE_TEST4_DATUM_MODE, - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_DEBUG_MODE, - BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_AARCH64_MMU, - BASE_HW_FEATURE_IDVS_GROUP_SIZE, - BASE_HW_FEATURE_L2_CONFIG, - BASE_HW_FEATURE_CLEAN_ONLY_SAFE, - BASE_HW_FEATURE_END -}; #endif /* _BASE_HWCONFIG_FEATURES_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h index 3966069..beda1e4 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features, @@ -59,6 +58,7 @@ enum base_hw_issue { BASE_HW_ISSUE_TTRX_3470, BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_TTRX_3485, + BASE_HW_ISSUE_GPU2019_3212, BASE_HW_ISSUE_END }; @@ -532,79 +532,89 @@ static const enum base_hw_issue base_hw_issues_lBEx_r1p1[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = { +static const enum base_hw_issue base_hw_issues_tBAx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_model_tDUx[] = { - BASE_HW_ISSUE_5736, +static const enum base_hw_issue base_hw_issues_tBAx_r1p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_2968_TTRX_3162, + BASE_HW_ISSUE_TTRX_921, BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_TSIX_2033, - BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_model_tODx[] = { +static const enum base_hw_issue 
base_hw_issues_model_tBAx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, + BASE_HW_ISSUE_TTRX_3470, + BASE_HW_ISSUE_TTRX_3464, BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = { +static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_921, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_model_tGRx[] = { +static const enum base_hw_issue base_hw_issues_model_tDUx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TTRX_3414, + BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = { +static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_GPU2019_3212, BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_model_tVAx[] = { +static const enum base_hw_issue base_hw_issues_model_tODx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_GPU2019_3212, BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = { +static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_model_tTUx[] = { +static const enum base_hw_issue base_hw_issues_model_tGRx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, @@ -612,24 +622,20 @@ static const enum base_hw_issue base_hw_issues_model_tTUx[] = { BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_tE2x_r0p0[] = { +static const enum base_hw_issue 
base_hw_issues_tVAx_r0p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_921, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_END }; -static const enum base_hw_issue base_hw_issues_model_tE2x[] = { +static const enum base_hw_issue base_hw_issues_model_tVAx[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, - BASE_HW_ISSUE_TTRX_3414, - BASE_HW_ISSUE_TTRX_3083, BASE_HW_ISSUE_END }; + #endif /* _BASE_HWCONFIG_ISSUES_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase.h index 907142d..56db8ca 100755 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ - - #ifndef _KBASE_H_ #define _KBASE_H_ @@ -38,7 +35,7 @@ #include #include #include -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)) +#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE) #include #endif #include @@ -46,8 +43,9 @@ #include #include #include +#include -#include "mali_base_kernel.h" +#include #include /* @@ -66,26 +64,40 @@ #include "mali_kbase_gpu_memory_debugfs.h" #include "mali_kbase_mem_profile_debugfs.h" #include "mali_kbase_gpuprops.h" -#include "mali_kbase_ioctl.h" +#include +#if !MALI_USE_CSF #include "mali_kbase_debug_job_fault.h" #include "mali_kbase_jd_debugfs.h" #include "mali_kbase_jm.h" #include "mali_kbase_js.h" +#endif /* !MALI_USE_CSF */ #include "ipa/mali_kbase_ipa.h" -#ifdef CONFIG_GPU_TRACEPOINTS +#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) #include #endif #include "mali_linux_trace.h" +#if MALI_USE_CSF +#include "csf/mali_kbase_csf.h" +#endif #ifndef u64_to_user_ptr /* Introduced in Linux v4.6 */ #define u64_to_user_ptr(x) ((void __user *)(uintptr_t)x) #endif +#if MALI_USE_CSF +/* Physical memory group ID for CSF user I/O. + */ +#define KBASE_MEM_GROUP_CSF_IO BASE_MEM_GROUP_DEFAULT + +/* Physical memory group ID for CSF firmware. + */ +#define KBASE_MEM_GROUP_CSF_FW BASE_MEM_GROUP_DEFAULT +#endif /* Physical memory group ID for a special page which can alias several regions. 
*/ @@ -140,9 +152,9 @@ void kbase_release_device(struct kbase_device *kbdev); * the flag @ref KBASE_REG_TILER_ALIGN_TOP (check the flags of the kbase * region): * - alignment offset is set to the difference between the kbase region - * extent (converted from the original value in pages to bytes) and the kbase + * extension (converted from the original value in pages to bytes) and the kbase * region initial_commit (also converted from the original value in pages to - * bytes); alignment mask is set to the kbase region extent in bytes and + * bytes); alignment mask is set to the kbase region extension in bytes and * decremented by 1. * * Return: if successful, address of the unmapped area aligned as required; @@ -184,7 +196,7 @@ void kbase_device_pm_term(struct kbase_device *kbdev); int power_control_init(struct kbase_device *kbdev); void power_control_term(struct kbase_device *kbdev); -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) void kbase_device_debugfs_term(struct kbase_device *kbdev); int kbase_device_debugfs_init(struct kbase_device *kbdev); #else /* CONFIG_DEBUG_FS */ @@ -201,11 +213,8 @@ void registers_unmap(struct kbase_device *kbdev); int kbase_device_coherency_init(struct kbase_device *kbdev); -#ifdef CONFIG_MALI_BUSLOG -int buslog_init(struct kbase_device *kbdev); -void buslog_term(struct kbase_device *kbdev); -#endif +#if !MALI_USE_CSF int kbase_jd_init(struct kbase_context *kctx); void kbase_jd_exit(struct kbase_context *kctx); @@ -213,9 +222,9 @@ void kbase_jd_exit(struct kbase_context *kctx); * kbase_jd_submit - Submit atoms to the job dispatcher * * @kctx: The kbase context to submit to - * @user_addr: The address in user space of the struct base_jd_atom_v2 array + * @user_addr: The address in user space of the struct base_jd_atom array * @nr_atoms: The number of atoms in the array - * @stride: sizeof(struct base_jd_atom_v2) + * @stride: sizeof(struct base_jd_atom) * @uk6_atom: true if the atoms are legacy atoms (struct 
base_jd_atom_v2_uk6) * * Return: 0 on success or error code @@ -306,9 +315,12 @@ void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, struct kbase_jd_atom *target_katom); +#endif /* !MALI_USE_CSF */ void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event); +#if !MALI_USE_CSF int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent); +#endif /* !MALI_USE_CSF */ int kbase_event_pending(struct kbase_context *ctx); int kbase_event_init(struct kbase_context *kctx); void kbase_event_close(struct kbase_context *kctx); @@ -372,6 +384,7 @@ static inline void kbase_free_user_buffer( */ int kbase_mem_copy_from_extres(struct kbase_context *kctx, struct kbase_debug_copy_buffer *buf_data); +#if !MALI_USE_CSF int kbase_process_soft_job(struct kbase_jd_atom *katom); int kbase_prepare_soft_job(struct kbase_jd_atom *katom); void kbase_finish_soft_job(struct kbase_jd_atom *katom); @@ -387,18 +400,21 @@ int kbase_soft_event_update(struct kbase_context *kctx, void kbasep_soft_job_timeout_worker(struct timer_list *timer); void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt); +#endif /* !MALI_USE_CSF */ void kbasep_as_do_poke(struct work_struct *work); /** * Check whether a system suspend is in progress, or has already been suspended + * @kbdev: The kbase device structure for the device * * The caller should ensure that either kbdev->pm.active_count_lock is held, or * a dmb was executed recently (to ensure the value is most * up-to-date). However, without a lock the value could change afterwards. 
* - * @return false if a suspend is not in progress - * @return !=false otherwise + * Return: + * * false if a suspend is not in progress + * * !=false otherwise */ static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev) { @@ -419,7 +435,27 @@ static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev) */ static inline bool kbase_pm_is_gpu_lost(struct kbase_device *kbdev) { - return kbdev->pm.gpu_lost; + return (atomic_read(&kbdev->pm.gpu_lost) == 0 ? false : true); +} + +/* + * Set or clear gpu lost state + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @gpu_lost: true to activate GPU lost state, false to deactivate it + * + * Puts power management code into gpu lost state or takes it out of the + * state. Once in gpu lost state new GPU jobs will no longer be + * scheduled. + */ +static inline void kbase_pm_set_gpu_lost(struct kbase_device *kbdev, + bool gpu_lost) +{ + const int new_val = (gpu_lost ? 1 : 0); + const int cur_val = atomic_xchg(&kbdev->pm.gpu_lost, new_val); + + if (new_val != cur_val) + KBASE_KTRACE_ADD(kbdev, ARB_GPU_LOST, NULL, new_val); } #endif @@ -455,9 +491,12 @@ void kbase_pm_metrics_start(struct kbase_device *kbdev); */ void kbase_pm_metrics_stop(struct kbase_device *kbdev); +#if !MALI_USE_CSF /** * Return the atom's ID, as was originally supplied by userspace in - * base_jd_atom_v2::atom_number + * base_jd_atom::atom_number + * @kctx: KBase context pointer + * @katom: Atom for which to return ID */ static inline int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom) { @@ -484,6 +523,7 @@ static inline struct kbase_jd_atom *kbase_jd_atom_from_id( { return &kctx->jctx.atoms[id]; } +#endif /* !MALI_USE_CSF */ /** * Initialize the disjoint state @@ -508,7 +548,7 @@ static inline struct kbase_jd_atom *kbase_jd_atom_from_id( * The disjoint event counter is also incremented immediately whenever a job is soft stopped * and during context creation.
* - * @param kbdev The kbase device + * @kbdev: The kbase device * * Return: 0 on success and non-zero value on failure. */ @@ -518,7 +558,7 @@ void kbase_disjoint_init(struct kbase_device *kbdev); * Increase the count of disjoint events * called when a disjoint event has happened * - * @param kbdev The kbase device + * @kbdev: The kbase device */ void kbase_disjoint_event(struct kbase_device *kbdev); @@ -528,14 +568,14 @@ void kbase_disjoint_event(struct kbase_device *kbdev); * This should be called when something happens which could be disjoint if the GPU * is in a disjoint state. The state refcount keeps track of this. * - * @param kbdev The kbase device + * @kbdev: The kbase device */ void kbase_disjoint_event_potential(struct kbase_device *kbdev); /** * Returns the count of disjoint events * - * @param kbdev The kbase device + * @kbdev: The kbase device * @return the count of disjoint events */ u32 kbase_disjoint_event_get(struct kbase_device *kbdev); @@ -547,7 +587,7 @@ u32 kbase_disjoint_event_get(struct kbase_device *kbdev); * eventually after the disjoint state has completed @ref kbase_disjoint_state_down * should be called * - * @param kbdev The kbase device + * @kbdev: The kbase device */ void kbase_disjoint_state_up(struct kbase_device *kbdev); @@ -558,68 +598,43 @@ void kbase_disjoint_state_up(struct kbase_device *kbdev); * * Called after @ref kbase_disjoint_state_up once the disjoint state is over * - * @param kbdev The kbase device + * @kbdev: The kbase device */ void kbase_disjoint_state_down(struct kbase_device *kbdev); /** - * If a job is soft stopped and the number of contexts is >= this value - * it is reported as a disjoint event - */ -#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2 - -#if !defined(UINT64_MAX) - #define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL) -#endif - -#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) - -/* kbase_io_history_init - initialize data struct for register access history - * - * 
@kbdev The register history to initialize - * @n The number of register accesses that the buffer could hold + * kbase_device_pcm_dev_init() - Initialize the priority control manager device * - * @return 0 if successfully initialized, failure otherwise - */ -int kbase_io_history_init(struct kbase_io_history *h, u16 n); - -/* kbase_io_history_term - uninit all resources for the register access history + * @kbdev: Pointer to the structure for the kbase device * - * @h The register history to terminate - */ -void kbase_io_history_term(struct kbase_io_history *h); - -/* kbase_io_history_dump - print the register history to the kernel ring buffer + * Pointer to the priority control manager device is retrieved from the device + * tree and a reference is taken on the module implementing the callbacks for + * priority control manager operations. * - * @kbdev Pointer to kbase_device containing the register history to dump + * Return: 0 if successful, or an error code on failure */ -void kbase_io_history_dump(struct kbase_device *kbdev); +int kbase_device_pcm_dev_init(struct kbase_device *const kbdev); /** - * kbase_io_history_resize - resize the register access history buffer. + * kbase_device_pcm_dev_term() - Performs priority control manager device + * deinitialization. * - * @h: Pointer to a valid register history to resize - * @new_size: Number of accesses the buffer could hold + * @kbdev: Pointer to the structure for the kbase device * - * A successful resize will clear all recent register accesses. - * If resizing fails for any reason (e.g., could not allocate memory, invalid - * buffer size) then the original buffer will be kept intact. - * - * @return 0 if the buffer was resized, failure otherwise + * Reference is released on the module implementing the callbacks for priority + * control manager operations. 
*/ -int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size); - -#else /* CONFIG_DEBUG_FS */ +void kbase_device_pcm_dev_term(struct kbase_device *const kbdev); -#define kbase_io_history_init(...) ((int)0) - -#define kbase_io_history_term CSTD_NOP - -#define kbase_io_history_dump CSTD_NOP - -#define kbase_io_history_resize CSTD_NOP +/** + * If a job is soft stopped and the number of contexts is >= this value + * it is reported as a disjoint event + */ +#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2 -#endif /* CONFIG_DEBUG_FS */ +#if !defined(UINT64_MAX) + #define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL) +#endif /*meson graphics start */ extern int meson_gpu_data_invalid_count; diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c index 2e2e394..027eb8c 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #include @@ -26,7 +25,7 @@ #include #include -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) #ifdef CONFIG_MALI_DEBUG static int kbase_as_fault_read(struct seq_file *sfile, void *data) @@ -80,7 +79,7 @@ static const struct file_operations as_fault_fops = { */ void kbase_as_fault_debugfs_init(struct kbase_device *kbdev) { -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) #ifdef CONFIG_MALI_DEBUG uint i; char as_name[64]; diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h index 496d8b1..919fbc1 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KBASE_AS_FAULT_DEBUG_FS_H @@ -39,7 +38,7 @@ void kbase_as_fault_debugfs_init(struct kbase_device *kbdev); static inline void kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no) { -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) #ifdef CONFIG_MALI_DEBUG kbdev->debugfs_as_read_bitmap |= (1ULL << as_no); #endif /* CONFIG_DEBUG_FS */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_bits.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_bits.h index 2c11093..a085fd8 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_bits.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_bits.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,17 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * - *//* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. 
*/ #ifndef _KBASE_BITS_H_ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c index 27a03cf..af51ed8 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - - /* * Cache Policy API. 
*/ @@ -58,10 +55,11 @@ void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, { dma_sync_single_for_device(kbdev->dev, handle, size, dir); } - +KBASE_EXPORT_TEST_API(kbase_sync_single_for_device); void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { dma_sync_single_for_cpu(kbdev->dev, handle, size, dir); } +KBASE_EXPORT_TEST_API(kbase_sync_single_for_cpu); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h index 8a1e529..7da33a6 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2013, 2015, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - - /* * Cache Policy API. 
*/ @@ -30,7 +27,7 @@ #define _KBASE_CACHE_POLICY_H_ #include "mali_kbase.h" -#include "mali_base_kernel.h" +#include /** * kbase_cache_enabled - Choose the cache policy for a specific region diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_caps.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_caps.h new file mode 100644 index 0000000..c232e21 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_caps.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/** + * DOC: Driver Capability Queries. 
+ */ + +#ifndef _KBASE_CAPS_H_ +#define _KBASE_CAPS_H_ + +#include + +typedef enum mali_kbase_cap { + MALI_KBASE_CAP_SYSTEM_MONITOR = 0, + MALI_KBASE_CAP_JIT_PRESSURE_LIMIT, + MALI_KBASE_CAP_MEM_GROW_ON_GPF, + MALI_KBASE_CAP_MEM_PROTECTED, + MALI_KBASE_NUM_CAPS +} mali_kbase_cap; + +extern bool mali_kbase_supports_cap(unsigned long api_version, mali_kbase_cap cap); + +static inline bool mali_kbase_supports_system_monitor(unsigned long api_version) +{ + return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_SYSTEM_MONITOR); +} + +static inline bool mali_kbase_supports_jit_pressure_limit(unsigned long api_version) +{ + return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_JIT_PRESSURE_LIMIT); +} + +static inline bool mali_kbase_supports_mem_grow_on_gpf(unsigned long api_version) +{ + return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_GROW_ON_GPF); +} + +static inline bool mali_kbase_supports_mem_protected(unsigned long api_version) +{ + return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_PROTECTED); +} + +#endif /* _KBASE_CAPS_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ccswe.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ccswe.c new file mode 100644 index 0000000..6a1e7e4 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ccswe.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include "mali_kbase_ccswe.h" +#include "mali_kbase_linux.h" + +#include +#include + +static u64 kbasep_ccswe_cycle_at_no_lock( + struct kbase_ccswe *self, u64 timestamp_ns) +{ + s64 diff_s, diff_ns; + u32 gpu_freq; + + lockdep_assert_held(&self->access); + + diff_ns = timestamp_ns - self->timestamp_ns; + gpu_freq = diff_ns > 0 ? self->gpu_freq : self->prev_gpu_freq; + + diff_s = div_s64(diff_ns, NSEC_PER_SEC); + diff_ns -= diff_s * NSEC_PER_SEC; + + return self->cycles_elapsed + diff_s * gpu_freq + + div_s64(diff_ns * gpu_freq, NSEC_PER_SEC); +} + +void kbase_ccswe_init(struct kbase_ccswe *self) +{ + memset(self, 0, sizeof(*self)); + + spin_lock_init(&self->access); +} + +u64 kbase_ccswe_cycle_at(struct kbase_ccswe *self, u64 timestamp_ns) +{ + unsigned long flags; + u64 result; + + spin_lock_irqsave(&self->access, flags); + result = kbasep_ccswe_cycle_at_no_lock(self, timestamp_ns); + spin_unlock_irqrestore(&self->access, flags); + + return result; +} + +void kbase_ccswe_freq_change( + struct kbase_ccswe *self, u64 timestamp_ns, u32 gpu_freq) +{ + unsigned long flags; + + spin_lock_irqsave(&self->access, flags); + + /* The time must go only forward. */ + if (WARN_ON(timestamp_ns < self->timestamp_ns)) + goto exit; + + /* If this is the first frequency change, cycles_elapsed is zero. 
*/ + if (self->timestamp_ns) + self->cycles_elapsed = kbasep_ccswe_cycle_at_no_lock( + self, timestamp_ns); + + self->timestamp_ns = timestamp_ns; + self->prev_gpu_freq = self->gpu_freq; + self->gpu_freq = gpu_freq; +exit: + spin_unlock_irqrestore(&self->access, flags); +} + +void kbase_ccswe_reset(struct kbase_ccswe *self) +{ + unsigned long flags; + + spin_lock_irqsave(&self->access, flags); + + self->timestamp_ns = 0; + self->cycles_elapsed = 0; + self->gpu_freq = 0; + self->prev_gpu_freq = 0; + + spin_unlock_irqrestore(&self->access, flags); +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ccswe.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ccswe.h new file mode 100644 index 0000000..8e55ffc --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ccswe.h @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CCSWE_H_ +#define _KBASE_CCSWE_H_ + +#include + +/** + * struct kbase_ccswe - Cycle count software estimator. + * + * @access: Spinlock protecting this structure access. + * @timestamp_ns: Timestamp(ns) when the last frequency change + * occurred. 
+ * @cycles_elapsed: Number of cycles elapsed before the last frequency + * change + * @gpu_freq: Current GPU frequency(Hz) value. + * @prev_gpu_freq: Previous GPU frequency(Hz) before the last frequency + * change. + */ +struct kbase_ccswe { + spinlock_t access; + u64 timestamp_ns; + u64 cycles_elapsed; + u32 gpu_freq; + u32 prev_gpu_freq; +}; + +/** + * kbase_ccswe_init() - initialize the cycle count estimator. + * + * @self: Cycles count software estimator instance. + */ +void kbase_ccswe_init(struct kbase_ccswe *self); + + +/** + * kbase_ccswe_cycle_at() - Estimate cycle count at given timestamp. + * + * @self: Cycles count software estimator instance. + * @timestamp_ns: The timestamp(ns) for cycle count estimation. + * + * The timestamp must be bigger than the timestamp of the penultimate + * frequency change. If only one frequency change occurred, the + * timestamp must be bigger than the timestamp of the frequency change. + * This is to allow the following code to be executed w/o synchronization. + * If the lines below are executed atomically, it is safe to assume that only + * one frequency change may happen in between. + * + * u64 ts = ktime_get_raw_ns(); + * u64 cycle = kbase_ccswe_cycle_at(&ccswe, ts); + * + * Return: estimated value of cycle count at a given time. + */ +u64 kbase_ccswe_cycle_at(struct kbase_ccswe *self, u64 timestamp_ns); + +/** + * kbase_ccswe_freq_change() - update GPU frequency. + * + * @self: Cycles count software estimator instance. + * @timestamp_ns: Timestamp(ns) when frequency change occurred. + * @gpu_freq: New GPU frequency value. + * + * The timestamp must be bigger than the timestamp of the previous + * frequency change. The function is to be called at the frequency + * change moment (not later). + */ +void kbase_ccswe_freq_change( + struct kbase_ccswe *self, u64 timestamp_ns, u32 gpu_freq); + +/** + * kbase_ccswe_reset() - reset estimator state + * + * @self: Cycles count software estimator instance.
+ */ +void kbase_ccswe_reset(struct kbase_ccswe *self); + +#endif /* _KBASE_CCSWE_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c index ce7070d..37dbca1 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2015,2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2015, 2017, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ - - #include #include #include @@ -46,3 +43,62 @@ void kbasep_platform_device_term(struct kbase_device *kbdev) platform_funcs_p->platform_term_func(kbdev); } +int kbasep_platform_device_late_init(struct kbase_device *kbdev) +{ + struct kbase_platform_funcs_conf *platform_funcs_p; + + platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; + if (platform_funcs_p && platform_funcs_p->platform_late_init_func) + platform_funcs_p->platform_late_init_func(kbdev); + + return 0; +} + +void kbasep_platform_device_late_term(struct kbase_device *kbdev) +{ + struct kbase_platform_funcs_conf *platform_funcs_p; + + platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; + if (platform_funcs_p && platform_funcs_p->platform_late_term_func) + platform_funcs_p->platform_late_term_func(kbdev); +} + +#if !MALI_USE_CSF +int kbasep_platform_context_init(struct kbase_context *kctx) +{ + struct kbase_platform_funcs_conf *platform_funcs_p; + + platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; + if (platform_funcs_p && platform_funcs_p->platform_handler_context_init_func) + return platform_funcs_p->platform_handler_context_init_func(kctx); + + return 0; +} + +void kbasep_platform_context_term(struct kbase_context *kctx) +{ + struct kbase_platform_funcs_conf *platform_funcs_p; + + platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; + if (platform_funcs_p && platform_funcs_p->platform_handler_context_term_func) + platform_funcs_p->platform_handler_context_term_func(kctx); +} + +void kbasep_platform_event_atom_submit(struct kbase_jd_atom *katom) +{ + struct kbase_platform_funcs_conf *platform_funcs_p; + + platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; + if (platform_funcs_p && platform_funcs_p->platform_handler_atom_submit_func) + platform_funcs_p->platform_handler_atom_submit_func(katom); +} + +void kbasep_platform_event_atom_complete(struct kbase_jd_atom 
*katom) +{ + struct kbase_platform_funcs_conf *platform_funcs_p; + + platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; + if (platform_funcs_p && platform_funcs_p->platform_handler_atom_complete_func) + platform_funcs_p->platform_handler_atom_complete_func(katom); +} +#endif diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h index 69723ea..e7eb334 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2017, 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2017, 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,15 +17,10 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ - - /** - * @file mali_kbase_config.h - * Configuration API and Attributes for KBase + * DOC: Configuration API and Attributes for KBase */ #ifndef _KBASE_CONFIG_H_ @@ -32,37 +28,31 @@ #include #include -#include +#include #include -/** - * @addtogroup base_api - * @{ - */ - -/** - * @addtogroup base_kbase_api - * @{ - */ - -/** - * @addtogroup kbase_config Configuration API and Attributes - * @{ - */ - /* Forward declaration of struct kbase_device */ struct kbase_device; +#if !MALI_USE_CSF +/* Forward declaration of struct kbase_context */ +struct kbase_context; + +/* Forward declaration of struct kbase_atom */ +struct kbase_jd_atom; +#endif + /** - * kbase_platform_funcs_conf - Specifies platform init/term function pointers + * struct kbase_platform_funcs_conf - Specifies platform integration function + * pointers for DDK events such as device init and term. * * Specifies the functions pointers for platform specific initialization and - * termination. By default no functions are required. No additional platform - * specific control is necessary. + * termination as well as other events. By default no functions are required. + * No additional platform specific control is necessary. */ struct kbase_platform_funcs_conf { /** - * platform_init_func - platform specific init function pointer + * @platform_init_func: platform specific init function pointer * @kbdev - kbase_device pointer * * Returns 0 on success, negative error code otherwise. @@ -77,7 +67,7 @@ struct kbase_platform_funcs_conf { */ int (*platform_init_func)(struct kbase_device *kbdev); /** - * platform_term_func - platform specific termination function pointer + * @platform_term_func: platform specific termination function pointer * @kbdev - kbase_device pointer * * Function pointer for platform specific termination or NULL if no @@ -88,6 +78,84 @@ struct kbase_platform_funcs_conf { * can be accessed (and possibly terminated) in here. 
*/ void (*platform_term_func)(struct kbase_device *kbdev); + + /** + * @platform_late_init_func: platform specific late init function pointer + * @kbdev - kbase_device pointer + * + * Function pointer to inform that the kbase driver initialization completed + * or NULL if no such function is required. At this point the GPU driver will be + * fully initialized. + * + * The platform specific private pointer kbase_device::platform_context + * can be accessed (and possibly terminated) in here. + */ + int (*platform_late_init_func)(struct kbase_device *kbdev); + + /** + * @platform_late_term_func: platform specific late termination function pointer + * @kbdev - kbase_device pointer + * + * Function pointer for platform specific termination or NULL if no + * termination function is required. At this point the GPU driver will complete + * termination process + * + * The platform specific private pointer kbase_device::platform_context + * can be accessed (and possibly terminated) in here. + */ + void (*platform_late_term_func)(struct kbase_device *kbdev); + +#if !MALI_USE_CSF + /** + * @platform_handler_context_init_func: platform specific handler for + * when a new kbase_context is created. + * @kctx - kbase_context pointer + * + * Returns 0 on success, negative error code otherwise. + * + * Function pointer for platform specific initialization of a kernel + * context or NULL if not required. Called at the last stage of kernel + * context initialization. + */ + int (*platform_handler_context_init_func)(struct kbase_context *kctx); + /** + * @platform_handler_context_term_func: platform specific handler for + * when a kbase_context is terminated. + * @kctx - kbase_context pointer + * + * Function pointer for platform specific termination of a kernel + * context or NULL if not required. Called at the first stage of kernel + * context termination. 
+ */ + void (*platform_handler_context_term_func)(struct kbase_context *kctx); + /** + * @platform_handler_atom_submit_func: platform specific handler for + * when a kbase_jd_atom is submitted. + * @katom - kbase_jd_atom pointer + * + * Function pointer for platform specific handling at the point when an + * atom is submitted to the GPU or set to NULL if not required. The + * function cannot assume that it is running in a process context. + * + * Context: The caller must hold the hwaccess_lock. Function must be + * runnable in an interrupt context. + */ + void (*platform_handler_atom_submit_func)(struct kbase_jd_atom *katom); + /** + * @platform_handler_atom_complete_func: platform specific handler for + * when a kbase_jd_atom completes. + * @katom - kbase_jd_atom pointer + * + * Function pointer for platform specific handling at the point when an + * atom stops running on the GPU or set to NULL if not required. The + * function cannot assume that it is running in a process context. + * + * Context: The caller must hold the hwaccess_lock. Function must be + * runnable in an interrupt context. + */ + void (*platform_handler_atom_complete_func)( + struct kbase_jd_atom *katom); +#endif }; /* @@ -223,7 +291,90 @@ struct kbase_pm_callback_conf { int (*soft_reset_callback)(struct kbase_device *kbdev); }; -#ifdef CONFIG_OF +/* struct kbase_gpu_clk_notifier_data - Data for clock rate change notifier. + * + * Pointer to this structure is supposed to be passed to the gpu clock rate + * change notifier function. This structure is deliberately aligned with the + * common clock framework notification structure 'struct clk_notifier_data' + * and such alignment should be maintained. + * + * @gpu_clk_handle: Handle of the GPU clock for which notifier was registered. + * @old_rate: Previous rate of this GPU clock in Hz. + * @new_rate: New rate of this GPU clock in Hz. 
+ */ +struct kbase_gpu_clk_notifier_data { + void *gpu_clk_handle; + unsigned long old_rate; + unsigned long new_rate; +}; + +/** + * struct kbase_clk_rate_trace_op_conf - Specifies GPU clock rate trace + * operations. + * + * Specifies the functions pointers for platform specific GPU clock rate trace + * operations. By default no functions are required. + */ +struct kbase_clk_rate_trace_op_conf { + /** + * @enumerate_gpu_clk: Enumerate a GPU clock on the given index + * @kbdev - kbase_device pointer + * @index - GPU clock index + * + * Returns a handle unique to the given GPU clock, or NULL if the clock + * array has been exhausted at the given index value. + * + * Kbase will use this function pointer to enumerate the existence of a + * GPU clock on the given index. + */ + void *(*enumerate_gpu_clk)(struct kbase_device *kbdev, + unsigned int index); + + /** + * @get_gpu_clk_rate: Get the current rate for an enumerated clock. + * @kbdev - kbase_device pointer + * @gpu_clk_handle - Handle unique to the enumerated GPU clock + * + * Returns current rate of the GPU clock in unit of Hz. + */ + unsigned long (*get_gpu_clk_rate)(struct kbase_device *kbdev, + void *gpu_clk_handle); + + /** + * @gpu_clk_notifier_register: Register a clock rate change notifier. + * @kbdev - kbase_device pointer + * @gpu_clk_handle - Handle unique to the enumerated GPU clock + * @nb - notifier block containing the callback function + * pointer + * + * Returns 0 on success, negative error code otherwise. + * + * This function pointer is used to register a callback function that + * is supposed to be invoked whenever the rate of clock corresponding + * to @gpu_clk_handle changes. + * @nb contains the pointer to callback function. + * The callback function expects the pointer of type + * 'struct kbase_gpu_clk_notifier_data' as the third argument. 
+ */ + int (*gpu_clk_notifier_register)(struct kbase_device *kbdev, + void *gpu_clk_handle, struct notifier_block *nb); + + /** + * @gpu_clk_notifier_unregister: Unregister clock rate change notifier + * @kbdev - kbase_device pointer + * @gpu_clk_handle - Handle unique to the enumerated GPU clock + * @nb - notifier block containing the callback function + * pointer + * + * This function pointer is used to unregister a callback function that + * was previously registered to get notified of the change in rate + * of clock corresponding to @gpu_clk_handle. + */ + void (*gpu_clk_notifier_unregister)(struct kbase_device *kbdev, + void *gpu_clk_handle, struct notifier_block *nb); +}; + +#if IS_ENABLED(CONFIG_OF) struct kbase_platform_config { }; #else @@ -253,7 +404,7 @@ struct kbase_platform_config { #endif /* CONFIG_OF */ /** - * @brief Gets the pointer to platform config. + * kbase_get_platform_config - Gets the pointer to platform config. * * @return Pointer to the platform config */ @@ -284,6 +435,83 @@ int kbasep_platform_device_init(struct kbase_device *kbdev); */ void kbasep_platform_device_term(struct kbase_device *kbdev); +/** + * kbasep_platform_device_late_init - Platform specific call to finish hardware + * initialization + * @kbdev: kbase device pointer + * + * Function calls a platform defined routine if specified in the configuration + * attributes. The routine can initialize any hardware and context state that + * is required for the GPU block to function. + * + * Return: 0 if no errors have been found in the config. + * Negative error code otherwise. + */ +int kbasep_platform_device_late_init(struct kbase_device *kbdev); + +/** + * kbasep_platform_device_late_term - Platform specific call to finish hardware + * termination + * @kbdev: Kbase device pointer + * + * Function calls a platform defined routine if specified in the configuration + * attributes.
The routine can destroy any platform specific context state and + * shut down any hardware functionality that is outside of the Power Management + * callbacks. + * + */ +void kbasep_platform_device_late_term(struct kbase_device *kbdev); + +#if !MALI_USE_CSF +/** + * kbasep_platform_context_init - Platform specific callback when a kernel + * context is created + * @kctx: kbase_context pointer + * + * Function calls a platform defined routine if specified in the configuration + * attributes. The routine can initialize any per kernel context structures + * that are required for the GPU block to function. + * + * Return: 0 if no errors were encountered. Negative error code otherwise. + */ +int kbasep_platform_context_init(struct kbase_context *kctx); + +/** + * kbasep_platform_context_term - Platform specific callback when a kernel + * context is terminated + * @kctx: kbase_context pointer + * + * Function calls a platform defined routine if specified in the configuration + * attributes. The routine should terminate any per kernel context structures + * created as part of &kbasep_platform_context_init. + * + */ +void kbasep_platform_context_term(struct kbase_context *kctx); + +/** + * kbasep_platform_event_atom_submit - Platform specific callback when an atom + * is submitted to the GPU + * @katom: kbase_jd_atom pointer + * + * Function calls a platform defined routine if specified in the configuration + * attributes. The routine should not assume that it is in a process context. + * + * Context: The caller must hold the hwaccess_lock. + */ +void kbasep_platform_event_atom_submit(struct kbase_jd_atom *katom); + +/** + * kbasep_platform_event_atom_complete - Platform specific callback when an atom + * has stopped running on the GPU + * @katom: kbase_jd_atom pointer + * + * Function calls a platform defined routine if specified in the configuration + * attributes. The routine should not assume that it is in a process context.
+ * + */ +void kbasep_platform_event_atom_complete(struct kbase_jd_atom *katom); +#endif + #ifndef CONFIG_OF /** * kbase_platform_register - Register a platform device for the GPU @@ -304,8 +532,4 @@ int kbase_platform_register(void); void kbase_platform_unregister(void); #endif - /** @} *//* end group kbase_config */ - /** @} *//* end group base_kbase_api */ - /** @} *//* end group base_api */ - #endif /* _KBASE_CONFIG_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h index e079281..63c36e2 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2013-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,14 +17,10 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /** - * @file mali_kbase_config_defaults.h - * - * Default values for configuration settings + * DOC: Default values for configuration settings * */ @@ -88,29 +85,38 @@ enum { }; /** - * Default period for DVFS sampling + * Default period for DVFS sampling (can be overridden by platform header) */ +#ifndef DEFAULT_PM_DVFS_PERIOD #define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */ +#endif /** * Power Management poweroff tick granuality. 
This is in nanoseconds to - * allow HR timer support. + * allow HR timer support (can be overridden by platform header). * * On each scheduling tick, the power manager core may decide to: * -# Power off one or more shader cores * -# Power off the entire GPU */ +#ifndef DEFAULT_PM_GPU_POWEROFF_TICK_NS #define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */ +#endif /** * Power Manager number of ticks before shader cores are powered off + * (can be overridden by platform header). */ +#ifndef DEFAULT_PM_POWEROFF_TICK_SHADER #define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */ +#endif /** - * Default scheduling tick granuality + * Default scheduling tick granuality (can be overridden by platform header) */ +#ifndef DEFAULT_JS_SCHEDULING_PERIOD_NS #define DEFAULT_JS_SCHEDULING_PERIOD_NS (100000000u) /* 100ms */ +#endif /** * Default minimum number of scheduling ticks before jobs are soft-stopped. diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c index fd5b8f0..ce84219 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_core_linux.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #include @@ -28,37 +27,39 @@ #ifdef CONFIG_MALI_DEVFREQ #include #include -#ifdef CONFIG_DEVFREQ_THERMAL +#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) #include #endif /* CONFIG_DEVFREQ_THERMAL */ #endif /* CONFIG_MALI_DEVFREQ */ -#ifdef CONFIG_MALI_NO_MALI -#include "mali_kbase_model_linux.h" -#include -#endif /* CONFIG_MALI_NO_MALI */ #include "mali_kbase_mem_profile_debugfs_buf_size.h" -#include "mali_kbase_debug_mem_view.h" #include "mali_kbase_mem.h" #include "mali_kbase_mem_pool_debugfs.h" +#include "mali_kbase_mem_pool_group.h" #include "mali_kbase_debugfs_helper.h" -#if !MALI_CUSTOMER_RELEASE -#include "mali_kbase_regs_dump_debugfs.h" -#endif /* !MALI_CUSTOMER_RELEASE */ #include "mali_kbase_regs_history_debugfs.h" #include #include +#if !MALI_USE_CSF #include -#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS +#endif /* !MALI_USE_CSF */ +#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS #include #endif -#include #include -#include -#include "mali_kbase_ioctl.h" +#include +#if !MALI_USE_CSF +#include "mali_kbase_kinstr_jm.h" +#endif #include "mali_kbase_hwcnt_context.h" #include "mali_kbase_hwcnt_virtualizer.h" #include "mali_kbase_hwcnt_legacy.h" #include "mali_kbase_vinstr.h" +#if MALI_USE_CSF +#include "csf/mali_kbase_csf_firmware.h" +#include "csf/mali_kbase_csf_tiler_heap.h" +#include "csf/mali_kbase_csf_csg_debugfs.h" +#include "csf/mali_kbase_csf_cpu_queue_debugfs.h" +#endif #ifdef CONFIG_MALI_ARBITER_SUPPORT #include "arbiter/mali_kbase_arbiter_pm.h" #endif @@ -68,7 +69,8 @@ #ifdef CONFIG_MALI_CINSTR_GWT #include "mali_kbase_gwt.h" #endif -#include "mali_kbase_pm_internal.h" +#include "backend/gpu/mali_kbase_pm_internal.h" +#include "mali_kbase_dvfs_debugfs.h" #include #include @@ -99,13 +101,7 @@ #include - -#if (KERNEL_VERSION(3, 13, 0) <= LINUX_VERSION_CODE) #include -#else -#include -#endif - #include #include @@ -114,6 +110,8 @@ #include #include +#include + /* GPU IRQ Tags */ #define JOB_IRQ_TAG 0 
#define MMU_IRQ_TAG 1 @@ -121,6 +119,72 @@ #define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)" +/** + * KBASE_API_VERSION - KBase API Version + * @major: Kernel major version + * @minor: Kernel minor version + */ +#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \ + (((minor) & 0xFFF) << 8) | \ + ((0 & 0xFF) << 0)) + +#define KBASE_API_MIN(api_version) ((api_version >> 8) & 0xFFF) +#define KBASE_API_MAJ(api_version) ((api_version >> 20) & 0xFFF) + +/** + * typedef mali_kbase_capability_def - kbase capabilities table + */ +typedef struct mali_kbase_capability_def { + u16 required_major; + u16 required_minor; +} mali_kbase_capability_def; + +/* + * This must be kept in-sync with mali_kbase_cap + * + * TODO: The alternative approach would be to embed the cap enum values + * in the table. Less efficient but potentially safer. + */ +static mali_kbase_capability_def kbase_caps_table[MALI_KBASE_NUM_CAPS] = { +#if MALI_USE_CSF + { 1, 0 }, /* SYSTEM_MONITOR */ + { 1, 0 }, /* JIT_PRESSURE_LIMIT */ + { 1, 0 }, /* MEM_GROW_ON_GPF */ + { 1, 0 } /* MEM_PROTECTED */ +#else + { 11, 15 }, /* SYSTEM_MONITOR */ + { 11, 25 }, /* JIT_PRESSURE_LIMIT */ + { 11, 2 }, /* MEM_GROW_ON_GPF */ + { 11, 2 } /* MEM_PROTECTED */ +#endif +}; + +/** + * mali_kbase_supports_cap - Query whether a kbase capability is supported + * + * @api_version: API version to convert + * @cap: Capability to query for - see mali_kbase_caps.h + */ +bool mali_kbase_supports_cap(unsigned long api_version, mali_kbase_cap cap) +{ + bool supported = false; + unsigned long required_ver; + + mali_kbase_capability_def const *cap_def; + + if (WARN_ON(cap < 0)) + return false; + + if (WARN_ON(cap >= MALI_KBASE_NUM_CAPS)) + return false; + + cap_def = &kbase_caps_table[(int)cap]; + required_ver = KBASE_API_VERSION(cap_def->required_major, cap_def->required_minor); + supported = (api_version >= required_ver); + + return supported; +} + /** * kbase_file_new - Create an object representing a 
device file * @@ -152,7 +216,7 @@ static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev, } /** - * kbase_file_get_api_version - Set the application programmer interface version + * kbase_file_set_api_version - Set the application programmer interface version * * @kfile: A device file created by kbase_file_new() * @major: Major version number (must not exceed 12 bits) @@ -271,7 +335,7 @@ static void kbase_file_delete(struct kbase_file *const kfile) if (atomic_read(&kfile->setup_state) == KBASE_FILE_COMPLETE) { struct kbase_context *kctx = kfile->kctx; -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) kbasep_mem_profile_debugfs_remove(kctx); #endif @@ -326,31 +390,18 @@ static int kbase_api_handshake(struct kbase_file *kfile, * the flags have been set. Originally it was created on file open * (with job submission disabled) but we don't support that usage. */ - if (kbase_file_get_api_version(kfile) < KBASE_API_VERSION(11, 15)) + if (!mali_kbase_supports_system_monitor(kbase_file_get_api_version(kfile))) err = kbase_file_create_kctx(kfile, BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED); return err; } -/** - * enum mali_error - Mali error codes shared with userspace - * - * This is subset of those common Mali errors that can be returned to userspace. - * Values of matching user and kernel space enumerators MUST be the same. - * MALI_ERROR_NONE is guaranteed to be 0. 
- * - * @MALI_ERROR_NONE: Success - * @MALI_ERROR_OUT_OF_GPU_MEMORY: Not used in the kernel driver - * @MALI_ERROR_OUT_OF_MEMORY: Memory allocation failure - * @MALI_ERROR_FUNCTION_FAILED: Generic error code - */ -enum mali_error { - MALI_ERROR_NONE = 0, - MALI_ERROR_OUT_OF_GPU_MEMORY, - MALI_ERROR_OUT_OF_MEMORY, - MALI_ERROR_FUNCTION_FAILED, -}; +static int kbase_api_handshake_dummy(struct kbase_file *kfile, + struct kbase_ioctl_version_check *version) +{ + return -EPERM; +} static struct kbase_device *to_kbase_device(struct device *dev) { @@ -377,7 +428,7 @@ int assign_irqs(struct kbase_device *kbdev) return -ENOENT; } -#ifdef CONFIG_OF +#if IS_ENABLED(CONFIG_OF) if (!strncasecmp(irq_res->name, "JOB", 4)) { irqtag = JOB_IRQ_TAG; } else if (!strncasecmp(irq_res->name, "MMU", 4)) { @@ -428,10 +479,10 @@ void kbase_release_device(struct kbase_device *kbdev) } EXPORT_SYMBOL(kbase_release_device); -#ifdef CONFIG_DEBUG_FS -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && \ - !(LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 28) && \ - LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0)) +#if IS_ENABLED(CONFIG_DEBUG_FS) +#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && \ + !(KERNEL_VERSION(4, 4, 28) <= LINUX_VERSION_CODE && \ + KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE) /* * Older versions, before v4.6, of the kernel doesn't have * kstrtobool_from_user(), except longterm 4.4.y which had it added in 4.4.28 @@ -545,7 +596,7 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile, { struct kbase_device *kbdev = NULL; struct kbase_context *kctx = NULL; -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) char kctx_name[64]; #endif @@ -576,8 +627,11 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile, if (kbdev->infinite_cache_active_default) kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE); -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id); + + mutex_init(&kctx->mem_profile_lock); + 
kctx->kctx_dentry = debugfs_create_dir(kctx_name, kbdev->debugfs_ctx_directory); @@ -598,8 +652,6 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile, debugfs_create_file("force_same_va", 0600, kctx->kctx_dentry, kctx, &kbase_force_same_va_fops); - mutex_init(&kctx->mem_profile_lock); - kbase_context_debugfs_init(kctx); } #endif /* CONFIG_DEBUG_FS */ @@ -622,6 +674,13 @@ static int kbase_open(struct inode *inode, struct file *filp) if (!kbdev) return -ENODEV; + /* Device-wide firmware load is moved here from probing to comply with + * Android GKI vendor guideline. + */ + ret = kbase_device_firmware_init_once(kbdev); + if (ret) + goto out; + kfile = kbase_file_new(kbdev, filp); if (!kfile) { ret = -ENOMEM; @@ -633,7 +692,7 @@ static int kbase_open(struct inode *inode, struct file *filp) return 0; - out: +out: kbase_release_device(kbdev); return ret; } @@ -661,11 +720,13 @@ static int kbase_api_set_flags(struct kbase_file *kfile, /* For backward compatibility, the context may have been created before * the flags were set. */ - if (api_version >= KBASE_API_VERSION(11, 15)) { + if (mali_kbase_supports_system_monitor(api_version)) { err = kbase_file_create_kctx(kfile, flags->create_flags); } else { +#if !MALI_USE_CSF struct kbasep_js_kctx_info *js_kctx_info = NULL; unsigned long irq_flags = 0; +#endif /* If setup is incomplete (e.g. because the API version * wasn't set) then we have to give up. @@ -674,6 +735,12 @@ static int kbase_api_set_flags(struct kbase_file *kfile, if (unlikely(!kctx)) return -EPERM; +#if MALI_USE_CSF + /* On CSF GPUs Job Manager interface isn't used to submit jobs + * (there are no job slots). So the legacy job manager path to + * submit jobs needs to remain disabled for CSF GPUs. 
+ */ +#else js_kctx_info = &kctx->jctx.sched_info; mutex_lock(&js_kctx_info->ctx.jsctx_mutex); spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags); @@ -685,11 +752,13 @@ static int kbase_api_set_flags(struct kbase_file *kfile, spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); +#endif } return err; } +#if !MALI_USE_CSF static int kbase_api_job_submit(struct kbase_context *kctx, struct kbase_ioctl_job_submit *submit) { @@ -697,6 +766,7 @@ static int kbase_api_job_submit(struct kbase_context *kctx, submit->nr_atoms, submit->stride, false); } +#endif /* !MALI_USE_CSF */ static int kbase_api_get_gpuprops(struct kbase_context *kctx, struct kbase_ioctl_get_gpuprops *get_props) @@ -722,11 +792,13 @@ static int kbase_api_get_gpuprops(struct kbase_context *kctx, return kprops->prop_buffer_size; } +#if !MALI_USE_CSF static int kbase_api_post_term(struct kbase_context *kctx) { kbase_event_close(kctx); return 0; } +#endif /* !MALI_USE_CSF */ static int kbase_api_mem_alloc(struct kbase_context *kctx, union kbase_ioctl_mem_alloc *alloc) @@ -760,11 +832,23 @@ static int kbase_api_mem_alloc(struct kbase_context *kctx, flags |= BASE_MEM_SAME_VA; } +#if MALI_USE_CSF + /* If CSF event memory allocation, need to force certain flags. + * SAME_VA - GPU address needs to be used as a CPU address, explicit + * mmap has to be avoided. + * CACHED_CPU - Frequent access to the event memory by CPU. + * COHERENT_SYSTEM - No explicit cache maintenance around the access + * to event memory so need to leverage the coherency support. 
+ */ + if (flags & BASE_MEM_CSF_EVENT) { + flags |= (BASE_MEM_SAME_VA | + BASE_MEM_CACHED_CPU | + BASE_MEM_COHERENT_SYSTEM); + } +#endif - reg = kbase_mem_alloc(kctx, alloc->in.va_pages, - alloc->in.commit_pages, - alloc->in.extent, - &flags, &gpu_va); + reg = kbase_mem_alloc(kctx, alloc->in.va_pages, alloc->in.commit_pages, + alloc->in.extension, &flags, &gpu_va); if (!reg) return -ENOMEM; @@ -788,6 +872,14 @@ static int kbase_api_mem_free(struct kbase_context *kctx, return kbase_mem_free(kctx, free->gpu_addr); } +#if !MALI_USE_CSF +static int kbase_api_kinstr_jm_fd(struct kbase_context *kctx, + union kbase_kinstr_jm_fd *arg) +{ + return kbase_kinstr_jm_get_fd(kctx->kinstr_jm, arg); +} +#endif + static int kbase_api_hwcnt_reader_setup(struct kbase_context *kctx, struct kbase_ioctl_hwcnt_reader_setup *setup) { @@ -881,17 +973,6 @@ static int kbase_api_get_cpu_gpu_timeinfo(struct kbase_context *kctx, return 0; } -#ifdef CONFIG_MALI_NO_MALI -static int kbase_api_hwcnt_set(struct kbase_context *kctx, - struct kbase_ioctl_hwcnt_values *values) -{ - gpu_model_set_dummy_prfcnt_sample( - (u32 __user *)(uintptr_t)values->data, - values->size); - - return 0; -} -#endif static int kbase_api_disjoint_query(struct kbase_context *kctx, struct kbase_ioctl_disjoint_query *query) @@ -1056,10 +1137,7 @@ static int kbase_api_mem_alias(struct kbase_context *kctx, u64 flags; int err; - if (alias->in.nents == 0 || alias->in.nents > 2048) - return -EINVAL; - - if (alias->in.stride > (U64_MAX / 2048)) + if (alias->in.nents == 0 || alias->in.nents > BASE_MEM_ALIAS_MAX_ENTS) return -EINVAL; ai = vmalloc(sizeof(*ai) * alias->in.nents); @@ -1183,6 +1261,7 @@ static int kbase_api_mem_profile_add(struct kbase_context *kctx, return kbasep_mem_profile_debugfs_insert(kctx, buf, data->len); } +#if !MALI_USE_CSF static int kbase_api_soft_event_update(struct kbase_context *kctx, struct kbase_ioctl_soft_event_update *update) { @@ -1191,6 +1270,7 @@ static int kbase_api_soft_event_update(struct 
kbase_context *kctx, return kbase_soft_event_update(kctx, update->event, update->new_status); } +#endif /* !MALI_USE_CSF */ static int kbase_api_sticky_resource_map(struct kbase_context *kctx, struct kbase_ioctl_sticky_resource_map *map) @@ -1261,18 +1341,6 @@ static int kbase_api_sticky_resource_unmap(struct kbase_context *kctx, } #if MALI_UNIT_TEST -static int kbase_api_tlstream_test(struct kbase_context *kctx, - struct kbase_ioctl_tlstream_test *test) -{ - kbase_timeline_test( - kctx->kbdev, - test->tpw_count, - test->msg_delay, - test->msg_count, - test->aux_msg); - - return 0; -} static int kbase_api_tlstream_stats(struct kbase_context *kctx, struct kbase_ioctl_tlstream_stats *stats) @@ -1285,55 +1353,266 @@ static int kbase_api_tlstream_stats(struct kbase_context *kctx, } #endif /* MALI_UNIT_TEST */ +#if MALI_USE_CSF +static int kbasep_cs_event_signal(struct kbase_context *kctx) +{ + kbase_csf_event_signal_notify_gpu(kctx); + return 0; +} + +static int kbasep_cs_queue_register(struct kbase_context *kctx, + struct kbase_ioctl_cs_queue_register *reg) +{ + kctx->jit_group_id = BASE_MEM_GROUP_DEFAULT; + + return kbase_csf_queue_register(kctx, reg); +} + +static int kbasep_cs_queue_register_ex(struct kbase_context *kctx, + struct kbase_ioctl_cs_queue_register_ex *reg) +{ + kctx->jit_group_id = BASE_MEM_GROUP_DEFAULT; + + return kbase_csf_queue_register_ex(kctx, reg); +} + +static int kbasep_cs_queue_terminate(struct kbase_context *kctx, + struct kbase_ioctl_cs_queue_terminate *term) +{ + kbase_csf_queue_terminate(kctx, term); + + return 0; +} + +static int kbasep_cs_queue_bind(struct kbase_context *kctx, + union kbase_ioctl_cs_queue_bind *bind) +{ + return kbase_csf_queue_bind(kctx, bind); +} + +static int kbasep_cs_queue_kick(struct kbase_context *kctx, + struct kbase_ioctl_cs_queue_kick *kick) +{ + return kbase_csf_queue_kick(kctx, kick); +} + +static int kbasep_cs_queue_group_create(struct kbase_context *kctx, + union kbase_ioctl_cs_queue_group_create *create) 
+{ + return kbase_csf_queue_group_create(kctx, create); +} + +static int kbasep_cs_queue_group_terminate(struct kbase_context *kctx, + struct kbase_ioctl_cs_queue_group_term *term) +{ + kbase_csf_queue_group_terminate(kctx, term->group_handle); + + return 0; +} + +static int kbasep_kcpu_queue_new(struct kbase_context *kctx, + struct kbase_ioctl_kcpu_queue_new *new) +{ + return kbase_csf_kcpu_queue_new(kctx, new); +} + +static int kbasep_kcpu_queue_delete(struct kbase_context *kctx, + struct kbase_ioctl_kcpu_queue_delete *delete) +{ + return kbase_csf_kcpu_queue_delete(kctx, delete); +} + +static int kbasep_kcpu_queue_enqueue(struct kbase_context *kctx, + struct kbase_ioctl_kcpu_queue_enqueue *enqueue) +{ + return kbase_csf_kcpu_queue_enqueue(kctx, enqueue); +} + +static int kbasep_cs_tiler_heap_init(struct kbase_context *kctx, + union kbase_ioctl_cs_tiler_heap_init *heap_init) +{ + kctx->jit_group_id = heap_init->in.group_id; + + return kbase_csf_tiler_heap_init(kctx, heap_init->in.chunk_size, + heap_init->in.initial_chunks, heap_init->in.max_chunks, + heap_init->in.target_in_flight, + &heap_init->out.gpu_heap_va, &heap_init->out.first_chunk_va); +} + +static int kbasep_cs_tiler_heap_term(struct kbase_context *kctx, + struct kbase_ioctl_cs_tiler_heap_term *heap_term) +{ + return kbase_csf_tiler_heap_term(kctx, heap_term->gpu_heap_va); +} + +static int kbase_ioctl_cs_get_glb_iface(struct kbase_context *kctx, + union kbase_ioctl_cs_get_glb_iface *param) +{ + struct basep_cs_stream_control *stream_data = NULL; + struct basep_cs_group_control *group_data = NULL; + void __user *user_groups, *user_streams; + int err = 0; + u32 const max_group_num = param->in.max_group_num; + u32 const max_total_stream_num = param->in.max_total_stream_num; + + if (max_group_num > MAX_SUPPORTED_CSGS) + return -EINVAL; + + if (max_total_stream_num > + MAX_SUPPORTED_CSGS * MAX_SUPPORTED_STREAMS_PER_GROUP) + return -EINVAL; + + user_groups = u64_to_user_ptr(param->in.groups_ptr); + 
user_streams = u64_to_user_ptr(param->in.streams_ptr); + + if (max_group_num > 0) { + if (!user_groups) + err = -EINVAL; + else { + group_data = kcalloc(max_group_num, + sizeof(*group_data), GFP_KERNEL); + if (!group_data) + err = -ENOMEM; + } + } + + if (max_total_stream_num > 0) { + if (!user_streams) + err = -EINVAL; + else { + stream_data = kcalloc(max_total_stream_num, + sizeof(*stream_data), GFP_KERNEL); + if (!stream_data) + err = -ENOMEM; + } + } + + if (!err) { + param->out.total_stream_num = kbase_csf_firmware_get_glb_iface( + kctx->kbdev, group_data, max_group_num, stream_data, + max_total_stream_num, &param->out.glb_version, + &param->out.features, &param->out.group_num, + &param->out.prfcnt_size, &param->out.instr_features); + + if (copy_to_user(user_groups, group_data, + MIN(max_group_num, param->out.group_num) * + sizeof(*group_data))) + err = -EFAULT; + } + + if (!err) + if (copy_to_user(user_streams, stream_data, + MIN(max_total_stream_num, param->out.total_stream_num) * + sizeof(*stream_data))) + err = -EFAULT; + + kfree(group_data); + kfree(stream_data); + return err; +} + +static int kbasep_ioctl_cs_cpu_queue_dump(struct kbase_context *kctx, + struct kbase_ioctl_cs_cpu_queue_info *cpu_queue_info) +{ + return kbase_csf_cpu_queue_dump(kctx, cpu_queue_info->buffer, + cpu_queue_info->size); +} + +#endif /* MALI_USE_CSF */ + +static int kbasep_ioctl_context_priority_check(struct kbase_context *kctx, + struct kbase_ioctl_context_priority_check *priority_check) +{ +#if MALI_USE_CSF + priority_check->priority = kbase_csf_priority_check(kctx->kbdev, priority_check->priority); +#else + base_jd_prio req_priority = (base_jd_prio)priority_check->priority; + + priority_check->priority = (u8)kbase_js_priority_check(kctx->kbdev, req_priority); +#endif + return 0; +} -#define KBASE_HANDLE_IOCTL(cmd, function, arg) \ - do { \ - BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE); \ - return function(arg); \ +#define KBASE_HANDLE_IOCTL(cmd, function, arg) \ + do { \ + int ret; \ +
BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE); \ + dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ + ret = function(arg); \ + dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, \ + #function); \ + return ret; \ } while (0) -#define KBASE_HANDLE_IOCTL_IN(cmd, function, type, arg) \ - do { \ - type param; \ - int err; \ - BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_WRITE); \ - BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ - err = copy_from_user(&param, uarg, sizeof(param)); \ - if (err) \ - return -EFAULT; \ - return function(arg, &param); \ +#define KBASE_HANDLE_IOCTL_IN(cmd, function, type, arg) \ + do { \ + type param; \ + int ret, err; \ + dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ + BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_WRITE); \ + BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ + err = copy_from_user(&param, uarg, sizeof(param)); \ + if (err) \ + return -EFAULT; \ + ret = function(arg, &param); \ + dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, \ + #function); \ + return ret; \ } while (0) -#define KBASE_HANDLE_IOCTL_OUT(cmd, function, type, arg) \ - do { \ - type param; \ - int ret, err; \ - BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_READ); \ - BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ - memset(&param, 0, sizeof(param)); \ - ret = function(arg, &param); \ - err = copy_to_user(uarg, &param, sizeof(param)); \ - if (err) \ - return -EFAULT; \ - return ret; \ +#define KBASE_HANDLE_IOCTL_OUT(cmd, function, type, arg) \ + do { \ + type param; \ + int ret, err; \ + dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ + BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_READ); \ + BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ + memset(&param, 0, sizeof(param)); \ + ret = function(arg, &param); \ + err = copy_to_user(uarg, &param, sizeof(param)); \ + if (err) \ + return -EFAULT; \ + dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, \ + #function); \ + return ret; \ } while (0) -#define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type, arg) \ - do { \ - type param; \ - int ret,
err; \ - BUILD_BUG_ON(_IOC_DIR(cmd) != (_IOC_WRITE|_IOC_READ)); \ - BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ - err = copy_from_user(&param, uarg, sizeof(param)); \ - if (err) \ - return -EFAULT; \ - ret = function(arg, &param); \ - err = copy_to_user(uarg, &param, sizeof(param)); \ - if (err) \ - return -EFAULT; \ - return ret; \ +#define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type, arg) \ + do { \ + type param; \ + int ret, err; \ + dev_dbg(arg->kbdev->dev, "Enter ioctl %s\n", #function); \ + BUILD_BUG_ON(_IOC_DIR(cmd) != (_IOC_WRITE | _IOC_READ)); \ + BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd)); \ + err = copy_from_user(&param, uarg, sizeof(param)); \ + if (err) \ + return -EFAULT; \ + ret = function(arg, &param); \ + err = copy_to_user(uarg, &param, sizeof(param)); \ + if (err) \ + return -EFAULT; \ + dev_dbg(arg->kbdev->dev, "Return %d from ioctl %s\n", ret, \ + #function); \ + return ret; \ } while (0) +static int kbasep_ioctl_set_limited_core_count(struct kbase_context *kctx, + struct kbase_ioctl_set_limited_core_count *set_limited_core_count) +{ + const u64 shader_core_mask = + kbase_pm_get_present_cores(kctx->kbdev, KBASE_PM_CORE_SHADER); + const u64 limited_core_mask = + ((u64)1 << (set_limited_core_count->max_core_count)) - 1; + + if ((shader_core_mask & limited_core_mask) == 0) { + /* At least one shader core must be available after applying the mask */ + return -EINVAL; + } + + kctx->limited_core_mask = limited_core_mask; + return 0; +} + static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct kbase_file *const kfile = filp->private_data; @@ -1350,6 +1629,13 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) kfile); break; + case KBASE_IOCTL_VERSION_CHECK_RESERVED: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_VERSION_CHECK_RESERVED, + kbase_api_handshake_dummy, + struct kbase_ioctl_version_check, + kfile); + break; + case KBASE_IOCTL_SET_FLAGS: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_FLAGS,
kbase_api_set_flags, @@ -1364,23 +1650,27 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) /* Normal ioctls */ switch (cmd) { +#if !MALI_USE_CSF case KBASE_IOCTL_JOB_SUBMIT: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_JOB_SUBMIT, kbase_api_job_submit, struct kbase_ioctl_job_submit, kctx); break; +#endif /* !MALI_USE_CSF */ case KBASE_IOCTL_GET_GPUPROPS: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_GPUPROPS, kbase_api_get_gpuprops, struct kbase_ioctl_get_gpuprops, kctx); break; +#if !MALI_USE_CSF case KBASE_IOCTL_POST_TERM: KBASE_HANDLE_IOCTL(KBASE_IOCTL_POST_TERM, kbase_api_post_term, kctx); break; +#endif /* !MALI_USE_CSF */ case KBASE_IOCTL_MEM_ALLOC: KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALLOC, kbase_api_mem_alloc, @@ -1513,12 +1803,14 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) kctx); break; +#if !MALI_USE_CSF case KBASE_IOCTL_SOFT_EVENT_UPDATE: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SOFT_EVENT_UPDATE, kbase_api_soft_event_update, struct kbase_ioctl_soft_event_update, kctx); break; +#endif /* !MALI_USE_CSF */ case KBASE_IOCTL_STICKY_RESOURCE_MAP: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_MAP, @@ -1534,6 +1826,14 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) break; /* Instrumentation. 
*/ +#if !MALI_USE_CSF + case KBASE_IOCTL_KINSTR_JM_FD: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_KINSTR_JM_FD, + kbase_api_kinstr_jm_fd, + union kbase_kinstr_jm_fd, + kctx); + break; +#endif case KBASE_IOCTL_HWCNT_READER_SETUP: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_READER_SETUP, kbase_api_hwcnt_reader_setup, @@ -1562,14 +1862,6 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) union kbase_ioctl_get_cpu_gpu_timeinfo, kctx); break; -#ifdef CONFIG_MALI_NO_MALI - case KBASE_IOCTL_HWCNT_SET: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_SET, - kbase_api_hwcnt_set, - struct kbase_ioctl_hwcnt_values, - kctx); - break; -#endif #ifdef CONFIG_MALI_CINSTR_GWT case KBASE_IOCTL_CINSTR_GWT_START: KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_START, @@ -1588,13 +1880,98 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) kctx); break; #endif -#if MALI_UNIT_TEST - case KBASE_IOCTL_TLSTREAM_TEST: - KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_TEST, - kbase_api_tlstream_test, - struct kbase_ioctl_tlstream_test, +#if MALI_USE_CSF + case KBASE_IOCTL_CS_EVENT_SIGNAL: + KBASE_HANDLE_IOCTL(KBASE_IOCTL_CS_EVENT_SIGNAL, + kbasep_cs_event_signal, + kctx); + break; + case KBASE_IOCTL_CS_QUEUE_REGISTER: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_REGISTER, + kbasep_cs_queue_register, + struct kbase_ioctl_cs_queue_register, + kctx); + break; + case KBASE_IOCTL_CS_QUEUE_REGISTER_EX: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_REGISTER_EX, + kbasep_cs_queue_register_ex, + struct kbase_ioctl_cs_queue_register_ex, + kctx); + break; + case KBASE_IOCTL_CS_QUEUE_TERMINATE: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_TERMINATE, + kbasep_cs_queue_terminate, + struct kbase_ioctl_cs_queue_terminate, + kctx); + break; + case KBASE_IOCTL_CS_QUEUE_BIND: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_QUEUE_BIND, + kbasep_cs_queue_bind, + union kbase_ioctl_cs_queue_bind, + kctx); + break; + case KBASE_IOCTL_CS_QUEUE_KICK: + 
KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_KICK, + kbasep_cs_queue_kick, + struct kbase_ioctl_cs_queue_kick, + kctx); + break; + case KBASE_IOCTL_CS_QUEUE_GROUP_CREATE: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_QUEUE_GROUP_CREATE, + kbasep_cs_queue_group_create, + union kbase_ioctl_cs_queue_group_create, + kctx); + break; + case KBASE_IOCTL_CS_QUEUE_GROUP_TERMINATE: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_QUEUE_GROUP_TERMINATE, + kbasep_cs_queue_group_terminate, + struct kbase_ioctl_cs_queue_group_term, + kctx); + break; + case KBASE_IOCTL_KCPU_QUEUE_CREATE: + KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_KCPU_QUEUE_CREATE, + kbasep_kcpu_queue_new, + struct kbase_ioctl_kcpu_queue_new, + kctx); + break; + case KBASE_IOCTL_KCPU_QUEUE_DELETE: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_KCPU_QUEUE_DELETE, + kbasep_kcpu_queue_delete, + struct kbase_ioctl_kcpu_queue_delete, + kctx); + break; + case KBASE_IOCTL_KCPU_QUEUE_ENQUEUE: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_KCPU_QUEUE_ENQUEUE, + kbasep_kcpu_queue_enqueue, + struct kbase_ioctl_kcpu_queue_enqueue, + kctx); + break; + case KBASE_IOCTL_CS_TILER_HEAP_INIT: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_TILER_HEAP_INIT, + kbasep_cs_tiler_heap_init, + union kbase_ioctl_cs_tiler_heap_init, + kctx); + break; + case KBASE_IOCTL_CS_TILER_HEAP_TERM: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_TILER_HEAP_TERM, + kbasep_cs_tiler_heap_term, + struct kbase_ioctl_cs_tiler_heap_term, + kctx); + break; + case KBASE_IOCTL_CS_GET_GLB_IFACE: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_GET_GLB_IFACE, + kbase_ioctl_cs_get_glb_iface, + union kbase_ioctl_cs_get_glb_iface, + kctx); + break; + case KBASE_IOCTL_CS_CPU_QUEUE_DUMP: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_CPU_QUEUE_DUMP, + kbasep_ioctl_cs_cpu_queue_dump, + struct kbase_ioctl_cs_cpu_queue_info, kctx); break; +#endif /* MALI_USE_CSF */ +#if MALI_UNIT_TEST case KBASE_IOCTL_TLSTREAM_STATS: KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_TLSTREAM_STATS, kbase_api_tlstream_stats, @@ -1602,6 +1979,18 @@ static long 
kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) kctx); break; #endif /* MALI_UNIT_TEST */ + case KBASE_IOCTL_CONTEXT_PRIORITY_CHECK: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CONTEXT_PRIORITY_CHECK, + kbasep_ioctl_context_priority_check, + struct kbase_ioctl_context_priority_check, + kctx); + break; + case KBASE_IOCTL_SET_LIMITED_CORE_COUNT: + KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_LIMITED_CORE_COUNT, + kbasep_ioctl_set_limited_core_count, + struct kbase_ioctl_set_limited_core_count, + kctx); + break; } dev_warn(kbdev->dev, "Unknown ioctl 0x%x nr:%d", cmd, _IOC_NR(cmd)); @@ -1609,6 +1998,51 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return -ENOIOCTLCMD; } +#if MALI_USE_CSF +static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) +{ + struct kbase_file *const kfile = filp->private_data; + struct kbase_context *const kctx = + kbase_file_get_kctx_if_setup_complete(kfile); + struct base_csf_notification event_data = { + .type = BASE_CSF_NOTIFICATION_EVENT }; + const size_t data_size = sizeof(event_data); + bool read_event = false, read_error = false; + + if (unlikely(!kctx)) + return -EPERM; + + if (atomic_read(&kctx->event_count)) + read_event = true; + else + read_error = kbase_csf_read_error(kctx, &event_data); + + if (!read_event && !read_error) { + bool dump = kbase_csf_cpu_queue_read_dump_req(kctx, + &event_data); + /* This condition is not treated as an error. + * It is possible that event handling thread was woken up due + * to a fault/error that occurred for a queue group, but before + * the corresponding fault data was read by the thread the + * queue group was already terminated by the userspace. 
+ */ + if (!dump) + dev_dbg(kctx->kbdev->dev, + "Neither event nor error signaled"); + } + + if (copy_to_user(buf, &event_data, data_size) != 0) { + dev_warn(kctx->kbdev->dev, + "Failed to copy data\n"); + return -EFAULT; + } + + if (read_event) + atomic_set(&kctx->event_count, 0); + + return data_size; +} +#else /* MALI_USE_CSF */ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) { struct kbase_file *const kfile = filp->private_data; @@ -1652,6 +2086,7 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof out: return out_count * sizeof(uevent); } +#endif /* MALI_USE_CSF */ static unsigned int kbase_poll(struct file *filp, poll_table *wait) { @@ -1672,19 +2107,31 @@ static unsigned int kbase_poll(struct file *filp, poll_table *wait) void kbase_event_wakeup(struct kbase_context *kctx) { KBASE_DEBUG_ASSERT(kctx); - + dev_dbg(kctx->kbdev->dev, "Waking event queue for context %pK\n", + (void *)kctx); wake_up_interruptible(&kctx->event_queue); } KBASE_EXPORT_TEST_API(kbase_event_wakeup); +#if MALI_USE_CSF +int kbase_event_pending(struct kbase_context *ctx) +{ + WARN_ON_ONCE(!ctx); + + return (atomic_read(&ctx->event_count) != 0) || + kbase_csf_error_pending(ctx) || + kbase_csf_cpu_queue_dump_needed(ctx); +} +#else int kbase_event_pending(struct kbase_context *ctx) { KBASE_DEBUG_ASSERT(ctx); return (atomic_read(&ctx->event_count) != 0) || - (atomic_read(&ctx->event_closed) != 0); + (atomic_read(&ctx->event_closed) != 0); } +#endif KBASE_EXPORT_TEST_API(kbase_event_pending); @@ -1738,36 +2185,160 @@ static const struct file_operations kbase_fops = { .get_unmapped_area = kbase_get_unmapped_area, }; -/** - * show_policy - Show callback for the power_policy sysfs file. - * - * This function is called to get the contents of the power_policy sysfs - * file. This is a list of the available policies with the currently active one - * surrounded by square brackets. 
- * - * @dev: The device this sysfs file is for - * @attr: The attributes of the sysfs file - * @buf: The output buffer for the sysfs file contents - * - * Return: The number of bytes output to @buf. - */ -static ssize_t show_policy(struct device *dev, struct device_attribute *attr, char *const buf) +static ssize_t show_gpu_memory(struct device *dev, struct device_attribute *attr, char * const buf) { struct kbase_device *kbdev; - const struct kbase_pm_policy *current_policy; - const struct kbase_pm_policy *const *policy_list; - int policy_count; - int i; ssize_t ret = 0; + struct list_head *entry; + const struct list_head *kbdev_list; kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - current_policy = kbase_pm_get_policy(kbdev); - - policy_count = kbase_pm_list_policies(kbdev, &policy_list); + kbdev_list = kbase_device_get_list(); + list_for_each(entry, kbdev_list) { + struct kbase_device *kbdev = NULL; + struct kbase_context *kctx; + + kbdev = list_entry(entry, struct kbase_device, entry); + /* output the total memory usage and cap for this device */ + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "%-16s %-16s %10u\n", + kbdev->devname, + "total used_pages", + atomic_read(&(kbdev->memdev.used_pages))); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "----------------------------------------------------\n"); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "%-16s %-16s %-16s\n", + "kctx", "pid", "used_pages"); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "----------------------------------------------------\n"); + mutex_lock(&kbdev->kctx_list_lock); + list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { + /* output the memory usage and cap for each kctx + * opened on this device */ + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "%p %10u %10u\n", + kctx, + kctx->tgid, + atomic_read(&(kctx->used_pages))); + } + mutex_unlock(&kbdev->kctx_list_lock); + } + + kbase_device_put_list(kbdev_list); + + + return ret; +} + +static ssize_t 
set_gpu_memory(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + ssize_t err = count; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + return err; +} + +static DEVICE_ATTR(gpu_memory, S_IRUGO | S_IWUSR, show_gpu_memory, set_gpu_memory); + +static ssize_t show_ctx_mem_pool_size(struct device *dev, struct device_attribute *attr, char * const buf) +{ + struct list_head *entry; + const struct list_head *kbdev_list; + ssize_t ret = 0; + int i = 0; + struct kbase_device *const kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + kbdev_list = kbase_device_get_list(); + list_for_each(entry, kbdev_list) { + struct kbase_device *kbdev = NULL; + struct kbase_context *kctx; + + kbdev = list_entry(entry, struct kbase_device, entry); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "%-16s %-16s %-16s\n", + "kctx", "pid", "cached_pages"); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "----------------------------------------------------\n"); + mutex_lock(&kbdev->kctx_list_lock); + list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { + /* output the memory cached and cap for each kctx + * opened on this device */ + unsigned long cached_mem = 0; + for (i = 0; i < MEMORY_GROUP_MANAGER_NR_GROUPS; i++) + //pr_info("[%d]:kctx->mem_pools.small[%d] = %d", kctx->tgid, i, kctx->mem_pools.small[i].cur_size); + cached_mem += kctx->mem_pools.small[i].cur_size; + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "%p %10u %10lu\n", + kctx, + kctx->tgid, + cached_mem); + } + mutex_unlock(&kbdev->kctx_list_lock); + } + + kbase_device_put_list(kbdev_list); + + return ret; +} + +static ssize_t set_ctx_mem_pool_size(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + ssize_t err = count; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + return err; +} + +static DEVICE_ATTR(ctx_mem_pool_size, S_IRUGO | 
S_IWUSR, show_ctx_mem_pool_size, set_ctx_mem_pool_size); + +/** + * show_policy - Show callback for the power_policy sysfs file. + * + * This function is called to get the contents of the power_policy sysfs + * file. This is a list of the available policies with the currently active one + * surrounded by square brackets. + * + * @dev: The device this sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The output buffer for the sysfs file contents + * + * Return: The number of bytes output to @buf. + */ +static ssize_t show_policy(struct device *dev, struct device_attribute *attr, char *const buf) +{ + struct kbase_device *kbdev; + const struct kbase_pm_policy *current_policy; + const struct kbase_pm_policy *const *policy_list; + int policy_count; + int i; + ssize_t ret = 0; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + current_policy = kbase_pm_get_policy(kbdev); + + policy_count = kbase_pm_list_policies(kbdev, &policy_list); for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) { if (policy_list[i] == current_policy) @@ -1798,7 +2369,7 @@ static ssize_t show_policy(struct device *dev, struct device_attribute *attr, ch * @dev: The device with sysfs file is for * @attr: The attributes of the sysfs file * @buf: The value written to the sysfs file - * @count: The number of bytes written to the sysfs file + * @count: The number of bytes to write to the sysfs file * * Return: @count if the function succeeded. An error code on failure. 
*/ @@ -1857,6 +2428,7 @@ static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy); static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, char * const buf) { struct kbase_device *kbdev; + unsigned long flags; ssize_t ret = 0; kbdev = to_kbase_device(dev); @@ -1864,6 +2436,19 @@ static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, if (!kbdev) return -ENODEV; + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + +#if MALI_USE_CSF + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "Current debug core mask : 0x%llX\n", + kbdev->pm.debug_core_mask); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "Current desired core mask : 0x%llX\n", + kbase_pm_ca_get_core_mask(kbdev)); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, + "Current in use core mask : 0x%llX\n", + kbdev->pm.backend.shaders_avail); +#else ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Current core mask (JS0) : 0x%llX\n", kbdev->pm.debug_core_mask[0]); @@ -1873,10 +2458,14 @@ static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Current core mask (JS2) : 0x%llX\n", kbdev->pm.debug_core_mask[2]); +#endif /* MALI_USE_CSF */ + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Available core mask : 0x%llX\n", kbdev->gpu_props.props.raw_props.shader_present); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return ret; } @@ -1888,24 +2477,42 @@ static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, * @dev: The device with sysfs file is for * @attr: The attributes of the sysfs file * @buf: The value written to the sysfs file - * @count: The number of bytes written to the sysfs file + * @count: The number of bytes to write to the sysfs file * * Return: @count if the function succeeded. An error code on failure. 
*/ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct kbase_device *kbdev; +#if MALI_USE_CSF + u64 new_core_mask; +#else u64 new_core_mask[3]; - int items, i; + u64 group0_core_mask; + int i; +#endif /* MALI_USE_CSF */ + + int items; ssize_t err = count; unsigned long flags; - u64 shader_present, group0_core_mask; + u64 shader_present; kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; +#if MALI_USE_CSF + items = sscanf(buf, "%llx", &new_core_mask); + + if (items != 1) { + dev_err(kbdev->dev, + "Couldn't process core mask write operation.\n" + "Use format \n"); + err = -EINVAL; + goto end; + } +#else items = sscanf(buf, "%llx %llx %llx", &new_core_mask[0], &new_core_mask[1], &new_core_mask[2]); @@ -1920,11 +2527,35 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, if (items == 1) new_core_mask[1] = new_core_mask[2] = new_core_mask[0]; +#endif mutex_lock(&kbdev->pm.lock); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); shader_present = kbdev->gpu_props.props.raw_props.shader_present; + +#if MALI_USE_CSF + if ((new_core_mask & shader_present) != new_core_mask) { + dev_err(dev, + "Invalid core mask 0x%llX: Includes non-existent cores (present = 0x%llX)", + new_core_mask, shader_present); + err = -EINVAL; + goto unlock; + + } else if (!(new_core_mask & shader_present & + kbdev->pm.backend.ca_cores_enabled)) { + dev_err(dev, + "Invalid core mask 0x%llX: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX\n", + new_core_mask, + kbdev->gpu_props.props.raw_props.shader_present, + kbdev->pm.backend.ca_cores_enabled); + err = -EINVAL; + goto unlock; + } + + if (kbdev->pm.debug_core_mask != new_core_mask) + kbase_pm_set_debug_core_mask(kbdev, new_core_mask); +#else group0_core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask; for (i = 0; i < 3; ++i) { @@ -1947,6 +2578,11 @@ static ssize_t set_core_mask(struct device 
*dev, struct device_attribute *attr, new_core_mask[i], i, group0_core_mask); err = -EINVAL; goto unlock; + } else if (!(new_core_mask[i] & kbdev->gpu_props.curr_config.shader_present)) { + dev_err(dev, "Invalid core mask 0x%llX for JS %d: No intersection with current core mask 0x%llX\n", + new_core_mask[i], i, kbdev->gpu_props.curr_config.shader_present); + err = -EINVAL; + goto unlock; } } @@ -1959,6 +2595,7 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0], new_core_mask[1], new_core_mask[2]); } +#endif /* MALI_USE_CSF */ unlock: spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -1976,130 +2613,7 @@ end: */ static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask); -static ssize_t show_gpu_memory(struct device *dev, struct device_attribute *attr, char * const buf) -{ - struct kbase_device *kbdev; - ssize_t ret = 0; - struct list_head *entry; - const struct list_head *kbdev_list; - - kbdev = to_kbase_device(dev); - - if (!kbdev) - return -ENODEV; - - kbdev_list = kbase_device_get_list(); - list_for_each(entry, kbdev_list) { - struct kbase_device *kbdev = NULL; - struct kbase_context *kctx; - - kbdev = list_entry(entry, struct kbase_device, entry); - /* output the total memory usage and cap for this device */ - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "%-16s %-16s %10u\n", - kbdev->devname, - "total used_pages", - atomic_read(&(kbdev->memdev.used_pages))); - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "----------------------------------------------------\n"); - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "%-16s %-16s %-16s\n", - "kctx", "pid", "used_pages"); - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "----------------------------------------------------\n"); - mutex_lock(&kbdev->kctx_list_lock); - list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { - /* output the memory usage and cap for each kctx - * opened on this device 
*/ - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "%p %10u %10u\n", - kctx, - kctx->tgid, - atomic_read(&(kctx->used_pages))); - } - mutex_unlock(&kbdev->kctx_list_lock); - } - - kbase_device_put_list(kbdev_list); - - - return ret; -} - -static ssize_t set_gpu_memory(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -{ - struct kbase_device *kbdev; - ssize_t err = count; - - kbdev = to_kbase_device(dev); - - if (!kbdev) - return -ENODEV; - - return err; -} - -static DEVICE_ATTR(gpu_memory, S_IRUGO | S_IWUSR, show_gpu_memory, set_gpu_memory); - -static ssize_t show_ctx_mem_pool_size(struct device *dev, struct device_attribute *attr, char * const buf) -{ - struct list_head *entry; - const struct list_head *kbdev_list; - ssize_t ret = 0; - int i = 0; - struct kbase_device *const kbdev = to_kbase_device(dev); - - if (!kbdev) - return -ENODEV; - - kbdev_list = kbase_device_get_list(); - list_for_each(entry, kbdev_list) { - struct kbase_device *kbdev = NULL; - struct kbase_context *kctx; - - kbdev = list_entry(entry, struct kbase_device, entry); - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "%-16s %-16s %-16s\n", - "kctx", "pid", "cached_pages"); - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "----------------------------------------------------\n"); - mutex_lock(&kbdev->kctx_list_lock); - list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { - /* output the memory cached and cap for each kctx - * opened on this device */ - unsigned long cached_mem = 0; - for (i = 0; i < MEMORY_GROUP_MANAGER_NR_GROUPS; i++) - //pr_info("[%d]:kctx->mem_pools.small[%d] = %d", kctx->tgid, i, kctx->mem_pools.small[i].cur_size); - cached_mem += kctx->mem_pools.small[i].cur_size; - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "%p %10u %10lu\n", - kctx, - kctx->tgid, - cached_mem); - } - mutex_unlock(&kbdev->kctx_list_lock); - } - - kbase_device_put_list(kbdev_list); - - return ret; -} - -static ssize_t set_ctx_mem_pool_size(struct device *dev, 
struct device_attribute *attr, const char *buf, size_t count) -{ - struct kbase_device *kbdev; - ssize_t err = count; - - kbdev = to_kbase_device(dev); - - if (!kbdev) - return -ENODEV; - - return err; -} - -static DEVICE_ATTR(ctx_mem_pool_size, S_IRUGO | S_IWUSR, show_ctx_mem_pool_size, set_ctx_mem_pool_size); - +#if !MALI_USE_CSF /** * set_soft_job_timeout - Store callback for the soft_job_timeout sysfs * file. @@ -2107,7 +2621,7 @@ static DEVICE_ATTR(ctx_mem_pool_size, S_IRUGO | S_IWUSR, show_ctx_mem_pool_size, * @dev: The device this sysfs file is for. * @attr: The attributes of the sysfs file. * @buf: The value written to the sysfs file. - * @count: The number of bytes written to the sysfs file. + * @count: The number of bytes to write to the sysfs file. * * This allows setting the timeout for software jobs. Waiting soft event wait * jobs will be cancelled after this period expires, while soft fence wait jobs @@ -2200,7 +2714,7 @@ static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms, * @dev: The device with sysfs file is for * @attr: The attributes of the sysfs file * @buf: The value written to the sysfs file - * @count: The number of bytes written to the sysfs file + * @count: The number of bytes to write to the sysfs file * * Return: @count if the function succeeded. An error code on failure. */ @@ -2377,7 +2891,7 @@ static u32 get_new_js_timeout( * @dev: The device the sysfs file is for * @attr: The attributes of the sysfs file * @buf: The value written to the sysfs file - * @count: The number of bytes written to the sysfs file + * @count: The number of bytes to write to the sysfs file * * This function is called when the js_scheduling_period sysfs file is written * to. 
It checks the data written, and if valid updates the js_scheduling_period @@ -2417,7 +2931,8 @@ static ssize_t set_js_scheduling_period(struct device *dev, /* If no contexts have been scheduled since js_timeouts was last written * to, the new timeouts might not have been latched yet. So check if an - * update is pending and use the new values if necessary. */ + * update is pending and use the new values if necessary. + */ /* Use previous 'new' scheduling period as a base if present. */ old_period = js_data->scheduling_period_ns; @@ -2542,9 +3057,10 @@ static ssize_t show_js_softstop_always(struct device *dev, */ static DEVICE_ATTR(js_softstop_always, S_IRUGO | S_IWUSR, show_js_softstop_always, set_js_softstop_always); #endif /* CONFIG_MALI_DEBUG */ +#endif /* !MALI_USE_CSF */ #ifdef CONFIG_MALI_DEBUG -typedef void (kbasep_debug_command_func) (struct kbase_device *); +typedef void kbasep_debug_command_func(struct kbase_device *); enum kbasep_debug_command_code { KBASEP_DEBUG_COMMAND_DUMPTRACE, @@ -2690,24 +3206,20 @@ static ssize_t kbase_show_gpuinfo(struct device *dev, .name = "Mali-G77" }, { .id = GPU_ID2_PRODUCT_TBEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, .name = "Mali-G78" }, + { .id = GPU_ID2_PRODUCT_TBAX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-G78AE" }, { .id = GPU_ID2_PRODUCT_LBEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, .name = "Mali-G68" }, { .id = GPU_ID2_PRODUCT_TNAX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, .name = "Mali-G57" }, { .id = GPU_ID2_PRODUCT_TODX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-TODX" }, + .name = "Mali-G710" }, + { .id = GPU_ID2_PRODUCT_LODX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, + .name = "Mali-G610" }, { .id = GPU_ID2_PRODUCT_TGRX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-TGRX" }, + .name = "Mali-G510" }, { .id = GPU_ID2_PRODUCT_TVAX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-TVAX" }, - { .id = GPU_ID2_PRODUCT_LODX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-LODX" }, - { .id = GPU_ID2_PRODUCT_TTUX >> 
GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-TTUX" }, - { .id = GPU_ID2_PRODUCT_LTUX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-LTUX" }, - { .id = GPU_ID2_PRODUCT_TE2X >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-TE2X" }, + .name = "Mali-G310" }, }; const char *product_name = "(Unknown Mali GPU)"; struct kbase_device *kbdev; @@ -2850,7 +3362,8 @@ static ssize_t set_pm_poweroff(struct device *dev, stt = &kbdev->pm.backend.shader_tick_timer; stt->configured_interval = HR_TIMER_DELAY_NSEC(gpu_poweroff_time); - stt->configured_ticks = poweroff_shader_ticks; + stt->default_ticks = poweroff_shader_ticks; + stt->configured_ticks = stt->default_ticks; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -2888,7 +3401,7 @@ static ssize_t show_pm_poweroff(struct device *dev, stt = &kbdev->pm.backend.shader_tick_timer; ret = scnprintf(buf, PAGE_SIZE, "%llu %u 0\n", ktime_to_ns(stt->configured_interval), - stt->configured_ticks); + stt->default_ticks); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -2898,6 +3411,75 @@ static ssize_t show_pm_poweroff(struct device *dev, static DEVICE_ATTR(pm_poweroff, S_IRUGO | S_IWUSR, show_pm_poweroff, set_pm_poweroff); +#if MALI_USE_CSF +/** + * set_idle_hysteresis_time - Store callback for CSF idle_hysteresis_time + * sysfs file. + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes written to the sysfs file + * + * This function is called when the idle_hysteresis_time sysfs file is + * written to. + * + * This file contains values of the idle hysteresis duration. + * + * Return: @count if the function succeeded. An error code on failure.
+ */ +static ssize_t set_idle_hysteresis_time(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + u32 dur; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + if (kstrtou32(buf, 0, &dur)) { + dev_err(kbdev->dev, "Couldn't process idle_hysteresis_time write operation.\n" + "Use format \n"); + return -EINVAL; + } + + kbase_csf_firmware_set_gpu_idle_hysteresis_time(kbdev, dur); + + return count; +} + +/** + * show_idle_hysteresis_time - Show callback for CSF idle_hysteresis_time + * sysfs entry. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the GPU information. + * + * This function is called to get the current idle hysteresis duration in ms. + * + * Return: The number of bytes output to @buf. + */ +static ssize_t show_idle_hysteresis_time(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + ssize_t ret; + u32 dur; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + dur = kbase_csf_firmware_get_gpu_idle_hysteresis_time(kbdev); + ret = scnprintf(buf, PAGE_SIZE, "%u\n", dur); + + return ret; +} + +static DEVICE_ATTR(idle_hysteresis_time, S_IRUGO | S_IWUSR, + show_idle_hysteresis_time, set_idle_hysteresis_time); +#endif + /** * set_reset_timeout - Store callback for the reset_timeout sysfs file. * @dev: The device with sysfs file is for @@ -3124,16 +3706,214 @@ static ssize_t set_lp_mem_pool_max_size(struct device *dev, if (!kbdev) return -ENODEV; - err = kbase_debugfs_helper_set_attr_from_string(buf, - kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS, - kbase_mem_pool_debugfs_set_max_size); + err = kbase_debugfs_helper_set_attr_from_string(buf, + kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS, + kbase_mem_pool_debugfs_set_max_size); + + return err ? 
err : count; +} + +static DEVICE_ATTR(lp_mem_pool_max_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_max_size, + set_lp_mem_pool_max_size); + +/** + * show_simplified_mem_pool_max_size - Show the maximum size for the memory + * pool 0 of small (4KiB) pages. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the max size. + * + * This function is called to get the maximum size for the memory pool 0 of + * small (4KiB) pages. It is assumed that the maximum size value is same for + * all the pools. + * + * Return: The number of bytes output to @buf. + */ +static ssize_t show_simplified_mem_pool_max_size(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *const kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, + kbdev->mem_pools.small, 1, kbase_mem_pool_debugfs_max_size); +} + +/** + * set_simplified_mem_pool_max_size - Set the same maximum size for all the + * memory pools of small (4KiB) pages. + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes written to the sysfs file + * + * This function is called to set the same maximum size for all the memory + * pools of small (4KiB) pages. + * + * Return: @count if the function succeeded. An error code on failure.
+ */ +static ssize_t set_simplified_mem_pool_max_size(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *const kbdev = to_kbase_device(dev); + unsigned long new_size; + int gid; + int err; + + if (!kbdev) + return -ENODEV; + + err = kstrtoul(buf, 0, &new_size); + if (err) + return -EINVAL; + + for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) + kbase_mem_pool_debugfs_set_max_size( + kbdev->mem_pools.small, gid, (size_t)new_size); + + return count; +} + +static DEVICE_ATTR(max_size, 0600, show_simplified_mem_pool_max_size, + set_simplified_mem_pool_max_size); + +/** + * show_simplified_lp_mem_pool_max_size - Show the maximum size for the memory + * pool 0 of large (2MiB) pages. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the max size. + * + * This function is called to get the maximum size for the memory pool 0 of + * large (2MiB) pages. It is assumed that the maximum size value is same for + * all the pools. + * + * Return: The number of bytes output to @buf. + */ +static ssize_t show_simplified_lp_mem_pool_max_size(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *const kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE, + kbdev->mem_pools.large, 1, kbase_mem_pool_debugfs_max_size); +} + +/** + * set_simplified_lp_mem_pool_max_size - Set the same maximum size for all the + * memory pools of large (2MiB) pages. + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes written to the sysfs file + * + * This function is called to set the same maximum size for all the memory + * pools of large (2MiB) pages. + * + * Return: @count if the function succeeded. An error code on failure.
+ */ +static ssize_t set_simplified_lp_mem_pool_max_size(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *const kbdev = to_kbase_device(dev); + unsigned long new_size; + int gid; + int err; + + if (!kbdev) + return -ENODEV; + + err = kstrtoul(buf, 0, &new_size); + if (err) + return -EINVAL; + + for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) + kbase_mem_pool_debugfs_set_max_size( + kbdev->mem_pools.large, gid, (size_t)new_size); + + return count; +} + +static DEVICE_ATTR(lp_max_size, 0600, show_simplified_lp_mem_pool_max_size, + set_simplified_lp_mem_pool_max_size); + +/** + * show_simplified_ctx_default_max_size - Show the default maximum size for the + * memory pool 0 of small (4KiB) pages. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the pool size. + * + * This function is called to get the default ctx maximum size for the memory + * pool 0 of small (4KiB) pages. It is assumed that maximum size value is same + * for all the pools. The maximum size for the pool of large (2MiB) pages will + * be same as max size of the pool of small (4KiB) pages in terms of bytes. + * + * Return: The number of bytes output to @buf. + */ +static ssize_t show_simplified_ctx_default_max_size(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev = to_kbase_device(dev); + size_t max_size; + + if (!kbdev) + return -ENODEV; + + max_size = kbase_mem_pool_config_debugfs_max_size( + kbdev->mem_pool_defaults.small, 0); + + return scnprintf(buf, PAGE_SIZE, "%zu\n", max_size); +} + +/** + * set_simplified_ctx_default_max_size - Set the same default maximum size for + * all the pools created for new + * contexts. This covers the pool of + * large pages as well and its max size + * will be same as max size of the pool + * of small pages in terms of bytes. 
+ * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The value written to the sysfs file. + * @count: The number of bytes written to the sysfs file. + * + * This function is called to set the same maximum size for all pools created + * for new contexts. + * + * Return: @count if the function succeeded. An error code on failure. + */ +static ssize_t set_simplified_ctx_default_max_size(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + unsigned long new_size; + int err; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + err = kstrtoul(buf, 0, &new_size); + if (err) + return -EINVAL; + + kbase_mem_pool_group_config_set_max_size( + &kbdev->mem_pool_defaults, (size_t)new_size); - return err ? err : count; + return count; } -static DEVICE_ATTR(lp_mem_pool_max_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_max_size, - set_lp_mem_pool_max_size); +static DEVICE_ATTR(ctx_default_max_size, 0600, + show_simplified_ctx_default_max_size, + set_simplified_ctx_default_max_size); +#if !MALI_USE_CSF /** * show_js_ctx_scheduling_mode - Show callback for js_ctx_scheduling_mode sysfs * entry. @@ -3218,7 +3998,6 @@ static DEVICE_ATTR(js_ctx_scheduling_mode, S_IRUGO | S_IWUSR, set_js_ctx_scheduling_mode); #ifdef MALI_KBASE_BUILD -#ifdef CONFIG_DEBUG_FS /* Number of entries in serialize_jobs_settings[] */ #define NR_SERIALIZE_JOBS_SETTINGS 5 @@ -3239,8 +4018,47 @@ static struct }; /** - * kbasep_serialize_jobs_seq_show - Show callback for the serialize_jobs debugfs - * file + * update_serialize_jobs_setting - Update the serialization setting for the + * submission of GPU jobs. + * + * This function is called when the serialize_jobs sysfs/debugfs file is + * written to. It matches the requested setting against the available settings + * and if a matching setting is found updates kbdev->serialize_jobs. 
+ * + * @kbdev: An instance of the GPU platform device, allocated from the probe + * method of the driver. + * @buf: Buffer containing the value written to the sysfs/debugfs file. + * @count: The number of bytes to write to the sysfs/debugfs file. + * + * Return: @count if the function succeeded. An error code on failure. + */ +static ssize_t update_serialize_jobs_setting(struct kbase_device *kbdev, + const char *buf, size_t count) +{ + int i; + bool valid = false; + + for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { + if (sysfs_streq(serialize_jobs_settings[i].name, buf)) { + kbdev->serialize_jobs = + serialize_jobs_settings[i].setting; + valid = true; + break; + } + } + + if (!valid) { + dev_err(kbdev->dev, "serialize_jobs: invalid setting"); + return -EINVAL; + } + + return count; +} + +#if IS_ENABLED(CONFIG_DEBUG_FS) +/** + * kbasep_serialize_jobs_seq_debugfs_show - Show callback for the serialize_jobs + * debugfs file * @sfile: seq_file pointer * @data: Private callback data * @@ -3250,7 +4068,8 @@ static struct * * Return: 0 on success, or an error code on error */ -static int kbasep_serialize_jobs_seq_show(struct seq_file *sfile, void *data) +static int kbasep_serialize_jobs_seq_debugfs_show(struct seq_file *sfile, + void *data) { struct kbase_device *kbdev = sfile->private; int i; @@ -3291,8 +4110,6 @@ static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file, struct seq_file *s = file->private_data; struct kbase_device *kbdev = s->private; char buf[MAX_SERIALIZE_JOBS_NAME_LEN]; - int i; - bool valid = false; CSTD_UNUSED(ppos); @@ -3302,21 +4119,7 @@ static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file, buf[count] = 0; - for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { - if (sysfs_streq(serialize_jobs_settings[i].name, buf)) { - kbdev->serialize_jobs = - serialize_jobs_settings[i].setting; - valid = true; - break; - } - } - - if (!valid) { - dev_err(kbdev->dev, "serialize_jobs: invalid setting\n"); - return -EINVAL; - } - - 
return count; + return update_serialize_jobs_setting(kbdev, buf, count); } /** @@ -3330,7 +4133,8 @@ static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file, static int kbasep_serialize_jobs_debugfs_open(struct inode *in, struct file *file) { - return single_open(file, kbasep_serialize_jobs_seq_show, in->i_private); + return single_open(file, kbasep_serialize_jobs_seq_debugfs_show, + in->i_private); } static const struct file_operations kbasep_serialize_jobs_debugfs_fops = { @@ -3343,27 +4147,101 @@ static const struct file_operations kbasep_serialize_jobs_debugfs_fops = { }; #endif /* CONFIG_DEBUG_FS */ + +/** + * show_serialize_jobs_sysfs - Show callback for serialize_jobs sysfs file. + * + * This function is called to get the contents of the serialize_jobs sysfs + * file. This is a list of the available settings with the currently active + * one surrounded by square brackets. + * + * @dev: The device this sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The output buffer for the sysfs file contents + * + * Return: The number of bytes output to @buf. + */ +static ssize_t show_serialize_jobs_sysfs(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kbase_device *kbdev = to_kbase_device(dev); + ssize_t ret = 0; + int i; + + for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { + if (kbdev->serialize_jobs == + serialize_jobs_settings[i].setting) + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s]", + serialize_jobs_settings[i].name); + else + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", + serialize_jobs_settings[i].name); + } + + if (ret < PAGE_SIZE - 1) { + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); + } else { + buf[PAGE_SIZE - 2] = '\n'; + buf[PAGE_SIZE - 1] = '\0'; + ret = PAGE_SIZE - 1; + } + + return ret; +} + +/** + * store_serialize_jobs_sysfs - Store callback for serialize_jobs sysfs file. + * + * This function is called when the serialize_jobs sysfs file is written to. 
+ * It matches the requested setting against the available settings and if a + * matching setting is found updates kbdev->serialize_jobs. + * + * @dev: The device this sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes to write to the sysfs file + * + * Return: @count if the function succeeded. An error code on failure. + */ +static ssize_t store_serialize_jobs_sysfs(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return update_serialize_jobs_setting(to_kbase_device(dev), buf, count); +} + +static DEVICE_ATTR(serialize_jobs, 0600, show_serialize_jobs_sysfs, + store_serialize_jobs_sysfs); #endif /* MALI_KBASE_BUILD */ +#endif /* !MALI_USE_CSF */ static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data) { struct kbase_device *kbdev = container_of(data, struct kbase_device, protected_mode_hwcnt_disable_work); + spinlock_t *backend_lock; unsigned long flags; bool do_disable; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); +#if MALI_USE_CSF + backend_lock = &kbdev->csf.scheduler.interrupt_lock; +#else + backend_lock = &kbdev->hwaccess_lock; +#endif + + spin_lock_irqsave(backend_lock, flags); do_disable = !kbdev->protected_mode_hwcnt_desired && !kbdev->protected_mode_hwcnt_disabled; - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(backend_lock, flags); if (!do_disable) return; kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + spin_lock_irqsave(backend_lock, flags); do_disable = !kbdev->protected_mode_hwcnt_desired && !kbdev->protected_mode_hwcnt_disabled; @@ -3373,7 +4251,9 @@ static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data) * the state machine. 
*/ kbdev->protected_mode_hwcnt_disabled = true; +#if !MALI_USE_CSF kbase_backend_slot_update(kbdev); +#endif /* !MALI_USE_CSF */ } else { /* Protected mode state was updated while we were doing the * disable, so we need to undo the disable we just performed. @@ -3381,9 +4261,10 @@ static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data) kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); } - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(backend_lock, flags); } +#ifndef PLATFORM_PROTECTED_CALLBACKS static int kbasep_protected_mode_enable(struct protected_mode_device *pdev) { struct kbase_device *kbdev = pdev->data; @@ -3403,6 +4284,9 @@ static const struct protected_mode_ops kbasep_native_protected_ops = { .protected_mode_disable = kbasep_protected_mode_disable }; +#define PLATFORM_PROTECTED_CALLBACKS (&kbasep_native_protected_ops) +#endif /* PLATFORM_PROTECTED_CALLBACKS */ + int kbase_protected_mode_init(struct kbase_device *kbdev) { /* Use native protected ops */ @@ -3411,7 +4295,7 @@ int kbase_protected_mode_init(struct kbase_device *kbdev) if (!kbdev->protected_dev) return -ENOMEM; kbdev->protected_dev->data = kbdev; - kbdev->protected_ops = &kbasep_native_protected_ops; + kbdev->protected_ops = PLATFORM_PROTECTED_CALLBACKS; INIT_WORK(&kbdev->protected_mode_hwcnt_disable_work, kbasep_protected_mode_hwcnt_disable_worker); kbdev->protected_mode_hwcnt_desired = true; @@ -3425,15 +4309,6 @@ void kbase_protected_mode_term(struct kbase_device *kbdev) kfree(kbdev->protected_dev); } -#ifdef CONFIG_MALI_NO_MALI -static int kbase_common_reg_map(struct kbase_device *kbdev) -{ - return 0; -} -static void kbase_common_reg_unmap(struct kbase_device * const kbdev) -{ -} -#else /* CONFIG_MALI_NO_MALI */ static int kbase_common_reg_map(struct kbase_device *kbdev) { int err = 0; @@ -3469,7 +4344,6 @@ static void kbase_common_reg_unmap(struct kbase_device * const kbdev) kbdev->reg_size = 0; } } -#endif /* CONFIG_MALI_NO_MALI 
*/ int registers_map(struct kbase_device * const kbdev) { @@ -3489,6 +4363,15 @@ int registers_map(struct kbase_device * const kbdev) kbdev->reg_start = reg_res->start; kbdev->reg_size = resource_size(reg_res); +#if MALI_USE_CSF + if (kbdev->reg_size < + (CSF_HW_DOORBELL_PAGE_OFFSET + + CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE)) { + dev_err(kbdev->dev, "Insufficient register space, will override to the required size\n"); + kbdev->reg_size = CSF_HW_DOORBELL_PAGE_OFFSET + + CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE; + } +#endif err = kbase_common_reg_map(kbdev); if (err) { @@ -3570,6 +4453,7 @@ int kbase_device_pm_init(struct kbase_device *kbdev) u32 gpu_model_id; if (kbase_is_pv_enabled(kbdev->dev->of_node)) { + dev_info(kbdev->dev, "Arbitration interface enabled\n"); if (kbase_is_pm_enabled(kbdev->dev->of_node)) { /* Arbitration AND power management invalid */ dev_err(kbdev->dev, "Invalid combination of arbitration AND power management\n"); @@ -3593,13 +4477,16 @@ int kbase_device_pm_init(struct kbase_device *kbdev) gpu_model_id = GPU_ID2_MODEL_MATCH_VALUE(product_id); if (gpu_model_id != GPU_ID2_PRODUCT_TGOX - && gpu_model_id != GPU_ID2_PRODUCT_TNOX) { + && gpu_model_id != GPU_ID2_PRODUCT_TNOX + && gpu_model_id != GPU_ID2_PRODUCT_TBAX) { kbase_arbiter_pm_early_term(kbdev); dev_err(kbdev->dev, "GPU platform not suitable for arbitration\n"); return -EPERM; } } } else { + kbdev->arb.arb_if = NULL; + kbdev->arb.arb_dev = NULL; err = power_control_init(kbdev); } #else @@ -3611,7 +4498,7 @@ int kbase_device_pm_init(struct kbase_device *kbdev) void kbase_device_pm_term(struct kbase_device *kbdev) { #ifdef CONFIG_MALI_ARBITER_SUPPORT -#ifdef CONFIG_OF +#if IS_ENABLED(CONFIG_OF) if (kbase_is_pv_enabled(kbdev->dev->of_node)) kbase_arbiter_pm_early_term(kbdev); else @@ -3624,7 +4511,7 @@ void kbase_device_pm_term(struct kbase_device *kbdev) int power_control_init(struct kbase_device *kbdev) { -#if KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE || !defined(CONFIG_OF) 
+#ifndef CONFIG_OF /* Power control initialization requires at least the capability to get * regulators and clocks from the device tree, as well as parsing * arrays of unsigned integer values. @@ -3719,12 +4606,6 @@ int power_control_init(struct kbase_device *kbdev) * on the device tree of the platform shouldn't prevent the driver * from completing its initialization. */ -#if (KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE && \ - !defined(LSK_OPPV2_BACKPORT)) - err = of_init_opp_table(kbdev->dev); - CSTD_UNUSED(err); -#else - #if defined(CONFIG_PM_OPP) #if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \ defined(CONFIG_REGULATOR)) @@ -3736,8 +4617,6 @@ int power_control_init(struct kbase_device *kbdev) err = dev_pm_opp_of_add_table(kbdev->dev); CSTD_UNUSED(err); #endif /* CONFIG_PM_OPP */ - -#endif /* KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE */ return 0; clocks_probe_defer: @@ -3746,20 +4625,13 @@ clocks_probe_defer: regulator_put(kbdev->regulators[i]); #endif return err; -#endif /* KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE */ +#endif /* CONFIG_OF */ } void power_control_term(struct kbase_device *kbdev) { unsigned int i; -#if (KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE && \ - !defined(LSK_OPPV2_BACKPORT)) -#if KERNEL_VERSION(3, 19, 0) <= LINUX_VERSION_CODE - of_free_opp_table(kbdev->dev); -#endif -#else - #if defined(CONFIG_PM_OPP) dev_pm_opp_of_remove_table(kbdev->dev); #if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \ @@ -3769,8 +4641,6 @@ void power_control_term(struct kbase_device *kbdev) #endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */ #endif /* CONFIG_PM_OPP */ -#endif /* KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE */ - for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { if (kbdev->clocks[i]) { if (__clk_is_enabled(kbdev->clocks[i])) @@ -3781,24 +4651,23 @@ void power_control_term(struct kbase_device *kbdev) break; } -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ - && 
defined(CONFIG_REGULATOR) +#if defined(CONFIG_OF) && defined(CONFIG_REGULATOR) for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { if (kbdev->regulators[i]) { regulator_put(kbdev->regulators[i]); kbdev->regulators[i] = NULL; } } -#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ +#endif } #ifdef MALI_KBASE_BUILD -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) static void trigger_reset(struct kbase_device *kbdev) { kbase_pm_context_active(kbdev); - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(kbdev); kbase_pm_context_idle(kbdev); } @@ -3826,7 +4695,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get,\ MAKE_QUIRK_ACCESSORS(sc); MAKE_QUIRK_ACCESSORS(tiler); MAKE_QUIRK_ACCESSORS(mmu); -MAKE_QUIRK_ACCESSORS(jm); +MAKE_QUIRK_ACCESSORS(gpu); static ssize_t kbase_device_debugfs_reset_write(struct file *file, const char __user *ubuf, size_t count, loff_t *ppos) @@ -3947,7 +4816,9 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev) kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname, NULL); if (!kbdev->mali_debugfs_directory) { - dev_err(kbdev->dev, "Couldn't create mali debugfs directory\n"); + dev_err(kbdev->dev, + "Couldn't create mali debugfs directory: %s\n", + kbdev->devname); err = -ENOMEM; goto out; } @@ -3960,6 +4831,14 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev) goto out; } + kbdev->debugfs_instr_directory = debugfs_create_dir("instrumentation", + kbdev->mali_debugfs_directory); + if (!kbdev->debugfs_instr_directory) { + dev_err(kbdev->dev, "Couldn't create mali debugfs instrumentation directory\n"); + err = -ENOMEM; + goto out; + } + debugfs_ctx_defaults_directory = debugfs_create_dir("defaults", kbdev->debugfs_ctx_directory); if (!debugfs_ctx_defaults_directory) { @@ -3968,20 +4847,20 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev) goto out; } -#if !MALI_CUSTOMER_RELEASE - kbasep_regs_dump_debugfs_init(kbdev); -#endif /* 
!MALI_CUSTOMER_RELEASE */ kbasep_regs_history_debugfs_init(kbdev); +#if !MALI_USE_CSF kbase_debug_job_fault_debugfs_init(kbdev); +#endif /* !MALI_USE_CSF */ kbasep_gpu_memory_debugfs_init(kbdev); kbase_as_fault_debugfs_init(kbdev); -#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS +#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS kbase_instr_backend_debugfs_init(kbdev); #endif /* fops_* variables created by invocations of macro - * MAKE_QUIRK_ACCESSORS() above. */ + * MAKE_QUIRK_ACCESSORS() above. + */ debugfs_create_file("quirks_sc", 0644, kbdev->mali_debugfs_directory, kbdev, &fops_sc_quirks); @@ -3991,9 +4870,8 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev) debugfs_create_file("quirks_mmu", 0644, kbdev->mali_debugfs_directory, kbdev, &fops_mmu_quirks); - debugfs_create_file("quirks_jm", 0644, - kbdev->mali_debugfs_directory, kbdev, - &fops_jm_quirks); + debugfs_create_file("quirks_gpu", 0644, kbdev->mali_debugfs_directory, + kbdev, &fops_gpu_quirks); debugfs_create_bool("infinite_cache", mode, debugfs_ctx_defaults_directory, @@ -4022,16 +4900,20 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev) kbase_ktrace_debugfs_init(kbdev); #ifdef CONFIG_MALI_DEVFREQ -#ifdef CONFIG_DEVFREQ_THERMAL +#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) if (kbdev->devfreq) kbase_ipa_debugfs_init(kbdev); #endif /* CONFIG_DEVFREQ_THERMAL */ #endif /* CONFIG_MALI_DEVFREQ */ +#if !MALI_USE_CSF debugfs_create_file("serialize_jobs", S_IRUGO | S_IWUSR, kbdev->mali_debugfs_directory, kbdev, &kbasep_serialize_jobs_debugfs_fops); +#endif + kbase_dvfs_status_debugfs_init(kbdev); + return 0; out: @@ -4048,7 +4930,7 @@ void kbase_device_debugfs_term(struct kbase_device *kbdev) int kbase_device_coherency_init(struct kbase_device *kbdev) { -#ifdef CONFIG_OF +#if IS_ENABLED(CONFIG_OF) u32 supported_coherency_bitmap = kbdev->gpu_props.props.raw_props.coherency_mode; const void *coherency_override_dts; @@ -4075,7 +4957,7 @@ int kbase_device_coherency_init(struct kbase_device 
*kbdev) kbdev->system_coherency = COHERENCY_NONE; /* device tree may override the coherency */ -#ifdef CONFIG_OF +#if IS_ENABLED(CONFIG_OF) coherency_override_dts = of_get_property(kbdev->dev->of_node, "system-coherency", NULL); @@ -4083,6 +4965,17 @@ int kbase_device_coherency_init(struct kbase_device *kbdev) override_coherency = be32_to_cpup(coherency_override_dts); +#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_NO_MALI) + /* ACE coherency mode is not supported by Driver on CSF GPUs. + * Return an error to signal the invalid device tree configuration. + */ + if (override_coherency == COHERENCY_ACE) { + dev_err(kbdev->dev, + "ACE coherency not supported, wrong DT configuration"); + return -EINVAL; + } +#endif + if ((override_coherency <= COHERENCY_NONE) && (supported_coherency_bitmap & COHERENCY_FEATURE_BIT(override_coherency))) { @@ -4106,53 +4999,181 @@ int kbase_device_coherency_init(struct kbase_device *kbdev) return 0; } -#ifdef CONFIG_MALI_BUSLOG -/* Callback used by the kbase bus logger client, to initiate a GPU reset - * when the bus log is restarted. GPU reset is used as reference point - * in HW bus log analyses. +#if MALI_USE_CSF +/** + * csg_scheduling_period_store - Store callback for the csg_scheduling_period + * sysfs file. + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes written to the sysfs file + * + * This function is called when the csg_scheduling_period sysfs file is written + * to. It checks the data written, and if valid updates the scheduling period. + * + * Return: @count if the function succeeded. An error code on failure.
*/ -static void kbase_logging_started_cb(void *data) +static ssize_t csg_scheduling_period_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { - struct kbase_device *kbdev = (struct kbase_device *)data; + struct kbase_device *kbdev; + int ret; + unsigned int csg_scheduling_period; - if (kbase_prepare_to_reset_gpu(kbdev)) - kbase_reset_gpu(kbdev); - dev_info(kbdev->dev, "KBASE - Bus logger restarted\n"); + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ret = kstrtouint(buf, 0, &csg_scheduling_period); + if (ret || csg_scheduling_period == 0) { + dev_err(kbdev->dev, + "Couldn't process csg_scheduling_period write operation.\n" + "Use format 'csg_scheduling_period_ms', and csg_scheduling_period_ms > 0\n"); + return -EINVAL; + } + + kbase_csf_scheduler_lock(kbdev); + kbdev->csf.scheduler.csg_scheduling_period_ms = csg_scheduling_period; + dev_dbg(kbdev->dev, "CSG scheduling period: %ums\n", + csg_scheduling_period); + kbase_csf_scheduler_unlock(kbdev); + + return count; } -int buslog_init(struct kbase_device *kbdev) +/** + * csg_scheduling_period_show - Show callback for the csg_scheduling_period + * sysfs entry. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the GPU information. + * + * This function is called to get the current reset timeout. + * + * Return: The number of bytes output to @buf. 
+ */ +static ssize_t csg_scheduling_period_show(struct device *dev, + struct device_attribute *attr, + char *const buf) { - int err = 0; + struct kbase_device *kbdev; + ssize_t ret; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; - err = bl_core_client_register(kbdev->devname, - kbase_logging_started_cb, - kbdev, &kbdev->buslogger, - THIS_MODULE, NULL); - if (err == 0) - bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024); + ret = scnprintf(buf, PAGE_SIZE, "%u\n", + kbdev->csf.scheduler.csg_scheduling_period_ms); - return err; + return ret; +} + +static DEVICE_ATTR(csg_scheduling_period, 0644, csg_scheduling_period_show, + csg_scheduling_period_store); + +/** + * fw_timeout_store - Store callback for the fw_timeout sysfs file. + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes written to the sysfs file + * + * This function is called when the fw_timeout sysfs file is written to. It + * checks the data written, and if valid updates the reset timeout. + * + * Return: @count if the function succeeded. An error code on failure. 
+ */ +static ssize_t fw_timeout_store(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct kbase_device *kbdev; + int ret; + unsigned int fw_timeout; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ret = kstrtouint(buf, 0, &fw_timeout); + if (ret || fw_timeout == 0) { + dev_err(kbdev->dev, "%s\n%s\n%u", + "Couldn't process fw_timeout write operation.", + "Use format 'fw_timeout_ms', and fw_timeout_ms > 0", + FIRMWARE_PING_INTERVAL_MS); + return -EINVAL; + } + + kbase_csf_scheduler_lock(kbdev); + kbdev->csf.fw_timeout_ms = fw_timeout; + kbase_csf_scheduler_unlock(kbdev); + dev_dbg(kbdev->dev, "Firmware timeout: %ums\n", fw_timeout); + + return count; } -void buslog_term(struct kbase_device *kbdev) +/** + * fw_timeout_show - Show callback for the firmware timeout sysfs entry. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the GPU information. + * + * This function is called to get the current reset timeout. + * + * Return: The number of bytes output to @buf. 
+ */ +static ssize_t fw_timeout_show(struct device *dev, + struct device_attribute *attr, char *const buf) { - bl_core_client_unregister(kbdev->buslogger); + struct kbase_device *kbdev; + ssize_t ret; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ret = scnprintf(buf, PAGE_SIZE, "%u\n", kbdev->csf.fw_timeout_ms); + + return ret; } -#endif + +static DEVICE_ATTR(fw_timeout, 0644, fw_timeout_show, fw_timeout_store); +#endif /* MALI_USE_CSF */ + +static struct attribute *kbase_scheduling_attrs[] = { +#if !MALI_USE_CSF + &dev_attr_serialize_jobs.attr, +#endif /* !MALI_USE_CSF */ + NULL +}; static struct attribute *kbase_attrs[] = { #ifdef CONFIG_MALI_DEBUG &dev_attr_debug_command.attr, +#if !MALI_USE_CSF &dev_attr_js_softstop_always.attr, +#endif /* !MALI_USE_CSF */ #endif +#if !MALI_USE_CSF &dev_attr_js_timeouts.attr, &dev_attr_soft_job_timeout.attr, +#endif /* !MALI_USE_CSF */ &dev_attr_gpuinfo.attr, &dev_attr_dvfs_period.attr, &dev_attr_pm_poweroff.attr, +#if MALI_USE_CSF + &dev_attr_idle_hysteresis_time.attr, +#endif &dev_attr_reset_timeout.attr, +#if !MALI_USE_CSF &dev_attr_js_scheduling_period.attr, +#else + &dev_attr_csg_scheduling_period.attr, + &dev_attr_fw_timeout.attr, +#endif /* !MALI_USE_CSF */ &dev_attr_power_policy.attr, &dev_attr_core_mask.attr, &dev_attr_gpu_memory.attr, @@ -4160,10 +5181,31 @@ static struct attribute *kbase_attrs[] = { &dev_attr_mem_pool_max_size.attr, &dev_attr_lp_mem_pool_size.attr, &dev_attr_lp_mem_pool_max_size.attr, +#if !MALI_USE_CSF &dev_attr_js_ctx_scheduling_mode.attr, +#endif /* !MALI_USE_CSF */ + NULL +}; + +static struct attribute *kbase_mempool_attrs[] = { + &dev_attr_max_size.attr, + &dev_attr_lp_max_size.attr, + &dev_attr_ctx_default_max_size.attr, NULL }; +#define SYSFS_SCHEDULING_GROUP "scheduling" +static const struct attribute_group kbase_scheduling_attr_group = { + .name = SYSFS_SCHEDULING_GROUP, + .attrs = kbase_scheduling_attrs, +}; + +#define SYSFS_MEMPOOL_GROUP "mempool" +static const 
struct attribute_group kbase_mempool_attr_group = { + .name = SYSFS_MEMPOOL_GROUP, + .attrs = kbase_mempool_attrs, +}; + static const struct attribute_group kbase_attr_group = { .attrs = kbase_attrs, }; @@ -4189,6 +5231,29 @@ int kbase_sysfs_init(struct kbase_device *kbdev) err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group); err += sysfs_create_group(&kbdev->dev->kobj, &kbase_ctx_attr_group); + if (err) + return err; + + err = sysfs_create_group(&kbdev->dev->kobj, + &kbase_scheduling_attr_group); + if (err) { + dev_err(kbdev->dev, "Creation of %s sysfs group failed", + SYSFS_SCHEDULING_GROUP); + sysfs_remove_group(&kbdev->dev->kobj, + &kbase_attr_group); + return err; + } + + err = sysfs_create_group(&kbdev->dev->kobj, + &kbase_mempool_attr_group); + if (err) { + dev_err(kbdev->dev, "Creation of %s sysfs group failed", + SYSFS_MEMPOOL_GROUP); + sysfs_remove_group(&kbdev->dev->kobj, + &kbase_scheduling_attr_group); + sysfs_remove_group(&kbdev->dev->kobj, + &kbase_attr_group); + } return err; } @@ -4196,6 +5261,8 @@ int kbase_sysfs_init(struct kbase_device *kbdev) void kbase_sysfs_term(struct kbase_device *kbdev) { sysfs_remove_group(&kbdev->dev->kobj, &kbase_ctx_attr_group); + sysfs_remove_group(&kbdev->dev->kobj, &kbase_mempool_attr_group); + sysfs_remove_group(&kbdev->dev->kobj, &kbase_scheduling_attr_group); sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); put_device(kbdev->dev); } @@ -4254,7 +5321,8 @@ static int kbase_platform_device_probe(struct platform_device *pdev) if (err) { if (err == -EPROBE_DEFER) - dev_err(kbdev->dev, "Device initialization Deferred\n"); + dev_info(kbdev->dev, + "Device initialization Deferred\n"); else dev_err(kbdev->dev, "Device initialization failed\n"); @@ -4296,8 +5364,11 @@ static int kbase_device_suspend(struct device *dev) kbase_pm_suspend(kbdev); -#if defined(CONFIG_MALI_DEVFREQ) && \ - (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) +#ifdef CONFIG_MALI_MIDGARD_DVFS + kbase_pm_metrics_stop(kbdev); +#endif 
+ +#ifdef CONFIG_MALI_DEVFREQ dev_dbg(dev, "Callback %s\n", __func__); if (kbdev->devfreq) { kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_SUSPEND); @@ -4325,8 +5396,11 @@ static int kbase_device_resume(struct device *dev) kbase_pm_resume(kbdev); -#if defined(CONFIG_MALI_DEVFREQ) && \ - (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) +#ifdef CONFIG_MALI_MIDGARD_DVFS + kbase_pm_metrics_start(kbdev); +#endif + +#ifdef CONFIG_MALI_DEVFREQ dev_dbg(dev, "Callback %s\n", __func__); if (kbdev->devfreq) { mutex_lock(&kbdev->pm.lock); @@ -4359,8 +5433,12 @@ static int kbase_device_runtime_suspend(struct device *dev) return -ENODEV; dev_dbg(dev, "Callback %s\n", __func__); -#if defined(CONFIG_MALI_DEVFREQ) && \ - (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) + +#ifdef CONFIG_MALI_MIDGARD_DVFS + kbase_pm_metrics_stop(kbdev); +#endif + +#ifdef CONFIG_MALI_DEVFREQ if (kbdev->devfreq) kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_SUSPEND); #endif @@ -4398,8 +5476,11 @@ static int kbase_device_runtime_resume(struct device *dev) dev_dbg(dev, "runtime resume\n"); } -#if defined(CONFIG_MALI_DEVFREQ) && \ - (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) +#ifdef CONFIG_MALI_MIDGARD_DVFS + kbase_pm_metrics_start(kbdev); +#endif + +#ifdef CONFIG_MALI_DEVFREQ if (kbdev->devfreq) kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_RESUME); #endif @@ -4452,7 +5533,7 @@ static const struct dev_pm_ops kbase_pm_ops = { #endif /* KBASE_PM_RUNTIME */ }; -#ifdef CONFIG_OF +#if IS_ENABLED(CONFIG_OF) static const struct of_device_id kbase_dt_ids[] = { { .compatible = "arm,malit6xx" }, { .compatible = "arm,mali-midgard" }, @@ -4467,9 +5548,9 @@ static struct platform_driver kbase_platform_driver = { .remove = kbase_platform_device_remove, .driver = { .name = kbase_drv_name, - .owner = THIS_MODULE, .pm = &kbase_pm_ops, .of_match_table = of_match_ptr(kbase_dt_ids), + .probe_type = PROBE_PREFER_ASYNCHRONOUS, }, }; @@ -4477,7 +5558,7 @@ static struct platform_driver kbase_platform_driver = { * The 
driver will not provide a shortcut to create the Mali platform device * anymore when using Device Tree. */ -#ifdef CONFIG_OF +#if IS_ENABLED(CONFIG_OF) module_platform_driver(kbase_platform_driver); #else @@ -4512,6 +5593,7 @@ MODULE_LICENSE("GPL"); MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \ __stringify(BASE_UK_VERSION_MAJOR) "." \ __stringify(BASE_UK_VERSION_MINOR) ")"); +MODULE_SOFTDEP("pre: memory_group_manager"); #define CREATE_TRACE_POINTS /* Create the trace points (otherwise we just get code to call a tracepoint) */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_cs_experimental.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_cs_experimental.h index e1fffc3..4dc09e4 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_cs_experimental.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_cs_experimental.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,18 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * - *//* SPDX-License-Identifier: GPL-2.0 */ - -/* - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * */ #ifndef _KBASE_CS_EXPERIMENTAL_H_ @@ -41,9 +30,6 @@ */ static inline void mali_kbase_print_cs_experimental(void) { -#if MALI_JIT_PRESSURE_LIMIT - pr_info("mali_kbase: JIT_PRESSURE_LIMIT (experimental) enabled"); -#endif /* MALI_JIT_PRESSURE_LIMIT */ #if MALI_INCREMENTAL_RENDERING pr_info("mali_kbase: INCREMENTAL_RENDERING (experimental) enabled"); #endif /* MALI_INCREMENTAL_RENDERING */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c index cea91bc..d06380d 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2017-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,13 +17,9 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #include -#include - #include #include "mali_kbase_ctx_sched.h" #include "tl/mali_kbase_tracepoints.h" @@ -46,7 +43,8 @@ int kbase_ctx_sched_init(struct kbase_device *kbdev) int as_present = (1U << kbdev->nr_hw_address_spaces) - 1; /* These two must be recalculated if nr_hw_address_spaces changes - * (e.g. for HW workarounds) */ + * (e.g. for HW workarounds) + */ kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces; kbdev->as_free = as_present; /* All ASs initially free */ @@ -212,6 +210,13 @@ void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev) for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) { struct kbase_context *kctx; +#if MALI_USE_CSF + if ((i == MCU_AS_NR) && kbdev->csf.firmware_inited) { + kbase_mmu_update(kbdev, &kbdev->csf.mcu_mmu, + MCU_AS_NR); + continue; + } +#endif kctx = kbdev->as_to_kctx[i]; if (kctx) { if (atomic_read(&kctx->refcount)) { @@ -254,7 +259,7 @@ struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount( found_kctx = kbdev->as_to_kctx[as_nr]; - if (found_kctx != NULL) + if (!WARN_ON(found_kctx == NULL)) kbase_ctx_sched_retain_ctx_refcount(found_kctx); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -264,6 +269,21 @@ struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount( struct kbase_context *kbase_ctx_sched_as_to_ctx(struct kbase_device *kbdev, size_t as_nr) +{ + unsigned long flags; + struct kbase_context *found_kctx; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + found_kctx = kbase_ctx_sched_as_to_ctx_nolock(kbdev, as_nr); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return found_kctx; +} + +struct kbase_context *kbase_ctx_sched_as_to_ctx_nolock( + struct kbase_device *kbdev, size_t as_nr) { struct kbase_context *found_kctx; @@ -273,13 +293,14 @@ struct kbase_context *kbase_ctx_sched_as_to_ctx(struct kbase_device *kbdev, if (WARN_ON(as_nr >= BASE_MAX_NR_AS)) return NULL; - found_kctx = kbdev->as_to_kctx[as_nr]; + 
lockdep_assert_held(&kbdev->hwaccess_lock); - if (WARN_ON(!found_kctx)) - return NULL; + found_kctx = kbdev->as_to_kctx[as_nr]; - if (WARN_ON(atomic_read(&found_kctx->refcount) <= 0)) - return NULL; + if (found_kctx) { + if (atomic_read(&found_kctx->refcount) <= 0) + found_kctx = NULL; + } return found_kctx; } @@ -342,3 +363,40 @@ void kbase_ctx_sched_release_ctx_lock(struct kbase_context *kctx) spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); } + +#if MALI_USE_CSF +bool kbase_ctx_sched_inc_refcount_if_as_valid(struct kbase_context *kctx) +{ + struct kbase_device *kbdev; + bool added_ref = false; + unsigned long flags; + + if (WARN_ON(kctx == NULL)) + return added_ref; + + kbdev = kctx->kbdev; + + if (WARN_ON(kbdev == NULL)) + return added_ref; + + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + if ((kctx->as_nr != KBASEP_AS_NR_INVALID) && + (kctx == kbdev->as_to_kctx[kctx->as_nr])) { + atomic_inc(&kctx->refcount); + + if (kbdev->as_free & (1u << kctx->as_nr)) + kbdev->as_free &= ~(1u << kctx->as_nr); + + KBASE_KTRACE_ADD(kbdev, SCHED_RETAIN_CTX_NOLOCK, kctx, + kbase_ktrace_get_ctx_refcnt(kctx)); + added_ref = true; + } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); + + return added_ref; +} +#endif diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h index 1affa71..334724f 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_ctx_sched.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017-2018, 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017-2018, 2020-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KBASE_CTX_SCHED_H_ @@ -26,7 +25,7 @@ #include /** - * The Context Scheduler manages address space assignment and reference + * DOC: The Context Scheduler manages address space assignment and reference * counting to kbase_context. The interface has been designed to minimise * interactions between the Job Scheduler and Power Management/MMU to support * the existing Job Scheduler interface. @@ -41,7 +40,7 @@ */ /** - * kbase_ctx_sched_init - Initialise the context scheduler + * kbase_ctx_sched_init() - Initialise the context scheduler * @kbdev: The device for which the context scheduler needs to be initialised * * This must be called during device initialisation. The number of hardware @@ -167,6 +166,21 @@ struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount( struct kbase_context *kbase_ctx_sched_as_to_ctx(struct kbase_device *kbdev, size_t as_nr); +/** + * kbase_ctx_sched_as_to_ctx_nolock - Lookup a context based on its current + * address space. + * @kbdev: The device for which the returned context must belong + * @as_nr: address space assigned to the context of interest + * + * The following lock must be held by the caller: + * * kbase_device::hwaccess_lock + * + * Return: a valid struct kbase_context on success or NULL on failure, + * indicating that no context was found in as_nr. 
+ */ +struct kbase_context *kbase_ctx_sched_as_to_ctx_nolock( + struct kbase_device *kbdev, size_t as_nr); + /** * kbase_ctx_sched_inc_refcount_nolock - Refcount a context as being busy, * preventing it from being scheduled out. @@ -206,4 +220,22 @@ bool kbase_ctx_sched_inc_refcount(struct kbase_context *kctx); */ void kbase_ctx_sched_release_ctx_lock(struct kbase_context *kctx); +#if MALI_USE_CSF +/** + * kbase_ctx_sched_inc_refcount_if_as_valid - Refcount the context if it has GPU + * address space slot assigned to it. + * + * @kctx: Context to be refcounted + * + * This function takes a reference on the context if it has a GPU address space + * slot assigned to it. The address space slot will not be available for + * re-assignment until the reference is released. + * + * Return: true if refcount succeeded and the address space slot will not be + * reassigned, false if the refcount failed (because the address space slot + * was not assigned). + */ +bool kbase_ctx_sched_inc_refcount_if_as_valid(struct kbase_context *kctx); +#endif + #endif /* _KBASE_CTX_SCHED_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c index 118f787..6d3b109 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2014, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - - #include static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = { diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h index 2fdb72d..10a3c85 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2012-2015, 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015, 2017, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ - - #ifndef _KBASE_DEBUG_H #define _KBASE_DEBUG_H @@ -43,7 +40,7 @@ #endif /* KBASE_DEBUG_DISABLE_ASSERTS */ /** Function type that is called on an KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */ -typedef void (kbase_debug_assert_hook) (void *); +typedef void kbase_debug_assert_hook(void *); struct kbasep_debug_assert_cb { kbase_debug_assert_hook *func; @@ -51,9 +48,9 @@ struct kbasep_debug_assert_cb { }; /** - * @def KBASEP_DEBUG_PRINT_TRACE - * @brief Private macro containing the format of the trace to display before every message - * @sa KBASE_DEBUG_SKIP_TRACE, KBASE_DEBUG_SKIP_FUNCTION_NAME + * KBASEP_DEBUG_PRINT_TRACE - Private macro containing the format of the trace + * to display before every message @sa KBASE_DEBUG_SKIP_TRACE, + * KBASE_DEBUG_SKIP_FUNCTION_NAME */ #if !KBASE_DEBUG_SKIP_TRACE #define KBASEP_DEBUG_PRINT_TRACE \ @@ -68,21 +65,22 @@ struct kbasep_debug_assert_cb { #endif /** - * @def KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) - * @brief (Private) system printing function associated to the @ref KBASE_DEBUG_ASSERT_MSG event. - * @param trace location in the code from where the message is printed - * @param function function from where the message is printed - * @param ... Format string followed by format arguments. + * KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) - (Private) system printing + * function associated to the @ref KBASE_DEBUG_ASSERT_MSG event. + * @trace: location in the code from where the message is printed + * @function: function from where the message is printed + * @...: Format string followed by format arguments. 
+ * * @note function parameter cannot be concatenated with other strings */ /* Select the correct system output function*/ #ifdef CONFIG_MALI_DEBUG -#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)\ - do { \ - pr_err("Mali: %s function:%s ", trace, function);\ - pr_err(__VA_ARGS__);\ - pr_err("\n");\ - } while (false) +#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) \ + do { \ + pr_err("Mali: %s function:%s ", trace, function); \ + pr_err(__VA_ARGS__); \ + pr_err("\n"); \ + } while (false) #else #define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP() #endif @@ -94,12 +92,12 @@ struct kbasep_debug_assert_cb { #endif /** - * @def KBASE_DEBUG_ASSERT(expr) - * @brief Calls @ref KBASE_PRINT_ASSERT and prints the expression @a expr if @a expr is false + * KBASE_DEBUG_ASSERT(expr) - Calls @ref KBASE_PRINT_ASSERT and prints the + * expression @a expr if @a expr is false + * @expr: Boolean expression * * @note This macro does nothing if the flag @ref KBASE_DEBUG_DISABLE_ASSERTS is set to 1 * - * @param expr Boolean expression */ #define KBASE_DEBUG_ASSERT(expr) \ KBASE_DEBUG_ASSERT_MSG(expr, #expr) @@ -107,15 +105,15 @@ struct kbasep_debug_assert_cb { #if KBASE_DEBUG_DISABLE_ASSERTS #define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP() #else - /** - * @def KBASE_DEBUG_ASSERT_MSG(expr, ...) - * @brief Calls @ref KBASEP_DEBUG_ASSERT_OUT and prints the given message if @a expr is false - * - * @note This macro does nothing if the flag @ref KBASE_DEBUG_DISABLE_ASSERTS is set to 1 - * - * @param expr Boolean expression - * @param ... Message to display when @a expr is false, as a format string followed by format arguments. - */ +/** + * KBASE_DEBUG_ASSERT_MSG() - Calls @ref KBASEP_DEBUG_ASSERT_OUT and prints the + * given message if @a expr is false + * @expr: Boolean expression + * @...: Message to display when @a expr is false, as a format string followed + * by format arguments. 
+ * + * This macro does nothing if the flag KBASE_DEBUG_DISABLE_ASSERTS is set to 1 + */ #define KBASE_DEBUG_ASSERT_MSG(expr, ...) \ do { \ if (!(expr)) { \ @@ -127,10 +125,8 @@ struct kbasep_debug_assert_cb { #endif /* KBASE_DEBUG_DISABLE_ASSERTS */ /** - * @def KBASE_DEBUG_CODE( X ) - * @brief Executes the code inside the macro only in debug mode - * - * @param X Code to compile only in debug mode. + * KBASE_DEBUG_CODE( X ) - Executes the code inside the macro only in debug mode + * @X: Code to compile only in debug mode. */ #ifdef CONFIG_MALI_DEBUG #define KBASE_DEBUG_CODE(X) X @@ -141,7 +137,9 @@ struct kbasep_debug_assert_cb { /** @} */ /** - * @brief Register a function to call on ASSERT + * kbase_debug_assert_register_hook - Register a function to call on ASSERT + * @func: the function to call when an assert is triggered. + * @param: the parameter to pass to \a func when calling it * * Such functions will \b only be called during Debug mode, and for debugging * features \b only. Do not rely on them to be called in general use. @@ -151,13 +149,12 @@ struct kbasep_debug_assert_cb { * @note This function is not thread-safe, and should only be used to * register/deregister once in the module's lifetime. * - * @param[in] func the function to call when an assert is triggered. 
- * @param[in] param the parameter to pass to \a func when calling it */ void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param); /** - * @brief Call a debug assert hook previously registered with kbase_debug_assert_register_hook() + * kbasep_debug_assert_call_hook - Call a debug assert hook previously + * registered with kbase_debug_assert_register_hook() * * @note This function is not thread-safe with respect to multiple threads * registering functions and parameters with diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c index dbc774d..4f021b3 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2016, 2018-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,15 +17,13 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #include #include #include -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev) { @@ -518,23 +517,24 @@ void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev) /* * Initialize the relevant data structure per context */ -void kbase_debug_job_fault_context_init(struct kbase_context *kctx) +int kbase_debug_job_fault_context_init(struct kbase_context *kctx) { /* We need allocate double size register range * Because this memory will keep the register address and value */ kctx->reg_dump = vmalloc(0x4000 * 2); - if (kctx->reg_dump == NULL) - return; - - if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == false) { - vfree(kctx->reg_dump); - kctx->reg_dump = NULL; + if (kctx->reg_dump != NULL) { + if (kbase_debug_job_fault_reg_snapshot_init(kctx, 0x4000) == + false) { + vfree(kctx->reg_dump); + kctx->reg_dump = NULL; + } + INIT_LIST_HEAD(&kctx->job_fault_resume_event_list); + atomic_set(&kctx->job_fault_count, 0); } - INIT_LIST_HEAD(&kctx->job_fault_resume_event_list); - atomic_set(&kctx->job_fault_count, 0); + return 0; } /* @@ -549,6 +549,14 @@ void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx) { WARN_ON(!kbase_ctx_flag(kctx, KCTX_DYING)); + /* Return early if the job fault part of the kbase_device is not + * initialized yet. An error can happen during the device probe after + * the privileged Kbase context was created for the HW counter dumping + * but before the job fault part is initialized. 
+ */ + if (!kctx->kbdev->job_fault_resume_workq) + return; + kbase_ctx_remove_pending_event(kctx); } diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h index ef69627..39aeed0 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2012-2016, 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016, 2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KBASE_DEBUG_JOB_FAULT_H @@ -54,8 +53,9 @@ void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev); * kbase_debug_job_fault_context_init - Initialize the relevant * data structure per context * @kctx: KBase context pointer + * @return 0 on success */ -void kbase_debug_job_fault_context_init(struct kbase_context *kctx); +int kbase_debug_job_fault_context_init(struct kbase_context *kctx); /** * kbase_debug_job_fault_context_term - Release the relevant diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c index 4788137..5a99b5e 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2013-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ /* @@ -30,11 +29,7 @@ #include #include -#ifdef CONFIG_DEBUG_FS - -#if (KERNEL_VERSION(4, 1, 0) > LINUX_VERSION_CODE) -#define get_file_rcu(x) atomic_long_inc_not_zero(&(x)->f_count) -#endif +#if IS_ENABLED(CONFIG_DEBUG_FS) struct debug_mem_mapping { struct list_head node; @@ -179,6 +174,13 @@ static int debug_mem_zone_open(struct rb_root *rbtree, /* Empty region - ignore */ continue; + if (reg->flags & KBASE_REG_PROTECTED) { + /* CPU access to protected memory is forbidden - so + * skip this GPU virtual region. + */ + continue; + } + mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); if (!mapping) { ret = -ENOMEM; @@ -222,19 +224,19 @@ static int debug_mem_open(struct inode *i, struct file *file) kbase_gpu_vm_lock(kctx); ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data); - if (0 != ret) { + if (ret != 0) { kbase_gpu_vm_unlock(kctx); goto out; } ret = debug_mem_zone_open(&kctx->reg_rbtree_custom, mem_data); - if (0 != ret) { + if (ret != 0) { kbase_gpu_vm_unlock(kctx); goto out; } ret = debug_mem_zone_open(&kctx->reg_rbtree_exec, mem_data); - if (0 != ret) { + if (ret != 0) { kbase_gpu_vm_unlock(kctx); goto out; } diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h index b948b7c..d034832 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2013-2015, 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2015, 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. 
+ * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KBASE_DEBUG_MEM_VIEW_H diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.c index 37e507b..973739f 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #include @@ -46,10 +45,9 @@ * * Return: 0 if success, negative error code otherwise. 
*/ -static int set_attr_from_string( - char *const buf, - void *const array, size_t const nelems, - kbase_debugfs_helper_set_attr_fn const set_attr_fn) +static int +set_attr_from_string(char *const buf, void *const array, size_t const nelems, + kbase_debugfs_helper_set_attr_fn *const set_attr_fn) { size_t index, err = 0; char *ptr = buf; @@ -90,9 +88,62 @@ static int set_attr_from_string( return err; } +int kbase_debugfs_string_validator(char *const buf) +{ + size_t index; + int err = 0; + char *ptr = buf; + + for (index = 0; *ptr; ++index) { + unsigned long test_number; + size_t len; + + /* Drop leading spaces */ + while (*ptr == ' ') + ptr++; + + /* Strings passed into the validator will be NULL terminated + * by nature, so here strcspn only needs to delimit by + * newlines, spaces and NULL terminator (delimited natively). + */ + len = strcspn(ptr, "\n "); + if (len == 0) { + /* No more values (allow this) */ + break; + } + + /* Substitute a nul terminator for a space character to make + * the substring valid for kstrtoul, and then replace it back. + */ + if (ptr[len] == ' ') { + ptr[len] = '\0'; + err = kstrtoul(ptr, 0, &test_number); + ptr[len] = ' '; + + /* len should only be incremented if there is a valid + * number to follow - otherwise this will skip over + * the NULL terminator in cases with no ending newline + */ + len++; + } else { + /* This would occur at the last element before a space + * or a NULL terminator. 
+ */ + err = kstrtoul(ptr, 0, &test_number); + } + + if (err) + break; + /* Skip the substring (including any premature nul terminator) + */ + ptr += len; + } + return err; +} + int kbase_debugfs_helper_set_attr_from_string( const char *const buf, void *const array, size_t const nelems, - kbase_debugfs_helper_set_attr_fn const set_attr_fn) + kbase_debugfs_helper_set_attr_fn *const set_attr_fn) { char *const wbuf = kstrdup(buf, GFP_KERNEL); int err = 0; @@ -100,6 +151,13 @@ int kbase_debugfs_helper_set_attr_from_string( if (!wbuf) return -ENOMEM; + /* validate string before actually writing values */ + err = kbase_debugfs_string_validator(wbuf); + if (err) { + kfree(wbuf); + return err; + } + err = set_attr_from_string(wbuf, array, nelems, set_attr_fn); @@ -108,9 +166,9 @@ int kbase_debugfs_helper_set_attr_from_string( } ssize_t kbase_debugfs_helper_get_attr_to_string( - char *const buf, size_t const size, - void *const array, size_t const nelems, - kbase_debugfs_helper_get_attr_fn const get_attr_fn) + char *const buf, size_t const size, void *const array, + size_t const nelems, + kbase_debugfs_helper_get_attr_fn *const get_attr_fn) { ssize_t total = 0; size_t index; @@ -128,10 +186,10 @@ ssize_t kbase_debugfs_helper_get_attr_to_string( return total; } -int kbase_debugfs_helper_seq_write(struct file *const file, - const char __user *const ubuf, size_t const count, - size_t const nelems, - kbase_debugfs_helper_set_attr_fn const set_attr_fn) +int kbase_debugfs_helper_seq_write( + struct file *const file, const char __user *const ubuf, + size_t const count, size_t const nelems, + kbase_debugfs_helper_set_attr_fn *const set_attr_fn) { const struct seq_file *const sfile = file->private_data; void *const array = sfile->private; @@ -154,6 +212,14 @@ int kbase_debugfs_helper_seq_write(struct file *const file, } buf[count] = '\0'; + + /* validate string before actually writing values */ + err = kbase_debugfs_string_validator(buf); + if (err) { + kfree(buf); + return err; + } 
+ err = set_attr_from_string(buf, array, nelems, set_attr_fn); kfree(buf); @@ -161,9 +227,9 @@ int kbase_debugfs_helper_seq_write(struct file *const file, return err; } -int kbase_debugfs_helper_seq_read(struct seq_file *const sfile, - size_t const nelems, - kbase_debugfs_helper_get_attr_fn const get_attr_fn) +int kbase_debugfs_helper_seq_read( + struct seq_file *const sfile, size_t const nelems, + kbase_debugfs_helper_get_attr_fn *const get_attr_fn) { void *const array = sfile->private; size_t index; diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.h index c3c9efa..4c69d8b 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KBASE_DEBUGFS_HELPER_H_ @@ -31,8 +30,8 @@ * @index: An element index. The valid range depends on the use-case. * @value: Attribute value to be set. 
*/ -typedef void (*kbase_debugfs_helper_set_attr_fn)( - void *array, size_t index, size_t value); +typedef void kbase_debugfs_helper_set_attr_fn(void *array, size_t index, + size_t value); /** * kbase_debugfs_helper_set_attr_from_string - Parse a string to reconfigure an @@ -56,7 +55,30 @@ typedef void (*kbase_debugfs_helper_set_attr_fn)( */ int kbase_debugfs_helper_set_attr_from_string( const char *buf, void *array, size_t nelems, - kbase_debugfs_helper_set_attr_fn set_attr_fn); + kbase_debugfs_helper_set_attr_fn *set_attr_fn); + +/** + * kbase_debugfs_string_validator - Validate a string to be written to a + * debugfs file for any incorrect formats + * or wrong values. + * + * This function is to be used before any writes to debugfs values are done + * such that any strings with erroneous values (such as octal 09 or + * hexadecimal 0xGH are fully ignored) - without this validation, any correct + * values before the first incorrect one will still be entered into the + * debugfs file. This essentially iterates the values through kstrtoul to see + * if it is valid. + * + * It is largely similar to set_attr_from_string to iterate through the values + * of the input string. This function also requires the input string to be + * writable. + * + * @buf: Null-terminated string to validate. + * + * Return: 0 with no error, else -22 (the invalid return value of kstrtoul) if + * any value in the string was wrong or with an incorrect format. + */ +int kbase_debugfs_string_validator(char *const buf); /** * typedef kbase_debugfs_helper_get_attr_fn - Type of function to get an @@ -67,8 +89,7 @@ int kbase_debugfs_helper_set_attr_from_string( * * Return: Value of attribute. 
*/ -typedef size_t (*kbase_debugfs_helper_get_attr_fn)( - void *array, size_t index); +typedef size_t kbase_debugfs_helper_get_attr_fn(void *array, size_t index); /** * kbase_debugfs_helper_get_attr_to_string - Construct a formatted string @@ -89,7 +110,7 @@ typedef size_t (*kbase_debugfs_helper_get_attr_fn)( */ ssize_t kbase_debugfs_helper_get_attr_to_string( char *buf, size_t size, void *array, size_t nelems, - kbase_debugfs_helper_get_attr_fn get_attr_fn); + kbase_debugfs_helper_get_attr_fn *get_attr_fn); /** * kbase_debugfs_helper_seq_read - Implements reads from a virtual file for an @@ -110,8 +131,8 @@ ssize_t kbase_debugfs_helper_get_attr_to_string( * Return: 0 if success, negative error code otherwise. */ int kbase_debugfs_helper_seq_read( - struct seq_file *const sfile, size_t const nelems, - kbase_debugfs_helper_get_attr_fn const get_attr_fn); + struct seq_file *sfile, size_t nelems, + kbase_debugfs_helper_get_attr_fn *get_attr_fn); /** * kbase_debugfs_helper_seq_write - Implements writes to a virtual file for an @@ -132,10 +153,10 @@ int kbase_debugfs_helper_seq_read( * * Return: 0 if success, negative error code otherwise. */ -int kbase_debugfs_helper_seq_write(struct file *const file, - const char __user *const ubuf, size_t const count, - size_t const nelems, - kbase_debugfs_helper_set_attr_fn const set_attr_fn); +int kbase_debugfs_helper_seq_write(struct file *file, + const char __user *ubuf, size_t count, + size_t nelems, + kbase_debugfs_helper_set_attr_fn *set_attr_fn); #endif /*_KBASE_DEBUGFS_HELPER_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h index 7056d80..146695c 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_defs.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. 
+ * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,17 +17,11 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - - /** - * @file mali_kbase_defs.h - * - * Defintions (types, defines, etcs) common to Kbase. They are placed here to - * allow the hierarchy of header files to work. + * DOC: Defintions (types, defines, etcs) common to Kbase. They are placed here + * to allow the hierarchy of header files to work. */ #ifndef _KBASE_DEFS_H_ @@ -37,10 +32,14 @@ #include #include #include -#include +#include #include #include -#include +#if MALI_USE_CSF +#include +#else +#include +#endif #include #include @@ -49,9 +48,6 @@ #include #include -#ifdef CONFIG_MALI_BUSLOG -#include -#endif #if defined(CONFIG_SYNC) #include @@ -59,7 +55,7 @@ #include "mali_kbase_fence_defs.h" #endif -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) #include #endif /* CONFIG_DEBUG_FS */ @@ -75,8 +71,7 @@ #include #include -#if defined(CONFIG_PM_RUNTIME) || \ - (defined(CONFIG_PM) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) +#if defined(CONFIG_PM_RUNTIME) || defined(CONFIG_PM) #define KBASE_PM_RUNTIME 1 #endif @@ -123,6 +118,11 @@ */ #define KBASE_LOCK_REGION_MIN_SIZE_LOG2 (15) +/** + * Maximum number of GPU memory region zones + */ +#define KBASE_REG_ZONE_MAX 4ul + #include "mali_kbase_hwaccess_defs.h" /* Maximum number of pages of memory that require a permanent mapping, per @@ -138,24 +138,29 @@ */ #define KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS (200 * 
NSEC_PER_USEC) +#if MALI_USE_CSF +/* The buffer count of CSF hwcnt backend ring buffer, which is used when CSF + * hwcnt backend allocate the ring buffer to communicate with CSF firmware for + * HWC dump samples. + * To meet the hardware requirement, this number MUST be power of 2, otherwise, + * CSF hwcnt backend creation will be failed. + */ +#define KBASE_HWCNT_BACKEND_CSF_RING_BUFFER_COUNT (128) +#endif + /* Maximum number of clock/regulator pairs that may be referenced by * the device node. * This is dependent on support for of_property_read_u64_array() in the * kernel. */ -#if (KERNEL_VERSION(4, 0, 0) <= LINUX_VERSION_CODE) || \ - defined(LSK_OPPV2_BACKPORT) #define BASE_MAX_NR_CLOCKS_REGULATORS (2) -#else -#define BASE_MAX_NR_CLOCKS_REGULATORS (1) -#endif /* Forward declarations */ struct kbase_context; struct kbase_device; struct kbase_as; struct kbase_mmu_setup; -struct kbase_ipa_model_vinstr_data; +struct kbase_kinstr_jm; /** * struct kbase_io_access - holds information about 1 register access @@ -178,11 +183,7 @@ struct kbase_io_access { * @buf: array of kbase_io_access */ struct kbase_io_history { -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) bool enabled; -#else - u32 enabled; -#endif spinlock_t lock; size_t count; @@ -242,29 +243,6 @@ struct kbase_fault { bool protected_mode; }; -/** - * struct kbase_as - object representing an address space of GPU. - * @number: Index at which this address space structure is present - * in an array of address space structures embedded inside the - * struct kbase_device. - * @pf_wq: Workqueue for processing work items related to Bus fault - * and Page fault handling. - * @work_pagefault: Work item for the Page fault handling. - * @work_busfault: Work item for the Bus fault handling. - * @pf_data: Data relating to page fault. - * @bf_data: Data relating to bus fault. - * @current_setup: Stores the MMU configuration for this address space. 
- */ -struct kbase_as { - int number; - struct workqueue_struct *pf_wq; - struct work_struct work_pagefault; - struct work_struct work_busfault; - struct kbase_fault pf_data; - struct kbase_fault bf_data; - struct kbase_mmu_setup current_setup; -}; - /** * struct kbase_mmu_table - object representing a set of GPU page tables * @mmu_teardown_pages: Buffer of 4 Pages in size, used to cache the entries @@ -291,7 +269,11 @@ struct kbase_mmu_table { struct kbase_context *kctx; }; +#if MALI_USE_CSF +#include "csf/mali_kbase_csf_defs.h" +#else #include "jm/mali_kbase_jm_defs.h" +#endif static inline int kbase_as_has_bus_fault(struct kbase_as *as, struct kbase_fault *fault) @@ -320,71 +302,115 @@ struct kbasep_mem_device { atomic_t ir_threshold; }; +struct kbase_clk_rate_listener; + +/** + * typedef kbase_clk_rate_listener_on_change_t() - Frequency change callback + * + * @listener: Clock frequency change listener. + * @clk_index: Index of the clock for which the change has occurred. + * @clk_rate_hz: Clock frequency(Hz). + * + * A callback to call when clock rate changes. The function must not + * sleep. No clock rate manager functions must be called from here, as + * its lock is taken. + */ +typedef void +kbase_clk_rate_listener_on_change_t(struct kbase_clk_rate_listener *listener, + u32 clk_index, u32 clk_rate_hz); + +/** + * struct kbase_clk_rate_listener - Clock frequency listener + * + * @node: List node. + * @notify: Callback to be called when GPU frequency changes. + */ +struct kbase_clk_rate_listener { + struct list_head node; + kbase_clk_rate_listener_on_change_t *notify; +}; + /** - * Data stored per device for power management. + * struct kbase_clk_rate_trace_manager - Data stored per device for GPU clock + * rate trace manager. * - * This structure contains data for the power management framework. There is one - * instance of this structure per device in the system. + * @gpu_idle: Tracks the idle state of GPU. 
+ * @clks: Array of pointer to structures storing data for every + * enumerated GPU clock. + * @clk_rate_trace_ops: Pointer to the platform specific GPU clock rate trace + * operations. + * @gpu_clk_rate_trace_write: Pointer to the function that would emit the + * tracepoint for the clock rate change. + * @listeners: List of listener attached. + * @lock: Lock to serialize the actions of GPU clock rate trace + * manager. + */ +struct kbase_clk_rate_trace_manager { + bool gpu_idle; + struct kbase_clk_data *clks[BASE_MAX_NR_CLOCKS_REGULATORS]; + struct kbase_clk_rate_trace_op_conf *clk_rate_trace_ops; + struct list_head listeners; + spinlock_t lock; +}; + +/** + * struct kbase_pm_device_data - Data stored per device for power management. + * @lock: The lock protecting Power Management structures accessed outside of + * IRQ. + * This lock must also be held whenever the GPU is being powered on or + * off. + * @active_count: The reference count of active contexts on this device. Note + * that some code paths keep shaders/the tiler powered whilst this is 0. + * Use kbase_pm_is_active() instead to check for such cases. + * @suspending: Flag indicating suspending/suspended + * @gpu_lost: Flag indicating gpu lost + * This structure contains data for the power management framework. There + * is one instance of this structure per device in the system. + * @zero_active_count_wait: Wait queue set when active_count == 0 + * @resume_wait: system resume of GPU device. + * @debug_core_mask: Bit masks identifying the available shader cores that are + * specified via sysfs. One mask per job slot. + * @debug_core_mask_all: Bit masks identifying the available shader cores that + * are specified via sysfs. + * @callback_power_runtime_init: Callback for initializing the runtime power + * management. Return 0 on success, else error code + * @callback_power_runtime_term: Callback for terminating the runtime power + * management. 
+ * @dvfs_period: Time in milliseconds between each dvfs sample + * @backend: KBase PM backend data + * @arb_vm_state: The state of the arbiter VM machine + * @gpu_users_waiting: Used by virtualization to notify the arbiter that there + * are users waiting for the GPU so that it can request and resume the + * driver. + * @clk_rtm: The state of the GPU clock rate trace manager */ struct kbase_pm_device_data { - /** - * The lock protecting Power Management structures accessed outside of - * IRQ. - * - * This lock must also be held whenever the GPU is being powered on or - * off. - */ struct mutex lock; - - /** - * The reference count of active contexts on this device. Note that - * some code paths keep shaders/the tiler powered whilst this is 0. Use - * kbase_pm_is_active() instead to check for such cases. - */ int active_count; - /** Flag indicating suspending/suspended */ bool suspending; #ifdef CONFIG_MALI_ARBITER_SUPPORT - /* Flag indicating gpu lost */ - bool gpu_lost; + atomic_t gpu_lost; #endif /* CONFIG_MALI_ARBITER_SUPPORT */ - /* Wait queue set when active_count == 0 */ wait_queue_head_t zero_active_count_wait; + wait_queue_head_t resume_wait; - /** - * Bit masks identifying the available shader cores that are specified - * via sysfs. One mask per job slot. - */ +#if MALI_USE_CSF + u64 debug_core_mask; +#else + /* One mask per job slot. */ u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS]; u64 debug_core_mask_all; +#endif /* MALI_USE_CSF */ - /** - * Callback for initializing the runtime power management. - * - * @param kbdev The kbase device - * - * @return 0 on success, else error code - */ - int (*callback_power_runtime_init)(struct kbase_device *kbdev); - - /** - * Callback for terminating the runtime power management. 
- * - * @param kbdev The kbase device - */ + int (*callback_power_runtime_init)(struct kbase_device *kbdev); void (*callback_power_runtime_term)(struct kbase_device *kbdev); - - /* Time in milliseconds between each dvfs sample */ u32 dvfs_period; - struct kbase_pm_backend_data backend; - #ifdef CONFIG_MALI_ARBITER_SUPPORT - /** - * The state of the arbiter VM machine - */ struct kbase_arbiter_vm_state *arb_vm_state; + atomic_t gpu_users_waiting; #endif /* CONFIG_MALI_ARBITER_SUPPORT */ + struct kbase_clk_rate_trace_manager clk_rtm; }; /** @@ -524,7 +550,6 @@ struct kbase_mmu_mode { unsigned long flags; }; -struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void); struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); #define DEVNAME_SIZE 16 @@ -559,6 +584,32 @@ struct kbase_devfreq_queue_info { enum kbase_devfreq_work_type acted_type; }; +/** + * struct kbase_process - Representing an object of a kbase process instantiated + * when the first kbase context is created under it. + * @tgid: Thread group ID. + * @total_gpu_pages: Total gpu pages allocated across all the contexts + * of this process, it accounts for both native allocations + * and dma_buf imported allocations. + * @kctx_list: List of kbase contexts created for the process. + * @kprcs_node: Node to a rb_tree, kbase_device will maintain a rb_tree + * based on key tgid, kprcs_node is the node link to + * &struct_kbase_device.process_root. + * @dma_buf_root: RB tree of the dma-buf imported allocations, imported + * across all the contexts created for this process. + * Used to ensure that pages of allocation are accounted + * only once for the process, even if the allocation gets + * imported multiple times for the process. 
+ */ +struct kbase_process { + pid_t tgid; + size_t total_gpu_pages; + struct list_head kctx_list; + + struct rb_node kprcs_node; + struct rb_root dma_buf_root; +}; + /** * struct kbase_device - Object representing an instance of GPU platform device, * allocated from the probe method of mali driver. @@ -568,8 +619,8 @@ struct kbase_devfreq_queue_info { * issues present in the GPU. * @hw_quirks_mmu: Configuration to be used for the MMU as per the HW * issues present in the GPU. - * @hw_quirks_jm: Configuration to be used for the Job Manager as per - * the HW issues present in the GPU. + * @hw_quirks_gpu: Configuration to be used for the Job Manager or CSF/MCU + * subsystems as per the HW issues present in the GPU. * @entry: Links the device instance to the global list of GPU * devices. The list would have as many entries as there * are GPU device instances. @@ -586,6 +637,8 @@ struct kbase_devfreq_queue_info { * @irqs: Array containing IRQ resource info for 3 types of * interrupts : Job scheduling, MMU & GPU events (like * power management, cache etc.) + * @irqs.irq: irq number + * @irqs.flags: irq flags * @clocks: Pointer to the input clock resources referenced by * the GPU device node. * @nr_clocks: Number of clocks set in the clocks array. @@ -619,6 +672,8 @@ struct kbase_devfreq_queue_info { * accesses made by the driver. * @pm: Per device object for storing data for power management * framework. + * @fw_load_lock: Mutex to protect firmware loading in @ref kbase_open. + * @csf: CSF object for the GPU device. * @js_data: Per device object encapsulating the current context of * Job Scheduler, which is global to the device and is not * tied to any particular struct kbase_context running on @@ -646,11 +701,21 @@ struct kbase_devfreq_queue_info { * @disjoint_event: struct for keeping track of the disjoint information, * that whether the GPU is in a disjoint state and the * number of disjoint events that have occurred on GPU. 
+ * @disjoint_event.count: disjoint event count + * @disjoint_event.state: disjoint event state * @nr_hw_address_spaces: Number of address spaces actually available in the * GPU, remains constant after driver initialisation. * @nr_user_address_spaces: Number of address spaces available to user contexts + * @hwcnt_backend_csf_if_fw: Firmware interface to access CSF GPU performance + * counters. * @hwcnt: Structure used for instrumentation and HW counters * dumping + * @hwcnt.lock: The lock should be used when accessing any of the + * following members + * @hwcnt.kctx: kbase context + * @hwcnt.addr: HW counter address + * @hwcnt.addr_bytes: HW counter size in bytes + * @hwcnt.backend: Kbase instrumentation backend * @hwcnt_gpu_iface: Backend interface for GPU hardware counter access. * @hwcnt_gpu_ctx: Context for GPU hardware counter access. * @hwaccess_lock must be held when calling @@ -661,6 +726,7 @@ struct kbase_devfreq_queue_info { * are enabled. If zero, there is no timeline client and * therefore timeline is disabled. * @timeline: Timeline context created per device. + * @ktrace: kbase device's ktrace * @trace_lock: Lock to serialize the access to trace buffer. * @trace_first_out: Index/offset in the trace buffer at which the first * unread message is present. @@ -686,6 +752,8 @@ struct kbase_devfreq_queue_info { * including any contexts that might be created for * hardware counters. * @kctx_list_lock: Lock protecting concurrent accesses to @kctx_list. + * @group_max_uid_in_devices: Max value of any queue group UID in any kernel + * context in the kbase device. * @devfreq_profile: Describes devfreq profile for the Mali GPU device, passed * to devfreq_add_device() to add devfreq feature to Mali * GPU device. @@ -714,6 +782,7 @@ struct kbase_devfreq_queue_info { * table in devicetree. * @num_opps: Number of operating performance points available for the Mali * GPU device. 
+ * @last_devfreq_metrics: last PM metrics * @devfreq_queue: Per device object for storing data that manages devfreq * suspend & resume request queue and the related items. * @devfreq_cooling: Pointer returned on registering devfreq cooling device @@ -724,6 +793,17 @@ struct kbase_devfreq_queue_info { * previously entered protected mode. * @ipa: Top level structure for IPA, containing pointers to both * configured & fallback models. + * @ipa.lock: Access to this struct must be with ipa.lock held + * @ipa.configured_model: ipa model to use + * @ipa.fallback_model: ipa fallback model + * @ipa.last_metrics: Values of the PM utilization metrics from last time + * the power model was invoked. The utilization is + * calculated as the difference between last_metrics + * and the current values. + * @ipa.force_fallback_model: true if use of fallback model has been forced by + * the User + * @ipa.last_sample_time: Records the time when counters, used for dynamic + * energy estimation, were last sampled. * @previous_frequency: Previous frequency of GPU clock used for * BASE_HW_ISSUE_GPU2017_1336 workaround, This clock is * restored when L2 is powered on. @@ -732,6 +812,7 @@ struct kbase_devfreq_queue_info { * @mali_debugfs_directory: Root directory for the debugfs files created by the driver * @debugfs_ctx_directory: Directory inside the @mali_debugfs_directory containing * a sub-directory for every context. + * @debugfs_instr_directory: Instrumentation debugfs directory * @debugfs_as_read_bitmap: bitmap of address spaces for which the bus or page fault * has occurred. * @job_fault_wq: Waitqueue to block the job fault dumping daemon till the @@ -748,6 +829,8 @@ struct kbase_devfreq_queue_info { * @job_fault_event_lock: Lock to protect concurrent accesses to @job_fault_event_list * @regs_dump_debugfs_data: Contains the offset of register to be read through debugfs * file "read_register". 
+ * @regs_dump_debugfs_data.reg_offset: Contains the offset of register to be + * read through debugfs file "read_register". * @ctx_num: Total number of contexts created for the device. * @io_history: Pointer to an object keeping a track of all recent * register accesses. The history of register accesses @@ -806,12 +889,40 @@ struct kbase_devfreq_queue_info { * Job Scheduler * @l2_size_override: Used to set L2 cache size via device tree blob * @l2_hash_override: Used to set L2 cache hash via device tree blob + * @l2_hash_values_override: true if @l2_hash_values is valid. + * @l2_hash_values: Used to set L2 asn_hash via device tree blob + * @process_root: rb_tree root node for maintaining a rb_tree of + * kbase_process based on key tgid(thread group ID). + * @dma_buf_root: rb_tree root node for maintaining a rb_tree of + * &struct kbase_dma_buf based on key dma_buf. + * We maintain a rb_tree of dma_buf mappings under + * kbase_device and kbase_process, one indicates a + * mapping and gpu memory usage at device level and + * other one at process level. + * @total_gpu_pages: Total GPU pages used for the complete GPU device. + * @dma_buf_lock: This mutex should be held while accounting for + * @total_gpu_pages from imported dma buffers. + * @gpu_mem_usage_lock: This spinlock should be held while accounting + * @total_gpu_pages for both native and dma-buf imported + * allocations. + * @dummy_job_wa: struct for dummy job execution workaround for the + * GPU hang issue + * @dummy_job_wa.ctx: dummy job workaround context + * @dummy_job_wa.jc: dummy job workaround job + * @dummy_job_wa.slot: dummy job workaround slot + * @dummy_job_wa.flags: dummy job workaround flags + * @dummy_job_wa_loaded: Flag for indicating that the workaround blob has + * been loaded. Protected by @fw_load_lock. + * @arb: Pointer to the arbiter device + * @pcm_dev: The priority control manager device. + * @oom_notifier_block: notifier_block containing kernel-registered out-of- + * memory handler. 
*/ struct kbase_device { u32 hw_quirks_sc; u32 hw_quirks_tiler; u32 hw_quirks_mmu; - u32 hw_quirks_jm; + u32 hw_quirks_gpu; struct list_head entry; struct device *dev; @@ -827,7 +938,7 @@ struct kbase_device { struct clk *clocks[BASE_MAX_NR_CLOCKS_REGULATORS]; unsigned int nr_clocks; -#ifdef CONFIG_REGULATOR +#if IS_ENABLED(CONFIG_REGULATOR) struct regulator *regulators[BASE_MAX_NR_CLOCKS_REGULATORS]; unsigned int nr_regulators; #if (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) @@ -837,16 +948,6 @@ struct kbase_device { char devname[DEVNAME_SIZE]; u32 id; -#ifdef CONFIG_MALI_NO_MALI - void *model; - struct kmem_cache *irq_slab; - struct workqueue_struct *irq_workq; - atomic_t serving_job_irq; - atomic_t serving_gpu_irq; - atomic_t serving_mmu_irq; - spinlock_t reg_op_lock; -#endif /* CONFIG_MALI_NO_MALI */ - struct kbase_pm_device_data pm; struct kbase_mem_pool_group mem_pools; @@ -874,8 +975,10 @@ struct kbase_device { s8 nr_hw_address_spaces; s8 nr_user_address_spaces; +#if MALI_USE_CSF + struct kbase_hwcnt_backend_csf_if hwcnt_backend_csf_if_fw; +#else struct kbase_hwcnt { - /* The lock should be used when accessing any of the following members */ spinlock_t lock; struct kbase_context *kctx; @@ -884,6 +987,7 @@ struct kbase_device { struct kbase_instr_backend backend; } hwcnt; +#endif struct kbase_hwcnt_backend_interface hwcnt_gpu_iface; struct kbase_hwcnt_context *hwcnt_gpu_ctx; @@ -906,6 +1010,7 @@ struct kbase_device { struct list_head kctx_list; struct mutex kctx_list_lock; + atomic_t group_max_uid_in_devices; #ifdef CONFIG_MALI_DEVFREQ struct devfreq_dev_profile devfreq_profile; @@ -917,17 +1022,10 @@ struct kbase_device { struct kbase_devfreq_opp *devfreq_table; int num_opps; struct kbasep_pm_metrics last_devfreq_metrics; - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) struct kbase_devfreq_queue_info devfreq_queue; -#endif -#ifdef CONFIG_DEVFREQ_THERMAL -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) - struct devfreq_cooling_device 
*devfreq_cooling; -#else +#if IS_ENABLED(CONFIG_DEVFREQ_THERMAL) struct thermal_cooling_device *devfreq_cooling; -#endif bool ipa_protection_mode_switched; struct { /* Access to this struct must be with ipa.lock held */ @@ -940,11 +1038,13 @@ struct kbase_device { * the difference between last_metrics and the current values. */ struct kbasep_pm_metrics last_metrics; - /* Model data to pass to ipa_gpu_active/idle() */ - struct kbase_ipa_model_vinstr_data *model_data; /* true if use of fallback model has been forced by the User */ bool force_fallback_model; + /* Records the time when counters, used for dynamic energy + * estimation, were last sampled. + */ + ktime_t last_sample_time; } ipa; #endif /* CONFIG_DEVFREQ_THERMAL */ #endif /* CONFIG_MALI_DEVFREQ */ @@ -952,9 +1052,10 @@ struct kbase_device { atomic_t job_fault_debug; -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) struct dentry *mali_debugfs_directory; struct dentry *debugfs_ctx_directory; + struct dentry *debugfs_instr_directory; #ifdef CONFIG_MALI_DEBUG u64 debugfs_as_read_bitmap; @@ -968,14 +1069,14 @@ struct kbase_device { #if !MALI_CUSTOMER_RELEASE struct { - u16 reg_offset; + u32 reg_offset; } regs_dump_debugfs_data; #endif /* !MALI_CUSTOMER_RELEASE */ #endif /* CONFIG_DEBUG_FS */ atomic_t ctx_num; -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) struct kbase_io_history io_history; #endif /* CONFIG_DEBUG_FS */ @@ -985,8 +1086,7 @@ struct kbase_device { bool poweroff_pending; - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) +#if (KERNEL_VERSION(4, 4, 0) <= LINUX_VERSION_CODE) bool infinite_cache_active_default; #else u32 infinite_cache_active_default; @@ -1015,9 +1115,6 @@ struct kbase_device { struct work_struct protected_mode_hwcnt_disable_work; -#ifdef CONFIG_MALI_BUSLOG - struct bus_logger_client *buslogger; -#endif bool irq_reset_flush; @@ -1029,7 +1126,14 @@ struct kbase_device { u8 l2_size_override; u8 l2_hash_override; + bool l2_hash_values_override; + u32 
l2_hash_values[ASN_HASH_COUNT]; + struct mutex fw_load_lock; +#if MALI_USE_CSF + /* CSF object for the GPU device. */ + struct kbase_csf_device csf; +#else struct kbasep_js_device_data js_data; /* See KBASE_JS_*_PRIORITY_MODE for details. */ @@ -1042,6 +1146,14 @@ struct kbase_device { u8 backup_serialize_jobs; #endif /* CONFIG_MALI_CINSTR_GWT */ +#endif /* MALI_USE_CSF */ + + struct rb_root process_root; + struct rb_root dma_buf_root; + + size_t total_gpu_pages; + struct mutex dma_buf_lock; + spinlock_t gpu_mem_usage_lock; struct { struct kbase_context *ctx; @@ -1049,16 +1161,16 @@ struct kbase_device { int slot; u64 flags; } dummy_job_wa; + bool dummy_job_wa_loaded; #ifdef CONFIG_MALI_ARBITER_SUPPORT - /* Pointer to the arbiter device */ struct kbase_arbiter_device arb; #endif -}; + /* Priority Control Manager device */ + struct priority_control_manager_device *pcm_dev; -#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \ - (((minor) & 0xFFF) << 8) | \ - ((0 & 0xFF) << 0)) + struct notifier_block oom_notifier_block; +}; /** * enum kbase_file_state - Initialization state of a file opened by @kbase_open @@ -1109,7 +1221,92 @@ struct kbase_file { unsigned long api_version; atomic_t setup_state; }; - +#if MALI_JIT_PRESSURE_LIMIT_BASE +/** + * enum kbase_context_flags - Flags for kbase contexts + * + * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit + * process on a 64-bit kernel. + * + * @KCTX_RUNNABLE_REF: Set when context is counted in + * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing. + * + * @KCTX_ACTIVE: Set when the context is active. + * + * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this + * context. + * + * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been + * initialized. + * + * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new + * allocations. Existing allocations will not change. 
+ * + * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs. + * + * @KCTX_PRIVILEGED:Set if the context uses an address space and should be kept + * scheduled in. + * + * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool. + * This is only ever updated whilst the jsctx_mutex is held. + * + * @KCTX_DYING: Set when the context process is in the process of being evicted. + * + * @KCTX_NO_IMPLICIT_SYNC: Set when explicit Android fences are in use on this + * context, to disable use of implicit dma-buf fences. This is used to avoid + * potential synchronization deadlocks. + * + * @KCTX_FORCE_SAME_VA: Set when BASE_MEM_SAME_VA should be forced on memory + * allocations. For 64-bit clients it is enabled by default, and disabled by + * default on 32-bit clients. Being able to clear this flag is only used for + * testing purposes of the custom zone allocation on 64-bit user-space builds, + * where we also require more control than is available through e.g. the JIT + * allocation mechanism. However, the 64-bit user-space client must still + * reserve a JIT region using KBASE_IOCTL_MEM_JIT_INIT + * + * @KCTX_PULLED_SINCE_ACTIVE_JS0: Set when the context has had an atom pulled + * from it for job slot 0. This is reset when the context first goes active or + * is re-activated on that slot. + * + * @KCTX_PULLED_SINCE_ACTIVE_JS1: Set when the context has had an atom pulled + * from it for job slot 1. This is reset when the context first goes active or + * is re-activated on that slot. + * + * @KCTX_PULLED_SINCE_ACTIVE_JS2: Set when the context has had an atom pulled + * from it for job slot 2. This is reset when the context first goes active or + * is re-activated on that slot. + * + * @KCTX_AS_DISABLED_ON_FAULT: Set when the GPU address space is disabled for + * the context due to unhandled page(or bus) fault. 
It is cleared when the + * refcount for the context drops to 0 or when the address spaces are + * re-enabled on GPU reset or power cycle. + * + * @KCTX_JPL_ENABLED: Set when JIT physical page limit is less than JIT virtual + * address page limit, so we must take care to not exceed the physical limit + * + * All members need to be separate bits. This enum is intended for use in a + * bitmask where multiple values get OR-ed together. + */ +enum kbase_context_flags { + KCTX_COMPAT = 1U << 0, + KCTX_RUNNABLE_REF = 1U << 1, + KCTX_ACTIVE = 1U << 2, + KCTX_PULLED = 1U << 3, + KCTX_MEM_PROFILE_INITIALIZED = 1U << 4, + KCTX_INFINITE_CACHE = 1U << 5, + KCTX_SUBMIT_DISABLED = 1U << 6, + KCTX_PRIVILEGED = 1U << 7, + KCTX_SCHEDULED = 1U << 8, + KCTX_DYING = 1U << 9, + KCTX_NO_IMPLICIT_SYNC = 1U << 10, + KCTX_FORCE_SAME_VA = 1U << 11, + KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12, + KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13, + KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14, + KCTX_AS_DISABLED_ON_FAULT = 1U << 15, + KCTX_JPL_ENABLED = 1U << 16, +}; +#else /** * enum kbase_context_flags - Flags for kbase contexts * @@ -1190,6 +1387,7 @@ enum kbase_context_flags { KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14, KCTX_AS_DISABLED_ON_FAULT = 1U << 15, }; +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ struct kbase_sub_alloc { struct list_head link; @@ -1197,6 +1395,21 @@ struct kbase_sub_alloc { DECLARE_BITMAP(sub_pages, SZ_2M / SZ_4K); }; +/** + * struct kbase_reg_zone - Information about GPU memory region zones + * @base_pfn: Page Frame Number in GPU virtual address space for the start of + * the Zone + * @va_size_pages: Size of the Zone in pages + * + * Track information about a zone KBASE_REG_ZONE() and related macros.
+ * In future, this could also store the &rb_root that are currently in + * &kbase_context + */ +struct kbase_reg_zone { + u64 base_pfn; + u64 va_size_pages; +}; + /** * struct kbase_context - Kernel base context * @@ -1247,6 +1460,7 @@ struct kbase_sub_alloc { * @reg_rbtree_exec: RB tree of the memory regions allocated from the EXEC_VA * zone of the GPU virtual address space. Used for GPU-executable * allocations which don't need the SAME_VA property. + * @reg_zone: Zone information for the reg_rbtree_<...> members. * @cookies: Bitmask containing of BITS_PER_LONG bits, used mainly for * SAME_VA allocations to defer the reservation of memory region * (from the GPU virtual address space) from base_mem_alloc @@ -1271,6 +1485,7 @@ struct kbase_sub_alloc { * which actually created the context. This is usually, * but not necessarily, the same as the thread which * opened the device file /dev/malixx instance. + * @csf: kbase csf context * @jctx: object encapsulating all the Job dispatcher related state, * including the array of atoms. * @used_pages: Keeps a track of the number of 4KB physical pages in use @@ -1287,6 +1502,8 @@ struct kbase_sub_alloc { * evictable/reclaimable. * @evict_list: List head for the list containing the allocations which * can be evicted or freed up in the shrinker callback. + * @evict_nents: Total number of pages allocated by the allocations within + * @evict_list (atomic). * @waiting_soft_jobs: List head for the list containing softjob atoms, which * are either waiting for the event set operation, or waiting * for the signaling of input fence or waiting for the GPU @@ -1297,6 +1514,8 @@ struct kbase_sub_alloc { * waiting atoms and the waitqueue to process the work item * queued for the atoms blocked on the signaling of dma-buf * fences. + * @dma_fence.waiting_resource: list head for the list of dma-buf fence + * @dma_fence.wq: waitqueue to process the work item queued * @as_nr: id of the address space being used for the scheduled in * context. 
This is effectively part of the Run Pool, because * it only has a valid setting (!=KBASEP_AS_NR_INVALID) whilst @@ -1315,15 +1534,13 @@ struct kbase_sub_alloc { * at any point. * Generally the reference count is incremented when the context * is scheduled in and an atom is pulled from the context's per - * slot runnable tree. + * slot runnable tree in JM GPU or GPU command queue + * group is programmed on CSG slot in CSF GPU. * @mm_update_lock: lock used for handling of special tracking page. * @process_mm: Pointer to the memory descriptor of the process which * created the context. Used for accounting the physical * pages used for GPU allocations, done for the context, * to the memory consumed by the process. - * @same_va_end: End address of the SAME_VA zone (in 4KB page units) - * @exec_va_start: Start address of the EXEC_VA zone (in 4KB page units) - * or U64_MAX if the EXEC_VA zone is uninitialized. * @gpu_va_end: End address of the GPU va space (in 4KB page units) * @jit_va: Indicates if a JIT_VA zone has been created. * @mem_profile_data: Buffer containing the profiling information provided by @@ -1399,6 +1616,16 @@ struct kbase_sub_alloc { * that were used (i.e. the * &struct_kbase_va_region.used_pages for regions * that have had a usage report). + * @jit_phys_pages_to_be_allocated: Count of the physical pages that are being + * now allocated for just-in-time memory + * allocations of a context (across all the + * threads). This is supposed to be updated + * with @reg_lock held before allocating + * the backing pages. This helps ensure that + * total physical memory usage for just in + * time memory allocation remains within the + * @jit_phys_pages_limit in multi-threaded + * scenarios. * @jit_active_head: List containing the just-in-time memory allocations * which are in use. 
* @jit_pool_head: List containing the just-in-time memory allocations @@ -1425,6 +1652,10 @@ struct kbase_sub_alloc { * is used to determine the atom's age when it is added to * the runnable RB-tree. * @trim_level: Level of JIT allocation trimming to perform on free (0-100%) + * @kprcs: Reference to @struct kbase_process that the current + * kbase_context belongs to. + * @kprcs_link: List link for the list of kbase context maintained + * under kbase_process. * @gwt_enabled: Indicates if tracking of GPU writes is enabled, protected by * kbase_context.reg_lock. * @gwt_was_enabled: Simple sticky bit flag to know if GWT was ever enabled. @@ -1435,6 +1666,12 @@ struct kbase_sub_alloc { * for context scheduling, protected by hwaccess_lock. * @atoms_count: Number of GPU atoms currently in use, per priority * @create_flags: Flags used in context creation. + * @kinstr_jm: Kernel job manager instrumentation context handle + * @tl_kctx_list_node: List item into the device timeline's list of + * contexts, for timeline summarization. + * @limited_core_mask: The mask that is applied to the affinity in case of atoms + * marked with BASE_JD_REQ_LIMITED_CORE_MASK. + * @platform_data: Pointer to platform specific per-context data. * * A kernel base context is an entity among which the GPU is scheduled. * Each context has its own GPU address space. 
@@ -1453,7 +1690,9 @@ struct kbase_context { struct list_head event_list; struct list_head event_coalesce_list; struct mutex event_mutex; +#if !MALI_USE_CSF atomic_t event_closed; +#endif struct workqueue_struct *event_workq; atomic_t event_count; int event_coalesce_count; @@ -1470,7 +1709,11 @@ struct kbase_context { struct rb_root reg_rbtree_same; struct rb_root reg_rbtree_custom; struct rb_root reg_rbtree_exec; + struct kbase_reg_zone reg_zone[KBASE_REG_ZONE_MAX]; +#if MALI_USE_CSF + struct kbase_csf_context csf; +#else struct kbase_jd_context jctx; struct jsctx_queue jsctx_queue [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS]; @@ -1488,6 +1731,7 @@ struct kbase_context { s16 atoms_count[KBASE_JS_ATOM_SCHED_PRIO_COUNT]; u32 slots_pullable; u32 age_count; +#endif /* MALI_USE_CSF */ DECLARE_BITMAP(cookies, BITS_PER_LONG); struct kbase_va_region *pending_regions[BITS_PER_LONG]; @@ -1503,6 +1747,7 @@ struct kbase_context { struct shrinker reclaim; struct list_head evict_list; + atomic_t evict_nents; struct list_head waiting_soft_jobs; spinlock_t waiting_soft_jobs_lock; @@ -1519,12 +1764,10 @@ struct kbase_context { spinlock_t mm_update_lock; struct mm_struct __rcu *process_mm; - u64 same_va_end; - u64 exec_va_start; u64 gpu_va_end; bool jit_va; -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) char *mem_profile_data; size_t mem_profile_size; struct mutex mem_profile_lock; @@ -1545,10 +1788,11 @@ struct kbase_context { u8 jit_current_allocations_per_bin[256]; u8 jit_version; u8 jit_group_id; -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE u64 jit_phys_pages_limit; u64 jit_current_phys_pressure; -#endif /* MALI_JIT_PRESSURE_LIMIT */ + u64 jit_phys_pages_to_be_allocated; +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ struct list_head jit_active_head; struct list_head jit_pool_head; struct list_head jit_destroy_head; @@ -1559,6 +1803,9 @@ struct kbase_context { u8 trim_level; + struct kbase_process *kprcs; + struct list_head kprcs_link; + #ifdef 
CONFIG_MALI_CINSTR_GWT bool gwt_enabled; bool gwt_was_enabled; @@ -1567,6 +1814,17 @@ struct kbase_context { #endif base_context_create_flags create_flags; + +#if !MALI_USE_CSF + struct kbase_kinstr_jm *kinstr_jm; +#endif + struct list_head tl_kctx_list_node; + + u64 limited_core_mask; + +#if !MALI_USE_CSF + void *platform_data; +#endif }; #ifdef CONFIG_MALI_CINSTR_GWT @@ -1653,29 +1911,4 @@ static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev) /* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */ #define KBASE_AS_INACTIVE_MAX_LOOPS 100000000 -/* JobDescriptorHeader - taken from the architecture specifications, the layout - * is currently identical for all GPU archs. */ -struct job_descriptor_header { - u32 exception_status; - u32 first_incomplete_task; - u64 fault_pointer; - u8 job_descriptor_size : 1; - u8 job_type : 7; - u8 job_barrier : 1; - u8 _reserved_01 : 1; - u8 _reserved_1 : 1; - u8 _reserved_02 : 1; - u8 _reserved_03 : 1; - u8 _reserved_2 : 1; - u8 _reserved_04 : 1; - u8 _reserved_05 : 1; - u16 job_index; - u16 job_dependency_index_1; - u16 job_dependency_index_2; - union { - u64 _64; - u32 _32; - } next_job; -}; - #endif /* _KBASE_DEFS_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c index b5ac414..7d6e475 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014, 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2020-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /* diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c index 25acbcb..69ff8cc 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2017,2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,26 +17,21 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - /* Include mali_kbase_dma_fence.h before checking for CONFIG_MALI_DMA_FENCE as * it will be set there. 
*/ #include "mali_kbase_dma_fence.h" - #include #include #include #include -#include +#include #include #include #include #include - #include static void @@ -59,7 +55,11 @@ static int kbase_dma_fence_lock_reservations(struct kbase_dma_fence_resv_info *info, struct ww_acquire_ctx *ctx) { +#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) struct reservation_object *content_res = NULL; +#else + struct dma_resv *content_res = NULL; +#endif unsigned int content_res_idx = 0; unsigned int r; int err = 0; @@ -205,7 +205,7 @@ out: } static void -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) kbase_dma_fence_cb(struct fence *fence, struct fence_cb *cb) #else kbase_dma_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) @@ -225,12 +225,19 @@ kbase_dma_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) kbase_dma_fence_queue_work(katom); } +#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) static int kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom, struct reservation_object *resv, bool exclusive) +#else +static int +kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom, + struct dma_resv *resv, + bool exclusive) +#endif { -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *excl_fence = NULL; struct fence **shared_fences = NULL; #else @@ -240,7 +247,12 @@ kbase_dma_fence_add_reservation_callback(struct kbase_jd_atom *katom, unsigned int shared_count = 0; int err, i; - err = reservation_object_get_fences_rcu(resv, +#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + err = reservation_object_get_fences_rcu( +#else + err = dma_resv_get_fences_rcu( +#endif + resv, &excl_fence, &shared_count, &shared_fences); @@ -294,9 +306,15 @@ out: return err; } +#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) void kbase_dma_fence_add_reservation(struct reservation_object *resv, struct kbase_dma_fence_resv_info *info, 
bool exclusive) +#else +void kbase_dma_fence_add_reservation(struct dma_resv *resv, + struct kbase_dma_fence_resv_info *info, + bool exclusive) +#endif { unsigned int i; @@ -317,7 +335,7 @@ int kbase_dma_fence_wait(struct kbase_jd_atom *katom, struct kbase_dma_fence_resv_info *info) { int err, i; -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence; #else struct dma_fence *fence; @@ -346,10 +364,17 @@ int kbase_dma_fence_wait(struct kbase_jd_atom *katom, } for (i = 0; i < info->dma_fence_resv_count; i++) { +#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) struct reservation_object *obj = info->resv_objs[i]; - +#else + struct dma_resv *obj = info->resv_objs[i]; +#endif if (!test_bit(i, info->dma_fence_excl_bitmap)) { +#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) err = reservation_object_reserve_shared(obj); +#else + err = dma_resv_reserve_shared(obj, 0); +#endif if (err) { dev_err(katom->kctx->kbdev->dev, "Error %d reserving space for shared fence.\n", err); @@ -363,7 +388,11 @@ int kbase_dma_fence_wait(struct kbase_jd_atom *katom, goto end; } +#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) reservation_object_add_shared_fence(obj, fence); +#else + dma_resv_add_shared_fence(obj, fence); +#endif } else { err = kbase_dma_fence_add_reservation_callback(katom, obj, true); if (err) { @@ -372,7 +401,11 @@ int kbase_dma_fence_wait(struct kbase_jd_atom *katom, goto end; } +#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) reservation_object_add_excl_fence(obj, fence); +#else + dma_resv_add_excl_fence(obj, fence); +#endif } } diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h index 2a4d6fc..38d3581 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dma_fence.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH 
Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KBASE_DMA_FENCE_H_ @@ -26,10 +25,14 @@ #ifdef CONFIG_MALI_DMA_FENCE #include +#include +#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) #include +#else +#include +#endif #include - /* Forward declaration from mali_kbase_defs.h */ struct kbase_jd_atom; struct kbase_context; @@ -45,7 +48,11 @@ struct kbase_context; * reservation objects. */ struct kbase_dma_fence_resv_info { +#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) struct reservation_object **resv_objs; +#else + struct dma_resv **resv_objs; +#endif unsigned int dma_fence_resv_count; unsigned long *dma_fence_excl_bitmap; }; @@ -60,9 +67,15 @@ struct kbase_dma_fence_resv_info { * reservation_objects. At the same time keeps track of which objects require * exclusive access in dma_fence_excl_bitmap. 
*/ +#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) void kbase_dma_fence_add_reservation(struct reservation_object *resv, struct kbase_dma_fence_resv_info *info, bool exclusive); +#else +void kbase_dma_fence_add_reservation(struct dma_resv *resv, + struct kbase_dma_fence_resv_info *info, + bool exclusive); +#endif /** * kbase_dma_fence_wait() - Creates a new fence and attaches it to the resv_objs @@ -122,8 +135,7 @@ void kbase_dma_fence_term(struct kbase_context *kctx); */ int kbase_dma_fence_init(struct kbase_context *kctx); - -#else /* CONFIG_MALI_DMA_FENCE */ +#else /* !CONFIG_MALI_DMA_FENCE */ /* Dummy functions for when dma-buf fence isn't enabled. */ static inline int kbase_dma_fence_init(struct kbase_context *kctx) diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.c index 188e53b..1e91ba0 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ /* @@ -25,7 +24,7 @@ */ #include -#include +#include #include #include @@ -282,6 +281,8 @@ int kbase_dummy_job_wa_load(struct kbase_device *kbdev) int err; struct kbase_context *kctx; + lockdep_assert_held(&kbdev->fw_load_lock); + if (!wa_blob_load_needed(kbdev)) return 0; @@ -427,6 +428,10 @@ void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev) { struct kbase_context *wa_ctx; + /* return if the dummy job has not been loaded */ + if (kbdev->dummy_job_wa_loaded == false) + return; + /* Can be safely called even if the file wasn't created on probe */ sysfs_remove_file(&kbdev->dev->kobj, &dev_attr_dummy_job_wa_info.attr); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.h index 5bbe37d..8713ba1 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KBASE_DUMMY_JOB_WORKAROUND_ @@ -31,6 +30,34 @@ KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP | \ KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) +#if MALI_USE_CSF + +static inline int kbase_dummy_job_wa_load(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); + return 0; +} + +static inline void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +static inline int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, + u64 cores) +{ + CSTD_UNUSED(kbdev); + CSTD_UNUSED(cores); + return 0; +} + +static inline bool kbase_dummy_job_wa_enabled(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); + return false; +} + +#else int kbase_dummy_job_wa_load(struct kbase_device *kbdev); void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev); @@ -41,5 +68,6 @@ static inline bool kbase_dummy_job_wa_enabled(struct kbase_device *kbdev) return (kbdev->dummy_job_wa.ctx != NULL); } +#endif /* MALI_USE_CSF */ #endif /* _KBASE_DUMMY_JOB_WORKAROUND_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dvfs_debugfs.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dvfs_debugfs.c new file mode 100644 index 0000000..1e584de --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dvfs_debugfs.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include "mali_kbase_dvfs_debugfs.h" +#include +#include + +#if IS_ENABLED(CONFIG_DEBUG_FS) + +/** + * kbasep_dvfs_utilization_debugfs_show() - Print the DVFS utilization info + * + * @file: The seq_file for printing to + * @data: Unused; the kbase_device pointer is read from @file->private + * + * Return: Negative error code or 0 on success. + */ +static int kbasep_dvfs_utilization_debugfs_show(struct seq_file *file, void *data) +{ + struct kbase_device *kbdev = file->private; + +#if MALI_USE_CSF + seq_printf(file, "busy_time: %u idle_time: %u protm_time: %u\n", + kbdev->pm.backend.metrics.values.time_busy, + kbdev->pm.backend.metrics.values.time_idle, + kbdev->pm.backend.metrics.values.time_in_protm); +#else + seq_printf(file, "busy_time: %u idle_time: %u\n", + kbdev->pm.backend.metrics.values.time_busy, + kbdev->pm.backend.metrics.values.time_idle); +#endif + + return 0; +} + +static int kbasep_dvfs_utilization_debugfs_open(struct inode *in, + struct file *file) +{ + return single_open(file, kbasep_dvfs_utilization_debugfs_show, + in->i_private); +} + +static const struct file_operations kbasep_dvfs_utilization_debugfs_fops = { + .open = kbasep_dvfs_utilization_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void kbase_dvfs_status_debugfs_init(struct kbase_device *kbdev) +{ + struct dentry *file; +#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) + const mode_t mode = 0444; +#else + const mode_t mode = 0400; +#endif + + if (WARN_ON(!kbdev || IS_ERR_OR_NULL(kbdev->mali_debugfs_directory))) + return; + + file = debugfs_create_file("dvfs_utilization", mode, + kbdev->mali_debugfs_directory, kbdev, + &kbasep_dvfs_utilization_debugfs_fops); + + if (IS_ERR_OR_NULL(file)) { + dev_warn(kbdev->dev, + "Unable to create dvfs debugfs
entry"); + } +} + +#else +/* + * Stub functions for when debugfs is disabled + */ +void kbase_dvfs_status_debugfs_init(struct kbase_device *kbdev) +{ +} + +#endif /* CONFIG_DEBUG_FS */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dvfs_debugfs.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dvfs_debugfs.h new file mode 100644 index 0000000..8334db7 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_dvfs_debugfs.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#ifndef _KBASE_DVFS_DEBUGFS_H_ +#define _KBASE_DVFS_DEBUGFS_H_ + +/* Forward declaration */ +struct kbase_device; + +/** + * kbase_dvfs_status_debugfs_init() - Create a debugfs entry for DVFS queries + * + * @kbdev: Pointer to the GPU device for which to create the debugfs entry + */ +void kbase_dvfs_status_debugfs_init(struct kbase_device *kbdev); + +#endif /* _KBASE_DVFS_DEBUGFS_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c index c8b8f22..910c511 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_event.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2016,2018-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016, 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ - - #include #include #include @@ -45,7 +42,7 @@ static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, stru KBASE_TLSTREAM_TL_DEL_ATOM(kbdev, katom); katom->status = KBASE_JD_ATOM_STATE_UNUSED; - dev_dbg(kbdev->dev, "Atom %p status to unused\n", (void *)katom); + dev_dbg(kbdev->dev, "Atom %pK status to unused\n", (void *)katom); wake_up(&katom->completed); return data; @@ -82,7 +79,7 @@ int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *ueve mutex_unlock(&ctx->event_mutex); - dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom); + dev_dbg(ctx->kbdev->dev, "event dequeuing %pK\n", (void *)atom); uevent->event_code = atom->event_code; uevent->atom_number = (atom - ctx->jctx.atoms); @@ -154,7 +151,8 @@ static int kbase_event_coalesce(struct kbase_context *kctx) const int event_count = kctx->event_coalesce_count; /* Join the list of pending events onto the tail of the main list - and reset it */ + * and reset it + */ list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list); kctx->event_coalesce_count = 0; @@ -166,7 +164,17 @@ void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) { struct kbase_device *kbdev = ctx->kbdev; - dev_dbg(kbdev->dev, "Posting event for atom %p\n", (void *)atom); + dev_dbg(kbdev->dev, "Posting event for atom %pK\n", (void *)atom); + + if (WARN_ON(atom->status != KBASE_JD_ATOM_STATE_COMPLETED)) { + dev_warn(kbdev->dev, + "%s: Atom %d (%pK) not completed (status %d)\n", + __func__, + kbase_jd_atom_id(atom->kctx, atom), + atom->kctx, + atom->status); + return; + } if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) { if (atom->event_code == BASE_JD_EVENT_DONE) { @@ -227,7 +235,7 @@ int kbase_event_init(struct kbase_context *kctx) kctx->event_coalesce_count = 0; kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1); - if (NULL == kctx->event_workq) + if (kctx->event_workq == NULL) return 
-EINVAL; return 0; diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.c index 7a715b3..01557cd 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,79 +17,18 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #include #include #include -#include #include #include /* Spin lock protecting all Mali fences as fence->lock. 
*/ static DEFINE_SPINLOCK(kbase_fence_lock); -static const char * -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -kbase_fence_get_driver_name(struct fence *fence) -#else -kbase_fence_get_driver_name(struct dma_fence *fence) -#endif -{ - return kbase_drv_name; -} - -static const char * -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -kbase_fence_get_timeline_name(struct fence *fence) -#else -kbase_fence_get_timeline_name(struct dma_fence *fence) -#endif -{ - return kbase_timeline_name; -} - -static bool -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -kbase_fence_enable_signaling(struct fence *fence) -#else -kbase_fence_enable_signaling(struct dma_fence *fence) -#endif -{ - return true; -} - -static void -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -kbase_fence_fence_value_str(struct fence *fence, char *str, int size) -#else -kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size) -#endif -{ -#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) - snprintf(str, size, "%u", fence->seqno); -#else - snprintf(str, size, "%llu", fence->seqno); -#endif -} - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) -const struct fence_ops kbase_fence_ops = { - .wait = fence_default_wait, -#else -const struct dma_fence_ops kbase_fence_ops = { - .wait = dma_fence_default_wait, -#endif - .get_driver_name = kbase_fence_get_driver_name, - .get_timeline_name = kbase_fence_get_timeline_name, - .enable_signaling = kbase_fence_enable_signaling, - .fence_value_str = kbase_fence_fence_value_str -}; - - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence * kbase_fence_out_new(struct kbase_jd_atom *katom) #else @@ -96,7 +36,7 @@ struct dma_fence * kbase_fence_out_new(struct kbase_jd_atom *katom) #endif { -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence; #else struct dma_fence *fence; @@ -157,7 +97,7 @@ 
kbase_fence_free_callbacks(struct kbase_jd_atom *katom) return res; } -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) int kbase_fence_add_callback(struct kbase_jd_atom *katom, struct fence *fence, @@ -211,4 +151,3 @@ kbase_fence_add_callback(struct kbase_jd_atom *katom, return err; } - diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.h index 8e7024e..37823d5 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2018, 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KBASE_FENCE_H_ @@ -35,7 +34,7 @@ #include "mali_kbase_fence_defs.h" #include "mali_kbase.h" -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) extern const struct fence_ops kbase_fence_ops; #else extern const struct dma_fence_ops kbase_fence_ops; @@ -49,7 +48,7 @@ extern const struct dma_fence_ops kbase_fence_ops; * @node: List head for linking this callback to the katom */ struct kbase_fence_cb { -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence_cb fence_cb; struct fence *fence; #else @@ -66,7 +65,7 @@ struct kbase_fence_cb { * * return: A new fence object on success, NULL on failure. */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *kbase_fence_out_new(struct kbase_jd_atom *katom); #else struct dma_fence *kbase_fence_out_new(struct kbase_jd_atom *katom); @@ -88,6 +87,7 @@ struct dma_fence *kbase_fence_out_new(struct kbase_jd_atom *katom); #endif +#if !MALI_USE_CSF /** * kbase_fence_out_remove() - Removes the output fence from atom * @katom: Atom to remove output fence for @@ -168,7 +168,7 @@ static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom, * Return: 0 on success: fence was either already signaled, or callback was * set up. Negative error code is returned on error. 
*/ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) int kbase_fence_add_callback(struct kbase_jd_atom *katom, struct fence *fence, fence_func_t callback); @@ -269,6 +269,7 @@ bool kbase_fence_free_callbacks(struct kbase_jd_atom *katom); */ #define kbase_fence_out_get(katom) dma_fence_get((katom)->dma_fence.fence) +#endif /* !MALI_USE_CSF */ /** * kbase_fence_put() - Releases a reference to a fence diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h index 607a95c..7a150bd 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_defs.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KBASE_FENCE_DEFS_H_ @@ -28,11 +27,9 @@ * This file hides the compatibility issues with this for the rest the driver */ -#if defined(CONFIG_MALI_DMA_FENCE) || defined(CONFIG_SYNC_FILE) - #include -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) #include @@ -55,7 +52,7 @@ #include -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)) +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) #define dma_fence_get_status(a) (dma_fence_is_signaled(a) ? \ (a)->status ?: 1 \ : 0) @@ -63,6 +60,4 @@ #endif /* < 4.10.0 */ -#endif /* CONFIG_MALI_DMA_FENCE || CONFIG_SYNC_FILE */ - #endif /* _KBASE_FENCE_DEFS_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_ops.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_ops.c new file mode 100644 index 0000000..14ddf03 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_fence_ops.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include +#include +#include +#include + +static const char * +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +kbase_fence_get_driver_name(struct fence *fence) +#else +kbase_fence_get_driver_name(struct dma_fence *fence) +#endif +{ + return kbase_drv_name; +} + +static const char * +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +kbase_fence_get_timeline_name(struct fence *fence) +#else +kbase_fence_get_timeline_name(struct dma_fence *fence) +#endif +{ + return kbase_timeline_name; +} + +static bool +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +kbase_fence_enable_signaling(struct fence *fence) +#else +kbase_fence_enable_signaling(struct dma_fence *fence) +#endif +{ + return true; +} + +static void +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +kbase_fence_fence_value_str(struct fence *fence, char *str, int size) +#else +kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size) +#endif +{ +#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) + snprintf(str, size, "%u", fence->seqno); +#else + snprintf(str, size, "%llu", fence->seqno); +#endif +} + +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +const struct fence_ops kbase_fence_ops = { + .wait = fence_default_wait, +#else +const struct dma_fence_ops kbase_fence_ops = { + .wait = dma_fence_default_wait, +#endif + .get_driver_name = kbase_fence_get_driver_name, + .get_timeline_name = kbase_fence_get_timeline_name, + .enable_signaling = kbase_fence_enable_signaling, + .fence_value_str = kbase_fence_fence_value_str +}; + diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h index 6428f08..88c96e0 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gator.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved. 
+ * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /* NB taken from gator */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c index 93f1565..6eaae83 100755 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2017, 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2017, 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,24 +17,23 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #include #include -#ifdef CONFIG_DEBUG_FS -/** Show callback for the @c gpu_memory debugfs file. 
+#if IS_ENABLED(CONFIG_DEBUG_FS) +/** + * kbasep_gpu_memory_seq_show - Show callback for the @c gpu_memory debugfs file + * @sfile: The debugfs entry + * @data: Data associated with the entry * * This function is called to get the contents of the @c gpu_memory debugfs * file. This is a report of current gpu memory usage. * - * @param sfile The debugfs entry - * @param data Data associated with the entry - * - * @return 0 if successfully prints data in debugfs entry file - * -1 if it encountered an error + * Return: + * * 0 if successfully prints data in debugfs entry file + * * -1 if it encountered an error */ static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data) diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h index 28a871a..6d5423f 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2012-2014, 2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2014, 2016, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,15 +17,10 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ - - /** - * @file mali_kbase_gpu_memory_debugfs.h - * Header file for gpu_memory entry in debugfs + * DOC: Header file for gpu_memory entry in debugfs * */ @@ -34,8 +30,20 @@ #include #include +/* kbase_io_history_add - add new entry to the register access history + * + * @h: Pointer to the history data structure + * @addr: Register address + * @value: The value that is either read from or written to the register + * @write: 1 if it's a register write, 0 if it's a read + */ +void kbase_io_history_add(struct kbase_io_history *h, void __iomem const *addr, + u32 value, u8 write); + /** - * @brief Initialize gpu_memory debugfs entry + * kbasep_gpu_memory_debugfs_init - Initialize gpu_memory debugfs entry + * + * @kbdev: Device pointer */ void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c index ae2458f..e4d52c9 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c @@ -1,12 +1,12 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -17,12 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ - - /* * Base kernel property query APIs */ @@ -32,9 +28,9 @@ #include #include #include -#include "mali_kbase_ioctl.h" +#include #include -#include +#include #include #include @@ -48,7 +44,7 @@ static void kbase_gpuprops_construct_coherent_groups( u64 first_set, first_set_prev; u32 num_groups = 0; - KBASE_DEBUG_ASSERT(NULL != props); + KBASE_DEBUG_ASSERT(props != NULL); props->coherency_info.coherency = props->raw_props.mem_features; props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present); @@ -107,6 +103,71 @@ static void kbase_gpuprops_construct_coherent_groups( props->coherency_info.num_groups = num_groups; } +/** + * kbase_gpuprops_get_curr_config_props - Get the current allocated resources + * @kbdev: The &struct kbase_device structure for the device + * @curr_config: The &struct curr_config_props structure to receive the result + * + * Fill the &struct curr_config_props structure with values from the GPU + * configuration registers. + * + * Return: Zero on success, Linux error code on failure + */ +int kbase_gpuprops_get_curr_config_props(struct kbase_device *kbdev, + struct curr_config_props * const curr_config) +{ + struct kbase_current_config_regdump curr_config_regdump; + int err; + + if (WARN_ON(!kbdev) || WARN_ON(!curr_config)) + return -EINVAL; + + /* If update not needed just return. 
*/ + if (!curr_config->update_needed) + return 0; + + /* Dump relevant registers */ + err = kbase_backend_gpuprops_get_curr_config(kbdev, + &curr_config_regdump); + if (err) + return err; + + curr_config->l2_slices = + KBASE_UBFX32(curr_config_regdump.mem_features, 8U, 4) + 1; + + curr_config->l2_present = + ((u64) curr_config_regdump.l2_present_hi << 32) + + curr_config_regdump.l2_present_lo; + + curr_config->shader_present = + ((u64) curr_config_regdump.shader_present_hi << 32) + + curr_config_regdump.shader_present_lo; + + curr_config->num_cores = hweight64(curr_config->shader_present); + + curr_config->update_needed = false; + + return 0; +} + +/** + * kbase_gpuprops_req_curr_config_update - Request Current Config Update + * @kbdev: The &struct kbase_device structure for the device + * + * Requests the current configuration to be updated next time the + * kbase_gpuprops_get_curr_config_props() is called. + * + * Return: Zero on success, Linux error code on failure + */ +int kbase_gpuprops_req_curr_config_update(struct kbase_device *kbdev) +{ + if (WARN_ON(!kbdev)) + return -EINVAL; + + kbdev->gpu_props.curr_config.update_needed = true; + return 0; +} + /** * kbase_gpuprops_get_props - Get the GPU configuration * @gpu_props: The &struct base_gpu_props structure @@ -124,8 +185,8 @@ static int kbase_gpuprops_get_props(struct base_gpu_props * const gpu_props, int i; int err; - KBASE_DEBUG_ASSERT(NULL != kbdev); - KBASE_DEBUG_ASSERT(NULL != gpu_props); + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(gpu_props != NULL); /* Dump relevant registers */ err = kbase_backend_gpuprops_get(kbdev, ®dump); @@ -166,6 +227,10 @@ static int kbase_gpuprops_get_props(struct base_gpu_props * const gpu_props, gpu_props->raw_props.thread_features = regdump.thread_features; gpu_props->raw_props.thread_tls_alloc = regdump.thread_tls_alloc; + gpu_props->raw_props.gpu_features = + ((u64) regdump.gpu_features_hi << 32) + + regdump.gpu_features_lo; + return 0; } @@ -182,6 +247,59 
@@ void kbase_gpuprops_update_core_props_gpu_id( KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16); } +/** + * kbase_gpuprops_update_max_config_props - Updates the max config properties in + * the base_gpu_props. + * @base_props: The &struct base_gpu_props structure + * @kbdev: The &struct kbase_device structure for the device + * + * Updates the &struct base_gpu_props structure with the max config properties. + */ +static void kbase_gpuprops_update_max_config_props( + struct base_gpu_props * const base_props, struct kbase_device *kbdev) +{ + int l2_n = 0; + + if (WARN_ON(!kbdev) || WARN_ON(!base_props)) + return; + + /* return if the max_config is not set during arbif initialization */ + if (kbdev->gpu_props.max_config.core_mask == 0) + return; + + /* + * Set the base_props with the maximum config values to ensure that the + * user space will always be based on the maximum resources available. + */ + base_props->l2_props.num_l2_slices = + kbdev->gpu_props.max_config.l2_slices; + base_props->raw_props.shader_present = + kbdev->gpu_props.max_config.core_mask; + /* + * Update l2_present in the raw data to be consistent with the + * max_config.l2_slices number. + */ + base_props->raw_props.l2_present = 0; + for (l2_n = 0; l2_n < base_props->l2_props.num_l2_slices; l2_n++) { + base_props->raw_props.l2_present <<= 1; + base_props->raw_props.l2_present |= 0x1; + } + /* + * Update the coherency_info data using just one core group. For + * architectures where the max_config is provided by the arbiter it is + * not necessary to split the shader core groups in different coherent + * groups. 
+ */ + base_props->coherency_info.coherency = + base_props->raw_props.mem_features; + base_props->coherency_info.num_core_groups = 1; + base_props->coherency_info.num_groups = 1; + base_props->coherency_info.group[0].core_mask = + kbdev->gpu_props.max_config.core_mask; + base_props->coherency_info.group[0].num_cores = + hweight32(kbdev->gpu_props.max_config.core_mask); +} + /** * kbase_gpuprops_calculate_props - Calculate the derived properties * @gpu_props: The &struct base_gpu_props structure @@ -195,7 +313,6 @@ static void kbase_gpuprops_calculate_props( { int i; u32 gpu_id; - u32 product_id; /* Populate the base_gpu_props structure */ kbase_gpuprops_update_core_props_gpu_id(gpu_props); @@ -218,7 +335,8 @@ static void kbase_gpuprops_calculate_props( /* Field with number of l2 slices is added to MEM_FEATURES register * since t76x. Below code assumes that for older GPU reserved bits will - * be read as zero. */ + * be read as zero. + */ gpu_props->l2_props.num_l2_slices = KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1; @@ -251,9 +369,19 @@ static void kbase_gpuprops_calculate_props( * Workaround for the incorrectly applied THREAD_FEATURES to tDUx. 
*/ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID; - product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT; +#if MALI_USE_CSF + gpu_props->thread_props.max_registers = + KBASE_UBFX32(gpu_props->raw_props.thread_features, + 0U, 22); + gpu_props->thread_props.impl_tech = + KBASE_UBFX32(gpu_props->raw_props.thread_features, + 22U, 2); + gpu_props->thread_props.max_task_queue = + KBASE_UBFX32(gpu_props->raw_props.thread_features, + 24U, 8); + gpu_props->thread_props.max_thread_group_split = 0; +#else if ((gpu_id & GPU_ID2_PRODUCT_MODEL) == GPU_ID2_PRODUCT_TDUX) { gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, @@ -279,6 +407,7 @@ static void kbase_gpuprops_calculate_props( KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2); } +#endif /* If values are not specified, then use defaults */ if (gpu_props->thread_props.max_registers == 0) { @@ -286,8 +415,30 @@ static void kbase_gpuprops_calculate_props( gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT; gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT; } - /* Initialize the coherent_group structure for each group */ - kbase_gpuprops_construct_coherent_groups(gpu_props); + + /* + * If the maximum resources allocated information is available it is + * necessary to update the base_gpu_props with the max_config info to + * the userspace. This is applicable to systems that receive this + * information from the arbiter. 
+ */ + if (kbdev->gpu_props.max_config.core_mask) + /* Update the max config properties in the base_gpu_props */ + kbase_gpuprops_update_max_config_props(gpu_props, + kbdev); + else + /* Initialize the coherent_group structure for each group */ + kbase_gpuprops_construct_coherent_groups(gpu_props); +} + +void kbase_gpuprops_set_max_config(struct kbase_device *kbdev, + const struct max_config_props *max_config) +{ + if (WARN_ON(!kbdev) || WARN_ON(!max_config)) + return; + + kbdev->gpu_props.max_config.l2_slices = max_config->l2_slices; + kbdev->gpu_props.max_config.core_mask = max_config->core_mask; } void kbase_gpuprops_set(struct kbase_device *kbdev) @@ -295,7 +446,8 @@ void kbase_gpuprops_set(struct kbase_device *kbdev) struct kbase_gpu_props *gpu_props; struct gpu_raw_gpu_props *raw; - KBASE_DEBUG_ASSERT(NULL != kbdev); + if (WARN_ON(!kbdev)) + return; gpu_props = &kbdev->gpu_props; raw = &gpu_props->props.raw_props; @@ -315,9 +467,19 @@ void kbase_gpuprops_set(struct kbase_device *kbdev) gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8); gpu_props->num_cores = hweight64(raw->shader_present); - gpu_props->num_core_groups = hweight64(raw->l2_present); + gpu_props->num_core_groups = + gpu_props->props.coherency_info.num_core_groups; gpu_props->num_address_spaces = hweight32(raw->as_present); gpu_props->num_job_slots = hweight32(raw->js_present); + + /* + * Current configuration is used on HW interactions so that the maximum + * config is just used for user space avoiding interactions with parts + * of the hardware that might not be allocated to the kbase instance at + * that moment. + */ + kbase_gpuprops_req_curr_config_update(kbdev); + kbase_gpuprops_get_curr_config_props(kbdev, &gpu_props->curr_config); } int kbase_gpuprops_set_features(struct kbase_device *kbdev) @@ -355,13 +517,34 @@ int kbase_gpuprops_set_features(struct kbase_device *kbdev) * in sysfs. 
*/ static u8 override_l2_size; -module_param(override_l2_size, byte, 0); +module_param(override_l2_size, byte, 0000); MODULE_PARM_DESC(override_l2_size, "Override L2 size config for testing"); static u8 override_l2_hash; -module_param(override_l2_hash, byte, 0); +module_param(override_l2_hash, byte, 0000); MODULE_PARM_DESC(override_l2_hash, "Override L2 hash config for testing"); +static u32 l2_hash_values[ASN_HASH_COUNT] = { + 0, +}; +static int num_override_l2_hash_values; +module_param_array(l2_hash_values, uint, &num_override_l2_hash_values, 0000); +MODULE_PARM_DESC(l2_hash_values, "Override L2 hash values config for testing"); + +/* Definitions for range of supported user defined hash functions for GPUs + * that support L2_CONFIG and not ASN_HASH features. Supported hash function + * range from 0b1000-0b1111 inclusive. Selection of any other values will + * lead to undefined behavior. + */ +#define USER_DEFINED_HASH_LO ((u8)0x08) +#define USER_DEFINED_HASH_HI ((u8)0x0F) + +enum l2_config_override_result { + L2_CONFIG_OVERRIDE_FAIL = -1, + L2_CONFIG_OVERRIDE_NONE, + L2_CONFIG_OVERRIDE_OK, +}; + /** * kbase_read_l2_config_from_dt - Read L2 configuration * @kbdev: The kbase device for which to get the L2 configuration. @@ -370,30 +553,67 @@ MODULE_PARM_DESC(override_l2_hash, "Override L2 hash config for testing"); * Override values in module parameters take priority over override values in * device tree. * - * Return: true if either size or hash was overridden, false if no overrides - * were found. + * Return: L2_CONFIG_OVERRIDE_OK if either size or hash, or both was properly + * overridden, L2_CONFIG_OVERRIDE_NONE if no overrides are provided. + * L2_CONFIG_OVERRIDE_FAIL otherwise. 
*/ -static bool kbase_read_l2_config_from_dt(struct kbase_device * const kbdev) +static enum l2_config_override_result +kbase_read_l2_config_from_dt(struct kbase_device *const kbdev) { struct device_node *np = kbdev->dev->of_node; if (!np) - return false; + return L2_CONFIG_OVERRIDE_NONE; if (override_l2_size) kbdev->l2_size_override = override_l2_size; else if (of_property_read_u8(np, "l2-size", &kbdev->l2_size_override)) kbdev->l2_size_override = 0; - if (override_l2_hash) + /* Check overriding value is supported, if not will result in + * undefined behavior. + */ + if (override_l2_hash >= USER_DEFINED_HASH_LO && + override_l2_hash <= USER_DEFINED_HASH_HI) kbdev->l2_hash_override = override_l2_hash; else if (of_property_read_u8(np, "l2-hash", &kbdev->l2_hash_override)) kbdev->l2_hash_override = 0; - if (kbdev->l2_size_override || kbdev->l2_hash_override) - return true; + kbdev->l2_hash_values_override = false; + if (num_override_l2_hash_values) { + int i; + + kbdev->l2_hash_values_override = true; + for (i = 0; i < num_override_l2_hash_values; i++) + kbdev->l2_hash_values[i] = l2_hash_values[i]; + } else if (!of_property_read_u32_array(np, "l2-hash-values", + kbdev->l2_hash_values, + ASN_HASH_COUNT)) + kbdev->l2_hash_values_override = true; + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH) && + (kbdev->l2_hash_override)) { + dev_err(kbdev->dev, "l2-hash not supported\n"); + return L2_CONFIG_OVERRIDE_FAIL; + } + + if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH) && + (kbdev->l2_hash_values_override)) { + dev_err(kbdev->dev, "l2-hash-values not supported\n"); + return L2_CONFIG_OVERRIDE_FAIL; + } + + if (kbdev->l2_hash_override && kbdev->l2_hash_values_override) { + dev_err(kbdev->dev, + "both l2-hash & l2-hash-values not supported\n"); + return L2_CONFIG_OVERRIDE_FAIL; + } + + if (kbdev->l2_size_override || kbdev->l2_hash_override || + kbdev->l2_hash_values_override) + return L2_CONFIG_OVERRIDE_OK; - return false; + return 
L2_CONFIG_OVERRIDE_NONE; } int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev) @@ -405,8 +625,25 @@ int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev) struct base_gpu_props *gpu_props = &kbdev->gpu_props.props; /* Check for L2 cache size & hash overrides */ - if (!kbase_read_l2_config_from_dt(kbdev)) - return 0; + switch (kbase_read_l2_config_from_dt(kbdev)) { + case L2_CONFIG_OVERRIDE_FAIL: + err = -EIO; + goto exit; + case L2_CONFIG_OVERRIDE_NONE: + goto exit; + default: + break; + } + + /* pm.active_count is expected to be 1 here, which is set in + * kbase_hwaccess_pm_powerup(). + */ + WARN_ON(kbdev->pm.active_count != 1); + /* The new settings for L2 cache can only be applied when it is + * off, so first do the power down. + */ + kbase_pm_context_idle(kbdev); + kbase_pm_wait_for_desired_state(kbdev); /* Need L2 to get powered to reflect to L2_FEATURES */ kbase_pm_context_active(kbdev); @@ -417,21 +654,21 @@ int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev) /* Dump L2_FEATURES register */ err = kbase_backend_gpuprops_get_l2_features(kbdev, &regdump); if (err) - goto idle_gpu; + goto exit; dev_info(kbdev->dev, "Reflected L2_FEATURES is 0x%x\n", - regdump.l2_features); + regdump.l2_features); + dev_info(kbdev->dev, "Reflected L2_CONFIG is 0x%08x\n", + regdump.l2_config); + /* Update gpuprops with reflected L2_FEATURES */ gpu_props->raw_props.l2_features = regdump.l2_features; gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8); - -idle_gpu: - /* Let GPU idle */ - kbase_pm_context_idle(kbdev); } +exit: return err; } @@ -511,7 +748,7 @@ static struct { PROP(RAW_THREAD_FEATURES, raw_props.thread_features), PROP(RAW_THREAD_TLS_ALLOC, raw_props.thread_tls_alloc), PROP(RAW_COHERENCY_MODE, raw_props.coherency_mode), - + PROP(RAW_GPU_FEATURES, raw_props.gpu_features), PROP(COHERENCY_NUM_GROUPS, coherency_info.num_groups), PROP(COHERENCY_NUM_CORE_GROUPS, coherency_info.num_core_groups),
PROP(COHERENCY_COHERENCY, coherency_info.coherency), diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h index 5eee794..b20b99b 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2015, 2017, 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,29 +17,10 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * - *//* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * (C) COPYRIGHT 2011-2015, 2017, 2019-2020 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * A copy of the licence is included with the program, and can also be obtained - * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. 
- * */ - - /** - * @file mali_kbase_gpuprops.h - * Base kernel property query APIs + * DOC: Base kernel property query APIs */ #ifndef _KBASE_GPUPROPS_H_ @@ -64,11 +46,10 @@ struct kbase_device; (((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1)) /** - * @brief Set up Kbase GPU properties. + * kbase_gpuprops_set - Set up Kbase GPU properties. + * @kbdev: The struct kbase_device structure for the device * * Set up Kbase GPU properties with information from the GPU registers - * - * @param kbdev The struct kbase_device structure for the device */ void kbase_gpuprops_set(struct kbase_device *kbdev); @@ -89,6 +70,8 @@ int kbase_gpuprops_set_features(struct kbase_device *kbdev); * @kbdev: Device pointer * * This function updates l2_features and the log2 cache size. + * The function expects GPU to be powered up and value of pm.active_count + * to be 1. * * Return: Zero on success, Linux error code for failure */ @@ -132,4 +115,38 @@ int kbase_device_populate_max_freq(struct kbase_device *kbdev); void kbase_gpuprops_update_core_props_gpu_id( struct base_gpu_props * const gpu_props); +/** + * kbase_gpuprops_set_max_config - Set the max config information + * @kbdev: Device pointer + * @max_config: Maximum configuration data to be updated + * + * This function sets max_config in the kbase_gpu_props. + */ +void kbase_gpuprops_set_max_config(struct kbase_device *kbdev, + const struct max_config_props *max_config); + +/** + * kbase_gpuprops_get_curr_config_props - Get the current allocated resources + * @kbdev: The &struct kbase_device structure for the device + * @curr_config: The &struct curr_config_props structure to receive the result + * + * Fill the &struct curr_config_props structure with values from the GPU + * configuration registers. 
+ * + * Return: Zero on success, Linux error code on failure + */ +int kbase_gpuprops_get_curr_config_props(struct kbase_device *kbdev, + struct curr_config_props * const curr_config); + +/** + * kbase_gpuprops_req_curr_config_update - Request Current Config Update + * @kbdev: The &struct kbase_device structure for the device + * + * Requests the current configuration to be updated next time the + * kbase_gpuprops_get_curr_config_props() is called. + * + * Return: Zero on success, Linux error code on failure + */ +int kbase_gpuprops_req_curr_config_update(struct kbase_device *kbdev); + #endif /* _KBASE_GPUPROPS_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h index ec6f1c3..02705a0 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2018, 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,21 +17,16 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ - - /** - * @file mali_kbase_gpuprops_types.h - * Base kernel property query APIs + * DOC: Base kernel property query APIs */ #ifndef _KBASE_GPUPROPS_TYPES_H_ #define _KBASE_GPUPROPS_TYPES_H_ -#include "mali_base_kernel.h" +#include #define KBASE_GPU_SPEED_MHZ 123 #define KBASE_GPU_PC_SIZE_LOG2 24U @@ -38,6 +34,7 @@ struct kbase_gpuprops_regdump { u32 gpu_id; u32 l2_features; + u32 l2_config; u32 core_features; u32 tiler_features; u32 mem_features; @@ -60,6 +57,30 @@ struct kbase_gpuprops_regdump { u32 stack_present_lo; u32 stack_present_hi; u32 coherency_features; + u32 gpu_features_lo; + u32 gpu_features_hi; +}; + +/** + * struct kbase_current_config_regdump - Register dump for current resources + * allocated to the GPU. + * @mem_features: Memory system features. Contains information about the + * features of the memory system. Used here to get the L2 slice + * count. + * @shader_present_lo: Shader core present bitmap. Low word. + * @shader_present_hi: Shader core present bitmap. High word. + * @l2_present_lo: L2 cache present bitmap. Low word. + * @l2_present_hi: L2 cache present bitmap. High word. + * + * Register dump structure used to store the register data related to the + * current resources allocated to the GPU. + */ +struct kbase_current_config_regdump { + u32 mem_features; + u32 shader_present_lo; + u32 shader_present_hi; + u32 l2_present_lo; + u32 l2_present_hi; }; struct kbase_gpu_cache_props { @@ -76,6 +97,50 @@ struct kbase_gpu_mmu_props { u8 pa_bits; }; +/** + * struct max_config_props - Properties based on the maximum resources + * available. + * @l2_slices: Maximum number of L2 slices that can be assigned to the GPU + * during runtime. + * @padding: Padding to a multiple of 64 bits. + * @core_mask: Largest core mask bitmap that can be assigned to the GPU during + * runtime. + * + * Properties based on the maximum resources available (not necessarily + * allocated at that moment).
Used to provide the maximum configuration to the + * userspace allowing the applications to allocate enough resources in case the + * real allocated resources change. + */ +struct max_config_props { + u8 l2_slices; + u8 padding[3]; + u32 core_mask; +}; + +/** + * struct curr_config_props - Properties based on the current resources + * allocated to the GPU. + * @l2_present: Current L2 present bitmap that is allocated to the GPU. + * @shader_present: Current shader present bitmap that is allocated to the GPU. + * @num_cores: Current number of shader cores allocated to the GPU. + * @l2_slices: Current number of L2 slices allocated to the GPU. + * @update_needed: Defines if it is necessary to re-read the registers to + * update the current allocated resources. + * @padding: Padding to a multiple of 64 bits. + * + * Properties based on the current resource available. Used for operations with + * hardware interactions to avoid using userspace data that can be based on + * the maximum resource available. 
+ */ +struct curr_config_props { + u64 l2_present; + u64 shader_present; + u16 num_cores; + u8 l2_slices; + bool update_needed; + u8 padding[4]; +}; + struct kbase_gpu_props { /* kernel-only properties */ u8 num_cores; @@ -88,6 +153,12 @@ struct kbase_gpu_props { struct kbase_gpu_mem_props mem; struct kbase_gpu_mmu_props mmu; + /* Properties based on the current resource available */ + struct curr_config_props curr_config; + + /* Properties based on the maximum resource available */ + struct max_config_props max_config; + /* Properties shared with userspace */ struct base_gpu_props props; diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.c index 6a47c9d..2a20a3d 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #include "mali_kbase_gwt.h" @@ -71,6 +70,7 @@ int kbase_gpu_gwt_start(struct kbase_context *kctx) INIT_LIST_HEAD(&kctx->gwt_current_list); INIT_LIST_HEAD(&kctx->gwt_snapshot_list); +#if !MALI_USE_CSF /* If GWT is enabled using new vector dumping format * from user space, back up status of the job serialization flag and * use full serialisation of jobs for dumping. @@ -80,6 +80,7 @@ int kbase_gpu_gwt_start(struct kbase_context *kctx) kctx->kbdev->serialize_jobs = KBASE_SERIALIZE_INTRA_SLOT | KBASE_SERIALIZE_INTER_SLOT; +#endif /* Mark gwt enabled before making pages read only in case a write page fault is triggered while we're still in this loop. (kbase_gpu_vm_lock() doesn't prevent this!) @@ -113,7 +114,9 @@ int kbase_gpu_gwt_stop(struct kbase_context *kctx) kfree(pos); } +#if !MALI_USE_CSF kctx->kbdev->serialize_jobs = kctx->kbdev->backup_serialize_jobs; +#endif kbase_gpu_gwt_setup_pages(kctx, ~0UL); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.h index 7e7746e..30de43d 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_gwt.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2017, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,15 +17,13 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #if !defined(_KBASE_GWT_H) #define _KBASE_GWT_H #include -#include +#include /** * kbase_gpu_gwt_start - Start the GPU write tracking diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c index f8a9248..7ad583c 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ - - /* * Run-time work-arounds helpers */ @@ -68,6 +65,9 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_TBEX: features = base_hw_features_tBEx; break; + case GPU_ID2_PRODUCT_TBAX: + features = base_hw_features_tBAx; + break; case GPU_ID2_PRODUCT_TDUX: features = base_hw_features_tDUx; break; @@ -81,14 +81,6 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_TVAX: features = base_hw_features_tVAx; break; - case GPU_ID2_PRODUCT_TTUX: - /* Fallthrough */ - case GPU_ID2_PRODUCT_LTUX: - features = base_hw_features_tTUx; - break; - case GPU_ID2_PRODUCT_TE2X: - features = base_hw_features_tE2x; - break; default: features = base_hw_features_generic; break; @@ -140,103 +132,99 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( }; static const struct base_hw_product base_hw_products[] = { - {GPU_ID2_PRODUCT_TMIX, - {{GPU_ID2_VERSION_MAKE(0, 0, 1), - base_hw_issues_tMIx_r0p0_05dev0}, - {GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tMIx_r0p1}, - {U32_MAX /* sentinel value */, NULL} } }, - - {GPU_ID2_PRODUCT_THEX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tHEx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tHEx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tHEx_r0p1}, - {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tHEx_r0p1}, - {GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tHEx_r0p2}, - {GPU_ID2_VERSION_MAKE(0, 3, 0), base_hw_issues_tHEx_r0p3}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TSIX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tSIx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tSIx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tSIx_r0p1}, - {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tSIx_r1p0}, - {GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tSIx_r1p1}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TDVX, - 
{{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDVx_r0p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TNOX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNOx_r0p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TGOX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGOx_r0p0}, - {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tGOx_r1p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TTRX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tTRx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTRx_r0p1}, - {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tTRx_r0p1}, - {GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tTRx_r0p2}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TNAX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNAx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tNAx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tNAx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tNAx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tNAx_r0p1}, - {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tNAx_r0p1}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_LBEX, - {{GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_lBEx_r1p0}, - {GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_lBEx_r1p1}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TBEX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBEx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tBEx_r0p0}, - {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tBEx_r0p1}, - {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBEx_r1p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TDUX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDUx_r0p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TODX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_LODX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TGRX, - {{GPU_ID2_VERSION_MAKE(0, 
0, 0), base_hw_issues_tGRx_r0p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TVAX, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tVAx_r0p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TTUX, - {{GPU_ID2_VERSION_MAKE(2, 0, 0), base_hw_issues_tTUx_r0p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_LTUX, - {{GPU_ID2_VERSION_MAKE(3, 0, 0), base_hw_issues_tTUx_r0p0}, - {U32_MAX, NULL} } }, - - {GPU_ID2_PRODUCT_TE2X, - {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tE2x_r0p0}, - {U32_MAX, NULL} } }, + { GPU_ID2_PRODUCT_TMIX, + { { GPU_ID2_VERSION_MAKE(0, 0, 1), + base_hw_issues_tMIx_r0p0_05dev0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tMIx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tMIx_r0p1 }, + { U32_MAX /* sentinel value */, NULL } } }, + + { GPU_ID2_PRODUCT_THEX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tHEx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tHEx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tHEx_r0p1 }, + { GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tHEx_r0p1 }, + { GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tHEx_r0p2 }, + { GPU_ID2_VERSION_MAKE(0, 3, 0), base_hw_issues_tHEx_r0p3 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TSIX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tSIx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tSIx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tSIx_r0p1 }, + { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tSIx_r1p0 }, + { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tSIx_r1p1 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TDVX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDVx_r0p0 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TNOX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNOx_r0p0 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TGOX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGOx_r0p0 }, + { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tGOx_r1p0 }, + { U32_MAX, NULL } } }, + 
+ { GPU_ID2_PRODUCT_TTRX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tTRx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTRx_r0p1 }, + { GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tTRx_r0p1 }, + { GPU_ID2_VERSION_MAKE(0, 2, 0), base_hw_issues_tTRx_r0p2 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TNAX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNAx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tNAx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tNAx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tNAx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tNAx_r0p1 }, + { GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tNAx_r0p1 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_LBEX, + { { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_lBEx_r1p0 }, + { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_lBEx_r1p1 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TBEX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBEx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tBEx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tBEx_r0p1 }, + { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBEx_r1p0 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TBAX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBAx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 1), base_hw_issues_tBAx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 2), base_hw_issues_tBAx_r0p0 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TDUX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDUx_r0p0 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TODX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tODx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tODx_r0p0 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_LODX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0 }, + { 
U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TGRX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tGRx_r0p0 }, + { U32_MAX, NULL } } }, + + { GPU_ID2_PRODUCT_TVAX, + { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tVAx_r0p0 }, + { U32_MAX, NULL } } }, }; u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; @@ -269,8 +257,8 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( } /* Check whether this is a candidate for most recent - known version not later than the actual - version. */ + * known version not later than the actual version. + */ if ((version > product->map[v].version) && (product->map[v].version >= fallback_version)) { #if MALI_CUSTOMER_RELEASE @@ -287,7 +275,8 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( if ((issues == NULL) && (fallback_issues != NULL)) { /* Fall back to the issue set of the most recent known - version not later than the actual version. */ + * version not later than the actual version. + */ issues = fallback_issues; #if MALI_CUSTOMER_RELEASE @@ -340,7 +329,8 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) #if !MALI_CUSTOMER_RELEASE /* The GPU ID might have been replaced with the last - known version of the same GPU. */ + * known version of the same GPU. 
+ */ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; #endif } else { @@ -374,6 +364,9 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_TBEX: issues = base_hw_issues_model_tBEx; break; + case GPU_ID2_PRODUCT_TBAX: + issues = base_hw_issues_model_tBAx; + break; case GPU_ID2_PRODUCT_TDUX: issues = base_hw_issues_model_tDUx; break; @@ -387,13 +380,6 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) case GPU_ID2_PRODUCT_TVAX: issues = base_hw_issues_model_tVAx; break; - case GPU_ID2_PRODUCT_TTUX: - case GPU_ID2_PRODUCT_LTUX: - issues = base_hw_issues_model_tTUx; - break; - case GPU_ID2_PRODUCT_TE2X: - issues = base_hw_issues_model_tE2x; - break; default: dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h index f386b16..6c04a23 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hw.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2017, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,15 +17,10 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ - - /** - * @file - * Run-time work-arounds helpers + * DOC: Run-time work-arounds helpers */ #ifndef _KBASE_HW_H_ @@ -33,13 +29,17 @@ #include "mali_kbase_defs.h" /** - * @brief Tell whether a work-around should be enabled + * Tell whether a work-around should be enabled + * @kbdev: Device pointer + * @issue: issue to be checked */ #define kbase_hw_has_issue(kbdev, issue)\ test_bit(issue, &(kbdev)->hw_issues_mask[0]) /** - * @brief Tell whether a feature is supported + * Tell whether a feature is supported + * @kbdev: Device pointer + * @feature: feature to be checked */ #define kbase_hw_has_feature(kbdev, feature)\ test_bit(feature, &(kbdev)->hw_features_mask[0]) @@ -63,7 +63,8 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev); /** - * @brief Set the features mask depending on the GPU ID + * Set the features mask depending on the GPU ID + * @kbdev: Device pointer */ void kbase_hw_set_features_mask(struct kbase_device *kbdev); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h index 89df251..0da4eb2 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2015, 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015, 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,11 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - /* * HW access backend common APIs */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h index 124a2d9..62a6ec5 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014, 2016, 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2016-2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,20 +17,16 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ - /** - * @file mali_kbase_hwaccess_gpu_defs.h - * HW access common definitions + * DOC: HW access common definitions */ #ifndef _KBASE_HWACCESS_DEFS_H_ #define _KBASE_HWACCESS_DEFS_H_ -#include +#include /** * struct kbase_hwaccess_data - object encapsulating the GPU backend specific @@ -43,7 +40,9 @@ * @backend: GPU backend specific data for HW access layer */ struct kbase_hwaccess_data { +#if !MALI_USE_CSF struct kbase_context *active_kctx[BASE_JM_MAX_NR_SLOTS]; +#endif struct kbase_backend_data backend; }; diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h index 3ae0dbe..71ccc91 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015, 2018, 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,25 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * - *//* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * (C) COPYRIGHT 2014-2015, 2018, 2019-2020 ARM Limited. All rights reserved. 
- * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * A copy of the licence is included with the program, and can also be obtained - * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * */ - /** * Base kernel property query backend APIs */ @@ -55,6 +39,23 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev, struct kbase_gpuprops_regdump *regdump); +/** + * kbase_backend_gpuprops_get_curr_config() - Fill @curr_config_regdump with + * relevant GPU properties read from + * the GPU registers. + * @kbdev: Device pointer. + * @curr_config_regdump: Pointer to struct kbase_current_config_regdump + * structure. + * + * The caller should ensure that GPU remains powered-on during this function and + * the caller must ensure this function returns success before using the values + * returned in the curr_config_regdump in any part of the kernel. + * + * Return: Zero for success or a Linux error code + */ +int kbase_backend_gpuprops_get_curr_config(struct kbase_device *kbdev, + struct kbase_current_config_regdump *curr_config_regdump); + /** * kbase_backend_gpuprops_get_features - Fill @regdump with GPU properties read * from GPU diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h index be85491..f836953 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2015, 2017-2018, 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015, 2017-2018, 2020-2021 ARM Limited. All rights reserved.
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - - /* * HW Access instrumentation common APIs */ @@ -29,27 +26,26 @@ #ifndef _KBASE_HWACCESS_INSTR_H_ #define _KBASE_HWACCESS_INSTR_H_ -#include +#include /** * struct kbase_instr_hwcnt_enable - Enable hardware counter collection. * @dump_buffer: GPU address to write counters to. * @dump_buffer_bytes: Size in bytes of the buffer pointed to by dump_buffer. - * @jm_bm: counters selection bitmask (JM). + * @fe_bm: counters selection bitmask (Front End). * @shader_bm: counters selection bitmask (Shader). * @tiler_bm: counters selection bitmask (Tiler). * @mmu_l2_bm: counters selection bitmask (MMU_L2). - * @use_secondary: use secondary performance counters set for applicable - * counter blocks. + * @counter_set: the performance counter set to use. */ struct kbase_instr_hwcnt_enable { u64 dump_buffer; u64 dump_buffer_bytes; - u32 jm_bm; + u32 fe_bm; u32 shader_bm; u32 tiler_bm; u32 mmu_l2_bm; - bool use_secondary; + u8 counter_set; }; /** @@ -139,7 +135,7 @@ int kbase_instr_backend_init(struct kbase_device *kbdev); */ void kbase_instr_backend_term(struct kbase_device *kbdev); -#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS +#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS /** * kbase_instr_backend_debugfs_init() - Add a debugfs entry for the * hardware counter set. 
diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h index 3d5934e..8689647 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,11 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - /* * HW access job manager common APIs */ @@ -31,7 +29,7 @@ /** * kbase_backend_run_atom() - Run an atom on the GPU * @kbdev: Device pointer - * @atom: Atom to run + * @katom: Atom to run * * Caller must hold the HW access lock */ @@ -154,6 +152,7 @@ void kbase_backend_cache_clean(struct kbase_device *kbdev, void kbase_backend_complete_wq(struct kbase_device *kbdev, struct kbase_jd_atom *katom); +#if !MALI_USE_CSF /** * kbase_backend_complete_wq_post_sched - Perform backend-specific actions * required on completing an atom, after @@ -166,6 +165,7 @@ void kbase_backend_complete_wq(struct kbase_device *kbdev, */ void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev, base_jd_core_req core_req); +#endif /* !MALI_USE_CSF */ /** * kbase_backend_reset() - The GPU is being reset. 
Cancel all jobs on the GPU diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h index bbaf6ea..36bbe2d 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2015, 2018-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015, 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,14 +17,10 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - /** - * @file mali_kbase_hwaccess_pm.h - * HW access power manager common APIs + * DOC: HW access power manager common APIs */ #ifndef _KBASE_HWACCESS_PM_H_ @@ -32,7 +29,7 @@ #include #include -#include +#include /* Forward definition - see mali_kbase.h */ struct kbase_device; @@ -80,24 +77,21 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev, * the time this function returns, regardless of whether or not the active power * policy asks for the GPU to be powered off. 
* - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_hwaccess_pm_halt(struct kbase_device *kbdev); /** * Perform any backend-specific actions to suspend the GPU * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev); /** * Perform any backend-specific actions to resume the GPU from a suspend * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev); @@ -105,8 +99,7 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev); * Perform any required actions for activating the GPU. Called when the first * context goes active. * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev); @@ -114,35 +107,43 @@ void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev); * Perform any required actions for idling the GPU. Called when the last * context goes idle. * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) */ void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev); - +#if MALI_USE_CSF +/** + * Set the debug core mask. + * + * This determines which cores the power manager is allowed to use. 
+ * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @new_core_mask: The core mask to use + */ +void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, + u64 new_core_mask); +#else /** * Set the debug core mask. * * This determines which cores the power manager is allowed to use. * - * @param kbdev The kbase device structure for the device (must be a - * valid pointer) - * @param new_core_mask_js0 The core mask to use for job slot 0 - * @param new_core_mask_js0 The core mask to use for job slot 1 - * @param new_core_mask_js0 The core mask to use for job slot 2 + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @new_core_mask_js0: The core mask to use for job slot 0 + * @new_core_mask_js1: The core mask to use for job slot 1 + * @new_core_mask_js2: The core mask to use for job slot 2 */ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask_js0, u64 new_core_mask_js1, u64 new_core_mask_js2); - +#endif /* MALI_USE_CSF */ /** * Get the current policy. * * Returns the policy that is currently active. * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) * * @return The current policy */ @@ -152,10 +153,9 @@ const struct kbase_pm_ca_policy /** * Change the policy to the one specified. 
* - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) - * @param policy The policy to change to (valid pointer returned from - * @ref kbase_pm_ca_list_policies) + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @policy: The policy to change to (valid pointer returned from + * @ref kbase_pm_ca_list_policies) */ void kbase_pm_ca_set_policy(struct kbase_device *kbdev, const struct kbase_pm_ca_policy *policy); @@ -163,23 +163,20 @@ void kbase_pm_ca_set_policy(struct kbase_device *kbdev, /** * Retrieve a static list of the available policies. * - * @param[out] policies An array pointer to take the list of policies. This may - * be NULL. The contents of this array must not be - * modified. + * @policies: An array pointer to take the list of policies. This may be NULL. + * The contents of this array must not be modified. * * @return The number of policies */ int kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **policies); - /** * Get the current policy. * * Returns the policy that is currently active. * - * @param kbdev The kbase device structure for the device (must be a valid - * pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) * * @return The current policy */ @@ -188,9 +185,9 @@ const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev); /** * Change the policy to the one specified. 
* - * @param kbdev The kbase device structure for the device (must be a valid + * @kbdev: The kbase device structure for the device (must be a valid * pointer) - * @param policy The policy to change to (valid pointer returned from + * @policy: The policy to change to (valid pointer returned from * @ref kbase_pm_list_policies) */ void kbase_pm_set_policy(struct kbase_device *kbdev, diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h index a61e5b9..8a4ece4 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014,2018-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -15,13 +16,6 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - - -/** * */ @@ -30,13 +24,27 @@ /** * kbase_backend_get_gpu_time() - Get current GPU time + * @kbdev: Device pointer + * @cycle_counter: Pointer to u64 to store cycle counter in. 
+ * @system_time: Pointer to u64 to store system time in + * @ts: Pointer to struct timespec to store current monotonic + * time in + */ +void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, + u64 *system_time, struct timespec64 *ts); + +/** + * kbase_backend_get_gpu_time_norequest() - Get current GPU time without + * request/release cycle counter * @kbdev: Device pointer * @cycle_counter: Pointer to u64 to store cycle counter in * @system_time: Pointer to u64 to store system time in * @ts: Pointer to struct timespec to store current monotonic * time in */ -void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, - u64 *system_time, struct timespec64 *ts); +void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, + u64 *cycle_counter, + u64 *system_time, + struct timespec64 *ts); #endif /* _KBASE_BACKEND_TIME_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt.c index 14ec5cb..ea4893d 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ /* @@ -28,9 +27,6 @@ #include "mali_kbase_hwcnt_accumulator.h" #include "mali_kbase_hwcnt_backend.h" #include "mali_kbase_hwcnt_types.h" -#include "mali_malisw.h" -#include "mali_kbase_debug.h" -#include "mali_kbase_linux.h" #include #include @@ -51,6 +47,7 @@ enum kbase_hwcnt_accum_state { /** * struct kbase_hwcnt_accumulator - Hardware counter accumulator structure. + * @metadata: Pointer to immutable hwcnt metadata. * @backend: Pointer to created counter backend. * @state: The current state of the accumulator. * - State transition from disabled->enabled or @@ -89,6 +86,7 @@ enum kbase_hwcnt_accum_state { * accum_lock. */ struct kbase_hwcnt_accumulator { + const struct kbase_hwcnt_metadata *metadata; struct kbase_hwcnt_backend *backend; enum kbase_hwcnt_accum_state state; struct kbase_hwcnt_enable_map enable_map; @@ -117,6 +115,10 @@ struct kbase_hwcnt_accumulator { * state_lock. * - Can be read while holding either lock. * @accum: Hardware counter accumulator structure. + * @wq: Centralized workqueue for users of hardware counters to + * submit async hardware counter related work. Never directly + * called, but it's expected that a lot of the functions in this + * API will end up called from the enqueued async work. 
*/ struct kbase_hwcnt_context { const struct kbase_hwcnt_backend_interface *iface; @@ -125,6 +127,7 @@ struct kbase_hwcnt_context { struct mutex accum_lock; bool accum_inited; struct kbase_hwcnt_accumulator accum; + struct workqueue_struct *wq; }; int kbase_hwcnt_context_init( @@ -138,7 +141,7 @@ int kbase_hwcnt_context_init( hctx = kzalloc(sizeof(*hctx), GFP_KERNEL); if (!hctx) - return -ENOMEM; + goto err_alloc_hctx; hctx->iface = iface; spin_lock_init(&hctx->state_lock); @@ -146,11 +149,21 @@ int kbase_hwcnt_context_init( mutex_init(&hctx->accum_lock); hctx->accum_inited = false; + hctx->wq = + alloc_workqueue("mali_kbase_hwcnt", WQ_HIGHPRI | WQ_UNBOUND, 0); + if (!hctx->wq) + goto err_alloc_workqueue; + *out_hctx = hctx; return 0; + + destroy_workqueue(hctx->wq); +err_alloc_workqueue: + kfree(hctx); +err_alloc_hctx: + return -ENOMEM; } -KBASE_EXPORT_TEST_API(kbase_hwcnt_context_init); void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx) { @@ -159,9 +172,13 @@ void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx) /* Make sure we didn't leak the accumulator */ WARN_ON(hctx->accum_inited); + + /* We don't expect any work to be pending on this workqueue. + * Regardless, this will safely drain and complete the work. + */ + destroy_workqueue(hctx->wq); kfree(hctx); } -KBASE_EXPORT_TEST_API(kbase_hwcnt_context_term); /** * kbasep_hwcnt_accumulator_term() - Terminate the accumulator for the context. 
@@ -197,22 +214,23 @@ static int kbasep_hwcnt_accumulator_init(struct kbase_hwcnt_context *hctx) if (errcode) goto error; + hctx->accum.metadata = hctx->iface->metadata(hctx->iface->info); hctx->accum.state = ACCUM_STATE_ERROR; - errcode = kbase_hwcnt_enable_map_alloc( - hctx->iface->metadata, &hctx->accum.enable_map); + errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata, + &hctx->accum.enable_map); if (errcode) goto error; hctx->accum.enable_map_any_enabled = false; - errcode = kbase_hwcnt_dump_buffer_alloc( - hctx->iface->metadata, &hctx->accum.accum_buf); + errcode = kbase_hwcnt_dump_buffer_alloc(hctx->accum.metadata, + &hctx->accum.accum_buf); if (errcode) goto error; - errcode = kbase_hwcnt_enable_map_alloc( - hctx->iface->metadata, &hctx->accum.scratch_map); + errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata, + &hctx->accum.scratch_map); if (errcode) goto error; @@ -242,6 +260,7 @@ static void kbasep_hwcnt_accumulator_disable( bool backend_enabled = false; struct kbase_hwcnt_accumulator *accum; unsigned long flags; + u64 dump_time_ns; WARN_ON(!hctx); lockdep_assert_held(&hctx->accum_lock); @@ -271,7 +290,7 @@ static void kbasep_hwcnt_accumulator_disable( goto disable; /* Try and accumulate before disabling */ - errcode = hctx->iface->dump_request(accum->backend); + errcode = hctx->iface->dump_request(accum->backend, &dump_time_ns); if (errcode) goto disable; @@ -365,8 +384,8 @@ static int kbasep_hwcnt_accumulator_dump( WARN_ON(!hctx); WARN_ON(!ts_start_ns); WARN_ON(!ts_end_ns); - WARN_ON(dump_buf && (dump_buf->metadata != hctx->iface->metadata)); - WARN_ON(new_map && (new_map->metadata != hctx->iface->metadata)); + WARN_ON(dump_buf && (dump_buf->metadata != hctx->accum.metadata)); + WARN_ON(new_map && (new_map->metadata != hctx->accum.metadata)); WARN_ON(!hctx->accum_inited); lockdep_assert_held(&hctx->accum_lock); @@ -419,23 +438,16 @@ static int kbasep_hwcnt_accumulator_dump( /* Initiate the dump if the backend is enabled. 
*/ if ((state == ACCUM_STATE_ENABLED) && cur_map_any_enabled) { - /* Disable pre-emption, to make the timestamp as accurate as - * possible. - */ - preempt_disable(); - { + if (dump_buf) { + errcode = hctx->iface->dump_request( + accum->backend, &dump_time_ns); + dump_requested = true; + } else { dump_time_ns = hctx->iface->timestamp_ns( - accum->backend); - if (dump_buf) { - errcode = hctx->iface->dump_request( accum->backend); - dump_requested = true; - } else { - errcode = hctx->iface->dump_clear( - accum->backend); - } + errcode = hctx->iface->dump_clear(accum->backend); } - preempt_enable(); + if (errcode) goto error; } else { @@ -615,7 +627,6 @@ int kbase_hwcnt_accumulator_acquire( return 0; } -KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_acquire); void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum) { @@ -650,7 +661,6 @@ void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum) spin_unlock_irqrestore(&hctx->state_lock, flags); mutex_unlock(&hctx->accum_lock); } -KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_release); void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx) { @@ -669,7 +679,6 @@ void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx) mutex_unlock(&hctx->accum_lock); } -KBASE_EXPORT_TEST_API(kbase_hwcnt_context_disable); bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx) { @@ -698,7 +707,6 @@ bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx) return atomic_disabled; } -KBASE_EXPORT_TEST_API(kbase_hwcnt_context_disable_atomic); void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx) { @@ -718,7 +726,6 @@ void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx) spin_unlock_irqrestore(&hctx->state_lock, flags); } -KBASE_EXPORT_TEST_API(kbase_hwcnt_context_enable); const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata( struct kbase_hwcnt_context *hctx) @@ -726,9 +733,17 @@ const struct kbase_hwcnt_metadata 
*kbase_hwcnt_context_metadata( if (!hctx) return NULL; - return hctx->iface->metadata; + return hctx->iface->metadata(hctx->iface->info); +} + +bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx, + struct work_struct *work) +{ + if (WARN_ON(!hctx) || WARN_ON(!work)) + return false; + + return queue_work(hctx->wq, work); } -KBASE_EXPORT_TEST_API(kbase_hwcnt_context_metadata); int kbase_hwcnt_accumulator_set_counters( struct kbase_hwcnt_accumulator *accum, @@ -745,8 +760,8 @@ int kbase_hwcnt_accumulator_set_counters( hctx = container_of(accum, struct kbase_hwcnt_context, accum); - if ((new_map->metadata != hctx->iface->metadata) || - (dump_buf && (dump_buf->metadata != hctx->iface->metadata))) + if ((new_map->metadata != hctx->accum.metadata) || + (dump_buf && (dump_buf->metadata != hctx->accum.metadata))) return -EINVAL; mutex_lock(&hctx->accum_lock); @@ -758,7 +773,6 @@ int kbase_hwcnt_accumulator_set_counters( return errcode; } -KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_set_counters); int kbase_hwcnt_accumulator_dump( struct kbase_hwcnt_accumulator *accum, @@ -774,7 +788,7 @@ int kbase_hwcnt_accumulator_dump( hctx = container_of(accum, struct kbase_hwcnt_context, accum); - if (dump_buf && (dump_buf->metadata != hctx->iface->metadata)) + if (dump_buf && (dump_buf->metadata != hctx->accum.metadata)) return -EINVAL; mutex_lock(&hctx->accum_lock); @@ -786,7 +800,6 @@ int kbase_hwcnt_accumulator_dump( return errcode; } -KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_dump); u64 kbase_hwcnt_accumulator_timestamp_ns(struct kbase_hwcnt_accumulator *accum) { diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_accumulator.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_accumulator.h index eb82ea4..4887eaa 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_accumulator.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_accumulator.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH 
Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /** diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend.h index b7aa0e1..0b5a188 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ /* @@ -41,11 +40,25 @@ struct kbase_hwcnt_dump_buffer; struct kbase_hwcnt_backend_info; /* - * struct kbase_hwcnt_backend_info - Opaque pointer to a hardware counter - * backend, used to perform dumps. + * struct kbase_hwcnt_backend - Opaque pointer to a hardware counter + * backend, used to perform dumps. */ struct kbase_hwcnt_backend; +/* + * typedef kbase_hwcnt_backend_metadata_fn - Get the immutable hardware counter + * metadata that describes the layout + * of the counter data structures. + * @info: Non-NULL pointer to backend info. + * + * Multiple calls to this function with the same info are guaranteed to return + * the same metadata object each time. + * + * Return: Non-NULL pointer to immutable hardware counter metadata. + */ +typedef const struct kbase_hwcnt_metadata * +kbase_hwcnt_backend_metadata_fn(const struct kbase_hwcnt_backend_info *info); + /** * typedef kbase_hwcnt_backend_init_fn - Initialise a counter backend. * @info: Non-NULL pointer to backend info. @@ -56,16 +69,15 @@ struct kbase_hwcnt_backend; * * Return: 0 on success, else error code. */ -typedef int (*kbase_hwcnt_backend_init_fn)( - const struct kbase_hwcnt_backend_info *info, - struct kbase_hwcnt_backend **out_backend); +typedef int +kbase_hwcnt_backend_init_fn(const struct kbase_hwcnt_backend_info *info, + struct kbase_hwcnt_backend **out_backend); /** * typedef kbase_hwcnt_backend_term_fn - Terminate a counter backend. * @backend: Pointer to backend to be terminated. */ -typedef void (*kbase_hwcnt_backend_term_fn)( - struct kbase_hwcnt_backend *backend); +typedef void kbase_hwcnt_backend_term_fn(struct kbase_hwcnt_backend *backend); /** * typedef kbase_hwcnt_backend_timestamp_ns_fn - Get the current backend @@ -74,8 +86,8 @@ typedef void (*kbase_hwcnt_backend_term_fn)( * * Return: Backend timestamp in nanoseconds. 
*/ -typedef u64 (*kbase_hwcnt_backend_timestamp_ns_fn)( - struct kbase_hwcnt_backend *backend); +typedef u64 +kbase_hwcnt_backend_timestamp_ns_fn(struct kbase_hwcnt_backend *backend); /** * typedef kbase_hwcnt_backend_dump_enable_fn - Start counter dumping with the @@ -90,7 +102,7 @@ typedef u64 (*kbase_hwcnt_backend_timestamp_ns_fn)( * * Return: 0 on success, else error code. */ -typedef int (*kbase_hwcnt_backend_dump_enable_fn)( +typedef int kbase_hwcnt_backend_dump_enable_fn( struct kbase_hwcnt_backend *backend, const struct kbase_hwcnt_enable_map *enable_map); @@ -106,7 +118,7 @@ typedef int (*kbase_hwcnt_backend_dump_enable_fn)( * * Return: 0 on success, else error code. */ -typedef int (*kbase_hwcnt_backend_dump_enable_nolock_fn)( +typedef int kbase_hwcnt_backend_dump_enable_nolock_fn( struct kbase_hwcnt_backend *backend, const struct kbase_hwcnt_enable_map *enable_map); @@ -118,8 +130,8 @@ typedef int (*kbase_hwcnt_backend_dump_enable_nolock_fn)( * If the backend is already disabled, does nothing. * Any undumped counter values since the last dump get will be lost. */ -typedef void (*kbase_hwcnt_backend_dump_disable_fn)( - struct kbase_hwcnt_backend *backend); +typedef void +kbase_hwcnt_backend_dump_disable_fn(struct kbase_hwcnt_backend *backend); /** * typedef kbase_hwcnt_backend_dump_clear_fn - Reset all the current undumped @@ -130,21 +142,24 @@ typedef void (*kbase_hwcnt_backend_dump_disable_fn)( * * Return: 0 on success, else error code. */ -typedef int (*kbase_hwcnt_backend_dump_clear_fn)( - struct kbase_hwcnt_backend *backend); +typedef int +kbase_hwcnt_backend_dump_clear_fn(struct kbase_hwcnt_backend *backend); /** * typedef kbase_hwcnt_backend_dump_request_fn - Request an asynchronous counter * dump. * @backend: Non-NULL pointer to backend. + * @dump_time_ns: Non-NULL pointer where the timestamp of when the dump was + * requested will be written out to on success. 
* * If the backend is not enabled or another dump is already in progress, * returns an error. * * Return: 0 on success, else error code. */ -typedef int (*kbase_hwcnt_backend_dump_request_fn)( - struct kbase_hwcnt_backend *backend); +typedef int +kbase_hwcnt_backend_dump_request_fn(struct kbase_hwcnt_backend *backend, + u64 *dump_time_ns); /** * typedef kbase_hwcnt_backend_dump_wait_fn - Wait until the last requested @@ -155,8 +170,8 @@ typedef int (*kbase_hwcnt_backend_dump_request_fn)( * * Return: 0 on success, else error code. */ -typedef int (*kbase_hwcnt_backend_dump_wait_fn)( - struct kbase_hwcnt_backend *backend); +typedef int +kbase_hwcnt_backend_dump_wait_fn(struct kbase_hwcnt_backend *backend); /** * typedef kbase_hwcnt_backend_dump_get_fn - Copy or accumulate enable the @@ -168,24 +183,25 @@ typedef int (*kbase_hwcnt_backend_dump_wait_fn)( * @accumulate: True if counters should be accumulated into dump_buffer, rather * than copied. * - * If the backend is not enabled, returns an error. - * If a dump is in progress (i.e. dump_wait has not yet returned successfully) - * then the resultant contents of the dump buffer will be undefined. + * The resultant contents of the dump buffer are only well defined if a prior + * call to dump_wait returned successfully, and a new dump has not yet been + * requested by a call to dump_request. * * Return: 0 on success, else error code. */ -typedef int (*kbase_hwcnt_backend_dump_get_fn)( - struct kbase_hwcnt_backend *backend, - struct kbase_hwcnt_dump_buffer *dump_buffer, - const struct kbase_hwcnt_enable_map *enable_map, - bool accumulate); +typedef int +kbase_hwcnt_backend_dump_get_fn(struct kbase_hwcnt_backend *backend, + struct kbase_hwcnt_dump_buffer *dump_buffer, + const struct kbase_hwcnt_enable_map *enable_map, + bool accumulate); /** * struct kbase_hwcnt_backend_interface - Hardware counter backend virtual * interface. - * @metadata: Immutable hardware counter metadata. 
* @info: Immutable info used to initialise an instance of the * backend. + * @metadata: Function ptr to get the immutable hardware counter + * metadata. * @init: Function ptr to initialise an instance of the backend. * @term: Function ptr to terminate an instance of the backend. * @timestamp_ns: Function ptr to get the current backend timestamp. @@ -200,18 +216,18 @@ typedef int (*kbase_hwcnt_backend_dump_get_fn)( * buffer. */ struct kbase_hwcnt_backend_interface { - const struct kbase_hwcnt_metadata *metadata; const struct kbase_hwcnt_backend_info *info; - kbase_hwcnt_backend_init_fn init; - kbase_hwcnt_backend_term_fn term; - kbase_hwcnt_backend_timestamp_ns_fn timestamp_ns; - kbase_hwcnt_backend_dump_enable_fn dump_enable; - kbase_hwcnt_backend_dump_enable_nolock_fn dump_enable_nolock; - kbase_hwcnt_backend_dump_disable_fn dump_disable; - kbase_hwcnt_backend_dump_clear_fn dump_clear; - kbase_hwcnt_backend_dump_request_fn dump_request; - kbase_hwcnt_backend_dump_wait_fn dump_wait; - kbase_hwcnt_backend_dump_get_fn dump_get; + kbase_hwcnt_backend_metadata_fn *metadata; + kbase_hwcnt_backend_init_fn *init; + kbase_hwcnt_backend_term_fn *term; + kbase_hwcnt_backend_timestamp_ns_fn *timestamp_ns; + kbase_hwcnt_backend_dump_enable_fn *dump_enable; + kbase_hwcnt_backend_dump_enable_nolock_fn *dump_enable_nolock; + kbase_hwcnt_backend_dump_disable_fn *dump_disable; + kbase_hwcnt_backend_dump_clear_fn *dump_clear; + kbase_hwcnt_backend_dump_request_fn *dump_request; + kbase_hwcnt_backend_dump_wait_fn *dump_wait; + kbase_hwcnt_backend_dump_get_fn *dump_get; }; #endif /* _KBASE_HWCNT_BACKEND_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf.c new file mode 100644 index 0000000..58b5e72 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf.c @@ -0,0 +1,1864 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) 
COPYRIGHT 2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include "mali_kbase_hwcnt_backend_csf.h" +#include "mali_kbase_hwcnt_gpu.h" +#include "mali_kbase_hwcnt_types.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef BASE_MAX_NR_CLOCKS_REGULATORS +#define BASE_MAX_NR_CLOCKS_REGULATORS 2 +#endif + +/** + * enum kbase_hwcnt_backend_csf_dump_state - HWC CSF backend dumping states. + * + * @KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE: Initial state, or the state if there is + * an error. + * + * @KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED: A dump has been requested and we are + * waiting for an ACK, this ACK could come from either PRFCNT_ACK, + * PROTMODE_ENTER_ACK, or if an error occurs. + * + * @KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT: Checking the insert + * immediately after receiving the ACK, so we know which index corresponds to + * the buffer we requested. + * + * @KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED: The insert has been saved and + * now we have kicked off the worker. + * + * @KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING: The insert has been saved and now + * we have kicked off the worker to accumulate up to that insert and then copy + * the delta to the user buffer to prepare for dump_get(). 
+ * + * @KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED: The dump completed successfully. + * + * Valid state transitions: + * IDLE -> REQUESTED (on dump request) + * REQUESTED -> QUERYING_INSERT (on dump ack) + * QUERYING_INSERT -> WORKER_LAUNCHED (on worker submission) + * WORKER_LAUNCHED -> ACCUMULATING (while the worker is accumulating) + * ACCUMULATING -> COMPLETED (on accumulation completion) + * COMPLETED -> REQUESTED (on dump request) + * COMPLETED -> IDLE (on disable) + * ANY -> IDLE (on error) + */ +enum kbase_hwcnt_backend_csf_dump_state { + KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE, + KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED, + KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT, + KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED, + KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING, + KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED, +}; + +/** + * enum kbase_hwcnt_backend_csf_enable_state - HWC CSF backend enable states. + * + * @KBASE_HWCNT_BACKEND_CSF_DISABLED: Initial state, and the state when backend + * is disabled. + * + * @KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED: Enable request is in + * progress, waiting for firmware acknowledgment. + * + * @KBASE_HWCNT_BACKEND_CSF_ENABLED: Enable request has been acknowledged, + * enable is done. + * + * @KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED: Disable request is in + * progress, waiting for firmware acknowledgment. + * + * @KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER: Disable request has been + * acknowledged, waiting for dump workers to be finished. + * + * @KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER: An + * unrecoverable error happened, waiting for dump workers to be finished. + * + * @KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR: An unrecoverable error + * happened, and dump workers have finished, waiting for reset. 
+ * + * Valid state transitions: + * DISABLED -> TRANSITIONING_TO_ENABLED (on enable) + * TRANSITIONING_TO_ENABLED -> ENABLED (on enable ack) + * ENABLED -> TRANSITIONING_TO_DISABLED (on disable) + * TRANSITIONING_TO_DISABLED -> DISABLED_WAIT_FOR_WORKER (on disable ack) + * DISABLED_WAIT_FOR_WORKER -> DISABLED (after workers are flushed) + * DISABLED -> UNRECOVERABLE_ERROR (on unrecoverable error) + * ANY but DISABLED -> UNRECOVERABLE_ERROR_WAIT_FOR_WORKER (on unrecoverable + * error) + * UNRECOVERABLE_ERROR -> DISABLED (on before reset) + */ +enum kbase_hwcnt_backend_csf_enable_state { + KBASE_HWCNT_BACKEND_CSF_DISABLED, + KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED, + KBASE_HWCNT_BACKEND_CSF_ENABLED, + KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED, + KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER, + KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER, + KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR, +}; + +/** + * struct kbase_hwcnt_backend_csf_info - Information used to create an instance + * of a CSF hardware counter backend. + * @backend: Pointer to access CSF backend. + * @fw_in_protected_mode: True if FW is running in protected mode, else + * false. + * @unrecoverable_error_happened: True if an unrecoverable error happened, else + * false. + * @csf_if: CSF interface object pointer. + * @ring_buf_cnt: Dump buffer count in the ring buffer. + * @counter_set: The performance counter set to use. + * @metadata: Hardware counter metadata. + * @prfcnt_info: Performance counter information. + */ +struct kbase_hwcnt_backend_csf_info { + struct kbase_hwcnt_backend_csf *backend; + bool fw_in_protected_mode; + bool unrecoverable_error_happened; + struct kbase_hwcnt_backend_csf_if *csf_if; + u32 ring_buf_cnt; + enum kbase_hwcnt_set counter_set; + const struct kbase_hwcnt_metadata *metadata; + struct kbase_hwcnt_backend_csf_if_prfcnt_info prfcnt_info; +}; + +/** + * struct kbase_hwcnt_csf_physical_layout - HWC sample memory physical layout + * information.
+ * @fe_cnt: Front end block count. + * @tiler_cnt: Tiler block count. + * @mmu_l2_cnt: Memory system (MMU and L2 cache) block count. + * @shader_cnt: Shader Core block count. + * @block_cnt: Total block count (sum of all other block counts). + * @shader_avail_mask: Bitmap of all shader cores in the system. + * @offset_enable_mask: Offset of enable mask in the block. + * @headers_per_block: Header size per block. + * @counters_per_block: Counters size per block. + * @values_per_block: Total size per block. + */ +struct kbase_hwcnt_csf_physical_layout { + size_t fe_cnt; + size_t tiler_cnt; + size_t mmu_l2_cnt; + size_t shader_cnt; + size_t block_cnt; + u64 shader_avail_mask; + size_t offset_enable_mask; + size_t headers_per_block; + size_t counters_per_block; + size_t values_per_block; +}; + +/** + * struct kbase_hwcnt_backend_csf - Instance of a CSF hardware counter backend. + * @info: CSF Info used to create the backend. + * @dump_state: The dumping state of the backend. + * @enable_state: The CSF backend internal enabled state. + * @insert_index_to_accumulate: The insert index in the ring buffer that the + * dump worker needs to accumulate up to. + * @enable_state_waitq: Wait queue object used to notify waiters that + * an enable state change is done. + * @to_user_buf: HWC sample buffer for client user. + * @accum_buf: HWC sample buffer used as an internal + * accumulator. + * @old_sample_buf: HWC sample buffer to save the previous values + * for delta calculation. + * @ring_buf: Opaque pointer for ring buffer object. + * @ring_buf_cpu_base: CPU base address of the allocated ring buffer. + * @clk_enable_map: The enable map specifying enabled clock domains. + * @cycle_count_elapsed: Cycle count elapsed for a given sample period. + * @prev_cycle_count: Previous cycle count to calculate the cycle + * count for sample period. + * @phys_layout: Physical memory layout information of HWC + * sample buffer.
+ * @dump_completed: Completion signaled by the dump worker when + * it is completed accumulating up to the + * insert_index_to_accumulate. + * Should be initialized to the "complete" state. + * @hwc_dump_workq: Single threaded work queue for HWC workers + * execution. + * @hwc_dump_work: Worker to accumulate samples. + * @hwc_threshold_work: Worker for consuming available samples when + * threshold interrupt raised. + */ +struct kbase_hwcnt_backend_csf { + struct kbase_hwcnt_backend_csf_info *info; + enum kbase_hwcnt_backend_csf_dump_state dump_state; + enum kbase_hwcnt_backend_csf_enable_state enable_state; + u32 insert_index_to_accumulate; + wait_queue_head_t enable_state_waitq; + u32 *to_user_buf; + u32 *accum_buf; + u32 *old_sample_buf; + struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf; + void *ring_buf_cpu_base; + u64 clk_enable_map; + u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS]; + u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS]; + struct kbase_hwcnt_csf_physical_layout phys_layout; + struct completion dump_completed; + struct workqueue_struct *hwc_dump_workq; + struct work_struct hwc_dump_work; + struct work_struct hwc_threshold_work; +}; + +static bool kbasep_hwcnt_backend_csf_backend_exists( + struct kbase_hwcnt_backend_csf_info *csf_info) +{ + WARN_ON(!csf_info); + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); + return (csf_info->backend != NULL); +} + +/** + * kbasep_hwcnt_backend_csf_cc_initial_sample() - Initialize cycle count + * tracking. + * + * @backend_csf: Non-NULL pointer to backend. + * @enable_map: Non-NULL pointer to enable map specifying enabled counters. + */ +static void kbasep_hwcnt_backend_csf_cc_initial_sample( + struct kbase_hwcnt_backend_csf *backend_csf, + const struct kbase_hwcnt_enable_map *enable_map) +{ + u64 clk_enable_map = enable_map->clk_enable_map; + u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS]; + size_t clk; + + /* Read cycle count from CSF interface for both clock domains. 
*/ + backend_csf->info->csf_if->get_gpu_cycle_count( + backend_csf->info->csf_if->ctx, cycle_counts, clk_enable_map); + + kbase_hwcnt_metadata_for_each_clock(enable_map->metadata, clk) { + if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, clk)) + backend_csf->prev_cycle_count[clk] = cycle_counts[clk]; + } + + /* Keep clk_enable_map for dump_request. */ + backend_csf->clk_enable_map = clk_enable_map; +} + +static void +kbasep_hwcnt_backend_csf_cc_update(struct kbase_hwcnt_backend_csf *backend_csf) +{ + u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS]; + size_t clk; + + backend_csf->info->csf_if->assert_lock_held( + backend_csf->info->csf_if->ctx); + + backend_csf->info->csf_if->get_gpu_cycle_count( + backend_csf->info->csf_if->ctx, cycle_counts, + backend_csf->clk_enable_map); + + kbase_hwcnt_metadata_for_each_clock(backend_csf->info->metadata, clk) { + if (kbase_hwcnt_clk_enable_map_enabled( + backend_csf->clk_enable_map, clk)) { + backend_csf->cycle_count_elapsed[clk] = + cycle_counts[clk] - + backend_csf->prev_cycle_count[clk]; + backend_csf->prev_cycle_count[clk] = cycle_counts[clk]; + } + } +} + +/* CSF backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */ +static u64 +kbasep_hwcnt_backend_csf_timestamp_ns(struct kbase_hwcnt_backend *backend) +{ + struct kbase_hwcnt_backend_csf *backend_csf = + (struct kbase_hwcnt_backend_csf *)backend; + + if (!backend_csf || !backend_csf->info || !backend_csf->info->csf_if) + return 0; + + return backend_csf->info->csf_if->timestamp_ns( + backend_csf->info->csf_if->ctx); +} + +/** kbasep_hwcnt_backend_csf_process_enable_map() - Process the enable_map to + * guarantee headers are + * enabled if any counter is + * required. + *@phys_enable_map: HWC physical enable map to be processed. 
+ */ +static void kbasep_hwcnt_backend_csf_process_enable_map( + struct kbase_hwcnt_physical_enable_map *phys_enable_map) +{ + WARN_ON(!phys_enable_map); + + /* Enable header if any counter is required from user, the header is + * controlled by bit 0 of the enable mask. + */ + if (phys_enable_map->fe_bm) + phys_enable_map->fe_bm |= 1; + + if (phys_enable_map->tiler_bm) + phys_enable_map->tiler_bm |= 1; + + if (phys_enable_map->mmu_l2_bm) + phys_enable_map->mmu_l2_bm |= 1; + + if (phys_enable_map->shader_bm) + phys_enable_map->shader_bm |= 1; +} + +static void kbasep_hwcnt_backend_csf_init_layout( + const struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info, + struct kbase_hwcnt_csf_physical_layout *phys_layout) +{ + WARN_ON(!prfcnt_info); + WARN_ON(!phys_layout); + + phys_layout->fe_cnt = 1; + phys_layout->tiler_cnt = 1; + phys_layout->mmu_l2_cnt = prfcnt_info->l2_count; + phys_layout->shader_cnt = fls64(prfcnt_info->core_mask); + phys_layout->block_cnt = phys_layout->fe_cnt + phys_layout->tiler_cnt + + phys_layout->mmu_l2_cnt + + phys_layout->shader_cnt; + + phys_layout->shader_avail_mask = prfcnt_info->core_mask; + + phys_layout->headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + phys_layout->values_per_block = + prfcnt_info->prfcnt_block_size / KBASE_HWCNT_VALUE_BYTES; + phys_layout->counters_per_block = + phys_layout->values_per_block - phys_layout->headers_per_block; + phys_layout->offset_enable_mask = KBASE_HWCNT_V5_PRFCNT_EN_HEADER; +} + +static void kbasep_hwcnt_backend_csf_reset_internal_buffers( + struct kbase_hwcnt_backend_csf *backend_csf) +{ + memset(backend_csf->to_user_buf, 0, + backend_csf->info->prfcnt_info.dump_bytes); + memset(backend_csf->accum_buf, 0, + backend_csf->info->prfcnt_info.dump_bytes); + memset(backend_csf->old_sample_buf, 0, + backend_csf->info->prfcnt_info.dump_bytes); +} + +static void kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header( + struct kbase_hwcnt_backend_csf *backend_csf, u32 *sample) +{ + u32 block_idx; + 
const struct kbase_hwcnt_csf_physical_layout *phys_layout; + u32 *block_buf; + + phys_layout = &backend_csf->phys_layout; + + for (block_idx = 0; block_idx < phys_layout->block_cnt; block_idx++) { + block_buf = sample + block_idx * phys_layout->values_per_block; + block_buf[phys_layout->offset_enable_mask] = 0; + } +} + +static void kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header( + struct kbase_hwcnt_backend_csf *backend_csf) +{ + u32 idx; + u32 *sample; + char *cpu_dump_base; + size_t dump_bytes = backend_csf->info->prfcnt_info.dump_bytes; + + cpu_dump_base = (char *)backend_csf->ring_buf_cpu_base; + + for (idx = 0; idx < backend_csf->info->ring_buf_cnt; idx++) { + sample = (u32 *)&cpu_dump_base[idx * dump_bytes]; + kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header( + backend_csf, sample); + } +} + +static void kbasep_hwcnt_backend_csf_update_user_sample( + struct kbase_hwcnt_backend_csf *backend_csf) +{ + /* Copy the data into the sample and wait for the user to get it. */ + memcpy(backend_csf->to_user_buf, backend_csf->accum_buf, + backend_csf->info->prfcnt_info.dump_bytes); + + /* After copied data into user sample, clear the accumulator values to + * prepare for the next accumulator, such as the next request or + * threshold. 
+ */ + memset(backend_csf->accum_buf, 0, + backend_csf->info->prfcnt_info.dump_bytes); +} + +static void kbasep_hwcnt_backend_csf_accumulate_sample( + const struct kbase_hwcnt_csf_physical_layout *phys_layout, + size_t dump_bytes, u32 *accum_buf, const u32 *old_sample_buf, + const u32 *new_sample_buf, bool clearing_samples) +{ + size_t block_idx, ctr_idx; + const u32 *old_block = old_sample_buf; + const u32 *new_block = new_sample_buf; + u32 *acc_block = accum_buf; + + for (block_idx = 0; block_idx < phys_layout->block_cnt; block_idx++) { + const u32 old_enable_mask = + old_block[phys_layout->offset_enable_mask]; + const u32 new_enable_mask = + new_block[phys_layout->offset_enable_mask]; + + if (new_enable_mask == 0) { + /* Hardware block was unavailable or we didn't turn on + * any counters. Do nothing. + */ + } else { + /* Hardware block was available and it had some counters + * enabled. We need to update the accumulation buffer. + */ + + /* Unconditionally copy the headers. */ + memcpy(acc_block, new_block, + phys_layout->headers_per_block * + KBASE_HWCNT_VALUE_BYTES); + + /* Accumulate counter samples + * + * When accumulating samples we need to take into + * account whether the counter sampling method involves + * clearing counters back to zero after each sample is + * taken. + * + * The intention for CSF was that all HW should use + * counters which wrap to zero when their maximum value + * is reached. This, combined with non-clearing + * sampling, enables multiple concurrent users to + * request samples without interfering with each other. + * + * However some early HW may not support wrapping + * counters, for these GPUs counters must be cleared on + * sample to avoid loss of data due to counters + * saturating at their maximum value. + */ + if (!clearing_samples) { + if (old_enable_mask == 0) { + /* Hardware block was previously + * unavailable. Accumulate the new + * counters only, as we know previous + * values are zeroes. 
+ */ + for (ctr_idx = + phys_layout + ->headers_per_block; + ctr_idx < + phys_layout->values_per_block; + ctr_idx++) { + acc_block[ctr_idx] += + new_block[ctr_idx]; + } + } else { + /* Hardware block was previously + * available. Accumulate the delta + * between old and new counter values. + */ + for (ctr_idx = + phys_layout + ->headers_per_block; + ctr_idx < + phys_layout->values_per_block; + ctr_idx++) { + acc_block[ctr_idx] += + new_block[ctr_idx] - + old_block[ctr_idx]; + } + } + } else { + for (ctr_idx = phys_layout->headers_per_block; + ctr_idx < phys_layout->values_per_block; + ctr_idx++) { + acc_block[ctr_idx] += + new_block[ctr_idx]; + } + } + } + old_block += phys_layout->values_per_block; + new_block += phys_layout->values_per_block; + acc_block += phys_layout->values_per_block; + } + + WARN_ON(old_block != + old_sample_buf + dump_bytes / KBASE_HWCNT_VALUE_BYTES); + WARN_ON(new_block != + new_sample_buf + dump_bytes / KBASE_HWCNT_VALUE_BYTES); + WARN_ON(acc_block != accum_buf + dump_bytes / KBASE_HWCNT_VALUE_BYTES); + (void)dump_bytes; +} + +static void kbasep_hwcnt_backend_csf_accumulate_samples( + struct kbase_hwcnt_backend_csf *backend_csf, u32 extract_index_to_start, + u32 insert_index_to_stop) +{ + u32 raw_idx; + unsigned long flags; + u8 *cpu_dump_base = (u8 *)backend_csf->ring_buf_cpu_base; + const size_t ring_buf_cnt = backend_csf->info->ring_buf_cnt; + const size_t buf_dump_bytes = backend_csf->info->prfcnt_info.dump_bytes; + bool clearing_samples = backend_csf->info->prfcnt_info.clearing_samples; + u32 *old_sample_buf = backend_csf->old_sample_buf; + u32 *new_sample_buf; + + if (extract_index_to_start == insert_index_to_stop) + /* No samples to accumulate. Early out. */ + return; + + /* Sync all the buffers to CPU side before read the data. 
*/ + backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx, + backend_csf->ring_buf, + extract_index_to_start, + insert_index_to_stop, true); + + /* Consider u32 wrap case, '!=' is used here instead of '<' operator */ + for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop; + raw_idx++) { + /* The bitwise "&" acts as a modulo operation since ring_buf_cnt + * must be a power of two. + */ + const u32 buf_idx = raw_idx & (ring_buf_cnt - 1); + + new_sample_buf = + (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes]; + + kbasep_hwcnt_backend_csf_accumulate_sample( + &backend_csf->phys_layout, buf_dump_bytes, + backend_csf->accum_buf, old_sample_buf, new_sample_buf, + clearing_samples); + + old_sample_buf = new_sample_buf; + } + + /* Save the newest buffer as the old buffer for next time. */ + memcpy(backend_csf->old_sample_buf, new_sample_buf, buf_dump_bytes); + + /* Reset the prfcnt_en header on each sample before releasing them. */ + for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop; + raw_idx++) { + const u32 buf_idx = raw_idx & (ring_buf_cnt - 1); + u32 *sample = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes]; + + kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header( + backend_csf, sample); + } + + /* Sync zeroed buffers to avoid coherency issues on future use. */ + backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx, + backend_csf->ring_buf, + extract_index_to_start, + insert_index_to_stop, false); + + /* After consuming all samples between extract_idx and insert_idx, + * set the raw extract index to insert_idx so that the sample buffers + * can be released back to the ring buffer pool.
+ */ + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + backend_csf->info->csf_if->set_extract_index( + backend_csf->info->csf_if->ctx, insert_index_to_stop); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); +} + +static void kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( + struct kbase_hwcnt_backend_csf *backend_csf, + enum kbase_hwcnt_backend_csf_enable_state new_state) +{ + backend_csf->info->csf_if->assert_lock_held( + backend_csf->info->csf_if->ctx); + + if (backend_csf->enable_state != new_state) { + backend_csf->enable_state = new_state; + + wake_up(&backend_csf->enable_state_waitq); + } +} + +/** + * kbasep_hwcnt_backend_csf_dump_worker() - HWC dump worker. + * @work: Work structure. + * + * To accumulate all available samples in the ring buffer when a request has + * been done. + * + */ +static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work) +{ + unsigned long flags; + struct kbase_hwcnt_backend_csf *backend_csf; + u32 insert_index_to_acc; + u32 extract_index; + u32 insert_index; + + WARN_ON(!work); + backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, + hwc_dump_work); + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + /* Assert the backend is not destroyed. */ + WARN_ON(backend_csf != backend_csf->info->backend); + + /* The backend was disabled or had an error while the worker was being + * launched. 
+ */ + if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { + WARN_ON(backend_csf->dump_state != + KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE); + WARN_ON(!completion_done(&backend_csf->dump_completed)); + backend_csf->info->csf_if->unlock( + backend_csf->info->csf_if->ctx, flags); + return; + } + + WARN_ON(backend_csf->dump_state != + KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED); + + backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING; + insert_index_to_acc = backend_csf->insert_index_to_accumulate; + + /* Read the raw extract and insert indexes from the CSF interface. */ + backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx, + &extract_index, &insert_index); + + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); + + /* Accumulate up to the insert we grabbed at the prfcnt request + * interrupt. + */ + kbasep_hwcnt_backend_csf_accumulate_samples(backend_csf, extract_index, + insert_index_to_acc); + + /* Copy to the user buffer so if a threshold interrupt fires + * between now and get(), the accumulations are untouched. + */ + kbasep_hwcnt_backend_csf_update_user_sample(backend_csf); + + /* Dump done, set state back to COMPLETED for next request. */ + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + /* Assert the backend is not destroyed. */ + WARN_ON(backend_csf != backend_csf->info->backend); + + /* The backend was disabled or had an error while we were accumulating. + */ + if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { + WARN_ON(backend_csf->dump_state != + KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE); + WARN_ON(!completion_done(&backend_csf->dump_completed)); + backend_csf->info->csf_if->unlock( + backend_csf->info->csf_if->ctx, flags); + return; + } + + WARN_ON(backend_csf->dump_state != + KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING); + + /* Our work here is done - set the wait object and unblock waiters. 
*/ + backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; + complete_all(&backend_csf->dump_completed); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); +} + +/** + * kbasep_hwcnt_backend_csf_threshold_worker() - Threshold worker. + * + * @work: Work structure. + * + * Called when a HWC threshold interrupt raised to consume all available samples + * in the ring buffer. + */ +static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work) +{ + unsigned long flags; + struct kbase_hwcnt_backend_csf *backend_csf; + u32 extract_index; + u32 insert_index; + + WARN_ON(!work); + + backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, + hwc_threshold_work); + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + + /* Assert the backend is not destroyed. */ + WARN_ON(backend_csf != backend_csf->info->backend); + + /* Read the raw extract and insert indexes from the CSF interface. */ + backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx, + &extract_index, &insert_index); + + /* The backend was disabled or had an error while the worker was being + * launched. + */ + if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { + backend_csf->info->csf_if->unlock( + backend_csf->info->csf_if->ctx, flags); + return; + } + + /* Early out if we are not in the IDLE state or COMPLETED state, as this + * means a concurrent dump is in progress and we don't want to + * interfere. + */ + if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) && + (backend_csf->dump_state != + KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED)) { + backend_csf->info->csf_if->unlock( + backend_csf->info->csf_if->ctx, flags); + return; + } + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); + + /* Accumulate everything we possibly can. 
We grabbed the insert index + * immediately after we acquired the lock but before we checked whether + * a concurrent dump was triggered. This ensures that if a concurrent + * dump was triggered between releasing the lock and now, we know for a + * fact that our insert will not exceed the concurrent dump's + * insert_to_accumulate, so we don't risk accumulating too much data. + */ + kbasep_hwcnt_backend_csf_accumulate_samples(backend_csf, extract_index, + insert_index); + + /* No need to wake up anything since it is not a user dump request. */ +} + +static void kbase_hwcnt_backend_csf_submit_dump_worker( + struct kbase_hwcnt_backend_csf_info *csf_info) +{ + u32 extract_index; + + WARN_ON(!csf_info); + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); + + WARN_ON(!kbasep_hwcnt_backend_csf_backend_exists(csf_info)); + WARN_ON(csf_info->backend->enable_state != + KBASE_HWCNT_BACKEND_CSF_ENABLED); + WARN_ON(csf_info->backend->dump_state != + KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT); + + /* Save insert index now so that the dump worker only accumulates the + * HWC data associated with this request. Extract index is not stored + * as that needs to be checked when accumulating to prevent re-reading + * buffers that have already been read and returned to the GPU. + */ + csf_info->csf_if->get_indexes( + csf_info->csf_if->ctx, &extract_index, + &csf_info->backend->insert_index_to_accumulate); + csf_info->backend->dump_state = + KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED; + + /* Submit the accumulator task into the work queue. 
*/ + queue_work(csf_info->backend->hwc_dump_workq, + &csf_info->backend->hwc_dump_work); +} + +static void kbasep_hwcnt_backend_csf_get_physical_enable( + struct kbase_hwcnt_backend_csf *backend_csf, + const struct kbase_hwcnt_enable_map *enable_map, + struct kbase_hwcnt_backend_csf_if_enable *enable) +{ + enum kbase_hwcnt_physical_set phys_counter_set; + struct kbase_hwcnt_physical_enable_map phys_enable_map; + + kbase_hwcnt_gpu_enable_map_to_physical(&phys_enable_map, enable_map); + + /* process the enable_map to guarantee the block header is enabled which + * is needed for delta calculation. + */ + kbasep_hwcnt_backend_csf_process_enable_map(&phys_enable_map); + + kbase_hwcnt_gpu_set_to_physical(&phys_counter_set, + backend_csf->info->counter_set); + + /* Use processed enable_map to enable HWC in HW level. */ + enable->fe_bm = phys_enable_map.fe_bm; + enable->shader_bm = phys_enable_map.shader_bm; + enable->tiler_bm = phys_enable_map.tiler_bm; + enable->mmu_l2_bm = phys_enable_map.mmu_l2_bm; + enable->counter_set = phys_counter_set; + enable->clk_enable_map = enable_map->clk_enable_map; +} + +/* CSF backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */ +static int kbasep_hwcnt_backend_csf_dump_enable_nolock( + struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map) +{ + struct kbase_hwcnt_backend_csf *backend_csf = + (struct kbase_hwcnt_backend_csf *)backend; + struct kbase_hwcnt_backend_csf_if_enable enable; + + if (!backend_csf || !enable_map || + (enable_map->metadata != backend_csf->info->metadata)) + return -EINVAL; + + backend_csf->info->csf_if->assert_lock_held( + backend_csf->info->csf_if->ctx); + + kbasep_hwcnt_backend_csf_get_physical_enable(backend_csf, enable_map, + &enable); + + /* enable_state should be DISABLED before we transfer it to enabled */ + if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED) + return -EIO; + + backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; + 
WARN_ON(!completion_done(&backend_csf->dump_completed)); + kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( + backend_csf, KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED); + + backend_csf->info->csf_if->dump_enable(backend_csf->info->csf_if->ctx, + backend_csf->ring_buf, &enable); + + kbasep_hwcnt_backend_csf_cc_initial_sample(backend_csf, enable_map); + + return 0; +} + +/* CSF backend implementation of kbase_hwcnt_backend_dump_enable_fn */ +static int kbasep_hwcnt_backend_csf_dump_enable( + struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map) +{ + int errcode; + unsigned long flags; + struct kbase_hwcnt_backend_csf *backend_csf = + (struct kbase_hwcnt_backend_csf *)backend; + + if (!backend_csf) + return -EINVAL; + + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + errcode = kbasep_hwcnt_backend_csf_dump_enable_nolock(backend, + enable_map); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); + return errcode; +} + +static void kbasep_hwcnt_backend_csf_wait_enable_transition_complete( + struct kbase_hwcnt_backend_csf *backend_csf, unsigned long *lock_flags) +{ + backend_csf->info->csf_if->assert_lock_held( + backend_csf->info->csf_if->ctx); + + while ((backend_csf->enable_state == + KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) || + (backend_csf->enable_state == + KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED)) { + backend_csf->info->csf_if->unlock( + backend_csf->info->csf_if->ctx, *lock_flags); + + wait_event( + backend_csf->enable_state_waitq, + (backend_csf->enable_state != + KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) && + (backend_csf->enable_state != + KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED)); + + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, + lock_flags); + } +} + +/* CSF backend implementation of kbase_hwcnt_backend_dump_disable_fn */ +static void +kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend 
*backend) +{ + unsigned long flags; + struct kbase_hwcnt_backend_csf *backend_csf = + (struct kbase_hwcnt_backend_csf *)backend; + bool do_disable = false; + + WARN_ON(!backend_csf); + + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + + /* Make sure we wait until any previous enable or disable have completed + * before doing anything. + */ + kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf, + &flags); + + if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED || + backend_csf->enable_state == + KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) { + /* If we are already disabled or in an unrecoverable error + * state, there is nothing for us to do. + */ + backend_csf->info->csf_if->unlock( + backend_csf->info->csf_if->ctx, flags); + return; + } + + if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) { + kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( + backend_csf, + KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED); + backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; + complete_all(&backend_csf->dump_completed); + /* Only disable if we were previously enabled - in all other + * cases the call to disable will have already been made. + */ + do_disable = true; + } + + WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE); + WARN_ON(!completion_done(&backend_csf->dump_completed)); + + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); + + /* Block until any async work has completed. We have transitioned out of + * the ENABLED state so we can guarantee no new work will concurrently + * be submitted. 
+ */ + flush_workqueue(backend_csf->hwc_dump_workq); + + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + + if (do_disable) + backend_csf->info->csf_if->dump_disable( + backend_csf->info->csf_if->ctx); + + kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf, + &flags); + + switch (backend_csf->enable_state) { + case KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER: + kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( + backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED); + break; + case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER: + kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( + backend_csf, + KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR); + break; + default: + WARN_ON(true); + break; + } + + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); + + /* After disable, zero the header of all buffers in the ring buffer back + * to 0 to prepare for the next enable. + */ + kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(backend_csf); + + /* Sync zeroed buffers to avoid coherency issues on future use. */ + backend_csf->info->csf_if->ring_buf_sync( + backend_csf->info->csf_if->ctx, backend_csf->ring_buf, 0, + backend_csf->info->ring_buf_cnt, false); + + /* Reset accumulator, old_sample_buf and user_sample to all-0 to prepare + * for next enable. + */ + kbasep_hwcnt_backend_csf_reset_internal_buffers(backend_csf); +} + +/* CSF backend implementation of kbase_hwcnt_backend_dump_request_fn */ +static int +kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend, + u64 *dump_time_ns) +{ + unsigned long flags; + struct kbase_hwcnt_backend_csf *backend_csf = + (struct kbase_hwcnt_backend_csf *)backend; + bool do_request = false; + + if (!backend_csf) + return -EINVAL; + + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + + /* If we're transitioning to enabled there's nothing to accumulate, and + * the user dump buffer is already zeroed. 
We can just short circuit to + * the DUMP_COMPLETED state. + */ + if (backend_csf->enable_state == + KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) { + backend_csf->dump_state = + KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; + *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend); + kbasep_hwcnt_backend_csf_cc_update(backend_csf); + backend_csf->info->csf_if->unlock( + backend_csf->info->csf_if->ctx, flags); + return 0; + } + + /* Otherwise, make sure we're already enabled. */ + if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { + backend_csf->info->csf_if->unlock( + backend_csf->info->csf_if->ctx, flags); + return -EIO; + } + + /* Make sure that this is either the first request since enable or the + * previous dump has completed, so we can avoid midway through a dump. + */ + if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) && + (backend_csf->dump_state != + KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED)) { + backend_csf->info->csf_if->unlock( + backend_csf->info->csf_if->ctx, flags); + /* HWC is disabled or another dump is ongoing, or we are on + * fault. + */ + return -EIO; + } + + /* Reset the completion so dump_wait() has something to wait on. */ + reinit_completion(&backend_csf->dump_completed); + + if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) && + !backend_csf->info->fw_in_protected_mode) { + /* Only do the request if we are fully enabled and not in + * protected mode. + */ + backend_csf->dump_state = + KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED; + do_request = true; + } else { + /* Skip the request and waiting for ack and go straight to + * checking the insert and kicking off the worker to do the dump + */ + backend_csf->dump_state = + KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT; + } + + /* CSF firmware might enter protected mode now, but still call request. 
+ * That is fine, as we changed state while holding the lock, so the + * protected mode enter function will query the insert and launch the + * dumping worker. + * At some point we will get the dump request ACK saying a dump is done, + * but we can ignore it if we are not in the REQUESTED state and process + * it in next round dumping worker. + */ + + *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend); + kbasep_hwcnt_backend_csf_cc_update(backend_csf); + + if (do_request) + backend_csf->info->csf_if->dump_request( + backend_csf->info->csf_if->ctx); + else + kbase_hwcnt_backend_csf_submit_dump_worker(backend_csf->info); + + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); + return 0; +} + +/* CSF backend implementation of kbase_hwcnt_backend_dump_wait_fn */ +static int +kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backend) +{ + unsigned long flags; + struct kbase_hwcnt_backend_csf *backend_csf = + (struct kbase_hwcnt_backend_csf *)backend; + int errcode; + + if (!backend_csf) + return -EINVAL; + + wait_for_completion(&backend_csf->dump_completed); + + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + /* Make sure the last dump actually succeeded. */ + errcode = (backend_csf->dump_state == + KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) ? + 0 : + -EIO; + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); + + return errcode; +} + +/* CSF backend implementation of kbase_hwcnt_backend_dump_clear_fn */ +static int +kbasep_hwcnt_backend_csf_dump_clear(struct kbase_hwcnt_backend *backend) +{ + struct kbase_hwcnt_backend_csf *backend_csf = + (struct kbase_hwcnt_backend_csf *)backend; + int errcode; + u64 ts; + + if (!backend_csf) + return -EINVAL; + + /* Request a dump so we can clear all current counters. 
*/ + errcode = kbasep_hwcnt_backend_csf_dump_request(backend, &ts); + if (!errcode) + /* Wait for the manual dump or auto dump to be done and + * accumulator to be updated. + */ + errcode = kbasep_hwcnt_backend_csf_dump_wait(backend); + + return errcode; +} + +/* CSF backend implementation of kbase_hwcnt_backend_dump_get_fn */ +static int kbasep_hwcnt_backend_csf_dump_get( + struct kbase_hwcnt_backend *backend, + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate) +{ + struct kbase_hwcnt_backend_csf *backend_csf = + (struct kbase_hwcnt_backend_csf *)backend; + int ret; + size_t clk; + + if (!backend_csf || !dst || !dst_enable_map || + (backend_csf->info->metadata != dst->metadata) || + (dst_enable_map->metadata != dst->metadata)) + return -EINVAL; + + kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) { + if (!kbase_hwcnt_clk_enable_map_enabled( + dst_enable_map->clk_enable_map, clk)) + continue; + + /* Extract elapsed cycle count for each clock domain. */ + dst->clk_cnt_buf[clk] = backend_csf->cycle_count_elapsed[clk]; + } + + /* We just return the user buffer without checking the current state, + * as it is undefined to call this function without a prior succeeding + * one to dump_wait(). + */ + ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf, + dst_enable_map, accumulate); + + return ret; +} + +/** + * kbasep_hwcnt_backend_csf_destroy() - Destroy CSF backend. + * @backend_csf: Pointer to CSF backend to destroy. + * + * Can be safely called on a backend in any state of partial construction. 
+ * + */ +static void +kbasep_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_csf *backend_csf) +{ + if (!backend_csf) + return; + + destroy_workqueue(backend_csf->hwc_dump_workq); + + backend_csf->info->csf_if->ring_buf_free(backend_csf->info->csf_if->ctx, + backend_csf->ring_buf); + + kfree(backend_csf->accum_buf); + backend_csf->accum_buf = NULL; + + kfree(backend_csf->old_sample_buf); + backend_csf->old_sample_buf = NULL; + + kfree(backend_csf->to_user_buf); + backend_csf->to_user_buf = NULL; + + kfree(backend_csf); +} + +/** + * kbasep_hwcnt_backend_csf_create() - Create a CSF backend instance. + * + * @csf_info: Non-NULL pointer to backend info. + * @out_backend: Non-NULL pointer to where backend is stored on success. + * Return: 0 on success, else error code. + */ +static int +kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info, + struct kbase_hwcnt_backend_csf **out_backend) +{ + struct kbase_hwcnt_backend_csf *backend_csf = NULL; + int errcode = -ENOMEM; + + WARN_ON(!csf_info); + WARN_ON(!out_backend); + + backend_csf = kzalloc(sizeof(*backend_csf), GFP_KERNEL); + if (!backend_csf) + goto alloc_error; + + backend_csf->info = csf_info; + kbasep_hwcnt_backend_csf_init_layout(&csf_info->prfcnt_info, + &backend_csf->phys_layout); + + backend_csf->accum_buf = + kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL); + if (!backend_csf->accum_buf) + goto err_alloc_acc_buf; + + backend_csf->old_sample_buf = + kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL); + if (!backend_csf->old_sample_buf) + goto err_alloc_pre_sample_buf; + + backend_csf->to_user_buf = + kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL); + if (!backend_csf->to_user_buf) + goto err_alloc_user_sample_buf; + + errcode = csf_info->csf_if->ring_buf_alloc( + csf_info->csf_if->ctx, csf_info->ring_buf_cnt, + &backend_csf->ring_buf_cpu_base, &backend_csf->ring_buf); + if (errcode) + goto err_ring_buf_alloc; + + /* Zero all performance enable header to prepare for 
first enable. */ + kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(backend_csf); + + /* Sync zeroed buffers to avoid coherency issues on use. */ + backend_csf->info->csf_if->ring_buf_sync( + backend_csf->info->csf_if->ctx, backend_csf->ring_buf, 0, + backend_csf->info->ring_buf_cnt, false); + + init_completion(&backend_csf->dump_completed); + + init_waitqueue_head(&backend_csf->enable_state_waitq); + + /* Allocate a single threaded work queue for dump worker and threshold + * worker. errcode is 0 here, so re-arm it for the failure path. + */ + errcode = -ENOMEM; + backend_csf->hwc_dump_workq = + alloc_workqueue("mali_hwc_dump_wq", WQ_HIGHPRI | WQ_UNBOUND, 1); + if (!backend_csf->hwc_dump_workq) + goto err_alloc_workqueue; + + INIT_WORK(&backend_csf->hwc_dump_work, + kbasep_hwcnt_backend_csf_dump_worker); + INIT_WORK(&backend_csf->hwc_threshold_work, + kbasep_hwcnt_backend_csf_threshold_worker); + + backend_csf->enable_state = KBASE_HWCNT_BACKEND_CSF_DISABLED; + backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; + complete_all(&backend_csf->dump_completed); + + *out_backend = backend_csf; + return 0; + +err_alloc_workqueue: + backend_csf->info->csf_if->ring_buf_free(backend_csf->info->csf_if->ctx, + backend_csf->ring_buf); +err_ring_buf_alloc: + kfree(backend_csf->to_user_buf); + backend_csf->to_user_buf = NULL; +err_alloc_user_sample_buf: + kfree(backend_csf->old_sample_buf); + backend_csf->old_sample_buf = NULL; +err_alloc_pre_sample_buf: + kfree(backend_csf->accum_buf); + backend_csf->accum_buf = NULL; +err_alloc_acc_buf: + kfree(backend_csf); +alloc_error: + return errcode; +} + +/* CSF backend implementation of kbase_hwcnt_backend_init_fn */ +static int +kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *info, + struct kbase_hwcnt_backend **out_backend) +{ + unsigned long flags; + struct kbase_hwcnt_backend_csf *backend_csf = NULL; + struct kbase_hwcnt_backend_csf_info *csf_info = + (struct kbase_hwcnt_backend_csf_info *)info; + int errcode; + bool
success = false; + + if (!info || !out_backend) + return -EINVAL; + + /* Create the backend. */ + errcode = kbasep_hwcnt_backend_csf_create(csf_info, &backend_csf); + if (errcode) + return errcode; + + /* If it was not created before, attach it to csf_info. + * Use spin lock to avoid concurrent initialization. + */ + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + if (csf_info->backend == NULL) { + csf_info->backend = backend_csf; + *out_backend = (struct kbase_hwcnt_backend *)backend_csf; + success = true; + if (csf_info->unrecoverable_error_happened) + backend_csf->enable_state = + KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR; + } + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); + + /* Destroy the new created backend if the backend has already created + * before. In normal case, this won't happen if the client call init() + * function properly. + */ + if (!success) { + kbasep_hwcnt_backend_csf_destroy(backend_csf); + return -EBUSY; + } + + return 0; +} + +/* CSF backend implementation of kbase_hwcnt_backend_term_fn */ +static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend) +{ + unsigned long flags; + struct kbase_hwcnt_backend_csf *backend_csf = + (struct kbase_hwcnt_backend_csf *)backend; + + if (!backend) + return; + + kbasep_hwcnt_backend_csf_dump_disable(backend); + + /* Set the backend in csf_info to NULL so we won't handle any external + * notification anymore since we are terminating. + */ + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + backend_csf->info->backend = NULL; + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, + flags); + + kbasep_hwcnt_backend_csf_destroy(backend_csf); +} + +/** + * kbasep_hwcnt_backend_csf_info_destroy() - Destroy a CSF backend info. + * @info: Pointer to info to destroy. + * + * Can be safely called on a backend info in any state of partial construction. 
+ * + */ +static void kbasep_hwcnt_backend_csf_info_destroy( + const struct kbase_hwcnt_backend_csf_info *info) +{ + if (!info) + return; + + /* The backend should be destroyed before the info object is destroyed. */ + WARN_ON(info->backend != NULL); + + /* The metadata should be destroyed before the info object is destroyed. */ + WARN_ON(info->metadata != NULL); + + kfree(info); +} + +/** + * kbasep_hwcnt_backend_csf_info_create() - Create a CSF backend info. + * + * @csf_if: Non-NULL pointer to a hwcnt backend CSF interface structure + * used to create backend interface. + * @ring_buf_cnt: The buffer count of the CSF hwcnt backend ring buffer. + * MUST be power of 2. + * @out_info: Non-NULL pointer to where info is stored on success. + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_backend_csf_info_create( + struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt, + const struct kbase_hwcnt_backend_csf_info **out_info) +{ + struct kbase_hwcnt_backend_csf_info *info = NULL; + + WARN_ON(!csf_if); + WARN_ON(!out_info); + WARN_ON(!is_power_of_2(ring_buf_cnt)); + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + +#if defined(CONFIG_MALI_PRFCNT_SET_SECONDARY) + info->counter_set = KBASE_HWCNT_SET_SECONDARY; +#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY) + info->counter_set = KBASE_HWCNT_SET_TERTIARY; +#else + /* Default to primary */ + info->counter_set = KBASE_HWCNT_SET_PRIMARY; +#endif + + info->backend = NULL; + info->csf_if = csf_if; + info->ring_buf_cnt = ring_buf_cnt; + info->fw_in_protected_mode = false; + info->unrecoverable_error_happened = false; + + *out_info = info; + + return 0; +} + +/* CSF backend implementation of kbase_hwcnt_backend_metadata_fn */ +static const struct kbase_hwcnt_metadata * +kbasep_hwcnt_backend_csf_metadata(const struct kbase_hwcnt_backend_info *info) +{ + if (!info) + return NULL; + + WARN_ON(!((const struct kbase_hwcnt_backend_csf_info *)info)->metadata); + + return ((const struct
kbase_hwcnt_backend_csf_info *)info)->metadata; +} + +static void kbasep_hwcnt_backend_csf_handle_unrecoverable_error( + struct kbase_hwcnt_backend_csf *backend_csf) +{ + bool do_disable = false; + + backend_csf->info->csf_if->assert_lock_held( + backend_csf->info->csf_if->ctx); + + /* We are already in or transitioning to the unrecoverable error state. + * Early out. + */ + if ((backend_csf->enable_state == + KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) || + (backend_csf->enable_state == + KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER)) + return; + + /* If we are disabled, we know we have no pending workers, so skip the + * waiting state. + */ + if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED) { + kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( + backend_csf, + KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR); + return; + } + + /* Trigger a disable only if we are not already transitioning to + * disabled, we don't want to disable twice if an unrecoverable error + * happens while we are disabling. + */ + do_disable = (backend_csf->enable_state != + KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED); + + kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( + backend_csf, + KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER); + + /* Transition the dump to the IDLE state and unblock any waiters. The + * IDLE state signifies an error. + */ + backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; + complete_all(&backend_csf->dump_completed); + + /* Trigger a disable only if we are not already transitioning to + * disabled, - we don't want to disable twice if an unrecoverable error + * happens while we are disabling. 
+ */ + if (do_disable) + backend_csf->info->csf_if->dump_disable( + backend_csf->info->csf_if->ctx); +} + +static void kbasep_hwcnt_backend_csf_handle_recoverable_error( + struct kbase_hwcnt_backend_csf *backend_csf) +{ + backend_csf->info->csf_if->assert_lock_held( + backend_csf->info->csf_if->ctx); + + switch (backend_csf->enable_state) { + case KBASE_HWCNT_BACKEND_CSF_DISABLED: + case KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER: + case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED: + case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR: + case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER: + /* Already disabled or disabling, or in an unrecoverable error. + * Nothing to be done to handle the error. + */ + return; + case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED: + /* A seemingly recoverable error that occurs while we are + * transitioning to enabled is probably unrecoverable. + */ + kbasep_hwcnt_backend_csf_handle_unrecoverable_error( + backend_csf); + return; + case KBASE_HWCNT_BACKEND_CSF_ENABLED: + /* Start transitioning to the disabled state. We can't wait for + * it as this recoverable error might be triggered from an + * interrupt. The wait will be done in the eventual call to + * disable(). + */ + kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( + backend_csf, + KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED); + /* Transition the dump to the IDLE state and unblock any + * waiters. The IDLE state signifies an error. 
+ */ + backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; + complete_all(&backend_csf->dump_completed); + + backend_csf->info->csf_if->dump_disable( + backend_csf->info->csf_if->ctx); + return; + } +} + +void kbase_hwcnt_backend_csf_protm_entered( + struct kbase_hwcnt_backend_interface *iface) +{ + struct kbase_hwcnt_backend_csf_info *csf_info = + (struct kbase_hwcnt_backend_csf_info *)iface->info; + + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); + csf_info->fw_in_protected_mode = true; + + /* Call on_prfcnt_sample() to trigger collection of the protected mode + * entry auto-sample if there is currently a pending dump request. + */ + kbase_hwcnt_backend_csf_on_prfcnt_sample(iface); +} + +void kbase_hwcnt_backend_csf_protm_exited( + struct kbase_hwcnt_backend_interface *iface) +{ + struct kbase_hwcnt_backend_csf_info *csf_info; + + csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); + csf_info->fw_in_protected_mode = false; +} + +void kbase_hwcnt_backend_csf_on_unrecoverable_error( + struct kbase_hwcnt_backend_interface *iface) +{ + unsigned long flags; + struct kbase_hwcnt_backend_csf_info *csf_info; + + csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + + csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags); + csf_info->unrecoverable_error_happened = true; + /* Early out if the backend does not exist. 
*/ + if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) { + csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); + return; + } + + kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend); + + csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); +} + +void kbase_hwcnt_backend_csf_on_before_reset( + struct kbase_hwcnt_backend_interface *iface) +{ + unsigned long flags; + struct kbase_hwcnt_backend_csf_info *csf_info; + struct kbase_hwcnt_backend_csf *backend_csf; + + csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + + csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags); + csf_info->unrecoverable_error_happened = false; + /* Early out if the backend does not exist. */ + if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) { + csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); + return; + } + backend_csf = csf_info->backend; + + if ((backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED) && + (backend_csf->enable_state != + KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR)) { + /* Before a reset occurs, we must either have been disabled + * (else we lose data) or we should have encountered an + * unrecoverable error. Either way, we will have disabled the + * interface and waited for any workers that might have still + * been in flight. + * If not in these states, fire off one more disable to make + * sure everything is turned off before the power is pulled. + * We can't wait for this disable to complete, but it doesn't + * really matter, the power is being pulled. 
+ */ + kbasep_hwcnt_backend_csf_handle_unrecoverable_error( + csf_info->backend); + } + + /* A reset is the only way to exit the unrecoverable error state */ + if (backend_csf->enable_state == + KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) { + kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( + backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED); + } + + csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); +} + +void kbase_hwcnt_backend_csf_on_prfcnt_sample( + struct kbase_hwcnt_backend_interface *iface) +{ + struct kbase_hwcnt_backend_csf_info *csf_info; + struct kbase_hwcnt_backend_csf *backend_csf; + + csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); + + /* Early out if the backend does not exist. */ + if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) + return; + backend_csf = csf_info->backend; + + /* If the current state is not REQUESTED, this HWC sample will be + * skipped and processed in next dump_request. + */ + if (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED) + return; + backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT; + + kbase_hwcnt_backend_csf_submit_dump_worker(csf_info); +} + +void kbase_hwcnt_backend_csf_on_prfcnt_threshold( + struct kbase_hwcnt_backend_interface *iface) +{ + struct kbase_hwcnt_backend_csf_info *csf_info; + struct kbase_hwcnt_backend_csf *backend_csf; + + csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); + + /* Early out if the backend does not exist. */ + if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) + return; + backend_csf = csf_info->backend; + + if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) + /* Submit the threshold work into the work queue to consume the + * available samples. 
+ */ + queue_work(backend_csf->hwc_dump_workq, + &backend_csf->hwc_threshold_work); +} + +void kbase_hwcnt_backend_csf_on_prfcnt_overflow( + struct kbase_hwcnt_backend_interface *iface) +{ + struct kbase_hwcnt_backend_csf_info *csf_info; + + csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); + + /* Early out if the backend does not exist. */ + if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) + return; + + /* Called when an overflow occurs. We treat this as a recoverable error, + * so we start transitioning to the disabled state. + * We could try and handle it while enabled, but in a real system we + * never expect an overflow to occur so there is no point implementing + * complex recovery code when we can just turn ourselves off instead for + * a while. + */ + kbasep_hwcnt_backend_csf_handle_recoverable_error(csf_info->backend); +} + +void kbase_hwcnt_backend_csf_on_prfcnt_enable( + struct kbase_hwcnt_backend_interface *iface) +{ + struct kbase_hwcnt_backend_csf_info *csf_info; + struct kbase_hwcnt_backend_csf *backend_csf; + + csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); + + /* Early out if the backend does not exist. */ + if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) + return; + backend_csf = csf_info->backend; + + if (backend_csf->enable_state == + KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) { + kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( + backend_csf, KBASE_HWCNT_BACKEND_CSF_ENABLED); + } else if (backend_csf->enable_state == + KBASE_HWCNT_BACKEND_CSF_ENABLED) { + /* Unexpected, but we are already in the right state so just + * ignore it. + */ + } else { + /* Unexpected state change, assume everything is broken until + * we reset. 
+ */ + kbasep_hwcnt_backend_csf_handle_unrecoverable_error( + csf_info->backend); + } +} + +void kbase_hwcnt_backend_csf_on_prfcnt_disable( + struct kbase_hwcnt_backend_interface *iface) +{ + struct kbase_hwcnt_backend_csf_info *csf_info; + struct kbase_hwcnt_backend_csf *backend_csf; + + csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); + + /* Early out if the backend does not exist. */ + if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) + return; + backend_csf = csf_info->backend; + + if (backend_csf->enable_state == + KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED) { + kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( + backend_csf, + KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER); + } else if (backend_csf->enable_state == + KBASE_HWCNT_BACKEND_CSF_DISABLED) { + /* Unexpected, but we are already in the right state so just + * ignore it. + */ + } else { + /* Unexpected state change, assume everything is broken until + * we reset. + */ + kbasep_hwcnt_backend_csf_handle_unrecoverable_error( + csf_info->backend); + } +} + +int kbase_hwcnt_backend_csf_metadata_init( + struct kbase_hwcnt_backend_interface *iface) +{ + int errcode; + struct kbase_hwcnt_backend_csf_info *csf_info; + struct kbase_hwcnt_gpu_info gpu_info; + + if (!iface) + return -EINVAL; + + csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + + WARN_ON(!csf_info->csf_if->get_prfcnt_info); + + csf_info->csf_if->get_prfcnt_info(csf_info->csf_if->ctx, + &csf_info->prfcnt_info); + + /* The clock domain counts should not exceed the number of maximum + * number of clock regulators. 
+ */ + if (csf_info->prfcnt_info.clk_cnt > BASE_MAX_NR_CLOCKS_REGULATORS) + return -EIO; + + gpu_info.l2_count = csf_info->prfcnt_info.l2_count; + gpu_info.core_mask = csf_info->prfcnt_info.core_mask; + gpu_info.clk_cnt = csf_info->prfcnt_info.clk_cnt; + gpu_info.prfcnt_values_per_block = + csf_info->prfcnt_info.prfcnt_block_size / + KBASE_HWCNT_VALUE_BYTES; + errcode = kbase_hwcnt_csf_metadata_create( + &gpu_info, csf_info->counter_set, &csf_info->metadata); + if (errcode) + return errcode; + + /* + * Dump abstraction size should be exactly the same size and layout as + * the physical dump size, for backwards compatibility. + */ + WARN_ON(csf_info->prfcnt_info.dump_bytes != + csf_info->metadata->dump_buf_bytes); + + return 0; +} + +void kbase_hwcnt_backend_csf_metadata_term( + struct kbase_hwcnt_backend_interface *iface) +{ + struct kbase_hwcnt_backend_csf_info *csf_info; + + if (!iface) + return; + + csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + if (csf_info->metadata) { + kbase_hwcnt_csf_metadata_destroy(csf_info->metadata); + csf_info->metadata = NULL; + } +} + +int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if, + u32 ring_buf_cnt, + struct kbase_hwcnt_backend_interface *iface) +{ + int errcode; + const struct kbase_hwcnt_backend_csf_info *info = NULL; + + if (!iface || !csf_if) + return -EINVAL; + + /* The buffer count must be power of 2 */ + if (!is_power_of_2(ring_buf_cnt)) + return -EINVAL; + + errcode = kbasep_hwcnt_backend_csf_info_create(csf_if, ring_buf_cnt, + &info); + if (errcode) + return errcode; + + iface->info = (struct kbase_hwcnt_backend_info *)info; + iface->metadata = kbasep_hwcnt_backend_csf_metadata; + iface->init = kbasep_hwcnt_backend_csf_init; + iface->term = kbasep_hwcnt_backend_csf_term; + iface->timestamp_ns = kbasep_hwcnt_backend_csf_timestamp_ns; + iface->dump_enable = kbasep_hwcnt_backend_csf_dump_enable; + iface->dump_enable_nolock = kbasep_hwcnt_backend_csf_dump_enable_nolock; + 
iface->dump_disable = kbasep_hwcnt_backend_csf_dump_disable; + iface->dump_clear = kbasep_hwcnt_backend_csf_dump_clear; + iface->dump_request = kbasep_hwcnt_backend_csf_dump_request; + iface->dump_wait = kbasep_hwcnt_backend_csf_dump_wait; + iface->dump_get = kbasep_hwcnt_backend_csf_dump_get; + + return 0; +} + +void kbase_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_interface *iface) +{ + if (!iface) + return; + + kbasep_hwcnt_backend_csf_info_destroy( + (const struct kbase_hwcnt_backend_csf_info *)iface->info); + memset(iface, 0, sizeof(*iface)); +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf.h new file mode 100644 index 0000000..ce1af9a --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf.h @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/** + * Concrete implementation of mali_kbase_hwcnt_backend interface for CSF + * backend. 
+ */ + +#ifndef _KBASE_HWCNT_BACKEND_CSF_H_ +#define _KBASE_HWCNT_BACKEND_CSF_H_ + +#include "mali_kbase_hwcnt_backend.h" +#include "mali_kbase_hwcnt_backend_csf_if.h" + +/** + * kbase_hwcnt_backend_csf_create() - Create a CSF hardware counter backend + * interface. + * @csf_if: Non-NULL pointer to a hwcnt backend CSF interface structure + * used to create backend interface. + * @ring_buf_cnt: The buffer count of CSF hwcnt backend, used when allocate ring + * buffer, MUST be power of 2. + * @iface: Non-NULL pointer to backend interface structure that is filled + * in on creation success. + * + * Calls to iface->dump_enable_nolock() require the CSF Scheduler IRQ lock. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if, + u32 ring_buf_cnt, + struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_csf_metadata_init() - Initialize the metadata for a CSF + * hardware counter backend. + * @iface: Non-NULL pointer to backend interface structure + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_backend_csf_metadata_init( + struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_csf_metadata_term() - Terminate the metadata for a CSF + * hardware counter backend. + * @iface: Non-NULL pointer to backend interface structure. + */ +void kbase_hwcnt_backend_csf_metadata_term( + struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_csf_destroy() - Destroy a CSF hardware counter backend + * interface. + * @iface: Pointer to interface to destroy. + * + * Can be safely called on an all-zeroed interface, or on an already destroyed + * interface. + */ +void kbase_hwcnt_backend_csf_destroy( + struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_csf_protm_entered() - CSF HWC backend function to receive + * notification that protected mode + * has been entered. 
+ * @iface: Non-NULL pointer to HWC backend interface. + */ +void kbase_hwcnt_backend_csf_protm_entered( + struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_csf_protm_exited() - CSF HWC backend function to receive + * notification that protected mode has + * been exited. + * @iface: Non-NULL pointer to HWC backend interface. + */ +void kbase_hwcnt_backend_csf_protm_exited( + struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_csf_on_unrecoverable_error() - CSF HWC backend function + * called when unrecoverable + * errors are detected. + * @iface: Non-NULL pointer to HWC backend interface. + * + * This should be called on encountering errors that can only be recovered from + * with reset, or that may put HWC logic in state that could result in hang. For + * example, on bus error, or when FW becomes unresponsive. + */ +void kbase_hwcnt_backend_csf_on_unrecoverable_error( + struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_csf_on_before_reset() - CSF HWC backend function to be + * called immediately before a + * reset. Takes us out of the + * unrecoverable error state, if we + * were in it. + * @iface: Non-NULL pointer to HWC backend interface. + */ +void kbase_hwcnt_backend_csf_on_before_reset( + struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_csf_on_prfcnt_sample() - CSF performance counter sample + * complete interrupt handler. + * @iface: Non-NULL pointer to HWC backend interface. + */ +void kbase_hwcnt_backend_csf_on_prfcnt_sample( + struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_csf_on_prfcnt_threshold() - CSF performance counter + * buffer reach threshold + * interrupt handler. + * @iface: Non-NULL pointer to HWC backend interface. 
+ */ +void kbase_hwcnt_backend_csf_on_prfcnt_threshold( + struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_csf_on_prfcnt_overflow() - CSF performance counter buffer + * overflow interrupt handler. + * @iface: Non-NULL pointer to HWC backend interface. + */ +void kbase_hwcnt_backend_csf_on_prfcnt_overflow( + struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_csf_on_prfcnt_enable() - CSF performance counter enabled + * interrupt handler. + * @iface: Non-NULL pointer to HWC backend interface. + */ +void kbase_hwcnt_backend_csf_on_prfcnt_enable( + struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_csf_on_prfcnt_disable() - CSF performance counter + * disabled interrupt handler. + * @iface: Non-NULL pointer to HWC backend interface. + */ +void kbase_hwcnt_backend_csf_on_prfcnt_disable( + struct kbase_hwcnt_backend_interface *iface); + +#endif /* _KBASE_HWCNT_BACKEND_CSF_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf_if.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf_if.h new file mode 100644 index 0000000..f6387c2 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf_if.h @@ -0,0 +1,311 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * Virtual interface for CSF hardware counter backend. + */ + +#ifndef _KBASE_HWCNT_BACKEND_CSF_IF_H_ +#define _KBASE_HWCNT_BACKEND_CSF_IF_H_ + +#include + +/** + * struct kbase_hwcnt_backend_csf_if_ctx - Opaque pointer to a CSF interface + * context. + */ +struct kbase_hwcnt_backend_csf_if_ctx; + +/** + * struct kbase_hwcnt_backend_csf_if_ring_buf - Opaque pointer to a CSF + * interface ring buffer. + */ +struct kbase_hwcnt_backend_csf_if_ring_buf; + +/** + * struct kbase_hwcnt_backend_csf_if_enable - enable hardware counter collection + * structure. + * @fe_bm: Front End counters selection bitmask. + * @shader_bm: Shader counters selection bitmask. + * @tiler_bm: Tiler counters selection bitmask. + * @mmu_l2_bm: MMU_L2 counters selection bitmask. + * @counter_set: The performance counter set to enable. + * @clk_enable_map: A u64 bitfield, each bit of which enables the cycle + * counter for a given clock domain. + */ +struct kbase_hwcnt_backend_csf_if_enable { + u32 fe_bm; + u32 shader_bm; + u32 tiler_bm; + u32 mmu_l2_bm; + u8 counter_set; + u64 clk_enable_map; +}; + +/** + * struct kbase_hwcnt_backend_csf_if_prfcnt_info - Performance counter + * information. + * @dump_bytes: Bytes of GPU memory required to perform a performance + * counter dump. + * @prfcnt_block_size: Bytes of each performance counter block. + * @l2_count: The MMU L2 cache count. + * @core_mask: Shader core mask. + * @clk_cnt: Clock domain count in the system. + * @clearing_samples: Indicates whether counters are cleared after each sample + * is taken.
+ */ +struct kbase_hwcnt_backend_csf_if_prfcnt_info { + size_t dump_bytes; + size_t prfcnt_block_size; + size_t l2_count; + u64 core_mask; + u8 clk_cnt; + bool clearing_samples; +}; + +/** + * typedef kbase_hwcnt_backend_csf_if_assert_lock_held_fn - Assert that the + * backend spinlock is + * held. + * @ctx: Non-NULL pointer to a CSF context. + */ +typedef void kbase_hwcnt_backend_csf_if_assert_lock_held_fn( + struct kbase_hwcnt_backend_csf_if_ctx *ctx); + +/** + * typedef kbase_hwcnt_backend_csf_if_lock_fn - Acquire backend spinlock. + * + * @ctx: Non-NULL pointer to a CSF context. + * @flags: Pointer to the memory location that would store the previous + * interrupt state. + */ +typedef void +kbase_hwcnt_backend_csf_if_lock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + unsigned long *flags); + +/** + * typedef kbase_hwcnt_backend_csf_if_unlock_fn - Release backend spinlock. + * + * @ctx: Non-NULL pointer to a CSF context. + * @flags: Previously stored interrupt state when Scheduler interrupt + * spinlock was acquired. + */ +typedef void +kbase_hwcnt_backend_csf_if_unlock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + unsigned long flags); + +/** + * typedef kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn - Get performance + * counter information. + * @ctx: Non-NULL pointer to a CSF context. + * @prfcnt_info: Non-NULL pointer to struct where performance counter + * information should be stored. + */ +typedef void kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info); + +/** + * typedef kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn - Allocate a ring buffer + * for CSF interface. + * @ctx: Non-NULL pointer to a CSF context. + * @buf_count: The buffer count in the ring buffer to be allocated, + * MUST be power of 2. + * @cpu_dump_base: Non-NULL pointer to where ring buffer CPU base address is + * stored when success. 
+ * @ring_buf: Non-NULL pointer to where ring buffer is stored when success. + * + * A ring buffer is needed by the CSF interface to do manual HWC sample and + * automatic HWC samples, the buffer count in the ring buffer MUST be power + * of 2 to meet the hardware requirement. + * + * Return: 0 on success, else error code. + */ +typedef int kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count, + void **cpu_dump_base, + struct kbase_hwcnt_backend_csf_if_ring_buf **ring_buf); + +/** + * typedef kbase_hwcnt_backend_csf_if_ring_buf_sync_fn - Sync HWC dump buffers + * memory. + * @ctx: Non-NULL pointer to a CSF context. + * @ring_buf: Non-NULL pointer to the ring buffer. + * @buf_index_first: The first buffer index in the ring buffer to be synced, + * inclusive. + * @buf_index_last: The last buffer index in the ring buffer to be synced, + * exclusive. + * @for_cpu: The direction of sync to be applied, set to true when CPU + * cache needs invalidating before reading the buffer, and set + * to false after CPU writes to flush these before this memory + * is overwritten by the GPU. + * + * Flush cached HWC dump buffer data to ensure that all writes from GPU and CPU + * are correctly observed. + */ +typedef void kbase_hwcnt_backend_csf_if_ring_buf_sync_fn( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, + u32 buf_index_first, u32 buf_index_last, bool for_cpu); + +/** + * typedef kbase_hwcnt_backend_csf_if_ring_buf_free_fn - Free a ring buffer for + * the CSF interface. + * + * @ctx: Non-NULL pointer to a CSF interface context. + * @ring_buf: Non-NULL pointer to the ring buffer which to be freed. 
+ */ +typedef void kbase_hwcnt_backend_csf_if_ring_buf_free_fn( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf); + +/** + * typedef kbase_hwcnt_backend_csf_if_timestamp_ns_fn - Get the current + * timestamp of the CSF + * interface. + * @ctx: Non-NULL pointer to a CSF interface context. + * + * Return: CSF interface timestamp in nanoseconds. + */ +typedef u64 kbase_hwcnt_backend_csf_if_timestamp_ns_fn( + struct kbase_hwcnt_backend_csf_if_ctx *ctx); + +/** + * typedef kbase_hwcnt_backend_csf_if_dump_enable_fn - Setup and enable hardware + * counter in CSF interface. + * @ctx: Non-NULL pointer to a CSF interface context. + * @ring_buf: Non-NULL pointer to the ring buffer which used to setup the HWC. + * @enable: Non-NULL pointer to the enable map of HWC. + * + * Requires lock to be taken before calling. + */ +typedef void kbase_hwcnt_backend_csf_if_dump_enable_fn( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, + struct kbase_hwcnt_backend_csf_if_enable *enable); + +/** + * typedef kbase_hwcnt_backend_csf_if_dump_disable_fn - Disable hardware counter + * in CSF interface. + * @ctx: Non-NULL pointer to a CSF interface context. + * + * Requires lock to be taken before calling. + */ +typedef void kbase_hwcnt_backend_csf_if_dump_disable_fn( + struct kbase_hwcnt_backend_csf_if_ctx *ctx); + +/** + * typedef kbase_hwcnt_backend_csf_if_dump_request_fn - Request a HWC dump. + * + * @ctx: Non-NULL pointer to the interface context. + * + * Requires lock to be taken before calling. + */ +typedef void kbase_hwcnt_backend_csf_if_dump_request_fn( + struct kbase_hwcnt_backend_csf_if_ctx *ctx); + +/** + * typedef kbase_hwcnt_backend_csf_if_get_indexes_fn - Get current extract and + * insert indexes of the + * ring buffer. + * + * @ctx: Non-NULL pointer to a CSF interface context. + * @extract_index: Non-NULL pointer where current extract index to be saved. 
+ * @insert_index: Non-NULL pointer where current insert index to be saved. + * + * Requires lock to be taken before calling. + */ +typedef void kbase_hwcnt_backend_csf_if_get_indexes_fn( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index, + u32 *insert_index); + +/** + * typedef kbase_hwcnt_backend_csf_if_set_extract_index_fn - Update the extract + * index of the ring + * buffer. + * + * @ctx: Non-NULL pointer to a CSF interface context. + * @extract_index: New extract index to be set. + * + * Requires lock to be taken before calling. + */ +typedef void kbase_hwcnt_backend_csf_if_set_extract_index_fn( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 extract_index); + +/** + * typedef kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn - Get the current + * GPU cycle count. + * @ctx: Non-NULL pointer to a CSF interface context. + * @cycle_counts: Non-NULL pointer to an array where cycle counts are saved, + * the array size should be at least as big as the number of + * clock domains returned by get_prfcnt_info interface. + * @clk_enable_map: A u64 bitfield in which each set bit specifies an enabled clock + * domain. + * + * Requires lock to be taken before calling. + */ +typedef void kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, u64 *cycle_counts, + u64 clk_enable_map); + +/** + * struct kbase_hwcnt_backend_csf_if - Hardware counter backend CSF virtual + * interface. + * @ctx: CSF interface context. + * @assert_lock_held: Function ptr to assert backend spinlock is held. + * @lock: Function ptr to acquire backend spinlock. + * @unlock: Function ptr to release backend spinlock. + * @get_prfcnt_info: Function ptr to get performance counter related + * information. + * @ring_buf_alloc: Function ptr to allocate ring buffer for CSF HWC. + * @ring_buf_sync: Function ptr to sync ring buffer to CPU. + * @ring_buf_free: Function ptr to free ring buffer for CSF HWC.
+ * @timestamp_ns: Function ptr to get the current CSF interface + * timestamp. + * @dump_enable: Function ptr to enable dumping. + * @dump_enable_nolock: Function ptr to enable dumping while the + * backend-specific spinlock is already held. + * @dump_disable: Function ptr to disable dumping. + * @dump_request: Function ptr to request a dump. + * @get_indexes: Function ptr to get extract and insert indexes of the + * ring buffer. + * @set_extract_index: Function ptr to set extract index of ring buffer. + * @get_gpu_cycle_count: Function ptr to get the GPU cycle count. + */ +struct kbase_hwcnt_backend_csf_if { + struct kbase_hwcnt_backend_csf_if_ctx *ctx; + kbase_hwcnt_backend_csf_if_assert_lock_held_fn *assert_lock_held; + kbase_hwcnt_backend_csf_if_lock_fn *lock; + kbase_hwcnt_backend_csf_if_unlock_fn *unlock; + kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn *get_prfcnt_info; + kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn *ring_buf_alloc; + kbase_hwcnt_backend_csf_if_ring_buf_sync_fn *ring_buf_sync; + kbase_hwcnt_backend_csf_if_ring_buf_free_fn *ring_buf_free; + kbase_hwcnt_backend_csf_if_timestamp_ns_fn *timestamp_ns; + kbase_hwcnt_backend_csf_if_dump_enable_fn *dump_enable; + kbase_hwcnt_backend_csf_if_dump_disable_fn *dump_disable; + kbase_hwcnt_backend_csf_if_dump_request_fn *dump_request; + kbase_hwcnt_backend_csf_if_get_indexes_fn *get_indexes; + kbase_hwcnt_backend_csf_if_set_extract_index_fn *set_extract_index; + kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn *get_gpu_cycle_count; +}; + +#endif /* #define _KBASE_HWCNT_BACKEND_CSF_IF_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf_if_fw.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf_if_fw.c new file mode 100644 index 0000000..979299f --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf_if_fw.c @@ -0,0 +1,787 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2021 ARM 
Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * CSF GPU HWC backend firmware interface APIs. + */ + +#include +#include +#include +#include "mali_kbase_hwcnt_gpu.h" +#include "mali_kbase_hwcnt_types.h" +#include + +#include "csf/mali_kbase_csf_firmware.h" +#include "mali_kbase_hwcnt_backend_csf_if_fw.h" +#include "mali_kbase_hwaccess_time.h" +#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" + +#include +#include "mali_kbase_ccswe.h" + + +/** The number of nanoseconds in a second. */ +#define NSECS_IN_SEC 1000000000ull /* ns */ + +/* Ring buffer virtual address start at 4GB */ +#define KBASE_HWC_CSF_RING_BUFFER_VA_START (1ull << 32) + +/** + * struct kbase_hwcnt_backend_csf_if_fw_ring_buf - ring buffer for CSF interface + * used to save the manual and + * auto HWC samples from + * firmware. + * @gpu_dump_base: Starting GPU base address of the ring buffer. + * @cpu_dump_base: Starting CPU address for the mapping. + * @buf_count: Buffer count in the ring buffer, MUST be power of 2. + * @as_nr: Address space number for the memory mapping. + * @phys: Physical memory allocation used by the mapping. + * @num_pages: Size of the mapping, in memory pages. 
+ */ +struct kbase_hwcnt_backend_csf_if_fw_ring_buf { + u64 gpu_dump_base; + void *cpu_dump_base; + size_t buf_count; + u32 as_nr; + struct tagged_addr *phys; + size_t num_pages; +}; + +/** + * struct kbase_hwcnt_backend_csf_if_fw_ctx - Firmware context for the CSF + * interface, used to communicate + * with firmware. + * @kbdev: KBase device. + * @buf_bytes: The size in bytes for each buffer in the ring buffer. + * @clk_cnt: The number of clock domains in the system. + * The maximum is 64. + * @clk_enable_map: Bitmask of enabled clocks + * @rate_listener: Clock rate listener callback state. + * @ccswe_shader_cores: Shader cores cycle count software estimator. + */ +struct kbase_hwcnt_backend_csf_if_fw_ctx { + struct kbase_device *kbdev; + size_t buf_bytes; + u8 clk_cnt; + u64 clk_enable_map; + struct kbase_clk_rate_listener rate_listener; + struct kbase_ccswe ccswe_shader_cores; +}; + +static void kbasep_hwcnt_backend_csf_if_fw_assert_lock_held( + struct kbase_hwcnt_backend_csf_if_ctx *ctx) +{ + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; + struct kbase_device *kbdev; + + WARN_ON(!ctx); + + fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + kbdev = fw_ctx->kbdev; + + kbase_csf_scheduler_spin_lock_assert_held(kbdev); +} + +static void +kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + unsigned long *flags) +{ + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; + struct kbase_device *kbdev; + + WARN_ON(!ctx); + + fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + kbdev = fw_ctx->kbdev; + + kbase_csf_scheduler_spin_lock(kbdev, flags); +} + +static void kbasep_hwcnt_backend_csf_if_fw_unlock( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, unsigned long flags) +{ + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; + struct kbase_device *kbdev; + + WARN_ON(!ctx); + + fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + kbdev = fw_ctx->kbdev; + + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + 
kbase_csf_scheduler_spin_unlock(kbdev, flags); +} + +/** + * kbasep_hwcnt_backend_csf_if_fw_on_freq_change() - On freq change callback + * + * @rate_listener: Callback state + * @clk_index: Clock index + * @clk_rate_hz: Clock frequency(hz) + */ +static void kbasep_hwcnt_backend_csf_if_fw_on_freq_change( + struct kbase_clk_rate_listener *rate_listener, u32 clk_index, + u32 clk_rate_hz) +{ + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = + container_of(rate_listener, + struct kbase_hwcnt_backend_csf_if_fw_ctx, + rate_listener); + u64 timestamp_ns; + + if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES) + return; + + timestamp_ns = ktime_get_raw_ns(); + kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns, + clk_rate_hz); +} + +/** + * kbasep_hwcnt_backend_csf_if_fw_cc_enable() - Enable cycle count tracking + * + * @fw_ctx: Non-NULL pointer to CSF firmware interface context. + * @clk_enable_map: Bitmask specifying the enabled clock domains. + */ +static void kbasep_hwcnt_backend_csf_if_fw_cc_enable( + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx, u64 clk_enable_map) +{ + struct kbase_device *kbdev = fw_ctx->kbdev; + + if (kbase_hwcnt_clk_enable_map_enabled( + clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) { + /* software estimation for non-top clock domains */ + struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; + const struct kbase_clk_data *clk_data = + rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES]; + u32 cur_freq; + unsigned long flags; + u64 timestamp_ns; + + timestamp_ns = ktime_get_raw_ns(); + + spin_lock_irqsave(&rtm->lock, flags); + + cur_freq = (u32)clk_data->clock_val; + kbase_ccswe_reset(&fw_ctx->ccswe_shader_cores); + kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, + timestamp_ns, cur_freq); + + kbase_clk_rate_trace_manager_subscribe_no_lock( + rtm, &fw_ctx->rate_listener); + + spin_unlock_irqrestore(&rtm->lock, flags); + } + + fw_ctx->clk_enable_map = clk_enable_map; +} + +/** + *
kbasep_hwcnt_backend_csf_if_fw_cc_disable() - Disable cycle count tracking + * + * @fw_ctx: Non-NULL pointer to CSF firmware interface context. + */ +static void kbasep_hwcnt_backend_csf_if_fw_cc_disable( + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx) +{ + struct kbase_device *kbdev = fw_ctx->kbdev; + struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; + u64 clk_enable_map = fw_ctx->clk_enable_map; + + if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, + KBASE_CLOCK_DOMAIN_SHADER_CORES)) + kbase_clk_rate_trace_manager_unsubscribe( + rtm, &fw_ctx->rate_listener); +} + +static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info) +{ + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; + struct kbase_device *kbdev; + u32 prfcnt_size; + u32 prfcnt_hw_size = 0; + u32 prfcnt_fw_size = 0; + u32 prfcnt_block_size = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK * + KBASE_HWCNT_VALUE_BYTES; + + WARN_ON(!ctx); + WARN_ON(!prfcnt_info); + + fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + kbdev = fw_ctx->kbdev; + prfcnt_size = kbdev->csf.global_iface.prfcnt_size; + prfcnt_hw_size = (prfcnt_size & 0xFF) << 8; + prfcnt_fw_size = (prfcnt_size >> 16) << 8; + fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size; + + + prfcnt_info->dump_bytes = fw_ctx->buf_bytes; + prfcnt_info->prfcnt_block_size = prfcnt_block_size; + prfcnt_info->l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices; + prfcnt_info->core_mask = + kbdev->gpu_props.props.coherency_info.group[0].core_mask; + + prfcnt_info->clk_cnt = fw_ctx->clk_cnt; + prfcnt_info->clearing_samples = true; + + /* Block size must be multiple of counter size. */ + WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_BYTES) != + 0); + /* Total size must be multiple of block size. 
*/ + WARN_ON((prfcnt_info->dump_bytes % prfcnt_info->prfcnt_block_size) != + 0); +} + +static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count, + void **cpu_dump_base, + struct kbase_hwcnt_backend_csf_if_ring_buf **out_ring_buf) +{ + struct kbase_device *kbdev; + struct tagged_addr *phys; + struct page **page_list; + void *cpu_addr; + int ret; + int i; + size_t num_pages; + u64 flags; + struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf; + + pgprot_t cpu_map_prot = PAGE_KERNEL; + u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START; + + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = + (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + + WARN_ON(!ctx); + WARN_ON(!cpu_dump_base); + WARN_ON(!out_ring_buf); + + kbdev = fw_ctx->kbdev; + + /* The buffer count must be power of 2 */ + if (!is_power_of_2(buf_count)) + return -EINVAL; + + /* alignment failure */ + if (gpu_va_base & (2048 - 1)) + return -EINVAL; + + fw_ring_buf = kzalloc(sizeof(*fw_ring_buf), GFP_KERNEL); + if (!fw_ring_buf) + return -ENOMEM; + + num_pages = PFN_UP(fw_ctx->buf_bytes * buf_count); + phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL); + if (!phys) + goto phys_alloc_error; + + page_list = kmalloc_array(num_pages, sizeof(*page_list), GFP_KERNEL); + if (!page_list) + goto page_list_alloc_error; + + /* Get physical page for the buffer */ + ret = kbase_mem_pool_alloc_pages( + &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, + phys, false); + if (ret != num_pages) + goto phys_mem_pool_alloc_error; + + /* Get the CPU virtual address */ + for (i = 0; i < num_pages; i++) + page_list[i] = as_page(phys[i]); + + cpu_addr = vmap(page_list, num_pages, VM_MAP, cpu_map_prot); + if (!cpu_addr) + goto vmap_error; + + flags = KBASE_REG_GPU_WR | KBASE_REG_GPU_NX | + KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); + + /* Update MMU table */ + ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, + gpu_va_base >> 
PAGE_SHIFT, phys, num_pages, + flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW); + if (ret) + goto mmu_insert_failed; + + kfree(page_list); + + fw_ring_buf->gpu_dump_base = gpu_va_base; + fw_ring_buf->cpu_dump_base = cpu_addr; + fw_ring_buf->phys = phys; + fw_ring_buf->num_pages = num_pages; + fw_ring_buf->buf_count = buf_count; + fw_ring_buf->as_nr = MCU_AS_NR; + + *cpu_dump_base = fw_ring_buf->cpu_dump_base; + *out_ring_buf = + (struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf; + + + return 0; + +mmu_insert_failed: + vunmap(cpu_addr); +vmap_error: + kbase_mem_pool_free_pages( + &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, + phys, false, false); +phys_mem_pool_alloc_error: + kfree(page_list); +page_list_alloc_error: + kfree(phys); +phys_alloc_error: + kfree(fw_ring_buf); + return -ENOMEM; +} + +static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, + u32 buf_index_first, u32 buf_index_last, bool for_cpu) +{ + struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf = + (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf; + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = + (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + size_t i; + size_t pg_first; + size_t pg_last; + u64 start_address; + u64 stop_address; + u32 ring_buf_index_first; + u32 ring_buf_index_last; + + WARN_ON(!ctx); + WARN_ON(!ring_buf); + + /* The index arguments for this function form an inclusive, exclusive + * range. + * However, when masking back to the available buffers we will make this + * inclusive at both ends so full flushes are not 0 -> 0. + */ + ring_buf_index_first = buf_index_first & (fw_ring_buf->buf_count - 1); + ring_buf_index_last = + (buf_index_last - 1) & (fw_ring_buf->buf_count - 1); + + /* The start address is the offset of the first buffer. 
*/ + start_address = fw_ctx->buf_bytes * ring_buf_index_first; + pg_first = start_address >> PAGE_SHIFT; + + /* The stop address is the last byte in the final buffer. */ + stop_address = (fw_ctx->buf_bytes * (ring_buf_index_last + 1)) - 1; + pg_last = stop_address >> PAGE_SHIFT; + + /* Check whether the buffer range wraps. */ + if (start_address > stop_address) { + /* sync the first part to the end of ring buffer. */ + for (i = pg_first; i < fw_ring_buf->num_pages; i++) { + struct page *pg = as_page(fw_ring_buf->phys[i]); + + if (for_cpu) { + kbase_sync_single_for_cpu(fw_ctx->kbdev, + kbase_dma_addr(pg), + PAGE_SIZE, + DMA_BIDIRECTIONAL); + } else { + kbase_sync_single_for_device(fw_ctx->kbdev, + kbase_dma_addr(pg), + PAGE_SIZE, + DMA_BIDIRECTIONAL); + } + } + + /* second part starts from page 0. */ + pg_first = 0; + } + + for (i = pg_first; i <= pg_last; i++) { + struct page *pg = as_page(fw_ring_buf->phys[i]); + + if (for_cpu) { + kbase_sync_single_for_cpu(fw_ctx->kbdev, + kbase_dma_addr(pg), PAGE_SIZE, + DMA_BIDIRECTIONAL); + } else { + kbase_sync_single_for_device(fw_ctx->kbdev, + kbase_dma_addr(pg), + PAGE_SIZE, + DMA_BIDIRECTIONAL); + } + } +} + +static u64 kbasep_hwcnt_backend_csf_if_fw_timestamp_ns( + struct kbase_hwcnt_backend_csf_if_ctx *ctx) +{ + CSTD_UNUSED(ctx); + return ktime_get_raw_ns(); +} + +static void kbasep_hwcnt_backend_csf_if_fw_ring_buf_free( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf) +{ + struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf = + (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf; + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = + (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + + if (!fw_ring_buf) + return; + + if (fw_ring_buf->phys) { + u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START; + + WARN_ON(kbase_mmu_teardown_pages( + fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu, + gpu_va_base >> PAGE_SHIFT, fw_ring_buf->num_pages, + MCU_AS_NR)); + + 
vunmap(fw_ring_buf->cpu_dump_base); + + kbase_mem_pool_free_pages( + &fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], + fw_ring_buf->num_pages, fw_ring_buf->phys, false, + false); + + kfree(fw_ring_buf->phys); + + kfree(fw_ring_buf); + } +} + +static void kbasep_hwcnt_backend_csf_if_fw_dump_enable( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, + struct kbase_hwcnt_backend_csf_if_enable *enable) +{ + u32 prfcnt_config; + struct kbase_device *kbdev; + struct kbase_csf_global_iface *global_iface; + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = + (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf = + (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf; + + WARN_ON(!ctx); + WARN_ON(!ring_buf); + WARN_ON(!enable); + kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); + + kbdev = fw_ctx->kbdev; + global_iface = &kbdev->csf.global_iface; + + /* Configure */ + prfcnt_config = fw_ring_buf->buf_count; + prfcnt_config |= enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT; + + /* Configure the ring buffer base address */ + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID, + fw_ring_buf->as_nr); + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_LO, + fw_ring_buf->gpu_dump_base & U32_MAX); + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_HI, + fw_ring_buf->gpu_dump_base >> 32); + + /* Set extract position to 0 */ + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_EXTRACT, 0); + + /* Configure the enable bitmap */ + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSF_EN, + enable->fe_bm); + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_SHADER_EN, + enable->shader_bm); + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_MMU_L2_EN, + enable->mmu_l2_bm); + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_TILER_EN, + enable->tiler_bm); + + /* Configure the HWC set 
and buffer size */ + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CONFIG, + prfcnt_config); + + kbdev->csf.hwcnt.enable_pending = true; + + /* Unmask the interrupts */ + kbase_csf_firmware_global_input_mask( + global_iface, GLB_ACK_IRQ_MASK, + GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK, + GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK); + kbase_csf_firmware_global_input_mask( + global_iface, GLB_ACK_IRQ_MASK, + GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK, + GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK); + kbase_csf_firmware_global_input_mask( + global_iface, GLB_ACK_IRQ_MASK, + GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK, + GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK); + kbase_csf_firmware_global_input_mask( + global_iface, GLB_ACK_IRQ_MASK, + GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK, + GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK); + + /* Enable the HWC */ + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, + (1 << GLB_REQ_PRFCNT_ENABLE_SHIFT), + GLB_REQ_PRFCNT_ENABLE_MASK); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + + prfcnt_config = kbase_csf_firmware_global_input_read(global_iface, + GLB_PRFCNT_CONFIG); + + kbasep_hwcnt_backend_csf_if_fw_cc_enable(fw_ctx, + enable->clk_enable_map); +} + +static void kbasep_hwcnt_backend_csf_if_fw_dump_disable( + struct kbase_hwcnt_backend_csf_if_ctx *ctx) +{ + struct kbase_device *kbdev; + struct kbase_csf_global_iface *global_iface; + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = + (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + + WARN_ON(!ctx); + kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); + + kbdev = fw_ctx->kbdev; + global_iface = &kbdev->csf.global_iface; + + /* Disable the HWC */ + kbdev->csf.hwcnt.enable_pending = true; + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, 0, + GLB_REQ_PRFCNT_ENABLE_MASK); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + + /* mask the interrupts */ + kbase_csf_firmware_global_input_mask( + global_iface, GLB_ACK_IRQ_MASK, 0, + GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK); + 
kbase_csf_firmware_global_input_mask( + global_iface, GLB_ACK_IRQ_MASK, 0, + GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK); + kbase_csf_firmware_global_input_mask( + global_iface, GLB_ACK_IRQ_MASK, 0, + GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK); + + /* In case we have a previous request in flight when the disable + * happens. + */ + kbdev->csf.hwcnt.request_pending = false; + + kbasep_hwcnt_backend_csf_if_fw_cc_disable(fw_ctx); +} + +static void kbasep_hwcnt_backend_csf_if_fw_dump_request( + struct kbase_hwcnt_backend_csf_if_ctx *ctx) +{ + u32 glb_req; + struct kbase_device *kbdev; + struct kbase_csf_global_iface *global_iface; + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = + (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + + WARN_ON(!ctx); + kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); + + kbdev = fw_ctx->kbdev; + global_iface = &kbdev->csf.global_iface; + + /* Trigger dumping */ + kbdev->csf.hwcnt.request_pending = true; + glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ); + glb_req ^= GLB_REQ_PRFCNT_SAMPLE_MASK; + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req, + GLB_REQ_PRFCNT_SAMPLE_MASK); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); +} + +static void kbasep_hwcnt_backend_csf_if_fw_get_indexes( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index, + u32 *insert_index) +{ + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = + (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + + WARN_ON(!ctx); + WARN_ON(!extract_index); + WARN_ON(!insert_index); + kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); + + *extract_index = kbase_csf_firmware_global_input_read( + &fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT); + *insert_index = kbase_csf_firmware_global_output( + &fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_INSERT); +} + +static void kbasep_hwcnt_backend_csf_if_fw_set_extract_index( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 extract_idx) +{ + struct 
kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = + (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + + WARN_ON(!ctx); + kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); + + /* Set the raw extract index to release the buffer back to the ring + * buffer. + */ + kbase_csf_firmware_global_input(&fw_ctx->kbdev->csf.global_iface, + GLB_PRFCNT_EXTRACT, extract_idx); +} + +static void kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, u64 *cycle_counts, + u64 clk_enable_map) +{ + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = + (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + u8 clk; + u64 timestamp_ns = ktime_get_raw_ns(); + + WARN_ON(!ctx); + WARN_ON(!cycle_counts); + kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); + + for (clk = 0; clk < fw_ctx->clk_cnt; clk++) { + if (!(clk_enable_map & (1ull << clk))) + continue; + + if (clk == KBASE_CLOCK_DOMAIN_TOP) { + /* Read cycle count for top clock domain. */ + kbase_backend_get_gpu_time_norequest( + fw_ctx->kbdev, &cycle_counts[clk], NULL, NULL); + } else { + /* Estimate cycle count for non-top clock domain. */ + cycle_counts[clk] = kbase_ccswe_cycle_at( + &fw_ctx->ccswe_shader_cores, timestamp_ns); + } + } +} + +/** + * kbasep_hwcnt_backend_csf_if_fw_ctx_destroy() - Destroy a CSF FW interface context. + * + * @fw_ctx: Pointer to context to destroy. + */ +static void kbasep_hwcnt_backend_csf_if_fw_ctx_destroy( + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx) +{ + if (!fw_ctx) + return; + + kfree(fw_ctx); +} + +/** + * kbasep_hwcnt_backend_csf_if_fw_ctx_create() - Create a CSF Firmware context. + * + * @kbdev: Non-NULL pointer to kbase device. + * @out_ctx: Non-NULL pointer to where info is stored on success. + * Return: 0 on success, else error code. 
+ */ +static int kbasep_hwcnt_backend_csf_if_fw_ctx_create( + struct kbase_device *kbdev, + struct kbase_hwcnt_backend_csf_if_fw_ctx **out_ctx) +{ + u8 clk; + int errcode = -ENOMEM; + struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL; + + WARN_ON(!kbdev); + WARN_ON(!out_ctx); + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + goto error; + + ctx->kbdev = kbdev; + + /* Determine the number of available clock domains. */ + for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) { + if (kbdev->pm.clk_rtm.clks[clk] == NULL) + break; + } + ctx->clk_cnt = clk; + + ctx->clk_enable_map = 0; + kbase_ccswe_init(&ctx->ccswe_shader_cores); + ctx->rate_listener.notify = + kbasep_hwcnt_backend_csf_if_fw_on_freq_change; + + *out_ctx = ctx; + + return 0; +error: + kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(ctx); + return errcode; +} + +void kbase_hwcnt_backend_csf_if_fw_destroy( + struct kbase_hwcnt_backend_csf_if *if_fw) +{ + if (!if_fw) + return; + + kbasep_hwcnt_backend_csf_if_fw_ctx_destroy( + (struct kbase_hwcnt_backend_csf_if_fw_ctx *)if_fw->ctx); + memset(if_fw, 0, sizeof(*if_fw)); +} + +int kbase_hwcnt_backend_csf_if_fw_create( + struct kbase_device *kbdev, struct kbase_hwcnt_backend_csf_if *if_fw) +{ + int errcode; + struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL; + + if (!kbdev || !if_fw) + return -EINVAL; + + errcode = kbasep_hwcnt_backend_csf_if_fw_ctx_create(kbdev, &ctx); + if (errcode) + return errcode; + + if_fw->ctx = (struct kbase_hwcnt_backend_csf_if_ctx *)ctx; + if_fw->assert_lock_held = + kbasep_hwcnt_backend_csf_if_fw_assert_lock_held; + if_fw->lock = kbasep_hwcnt_backend_csf_if_fw_lock; + if_fw->unlock = kbasep_hwcnt_backend_csf_if_fw_unlock; + if_fw->get_prfcnt_info = kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info; + if_fw->ring_buf_alloc = kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc; + if_fw->ring_buf_sync = kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync; + if_fw->ring_buf_free = kbasep_hwcnt_backend_csf_if_fw_ring_buf_free; + 
if_fw->timestamp_ns = kbasep_hwcnt_backend_csf_if_fw_timestamp_ns; + if_fw->dump_enable = kbasep_hwcnt_backend_csf_if_fw_dump_enable; + if_fw->dump_disable = kbasep_hwcnt_backend_csf_if_fw_dump_disable; + if_fw->dump_request = kbasep_hwcnt_backend_csf_if_fw_dump_request; + if_fw->get_gpu_cycle_count = + kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count; + if_fw->get_indexes = kbasep_hwcnt_backend_csf_if_fw_get_indexes; + if_fw->set_extract_index = + kbasep_hwcnt_backend_csf_if_fw_set_extract_index; + + return 0; +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf_if_fw.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf_if_fw.h new file mode 100644 index 0000000..b69668b --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_csf_if_fw.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +/* + * Concrete implementation of kbase_hwcnt_backend_csf_if interface for CSF FW + */ + +#ifndef _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_ +#define _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_ + +#include "mali_kbase_hwcnt_backend_csf_if.h" + +/** + * kbase_hwcnt_backend_csf_if_fw_create() - Create a firmware CSF interface + * of hardware counter backend. + * @kbdev: Non-NULL pointer to Kbase device. + * @if_fw: Non-NULL pointer to backend interface structure that is filled in on + * creation success. + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_backend_csf_if_fw_create( + struct kbase_device *kbdev, struct kbase_hwcnt_backend_csf_if *if_fw); + +/** + * kbase_hwcnt_backend_csf_if_fw_destroy() - Destroy a firmware CSF interface of + * hardware counter backend. + * @if_fw: Pointer to a CSF interface to destroy. + */ +void kbase_hwcnt_backend_csf_if_fw_destroy( + struct kbase_hwcnt_backend_csf_if *if_fw); + +#endif /* _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_jm.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_jm.c new file mode 100644 index 0000000..64001b1 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_jm.c @@ -0,0 +1,793 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include "mali_kbase_hwcnt_backend_jm.h" +#include "mali_kbase_hwcnt_gpu.h" +#include "mali_kbase_hwcnt_types.h" +#include "mali_kbase.h" +#include "backend/gpu/mali_kbase_pm_ca.h" +#include "mali_kbase_hwaccess_instr.h" +#include "mali_kbase_hwaccess_time.h" +#include "mali_kbase_ccswe.h" + +#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" + +#include "backend/gpu/mali_kbase_pm_internal.h" + +/** + * struct kbase_hwcnt_backend_jm_info - Information used to create an instance + * of a JM hardware counter backend. + * @kbdev: KBase device. + * @counter_set: The performance counter set to use. + * @metadata: Hardware counter metadata. + * @dump_bytes: Bytes of GPU memory required to perform a + * hardware counter dump. + */ +struct kbase_hwcnt_backend_jm_info { + struct kbase_device *kbdev; + enum kbase_hwcnt_set counter_set; + const struct kbase_hwcnt_metadata *metadata; + size_t dump_bytes; +}; + +/** + * struct kbase_hwcnt_backend_jm - Instance of a JM hardware counter backend. + * @info: Info used to create the backend. + * @kctx: KBase context used for GPU memory allocation and + * counter dumping. + * @gpu_dump_va: GPU hardware counter dump buffer virtual address. + * @cpu_dump_va: CPU mapping of gpu_dump_va. + * @vmap: Dump buffer vmap. + * @enabled: True if dumping has been enabled, else false. + * @pm_core_mask: PM state sync-ed shaders core mask for the enabled + * dumping. + * @curr_config: Current allocated hardware resources to correctly map the src + * raw dump buffer to the dst dump buffer. + * @clk_enable_map: The enable map specifying enabled clock domains. + * @cycle_count_elapsed: + * Cycle count elapsed for a given sample period. 
+ * The top clock cycle, index 0, is read directly from + * hardware, but the other clock domains need to be + * calculated with software estimation. + * @prev_cycle_count: Previous cycle count to calculate the cycle count for + * sample period. + * @rate_listener: Clock rate listener callback state. + * @ccswe_shader_cores: Shader cores cycle count software estimator. + */ +struct kbase_hwcnt_backend_jm { + const struct kbase_hwcnt_backend_jm_info *info; + struct kbase_context *kctx; + u64 gpu_dump_va; + void *cpu_dump_va; + struct kbase_vmap_struct *vmap; + bool enabled; + u64 pm_core_mask; + struct kbase_hwcnt_curr_config curr_config; + u64 clk_enable_map; + u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS]; + u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS]; + struct kbase_clk_rate_listener rate_listener; + struct kbase_ccswe ccswe_shader_cores; +}; + +/** + * kbasep_hwcnt_backend_jm_gpu_info_init() - Initialise an info structure used + * to create the hwcnt metadata. + * @kbdev: Non-NULL pointer to kbase device. + * @info: Non-NULL pointer to data structure to be filled in. + * + * The initialised info struct will only be valid for use while kbdev is valid. + */ +static int +kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev, + struct kbase_hwcnt_gpu_info *info) +{ + size_t clk; + + if (!kbdev || !info) + return -EINVAL; + + { + const struct base_gpu_props *props = &kbdev->gpu_props.props; + const size_t l2_count = props->l2_props.num_l2_slices; + const size_t core_mask = + props->coherency_info.group[0].core_mask; + + info->l2_count = l2_count; + info->core_mask = core_mask; + info->prfcnt_values_per_block = + KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK; + } + + /* Determine the number of available clock domains. 
*/ + for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) { + if (kbdev->pm.clk_rtm.clks[clk] == NULL) + break; + } + info->clk_cnt = clk; + + return 0; +} + +/** + * kbasep_hwcnt_backend_jm_on_freq_change() - On freq change callback + * + * @rate_listener: Callback state + * @clk_index: Clock index + * @clk_rate_hz: Clock frequency(hz) + */ +static void kbasep_hwcnt_backend_jm_on_freq_change( + struct kbase_clk_rate_listener *rate_listener, + u32 clk_index, + u32 clk_rate_hz) +{ + struct kbase_hwcnt_backend_jm *backend_jm = container_of( + rate_listener, struct kbase_hwcnt_backend_jm, rate_listener); + u64 timestamp_ns; + + if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES) + return; + + timestamp_ns = ktime_get_raw_ns(); + kbase_ccswe_freq_change( + &backend_jm->ccswe_shader_cores, timestamp_ns, clk_rate_hz); +} + +/** + * kbasep_hwcnt_backend_jm_cc_enable() - Enable cycle count tracking + * + * @backend_jm: Non-NULL pointer to backend. + * @enable_map: Non-NULL pointer to enable map specifying enabled counters. + * @timestamp_ns: Timestamp(ns) when HWCNT were enabled. + */ +static void kbasep_hwcnt_backend_jm_cc_enable( + struct kbase_hwcnt_backend_jm *backend_jm, + const struct kbase_hwcnt_enable_map *enable_map, + u64 timestamp_ns) +{ + struct kbase_device *kbdev = backend_jm->kctx->kbdev; + u64 clk_enable_map = enable_map->clk_enable_map; + u64 cycle_count; + + if (kbase_hwcnt_clk_enable_map_enabled( + clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) { + /* turn on the cycle counter */ + kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev); + /* Read cycle count for top clock domain. 
*/ + kbase_backend_get_gpu_time_norequest( + kbdev, &cycle_count, NULL, NULL); + + backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_TOP] = + cycle_count; + } + + if (kbase_hwcnt_clk_enable_map_enabled( + clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) { + /* software estimation for non-top clock domains */ + struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; + const struct kbase_clk_data *clk_data = + rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES]; + u32 cur_freq; + unsigned long flags; + + spin_lock_irqsave(&rtm->lock, flags); + + cur_freq = (u32) clk_data->clock_val; + kbase_ccswe_reset(&backend_jm->ccswe_shader_cores); + kbase_ccswe_freq_change( + &backend_jm->ccswe_shader_cores, + timestamp_ns, + cur_freq); + + kbase_clk_rate_trace_manager_subscribe_no_lock( + rtm, &backend_jm->rate_listener); + + spin_unlock_irqrestore(&rtm->lock, flags); + + /* ccswe was reset. The estimated cycle is zero. */ + backend_jm->prev_cycle_count[ + KBASE_CLOCK_DOMAIN_SHADER_CORES] = 0; + } + + /* Keep clk_enable_map for dump_request. */ + backend_jm->clk_enable_map = clk_enable_map; +} + +/** + * kbasep_hwcnt_backend_jm_cc_disable() - Disable cycle count tracking + * + * @backend_jm: Non-NULL pointer to backend. + */ +static void kbasep_hwcnt_backend_jm_cc_disable( + struct kbase_hwcnt_backend_jm *backend_jm) +{ + struct kbase_device *kbdev = backend_jm->kctx->kbdev; + struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; + u64 clk_enable_map = backend_jm->clk_enable_map; + + if (kbase_hwcnt_clk_enable_map_enabled( + clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) { + /* turn off the cycle counter */ + kbase_pm_release_gpu_cycle_counter(kbdev); + } + + if (kbase_hwcnt_clk_enable_map_enabled( + clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) { + + kbase_clk_rate_trace_manager_unsubscribe( + rtm, &backend_jm->rate_listener); + } +} + + +/** + * kbasep_hwcnt_gpu_update_curr_config() - Update the destination buffer with + * current config information. 
+ * @kbdev: Non-NULL pointer to kbase device. + * @curr_config: Non-NULL pointer to return the current configuration of + * hardware allocated to the GPU. + * + * The current configuration information is used for architectures where the + * max_config interface is available from the Arbiter. In this case the current + * allocated hardware is not always the same, so the current config information + * is used to correctly map the current allocated resources to the memory layout + * that is copied to the user space. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_gpu_update_curr_config( + struct kbase_device *kbdev, + struct kbase_hwcnt_curr_config *curr_config) +{ + if (WARN_ON(!kbdev) || WARN_ON(!curr_config)) + return -EINVAL; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + curr_config->num_l2_slices = + kbdev->gpu_props.curr_config.l2_slices; + curr_config->shader_present = + kbdev->gpu_props.curr_config.shader_present; + return 0; +} + +/* JM backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */ +static u64 kbasep_hwcnt_backend_jm_timestamp_ns( + struct kbase_hwcnt_backend *backend) +{ + (void)backend; + return ktime_get_raw_ns(); +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */ +static int kbasep_hwcnt_backend_jm_dump_enable_nolock( + struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map) +{ + int errcode; + struct kbase_hwcnt_backend_jm *backend_jm = + (struct kbase_hwcnt_backend_jm *)backend; + struct kbase_context *kctx; + struct kbase_device *kbdev; + struct kbase_hwcnt_physical_enable_map phys_enable_map; + enum kbase_hwcnt_physical_set phys_counter_set; + struct kbase_instr_hwcnt_enable enable; + u64 timestamp_ns; + + if (!backend_jm || !enable_map || backend_jm->enabled || + (enable_map->metadata != backend_jm->info->metadata)) + return -EINVAL; + + kctx = backend_jm->kctx; + kbdev = backend_jm->kctx->kbdev; + + 
lockdep_assert_held(&kbdev->hwaccess_lock); + + kbase_hwcnt_gpu_enable_map_to_physical(&phys_enable_map, enable_map); + + kbase_hwcnt_gpu_set_to_physical(&phys_counter_set, + backend_jm->info->counter_set); + + enable.fe_bm = phys_enable_map.fe_bm; + enable.shader_bm = phys_enable_map.shader_bm; + enable.tiler_bm = phys_enable_map.tiler_bm; + enable.mmu_l2_bm = phys_enable_map.mmu_l2_bm; + enable.counter_set = phys_counter_set; + enable.dump_buffer = backend_jm->gpu_dump_va; + enable.dump_buffer_bytes = backend_jm->info->dump_bytes; + + timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend); + + /* Update the current configuration information. */ + errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, + &backend_jm->curr_config); + if (errcode) + goto error; + + errcode = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable); + if (errcode) + goto error; + + backend_jm->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev); + + backend_jm->enabled = true; + + kbasep_hwcnt_backend_jm_cc_enable(backend_jm, enable_map, timestamp_ns); + + return 0; +error: + return errcode; +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_enable_fn */ +static int kbasep_hwcnt_backend_jm_dump_enable( + struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map) +{ + unsigned long flags; + int errcode; + struct kbase_hwcnt_backend_jm *backend_jm = + (struct kbase_hwcnt_backend_jm *)backend; + struct kbase_device *kbdev; + + if (!backend_jm) + return -EINVAL; + + kbdev = backend_jm->kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + errcode = kbasep_hwcnt_backend_jm_dump_enable_nolock( + backend, enable_map); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return errcode; +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_disable_fn */ +static void kbasep_hwcnt_backend_jm_dump_disable( + struct kbase_hwcnt_backend *backend) +{ + int errcode; + struct kbase_hwcnt_backend_jm *backend_jm = + (struct 
kbase_hwcnt_backend_jm *)backend; + + if (WARN_ON(!backend_jm) || !backend_jm->enabled) + return; + + kbasep_hwcnt_backend_jm_cc_disable(backend_jm); + + errcode = kbase_instr_hwcnt_disable_internal(backend_jm->kctx); + WARN_ON(errcode); + + backend_jm->enabled = false; +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_clear_fn */ +static int kbasep_hwcnt_backend_jm_dump_clear( + struct kbase_hwcnt_backend *backend) +{ + struct kbase_hwcnt_backend_jm *backend_jm = + (struct kbase_hwcnt_backend_jm *)backend; + + if (!backend_jm || !backend_jm->enabled) + return -EINVAL; + + return kbase_instr_hwcnt_clear(backend_jm->kctx); +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_request_fn */ +static int kbasep_hwcnt_backend_jm_dump_request( + struct kbase_hwcnt_backend *backend, + u64 *dump_time_ns) +{ + struct kbase_hwcnt_backend_jm *backend_jm = + (struct kbase_hwcnt_backend_jm *)backend; + struct kbase_device *kbdev; + const struct kbase_hwcnt_metadata *metadata; + u64 current_cycle_count; + size_t clk; + int ret; + + if (!backend_jm || !backend_jm->enabled || !dump_time_ns) + return -EINVAL; + + kbdev = backend_jm->kctx->kbdev; + metadata = backend_jm->info->metadata; + + /* Disable pre-emption, to make the timestamp as accurate as possible */ + preempt_disable(); + { + *dump_time_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend); + ret = kbase_instr_hwcnt_request_dump(backend_jm->kctx); + + kbase_hwcnt_metadata_for_each_clock(metadata, clk) { + if (!kbase_hwcnt_clk_enable_map_enabled( + backend_jm->clk_enable_map, clk)) + continue; + + if (clk == KBASE_CLOCK_DOMAIN_TOP) { + /* Read cycle count for top clock domain. */ + kbase_backend_get_gpu_time_norequest( + kbdev, &current_cycle_count, + NULL, NULL); + } else { + /* + * Estimate cycle count for non-top clock + * domain. 
+ */ + current_cycle_count = kbase_ccswe_cycle_at( + &backend_jm->ccswe_shader_cores, + *dump_time_ns); + } + backend_jm->cycle_count_elapsed[clk] = + current_cycle_count - + backend_jm->prev_cycle_count[clk]; + + /* + * Keep the current cycle count for later calculation. + */ + backend_jm->prev_cycle_count[clk] = current_cycle_count; + } + } + preempt_enable(); + + return ret; +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_wait_fn */ +static int kbasep_hwcnt_backend_jm_dump_wait( + struct kbase_hwcnt_backend *backend) +{ + struct kbase_hwcnt_backend_jm *backend_jm = + (struct kbase_hwcnt_backend_jm *)backend; + + if (!backend_jm || !backend_jm->enabled) + return -EINVAL; + + return kbase_instr_hwcnt_wait_for_dump(backend_jm->kctx); +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_get_fn */ +static int kbasep_hwcnt_backend_jm_dump_get( + struct kbase_hwcnt_backend *backend, + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map, + bool accumulate) +{ + struct kbase_hwcnt_backend_jm *backend_jm = + (struct kbase_hwcnt_backend_jm *)backend; + size_t clk; + + if (!backend_jm || !dst || !dst_enable_map || + (backend_jm->info->metadata != dst->metadata) || + (dst_enable_map->metadata != dst->metadata)) + return -EINVAL; + + /* Invalidate the kernel buffer before reading from it. */ + kbase_sync_mem_regions( + backend_jm->kctx, backend_jm->vmap, KBASE_SYNC_TO_CPU); + + kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) { + if (!kbase_hwcnt_clk_enable_map_enabled( + dst_enable_map->clk_enable_map, clk)) + continue; + + /* Extract elapsed cycle count for each clock domain. */ + dst->clk_cnt_buf[clk] = backend_jm->cycle_count_elapsed[clk]; + } + + return kbase_hwcnt_jm_dump_get(dst, backend_jm->cpu_dump_va, + dst_enable_map, backend_jm->pm_core_mask, + &backend_jm->curr_config, accumulate); +} + +/** + * kbasep_hwcnt_backend_jm_dump_alloc() - Allocate a GPU dump buffer. 
+ * @info: Non-NULL pointer to JM backend info. + * @kctx: Non-NULL pointer to kbase context. + * @gpu_dump_va: Non-NULL pointer to where GPU dump buffer virtual address + * is stored on success. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_backend_jm_dump_alloc( + const struct kbase_hwcnt_backend_jm_info *info, + struct kbase_context *kctx, + u64 *gpu_dump_va) +{ + struct kbase_va_region *reg; + u64 flags; + u64 nr_pages; + + WARN_ON(!info); + WARN_ON(!kctx); + WARN_ON(!gpu_dump_va); + + flags = BASE_MEM_PROT_CPU_RD | + BASE_MEM_PROT_GPU_WR | + BASEP_MEM_PERMANENT_KERNEL_MAPPING | + BASE_MEM_CACHED_CPU | + BASE_MEM_UNCACHED_GPU; + + nr_pages = PFN_UP(info->dump_bytes); + + reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va); + + if (!reg) + return -ENOMEM; + + return 0; +} + +/** + * kbasep_hwcnt_backend_jm_dump_free() - Free an allocated GPU dump buffer. + * @kctx: Non-NULL pointer to kbase context. + * @gpu_dump_va: GPU dump buffer virtual address. + */ +static void kbasep_hwcnt_backend_jm_dump_free( + struct kbase_context *kctx, + u64 gpu_dump_va) +{ + WARN_ON(!kctx); + if (gpu_dump_va) + kbase_mem_free(kctx, gpu_dump_va); +} + +/** + * kbasep_hwcnt_backend_jm_destroy() - Destroy a JM backend. + * @backend: Pointer to JM backend to destroy. + * + * Can be safely called on a backend in any state of partial construction. + */ +static void kbasep_hwcnt_backend_jm_destroy( + struct kbase_hwcnt_backend_jm *backend) +{ + if (!backend) + return; + + if (backend->kctx) { + struct kbase_context *kctx = backend->kctx; + struct kbase_device *kbdev = kctx->kbdev; + + if (backend->cpu_dump_va) + kbase_phy_alloc_mapping_put(kctx, backend->vmap); + + if (backend->gpu_dump_va) + kbasep_hwcnt_backend_jm_dump_free( + kctx, backend->gpu_dump_va); + + kbasep_js_release_privileged_ctx(kbdev, kctx); + kbase_destroy_context(kctx); + } + + kfree(backend); +} + +/** + * kbasep_hwcnt_backend_jm_create() - Create a JM backend. 
+ * @info: Non-NULL pointer to backend info. + * @out_backend: Non-NULL pointer to where backend is stored on success. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_backend_jm_create( + const struct kbase_hwcnt_backend_jm_info *info, + struct kbase_hwcnt_backend_jm **out_backend) +{ + int errcode; + struct kbase_device *kbdev; + struct kbase_hwcnt_backend_jm *backend = NULL; + + WARN_ON(!info); + WARN_ON(!out_backend); + + kbdev = info->kbdev; + + backend = kzalloc(sizeof(*backend), GFP_KERNEL); + if (!backend) + goto alloc_error; + + backend->info = info; + + backend->kctx = kbase_create_context(kbdev, true, + BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED, 0, NULL); + if (!backend->kctx) + goto alloc_error; + + kbasep_js_schedule_privileged_ctx(kbdev, backend->kctx); + + errcode = kbasep_hwcnt_backend_jm_dump_alloc( + info, backend->kctx, &backend->gpu_dump_va); + if (errcode) + goto error; + + backend->cpu_dump_va = kbase_phy_alloc_mapping_get(backend->kctx, + backend->gpu_dump_va, &backend->vmap); + if (!backend->cpu_dump_va) + goto alloc_error; + + kbase_ccswe_init(&backend->ccswe_shader_cores); + backend->rate_listener.notify = kbasep_hwcnt_backend_jm_on_freq_change; + + + *out_backend = backend; + return 0; + +alloc_error: + errcode = -ENOMEM; +error: + kbasep_hwcnt_backend_jm_destroy(backend); + return errcode; +} + +/* JM backend implementation of kbase_hwcnt_backend_metadata_fn */ +static const struct kbase_hwcnt_metadata * +kbasep_hwcnt_backend_jm_metadata(const struct kbase_hwcnt_backend_info *info) +{ + if (!info) + return NULL; + + return ((const struct kbase_hwcnt_backend_jm_info *)info)->metadata; +} + +/* JM backend implementation of kbase_hwcnt_backend_init_fn */ +static int kbasep_hwcnt_backend_jm_init( + const struct kbase_hwcnt_backend_info *info, + struct kbase_hwcnt_backend **out_backend) +{ + int errcode; + struct kbase_hwcnt_backend_jm *backend = NULL; + + if (!info || !out_backend) + return -EINVAL; + + errcode 
= kbasep_hwcnt_backend_jm_create( + (const struct kbase_hwcnt_backend_jm_info *) info, &backend); + if (errcode) + return errcode; + + *out_backend = (struct kbase_hwcnt_backend *)backend; + + return 0; +} + +/* JM backend implementation of kbase_hwcnt_backend_term_fn */ +static void kbasep_hwcnt_backend_jm_term(struct kbase_hwcnt_backend *backend) +{ + if (!backend) + return; + + kbasep_hwcnt_backend_jm_dump_disable(backend); + kbasep_hwcnt_backend_jm_destroy( + (struct kbase_hwcnt_backend_jm *)backend); +} + +/** + * kbasep_hwcnt_backend_jm_info_destroy() - Destroy a JM backend info. + * @info: Pointer to info to destroy. + * + * Can be safely called on a backend info in any state of partial construction. + */ +static void kbasep_hwcnt_backend_jm_info_destroy( + const struct kbase_hwcnt_backend_jm_info *info) +{ + if (!info) + return; + + kbase_hwcnt_jm_metadata_destroy(info->metadata); + kfree(info); +} + +/** + * kbasep_hwcnt_backend_jm_info_create() - Create a JM backend info. + * @kbdev: Non-NULL pointer to kbase device. + * @out_info: Non-NULL pointer to where info is stored on success. + * + * Return: 0 on success, else error code. 
+ */ +static int kbasep_hwcnt_backend_jm_info_create( + struct kbase_device *kbdev, + const struct kbase_hwcnt_backend_jm_info **out_info) +{ + int errcode = -ENOMEM; + struct kbase_hwcnt_gpu_info hwcnt_gpu_info; + struct kbase_hwcnt_backend_jm_info *info = NULL; + + WARN_ON(!kbdev); + WARN_ON(!out_info); + + errcode = kbasep_hwcnt_backend_jm_gpu_info_init(kbdev, &hwcnt_gpu_info); + if (errcode) + return errcode; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + goto error; + + info->kbdev = kbdev; + +#if defined(CONFIG_MALI_PRFCNT_SET_SECONDARY) + info->counter_set = KBASE_HWCNT_SET_SECONDARY; +#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY) + info->counter_set = KBASE_HWCNT_SET_TERTIARY; +#else + /* Default to primary */ + info->counter_set = KBASE_HWCNT_SET_PRIMARY; +#endif + + errcode = kbase_hwcnt_jm_metadata_create(&hwcnt_gpu_info, + info->counter_set, + &info->metadata, + &info->dump_bytes); + if (errcode) + goto error; + + *out_info = info; + + return 0; +error: + kbasep_hwcnt_backend_jm_info_destroy(info); + return errcode; +} + +int kbase_hwcnt_backend_jm_create( + struct kbase_device *kbdev, + struct kbase_hwcnt_backend_interface *iface) +{ + int errcode; + const struct kbase_hwcnt_backend_jm_info *info = NULL; + + if (!kbdev || !iface) + return -EINVAL; + + errcode = kbasep_hwcnt_backend_jm_info_create(kbdev, &info); + + if (errcode) + return errcode; + + iface->info = (struct kbase_hwcnt_backend_info *)info; + iface->metadata = kbasep_hwcnt_backend_jm_metadata; + iface->init = kbasep_hwcnt_backend_jm_init; + iface->term = kbasep_hwcnt_backend_jm_term; + iface->timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns; + iface->dump_enable = kbasep_hwcnt_backend_jm_dump_enable; + iface->dump_enable_nolock = kbasep_hwcnt_backend_jm_dump_enable_nolock; + iface->dump_disable = kbasep_hwcnt_backend_jm_dump_disable; + iface->dump_clear = kbasep_hwcnt_backend_jm_dump_clear; + iface->dump_request = kbasep_hwcnt_backend_jm_dump_request; + iface->dump_wait 
= kbasep_hwcnt_backend_jm_dump_wait; + iface->dump_get = kbasep_hwcnt_backend_jm_dump_get; + + return 0; +} + +void kbase_hwcnt_backend_jm_destroy( + struct kbase_hwcnt_backend_interface *iface) +{ + if (!iface) + return; + + kbasep_hwcnt_backend_jm_info_destroy( + (const struct kbase_hwcnt_backend_jm_info *)iface->info); + memset(iface, 0, sizeof(*iface)); +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_jm.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_jm.h new file mode 100644 index 0000000..5319516 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_jm.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/** + * Concrete implementation of mali_kbase_hwcnt_backend interface for JM + * backend. + */ + +#ifndef _KBASE_HWCNT_BACKEND_JM_H_ +#define _KBASE_HWCNT_BACKEND_JM_H_ + +#include "mali_kbase_hwcnt_backend.h" + +struct kbase_device; + +/** + * kbase_hwcnt_backend_jm_create() - Create a JM hardware counter backend + * interface. + * @kbdev: Non-NULL pointer to kbase device. + * @iface: Non-NULL pointer to backend interface structure that is filled in + * on creation success. 
+ * + * Calls to iface->dump_enable_nolock() require kbdev->hwaccess_lock held. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_backend_jm_create( + struct kbase_device *kbdev, + struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_jm_destroy() - Destroy a JM hardware counter backend + * interface. + * @iface: Pointer to interface to destroy. + * + * Can be safely called on an all-zeroed interface, or on an already destroyed + * interface. + */ +void kbase_hwcnt_backend_jm_destroy( + struct kbase_hwcnt_backend_interface *iface); + +#endif /* _KBASE_HWCNT_BACKEND_JM_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_context.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_context.h index bc50ad1..1adf2ef 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_context.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_context.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ /** @@ -28,6 +27,7 @@ #define _KBASE_HWCNT_CONTEXT_H_ #include +#include struct kbase_hwcnt_backend_interface; struct kbase_hwcnt_context; @@ -66,7 +66,7 @@ const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata( /** * kbase_hwcnt_context_disable() - Increment the disable count of the context. - * @hctx: Pointer to the hardware counter context. + * @hctx: Non-NULL pointer to the hardware counter context. * * If a call to this function increments the disable count from 0 to 1, and * an accumulator has been acquired, then a counter dump will be performed @@ -84,7 +84,7 @@ void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx); * kbase_hwcnt_context_disable_atomic() - Increment the disable count of the * context if possible in an atomic * context. - * @hctx: Pointer to the hardware counter context. + * @hctx: Non-NULL pointer to the hardware counter context. * * This function will only succeed if hardware counters are effectively already * disabled, i.e. there is no accumulator, the disable count is already @@ -99,7 +99,7 @@ bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx); /** * kbase_hwcnt_context_enable() - Decrement the disable count of the context. - * @hctx: Pointer to the hardware counter context. + * @hctx: Non-NULL pointer to the hardware counter context. * * If a call to this function decrements the disable count from 1 to 0, and * an accumulator has been acquired, then counters will be re-enabled via the @@ -116,4 +116,36 @@ bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx); */ void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx); +/** + * kbase_hwcnt_context_queue_work() - Queue hardware counter related async + * work on a workqueue specialized for + * hardware counters. + * @hctx: Non-NULL pointer to the hardware counter context. + * @work: Non-NULL pointer to work to queue. 
+ * + * Return: false if work was already on a queue, true otherwise. + * + * Performance counter related work is high priority, short running, and + * generally CPU locality is unimportant. There is no standard workqueue that + * can service this flavor of work. + * + * Rather than have each user of counters define their own workqueue, we have + * a centralized one in here that anybody using this hardware counter API + * should use. + * + * Before the context is destroyed, all work submitted must have been completed. + * Given that the work enqueued via this function is likely to be hardware + * counter related and will therefore use the context object, this is likely + * to be behavior that will occur naturally. + * + * Historical note: prior to this centralized workqueue, the system_highpri_wq + * was used. This was generally fine, except when a particularly long running, + * higher priority thread ended up scheduled on the enqueuing CPU core. Given + * that hardware counters requires tight integration with power management, + * this meant progress through the power management states could be stalled + * for however long that higher priority thread took. + */ +bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx, + struct work_struct *work); + #endif /* _KBASE_HWCNT_CONTEXT_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.c index 095c765..2975269 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,170 +17,111 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #include "mali_kbase_hwcnt_gpu.h" #include "mali_kbase_hwcnt_types.h" -#include "mali_kbase.h" -#ifdef CONFIG_MALI_NO_MALI -#include "backend/gpu/mali_kbase_model_dummy.h" -#endif - -#define KBASE_HWCNT_V4_BLOCKS_PER_GROUP 8 -#define KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP 4 -#define KBASE_HWCNT_V4_MAX_GROUPS \ - (KBASE_HWCNT_AVAIL_MASK_BITS / KBASE_HWCNT_V4_BLOCKS_PER_GROUP) -#define KBASE_HWCNT_V4_HEADERS_PER_BLOCK 4 -#define KBASE_HWCNT_V4_COUNTERS_PER_BLOCK 60 -#define KBASE_HWCNT_V4_VALUES_PER_BLOCK \ - (KBASE_HWCNT_V4_HEADERS_PER_BLOCK + KBASE_HWCNT_V4_COUNTERS_PER_BLOCK) -/* Index of the PRFCNT_EN header into a V4 counter block */ -#define KBASE_HWCNT_V4_PRFCNT_EN_HEADER 2 - -#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4 -#define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4 -#define KBASE_HWCNT_V5_COUNTERS_PER_BLOCK 60 -#define KBASE_HWCNT_V5_VALUES_PER_BLOCK \ - (KBASE_HWCNT_V5_HEADERS_PER_BLOCK + KBASE_HWCNT_V5_COUNTERS_PER_BLOCK) -/* Index of the PRFCNT_EN header into a V5 counter block */ -#define KBASE_HWCNT_V5_PRFCNT_EN_HEADER 2 - -/** - * kbasep_hwcnt_backend_gpu_metadata_v4_create() - Create hardware counter - * metadata for a v4 GPU. - * @v4_info: Non-NULL pointer to hwcnt info for a v4 GPU. - * @metadata: Non-NULL pointer to where created metadata is stored on success. - * - * Return: 0 on success, else error code. 
- */ -static int kbasep_hwcnt_backend_gpu_metadata_v4_create( - const struct kbase_hwcnt_gpu_v4_info *v4_info, - const struct kbase_hwcnt_metadata **metadata) -{ - size_t grp; - int errcode = -ENOMEM; - struct kbase_hwcnt_description desc; - struct kbase_hwcnt_group_description *grps; - size_t avail_mask_bit; - WARN_ON(!v4_info); - WARN_ON(!metadata); +#include +#include - /* Check if there are enough bits in the availability mask to represent - * all the hardware counter blocks in the system. - */ - if (v4_info->cg_count > KBASE_HWCNT_V4_MAX_GROUPS) - return -EINVAL; - grps = kcalloc(v4_info->cg_count, sizeof(*grps), GFP_KERNEL); - if (!grps) - goto clean_up; - - desc.grp_cnt = v4_info->cg_count; - desc.grps = grps; - - for (grp = 0; grp < v4_info->cg_count; grp++) { - size_t blk; - size_t sc; - const u64 core_mask = v4_info->cgs[grp].core_mask; - struct kbase_hwcnt_block_description *blks = kcalloc( - KBASE_HWCNT_V4_BLOCKS_PER_GROUP, - sizeof(*blks), - GFP_KERNEL); - - if (!blks) - goto clean_up; - - grps[grp].type = KBASE_HWCNT_GPU_GROUP_TYPE_V4; - grps[grp].blk_cnt = KBASE_HWCNT_V4_BLOCKS_PER_GROUP; - grps[grp].blks = blks; - - for (blk = 0; blk < KBASE_HWCNT_V4_BLOCKS_PER_GROUP; blk++) { - blks[blk].inst_cnt = 1; - blks[blk].hdr_cnt = - KBASE_HWCNT_V4_HEADERS_PER_BLOCK; - blks[blk].ctr_cnt = - KBASE_HWCNT_V4_COUNTERS_PER_BLOCK; +static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, + bool is_csf) +{ + switch (counter_set) { + case KBASE_HWCNT_SET_PRIMARY: + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE; + break; + case KBASE_HWCNT_SET_SECONDARY: + if (is_csf) { + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2; + } else { + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED; } - - for (sc = 0; sc < KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP; sc++) { - blks[sc].type = core_mask & (1ull << sc) ? 
- KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER : - KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED; + break; + case KBASE_HWCNT_SET_TERTIARY: + if (is_csf) { + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3; + } else { + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED; } - - blks[4].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER; - blks[5].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2; - blks[6].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED; - blks[7].type = (grp == 0) ? - KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM : - KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED; - - WARN_ON(KBASE_HWCNT_V4_BLOCKS_PER_GROUP != 8); + break; + default: + WARN_ON(true); } +} - /* Initialise the availability mask */ - desc.avail_mask = 0; - avail_mask_bit = 0; - - for (grp = 0; grp < desc.grp_cnt; grp++) { - size_t blk; - const struct kbase_hwcnt_block_description *blks = - desc.grps[grp].blks; - for (blk = 0; blk < desc.grps[grp].blk_cnt; blk++) { - WARN_ON(blks[blk].inst_cnt != 1); - if (blks[blk].type != - KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED) - desc.avail_mask |= (1ull << avail_mask_bit); - - avail_mask_bit++; - } +static void kbasep_get_tiler_block_type(u64 *dst, + enum kbase_hwcnt_set counter_set) +{ + switch (counter_set) { + case KBASE_HWCNT_SET_PRIMARY: + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER; + break; + case KBASE_HWCNT_SET_SECONDARY: + case KBASE_HWCNT_SET_TERTIARY: + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED; + break; + default: + WARN_ON(true); } +} - errcode = kbase_hwcnt_metadata_create(&desc, metadata); - - /* Always clean up, as metadata will make a copy of the input args */ -clean_up: - if (grps) { - for (grp = 0; grp < v4_info->cg_count; grp++) - kfree(grps[grp].blks); - kfree(grps); +static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, + bool is_csf) +{ + switch (counter_set) { + case KBASE_HWCNT_SET_PRIMARY: + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC; + break; + case KBASE_HWCNT_SET_SECONDARY: + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2; + 
break; + case KBASE_HWCNT_SET_TERTIARY: + if (is_csf) { + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3; + } else { + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED; + } + break; + default: + WARN_ON(true); } - return errcode; } -/** - * kbasep_hwcnt_backend_gpu_v4_dump_bytes() - Get the raw dump buffer size for a - * V4 GPU. - * @v4_info: Non-NULL pointer to hwcnt info for a v4 GPU. - * - * Return: Size of buffer the V4 GPU needs to perform a counter dump. - */ -static size_t kbasep_hwcnt_backend_gpu_v4_dump_bytes( - const struct kbase_hwcnt_gpu_v4_info *v4_info) +static void kbasep_get_memsys_block_type(u64 *dst, + enum kbase_hwcnt_set counter_set) { - return v4_info->cg_count * - KBASE_HWCNT_V4_BLOCKS_PER_GROUP * - KBASE_HWCNT_V4_VALUES_PER_BLOCK * - KBASE_HWCNT_VALUE_BYTES; + switch (counter_set) { + case KBASE_HWCNT_SET_PRIMARY: + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS; + break; + case KBASE_HWCNT_SET_SECONDARY: + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2; + break; + case KBASE_HWCNT_SET_TERTIARY: + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED; + break; + default: + WARN_ON(true); + } } /** - * kbasep_hwcnt_backend_gpu_metadata_v5_create() - Create hardware counter - * metadata for a v5 GPU. - * @v5_info: Non-NULL pointer to hwcnt info for a v5 GPU. - * @use_secondary: True if secondary performance counters should be used, else - * false. Ignored if secondary counters are not supported. + * kbasep_hwcnt_backend_gpu_metadata_create() - Create hardware counter metadata + * for the GPU. + * @gpu_info: Non-NULL pointer to hwcnt info for current GPU. + * @is_csf: true for CSF GPU, otherwise false. + * @counter_set: The performance counter set to use. * @metadata: Non-NULL pointer to where created metadata is stored * on success. * * Return: 0 on success, else error code. 
*/ -static int kbasep_hwcnt_backend_gpu_metadata_v5_create( - const struct kbase_hwcnt_gpu_v5_info *v5_info, - bool use_secondary, +static int kbasep_hwcnt_backend_gpu_metadata_create( + const struct kbase_hwcnt_gpu_info *gpu_info, const bool is_csf, + enum kbase_hwcnt_set counter_set, const struct kbase_hwcnt_metadata **metadata) { struct kbase_hwcnt_description desc; @@ -189,13 +131,13 @@ static int kbasep_hwcnt_backend_gpu_metadata_v5_create( size_t non_sc_block_count; size_t sc_block_count; - WARN_ON(!v5_info); + WARN_ON(!gpu_info); WARN_ON(!metadata); /* Calculate number of block instances that aren't shader cores */ - non_sc_block_count = 2 + v5_info->l2_count; + non_sc_block_count = 2 + gpu_info->l2_count; /* Calculate number of block instances that are shader cores */ - sc_block_count = fls64(v5_info->core_mask); + sc_block_count = fls64(gpu_info->core_mask); /* * A system can have up to 64 shader cores, but the 64-bit @@ -207,25 +149,26 @@ static int kbasep_hwcnt_backend_gpu_metadata_v5_create( if ((sc_block_count + non_sc_block_count) > KBASE_HWCNT_AVAIL_MASK_BITS) return -EINVAL; - /* One Job Manager block */ - blks[0].type = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM; + /* One Front End block */ + kbasep_get_fe_block_type(&blks[0].type, counter_set, is_csf); blks[0].inst_cnt = 1; blks[0].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; - blks[0].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; + blks[0].ctr_cnt = gpu_info->prfcnt_values_per_block - + KBASE_HWCNT_V5_HEADERS_PER_BLOCK; /* One Tiler block */ - blks[1].type = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER; + kbasep_get_tiler_block_type(&blks[1].type, counter_set); blks[1].inst_cnt = 1; blks[1].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; - blks[1].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; + blks[1].ctr_cnt = gpu_info->prfcnt_values_per_block - + KBASE_HWCNT_V5_HEADERS_PER_BLOCK; /* l2_count memsys blks */ - blks[2].type = use_secondary ? 
- KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2 : - KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS; - blks[2].inst_cnt = v5_info->l2_count; + kbasep_get_memsys_block_type(&blks[2].type, counter_set); + blks[2].inst_cnt = gpu_info->l2_count; blks[2].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; - blks[2].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; + blks[2].ctr_cnt = gpu_info->prfcnt_values_per_block - + KBASE_HWCNT_V5_HEADERS_PER_BLOCK; /* * There are as many shader cores in the system as there are bits set in @@ -243,12 +186,11 @@ static int kbasep_hwcnt_backend_gpu_metadata_v5_create( * requirements, and embed the core mask into the availability mask so * we can determine later which shader cores physically exist. */ - blks[3].type = use_secondary ? - KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 : - KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC; + kbasep_get_sc_block_type(&blks[3].type, counter_set, is_csf); blks[3].inst_cnt = sc_block_count; blks[3].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; - blks[3].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK; + blks[3].ctr_cnt = gpu_info->prfcnt_values_per_block - + KBASE_HWCNT_V5_HEADERS_PER_BLOCK; WARN_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 4); @@ -258,61 +200,35 @@ static int kbasep_hwcnt_backend_gpu_metadata_v5_create( desc.grp_cnt = 1; desc.grps = &group; + desc.clk_cnt = gpu_info->clk_cnt; /* The JM, Tiler, and L2s are always available, and are before cores */ desc.avail_mask = (1ull << non_sc_block_count) - 1; /* Embed the core mask directly in the availability mask */ - desc.avail_mask |= (v5_info->core_mask << non_sc_block_count); + desc.avail_mask |= (gpu_info->core_mask << non_sc_block_count); return kbase_hwcnt_metadata_create(&desc, metadata); } /** - * kbasep_hwcnt_backend_gpu_v5_dump_bytes() - Get the raw dump buffer size for a - * V5 GPU. - * @v5_info: Non-NULL pointer to hwcnt info for a v5 GPU. + * kbasep_hwcnt_backend_jm_dump_bytes() - Get the raw dump buffer size for the + * GPU. 
+ * @gpu_info: Non-NULL pointer to hwcnt info for the GPU. * - * Return: Size of buffer the V5 GPU needs to perform a counter dump. + * Return: Size of buffer the GPU needs to perform a counter dump. */ -static size_t kbasep_hwcnt_backend_gpu_v5_dump_bytes( - const struct kbase_hwcnt_gpu_v5_info *v5_info) +static size_t +kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_info *gpu_info) { - WARN_ON(!v5_info); - return (2 + v5_info->l2_count + fls64(v5_info->core_mask)) * - KBASE_HWCNT_V5_VALUES_PER_BLOCK * - KBASE_HWCNT_VALUE_BYTES; -} + WARN_ON(!gpu_info); -int kbase_hwcnt_gpu_info_init( - struct kbase_device *kbdev, - struct kbase_hwcnt_gpu_info *info) -{ - if (!kbdev || !info) - return -EINVAL; - -#ifdef CONFIG_MALI_NO_MALI - /* NO_MALI uses V5 layout, regardless of the underlying platform. */ - info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V5; - info->v5.l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS; - info->v5.core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1; -#else - { - const struct base_gpu_props *props = &kbdev->gpu_props.props; - const size_t l2_count = props->l2_props.num_l2_slices; - const size_t core_mask = - props->coherency_info.group[0].core_mask; - - info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V5; - info->v5.l2_count = l2_count; - info->v5.core_mask = core_mask; - } -#endif - return 0; + return (2 + gpu_info->l2_count + fls64(gpu_info->core_mask)) * + gpu_info->prfcnt_values_per_block * KBASE_HWCNT_VALUE_BYTES; } -int kbase_hwcnt_gpu_metadata_create( - const struct kbase_hwcnt_gpu_info *info, - bool use_secondary, +int kbase_hwcnt_jm_metadata_create( + const struct kbase_hwcnt_gpu_info *gpu_info, + enum kbase_hwcnt_set counter_set, const struct kbase_hwcnt_metadata **out_metadata, size_t *out_dump_bytes) { @@ -320,23 +236,19 @@ int kbase_hwcnt_gpu_metadata_create( const struct kbase_hwcnt_metadata *metadata; size_t dump_bytes; - if (!info || !out_metadata || !out_dump_bytes) + if (!gpu_info || !out_metadata || !out_dump_bytes) 
return -EINVAL; - switch (info->type) { - case KBASE_HWCNT_GPU_GROUP_TYPE_V4: - dump_bytes = kbasep_hwcnt_backend_gpu_v4_dump_bytes(&info->v4); - errcode = kbasep_hwcnt_backend_gpu_metadata_v4_create( - &info->v4, &metadata); - break; - case KBASE_HWCNT_GPU_GROUP_TYPE_V5: - dump_bytes = kbasep_hwcnt_backend_gpu_v5_dump_bytes(&info->v5); - errcode = kbasep_hwcnt_backend_gpu_metadata_v5_create( - &info->v5, use_secondary, &metadata); - break; - default: - return -EINVAL; - } + /* + * For architectures where a max_config interface is available + * from the arbiter, the v5 dump bytes and the metadata v5 are + * based on the maximum possible allocation of the HW in the + * GPU cause it needs to be prepared for the worst case where + * all the available L2 cache and Shader cores are allocated. + */ + dump_bytes = kbasep_hwcnt_backend_jm_dump_bytes(gpu_info); + errcode = kbasep_hwcnt_backend_gpu_metadata_create( + gpu_info, false, counter_set, &metadata); if (errcode) return errcode; @@ -351,9 +263,37 @@ int kbase_hwcnt_gpu_metadata_create( return 0; } -KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_metadata_create); -void kbase_hwcnt_gpu_metadata_destroy( +void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata) +{ + if (!metadata) + return; + + kbase_hwcnt_metadata_destroy(metadata); +} + +int kbase_hwcnt_csf_metadata_create( + const struct kbase_hwcnt_gpu_info *gpu_info, + enum kbase_hwcnt_set counter_set, + const struct kbase_hwcnt_metadata **out_metadata) +{ + int errcode; + const struct kbase_hwcnt_metadata *metadata; + + if (!gpu_info || !out_metadata) + return -EINVAL; + + errcode = kbasep_hwcnt_backend_gpu_metadata_create( + gpu_info, true, counter_set, &metadata); + if (errcode) + return errcode; + + *out_metadata = metadata; + + return 0; +} + +void kbase_hwcnt_csf_metadata_destroy( const struct kbase_hwcnt_metadata *metadata) { if (!metadata) @@ -361,7 +301,127 @@ void kbase_hwcnt_gpu_metadata_destroy( kbase_hwcnt_metadata_destroy(metadata); } 
-KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_metadata_destroy); + +int kbase_hwcnt_gpu_metadata_create_truncate_64( + const struct kbase_hwcnt_metadata **dst_md, + const struct kbase_hwcnt_metadata *src_md) +{ + struct kbase_hwcnt_description desc; + struct kbase_hwcnt_group_description group; + struct kbase_hwcnt_block_description + blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT]; + size_t prfcnt_values_per_block; + size_t blk; + + if (!dst_md || !src_md || !src_md->grp_metadata || + !src_md->grp_metadata[0].blk_metadata) + return -EINVAL; + + /* Only support 1 group count and KBASE_HWCNT_V5_BLOCK_TYPE_COUNT block + * count in the metadata. + */ + if ((kbase_hwcnt_metadata_group_count(src_md) != 1) || + (kbase_hwcnt_metadata_block_count(src_md, 0) != + KBASE_HWCNT_V5_BLOCK_TYPE_COUNT)) + return -EINVAL; + + /* Get the values count in the first block. */ + prfcnt_values_per_block = + kbase_hwcnt_metadata_block_values_count(src_md, 0, 0); + + /* check all blocks should have same values count. */ + for (blk = 0; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) { + size_t val_cnt = + kbase_hwcnt_metadata_block_values_count(src_md, 0, blk); + if (val_cnt != prfcnt_values_per_block) + return -EINVAL; + } + + /* Only support 64 and 128 entries per block. */ + if ((prfcnt_values_per_block != 64) && (prfcnt_values_per_block != 128)) + return -EINVAL; + + if (prfcnt_values_per_block == 64) { + /* If the values per block is 64, no need to truncate. */ + *dst_md = NULL; + return 0; + } + + /* Truncate from 128 to 64 entries per block to keep API backward + * compatibility. 
+ */ + prfcnt_values_per_block = 64; + + for (blk = 0; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) { + blks[blk].type = + kbase_hwcnt_metadata_block_type(src_md, 0, blk); + blks[blk].inst_cnt = kbase_hwcnt_metadata_block_instance_count( + src_md, 0, blk); + blks[blk].hdr_cnt = kbase_hwcnt_metadata_block_headers_count( + src_md, 0, blk); + blks[blk].ctr_cnt = prfcnt_values_per_block - blks[blk].hdr_cnt; + } + + group.type = kbase_hwcnt_metadata_group_type(src_md, 0); + group.blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; + group.blks = blks; + + desc.grp_cnt = kbase_hwcnt_metadata_group_count(src_md); + desc.avail_mask = src_md->avail_mask; + desc.clk_cnt = src_md->clk_cnt; + desc.grps = &group; + + return kbase_hwcnt_metadata_create(&desc, dst_md); +} + +void kbase_hwcnt_dump_buffer_copy_strict_narrow( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + size_t clk; + + if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || + WARN_ON(dst == src) || WARN_ON(dst->metadata == src->metadata) || + WARN_ON(dst->metadata->grp_cnt != src->metadata->grp_cnt) || + WARN_ON(src->metadata->grp_cnt != 1) || + WARN_ON(dst->metadata->grp_metadata[0].blk_cnt != + src->metadata->grp_metadata[0].blk_cnt) || + WARN_ON(dst->metadata->grp_metadata[0].blk_cnt != 4) || + WARN_ON(dst->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt > + src->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt)) + return; + + /* Don't use src metadata since src buffer is bigger than dst buffer. 
*/ + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + const u32 *src_blk = kbase_hwcnt_dump_buffer_block_instance( + src, grp, blk, blk_inst); + const u64 *blk_em = kbase_hwcnt_enable_map_block_instance( + dst_enable_map, grp, blk, blk_inst); + size_t val_cnt = kbase_hwcnt_metadata_block_values_count( + metadata, grp, blk); + /* Align upwards to include padding bytes */ + val_cnt = KBASE_HWCNT_ALIGN_UPWARDS( + val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / + KBASE_HWCNT_VALUE_BYTES)); + + kbase_hwcnt_dump_buffer_block_copy_strict(dst_blk, src_blk, + blk_em, val_cnt); + } + + kbase_hwcnt_metadata_for_each_clock(metadata, clk) { + bool clk_enabled = kbase_hwcnt_clk_enable_map_enabled( + dst_enable_map->clk_enable_map, clk); + + dst->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0; + } +} static bool is_block_type_shader( const u64 grp_type, @@ -370,44 +430,53 @@ static bool is_block_type_shader( { bool is_shader = false; + /* Warn on unknown group type */ + if (WARN_ON(grp_type != KBASE_HWCNT_GPU_GROUP_TYPE_V5)) + return false; + + if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC || + blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 || + blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3) + is_shader = true; + + return is_shader; +} + +static bool is_block_type_l2_cache( + const u64 grp_type, + const u64 blk_type) +{ + bool is_l2_cache = false; + switch (grp_type) { - case KBASE_HWCNT_GPU_GROUP_TYPE_V4: - /* blk-value in [0, KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP-1] - * corresponds to a shader, or its implementation - * reserved. As such, here we use the blk index value to - * tell the reserved case. 
- */ - if (blk_type == KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER || - (blk < KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP && - blk_type == KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED)) - is_shader = true; - break; case KBASE_HWCNT_GPU_GROUP_TYPE_V5: - if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC || - blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2) - is_shader = true; + if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS || + blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2) + is_l2_cache = true; break; default: /* Warn on unknown group type */ WARN_ON(true); } - return is_shader; + return is_l2_cache; } -int kbase_hwcnt_gpu_dump_get( - struct kbase_hwcnt_dump_buffer *dst, - void *src, - const struct kbase_hwcnt_enable_map *dst_enable_map, - u64 pm_core_mask, - bool accumulate) +int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src, + const struct kbase_hwcnt_enable_map *dst_enable_map, + u64 pm_core_mask, + const struct kbase_hwcnt_curr_config *curr_config, + bool accumulate) { const struct kbase_hwcnt_metadata *metadata; const u32 *dump_src; size_t src_offset, grp, blk, blk_inst; - size_t grp_prev = 0; u64 core_mask = pm_core_mask; + /* Variables to deal with the current configuration */ + int l2_count = 0; + bool hw_res_available = true; + if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata)) return -EINVAL; @@ -429,27 +498,43 @@ int kbase_hwcnt_gpu_dump_get( const bool is_shader_core = is_block_type_shader( kbase_hwcnt_metadata_group_type(metadata, grp), blk_type, blk); + const bool is_l2_cache = is_block_type_l2_cache( + kbase_hwcnt_metadata_group_type(metadata, grp), + blk_type); - if (grp != grp_prev) { - /* grp change would only happen with V4. V5 and - * further are envisaged to be single group - * scenario only. Here needs to drop the lower - * group core-mask by shifting right with - * KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP. 
- */ - core_mask = pm_core_mask >> - KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP; - grp_prev = grp; + /* + * If l2 blocks is greater than the current allocated number of + * L2 slices, there is no hw allocated to that block. + */ + if (is_l2_cache) { + l2_count++; + if (l2_count > curr_config->num_l2_slices) + hw_res_available = false; + else + hw_res_available = true; + } + /* + * For the shader cores, the current shader_mask allocated is + * always a subgroup of the maximum shader_mask, so after + * jumping any L2 cache not available the available shader cores + * will always have a matching set of blk instances available to + * accumulate them. + */ + else { + hw_res_available = true; } - /* Early out if no values in the dest block are enabled */ + /* + * Early out if no values in the dest block are enabled or if + * the resource target of the block is not available in the HW. + */ if (kbase_hwcnt_enable_map_block_enabled( dst_enable_map, grp, blk, blk_inst)) { u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( dst, grp, blk, blk_inst); const u32 *src_blk = dump_src + src_offset; - if (!is_shader_core || (core_mask & 1)) { + if ((!is_shader_core || (core_mask & 1)) && hw_res_available) { if (accumulate) { kbase_hwcnt_dump_buffer_block_accumulate( dst_blk, src_blk, hdr_cnt, @@ -465,14 +550,60 @@ int kbase_hwcnt_gpu_dump_get( } } - src_offset += (hdr_cnt + ctr_cnt); + /* Just increase the src_offset if the HW is available */ + if (hw_res_available) + src_offset += (hdr_cnt + ctr_cnt); if (is_shader_core) core_mask = core_mask >> 1; } return 0; } -KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_dump_get); + +int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src, + const struct kbase_hwcnt_enable_map *dst_enable_map, + bool accumulate) +{ + const struct kbase_hwcnt_metadata *metadata; + const u32 *dump_src; + size_t src_offset, grp, blk, blk_inst; + + if (!dst || !src || !dst_enable_map || + (dst_enable_map->metadata != dst->metadata)) + return -EINVAL; + + 
metadata = dst->metadata; + dump_src = (const u32 *)src; + src_offset = 0; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) { + const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count( + metadata, grp, blk); + const size_t ctr_cnt = + kbase_hwcnt_metadata_block_counters_count(metadata, grp, + blk); + + /* Early out if no values in the dest block are enabled */ + if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, + blk, blk_inst)) { + u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance( + dst, grp, blk, blk_inst); + const u32 *src_blk = dump_src + src_offset; + + if (accumulate) { + kbase_hwcnt_dump_buffer_block_accumulate( + dst_blk, src_blk, hdr_cnt, ctr_cnt); + } else { + kbase_hwcnt_dump_buffer_block_copy( + dst_blk, src_blk, (hdr_cnt + ctr_cnt)); + } + } + + src_offset += (hdr_cnt + ctr_cnt); + } + + return 0; +} /** * kbasep_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block @@ -563,7 +694,7 @@ void kbase_hwcnt_gpu_enable_map_to_physical( { const struct kbase_hwcnt_metadata *metadata; - u64 jm_bm = 0; + u64 fe_bm = 0; u64 shader_bm = 0; u64 tiler_bm = 0; u64 mmu_l2_bm = 0; @@ -581,45 +712,26 @@ void kbase_hwcnt_gpu_enable_map_to_physical( metadata, grp); const u64 blk_type = kbase_hwcnt_metadata_block_type( metadata, grp, blk); - const size_t blk_val_cnt = - kbase_hwcnt_metadata_block_values_count( - metadata, grp, blk); const u64 *blk_map = kbase_hwcnt_enable_map_block_instance( src, grp, blk, blk_inst); - switch ((enum kbase_hwcnt_gpu_group_type)grp_type) { - case KBASE_HWCNT_GPU_GROUP_TYPE_V4: - WARN_ON(blk_val_cnt != KBASE_HWCNT_V4_VALUES_PER_BLOCK); - switch ((enum kbase_hwcnt_gpu_v4_block_type)blk_type) { - case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER: - shader_bm |= *blk_map; - break; - case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER: - tiler_bm |= *blk_map; - break; - case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2: - mmu_l2_bm |= *blk_map; - break; - case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM: - jm_bm |= *blk_map; - 
break; - case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: - break; - default: - WARN_ON(true); - } - break; - case KBASE_HWCNT_GPU_GROUP_TYPE_V5: - WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK); + if ((enum kbase_hwcnt_gpu_group_type)grp_type == + KBASE_HWCNT_GPU_GROUP_TYPE_V5) { switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM: - jm_bm |= *blk_map; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED: + /* Nothing to do in this case. */ + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: + fe_bm |= *blk_map; break; case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: tiler_bm |= *blk_map; break; case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: shader_bm |= *blk_map; break; case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: @@ -629,14 +741,13 @@ void kbase_hwcnt_gpu_enable_map_to_physical( default: WARN_ON(true); } - break; - default: + } else { WARN_ON(true); } } - dst->jm_bm = - kbasep_hwcnt_backend_gpu_block_map_to_physical(jm_bm, 0); + dst->fe_bm = + kbasep_hwcnt_backend_gpu_block_map_to_physical(fe_bm, 0); dst->shader_bm = kbasep_hwcnt_backend_gpu_block_map_to_physical(shader_bm, 0); dst->tiler_bm = @@ -644,7 +755,24 @@ void kbase_hwcnt_gpu_enable_map_to_physical( dst->mmu_l2_bm = kbasep_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm, 0); } -KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_enable_map_to_physical); + +void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, + enum kbase_hwcnt_set src) +{ + switch (src) { + case KBASE_HWCNT_SET_PRIMARY: + *dst = KBASE_HWCNT_PHYSICAL_SET_PRIMARY; + break; + case KBASE_HWCNT_SET_SECONDARY: + *dst = KBASE_HWCNT_PHYSICAL_SET_SECONDARY; + break; + case KBASE_HWCNT_SET_TERTIARY: + *dst = KBASE_HWCNT_PHYSICAL_SET_TERTIARY; + break; + default: + WARN_ON(true); + } +} void 
kbase_hwcnt_gpu_enable_map_from_physical( struct kbase_hwcnt_enable_map *dst, @@ -653,7 +781,7 @@ void kbase_hwcnt_gpu_enable_map_from_physical( const struct kbase_hwcnt_metadata *metadata; u64 ignored_hi; - u64 jm_bm; + u64 fe_bm; u64 shader_bm; u64 tiler_bm; u64 mmu_l2_bm; @@ -665,7 +793,7 @@ void kbase_hwcnt_gpu_enable_map_from_physical( metadata = dst->metadata; kbasep_hwcnt_backend_gpu_block_map_from_physical( - src->jm_bm, &jm_bm, &ignored_hi); + src->fe_bm, &fe_bm, &ignored_hi); kbasep_hwcnt_backend_gpu_block_map_from_physical( src->shader_bm, &shader_bm, &ignored_hi); kbasep_hwcnt_backend_gpu_block_map_from_physical( @@ -678,45 +806,26 @@ void kbase_hwcnt_gpu_enable_map_from_physical( metadata, grp); const u64 blk_type = kbase_hwcnt_metadata_block_type( metadata, grp, blk); - const size_t blk_val_cnt = - kbase_hwcnt_metadata_block_values_count( - metadata, grp, blk); u64 *blk_map = kbase_hwcnt_enable_map_block_instance( dst, grp, blk, blk_inst); - switch ((enum kbase_hwcnt_gpu_group_type)grp_type) { - case KBASE_HWCNT_GPU_GROUP_TYPE_V4: - WARN_ON(blk_val_cnt != KBASE_HWCNT_V4_VALUES_PER_BLOCK); - switch ((enum kbase_hwcnt_gpu_v4_block_type)blk_type) { - case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER: - *blk_map = shader_bm; - break; - case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER: - *blk_map = tiler_bm; - break; - case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2: - *blk_map = mmu_l2_bm; - break; - case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM: - *blk_map = jm_bm; - break; - case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: - break; - default: - WARN_ON(true); - } - break; - case KBASE_HWCNT_GPU_GROUP_TYPE_V5: - WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK); + if ((enum kbase_hwcnt_gpu_group_type)grp_type == + KBASE_HWCNT_GPU_GROUP_TYPE_V5) { switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM: - *blk_map = jm_bm; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED: + /* Nothing to do in this case. 
*/ + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: + *blk_map = fe_bm; break; case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: *blk_map = tiler_bm; break; case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: *blk_map = shader_bm; break; case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: @@ -726,13 +835,11 @@ void kbase_hwcnt_gpu_enable_map_from_physical( default: WARN_ON(true); } - break; - default: + } else { WARN_ON(true); } } } -KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_enable_map_from_physical); void kbase_hwcnt_gpu_patch_dump_headers( struct kbase_hwcnt_dump_buffer *buf, @@ -758,16 +865,11 @@ void kbase_hwcnt_gpu_patch_dump_headers( kbasep_hwcnt_backend_gpu_block_map_to_physical( blk_map[0], 0); - switch ((enum kbase_hwcnt_gpu_group_type)grp_type) { - case KBASE_HWCNT_GPU_GROUP_TYPE_V4: - buf_blk[KBASE_HWCNT_V4_PRFCNT_EN_HEADER] = prfcnt_en; - break; - case KBASE_HWCNT_GPU_GROUP_TYPE_V5: + if ((enum kbase_hwcnt_gpu_group_type)grp_type == + KBASE_HWCNT_GPU_GROUP_TYPE_V5) { buf_blk[KBASE_HWCNT_V5_PRFCNT_EN_HEADER] = prfcnt_en; - break; - default: + } else { WARN_ON(true); } } } -KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_patch_dump_headers); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.h index 12891e0..50ae80d 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KBASE_HWCNT_GPU_H_ @@ -30,125 +29,157 @@ struct kbase_hwcnt_metadata; struct kbase_hwcnt_enable_map; struct kbase_hwcnt_dump_buffer; +#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4 +#define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4 +#define KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK 60 +#define KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK \ + (KBASE_HWCNT_V5_HEADERS_PER_BLOCK + \ + KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK) +/** Index of the PRFCNT_EN header into a V5 counter block */ +#define KBASE_HWCNT_V5_PRFCNT_EN_HEADER 2 + /** * enum kbase_hwcnt_gpu_group_type - GPU hardware counter group types, used to * identify metadata groups. - * @KBASE_HWCNT_GPU_GROUP_TYPE_V4: GPU V4 group type. * @KBASE_HWCNT_GPU_GROUP_TYPE_V5: GPU V5 group type. */ enum kbase_hwcnt_gpu_group_type { - KBASE_HWCNT_GPU_GROUP_TYPE_V4 = 0x10, KBASE_HWCNT_GPU_GROUP_TYPE_V5, }; -/** - * enum kbase_hwcnt_gpu_v4_block_type - GPU V4 hardware counter block types, - * used to identify metadata blocks. - * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER: Shader block. - * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER: Tiler block. - * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2: MMU/L2 block. - * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM: Job Manager block. - * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: Reserved block. 
- */ -enum kbase_hwcnt_gpu_v4_block_type { - KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER = 0x20, - KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER, - KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2, - KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM, - KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED, -}; - /** * enum kbase_hwcnt_gpu_v5_block_type - GPU V5 hardware counter block types, * used to identify metadata blocks. - * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM: Job Manager block. - * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: Tiler block. - * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: Shader Core block. - * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: Secondary Shader Core block. - * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: Memsys block. - * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: Secondary Memsys block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED: Undefined block (e.g. if a + * counter set that a block + * doesn't support is used). + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: Front End block (Job manager + * or CSF HW). + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: Secondary Front End block (Job + * manager or CSF HW). + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: Tertiary Front End block (Job + * manager or CSF HW). + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: Tiler block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: Shader Core block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: Secondary Shader Core block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: Tertiary Shader Core block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: Memsys block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: Secondary Memsys block. 
*/ enum kbase_hwcnt_gpu_v5_block_type { - KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM = 0x40, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3, KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER, KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC, KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3, KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS, KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2, }; +/** + * enum kbase_hwcnt_set - GPU hardware counter sets + * @KBASE_HWCNT_SET_PRIMARY: The Primary set of counters + * @KBASE_HWCNT_SET_SECONDARY: The Secondary set of counters + * @KBASE_HWCNT_SET_TERTIARY: The Tertiary set of counters + */ +enum kbase_hwcnt_set { + KBASE_HWCNT_SET_PRIMARY, + KBASE_HWCNT_SET_SECONDARY, + KBASE_HWCNT_SET_TERTIARY, +}; + /** * struct kbase_hwcnt_physical_enable_map - Representation of enable map * directly used by GPU. - * @jm_bm: Job Manager counters selection bitmask. + * @fe_bm: Front end (JM/CSHW) counters selection bitmask. * @shader_bm: Shader counters selection bitmask. * @tiler_bm: Tiler counters selection bitmask. * @mmu_l2_bm: MMU_L2 counters selection bitmask. */ struct kbase_hwcnt_physical_enable_map { - u32 jm_bm; + u32 fe_bm; u32 shader_bm; u32 tiler_bm; u32 mmu_l2_bm; }; -/** - * struct kbase_hwcnt_gpu_v4_info - Information about hwcnt blocks on v4 GPUs. - * @cg_count: Core group count. - * @cgs: Non-NULL pointer to array of cg_count coherent group structures. - * - * V4 devices are Mali-T6xx or Mali-T72x, and have one or more core groups, - * where each core group may have a physically different layout. +/* + * Values for Hardware Counter SET_SELECT value. + * Directly passed to HW. 
*/ -struct kbase_hwcnt_gpu_v4_info { - size_t cg_count; - const struct mali_base_gpu_coherent_group *cgs; +enum kbase_hwcnt_physical_set { + KBASE_HWCNT_PHYSICAL_SET_PRIMARY = 0, + KBASE_HWCNT_PHYSICAL_SET_SECONDARY = 1, + KBASE_HWCNT_PHYSICAL_SET_TERTIARY = 2, }; /** - * struct kbase_hwcnt_gpu_v5_info - Information about hwcnt blocks on v5 GPUs. - * @l2_count: L2 cache count. - * @core_mask: Shader core mask. May be sparse. + * struct kbase_hwcnt_gpu_info - Information about hwcnt blocks on the GPUs. + * @l2_count: L2 cache count. + * @core_mask: Shader core mask. May be sparse. + * @clk_cnt: Number of clock domains available. + * @prfcnt_values_per_block: Total entries (header + counters) of performance + * counter per block. */ -struct kbase_hwcnt_gpu_v5_info { +struct kbase_hwcnt_gpu_info { size_t l2_count; u64 core_mask; + u8 clk_cnt; + size_t prfcnt_values_per_block; }; /** - * struct kbase_hwcnt_gpu_info - Tagged union with information about the current - * GPU's hwcnt blocks. - * @type: GPU type. - * @v4: Info filled in if a v4 GPU. - * @v5: Info filled in if a v5 GPU. - */ -struct kbase_hwcnt_gpu_info { - enum kbase_hwcnt_gpu_group_type type; - union { - struct kbase_hwcnt_gpu_v4_info v4; - struct kbase_hwcnt_gpu_v5_info v5; - }; -}; - -/** - * kbase_hwcnt_gpu_info_init() - Initialise an info structure used to create the - * hwcnt metadata. - * @kbdev: Non-NULL pointer to kbase device. - * @info: Non-NULL pointer to data structure to be filled in. + * struct kbase_hwcnt_curr_config - Current Configuration of HW allocated to the + * GPU. + * @num_l2_slices: Current number of L2 slices allocated to the GPU. + * @shader_present: Current shader present bitmap that is allocated to the GPU. * - * The initialised info struct will only be valid for use while kbdev is valid. 
+ * For architectures with the max_config interface available from the Arbiter, + * the current resources allocated may change during runtime due to a + * re-partitioning (possible with partition manager). Thus, the HWC needs to be + * prepared to report any possible set of counters. For this reason the memory + * layout in the userspace is based on the maximum possible allocation. On the + * other hand, each partition has just the view of its currently allocated + * resources. Therefore, it is necessary to correctly map the dumped HWC values + * from the registers into this maximum memory layout so that it can be exposed + * to the userspace side correctly. + * + * For L2 cache just the number is enough once the allocated ones will be + * accumulated on the first L2 slots available in the destination buffer. + * + * For the correct mapping of the shader cores it is necessary to jump all the + * L2 cache slots in the destination buffer that are not allocated. But, it is + * not necessary to add any logic to map the shader cores bitmap into the memory + * layout because the shader_present allocated will always be a subset of the + * maximum shader_present. It is possible because: + * 1 - Partitions are made of slices and they are always ordered from the ones + * with more shader cores to the ones with less. + * 2 - The shader cores in a slice are always contiguous. + * 3 - A partition can only have a contiguous set of slices allocated to it. + * So, for example, if 4 slices are available in total, 1 with 4 cores, 2 with + * 3 cores and 1 with 2 cores. The maximum possible shader_present would be: + * 0x0011|0111|0111|1111 -> note the order and that the shader cores are + * contiguous in any slice. + * Supposing that a partition takes the two slices in the middle, the current + * config shader_present for this partition would be: + * 0x0111|0111 -> note that this is a subset of the maximum above and the slices + * are contiguous. 
+ * Therefore, by directly copying any subset of the maximum possible + * shader_present the mapping is already achieved. */ -int kbase_hwcnt_gpu_info_init( - struct kbase_device *kbdev, - struct kbase_hwcnt_gpu_info *info); +struct kbase_hwcnt_curr_config { + size_t num_l2_slices; + u64 shader_present; +}; /** - * kbase_hwcnt_gpu_metadata_create() - Create hardware counter metadata for the - * current GPU. - * @info: Non-NULL pointer to info struct initialised by - * kbase_hwcnt_gpu_info_init. - * @use_secondary: True if secondary performance counters should be used, else - * false. Ignored if secondary counters are not supported. + * kbase_hwcnt_jm_metadata_create() - Create hardware counter metadata for the + * JM GPUs. + * @info: Non-NULL pointer to info struct. + * @counter_set: The performance counter set used. * @out_metadata: Non-NULL pointer to where created metadata is stored on * success. * @out_dump_bytes: Non-NULL pointer to where the size of the GPU counter dump @@ -156,44 +187,133 @@ int kbase_hwcnt_gpu_info_init( * * Return: 0 on success, else error code. */ -int kbase_hwcnt_gpu_metadata_create( +int kbase_hwcnt_jm_metadata_create( const struct kbase_hwcnt_gpu_info *info, - bool use_secondary, + enum kbase_hwcnt_set counter_set, const struct kbase_hwcnt_metadata **out_metadata, size_t *out_dump_bytes); /** - * kbase_hwcnt_gpu_metadata_destroy() - Destroy GPU hardware counter metadata. + * kbase_hwcnt_jm_metadata_destroy() - Destroy JM GPU hardware counter metadata. + * + * @metadata: Pointer to metadata to destroy. + */ +void kbase_hwcnt_jm_metadata_destroy( + const struct kbase_hwcnt_metadata *metadata); + +/** + * kbase_hwcnt_csf_metadata_create() - Create hardware counter metadata for the + * CSF GPUs. + * @info: Non-NULL pointer to info struct. + * @counter_set: The performance counter set used. + * @out_metadata: Non-NULL pointer to where created metadata is stored on + * success. + * + * Return: 0 on success, else error code. 
+ */ +int kbase_hwcnt_csf_metadata_create( + const struct kbase_hwcnt_gpu_info *info, + enum kbase_hwcnt_set counter_set, + const struct kbase_hwcnt_metadata **out_metadata); + +/** + * kbase_hwcnt_csf_metadata_destroy() - Destroy CSF GPU hardware counter + * metadata. * @metadata: Pointer to metadata to destroy. */ -void kbase_hwcnt_gpu_metadata_destroy( +void kbase_hwcnt_csf_metadata_destroy( const struct kbase_hwcnt_metadata *metadata); /** - * kbase_hwcnt_gpu_dump_get() - Copy or accumulate enabled counters from the raw + * kbase_hwcnt_gpu_metadata_create_truncate_64() - Create HWC metadata with HWC + * block entries truncated + * to 64. + * + * @dst_md: Non-NULL pointer to where created metadata is stored on success. + * @src_md: Non-NULL pointer to the HWC metadata used as the source to create + * dst_md. + * + * If the total block entries in src_md is 64, metadata dst_md returns NULL + * since no need to truncate. + * if the total block entries in src_md is 128, then a new metadata with block + * entries truncated to 64 will be created for dst_md, which keeps the interface + * to user clients backward compatible. + * If the total block entries in src_md is other values, function returns error + * since it's not supported. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_gpu_metadata_create_truncate_64( + const struct kbase_hwcnt_metadata **dst_md, + const struct kbase_hwcnt_metadata *src_md); + +/** + * kbase_hwcnt_dump_buffer_copy_strict_narrow() - Copy all enabled values from + * src to dst. + * + * @dst: Non-NULL pointer to dst dump buffer. + * @src: Non-NULL pointer to src dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * After the operation, all non-enabled values (including padding bytes) will be + * zero. + * + * The dst and src have different metadata, and the dst metadata is narrower + * than src metadata. 
+ */ +void kbase_hwcnt_dump_buffer_copy_strict_narrow( + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_jm_dump_get() - Copy or accumulate enabled counters from the raw + * dump buffer in src into the dump buffer + * abstraction in dst. + * @dst: Non-NULL pointer to dst dump buffer. + * @src: Non-NULL pointer to src raw dump buffer, of same length + * as returned in out_dump_bytes parameter of + * kbase_hwcnt_jm_metadata_create. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * @pm_core_mask: PM state synchronized shaders core mask with the dump. + * @curr_config: Current allocated hardware resources to correctly map the + * src raw dump buffer to the dst dump buffer. + * @accumulate: True if counters in src should be accumulated into dst, + * rather than copied. + * + * The dst and dst_enable_map MUST have been created from the same metadata as + * returned from the call to kbase_hwcnt_jm_metadata_create as was used to get + * the length of src. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src, + const struct kbase_hwcnt_enable_map *dst_enable_map, + const u64 pm_core_mask, + const struct kbase_hwcnt_curr_config *curr_config, + bool accumulate); + +/** + * kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw * dump buffer in src into the dump buffer * abstraction in dst. * @dst: Non-NULL pointer to dst dump buffer. * @src: Non-NULL pointer to src raw dump buffer, of same length * as returned in out_dump_bytes parameter of - * kbase_hwcnt_gpu_metadata_create. + * kbase_hwcnt_csf_metadata_create. * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. - * @pm_core_mask: PM state synchronized shaders core mask with the dump. 
* @accumulate: True if counters in src should be accumulated into dst, * rather than copied. * * The dst and dst_enable_map MUST have been created from the same metadata as - * returned from the call to kbase_hwcnt_gpu_metadata_create as was used to get + * returned from the call to kbase_hwcnt_csf_metadata_create as was used to get * the length of src. * * Return: 0 on success, else error code. */ -int kbase_hwcnt_gpu_dump_get( - struct kbase_hwcnt_dump_buffer *dst, - void *src, - const struct kbase_hwcnt_enable_map *dst_enable_map, - const u64 pm_core_mask, - bool accumulate); +int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, void *src, + const struct kbase_hwcnt_enable_map *dst_enable_map, + bool accumulate); /** * kbase_hwcnt_gpu_enable_map_to_physical() - Convert an enable map abstraction @@ -202,7 +322,7 @@ int kbase_hwcnt_gpu_dump_get( * @src: Non-NULL pointer to src enable map abstraction. * * The src must have been created from a metadata returned from a call to - * kbase_hwcnt_gpu_metadata_create. + * kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create. * * This is a lossy conversion, as the enable map abstraction has one bit per * individual counter block value, but the physical enable map uses 1 bit for @@ -212,6 +332,16 @@ void kbase_hwcnt_gpu_enable_map_to_physical( struct kbase_hwcnt_physical_enable_map *dst, const struct kbase_hwcnt_enable_map *src); +/** + * kbase_hwcnt_gpu_set_to_physical() - Map counter set selection to physical + * SET_SELECT value. + * + * @dst: Non-NULL pointer to dst physical SET_SELECT value. + * @src: Non-NULL pointer to src counter set selection. + */ +void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, + enum kbase_hwcnt_set src); + /** * kbase_hwcnt_gpu_enable_map_from_physical() - Convert a physical enable map to * an enable map abstraction. @@ -219,7 +349,7 @@ void kbase_hwcnt_gpu_enable_map_to_physical( * @src: Non-NULL pointer to src physical enable map. 
* * The dst must have been created from a metadata returned from a call to - * kbase_hwcnt_gpu_metadata_create. + * kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create. * * This is a lossy conversion, as the physical enable map can technically * support counter blocks with 128 counters each, but no hardware actually uses @@ -238,7 +368,7 @@ void kbase_hwcnt_gpu_enable_map_from_physical( * @enable_map: Non-NULL pointer to enable map. * * The buf and enable_map must have been created from a metadata returned from - * a call to kbase_hwcnt_gpu_metadata_create. + * a call to kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create. * * This function should be used before handing off a dump buffer over the * kernel-user boundary, to ensure the header is accurate for the enable map diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.c index b0e6aee..0687253 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,15 +17,13 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #include "mali_kbase_hwcnt_legacy.h" #include "mali_kbase_hwcnt_virtualizer.h" #include "mali_kbase_hwcnt_types.h" #include "mali_kbase_hwcnt_gpu.h" -#include "mali_kbase_ioctl.h" +#include #include #include @@ -69,7 +68,7 @@ int kbase_hwcnt_legacy_client_create( goto error; /* Translate from the ioctl enable map to the internal one */ - phys_em.jm_bm = enable->jm_bm; + phys_em.fe_bm = enable->fe_bm; phys_em.shader_bm = enable->shader_bm; phys_em.tiler_bm = enable->tiler_bm; phys_em.mmu_l2_bm = enable->mmu_l2_bm; diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.h index 7a610ae..36ff44d 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ /** diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.c index 1e9efde..492f572 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,26 +17,11 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #include "mali_kbase_hwcnt_types.h" -#include "mali_kbase.h" - -/* Minimum alignment of each block of hardware counters */ -#define KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT \ - (KBASE_HWCNT_BITFIELD_BITS * KBASE_HWCNT_VALUE_BYTES) -/** - * KBASE_HWCNT_ALIGN_UPWARDS() - Align a value to an alignment. - * @value: The value to align upwards. - * @alignment: The alignment. - * - * Return: A number greater than or equal to value that is aligned to alignment. - */ -#define KBASE_HWCNT_ALIGN_UPWARDS(value, alignment) \ - (value + ((alignment - (value % alignment)) % alignment)) +#include int kbase_hwcnt_metadata_create( const struct kbase_hwcnt_description *desc, @@ -55,6 +41,10 @@ int kbase_hwcnt_metadata_create( if (!desc || !out_metadata) return -EINVAL; + /* The maximum number of clock domains is 64. 
*/ + if (desc->clk_cnt > (sizeof(u64) * BITS_PER_BYTE)) + return -EINVAL; + /* Calculate the bytes needed to tightly pack the metadata */ /* Top level metadata */ @@ -158,6 +148,7 @@ int kbase_hwcnt_metadata_create( enable_map_count * KBASE_HWCNT_BITFIELD_BYTES; metadata->dump_buf_bytes = dump_buf_count * KBASE_HWCNT_VALUE_BYTES; metadata->avail_mask = desc->avail_mask; + metadata->clk_cnt = desc->clk_cnt; WARN_ON(size != offset); /* Due to the block alignment, there should be exactly one enable map @@ -170,13 +161,11 @@ int kbase_hwcnt_metadata_create( *out_metadata = metadata; return 0; } -KBASE_EXPORT_TEST_API(kbase_hwcnt_metadata_create); void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata) { kfree(metadata); } -KBASE_EXPORT_TEST_API(kbase_hwcnt_metadata_destroy); int kbase_hwcnt_enable_map_alloc( const struct kbase_hwcnt_metadata *metadata, @@ -187,45 +176,55 @@ int kbase_hwcnt_enable_map_alloc( if (!metadata || !enable_map) return -EINVAL; - enable_map_buf = kzalloc(metadata->enable_map_bytes, GFP_KERNEL); - if (!enable_map_buf) - return -ENOMEM; + if (metadata->enable_map_bytes > 0) { + enable_map_buf = + kzalloc(metadata->enable_map_bytes, GFP_KERNEL); + if (!enable_map_buf) + return -ENOMEM; + } else { + enable_map_buf = NULL; + } enable_map->metadata = metadata; - enable_map->enable_map = enable_map_buf; + enable_map->hwcnt_enable_map = enable_map_buf; return 0; } -KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_alloc); void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map) { if (!enable_map) return; - kfree(enable_map->enable_map); - enable_map->enable_map = NULL; + kfree(enable_map->hwcnt_enable_map); + enable_map->hwcnt_enable_map = NULL; enable_map->metadata = NULL; } -KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_free); int kbase_hwcnt_dump_buffer_alloc( const struct kbase_hwcnt_metadata *metadata, struct kbase_hwcnt_dump_buffer *dump_buf) { - u32 *buf; + size_t dump_buf_bytes; + size_t clk_cnt_buf_bytes; + 
u8 *buf; if (!metadata || !dump_buf) return -EINVAL; - buf = kmalloc(metadata->dump_buf_bytes, GFP_KERNEL); + dump_buf_bytes = metadata->dump_buf_bytes; + clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * metadata->clk_cnt; + + /* Make a single allocation for both dump_buf and clk_cnt_buf. */ + buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL); if (!buf) return -ENOMEM; dump_buf->metadata = metadata; - dump_buf->dump_buf = buf; + dump_buf->dump_buf = (u32 *)buf; + dump_buf->clk_cnt_buf = (u64 *)(buf + dump_buf_bytes); + return 0; } -KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_alloc); void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf) { @@ -235,7 +234,6 @@ void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf) kfree(dump_buf->dump_buf); memset(dump_buf, 0, sizeof(*dump_buf)); } -KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_free); int kbase_hwcnt_dump_buffer_array_alloc( const struct kbase_hwcnt_metadata *metadata, @@ -246,10 +244,16 @@ int kbase_hwcnt_dump_buffer_array_alloc( size_t buf_idx; unsigned int order; unsigned long addr; + size_t dump_buf_bytes; + size_t clk_cnt_buf_bytes; if (!metadata || !dump_bufs) return -EINVAL; + dump_buf_bytes = metadata->dump_buf_bytes; + clk_cnt_buf_bytes = + sizeof(*dump_bufs->bufs->clk_cnt_buf) * metadata->clk_cnt; + /* Allocate memory for the dump buffer struct array */ buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL); if (!buffers) @@ -258,8 +262,8 @@ int kbase_hwcnt_dump_buffer_array_alloc( /* Allocate pages for the actual dump buffers, as they tend to be fairly * large. 
*/ - order = get_order(metadata->dump_buf_bytes * n); - addr = __get_free_pages(GFP_KERNEL, order); + order = get_order((dump_buf_bytes + clk_cnt_buf_bytes) * n); + addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); if (!addr) { kfree(buffers); @@ -273,15 +277,18 @@ int kbase_hwcnt_dump_buffer_array_alloc( /* Set the buffer of each dump buf */ for (buf_idx = 0; buf_idx < n; buf_idx++) { - const size_t offset = metadata->dump_buf_bytes * buf_idx; + const size_t dump_buf_offset = dump_buf_bytes * buf_idx; + const size_t clk_cnt_buf_offset = + (dump_buf_bytes * n) + (clk_cnt_buf_bytes * buf_idx); buffers[buf_idx].metadata = metadata; - buffers[buf_idx].dump_buf = (u32 *)(addr + offset); + buffers[buf_idx].dump_buf = (u32 *)(addr + dump_buf_offset); + buffers[buf_idx].clk_cnt_buf = + (u64 *)(addr + clk_cnt_buf_offset); } return 0; } -KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_array_alloc); void kbase_hwcnt_dump_buffer_array_free( struct kbase_hwcnt_dump_buffer_array *dump_bufs) @@ -293,7 +300,6 @@ void kbase_hwcnt_dump_buffer_array_free( free_pages(dump_bufs->page_addr, dump_bufs->page_order); memset(dump_bufs, 0, sizeof(*dump_bufs)); } -KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_array_free); void kbase_hwcnt_dump_buffer_zero( struct kbase_hwcnt_dump_buffer *dst, @@ -324,8 +330,10 @@ void kbase_hwcnt_dump_buffer_zero( kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt); } + + memset(dst->clk_cnt_buf, 0, + sizeof(*dst->clk_cnt_buf) * metadata->clk_cnt); } -KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero); void kbase_hwcnt_dump_buffer_zero_strict( struct kbase_hwcnt_dump_buffer *dst) @@ -334,8 +342,10 @@ void kbase_hwcnt_dump_buffer_zero_strict( return; memset(dst->dump_buf, 0, dst->metadata->dump_buf_bytes); + + memset(dst->clk_cnt_buf, 0, + sizeof(*dst->clk_cnt_buf) * dst->metadata->clk_cnt); } -KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero_strict); void kbase_hwcnt_dump_buffer_zero_non_enabled( struct kbase_hwcnt_dump_buffer *dst, @@ -375,7 +385,6 
@@ void kbase_hwcnt_dump_buffer_zero_non_enabled( } } } -KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero_non_enabled); void kbase_hwcnt_dump_buffer_copy( struct kbase_hwcnt_dump_buffer *dst, @@ -384,6 +393,7 @@ void kbase_hwcnt_dump_buffer_copy( { const struct kbase_hwcnt_metadata *metadata; size_t grp, blk, blk_inst; + size_t clk; if (WARN_ON(!dst) || WARN_ON(!src) || @@ -413,8 +423,13 @@ void kbase_hwcnt_dump_buffer_copy( kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, val_cnt); } + + kbase_hwcnt_metadata_for_each_clock(metadata, clk) { + if (kbase_hwcnt_clk_enable_map_enabled( + dst_enable_map->clk_enable_map, clk)) + dst->clk_cnt_buf[clk] = src->clk_cnt_buf[clk]; + } } -KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy); void kbase_hwcnt_dump_buffer_copy_strict( struct kbase_hwcnt_dump_buffer *dst, @@ -423,6 +438,7 @@ void kbase_hwcnt_dump_buffer_copy_strict( { const struct kbase_hwcnt_metadata *metadata; size_t grp, blk, blk_inst; + size_t clk; if (WARN_ON(!dst) || WARN_ON(!src) || @@ -451,8 +467,15 @@ void kbase_hwcnt_dump_buffer_copy_strict( kbase_hwcnt_dump_buffer_block_copy_strict( dst_blk, src_blk, blk_em, val_cnt); } + + kbase_hwcnt_metadata_for_each_clock(metadata, clk) { + bool clk_enabled = + kbase_hwcnt_clk_enable_map_enabled( + dst_enable_map->clk_enable_map, clk); + + dst->clk_cnt_buf[clk] = clk_enabled ? 
src->clk_cnt_buf[clk] : 0; + } } -KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy_strict); void kbase_hwcnt_dump_buffer_accumulate( struct kbase_hwcnt_dump_buffer *dst, @@ -461,6 +484,7 @@ void kbase_hwcnt_dump_buffer_accumulate( { const struct kbase_hwcnt_metadata *metadata; size_t grp, blk, blk_inst; + size_t clk; if (WARN_ON(!dst) || WARN_ON(!src) || @@ -494,8 +518,13 @@ void kbase_hwcnt_dump_buffer_accumulate( kbase_hwcnt_dump_buffer_block_accumulate( dst_blk, src_blk, hdr_cnt, ctr_cnt); } + + kbase_hwcnt_metadata_for_each_clock(metadata, clk) { + if (kbase_hwcnt_clk_enable_map_enabled( + dst_enable_map->clk_enable_map, clk)) + dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk]; + } } -KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate); void kbase_hwcnt_dump_buffer_accumulate_strict( struct kbase_hwcnt_dump_buffer *dst, @@ -504,6 +533,7 @@ void kbase_hwcnt_dump_buffer_accumulate_strict( { const struct kbase_hwcnt_metadata *metadata; size_t grp, blk, blk_inst; + size_t clk; if (WARN_ON(!dst) || WARN_ON(!src) || @@ -534,5 +564,12 @@ void kbase_hwcnt_dump_buffer_accumulate_strict( kbase_hwcnt_dump_buffer_block_accumulate_strict( dst_blk, src_blk, blk_em, hdr_cnt, ctr_cnt); } + + kbase_hwcnt_metadata_for_each_clock(metadata, clk) { + if (kbase_hwcnt_clk_enable_map_enabled( + dst_enable_map->clk_enable_map, clk)) + dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk]; + else + dst->clk_cnt_buf[clk] = 0; + } } -KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate_strict); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.h index 4d78c84..da12952 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. 
All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /** @@ -85,7 +84,6 @@ #include #include #include -#include "mali_malisw.h" /* Number of bytes in each bitfield */ #define KBASE_HWCNT_BITFIELD_BYTES (sizeof(u64)) @@ -101,6 +99,21 @@ */ #define KBASE_HWCNT_AVAIL_MASK_BITS (sizeof(u64) * BITS_PER_BYTE) +/* Minimum alignment of each block of hardware counters */ +#define KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT \ + (KBASE_HWCNT_BITFIELD_BITS * KBASE_HWCNT_VALUE_BYTES) + +/** + * KBASE_HWCNT_ALIGN_UPWARDS() - Calculate next aligned value. + * @value: The value to align upwards. + * @alignment: The alignment boundary. + * + * Return: Input value if already aligned to the specified boundary, or next + * (incrementing upwards) aligned value. + */ +#define KBASE_HWCNT_ALIGN_UPWARDS(value, alignment) \ + (value + ((alignment - (value % alignment)) % alignment)) + /** * struct kbase_hwcnt_block_description - Description of one or more identical, * contiguous, Hardware Counter Blocks. @@ -136,11 +149,13 @@ struct kbase_hwcnt_group_description { * @grps: Non-NULL pointer to an array of grp_cnt group descriptions, * describing each Hardware Counter Group in the system. * @avail_mask: Flat Availability Mask for all block instances in the system. + * @clk_cnt: The number of clock domains in the system. The maximum is 64. 
*/ struct kbase_hwcnt_description { size_t grp_cnt; const struct kbase_hwcnt_group_description *grps; u64 avail_mask; + u8 clk_cnt; }; /** @@ -220,6 +235,7 @@ struct kbase_hwcnt_group_metadata { * @enable_map_bytes: The size in bytes of an Enable Map needed for the system. * @dump_buf_bytes: The size in bytes of a Dump Buffer needed for the system. * @avail_mask: The Availability Mask for the system. + * @clk_cnt: The number of clock domains in the system. */ struct kbase_hwcnt_metadata { size_t grp_cnt; @@ -227,6 +243,7 @@ struct kbase_hwcnt_metadata { size_t enable_map_bytes; size_t dump_buf_bytes; u64 avail_mask; + u8 clk_cnt; }; /** @@ -234,13 +251,16 @@ struct kbase_hwcnt_metadata { * bitfields. * @metadata: Non-NULL pointer to metadata used to identify, and to describe * the layout of the enable map. - * @enable_map: Non-NULL pointer of size metadata->enable_map_bytes to an array - * of u64 bitfields, each bit of which enables one hardware + * @hwcnt_enable_map: Non-NULL pointer of size metadata->enable_map_bytes to an + * array of u64 bitfields, each bit of which enables one hardware * counter. + * @clk_enable_map: An array of u64 bitfields, each bit of which enables cycle + * counter for a given clock domain. */ struct kbase_hwcnt_enable_map { const struct kbase_hwcnt_metadata *metadata; - u64 *enable_map; + u64 *hwcnt_enable_map; + u64 clk_enable_map; }; /** @@ -250,10 +270,13 @@ struct kbase_hwcnt_enable_map { * the layout of the Dump Buffer. * @dump_buf: Non-NULL pointer of size metadata->dump_buf_bytes to an array * of u32 values. + * @clk_cnt_buf: A pointer to an array of u64 values for cycle count elapsed + * for each clock domain. */ struct kbase_hwcnt_dump_buffer { const struct kbase_hwcnt_metadata *metadata; u32 *dump_buf; + u64 *clk_cnt_buf; }; /** @@ -349,8 +372,7 @@ void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata); * @grp: Index of the group in the metadata. * @blk: Index of the block in the group. 
* - * Return: Number of u32 counter headers in each instance of block blk in - * group grp. + * Return: Number of counter headers in each instance of block blk in group grp. */ #define kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk) \ ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].hdr_cnt) @@ -361,19 +383,29 @@ void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata); * @grp: Index of the group in the metadata. * @blk: Index of the block in the group. * - * Return: Number of u32 counters in each instance of block blk in group - * grp. + * Return: Number of counters in each instance of block blk in group grp. */ #define kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk) \ ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].ctr_cnt) +/** + * kbase_hwcnt_metadata_block_enable_map_stride() - Get the enable map stride. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: enable map stride in each instance of block blk in group grp. + */ +#define kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk) \ + ((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_stride) + /** * kbase_hwcnt_metadata_block_values_count() - Get the number of values. * @metadata: Non-NULL pointer to metadata. * @grp: Index of the group in the metadata. * @blk: Index of the block in the group. * - * Return: Number of u32 headers plus counters in each instance of block blk + * Return: Number of headers plus counters in each instance of block blk * in group grp. */ #define kbase_hwcnt_metadata_block_values_count(metadata, grp, blk) \ @@ -473,7 +505,7 @@ void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map); * block instance. 
*/ #define kbase_hwcnt_enable_map_block_instance(map, grp, blk, blk_inst) \ - ((map)->enable_map + \ + ((map)->hwcnt_enable_map + \ (map)->metadata->grp_metadata[(grp)].enable_map_index + \ (map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_index + \ (map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_stride * (blk_inst)) @@ -520,7 +552,11 @@ static inline void kbase_hwcnt_enable_map_block_disable_all( static inline void kbase_hwcnt_enable_map_disable_all( struct kbase_hwcnt_enable_map *dst) { - memset(dst->enable_map, 0, dst->metadata->enable_map_bytes); + if (dst->hwcnt_enable_map != NULL) + memset(dst->hwcnt_enable_map, 0, + dst->metadata->enable_map_bytes); + + dst->clk_enable_map = 0; } /** @@ -569,6 +605,8 @@ static inline void kbase_hwcnt_enable_map_enable_all( kbase_hwcnt_metadata_for_each_block(dst->metadata, grp, blk, blk_inst) kbase_hwcnt_enable_map_block_enable_all( dst, grp, blk, blk_inst); + + dst->clk_enable_map = (1ull << dst->metadata->clk_cnt) - 1; } /** @@ -582,9 +620,13 @@ static inline void kbase_hwcnt_enable_map_copy( struct kbase_hwcnt_enable_map *dst, const struct kbase_hwcnt_enable_map *src) { - memcpy(dst->enable_map, - src->enable_map, - dst->metadata->enable_map_bytes); + if (dst->hwcnt_enable_map != NULL) { + memcpy(dst->hwcnt_enable_map, + src->hwcnt_enable_map, + dst->metadata->enable_map_bytes); + } + + dst->clk_enable_map = src->clk_enable_map; } /** @@ -602,8 +644,12 @@ static inline void kbase_hwcnt_enable_map_union( dst->metadata->enable_map_bytes / KBASE_HWCNT_BITFIELD_BYTES; size_t i; - for (i = 0; i < bitfld_count; i++) - dst->enable_map[i] |= src->enable_map[i]; + if (dst->hwcnt_enable_map != NULL) { + for (i = 0; i < bitfld_count; i++) + dst->hwcnt_enable_map[i] |= src->hwcnt_enable_map[i]; + } + + dst->clk_enable_map |= src->clk_enable_map; } /** @@ -656,6 +702,12 @@ static inline bool kbase_hwcnt_enable_map_any_enabled( const struct kbase_hwcnt_enable_map *enable_map) { size_t grp, blk, 
blk_inst; + const u64 clk_enable_map_mask = + (1ull << enable_map->metadata->clk_cnt) - 1; + + if (enable_map->metadata->clk_cnt > 0 && + (enable_map->clk_enable_map & clk_enable_map_mask)) + return true; kbase_hwcnt_metadata_for_each_block( enable_map->metadata, grp, blk, blk_inst) { @@ -749,12 +801,10 @@ void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf); * kbase_hwcnt_dump_buffer_array_alloc() - Allocate an array of dump buffers. * @metadata: Non-NULL pointer to metadata describing the system. * @n: Number of dump buffers to allocate - * @dump_bufs: Non-NULL pointer to dump buffer array to be initialised. Each - * dump buffer in the array will be initialised to undefined values, - * so must be used as a copy dest, or cleared before use. + * @dump_bufs: Non-NULL pointer to dump buffer array to be initialised. * - * A single contiguous page allocation will be used for all of the buffers - * inside the array, where: + * A single zeroed contiguous page allocation will be used for all of the + * buffers inside the array, where: * dump_bufs[n].dump_buf == page_addr + n * metadata.dump_buf_bytes * * Return: 0 on success, else error code. @@ -1084,4 +1134,29 @@ static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict( } } +/** + * Iterate over each clock domain in the metadata. + * + * @md: Non-NULL pointer to metadata. + * @clk: size_t variable used as clock iterator. + */ +#define kbase_hwcnt_metadata_for_each_clock(md, clk) \ + for ((clk) = 0; (clk) < (md)->clk_cnt; (clk)++) + +/** + * kbase_hwcnt_clk_enable_map_enabled() - Check if the given index is enabled + * in clk_enable_map. + * @clk_enable_map: An enable map for clock domains. + * @index: Index of the enable map for clock domain. + * + * Return: true if the index of the clock domain is enabled, else false. 
+ */ +static inline bool kbase_hwcnt_clk_enable_map_enabled( + const u64 clk_enable_map, const size_t index) +{ + if (clk_enable_map & (1ull << index)) + return true; + return false; +} + #endif /* _KBASE_HWCNT_TYPES_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.c index 917e47c..52ecb7b 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,17 +17,12 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #include "mali_kbase_hwcnt_virtualizer.h" #include "mali_kbase_hwcnt_accumulator.h" #include "mali_kbase_hwcnt_context.h" #include "mali_kbase_hwcnt_types.h" -#include "mali_malisw.h" -#include "mali_kbase_debug.h" -#include "mali_kbase_linux.h" #include #include @@ -87,7 +83,6 @@ const struct kbase_hwcnt_metadata *kbase_hwcnt_virtualizer_metadata( return hvirt->metadata; } -KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_metadata); /** * kbasep_hwcnt_virtualizer_client_free - Free a virtualizer client's memory. 
@@ -496,7 +491,6 @@ int kbase_hwcnt_virtualizer_client_set_counters( return errcode; } -KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_set_counters); /** * kbasep_hwcnt_virtualizer_client_dump - Perform a dump of the client's @@ -686,7 +680,6 @@ int kbase_hwcnt_virtualizer_client_dump( return errcode; } -KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_dump); int kbase_hwcnt_virtualizer_client_create( struct kbase_hwcnt_virtualizer *hvirt, @@ -719,7 +712,6 @@ int kbase_hwcnt_virtualizer_client_create( *out_hvcli = hvcli; return 0; } -KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_create); void kbase_hwcnt_virtualizer_client_destroy( struct kbase_hwcnt_virtualizer_client *hvcli) @@ -735,7 +727,6 @@ void kbase_hwcnt_virtualizer_client_destroy( kbasep_hwcnt_virtualizer_client_free(hvcli); } -KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_destroy); int kbase_hwcnt_virtualizer_init( struct kbase_hwcnt_context *hctx, @@ -766,7 +757,6 @@ int kbase_hwcnt_virtualizer_init( *out_hvirt = virt; return 0; } -KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_init); void kbase_hwcnt_virtualizer_term( struct kbase_hwcnt_virtualizer *hvirt) @@ -787,4 +777,12 @@ void kbase_hwcnt_virtualizer_term( kfree(hvirt); } -KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_term); + +bool kbase_hwcnt_virtualizer_queue_work(struct kbase_hwcnt_virtualizer *hvirt, + struct work_struct *work) +{ + if (WARN_ON(!hvirt) || WARN_ON(!work)) + return false; + + return kbase_hwcnt_context_queue_work(hvirt->hctx, work); +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.h index 8f628c3..e3a8137 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. 
+ * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /** @@ -31,6 +30,7 @@ #define _KBASE_HWCNT_VIRTUALIZER_H_ #include +#include struct kbase_hwcnt_context; struct kbase_hwcnt_virtualizer; @@ -142,4 +142,19 @@ int kbase_hwcnt_virtualizer_client_dump( u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf); +/** + * kbase_hwcnt_virtualizer_queue_work() - Queue hardware counter related async + * work on a workqueue specialized for + * hardware counters. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @work: Non-NULL pointer to work to queue. + * + * Return: false if work was already on a queue, true otherwise. + * + * This is a convenience function that directly calls the underlying + * kbase_hwcnt_context's kbase_hwcnt_context_queue_work. + */ +bool kbase_hwcnt_virtualizer_queue_work(struct kbase_hwcnt_virtualizer *hvirt, + struct work_struct *work); + #endif /* _KBASE_HWCNT_VIRTUALIZER_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c index 43f4b4d..83977f8 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. 
All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,22 +17,20 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - - #include -#ifdef CONFIG_COMPAT +#if IS_ENABLED(CONFIG_COMPAT) #include #endif #include #include #include #include +#include #include +#include #include #include #include @@ -39,24 +38,17 @@ #include "mali_kbase_dma_fence.h" #include -#define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) +#include -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0) -/* random32 was renamed to prandom_u32 in 3.8 */ -#define prandom_u32 random32 -#endif +#define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) /* Return whether katom will run on the GPU or not. Currently only soft jobs and - * dependency-only atoms do not run on the GPU */ + * dependency-only atoms do not run on the GPU + */ #define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) || \ ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == \ BASE_JD_REQ_DEP))) -/* Minimum API version that supports the just-in-time memory allocation pressure - * limit feature. - */ -#define MIN_API_VERSION_WITH_JPL KBASE_API_VERSION(11, 20) - /* * This is the kernel side of the API. 
Only entry points are: * - kbase_jd_submit(): Called from userspace to submit a single bag @@ -70,13 +62,22 @@ static void __user * get_compat_pointer(struct kbase_context *kctx, const u64 p) { -#ifdef CONFIG_COMPAT +#if IS_ENABLED(CONFIG_COMPAT) if (kbase_ctx_flag(kctx, KCTX_COMPAT)) return compat_ptr(p); #endif return u64_to_user_ptr(p); } +/* Mark an atom as complete, and trace it in kinstr_jm */ +static void jd_mark_atom_complete(struct kbase_jd_atom *katom) +{ + katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + kbase_kinstr_jm_atom_complete(katom); + dev_dbg(katom->kctx->kbdev->dev, "Atom %pK status to completed\n", + (void *)katom); +} + /* Runs an atom, either by handing to the JS or by immediately running it in the case of soft-jobs * * Returns whether the JS needs a reschedule. @@ -88,7 +89,7 @@ static bool jd_run_atom(struct kbase_jd_atom *katom) { struct kbase_context *kctx = katom->kctx; - dev_dbg(kctx->kbdev->dev, "JD run atom %p in kctx %p\n", + dev_dbg(kctx->kbdev->dev, "JD run atom %pK in kctx %pK\n", (void *)katom, (void *)kctx); KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); @@ -97,30 +98,24 @@ static bool jd_run_atom(struct kbase_jd_atom *katom) /* Dependency only atom */ trace_sysgraph(SGR_SUBMIT, kctx->id, kbase_jd_atom_id(katom->kctx, katom)); - katom->status = KBASE_JD_ATOM_STATE_COMPLETED; - dev_dbg(kctx->kbdev->dev, "Atom %p status to completed\n", - (void *)katom); - return 0; + jd_mark_atom_complete(katom); + return false; } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { /* Soft-job */ if (katom->will_fail_event_code) { kbase_finish_soft_job(katom); - katom->status = KBASE_JD_ATOM_STATE_COMPLETED; - dev_dbg(kctx->kbdev->dev, - "Atom %p status to completed\n", (void *)katom); - return 0; + jd_mark_atom_complete(katom); + return false; } if (kbase_process_soft_job(katom) == 0) { kbase_finish_soft_job(katom); - katom->status = KBASE_JD_ATOM_STATE_COMPLETED; - dev_dbg(kctx->kbdev->dev, - "Atom %p status to completed\n", 
(void *)katom); + jd_mark_atom_complete(katom); } - return 0; + return false; } katom->status = KBASE_JD_ATOM_STATE_IN_JS; - dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n", (void *)katom); + dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n", (void *)katom); /* Queue an action about whether we should try scheduling a context */ return kbasep_js_add_job(kctx, katom); } @@ -205,7 +200,7 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) * jctx.lock must be held when this is called. */ -static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom_v2 *user_atom) +static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom *user_atom) { int err_ret_val = -EINVAL; u32 res_no; @@ -242,7 +237,8 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st /* copy user buffer to the end of our real buffer. * Make sure the struct sizes haven't changed in a way - * we don't support */ + * we don't support + */ BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres)); input_extres = (struct base_external_resource *) (((unsigned char *)katom->extres) + @@ -258,9 +254,14 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st #ifdef CONFIG_MALI_DMA_FENCE if (implicit_sync) { - info.resv_objs = kmalloc_array(katom->nr_extres, - sizeof(struct reservation_object *), - GFP_KERNEL); + info.resv_objs = + kmalloc_array(katom->nr_extres, +#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + sizeof(struct reservation_object *), +#else + sizeof(struct dma_resv *), +#endif + GFP_KERNEL); if (!info.resv_objs) { err_ret_val = -ENOMEM; goto early_err_out; @@ -277,7 +278,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st #endif /* CONFIG_MALI_DMA_FENCE */ /* Take the processes mmap lock */ - down_read(¤t->mm->mmap_sem); + down_read(kbase_mem_get_process_mmap_lock()); /* need to keep the GPU VM 
locked while we set up UMM buffers */ kbase_gpu_vm_lock(katom->kctx); @@ -314,8 +315,11 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st #ifdef CONFIG_MALI_DMA_FENCE if (implicit_sync && reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { +#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) struct reservation_object *resv; - +#else + struct dma_resv *resv; +#endif resv = reg->gpu_alloc->imported.umm.dma_buf->resv; if (resv) kbase_dma_fence_add_reservation(resv, &info, @@ -328,7 +332,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st * at least not before the first write) as we overwrite elements * as we loop and could be overwriting ourself, so no writes * until the last read for an element. - * */ + */ katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */ katom->extres[res_no].alloc = alloc; } @@ -337,7 +341,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st kbase_gpu_vm_unlock(katom->kctx); /* Release the processes mmap lock */ - up_read(¤t->mm->mmap_sem); + up_read(kbase_mem_get_process_mmap_lock()); #ifdef CONFIG_MALI_DMA_FENCE if (implicit_sync) { @@ -362,7 +366,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st #ifdef CONFIG_MALI_DMA_FENCE failed_dma_fence_setup: /* Lock the processes mmap lock */ - down_read(¤t->mm->mmap_sem); + down_read(kbase_mem_get_process_mmap_lock()); /* lock before we unmap */ kbase_gpu_vm_lock(katom->kctx); @@ -378,7 +382,7 @@ failed_dma_fence_setup: kbase_gpu_vm_unlock(katom->kctx); /* Release the processes mmap lock */ - up_read(¤t->mm->mmap_sem); + up_read(kbase_mem_get_process_mmap_lock()); early_err_out: kfree(katom->extres); @@ -455,9 +459,6 @@ static inline void jd_resolve_dep(struct list_head *out_list, #endif /* CONFIG_MALI_DMA_FENCE */ if (dep_satisfied) { - trace_sysgraph(SGR_DEP_RES, - 
dep_atom->kctx->id, - kbase_jd_atom_id(katom->kctx, dep_atom)); dep_atom->in_jd_list = true; list_add_tail(&dep_atom->jd_item, out_list); } @@ -481,7 +482,8 @@ static inline void jd_resolve_dep(struct list_head *out_list, static bool is_dep_valid(struct kbase_jd_atom *katom) { /* If there's no dependency then this is 'valid' from the perspective of - * early dependency submission */ + * early dependency submission + */ if (!katom) return true; @@ -490,7 +492,8 @@ static bool is_dep_valid(struct kbase_jd_atom *katom) return false; /* If dependency has completed and has failed or will fail then it is - * not valid */ + * not valid + */ if (katom->status >= KBASE_JD_ATOM_STATE_HW_COMPLETED && (katom->event_code != BASE_JD_EVENT_DONE || katom->will_fail_event_code)) @@ -544,10 +547,6 @@ static void jd_try_submitting_deps(struct list_head *out_list, #endif /* CONFIG_MALI_DMA_FENCE */ if (dep0_valid && dep1_valid && dep_satisfied) { - trace_sysgraph(SGR_DEP_RES, - dep_atom->kctx->id, - kbase_jd_atom_id(dep_atom->kctx, - dep_atom)); dep_atom->in_jd_list = true; list_add(&dep_atom->jd_item, out_list); } @@ -556,7 +555,7 @@ static void jd_try_submitting_deps(struct list_head *out_list, } } -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /** * jd_update_jit_usage - Update just-in-time physical memory usage for an atom. 
* @@ -632,8 +631,8 @@ static void jd_update_jit_usage(struct kbase_jd_atom *katom) u64 addr_end; if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { - const unsigned long extent_bytes = reg->extent - << PAGE_SHIFT; + const unsigned long extension_bytes = + reg->extension << PAGE_SHIFT; const u64 low_ptr = ptr[LOW]; const u64 high_ptr = ptr[HIGH]; @@ -654,8 +653,8 @@ static void jd_update_jit_usage(struct kbase_jd_atom *katom) * this, but here to avoid future maintenance * hazards */ - WARN_ON(!is_power_of_2(extent_bytes)); - addr_end = ALIGN(read_val, extent_bytes); + WARN_ON(!is_power_of_2(extension_bytes)); + addr_end = ALIGN(read_val, extension_bytes); } else { addr_end = read_val = READ_ONCE(*ptr); } @@ -696,7 +695,7 @@ static void jd_update_jit_usage(struct kbase_jd_atom *katom) kbase_jit_retry_pending_alloc(kctx); } -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ /* * Perform the necessary handling of an atom that has finished running @@ -721,12 +720,14 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); -#if MALI_JIT_PRESSURE_LIMIT - jd_update_jit_usage(katom); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (kbase_ctx_flag(kctx, KCTX_JPL_ENABLED)) + jd_update_jit_usage(katom); +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ /* This is needed in case an atom is failed due to being invalid, this - * can happen *before* the jobs that the atom depends on have completed */ + * can happen *before* the jobs that the atom depends on have completed + */ for (i = 0; i < 2; i++) { if (kbase_jd_katom_dep_atom(&katom->dep[i])) { list_del(&katom->dep_item[i]); @@ -734,9 +735,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, } } - katom->status = KBASE_JD_ATOM_STATE_COMPLETED; - dev_dbg(kctx->kbdev->dev, "Atom %p status to completed\n", - (void *)katom); + jd_mark_atom_complete(katom); list_add_tail(&katom->jd_item, &completed_jobs); while 
(!list_empty(&completed_jobs)) { @@ -759,7 +758,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, list_del(runnable_jobs.next); node->in_jd_list = false; - dev_dbg(kctx->kbdev->dev, "List node %p has status %d\n", + dev_dbg(kctx->kbdev->dev, "List node %pK has status %d\n", node, node->status); KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED); @@ -786,7 +785,8 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, !node->will_fail_event_code) { /* Node successfully submitted, try submitting * dependencies as they may now be representable - * in JS */ + * in JS + */ jd_try_submitting_deps(&runnable_jobs, node); } } @@ -802,10 +802,14 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, /* Decrement and check the TOTAL number of jobs. This includes * those not tracked by the scheduler: 'not ready to run' and - * 'dependency-only' jobs. */ + * 'dependency-only' jobs. + */ if (--kctx->jctx.job_nr == 0) - wake_up(&kctx->jctx.zero_jobs_wait); /* All events are safely queued now, and we can signal any waiter - * that we've got no more jobs (so we can be safely terminated) */ + /* All events are safely queued now, and we can signal + * any waiter that we've got no more jobs (so we can be + * safely terminated) + */ + wake_up(&kctx->jctx.zero_jobs_wait); } return need_to_try_schedule_context; @@ -813,7 +817,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, KBASE_EXPORT_TEST_API(jd_done_nolock); -#ifdef CONFIG_GPU_TRACEPOINTS +#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) enum { CORE_REQ_DEP_ONLY, CORE_REQ_SOFT, @@ -868,8 +872,23 @@ static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req) } #endif +/* Trace an atom submission. 
*/ +static void jd_trace_atom_submit(struct kbase_context *const kctx, + struct kbase_jd_atom *const katom, + int *priority) +{ + struct kbase_device *const kbdev = kctx->kbdev; + + KBASE_TLSTREAM_TL_NEW_ATOM(kbdev, katom, kbase_jd_atom_id(kctx, katom)); + KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx); + if (priority) + KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(kbdev, katom, *priority); + KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_IDLE); + kbase_kinstr_jm_atom_queue(katom); +} + static bool jd_submit_atom(struct kbase_context *const kctx, - const struct base_jd_atom_v2 *const user_atom, + const struct base_jd_atom *const user_atom, const struct base_jd_fragment *const user_jc_incr, struct kbase_jd_atom *const katom) { @@ -879,14 +898,17 @@ static bool jd_submit_atom(struct kbase_context *const kctx, int i; int sched_prio; bool will_fail = false; + unsigned long flags; + enum kbase_jd_atom_state status; - dev_dbg(kbdev->dev, "User did JD submit atom %p\n", (void *)katom); + dev_dbg(kbdev->dev, "User did JD submit atom %pK\n", (void *)katom); /* Update the TOTAL number of jobs. This includes those not tracked by - * the scheduler: 'not ready to run' and 'dependency-only' jobs. */ + * the scheduler: 'not ready to run' and 'dependency-only' jobs. 
+ */ jctx->job_nr++; -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) +#if KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE katom->start_timestamp.tv64 = 0; #else katom->start_timestamp = 0; @@ -899,6 +921,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, katom->jc = user_atom->jc; katom->core_req = user_atom->core_req; katom->jobslot = user_atom->jobslot; + katom->seq_nr = user_atom->seq_nr; katom->atom_flags = 0; katom->retry_count = 0; katom->need_cache_flush_cores_retained = 0; @@ -911,19 +934,19 @@ static bool jd_submit_atom(struct kbase_context *const kctx, trace_sysgraph(SGR_ARRIVE, kctx->id, user_atom->atom_number); -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /* Older API version atoms might have random values where jit_id now * lives, but we must maintain backwards compatibility - handle the * issue. */ - if (kctx->api_version < MIN_API_VERSION_WITH_JPL) { + if (!mali_kbase_supports_jit_pressure_limit(kctx->api_version)) { katom->jit_ids[0] = 0; katom->jit_ids[1] = 0; } else { katom->jit_ids[0] = user_atom->jit_id[0]; katom->jit_ids[1] = user_atom->jit_id[1]; } -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ katom->renderpass_id = user_atom->renderpass_id; @@ -953,23 +976,14 @@ static bool jd_submit_atom(struct kbase_context *const kctx, katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT; katom->status = KBASE_JD_ATOM_STATE_COMPLETED; dev_dbg(kbdev->dev, - "Atom %p status to completed\n", + "Atom %pK status to completed\n", (void *)katom); /* Wrong dependency setup. Atom will be sent * back to user space. Do not record any - * dependencies. */ - KBASE_TLSTREAM_TL_NEW_ATOM( - kbdev, - katom, - kbase_jd_atom_id(kctx, katom)); - KBASE_TLSTREAM_TL_RET_ATOM_CTX( - kbdev, - katom, kctx); - KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE( - kbdev, - katom, - TL_ATOM_STATE_IDLE); + * dependencies. 
+ */ + jd_trace_atom_submit(kctx, katom, NULL); return jd_done_nolock(katom, NULL); } @@ -1005,19 +1019,13 @@ static bool jd_submit_atom(struct kbase_context *const kctx, /* Atom has completed, propagate the error code if any */ katom->event_code = dep_atom->event_code; katom->status = KBASE_JD_ATOM_STATE_QUEUED; - dev_dbg(kbdev->dev, "Atom %p status to queued\n", + dev_dbg(kbdev->dev, "Atom %pK status to queued\n", (void *)katom); /* This atom will be sent back to user space. * Do not record any dependencies. */ - KBASE_TLSTREAM_TL_NEW_ATOM( - kbdev, - katom, - kbase_jd_atom_id(kctx, katom)); - KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx); - KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, - TL_ATOM_STATE_IDLE); + jd_trace_atom_submit(kctx, katom, NULL); will_fail = true; @@ -1045,20 +1053,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, return jd_done_nolock(katom, NULL); } - if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { - /* This softjob has failed due to a previous - * dependency, however we should still run the - * prepare & finish functions - */ - if (kbase_prepare_soft_job(katom) != 0) { - katom->event_code = - BASE_JD_EVENT_JOB_INVALID; - return jd_done_nolock(katom, NULL); - } - } - katom->will_fail_event_code = katom->event_code; - return false; } /* These must occur after the above loop to ensure that an atom @@ -1067,22 +1062,19 @@ static bool jd_submit_atom(struct kbase_context *const kctx, */ katom->event_code = BASE_JD_EVENT_DONE; katom->status = KBASE_JD_ATOM_STATE_QUEUED; - dev_dbg(kbdev->dev, "Atom %p status to queued\n", (void *)katom); + dev_dbg(kbdev->dev, "Atom %pK status to queued\n", (void *)katom); /* For invalid priority, be most lenient and choose the default */ sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio); if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID) sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT; - katom->sched_priority = sched_prio; + + /* Cap the priority to jctx.max_priority */ + 
katom->sched_priority = (sched_prio < kctx->jctx.max_priority) ? + kctx->jctx.max_priority : sched_prio; /* Create a new atom. */ - KBASE_TLSTREAM_TL_NEW_ATOM( - kbdev, - katom, - kbase_jd_atom_id(kctx, katom)); - KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_IDLE); - KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(kbdev, katom, katom->sched_priority); - KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx); + jd_trace_atom_submit(kctx, katom, &katom->sched_priority); #if !MALI_INCREMENTAL_RENDERING /* Reject atoms for incremental rendering if not supported */ @@ -1149,8 +1141,8 @@ static bool jd_submit_atom(struct kbase_context *const kctx, } } -#if !MALI_JIT_PRESSURE_LIMIT - if ((kctx->api_version >= MIN_API_VERSION_WITH_JPL) && +#if !MALI_JIT_PRESSURE_LIMIT_BASE + if (mali_kbase_supports_jit_pressure_limit(kctx->api_version) && (user_atom->jit_id[0] || user_atom->jit_id[1])) { /* JIT pressure limit is disabled, but we are receiving non-0 * JIT IDs - atom is invalid. @@ -1158,7 +1150,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, katom->event_code = BASE_JD_EVENT_JOB_INVALID; return jd_done_nolock(katom, NULL); } -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ /* Validate the atom. Function will return error if the atom is * malformed. 
@@ -1180,7 +1172,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, } } -#ifdef CONFIG_GPU_TRACEPOINTS +#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) katom->work_id = atomic_inc_return(&jctx->work_id); trace_gpu_job_enqueue(kctx->id, katom->work_id, kbasep_map_core_reqs_to_string(katom->core_req)); @@ -1207,13 +1199,24 @@ static bool jd_submit_atom(struct kbase_context *const kctx, bool need_to_try_schedule_context; katom->status = KBASE_JD_ATOM_STATE_IN_JS; - dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n", + dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n", (void *)katom); need_to_try_schedule_context = kbasep_js_add_job(kctx, katom); /* If job was cancelled then resolve immediately */ if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED) return need_to_try_schedule_context; + + /* Synchronize with backend reset */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + status = katom->status; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + if (status == KBASE_JD_ATOM_STATE_HW_COMPLETED) { + dev_dbg(kctx->kbdev->dev, + "Atom %d cancelled on HW\n", + kbase_jd_atom_id(katom->kctx, katom)); + return need_to_try_schedule_context; + } } /* This is a pure dependency. 
Resolve it immediately */ @@ -1231,6 +1234,9 @@ int kbase_jd_submit(struct kbase_context *kctx, struct kbase_device *kbdev; u32 latest_flush; + bool jd_atom_is_v2 = (stride == sizeof(struct base_jd_atom_v2) || + stride == offsetof(struct base_jd_atom_v2, renderpass_id)); + /* * kbase_jd_submit isn't expected to fail and so all errors with the * jobs are reported by immediately failing them (through event system) @@ -1245,7 +1251,9 @@ int kbase_jd_submit(struct kbase_context *kctx, } if (stride != offsetof(struct base_jd_atom_v2, renderpass_id) && - stride != sizeof(struct base_jd_atom_v2)) { + stride != sizeof(struct base_jd_atom_v2) && + stride != offsetof(struct base_jd_atom, renderpass_id) && + stride != sizeof(struct base_jd_atom)) { dev_err(kbdev->dev, "Stride %u passed to job_submit isn't supported by the kernel\n", stride); @@ -1256,16 +1264,29 @@ int kbase_jd_submit(struct kbase_context *kctx, latest_flush = kbase_backend_get_current_flush_id(kbdev); for (i = 0; i < nr_atoms; i++) { - struct base_jd_atom_v2 user_atom; + struct base_jd_atom user_atom; struct base_jd_fragment user_jc_incr; struct kbase_jd_atom *katom; - if (copy_from_user(&user_atom, user_addr, stride) != 0) { - dev_err(kbdev->dev, - "Invalid atom address %p passed to job_submit\n", - user_addr); - err = -EFAULT; - break; + if (unlikely(jd_atom_is_v2)) { + if (copy_from_user(&user_atom.jc, user_addr, sizeof(struct base_jd_atom_v2)) != 0) { + dev_dbg(kbdev->dev, + "Invalid atom address %p passed to job_submit\n", + user_addr); + err = -EFAULT; + break; + } + + /* no seq_nr in v2 */ + user_atom.seq_nr = 0; + } else { + if (copy_from_user(&user_atom, user_addr, stride) != 0) { + dev_dbg(kbdev->dev, + "Invalid atom address %p passed to job_submit\n", + user_addr); + err = -EFAULT; + break; + } } if (stride == offsetof(struct base_jd_atom_v2, renderpass_id)) { @@ -1399,7 +1420,7 @@ void kbase_jd_done_worker(struct work_struct *data) js_kctx_info = &kctx->jctx.sched_info; js_devdata = 
&kbdev->js_data; - dev_dbg(kbdev->dev, "Enter atom %p done worker for kctx %p\n", + dev_dbg(kbdev->dev, "Enter atom %pK done worker for kctx %pK\n", (void *)katom, (void *)kctx); KBASE_KTRACE_ADD_JM(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0); @@ -1423,7 +1444,7 @@ void kbase_jd_done_worker(struct work_struct *data) if (katom->event_code == BASE_JD_EVENT_STOPPED) { unsigned long flags; - dev_dbg(kbdev->dev, "Atom %p has been promoted to stopped\n", + dev_dbg(kbdev->dev, "Atom %pK has been promoted to stopped\n", (void *)katom); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); @@ -1431,7 +1452,7 @@ void kbase_jd_done_worker(struct work_struct *data) spin_lock_irqsave(&kbdev->hwaccess_lock, flags); katom->status = KBASE_JD_ATOM_STATE_IN_JS; - dev_dbg(kctx->kbdev->dev, "Atom %p status to in JS\n", + dev_dbg(kctx->kbdev->dev, "Atom %pK status to in JS\n", (void *)katom); kbase_js_unpull(kctx, katom); @@ -1518,7 +1539,9 @@ void kbase_jd_done_worker(struct work_struct *data) mutex_unlock(&jctx->lock); /* Job is now no longer running, so can now safely release the context - * reference, and handle any actions that were logged against the atom's retained state */ + * reference, and handle any actions that were logged against the + * atom's retained state + */ kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state); @@ -1526,7 +1549,8 @@ void kbase_jd_done_worker(struct work_struct *data) if (!atomic_dec_return(&kctx->work_count)) { /* If worker now idle then post all events that jd_done_nolock() - * has queued */ + * has queued + */ mutex_lock(&jctx->lock); while (!list_empty(&kctx->completed_jobs)) { struct kbase_jd_atom *atom = list_entry( @@ -1546,7 +1570,7 @@ void kbase_jd_done_worker(struct work_struct *data) KBASE_KTRACE_ADD_JM(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0); - dev_dbg(kbdev->dev, "Leave atom %p done worker for kctx %p\n", + dev_dbg(kbdev->dev, "Leave atom %pK done 
worker for kctx %pK\n", (void *)katom, (void *)kctx); } @@ -1601,7 +1625,8 @@ static void jd_cancel_worker(struct work_struct *data) need_to_try_schedule_context = jd_done_nolock(katom, NULL); /* Because we're zapping, we're not adding any more jobs to this ctx, so no need to * schedule the context. There's also no need for the jsctx_mutex to have been taken - * around this too. */ + * around this too. + */ KBASE_DEBUG_ASSERT(!need_to_try_schedule_context); /* katom may have been freed now, do not use! */ @@ -1652,7 +1677,7 @@ void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, atomic_inc(&kctx->work_count); -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) /* a failed job happened and is waiting for dumping*/ if (!katom->will_fail_event_code && kbase_debug_job_fault_process(katom, katom->event_code)) @@ -1670,12 +1695,12 @@ void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom) { struct kbase_context *kctx; - KBASE_DEBUG_ASSERT(NULL != kbdev); - KBASE_DEBUG_ASSERT(NULL != katom); + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(katom != NULL); kctx = katom->kctx; - KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(kctx != NULL); - dev_dbg(kbdev->dev, "JD: cancelling atom %p\n", (void *)katom); + dev_dbg(kbdev->dev, "JD: cancelling atom %pK\n", (void *)katom); KBASE_KTRACE_ADD_JM(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0); /* This should only be done from a context that is not scheduled */ @@ -1731,7 +1756,7 @@ void kbase_jd_zap_context(struct kbase_context *kctx) flush_workqueue(kctx->dma_fence.wq); #endif -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) kbase_debug_job_fault_kctx_unblock(kctx); #endif @@ -1744,12 +1769,15 @@ int kbase_jd_init(struct kbase_context *kctx) { int i; int mali_err = 0; + struct priority_control_manager_device *pcm_device = NULL; KBASE_DEBUG_ASSERT(kctx); + pcm_device = kctx->kbdev->pcm_dev; + kctx->jctx.max_priority = KBASE_JS_ATOM_SCHED_PRIO_REALTIME; 
kctx->jctx.job_done_wq = alloc_workqueue("mali_jd", WQ_HIGHPRI | WQ_UNBOUND, 1); - if (NULL == kctx->jctx.job_done_wq) { + if (kctx->jctx.job_done_wq == NULL) { mali_err = -ENOMEM; goto out1; } @@ -1785,6 +1813,11 @@ int kbase_jd_init(struct kbase_context *kctx) INIT_LIST_HEAD(&kctx->completed_jobs); atomic_set(&kctx->work_count, 0); + /* Check if there are platform rules for maximum priority */ + if (pcm_device) + kctx->jctx.max_priority = pcm_device->ops.pcm_scheduler_priority_check( + pcm_device, current, KBASE_JS_ATOM_SCHED_PRIO_REALTIME); + return 0; out1: diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c index e9a161f..67d1bd9 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,11 +17,9 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) #include #include @@ -29,7 +28,7 @@ #if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE) #include #endif -#include +#include struct kbase_jd_debugfs_depinfo { u8 id; @@ -47,13 +46,13 @@ static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom, case BASE_JD_REQ_SOFT_FENCE_TRIGGER: res = kbase_sync_fence_out_info_get(atom, &info); if (res == 0) - seq_printf(sfile, "Sa([%p]%d) ", + seq_printf(sfile, "Sa([%pK]%d) ", info.fence, info.status); break; case BASE_JD_REQ_SOFT_FENCE_WAIT: res = kbase_sync_fence_in_info_get(atom, &info); if (res == 0) - seq_printf(sfile, "Wa([%p]%d) ", + seq_printf(sfile, "Wa([%pK]%d) ", info.fence, info.status); break; default: @@ -66,42 +65,40 @@ static void kbase_jd_debugfs_fence_info(struct kbase_jd_atom *atom, struct kbase_fence_cb *cb; if (atom->dma_fence.fence) { -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence = atom->dma_fence.fence; #else struct dma_fence *fence = atom->dma_fence.fence; #endif seq_printf(sfile, -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) - "Sd(%u#%u: %s) ", +#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE) + "Sd(%u#%u: %s) ", #else - "Sd(%llu#%u: %s) ", + "Sd(%llu#%u: %s) ", #endif - fence->context, - fence->seqno, - dma_fence_is_signaled(fence) ? - "signaled" : "active"); + fence->context, fence->seqno, + dma_fence_is_signaled(fence) ? 
"signaled" : + "active"); } list_for_each_entry(cb, &atom->dma_fence.callbacks, node) { -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence = cb->fence; #else struct dma_fence *fence = cb->fence; #endif seq_printf(sfile, -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) - "Wd(%u#%u: %s) ", +#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE) + "Wd(%u#%u: %s) ", #else - "Wd(%llu#%u: %s) ", + "Wd(%llu#%u: %s) ", #endif - fence->context, - fence->seqno, - dma_fence_is_signaled(fence) ? - "signaled" : "active"); + fence->context, fence->seqno, + dma_fence_is_signaled(fence) ? "signaled" : + "active"); } } #endif /* CONFIG_MALI_DMA_FENCE */ @@ -180,7 +177,8 @@ static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data) /* start_timestamp is cleared as soon as the atom leaves UNUSED state * and set before a job is submitted to the h/w, a non-zero value means - * it is valid */ + * it is valid + */ if (ktime_to_ns(atom->start_timestamp)) start_timestamp = ktime_to_ns( ktime_sub(ktime_get(), atom->start_timestamp)); @@ -228,6 +226,12 @@ static const struct file_operations kbasep_jd_debugfs_atoms_fops = { void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx) { +#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) + const mode_t mode = S_IRUGO; +#else + const mode_t mode = S_IRUSR; +#endif + /* Caller already ensures this, but we keep the pattern for * maintenance safety. 
*/ @@ -236,7 +240,7 @@ void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx) return; /* Expose all atoms */ - debugfs_create_file("atoms", S_IRUGO, kctx->kctx_dentry, kctx, + debugfs_create_file("atoms", mode, kctx->kctx_dentry, kctx, &kbasep_jd_debugfs_atoms_fops); } diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h index 697bdef..8e6140c 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,13 +17,10 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ /** - * @file mali_kbase_jd_debugfs.h - * Header file for job dispatcher-related entries in debugfs + * DOC: Header file for job dispatcher-related entries in debugfs */ #ifndef _KBASE_JD_DEBUGFS_H @@ -38,7 +36,7 @@ struct kbase_context; /** * kbasep_jd_debugfs_ctx_init() - Add debugfs entries for JD system * - * @kctx Pointer to kbase_context + * @kctx: Pointer to kbase_context */ void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c index 3f17dd7..6995050 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,11 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - /* * HW access job manager common APIs */ @@ -29,6 +27,7 @@ #include "mali_kbase_hwaccess_jm.h" #include "mali_kbase_jm.h" +#if !MALI_USE_CSF /** * kbase_jm_next_job() - Attempt to run the next @nr_jobs_to_submit jobs on slot * @js on the active context. 
@@ -46,7 +45,7 @@ static bool kbase_jm_next_job(struct kbase_device *kbdev, int js, kctx = kbdev->hwaccess.active_kctx[js]; dev_dbg(kbdev->dev, - "Trying to run the next %d jobs in kctx %p (s:%d)\n", + "Trying to run the next %d jobs in kctx %pK (s:%d)\n", nr_jobs_to_submit, (void *)kctx, js); if (!kctx) @@ -118,7 +117,7 @@ void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { if (kbdev->hwaccess.active_kctx[js] == kctx) { - dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n", + dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n", (void *)kctx, js); kbdev->hwaccess.active_kctx[js] = NULL; } @@ -130,7 +129,7 @@ struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, { lockdep_assert_held(&kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Atom %p is returning with event code 0x%x\n", + dev_dbg(kbdev->dev, "Atom %pK is returning with event code 0x%x\n", (void *)katom, katom->event_code); if (katom->event_code != BASE_JD_EVENT_STOPPED && @@ -149,3 +148,4 @@ struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev, return kbase_js_complete_atom(katom, end_timestamp); } +#endif /* !MALI_USE_CSF */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h index a3c7744..c6b28f3 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_jm.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014, 2016, 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2014, 2016, 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. 
+ * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,11 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - /* * Job manager common APIs */ @@ -28,6 +26,7 @@ #ifndef _KBASE_JM_H_ #define _KBASE_JM_H_ +#if !MALI_USE_CSF /** * kbase_jm_kick() - Indicate that there are jobs ready to run. * @kbdev: Device pointer @@ -73,7 +72,9 @@ void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask); * kbase_jm_kick_all() otherwise it will do nothing. */ void kbase_jm_try_kick_all(struct kbase_device *kbdev); +#endif /* !MALI_USE_CSF */ +#if !MALI_USE_CSF /** * kbase_jm_idle_ctx() - Mark a context as idle. * @kbdev: Device pointer @@ -111,5 +112,6 @@ struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev, */ struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev, struct kbase_jd_atom *katom, ktime_t *end_timestamp); +#endif /* !MALI_USE_CSF */ #endif /* _KBASE_JM_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c index 0b0c5bf..3682486 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - - /* * Job Scheduler Implementation */ @@ -37,6 +34,7 @@ #include "mali_kbase_jm.h" #include "mali_kbase_hwaccess_jm.h" +#include /* * Private types @@ -45,26 +43,30 @@ /* Bitpattern indicating the result of releasing a context */ enum { /* The context was descheduled - caller should try scheduling in a new - * one to keep the runpool full */ + * one to keep the runpool full + */ KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED = (1u << 0), /* Ctx attributes were changed - caller should try scheduling all - * contexts */ + * contexts + */ KBASEP_JS_RELEASE_RESULT_SCHED_ALL = (1u << 1) }; typedef u32 kbasep_js_release_result; const int kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS] = { - KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */ - KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */ - KBASE_JS_ATOM_SCHED_PRIO_LOW /* BASE_JD_PRIO_LOW */ + KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */ + KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */ + KBASE_JS_ATOM_SCHED_PRIO_LOW, /* BASE_JD_PRIO_LOW */ + KBASE_JS_ATOM_SCHED_PRIO_REALTIME /* BASE_JD_PRIO_REALTIME */ }; const base_jd_prio kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT] = { - BASE_JD_PRIO_HIGH, /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */ - BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */ - BASE_JD_PRIO_LOW /* KBASE_JS_ATOM_SCHED_PRIO_LOW */ + BASE_JD_PRIO_REALTIME, /* KBASE_JS_ATOM_SCHED_PRIO_REALTIME */ + BASE_JD_PRIO_HIGH, /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */ + BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */ + BASE_JD_PRIO_LOW /* KBASE_JS_ATOM_SCHED_PRIO_LOW */ }; @@ -79,7 +81,7 @@ static int kbase_js_get_slot(struct kbase_device *kbdev, struct 
kbase_jd_atom *katom); static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, - kbasep_js_ctx_job_cb callback); + kbasep_js_ctx_job_cb *callback); /* Helper for ktrace */ #if KBASE_KTRACE_ENABLE @@ -160,7 +162,7 @@ jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) none_to_pull = RB_EMPTY_ROOT(&rb->runnable_tree); dev_dbg(kctx->kbdev->dev, - "Slot %d (prio %d) is %spullable in kctx %p\n", + "Slot %d (prio %d) is %spullable in kctx %pK\n", js, prio, none_to_pull ? "not " : "", kctx); return none_to_pull; @@ -184,7 +186,7 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH; + for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { if (!jsctx_rb_none_to_pull_prio(kctx, js, prio)) return false; @@ -210,9 +212,8 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) * * The HW access lock must always be held when calling this function. 
*/ -static void -jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, - kbasep_js_ctx_job_cb callback) +static void jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, + int prio, kbasep_js_ctx_job_cb *callback) { struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; @@ -234,7 +235,7 @@ jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, WARN_ON(!(entry->core_req & BASE_JD_REQ_END_RENDERPASS)); dev_dbg(kctx->kbdev->dev, - "Del runnable atom %p from X_DEP list\n", + "Del runnable atom %pK from X_DEP list\n", (void *)entry); list_del(&entry->queue); @@ -250,7 +251,7 @@ jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, WARN_ON(!(entry->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)); dev_dbg(kctx->kbdev->dev, - "Del blocked atom %p from X_DEP list\n", + "Del blocked atom %pK from X_DEP list\n", (void *)entry); list_del(queue->x_dep_head.next); @@ -271,13 +272,12 @@ jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback * for each entry, and remove the entry from the queue. 
*/ -static inline void -jsctx_queue_foreach(struct kbase_context *kctx, int js, - kbasep_js_ctx_job_cb callback) +static inline void jsctx_queue_foreach(struct kbase_context *kctx, int js, + kbasep_js_ctx_job_cb *callback) { int prio; - for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH; + for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) jsctx_queue_foreach_prio(kctx, js, prio, callback); } @@ -301,7 +301,7 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) lockdep_assert_held(&kctx->kbdev->hwaccess_lock); dev_dbg(kctx->kbdev->dev, - "Peeking runnable tree of kctx %p for prio %d (s:%d)\n", + "Peeking runnable tree of kctx %pK for prio %d (s:%d)\n", (void *)kctx, prio, js); node = rb_first(&rb->runnable_tree); @@ -319,7 +319,7 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) * @js: Job slot id to check. * * Check the ring buffers for all priorities, starting from - * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a + * KBASE_JS_ATOM_SCHED_PRIO_REALTIME, for the specified @js and @prio and return a * pointer to the next atom, unless all the priority's ring buffers are empty. * * Caller must hold the hwaccess_lock. @@ -333,7 +333,7 @@ jsctx_rb_peek(struct kbase_context *kctx, int js) lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH; + for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { struct kbase_jd_atom *katom; @@ -363,7 +363,7 @@ jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - dev_dbg(kctx->kbdev->dev, "Erasing atom %p from runnable tree of kctx %p\n", + dev_dbg(kctx->kbdev->dev, "Erasing atom %pK from runnable tree of kctx %pK\n", (void *)katom, (void *)kctx); /* Atoms must be pulled in the correct order. 
*/ @@ -385,7 +385,7 @@ jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Adding atom %p to runnable tree of kctx %p (s:%d)\n", + dev_dbg(kbdev->dev, "Adding atom %pK to runnable tree of kctx %pK (s:%d)\n", (void *)katom, (void *)kctx, js); while (*new) { @@ -448,7 +448,8 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) #ifdef CONFIG_MALI_DEBUG /* Soft-stop will be disabled on a single context by default unless - * softstop_always is set */ + * softstop_always is set + */ jsdd->softstop_always = false; #endif /* CONFIG_MALI_DEBUG */ jsdd->nr_all_contexts_running = 0; @@ -531,14 +532,15 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) kbdev->gpu_props.props.raw_props.js_features[i]); /* On error, we could continue on: providing none of the below resources - * rely on the ones above */ + * rely on the ones above + */ mutex_init(&jsdd->runpool_mutex); mutex_init(&jsdd->queue_mutex); sema_init(&jsdd->schedule_sem, 1); for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) { - for (j = 0; j < KBASE_JS_ATOM_SCHED_PRIO_COUNT; ++j) { + for (j = KBASE_JS_ATOM_SCHED_PRIO_FIRST; j < KBASE_JS_ATOM_SCHED_PRIO_COUNT; ++j) { INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i][j]); INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i][j]); } @@ -595,16 +597,18 @@ int kbasep_js_kctx_init(struct kbase_context *const kctx) sizeof(js_kctx_info->ctx.ctx_attr_ref_count)); /* Initially, the context is disabled from submission until the create - * flags are set */ + * flags are set + */ kbase_ctx_flag_set(kctx, KCTX_SUBMIT_DISABLED); /* On error, we could continue on: providing none of the below resources - * rely on the ones above */ + * rely on the ones above + */ mutex_init(&js_kctx_info->ctx.jsctx_mutex); init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait); - for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { + for (i = KBASE_JS_ATOM_SCHED_PRIO_FIRST; i < 
KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) { INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].x_dep_head); kctx->jsctx_queue[i][j].runnable_tree = RB_ROOT; @@ -678,7 +682,7 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, bool ret = false; lockdep_assert_held(&kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Add pullable tail kctx %p (s:%d)\n", + dev_dbg(kbdev->dev, "Add pullable tail kctx %pK (s:%d)\n", (void *)kctx, js); if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) @@ -720,7 +724,7 @@ static bool kbase_js_ctx_list_add_pullable_head_nolock( bool ret = false; lockdep_assert_held(&kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Add pullable head kctx %p (s:%d)\n", + dev_dbg(kbdev->dev, "Add pullable head kctx %pK (s:%d)\n", (void *)kctx, js); if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) @@ -796,7 +800,7 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, bool ret = false; lockdep_assert_held(&kbdev->hwaccess_lock); - dev_dbg(kbdev->dev, "Add unpullable tail kctx %p (s:%d)\n", + dev_dbg(kbdev->dev, "Add unpullable tail kctx %pK (s:%d)\n", (void *)kctx, js); list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], @@ -879,7 +883,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( lockdep_assert_held(&kbdev->hwaccess_lock); - for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { + for (i = KBASE_JS_ATOM_SCHED_PRIO_FIRST; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { if (list_empty(&kbdev->js_data.ctx_list_pullable[js][i])) continue; @@ -889,7 +893,7 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); dev_dbg(kbdev->dev, - "Popped %p from the pullable queue (s:%d)\n", + "Popped %pK from the pullable queue (s:%d)\n", (void *)kctx, js); return kctx; } @@ -943,25 +947,25 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, if 
(is_scheduled) { if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) { - dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %p\n", + dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %pK\n", (void *)kctx); return false; } } katom = jsctx_rb_peek(kctx, js); if (!katom) { - dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %p (s:%d)\n", + dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%d)\n", (void *)kctx, js); return false; /* No pullable atoms */ } if (kctx->blocked_js[js][katom->sched_priority]) { dev_dbg(kbdev->dev, - "JS: kctx %p is blocked from submitting atoms at priority %d (s:%d)\n", + "JS: kctx %pK is blocked from submitting atoms at priority %d (s:%d)\n", (void *)kctx, katom->sched_priority, js); return false; } if (atomic_read(&katom->blocked)) { - dev_dbg(kbdev->dev, "JS: Atom %p is blocked in js_ctx_pullable\n", + dev_dbg(kbdev->dev, "JS: Atom %pK is blocked in js_ctx_pullable\n", (void *)katom); return false; /* next atom blocked */ } @@ -970,20 +974,20 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || katom->x_pre_dep->will_fail_event_code) { dev_dbg(kbdev->dev, - "JS: X pre-dep %p is not present in slot FIFO or will fail\n", + "JS: X pre-dep %pK is not present in slot FIFO or will fail\n", (void *)katom->x_pre_dep); return false; } if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) { dev_dbg(kbdev->dev, - "JS: Atom %p has cross-slot fail dependency and atoms on slot (s:%d)\n", + "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%d)\n", (void *)katom, js); return false; } } - dev_dbg(kbdev->dev, "JS: Atom %p is pullable in kctx %p (s:%d)\n", + dev_dbg(kbdev->dev, "JS: Atom %pK is pullable in kctx %pK (s:%d)\n", (void *)katom, (void *)kctx, js); return true; @@ -1007,7 +1011,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, int dep_prio = dep_atom->sched_priority; dev_dbg(kbdev->dev, - 
"Checking dep %d of atom %p (s:%d) on %p (s:%d)\n", + "Checking dep %d of atom %pK (s:%d) on %pK (s:%d)\n", i, (void *)katom, js, (void *)dep_atom, dep_js); /* Dependent atom must already have been submitted */ @@ -1020,7 +1024,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, } /* Dependencies with different priorities can't - be represented in the ringbuffer */ + * be represented in the ringbuffer + */ if (prio != dep_prio) { dev_dbg(kbdev->dev, "Different atom priorities\n"); @@ -1030,7 +1035,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, if (js == dep_js) { /* Only one same-slot dependency can be - * represented in the ringbuffer */ + * represented in the ringbuffer + */ if (has_dep) { dev_dbg(kbdev->dev, "Too many same-slot deps\n"); @@ -1038,7 +1044,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, break; } /* Each dependee atom can only have one - * same-slot dependency */ + * same-slot dependency + */ if (dep_atom->post_dep) { dev_dbg(kbdev->dev, "Too many same-slot successors\n"); @@ -1048,7 +1055,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, has_dep = true; } else { /* Only one cross-slot dependency can be - * represented in the ringbuffer */ + * represented in the ringbuffer + */ if (has_x_dep) { dev_dbg(kbdev->dev, "Too many cross-slot deps\n"); @@ -1056,7 +1064,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, break; } /* Each dependee atom can only have one - * cross-slot dependency */ + * cross-slot dependency + */ if (dep_atom->x_post_dep) { dev_dbg(kbdev->dev, "Too many cross-slot successors\n"); @@ -1064,7 +1073,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, break; } /* The dependee atom can not already be in the - * HW access ringbuffer */ + * HW access ringbuffer + */ if (dep_atom->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { dev_dbg(kbdev->dev, @@ -1074,7 +1084,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, 
break; } /* The dependee atom can not already have - * completed */ + * completed + */ if (dep_atom->status != KBASE_JD_ATOM_STATE_IN_JS) { dev_dbg(kbdev->dev, @@ -1092,7 +1103,8 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, } /* If dependencies can be represented by ringbuffer then clear them from - * atom structure */ + * atom structure + */ if (ret) { for (i = 0; i < 2; i++) { struct kbase_jd_atom *dep_atom = katom->dep[i].atom; @@ -1101,7 +1113,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, int dep_js = kbase_js_get_slot(kbdev, dep_atom); dev_dbg(kbdev->dev, - "Clearing dep %d of atom %p (s:%d) on %p (s:%d)\n", + "Clearing dep %d of atom %pK (s:%d) on %pK (s:%d)\n", i, (void *)katom, js, (void *)dep_atom, dep_js); @@ -1116,7 +1128,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, katom->atom_flags |= KBASE_KATOM_FLAG_X_DEP_BLOCKED; - dev_dbg(kbdev->dev, "Set X_DEP flag on atom %p\n", + dev_dbg(kbdev->dev, "Set X_DEP flag on atom %pK\n", (void *)katom); katom->x_pre_dep = dep_atom; @@ -1140,7 +1152,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, } } else { dev_dbg(kbdev->dev, - "Deps of atom %p (s:%d) could not be represented\n", + "Deps of atom %pK (s:%d) could not be represented\n", (void *)katom, js); } @@ -1181,7 +1193,7 @@ void kbase_js_update_ctx_priority(struct kbase_context *kctx) /* Determine the new priority for context, as per the priority * of currently in-use atoms. 
*/ - for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH; + for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { if (kctx->atoms_count[prio]) { new_priority = prio; @@ -1192,6 +1204,7 @@ void kbase_js_update_ctx_priority(struct kbase_context *kctx) kbase_js_set_ctx_priority(kctx, new_priority); } +KBASE_EXPORT_TEST_API(kbase_js_update_ctx_priority); /** * js_add_start_rp() - Add an atom that starts a renderpass to the job scheduler @@ -1222,7 +1235,7 @@ static int js_add_start_rp(struct kbase_jd_atom *const start_katom) if (rp->state != KBASE_JD_RP_COMPLETE) return -EINVAL; - dev_dbg(kctx->kbdev->dev, "JS add start atom %p of RP %d\n", + dev_dbg(kctx->kbdev->dev, "JS add start atom %pK of RP %d\n", (void *)start_katom, start_katom->renderpass_id); /* The following members are read when updating the job slot @@ -1265,7 +1278,7 @@ static int js_add_end_rp(struct kbase_jd_atom *const end_katom) rp = &kctx->jctx.renderpasses[end_katom->renderpass_id]; - dev_dbg(kbdev->dev, "JS add end atom %p in state %d of RP %d\n", + dev_dbg(kbdev->dev, "JS add end atom %pK in state %d of RP %d\n", (void *)end_katom, (int)rp->state, end_katom->renderpass_id); if (rp->state == KBASE_JD_RP_COMPLETE) @@ -1332,7 +1345,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx, /* Refcount ctx.nr_jobs */ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX); ++(js_kctx_info->ctx.nr_jobs); - dev_dbg(kbdev->dev, "Add atom %p to kctx %p; now %d in ctx\n", + dev_dbg(kbdev->dev, "Add atom %pK to kctx %pK; now %d in ctx\n", (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); /* Lock for state available during IRQ */ @@ -1345,13 +1358,14 @@ bool kbasep_js_add_job(struct kbase_context *kctx, /* Dependencies could not be represented */ --(js_kctx_info->ctx.nr_jobs); dev_dbg(kbdev->dev, - "Remove atom %p from kctx %p; now %d in ctx\n", + "Remove atom %pK from kctx %pK; now %d in ctx\n", (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); /* Setting atom status 
back to queued as it still has unresolved - * dependencies */ + * dependencies + */ atom->status = KBASE_JD_ATOM_STATE_QUEUED; - dev_dbg(kbdev->dev, "Atom %p status to queued\n", (void *)atom); + dev_dbg(kbdev->dev, "Atom %pK status to queued\n", (void *)atom); /* Undo the count, as the atom will get added again later but * leave the context priority adjusted or boosted, in case if @@ -1389,7 +1403,8 @@ bool kbasep_js_add_job(struct kbase_context *kctx, kbdev, kctx, atom->slot_nr); } /* If this context is active and the atom is the first on its slot, - * kick the job manager to attempt to fast-start the atom */ + * kick the job manager to attempt to fast-start the atom + */ if (enqueue_required && kctx == kbdev->hwaccess.active_kctx[atom->slot_nr]) kbase_jm_try_kick(kbdev, 1 << atom->slot_nr); @@ -1404,22 +1419,25 @@ bool kbasep_js_add_job(struct kbase_context *kctx, if (kbase_ctx_flag(kctx, KCTX_DYING)) { /* A job got added while/after kbase_job_zap_context() * was called on a non-scheduled context. Kill that job - * by killing the context. */ + * by killing the context. + */ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, false); } else if (js_kctx_info->ctx.nr_jobs == 1) { /* Handle Refcount going from 0 to 1: schedule the - * context on the Queue */ + * context on the Queue + */ KBASE_DEBUG_ASSERT(!kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx); + dev_dbg(kbdev->dev, "JS: Enqueue Context %pK", kctx); - /* Queue was updated - caller must try to - * schedule the head context */ + /* Queue was updated - caller must try to schedule the + * head context + */ WARN_ON(!enqueue_required); } } out_unlock: - dev_dbg(kbdev->dev, "Enqueue of kctx %p is %srequired\n", + dev_dbg(kbdev->dev, "Enqueue of kctx %pK is %srequired\n", kctx, enqueue_required ? 
"" : "not "); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); @@ -1448,7 +1466,7 @@ void kbasep_js_remove_job(struct kbase_device *kbdev, KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0); --(js_kctx_info->ctx.nr_jobs); dev_dbg(kbdev->dev, - "Remove atom %p from kctx %p; now %d in ctx\n", + "Remove atom %pK from kctx %pK; now %d in ctx\n", (void *)atom, (void *)kctx, js_kctx_info->ctx.nr_jobs); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -1478,7 +1496,8 @@ bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, * * This is because it returns false for soft-stopped atoms, but we * want to override that, because we're cancelling an atom regardless of - * whether it was soft-stopped or not */ + * whether it was soft-stopped or not + */ attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx, &katom_retained_state); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -1525,7 +1544,8 @@ static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release( if (js_devdata->nr_user_contexts_running != 0 && runpool_ctx_attr_change) { /* A change in runpool ctx attributes might mean we can - * run more jobs than before */ + * run more jobs than before + */ result = KBASEP_JS_RELEASE_RESULT_SCHED_ALL; KBASE_KTRACE_ADD_JM_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB, @@ -1624,7 +1644,8 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( /* Make a set of checks to see if the context should be scheduled out. * Note that there'll always be at least 1 reference to the context - * which was previously acquired by kbasep_js_schedule_ctx(). */ + * which was previously acquired by kbasep_js_schedule_ctx(). 
+ */ if (new_ref_count == 1 && (!kbasep_js_is_submit_allowed(js_devdata, kctx) || #ifdef CONFIG_MALI_ARBITER_SUPPORT @@ -1635,8 +1656,9 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( int slot; /* Last reference, and we've been told to remove this context - * from the Run Pool */ - dev_dbg(kbdev->dev, "JS: RunPool Remove Context %p because refcount=%d, jobs=%d, allowed=%d", + * from the Run Pool + */ + dev_dbg(kbdev->dev, "JS: RunPool Remove Context %pK because refcount=%d, jobs=%d, allowed=%d", kctx, new_ref_count, js_kctx_info->ctx.nr_jobs, kbasep_js_is_submit_allowed(js_devdata, kctx)); @@ -1646,7 +1668,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( for (slot = 0; slot < num_slots; slot++) { if (kbdev->hwaccess.active_kctx[slot] == kctx) { - dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n", + dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n", (void *)kctx, slot); kbdev->hwaccess.active_kctx[slot] = NULL; } @@ -1662,7 +1684,8 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx); /* Releasing the context and katom retained state can allow - * more jobs to run */ + * more jobs to run + */ release_result |= kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx, katom_retained_state, @@ -1702,7 +1725,8 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( kbase_ctx_flag_clear(kctx, KCTX_SCHEDULED); /* Signal any waiter that the context is not scheduled, so is * safe for termination - once the jsctx_mutex is also dropped, - * and jobs have finished. */ + * and jobs have finished. + */ wake_up(&js_kctx_info->ctx.is_scheduled_wait); /* Queue an action to occur after we've dropped the lock */ @@ -1744,9 +1768,10 @@ void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, if (kbase_ctx_flag(kctx, KCTX_DYING)) { /* Dying: don't requeue, but kill all jobs on the context. 
This - * happens asynchronously */ + * happens asynchronously + */ dev_dbg(kbdev->dev, - "JS: ** Killing Context %p on RunPool Remove **", kctx); + "JS: ** Killing Context %pK on RunPool Remove **", kctx); kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel); } } @@ -1798,7 +1823,8 @@ void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, } /* Variant of kbasep_js_runpool_release_ctx() that doesn't call into - * kbase_js_sched_all() */ + * kbase_js_sched_all() + */ static void kbasep_js_runpool_release_ctx_no_schedule( struct kbase_device *kbdev, struct kbase_context *kctx) { @@ -1851,7 +1877,7 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, bool kctx_suspended = false; int as_nr; - dev_dbg(kbdev->dev, "Scheduling kctx %p (s:%d)\n", kctx, js); + dev_dbg(kbdev->dev, "Scheduling kctx %pK (s:%d)\n", kctx, js); js_devdata = &kbdev->js_data; js_kctx_info = &kctx->jctx.sched_info; @@ -1867,7 +1893,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, kbdev, kctx); if (as_nr != KBASEP_AS_NR_INVALID) { /* Attempt to retain the context again, this should - * succeed */ + * succeed + */ mutex_lock(&kbdev->mmu_hw_mutex); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); as_nr = kbase_ctx_sched_retain_ctx(kctx); @@ -1926,7 +1953,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, KBASE_TLSTREAM_TL_RET_AS_CTX(kbdev, &kbdev->as[kctx->as_nr], kctx); /* Cause any future waiter-on-termination to wait until the context is - * descheduled */ + * descheduled + */ wake_up(&js_kctx_info->ctx.is_scheduled_wait); /* Re-check for suspending: a suspend could've occurred, and all the @@ -1939,7 +1967,8 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, * was taken (i.e. 
this condition doesn't execute), then the * kbasep_js_suspend() code will cleanup this context instead (by virtue * of it being called strictly after the suspend flag is set, and will - * wait for this lock to drop) */ + * wait for this lock to drop) + */ #ifdef CONFIG_MALI_ARBITER_SUPPORT if (kbase_pm_is_suspending(kbdev) || kbase_pm_is_gpu_lost(kbdev)) { #else @@ -1967,13 +1996,15 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, mutex_unlock(&js_devdata->runpool_mutex); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); /* Note: after this point, the context could potentially get scheduled - * out immediately */ + * out immediately + */ if (kctx_suspended) { /* Finishing forcing out the context due to a suspend. Use a * variant of kbasep_js_runpool_release_ctx() that doesn't * schedule a new context, to prevent a risk of recursion back - * into this function */ + * into this function + */ kbasep_js_runpool_release_ctx_no_schedule(kbdev, kctx); return false; } @@ -1992,7 +2023,7 @@ static bool kbase_js_use_ctx(struct kbase_device *kbdev, kbase_backend_use_ctx_sched(kbdev, kctx, js)) { dev_dbg(kbdev->dev, - "kctx %p already has ASID - mark as active (s:%d)\n", + "kctx %pK already has ASID - mark as active (s:%d)\n", (void *)kctx, js); if (kbdev->hwaccess.active_kctx[js] != kctx) { @@ -2059,7 +2090,8 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, kbase_js_sync_timers(kbdev); /* Fast-starting requires the jsctx_mutex to be dropped, - * because it works on multiple ctxs */ + * because it works on multiple ctxs + */ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); @@ -2071,7 +2103,8 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, kbase_ctx_flag(kctx, KCTX_SCHEDULED)); } else { /* Already scheduled in - We need to retain it to keep the - * corresponding address space */ + * corresponding address space + */ WARN_ON(!kbase_ctx_sched_inc_refcount(kctx)); 
mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); @@ -2116,7 +2149,8 @@ void kbasep_js_suspend(struct kbase_device *kbdev) js_devdata->runpool_irq.submit_allowed = 0; /* Retain each of the contexts, so we can cause it to leave even if it - * had no refcount to begin with */ + * had no refcount to begin with + */ for (i = BASE_MAX_NR_AS - 1; i >= 0; --i) { struct kbase_context *kctx = kbdev->as_to_kctx[i]; @@ -2137,7 +2171,8 @@ void kbasep_js_suspend(struct kbase_device *kbdev) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); /* De-ref the previous retain to ensure each context gets pulled out - * sometime later. */ + * sometime later. + */ for (i = 0; i < BASE_MAX_NR_AS; ++i, retained = retained >> 1) { @@ -2148,7 +2183,8 @@ void kbasep_js_suspend(struct kbase_device *kbdev) } /* Caller must wait for all Power Manager active references to be - * dropped */ + * dropped + */ } void kbasep_js_resume(struct kbase_device *kbdev) @@ -2162,7 +2198,7 @@ void kbasep_js_resume(struct kbase_device *kbdev) mutex_lock(&js_devdata->queue_mutex); for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { - for (prio = KBASE_JS_ATOM_SCHED_PRIO_HIGH; + for (prio = KBASE_JS_ATOM_SCHED_PRIO_FIRST; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { struct kbase_context *kctx, *n; unsigned long flags; @@ -2283,7 +2319,8 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, lockdep_assert_held(&kctx->jctx.lock); /* If slot will transition from unpullable to pullable then add to - * pullable list */ + * pullable list + */ if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) { enqueue_required = true; } else { @@ -2297,7 +2334,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, int js = katom->slot_nr; struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; - dev_dbg(kctx->kbdev->dev, "Add atom %p to X_DEP list (s:%d)\n", + dev_dbg(kctx->kbdev->dev, "Add atom %pK to X_DEP list (s:%d)\n", (void *)katom, js); 
list_add_tail(&katom->queue, &queue->x_dep_head); @@ -2307,7 +2344,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, add_required = false; } } else { - dev_dbg(kctx->kbdev->dev, "Atom %p not added to X_DEP list\n", + dev_dbg(kctx->kbdev->dev, "Atom %pK not added to X_DEP list\n", (void *)katom); } @@ -2321,7 +2358,7 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, } dev_dbg(kctx->kbdev->dev, - "Enqueue of kctx %p is %srequired to submit atom %p\n", + "Enqueue of kctx %pK is %srequired to submit atom %pK\n", kctx, enqueue_required ? "" : "not ", katom); return enqueue_required; @@ -2348,7 +2385,7 @@ static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) if (!kbase_js_atom_blocked_on_x_dep(katom)) { dev_dbg(kctx->kbdev->dev, - "Del atom %p from X_DEP list in js_move_to_tree\n", + "Del atom %pK from X_DEP list in js_move_to_tree\n", (void *)katom); list_del(&katom->queue); @@ -2366,7 +2403,7 @@ static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) } } else { dev_dbg(kctx->kbdev->dev, - "Atom %p blocked on x-dep in js_move_to_tree\n", + "Atom %pK blocked on x-dep in js_move_to_tree\n", (void *)katom); break; } @@ -2409,10 +2446,8 @@ static void kbase_js_evict_deps(struct kbase_context *kctx, KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { /* Remove dependency.*/ x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; - trace_sysgraph(SGR_DEP_RES, kctx->id, - kbase_jd_atom_id(kctx, x_dep)); - dev_dbg(kctx->kbdev->dev, "Cleared X_DEP flag on atom %p\n", + dev_dbg(kctx->kbdev->dev, "Cleared X_DEP flag on atom %pK\n", (void *)x_dep); /* Fail if it had a data dependency. 
*/ @@ -2434,14 +2469,14 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) KBASE_DEBUG_ASSERT(kctx); kbdev = kctx->kbdev; - dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %p (s:%d)\n", + dev_dbg(kbdev->dev, "JS: pulling an atom from kctx %pK (s:%d)\n", (void *)kctx, js); js_devdata = &kbdev->js_data; lockdep_assert_held(&kbdev->hwaccess_lock); if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) { - dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %p\n", + dev_dbg(kbdev->dev, "JS: No submit allowed for kctx %pK\n", (void *)kctx); return NULL; } @@ -2454,25 +2489,26 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) katom = jsctx_rb_peek(kctx, js); if (!katom) { - dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %p (s:%d)\n", + dev_dbg(kbdev->dev, "JS: No pullable atom in kctx %pK (s:%d)\n", (void *)kctx, js); return NULL; } if (kctx->blocked_js[js][katom->sched_priority]) { dev_dbg(kbdev->dev, - "JS: kctx %p is blocked from submitting atoms at priority %d (s:%d)\n", + "JS: kctx %pK is blocked from submitting atoms at priority %d (s:%d)\n", (void *)kctx, katom->sched_priority, js); return NULL; } if (atomic_read(&katom->blocked)) { - dev_dbg(kbdev->dev, "JS: Atom %p is blocked in js_pull\n", + dev_dbg(kbdev->dev, "JS: Atom %pK is blocked in js_pull\n", (void *)katom); return NULL; } /* Due to ordering restrictions when unpulling atoms on failure, we do * not allow multiple runs of fail-dep atoms from the same context to be - * present on the same slot */ + * present on the same slot + */ if (katom->pre_dep && atomic_read(&kctx->atoms_pulled_slot[js])) { struct kbase_jd_atom *prev_atom = kbase_backend_inspect_tail(kbdev, js); @@ -2486,14 +2522,14 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || katom->x_pre_dep->will_fail_event_code) { dev_dbg(kbdev->dev, - "JS: X pre-dep %p is not present in slot FIFO or will fail\n", + "JS: X pre-dep %pK is not 
present in slot FIFO or will fail\n", (void *)katom->x_pre_dep); return NULL; } if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && kbase_backend_nr_atoms_on_slot(kbdev, js)) { dev_dbg(kbdev->dev, - "JS: Atom %p has cross-slot fail dependency and atoms on slot (s:%d)\n", + "JS: Atom %pK has cross-slot fail dependency and atoms on slot (s:%d)\n", (void *)katom, js); return NULL; } @@ -2518,7 +2554,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) katom->ticks = 0; - dev_dbg(kbdev->dev, "JS: successfully pulled atom %p from kctx %p (s:%d)\n", + dev_dbg(kbdev->dev, "JS: successfully pulled atom %pK from kctx %pK (s:%d)\n", (void *)katom, (void *)kctx, js); return katom; @@ -2561,7 +2597,7 @@ static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom) return; dev_dbg(kctx->kbdev->dev, - "JS return start atom %p in state %d of RP %d\n", + "JS return start atom %pK in state %d of RP %d\n", (void *)start_katom, (int)rp->state, start_katom->renderpass_id); @@ -2589,7 +2625,7 @@ static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom) /* Prevent the tiler job being pulled for execution in the * job scheduler again. */ - dev_dbg(kbdev->dev, "Blocking start atom %p\n", + dev_dbg(kbdev->dev, "Blocking start atom %pK\n", (void *)start_katom); atomic_inc(&start_katom->blocked); @@ -2601,14 +2637,14 @@ static void js_return_of_start_rp(struct kbase_jd_atom *const start_katom) /* Was the fragment job chain submitted to kbase yet? */ end_katom = rp->end_katom; if (end_katom) { - dev_dbg(kctx->kbdev->dev, "JS return add end atom %p\n", + dev_dbg(kctx->kbdev->dev, "JS return add end atom %pK\n", (void *)end_katom); if (rp->state == KBASE_JD_RP_RETRY_OOM) { /* Allow the end of the renderpass to be pulled for * execution again to continue incremental rendering. 
*/ - dev_dbg(kbdev->dev, "Unblocking end atom %p\n", + dev_dbg(kbdev->dev, "Unblocking end atom %pK\n", (void *)end_katom); atomic_dec(&end_katom->blocked); WARN_ON(!(end_katom->atom_flags & @@ -2670,7 +2706,7 @@ static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom) return; dev_dbg(kctx->kbdev->dev, - "JS return end atom %p in state %d of RP %d\n", + "JS return end atom %pK in state %d of RP %d\n", (void *)end_katom, (int)rp->state, end_katom->renderpass_id); if (WARN_ON(rp->state != KBASE_JD_RP_OOM && @@ -2692,14 +2728,14 @@ static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); dev_dbg(kbdev->dev, - "Reset backing to %zu pages for region %p\n", + "Reset backing to %zu pages for region %pK\n", reg->threshold_pages, (void *)reg); if (!WARN_ON(reg->flags & KBASE_REG_VA_FREED)) kbase_mem_shrink(kctx, reg, reg->threshold_pages); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - dev_dbg(kbdev->dev, "Deleting region %p from list\n", + dev_dbg(kbdev->dev, "Deleting region %pK from list\n", (void *)reg); list_del_init(&reg->link); kbase_va_region_alloc_put(kctx, reg); @@ -2717,7 +2753,7 @@ static void js_return_of_end_rp(struct kbase_jd_atom *const end_katom) */ start_katom = rp->start_katom; if (!WARN_ON(!start_katom)) { - dev_dbg(kbdev->dev, "Unblocking start atom %p\n", + dev_dbg(kbdev->dev, "Unblocking start atom %pK\n", (void *)start_katom); atomic_dec(&start_katom->blocked); (void)kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, @@ -2743,7 +2779,7 @@ static void js_return_worker(struct work_struct *data) unsigned long flags; base_jd_core_req core_req = katom->core_req; - dev_dbg(kbdev->dev, "%s for atom %p with event code 0x%x\n", + dev_dbg(kbdev->dev, "%s for atom %pK with event code 0x%x\n", __func__, (void *)katom, katom->event_code); if (katom->event_code != BASE_JD_EVENT_END_RP_DONE) @@ -2771,13 +2807,15 @@ static void js_return_worker(struct work_struct *data)
timer_sync |= kbase_js_ctx_list_remove_nolock(kbdev, kctx, js); /* If this slot has been blocked due to soft-stopped atoms, and all - * atoms have now been processed, then unblock the slot */ + * atoms have now been processed, then unblock the slot + */ if (!kctx->atoms_pulled_slot_pri[js][prio] && kctx->blocked_js[js][prio]) { kctx->blocked_js[js][prio] = false; /* Only mark the slot as pullable if the context is not idle - - * that case is handled below */ + * that case is handled below + */ if (atomic_read(&kctx->atoms_pulled) && kbase_js_ctx_pullable(kctx, js, true)) timer_sync |= kbase_js_ctx_list_add_pullable_nolock( @@ -2786,12 +2824,12 @@ static void js_return_worker(struct work_struct *data) if (!atomic_read(&kctx->atoms_pulled)) { dev_dbg(kbdev->dev, - "No atoms currently pulled from context %p\n", + "No atoms currently pulled from context %pK\n", (void *)kctx); if (!kctx->slots_pullable) { dev_dbg(kbdev->dev, - "Context %p %s counted as runnable\n", + "Context %pK %s counted as runnable\n", (void *)kctx, kbase_ctx_flag(kctx, KCTX_RUNNABLE_REF) ? "is" : "isn't"); @@ -2827,7 +2865,7 @@ static void js_return_worker(struct work_struct *data) if (context_idle) { dev_dbg(kbdev->dev, - "Context %p %s counted as active\n", + "Context %pK %s counted as active\n", (void *)kctx, kbase_ctx_flag(kctx, KCTX_ACTIVE) ?
"is" : "isn't"); @@ -2866,13 +2904,13 @@ static void js_return_worker(struct work_struct *data) kbase_backend_complete_wq_post_sched(kbdev, core_req); - dev_dbg(kbdev->dev, "Leaving %s for atom %p\n", + dev_dbg(kbdev->dev, "Leaving %s for atom %pK\n", __func__, (void *)katom); } void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) { - dev_dbg(kctx->kbdev->dev, "Unpulling atom %p in kctx %p\n", + dev_dbg(kctx->kbdev->dev, "Unpulling atom %pK in kctx %pK\n", (void *)katom, (void *)kctx); lockdep_assert_held(&kctx->kbdev->hwaccess_lock); @@ -2927,7 +2965,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx, return false; dev_dbg(kctx->kbdev->dev, - "Start atom %p is done in state %d of RP %d\n", + "Start atom %pK is done in state %d of RP %d\n", (void *)start_katom, (int)rp->state, start_katom->renderpass_id); @@ -2939,7 +2977,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx, unsigned long flags; dev_dbg(kctx->kbdev->dev, - "Start atom %p completed before soft-stop\n", + "Start atom %pK completed before soft-stop\n", (void *)start_katom); kbase_gpu_vm_lock(kctx); @@ -2951,7 +2989,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx, struct kbase_va_region, link); WARN_ON(reg->flags & KBASE_REG_VA_FREED); - dev_dbg(kctx->kbdev->dev, "Deleting region %p from list\n", + dev_dbg(kctx->kbdev->dev, "Deleting region %pK from list\n", (void *)reg); list_del_init(&reg->link); kbase_va_region_alloc_put(kctx, reg); @@ -2961,7 +2999,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx, kbase_gpu_vm_unlock(kctx); } else { dev_dbg(kctx->kbdev->dev, - "Start atom %p did not exceed memory threshold\n", + "Start atom %pK did not exceed memory threshold\n", (void *)start_katom); WARN_ON(rp->state != KBASE_JD_RP_START && @@ -2978,7 +3016,7 @@ static bool js_complete_start_rp(struct kbase_context *kctx, /* Allow the end of the renderpass to be pulled for * execution again to continue incremental rendering.
*/ - dev_dbg(kbdev->dev, "Unblocking end atom %p!\n", + dev_dbg(kbdev->dev, "Unblocking end atom %pK!\n", (void *)end_katom); atomic_dec(&end_katom->blocked); @@ -3022,7 +3060,7 @@ static void js_complete_end_rp(struct kbase_context *kctx, if (WARN_ON(rp->end_katom != end_katom)) return; - dev_dbg(kbdev->dev, "End atom %p is done in state %d of RP %d\n", + dev_dbg(kbdev->dev, "End atom %pK is done in state %d of RP %d\n", (void *)end_katom, (int)rp->state, end_katom->renderpass_id); if (WARN_ON(rp->state == KBASE_JD_RP_COMPLETE) || @@ -3056,7 +3094,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, kbdev = kctx->kbdev; atom_slot = katom->slot_nr; - dev_dbg(kbdev->dev, "%s for atom %p (s:%d)\n", + dev_dbg(kbdev->dev, "%s for atom %pK (s:%d)\n", __func__, (void *)katom, atom_slot); /* Update the incremental rendering state machine. @@ -3075,7 +3113,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { - dev_dbg(kbdev->dev, "Atom %p is in runnable_tree\n", + dev_dbg(kbdev->dev, "Atom %pK is in runnable_tree\n", (void *)katom); context_idle = !atomic_dec_return(&kctx->atoms_pulled); @@ -3091,11 +3129,12 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, } /* If this slot has been blocked due to soft-stopped atoms, and - * all atoms have now been processed, then unblock the slot */ + * all atoms have now been processed, then unblock the slot + */ if (!kctx->atoms_pulled_slot_pri[atom_slot][prio] && kctx->blocked_js[atom_slot][prio]) { dev_dbg(kbdev->dev, - "kctx %p is no longer blocked from submitting on slot %d at priority %d\n", + "kctx %pK is no longer blocked from submitting on slot %d at priority %d\n", (void *)kctx, atom_slot, prio); kctx->blocked_js[atom_slot][prio] = false; @@ -3149,7 +3188,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, * jd_done_worker(). 
*/ if (context_idle) { - dev_dbg(kbdev->dev, "kctx %p is no longer active\n", + dev_dbg(kbdev->dev, "kctx %pK is no longer active\n", (void *)kctx); kbase_ctx_flag_clear(kctx, KCTX_ACTIVE); } @@ -3200,7 +3239,7 @@ static bool js_end_rp_is_complete(struct kbase_jd_atom *const end_katom) return true; dev_dbg(kbdev->dev, - "JS complete end atom %p in state %d of RP %d\n", + "JS complete end atom %pK in state %d of RP %d\n", (void *)end_katom, (int)rp->state, end_katom->renderpass_id); @@ -3229,7 +3268,7 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, struct kbase_jd_atom *x_dep = katom->x_post_dep; kbdev = kctx->kbdev; - dev_dbg(kbdev->dev, "Atom %p complete in kctx %p (post-dep %p)\n", + dev_dbg(kbdev->dev, "Atom %pK complete in kctx %pK (post-dep %pK)\n", (void *)katom, (void *)kctx, (void *)x_dep); lockdep_assert_held(&kctx->kbdev->hwaccess_lock); @@ -3245,7 +3284,7 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, katom->event_code = katom->will_fail_event_code; katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED; - dev_dbg(kbdev->dev, "Atom %p status to HW completed\n", (void *)katom); + dev_dbg(kbdev->dev, "Atom %pK status to HW completed\n", (void *)katom); if (katom->event_code != BASE_JD_EVENT_DONE) { kbase_js_evict_deps(kctx, katom, katom->slot_nr, @@ -3267,9 +3306,7 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr, false); x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; - trace_sysgraph(SGR_DEP_RES, kctx->id, - kbase_jd_atom_id(katom->kctx, x_dep)); - dev_dbg(kbdev->dev, "Cleared X_DEP flag on atom %p\n", + dev_dbg(kbdev->dev, "Cleared X_DEP flag on atom %pK\n", (void *)x_dep); kbase_js_move_to_tree(x_dep); @@ -3280,13 +3317,13 @@ struct kbase_jd_atom *kbase_js_complete_atom(struct kbase_jd_atom *katom, x_dep->slot_nr); if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { - dev_dbg(kbdev->dev, "Atom %p is 
in runnable tree\n", + dev_dbg(kbdev->dev, "Atom %pK is in runnable tree\n", (void *)x_dep); return x_dep; } } else { dev_dbg(kbdev->dev, - "No cross-slot dep to unblock for atom %p\n", + "No cross-slot dep to unblock for atom %pK\n", (void *)katom); } @@ -3317,13 +3354,13 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom) if (!(katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { - dev_dbg(kbdev->dev, "Atom %p is not blocked on a cross-slot dependency", + dev_dbg(kbdev->dev, "Atom %pK is not blocked on a cross-slot dependency", (void *)katom); return false; } if (!(katom->core_req & BASE_JD_REQ_END_RENDERPASS)) { - dev_dbg(kbdev->dev, "Atom %p is blocked on a cross-slot dependency", + dev_dbg(kbdev->dev, "Atom %pK is blocked on a cross-slot dependency", (void *)katom); return true; } @@ -3349,12 +3386,12 @@ bool kbase_js_atom_blocked_on_x_dep(struct kbase_jd_atom *const katom) * if it only depends on the tiler job chain. */ if (katom->x_pre_dep != rp->start_katom) { - dev_dbg(kbdev->dev, "Dependency is on %p not start atom %p\n", + dev_dbg(kbdev->dev, "Dependency is on %pK not start atom %pK\n", (void *)katom->x_pre_dep, (void *)rp->start_katom); return true; } - dev_dbg(kbdev->dev, "Ignoring cross-slot dep on atom %p\n", + dev_dbg(kbdev->dev, "Ignoring cross-slot dep on atom %pK\n", (void *)katom->x_pre_dep); return false; @@ -3368,7 +3405,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) bool ctx_waiting[BASE_JM_MAX_NR_SLOTS]; int js; - dev_dbg(kbdev->dev, "%s kbdev %p mask 0x%x\n", + dev_dbg(kbdev->dev, "%s kbdev %pK mask 0x%x\n", __func__, (void *)kbdev, (unsigned int)js_mask); js_devdata = &kbdev->js_data; @@ -3403,7 +3440,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) context_idle = true; dev_dbg(kbdev->dev, - "kctx %p is not active (s:%d)\n", + "kctx %pK is not active (s:%d)\n", (void *)kctx, js); if (kbase_pm_context_active_handle_suspend( @@ -3412,7 +3449,8 @@ void kbase_js_sched(struct 
kbase_device *kbdev, int js_mask) dev_dbg(kbdev->dev, "Suspend pending (s:%d)\n", js); /* Suspend pending - return context to - * queue and stop scheduling */ + * queue and stop scheduling + */ mutex_lock( &kctx->jctx.sched_info.ctx.jsctx_mutex); if (kbase_js_ctx_list_add_pullable_head( @@ -3432,7 +3470,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) &kctx->jctx.sched_info.ctx.jsctx_mutex); dev_dbg(kbdev->dev, - "kctx %p cannot be used at this time\n", + "kctx %pK cannot be used at this time\n", kctx); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -3474,7 +3512,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) bool pullable; dev_dbg(kbdev->dev, - "No atoms pulled from kctx %p (s:%d)\n", + "No atoms pulled from kctx %pK (s:%d)\n", (void *)kctx, js); pullable = kbase_js_ctx_pullable(kctx, js, @@ -3483,7 +3521,8 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) /* Failed to pull jobs - push to head of list. * Unless this context is already 'active', in * which case it's effectively already scheduled - * so push it to the back of the list. */ + * so push it to the back of the list. + */ if (pullable && kctx == last_active[js] && kbase_ctx_flag(kctx, (KCTX_PULLED_SINCE_ACTIVE_JS0 << @@ -3508,7 +3547,8 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) * slot, then we need to remove the active * marker to prevent it from submitting atoms in * the IRQ handler, which would prevent this - * context from making progress. */ + * context from making progress. 
+ */ if (last_active[js] && kctx != last_active[js] && kbase_js_ctx_pullable( last_active[js], js, true)) @@ -3534,7 +3574,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) break; /* Could not run atoms on this slot */ } - dev_dbg(kbdev->dev, "Push kctx %p to back of list\n", + dev_dbg(kbdev->dev, "Push kctx %pK to back of list\n", (void *)kctx); if (kbase_js_ctx_pullable(kctx, js, true)) timer_sync |= @@ -3556,7 +3596,7 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) { if (kbdev->hwaccess.active_kctx[js] == last_active[js] && ctx_waiting[js]) { - dev_dbg(kbdev->dev, "Marking kctx %p as inactive (s:%d)\n", + dev_dbg(kbdev->dev, "Marking kctx %pK as inactive (s:%d)\n", (void *)last_active[js], js); kbdev->hwaccess.active_kctx[js] = NULL; } @@ -3580,13 +3620,14 @@ void kbase_js_zap_context(struct kbase_context *kctx) /* First, atomically do the following: * - mark the context as dying - * - try to evict it from the queue */ + * - try to evict it from the queue + */ mutex_lock(&kctx->jctx.lock); mutex_lock(&js_devdata->queue_mutex); mutex_lock(&js_kctx_info->ctx.jsctx_mutex); kbase_ctx_flag_set(kctx, KCTX_DYING); - dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx); + dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %pK", kctx); /* * At this point we know: @@ -3650,13 +3691,14 @@ void kbase_js_zap_context(struct kbase_context *kctx) KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u, kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx); + dev_dbg(kbdev->dev, "Zap: Ctx %pK scheduled=0", kctx); /* Only cancel jobs when we evicted from the * queue. No Power Manager active reference was held. 
* - * Having is_dying set ensures that this kills, and - * doesn't requeue */ + * Having is_dying set ensures that this kills, and doesn't + * requeue + */ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, false); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); @@ -3667,9 +3709,10 @@ void kbase_js_zap_context(struct kbase_context *kctx) bool was_retained; /* Case c: didn't evict, but it is scheduled - it's in the Run - * Pool */ + * Pool + */ KBASE_KTRACE_ADD_JM(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u, kbase_ctx_flag(kctx, KCTX_SCHEDULED)); - dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx); + dev_dbg(kbdev->dev, "Zap: Ctx %pK is in RunPool", kctx); /* Disable the ctx from submitting any more jobs */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -3678,18 +3721,21 @@ void kbase_js_zap_context(struct kbase_context *kctx) /* Retain and (later) release the context whilst it is is now * disallowed from submitting jobs - ensures that someone - * somewhere will be removing the context later on */ + * somewhere will be removing the context later on + */ was_retained = kbase_ctx_sched_inc_refcount_nolock(kctx); /* Since it's scheduled and we have the jsctx_mutex, it must be - * retained successfully */ + * retained successfully + */ KBASE_DEBUG_ASSERT(was_retained); - dev_dbg(kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx); + dev_dbg(kbdev->dev, "Zap: Ctx %pK Kill Any Running jobs", kctx); /* Cancel any remaining running jobs for this kctx - if any. * Submit is disallowed which takes effect immediately, so no - * more new jobs will appear after we do this. */ + * more new jobs will appear after we do this. 
+ */ kbase_backend_jm_kill_running_jobs_from_kctx(kctx); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -3697,7 +3743,7 @@ void kbase_js_zap_context(struct kbase_context *kctx) mutex_unlock(&js_devdata->queue_mutex); mutex_unlock(&kctx->jctx.lock); - dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)", + dev_dbg(kbdev->dev, "Zap: Ctx %pK Release (may or may not schedule out immediately)", kctx); kbasep_js_runpool_release_ctx(kbdev, kctx); @@ -3711,7 +3757,8 @@ void kbase_js_zap_context(struct kbase_context *kctx) * to be destroyed, and the context to be de-scheduled (if it was on the * runpool). * - * kbase_jd_zap_context() will do this. */ + * kbase_jd_zap_context() will do this. + */ } static inline int trace_get_refcnt(struct kbase_device *kbdev, @@ -3739,7 +3786,7 @@ static inline int trace_get_refcnt(struct kbase_device *kbdev, * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex. */ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, - kbasep_js_ctx_job_cb callback) + kbasep_js_ctx_job_cb *callback) { struct kbase_device *kbdev; unsigned long flags; @@ -3758,3 +3805,18 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } + +base_jd_prio kbase_js_priority_check(struct kbase_device *kbdev, base_jd_prio priority) +{ + struct priority_control_manager_device *pcm_device = kbdev->pcm_dev; + int req_priority, out_priority; + base_jd_prio out_jd_priority = priority; + + if (pcm_device) { + req_priority = kbasep_js_atom_prio_to_sched_prio(priority); + out_priority = pcm_device->ops.pcm_scheduler_priority_check(pcm_device, current, req_priority); + out_jd_priority = kbasep_js_sched_prio_to_atom_prio(out_priority); + } + return out_jd_priority; +} + diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h index 541acd4..96974c8 100644 --- 
a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,15 +17,10 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - - /** - * @file mali_kbase_js.h - * Job Scheduler APIs. + * DOC: Job Scheduler APIs. */ #ifndef _KBASE_JS_H_ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c index 141d04a..7775648 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2016, 2018, 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016, 2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,11 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - #include #include @@ -29,8 +27,11 @@ */ /** - * @brief Check whether a ctx has a certain attribute, and if so, retain that + * Check whether a ctx has a certain attribute, and if so, retain that * attribute on the runpool. + * @kbdev: Device pointer + * @kctx: KBase context + * @attribute: Atribute to check/retain * * Requires: * - jsctx mutex @@ -75,8 +76,11 @@ static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, s } /** - * @brief Check whether a ctx has a certain attribute, and if so, release that + * Check whether a ctx has a certain attribute, and if so, release that * attribute on the runpool. + * @kbdev: Device pointer + * @kctx: KBase context + * @attribute: Atribute to release * * Requires: * - jsctx mutex @@ -120,8 +124,11 @@ static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, } /** - * @brief Retain a certain attribute on a ctx, also retaining it on the runpool + * Retain a certain attribute on a ctx, also retaining it on the runpool * if the context is scheduled. + * @kbdev: Device pointer + * @kctx: KBase context + * @attribute: Atribute to retain * * Requires: * - jsctx mutex @@ -156,9 +163,12 @@ static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struc return runpool_state_changed; } -/* - * @brief Release a certain attribute on a ctx, also releasing it from the runpool +/** + * Release a certain attribute on a ctx, also releasing it from the runpool * if the context is scheduled. 
+ * @kbdev: Device pointer + * @kctx: KBase context + * @attribute: Atribute to release * * Requires: * - jsctx mutex @@ -211,7 +221,8 @@ void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kb /* We don't need to know about state changed, because retaining a * context occurs on scheduling it, and that itself will also try - * to run new atoms */ + * to run new atoms + */ CSTD_UNUSED(runpool_state_changed); } } @@ -251,9 +262,9 @@ void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES); } - /* We don't need to know about state changed, because retaining an - * atom occurs on adding it, and that itself will also try to run - * new atoms */ + /* We don't need to know about state changed, because retaining an atom + * occurs on adding it, and that itself will also try to run new atoms + */ CSTD_UNUSED(runpool_state_changed); } diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h index 25fd397..6f29241 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2012-2015, 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015, 2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,37 +17,19 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - - /** - * @file mali_kbase_js_ctx_attr.h - * Job Scheduler Context Attribute APIs + * DOC: Job Scheduler Context Attribute APIs */ #ifndef _KBASE_JS_CTX_ATTR_H_ #define _KBASE_JS_CTX_ATTR_H_ -/** - * @addtogroup base_api - * @{ - */ - -/** - * @addtogroup base_kbase_api - * @{ - */ - -/** - * @addtogroup kbase_js - * @{ - */ - /** * Retain all attributes of a context + * @kbdev: KBase device + * @kctx: KBase context * * This occurs on scheduling in the context on the runpool (but after * is_scheduled is set) @@ -60,6 +43,8 @@ void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kb /** * Release all attributes of a context + * @kbdev: KBase device + * @kctx: KBase context * * This occurs on scheduling out the context from the runpool (but before * is_scheduled is cleared) @@ -79,6 +64,9 @@ bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct k /** * Retain all attributes of an atom + * @kbdev: KBase device + * @kctx: KBase context + * @katom: Atom * * This occurs on adding an atom to a context * @@ -90,6 +78,9 @@ void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase /** * Release all attributes of an atom, given its retained state. 
+ * @kbdev: KBase device + * @kctx: KBase context + * @katom_retained_state: Retained state * * This occurs after (permanently) removing an atom from a context * @@ -107,7 +98,7 @@ void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase */ bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state); -/** +/* * Requires: * - runpool_irq spinlock */ @@ -122,7 +113,7 @@ static inline s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev, return js_devdata->runpool_irq.ctx_attr_ref_count[attribute]; } -/** +/* * Requires: * - runpool_irq spinlock */ @@ -132,7 +123,7 @@ static inline bool kbasep_js_ctx_attr_is_attr_on_runpool(struct kbase_device *kb return (bool) kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute); } -/** +/* * Requires: * - jsctx mutex */ @@ -148,8 +139,4 @@ static inline bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx, return (bool) (js_kctx_info->ctx.ctx_attr_ref_count[attribute]); } - /** @} *//* end group kbase_js */ - /** @} *//* end group base_kbase_api */ - /** @} *//* end group base_api */ - #endif /* _KBASE_JS_DEFS_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_kinstr_jm.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_kinstr_jm.c new file mode 100644 index 0000000..1b23b41 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_kinstr_jm.c @@ -0,0 +1,894 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * mali_kbase_kinstr_jm.c + * Kernel driver public interface to job manager atom tracing + */ + +#include "mali_kbase_kinstr_jm.h" +#include + +#include "mali_kbase.h" +#include "mali_kbase_linux.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if KERNEL_VERSION(5, 1, 0) <= LINUX_VERSION_CODE +#include +#else +// Stringify the expression if no message is given. +#define static_assert(e, ...) __static_assert(e, #__VA_ARGS__, #e) +#define __static_assert(e, msg, ...) _Static_assert(e, msg) +#endif + +#if KERNEL_VERSION(4, 16, 0) >= LINUX_VERSION_CODE +typedef unsigned int __poll_t; +#endif + +#ifndef ENOTSUP +#define ENOTSUP EOPNOTSUPP +#endif + +/* The module printing prefix */ +#define PR_ "mali_kbase_kinstr_jm: " + +/* Allows us to perform ASM goto for the tracing + * https://www.kernel.org/doc/Documentation/static-keys.txt + */ +DEFINE_STATIC_KEY_FALSE(basep_kinstr_jm_reader_static_key); + +#define KBASE_KINSTR_JM_VERSION 2 + +/** + * struct kbase_kinstr_jm - The context for the kernel job manager atom tracing + * @readers: a bitlocked list of opened readers. Readers are attached to the + * private data of a file descriptor that the user opens with the + * KBASE_IOCTL_KINSTR_JM_FD IO control call. + * @refcount: reference count for the context. Any reader will have a link + * back to the context so that they can remove themselves from the + * list. 
+ * + * This is opaque outside this compilation unit + */ +struct kbase_kinstr_jm { + struct hlist_bl_head readers; + struct kref refcount; +}; + +/** + * struct kbase_kinstr_jm_atom_state_change - Represents an atom changing to a + * new state + * @timestamp: Raw monotonic nanoseconds of the state change + * @state: The state that the atom has moved to + * @atom: The atom number that has changed state + * @flags: Flags associated with the state change. See + * KBASE_KINSTR_JM_ATOM_STATE_FLAG_* defines. + * @reserved: Reserved for future use. + * @data: Extra data for the state change. Active member depends on state. + * @data.start: Extra data for the state change. Active member depends on + * state. + * @data.start.slot: Extra data for the state change. Active member depends on + * state. + * @data.padding: Padding + * + * We can add new fields to the structure and old user code will gracefully + * ignore the new fields. + * + * We can change the size of the structure and old user code will gracefully + * skip over the new size via `struct kbase_kinstr_jm_fd_out->size`. + * + * If we remove fields, the version field in `struct + * kbase_kinstr_jm_fd_out->version` will be incremented and old user code will + * gracefully fail and tell the user that the kernel API is too new and has + * backwards-incompatible changes. Note that one userspace can opt to handle + * multiple kernel major versions of the structure. + * + * If we need to change the _meaning_ of one of the fields, i.e. the state + * machine has had a incompatible change, we can keep the same members in the + * structure and update the version as above. User code will no longer + * recognise that it has the supported field and can gracefully explain to the + * user that the kernel API is no longer supported. + * + * When making changes to this structure, make sure they are either: + * - additions to the end (for minor version bumps (i.e. 
only a size increase)) + * such that the layout of existing fields doesn't change, or; + * - update the version reported to userspace so that it can fail explicitly. + */ +struct kbase_kinstr_jm_atom_state_change { + u64 timestamp; + s8 state; /* enum kbase_kinstr_jm_reader_atom_state */ + u8 atom; + u8 flags; + u8 reserved[1]; + /* Tagged union based on state. Ensure members are aligned correctly! */ + union { + struct { + u8 slot; + } start; + u8 padding[4]; + } data; +}; +static_assert( + ((1 << 8 * sizeof(((struct kbase_kinstr_jm_atom_state_change *)0)->state)) - 1) >= + KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT); + +#define KBASE_KINSTR_JM_ATOM_STATE_FLAG_OVERFLOW BIT(0) + +/** + * struct reader_changes - The circular buffer of kernel atom state changes + * @data: The allocated buffer. This is allocated when the user requests + * the reader file descriptor. It is released when the user calls + * close() on the fd. When accessing this, lock the producer spin + * lock to prevent races on the allocated memory. The consume lock + * does not need to be held because newly-inserted data will always + * be outside the currenly-read range. + * @producer: The producing spinlock which allows us to push changes into the + * buffer at the same time as a user read occurring. This needs to + * be locked when saving/restoring the IRQ because we can receive an + * interrupt from the GPU when an atom completes. The CPU could have + * a task preempted that is holding this lock. + * @consumer: The consuming mutex which locks around the user read(). + * Must be held when updating the tail of the circular buffer. + * @head: The head of the circular buffer. Can be used with Linux @c CIRC_ + * helpers. The producer should lock and update this with an SMP + * store when a new change lands. The consumer can read with an + * SMP load. This allows the producer to safely insert new changes + * into the circular buffer. + * @tail: The tail of the circular buffer. 
Can be used with Linux @c CIRC_ + * helpers. The producer should do a READ_ONCE load and the consumer + * should SMP store. + * @size: The number of changes that are allowed in @c data. Can be used + * with Linux @c CIRC_ helpers. Will always be a power of two. The + * producer lock should be held when updating this and stored with + * an SMP release memory barrier. This means that the consumer can + * do an SMP load. + * @threshold: The number of changes above which threads polling on the reader + * file descriptor will be woken up. + */ +struct reader_changes { + struct kbase_kinstr_jm_atom_state_change *data; + spinlock_t producer; + struct mutex consumer; + u32 head; + u32 tail; + u32 size; + u32 threshold; +}; + +/** + * reader_changes_is_valid_size() - Determines if requested changes buffer size + * is valid. + * @size: The requested memory size + * + * We have a constraint that the underlying physical buffer must be a + * power of two so that we can use the efficient circular buffer helpers that + * the kernel provides. It also needs to be representable within a u32. + * + * Return: + * * true - the size is valid + * * false - the size is invalid + */ +static inline bool reader_changes_is_valid_size(const size_t size) +{ + typedef struct reader_changes changes_t; + const size_t elem_size = sizeof(*((changes_t *)0)->data); + const size_t size_size = sizeof(((changes_t *)0)->size); + const size_t size_max = (1ull << (size_size * 8)) - 1; + + return is_power_of_2(size) && /* Is a power of two */ + ((size / elem_size) <= size_max); /* Small enough */ +} + +/** + * reader_changes_init() - Initializes the reader changes and allocates the + * changes buffer + * @changes: The context pointer, must point to a zero-inited allocated reader + * changes structure. We may support allocating the structure in the + * future. 
+ * @size: The requested changes buffer size + * + * Return: + * (0, U16_MAX] - the number of data elements allocated + * -EINVAL - a pointer was invalid + * -ENOTSUP - we do not support allocation of the context + * -ERANGE - the requested memory size was invalid + * -ENOMEM - could not allocate the memory + * -EADDRINUSE - the buffer memory was already allocated + */ +static int reader_changes_init(struct reader_changes *const changes, + const size_t size) +{ + BUILD_BUG_ON((PAGE_SIZE % sizeof(*changes->data)) != 0); + + if (!reader_changes_is_valid_size(size)) { + pr_warn(PR_ "invalid size %zu\n", size); + return -ERANGE; + } + + changes->data = vmalloc(size); + if (!changes->data) + return -ENOMEM; + + spin_lock_init(&changes->producer); + mutex_init(&changes->consumer); + + changes->size = size / sizeof(*changes->data); + changes->threshold = min(((size_t)(changes->size)) / 4, + ((size_t)(PAGE_SIZE)) / sizeof(*changes->data)); + + return changes->size; +} + +/** + * reader_changes_term() - Cleans up a reader changes structure + * @changes: The context to clean up + * + * Releases the allocated state changes memory + */ +static void reader_changes_term(struct reader_changes *const changes) +{ + struct kbase_kinstr_jm_atom_state_change *data = NULL; + unsigned long irq; + + /* + * Although changes->data is used on the consumer side, too, no active + * consumer is possible by the time we clean up the reader changes, so + * no need to take the consumer lock. However, we do need the producer + * lock because the list removal can race with list traversal. + */ + spin_lock_irqsave(&changes->producer, irq); + swap(changes->data, data); + spin_unlock_irqrestore(&changes->producer, irq); + + mutex_destroy(&changes->consumer); + vfree(data); +} + +/** + * reader_changes_count_locked() - Retrieves the count of state changes from the + * tail to the physical end of the buffer + * @changes: The state changes context + * + * The consumer mutex must be held. 
Uses the CIRC_CNT_TO_END macro to + * determine the count, so there may be more items. However, that's the maximum + * number that can be read in one contiguous read. + * + * Return: the number of changes in the circular buffer until the end of the + * allocation + */ +static u32 reader_changes_count_locked(struct reader_changes *const changes) +{ + u32 head; + + lockdep_assert_held_once(&changes->consumer); + + head = smp_load_acquire(&changes->head); + + return CIRC_CNT_TO_END(head, changes->tail, changes->size); +} + +/** + * reader_changes_count() - Retrieves the count of state changes from the + * tail to the physical end of the buffer + * @changes: The state changes context + * + * Return: the number of changes in the circular buffer until the end of the + * allocation + */ +static u32 reader_changes_count(struct reader_changes *const changes) +{ + u32 ret; + + mutex_lock(&changes->consumer); + ret = reader_changes_count_locked(changes); + mutex_unlock(&changes->consumer); + return ret; +} + +/** + * reader_changes_push() - Pushes a change into the reader circular buffer. + * @changes: The buffer to insert the change into + * @change: Kernel atom change to insert + * @wait_queue: The queue to be kicked when changes should be read from + * userspace. Kicked when a threshold is reached or there is + * overflow. + */ +static void reader_changes_push( + struct reader_changes *const changes, + const struct kbase_kinstr_jm_atom_state_change *const change, + wait_queue_head_t *const wait_queue) +{ + u32 head, tail, size, space; + unsigned long irq; + struct kbase_kinstr_jm_atom_state_change *data; + + spin_lock_irqsave(&changes->producer, irq); + + /* We may be called for a reader_changes that's awaiting cleanup. 
*/ + data = changes->data; + if (!data) + goto unlock; + + size = changes->size; + head = changes->head; + tail = smp_load_acquire(&changes->tail); + + space = CIRC_SPACE(head, tail, size); + if (space >= 1) { + data[head] = *change; + if (space == 1) { + data[head].flags |= + KBASE_KINSTR_JM_ATOM_STATE_FLAG_OVERFLOW; + pr_warn(PR_ "overflow of circular buffer\n"); + } + smp_store_release(&changes->head, (head + 1) & (size - 1)); + } + + /* Wake for either overflow or over-threshold cases. */ + if (CIRC_CNT(head + 1, tail, size) >= changes->threshold) + wake_up_interruptible(wait_queue); + +unlock: + spin_unlock_irqrestore(&changes->producer, irq); +} + +/** + * struct reader - Allows the kernel state changes to be read by user space. + * @node: The node in the @c readers locked list + * @rcu_head: storage for the RCU callback to free this reader (see kfree_rcu) + * @changes: The circular buffer of user changes + * @wait_queue: A wait queue for poll + * @context: a pointer to the parent context that created this reader. Can be + * used to remove the reader from the list of readers. Reference + * counted. + * + * The reader is a circular buffer in kernel space. State changes are pushed + * into the buffer. The flow from user space is: + * + * * Request file descriptor with KBASE_IOCTL_KINSTR_JM_FD. This will + * allocate the kernel side circular buffer with a size specified in the + * ioctl argument. + * * The user will then poll the file descriptor for data + * * Upon receiving POLLIN, perform a read() on the file descriptor to get + * the data out. 
+ * * The buffer memory will be freed when the file descriptor is closed
+ */
+struct reader {
+	struct hlist_bl_node node;
+	struct rcu_head rcu_head;
+	struct reader_changes changes;
+	wait_queue_head_t wait_queue;
+	struct kbase_kinstr_jm *context;
+};
+
+static struct kbase_kinstr_jm *
+kbase_kinstr_jm_ref_get(struct kbase_kinstr_jm *const ctx);
+static void kbase_kinstr_jm_ref_put(struct kbase_kinstr_jm *const ctx);
+static int kbase_kinstr_jm_readers_add(struct kbase_kinstr_jm *const ctx,
+					struct reader *const reader);
+static void kbase_kinstr_jm_readers_del(struct kbase_kinstr_jm *const ctx,
+					struct reader *const reader);
+
+/**
+ * reader_term() - Terminate a instrumentation job manager reader context.
+ * @reader: Pointer to context to be terminated. May be NULL, in which case
+ *          nothing is done.
+ *
+ * Unlinks the reader from its parent context, tears down its change buffer,
+ * drops the reference held on the parent context and frees the reader through
+ * kfree_rcu().
+ */
+static void reader_term(struct reader *const reader)
+{
+	if (!reader)
+		return;
+
+	kbase_kinstr_jm_readers_del(reader->context, reader);
+	reader_changes_term(&reader->changes);
+	kbase_kinstr_jm_ref_put(reader->context);
+
+	kfree_rcu(reader, rcu_head);
+}
+
+/**
+ * reader_init() - Initialise a instrumentation job manager reader context.
+ * @out_reader:  Non-NULL pointer to where the pointer to the created context
+ *               will be stored on success.
+ * @ctx:         the pointer to the parent context. Reference count will be
+ *               increased if initialization is successful
+ * @num_changes: The number of changes to allocate a buffer for
+ *
+ * On failure everything acquired so far is released again: the change buffer
+ * (if it was already initialised), the reference on @ctx and the reader
+ * allocation itself. @out_reader is only written on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int reader_init(struct reader **const out_reader,
+		       struct kbase_kinstr_jm *const ctx,
+		       size_t const num_changes)
+{
+	struct reader *reader = NULL;
+	const size_t change_size = sizeof(struct kbase_kinstr_jm_atom_state_change);
+	int status;
+
+	if (!out_reader || !ctx || !num_changes)
+		return -EINVAL;
+
+	reader = kzalloc(sizeof(*reader), GFP_KERNEL);
+	if (!reader)
+		return -ENOMEM;
+
+	INIT_HLIST_BL_NODE(&reader->node);
+	init_waitqueue_head(&reader->wait_queue);
+
+	reader->context = kbase_kinstr_jm_ref_get(ctx);
+
+	status = reader_changes_init(&reader->changes, num_changes * change_size);
+	if (status < 0)
+		goto fail;
+
+	status = kbase_kinstr_jm_readers_add(ctx, reader);
+	if (status < 0)
+		goto fail_changes;
+
+	*out_reader = reader;
+
+	return 0;
+
+fail_changes:
+	/* The reader never made it onto the context's list, so its change
+	 * buffer can be torn down immediately.
+	 */
+	reader_changes_term(&reader->changes);
+fail:
+	kbase_kinstr_jm_ref_put(reader->context);
+	kfree(reader);
+	return status;
+}
+
+/**
+ * reader_release() - Invoked when the reader file descriptor is released
+ * @node: The inode that the file descriptor that the file corresponds to. In
+ *        our case our reader file descriptor is backed by an anonymous node so
+ *        not much is in this.
+ * @file: the file data. Our reader context is held in the private data
+ * Return: zero on success
+ */
+static int reader_release(struct inode *const node, struct file *const file)
+{
+	struct reader *const reader = file->private_data;
+
+	reader_term(reader);
+	file->private_data = NULL;
+
+	return 0;
+}
+
+/**
+ * reader_changes_copy_to_user() - Copy any changes from a changes structure to
+ * the user-provided buffer.
+ * @changes: The changes structure from which to copy.
+ * @buffer: The user buffer to copy the data to.
+ * @buffer_size: The number of bytes in the buffer.
+ * Return: The number of bytes copied or negative errno on failure.
+ */
+static ssize_t reader_changes_copy_to_user(struct reader_changes *const changes,
+					   char __user *buffer,
+					   size_t buffer_size)
+{
+	ssize_t ret = 0;
+	struct kbase_kinstr_jm_atom_state_change const *src_buf = READ_ONCE(
+		changes->data);
+	size_t const entry_size = sizeof(*src_buf);
+	size_t changes_tail, changes_count, read_size;
+
+	/* Needed for the quick buffer capacity calculation below.
+	 * Note that we can't use is_power_of_2() since old compilers don't
+	 * understand it's a constant expression.
+	 */
+#define is_power_of_two(x) ((x) && !((x) & ((x) - 1)))
+	static_assert(is_power_of_two(
+		sizeof(struct kbase_kinstr_jm_atom_state_change)));
+#undef is_power_of_two
+
+	lockdep_assert_held_once(&changes->consumer);
+
+	/* Read continuously until either:
+	 * - we've filled the output buffer, or
+	 * - there are no changes when we check.
+	 *
+	 * If more changes arrive while we're copying to the user, we can copy
+	 * those as well, space permitting.
+	 */
+	do {
+		changes_tail = changes->tail;
+		changes_count = reader_changes_count_locked(changes);
+		/* Copy whichever is smaller: the contiguous run of pending
+		 * entries, or the remaining user space rounded down to a
+		 * whole number of entries (entry_size is a power of two, see
+		 * the static_assert above).
+		 */
+		read_size = min(changes_count * entry_size,
+				buffer_size & ~(entry_size - 1));
+
+		if (!read_size)
+			break;
+
+		/* Entries copied by earlier iterations have already been
+		 * consumed (tail was advanced), but on a fault here their
+		 * byte count in ret is discarded in favour of -EFAULT.
+		 */
+		if (copy_to_user(buffer, &(src_buf[changes_tail]), read_size))
+			return -EFAULT;
+
+		buffer += read_size;
+		buffer_size -= read_size;
+		ret += read_size;
+		changes_tail = (changes_tail + read_size / entry_size) &
+			(changes->size - 1);
+		/* Release-store of tail: pairs with the smp_load_acquire() of
+		 * tail in reader_changes_push().
+		 */
+		smp_store_release(&changes->tail, changes_tail);
+	} while (read_size);
+
+	return ret;
+}
+
+/**
+ * reader_read() - Handles a read call on the reader file descriptor
+ *
+ * @filp: The file that the read was performed on
+ * @buffer: The destination buffer
+ * @buffer_size: The maximum number of bytes to read
+ * @offset: The offset into the 'file' to read from. Not used.
+ *
+ * Note the destination buffer needs to be fully mapped in userspace or the read
+ * will fault.
+ *
+ * If no changes are pending and the file is not O_NONBLOCK, the call sleeps on
+ * the reader wait queue until at least one change is available.
+ *
+ * Return:
+ * * The number of bytes read or:
+ * * -EBADF - the file descriptor did not have an attached reader
+ * * -ENOBUFS - @buffer_size is smaller than one entry
+ * * -EIO - @buffer failed the access_ok() check
+ * * -EFAULT - copying to @buffer faulted
+ * * -EAGAIN - if the file is set to nonblocking reads with O_NONBLOCK and there
+ *             is no data available
+ * * -EINTR - the blocking wait for data was interrupted
+ *
+ * Note: The number of bytes read will always be a multiple of the size of an
+ * entry.
+ */
+static ssize_t reader_read(struct file *const filp,
+			   char __user *const buffer,
+			   size_t const buffer_size,
+			   loff_t *const offset)
+{
+	struct reader *const reader = filp->private_data;
+	struct reader_changes *changes;
+	ssize_t ret;
+
+	if (!reader)
+		return -EBADF;
+
+	if (buffer_size < sizeof(struct kbase_kinstr_jm_atom_state_change))
+		return -ENOBUFS;
+
+	/* access_ok() takes a different argument list depending on the kernel
+	 * version being built against.
+	 */
+#if KERNEL_VERSION(5, 0, 0) <= LINUX_VERSION_CODE
+	if (!access_ok(buffer, buffer_size))
+		return -EIO;
+#else
+	if (!access_ok(VERIFY_WRITE, buffer, buffer_size))
+		return -EIO;
+#endif
+
+	changes = &reader->changes;
+
+	mutex_lock(&changes->consumer);
+	if (!reader_changes_count_locked(changes)) {
+		if (filp->f_flags & O_NONBLOCK) {
+			ret = -EAGAIN;
+			goto exit;
+		}
+
+		/* NOTE(review): the consumer mutex stays held while sleeping
+		 * here, so reader_changes_count() callers (e.g. reader_poll())
+		 * block until this wait finishes - confirm this is intended.
+		 */
+		if (wait_event_interruptible(
+				reader->wait_queue,
+				!!reader_changes_count_locked(changes))) {
+			ret = -EINTR;
+			goto exit;
+		}
+	}
+
+	ret = reader_changes_copy_to_user(changes, buffer, buffer_size);
+
+exit:
+	mutex_unlock(&changes->consumer);
+	return ret;
+}
+
+/**
+ * reader_poll() - Handles a poll call on the reader file descriptor
+ * @file: The file that the poll was performed on
+ * @wait: The poll table
+ *
+ * The results of the poll will be unreliable if there is no mapped memory as
+ * there is no circular buffer to push atom state changes into.
+ *
+ * The return value is a poll event mask, never a negative errno.
+ *
+ * Return:
+ * * 0 - no data ready
+ * * POLLIN - state changes have been buffered
+ * * POLLERR | POLLHUP - @file or @wait was NULL, or the file descriptor did
+ *                       not have an attached reader
+ */
+static __poll_t reader_poll(struct file *const file,
+			     struct poll_table_struct *const wait)
+{
+	struct reader *reader;
+	struct reader_changes *changes;
+
+	/* __poll_t is a bit mask: a negative errno here would be seen by the
+	 * caller as an arbitrary set of event bits, so report error/hang-up
+	 * events instead.
+	 */
+	if (unlikely(!file || !wait))
+		return POLLERR | POLLHUP;
+
+	reader = file->private_data;
+	if (unlikely(!reader))
+		return POLLERR | POLLHUP;
+
+	changes = &reader->changes;
+
+	/* Fast path: enough changes are already buffered, no need to queue. */
+	if (reader_changes_count(changes) >= changes->threshold)
+		return POLLIN;
+
+	poll_wait(file, &reader->wait_queue, wait);
+
+	/* Re-check after registering on the wait queue; below the threshold
+	 * any pending change is still reported as readable.
+	 */
+	return (reader_changes_count(changes) > 0) ? POLLIN : 0;
+}
+
+/* The file operations virtual function table */
+static const struct file_operations file_operations = {
+	.owner = THIS_MODULE,
+	.llseek = no_llseek,
+	.read = reader_read,
+	.poll = reader_poll,
+	.release = reader_release
+};
+
+/* The maximum amount of readers that can be created on a context.
*/
+static const size_t kbase_kinstr_jm_readers_max = 16;
+
+/**
+ * kbase_kinstr_jm_release() - kref release callback for the context
+ * @ref: the reference count embedded in the context being released
+ *
+ * Frees the struct kbase_kinstr_jm that contains @ref.
+ */
+static void kbase_kinstr_jm_release(struct kref *const ref)
+{
+	kfree(container_of(ref, struct kbase_kinstr_jm, refcount));
+}
+
+/**
+ * kbase_kinstr_jm_ref_get() - Takes a reference on the instrumentation context
+ * @ctx: the context to take a reference on, may be NULL
+ * Return: @ctx, unchanged (NULL when @ctx is NULL)
+ */
+static struct kbase_kinstr_jm *
+kbase_kinstr_jm_ref_get(struct kbase_kinstr_jm *const ctx)
+{
+	if (unlikely(!ctx))
+		return ctx;
+
+	kref_get(&ctx->refcount);
+	return ctx;
+}
+
+/**
+ * kbase_kinstr_jm_ref_put() - Drops a reference on the instrumentation context
+ * @ctx: the context to drop a reference on, may be NULL
+ *
+ * kbase_kinstr_jm_release() is the release callback handed to kref_put().
+ */
+static void kbase_kinstr_jm_ref_put(struct kbase_kinstr_jm *const ctx)
+{
+	if (unlikely(!ctx))
+		return;
+
+	kref_put(&ctx->refcount, kbase_kinstr_jm_release);
+}
+
+/**
+ * kbase_kinstr_jm_readers_add() - Adds a reader to the list of readers
+ * @ctx: the instrumentation context
+ * @reader: the reader to add
+ *
+ * Return:
+ * 0 - success
+ * -ENOMEM - too many readers already added.
+ */
+static int kbase_kinstr_jm_readers_add(struct kbase_kinstr_jm *const ctx,
+					struct reader *const reader)
+{
+	struct hlist_bl_head *const readers = &ctx->readers;
+	struct hlist_bl_node *node;
+	struct reader *temp;
+	size_t count = 0;
+
+	hlist_bl_lock(readers);
+
+	/* Count under the list lock so the limit check and the insertion
+	 * below cannot race with another add.
+	 */
+	hlist_bl_for_each_entry_rcu(temp, node, readers, node)
+		++count;
+
+	/* Refuse once the list already holds the maximum: comparing with
+	 * '<' here would let one reader more than the limit through.
+	 */
+	if (count >= kbase_kinstr_jm_readers_max) {
+		hlist_bl_unlock(readers);
+		return -ENOMEM;
+	}
+
+	hlist_bl_add_head_rcu(&reader->node, readers);
+
+	hlist_bl_unlock(readers);
+
+	/* One increment per live reader; undone in
+	 * kbase_kinstr_jm_readers_del().
+	 */
+	static_branch_inc(&basep_kinstr_jm_reader_static_key);
+
+	return 0;
+}
+
+/**
+ * kbase_kinstr_jm_readers_del() - Deletes a reader from the list of readers
+ * @ctx: the instrumentation context
+ * @reader: the reader to delete
+ */
+static void kbase_kinstr_jm_readers_del(struct kbase_kinstr_jm *const ctx,
+					struct reader *const reader)
+{
+	struct hlist_bl_head *const readers = &ctx->readers;
+
+	hlist_bl_lock(readers);
+	hlist_bl_del_rcu(&reader->node);
+	hlist_bl_unlock(readers);
+
+	static_branch_dec(&basep_kinstr_jm_reader_static_key);
+}
+
+int kbase_kinstr_jm_get_fd(struct kbase_kinstr_jm *const ctx,
+			   union kbase_kinstr_jm_fd *jm_fd_arg)
+{
+	struct kbase_kinstr_jm_fd_in const *in;
+	struct reader *reader;
+	size_t const change_size = sizeof(struct
+					  kbase_kinstr_jm_atom_state_change);
+	int status;
+	int fd;
+	size_t i;
+
+	if (!ctx || !jm_fd_arg)
+		return -EINVAL;
+
+	in = &jm_fd_arg->in;
+
+	if (!is_power_of_2(in->count))
+		return -EINVAL;
+
+	/* Padding must be zero. */
+	for (i = 0; i < sizeof(in->padding); ++i)
+		if (in->padding[i])
+			return -EINVAL;
+
+	/* in and out are members of the same union: everything needed from
+	 * the input is consumed here, before the output is written below.
+	 */
+	status = reader_init(&reader, ctx, in->count);
+	if (status < 0)
+		return status;
+
+	jm_fd_arg->out.version = KBASE_KINSTR_JM_VERSION;
+	jm_fd_arg->out.size = change_size;
+	memset(&jm_fd_arg->out.padding, 0, sizeof(jm_fd_arg->out.padding));
+
+	fd = anon_inode_getfd("[mali_kinstr_jm]", &file_operations, reader,
+			      O_CLOEXEC);
+	/* No file took ownership of the reader, so release it here. */
+	if (fd < 0)
+		reader_term(reader);
+
+	return fd;
+}
+
+int kbase_kinstr_jm_init(struct kbase_kinstr_jm
**const out_ctx)
+{
+	struct kbase_kinstr_jm *ctx = NULL;
+
+	if (!out_ctx)
+		return -EINVAL;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	INIT_HLIST_BL_HEAD(&ctx->readers);
+	kref_init(&ctx->refcount);
+
+	*out_ctx = ctx;
+
+	return 0;
+}
+
+void kbase_kinstr_jm_term(struct kbase_kinstr_jm *const ctx)
+{
+	/* Drops the reference set up by kref_init() in kbase_kinstr_jm_init();
+	 * the context is freed once no reader holds a reference either.
+	 */
+	kbase_kinstr_jm_ref_put(ctx);
+}
+
+void kbasep_kinstr_jm_atom_state(
+	struct kbase_jd_atom *const katom,
+	const enum kbase_kinstr_jm_reader_atom_state state)
+{
+	struct kbase_context *const kctx = katom->kctx;
+	struct kbase_kinstr_jm *const ctx = kctx->kinstr_jm;
+	const u8 id = kbase_jd_atom_id(kctx, katom);
+	struct kbase_kinstr_jm_atom_state_change change = {
+		.timestamp = ktime_get_raw_ns(), .atom = id, .state = state
+	};
+	struct reader *reader;
+	struct hlist_bl_node *node;
+
+	/* ..._STATE_COUNT is the number of states, not a state itself, so a
+	 * value equal to it is out of range as well.
+	 */
+	WARN(KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT <= state || 0 > state,
+	     PR_ "unsupported katom (%u) state (%i)", id, state);
+
+	/* Only the START state carries extra payload (the job slot). */
+	switch (state) {
+	case KBASE_KINSTR_JM_READER_ATOM_STATE_START:
+		change.data.start.slot = katom->slot_nr;
+		break;
+	default:
+		break;
+	}
+
+	/* Fan the change out to every reader attached to the context. */
+	rcu_read_lock();
+	hlist_bl_for_each_entry_rcu(reader, node, &ctx->readers, node)
+		reader_changes_push(
+			&reader->changes, &change, &reader->wait_queue);
+	rcu_read_unlock();
+}
+
+KBASE_EXPORT_TEST_API(kbasep_kinstr_jm_atom_state);
+
+void kbasep_kinstr_jm_atom_hw_submit(struct kbase_jd_atom *const katom)
+{
+	struct kbase_context *const kctx = katom->kctx;
+	struct kbase_device *const kbdev = kctx->kbdev;
+	const int slot = katom->slot_nr;
+	struct kbase_jd_atom *submitted;
+
+	BUILD_BUG_ON(SLOT_RB_SIZE != 2);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Range-check the slot before it is handed to kbase_gpu_inspect(). */
+	if (WARN_ON(slot < 0 || slot >= GPU_MAX_JOB_SLOTS))
+		return;
+
+	submitted = kbase_gpu_inspect(kbdev, slot, 0);
+	if (WARN_ON(!submitted))
+		return;
+
+	/* Only the atom at the head of the slot is reported as started. */
+	if (submitted == katom)
+		kbase_kinstr_jm_atom_state_start(katom);
+}
+
+void kbasep_kinstr_jm_atom_hw_release(struct kbase_jd_atom *const katom)
+{
+	struct kbase_context *const kctx = katom->kctx;
+	struct kbase_device *const kbdev = kctx->kbdev;
+	const int slot = katom->slot_nr;
+	struct kbase_jd_atom *submitted;
+	struct kbase_jd_atom *queued;
+
+	BUILD_BUG_ON(SLOT_RB_SIZE != 2);
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	/* Range-check the slot before it is handed to kbase_gpu_inspect(). */
+	if (WARN_ON(slot < 0 || slot >= GPU_MAX_JOB_SLOTS))
+		return;
+
+	submitted = kbase_gpu_inspect(kbdev, slot, 0);
+	queued = kbase_gpu_inspect(kbdev, slot, 1);
+
+	if (WARN_ON(!submitted))
+		return;
+	if (WARN_ON((submitted != katom) && (queued != katom)))
+		return;
+
+	/* Releasing the second entry of the slot: nothing to report. */
+	if (queued == katom)
+		return;
+
+	if (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED)
+		kbase_kinstr_jm_atom_state_stop(katom);
+	if (queued && queued->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED)
+		kbase_kinstr_jm_atom_state_start(queued);
+}
diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_kinstr_jm.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_kinstr_jm.h
new file mode 100644
index 0000000..2c904e5
--- /dev/null
+++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_kinstr_jm.h
@@ -0,0 +1,275 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * mali_kbase_kinstr_jm.h
+ * Kernel driver public interface to job manager atom tracing.
This API provides + * a method to get the atom state changes into user space. + * + * The flow of operation is: + * + * | kernel | user | + * | ----------------------------------- | ----------------------------------- | + * | Initialize API with | | + * | kbase_kinstr_jm_init() | | + * | | | + * | Kernel code injects states with | | + * | kbase_kinstr_jm_atom_state_*() APIs | | + * | | Call ioctl() to get file descriptor | + * | | via KBASE_IOCTL_KINSTR_JM_FD | + * | Allocates a reader attached to FD | | + * | Allocates circular buffer and | | + * | patches, via ASM goto, the | | + * | kbase_kinstr_jm_atom_state_*() | | + * | | loop: | + * | | Call poll() on FD for POLLIN | + * | When threshold of changes is hit, | | + * | the poll is interrupted with | | + * | POLLIN. If circular buffer is | | + * | full then store the missed count | | + * | and interrupt poll | Call read() to get data from | + * | | circular buffer via the fd | + * | Kernel advances tail of circular | | + * | buffer | | + * | | Close file descriptor | + * | Deallocates circular buffer | | + * | | | + * | Terminate API with | | + * | kbase_kinstr_jm_term() | | + * + * All tracepoints are guarded on a static key. The static key is activated when + * a user space reader gets created. This means that there is negligible cost + * inserting the tracepoints into code when there are no readers. + */ + +#ifndef _KBASE_KINSTR_JM_H_ +#define _KBASE_KINSTR_JM_H_ + +#include + +#ifdef __KERNEL__ +#include +#include +#else +/* empty wrapper macros for userspace */ +#define static_branch_unlikely(key) (1) +#define KERNEL_VERSION(a, b, c) (0) +#define LINUX_VERSION_CODE (1) +#endif /* __KERNEL__ */ + +/* Forward declarations */ +struct kbase_context; +struct kbase_kinstr_jm; +struct kbase_jd_atom; +union kbase_kinstr_jm_fd; + +/** + * kbase_kinstr_jm_init() - Initialise an instrumentation job manager context. + * @ctx: Non-NULL pointer to where the pointer to the created context will + * be stored on success. 
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_kinstr_jm_init(struct kbase_kinstr_jm **ctx);
+
+/**
+ * kbase_kinstr_jm_term() - Terminate an instrumentation job manager context.
+ * @ctx: Pointer to context to be terminated.
+ */
+void kbase_kinstr_jm_term(struct kbase_kinstr_jm *ctx);
+
+/**
+ * kbase_kinstr_jm_get_fd() - Retrieves a file descriptor that can be used to
+ * read the atom state changes from userspace
+ *
+ * @ctx: Pointer to the initialized context
+ * @jm_fd_arg: Pointer to the union containing the in/out params
+ * Return: negative error code on failure, valid file descriptor on success
+ */
+int kbase_kinstr_jm_get_fd(struct kbase_kinstr_jm *const ctx,
+			   union kbase_kinstr_jm_fd *jm_fd_arg);
+
+/**
+ * kbasep_kinstr_jm_atom_state() - Signifies that an atom has changed state
+ * @atom: The atom that has changed state
+ * @state: The new state of the atom
+ *
+ * This performs the actual storage of the state ready for user space to
+ * read the data. It is only called when the static key is enabled from
+ * kbase_kinstr_jm_atom_state(). There is almost never a need to invoke this
+ * function directly.
+ */
+void kbasep_kinstr_jm_atom_state(
+	struct kbase_jd_atom *const atom,
+	const enum kbase_kinstr_jm_reader_atom_state state);
+
+/* Allows ASM goto patching to reduce tracing overhead. This is
+ * incremented/decremented when readers are created and terminated. This really
+ * shouldn't be changed externally, but if you do, make sure you use
+ * a static_branch_inc()/static_branch_dec() pair.
+ */ +extern struct static_key_false basep_kinstr_jm_reader_static_key; + +/** + * kbase_kinstr_jm_atom_state() - Signifies that an atom has changed state + * @atom: The atom that has changed state + * @state: The new state of the atom + * + * This uses a static key to reduce overhead when tracing is disabled + */ +static inline void kbase_kinstr_jm_atom_state( + struct kbase_jd_atom *const atom, + const enum kbase_kinstr_jm_reader_atom_state state) +{ + if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key)) + kbasep_kinstr_jm_atom_state(atom, state); +} + +/** + * kbase_kinstr_jm_atom_state_queue() - Signifies that an atom has entered a + * hardware or software queue. + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_state_queue( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state( + atom, KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE); +} + +/** + * kbase_kinstr_jm_atom_state_start() - Signifies that work has started on an + * atom + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_state_start( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state( + atom, KBASE_KINSTR_JM_READER_ATOM_STATE_START); +} + +/** + * kbase_kinstr_jm_atom_state_stop() - Signifies that work has stopped on an + * atom + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_state_stop( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state( + atom, KBASE_KINSTR_JM_READER_ATOM_STATE_STOP); +} + +/** + * kbase_kinstr_jm_atom_state_complete() - Signifies that all work has completed + * on an atom + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_state_complete( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state( + atom, KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE); +} + +/** + * kbase_kinstr_jm_atom_queue() - A software *or* hardware atom is queued for + * execution + * @atom: The atom 
that has changed state + */ +static inline void kbase_kinstr_jm_atom_queue(struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state_queue(atom); +} + +/** + * kbase_kinstr_jm_atom_complete() - A software *or* hardware atom is fully + * completed + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_complete( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state_complete(atom); +} + +/** + * kbase_kinstr_jm_atom_sw_start() - A software atom has started work + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_sw_start( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state_start(atom); +} + +/** + * kbase_kinstr_jm_atom_sw_stop() - A software atom has stopped work + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_sw_stop( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state_stop(atom); +} + +/** + * kbasep_kinstr_jm_atom_hw_submit() - A hardware atom has been submitted + * @atom: The atom that has been submitted + * + * This private implementation should not be called directly, it is protected + * by a static key in kbase_kinstr_jm_atom_hw_submit(). Use that instead. + */ +void kbasep_kinstr_jm_atom_hw_submit(struct kbase_jd_atom *const atom); + +/** + * kbase_kinstr_jm_atom_hw_submit() - A hardware atom has been submitted + * @atom: The atom that has been submitted + */ +static inline void kbase_kinstr_jm_atom_hw_submit( + struct kbase_jd_atom *const atom) +{ + if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key)) + kbasep_kinstr_jm_atom_hw_submit(atom); +} + +/** + * kbasep_kinstr_jm_atom_hw_release() - A hardware atom has been released + * @atom: The atom that has been released + * + * This private implementation should not be called directly, it is protected + * by a static key in kbase_kinstr_jm_atom_hw_release(). Use that instead. 
+ */ +void kbasep_kinstr_jm_atom_hw_release(struct kbase_jd_atom *const atom); + +/** + * kbase_kinstr_jm_atom_hw_release() - A hardware atom has been released + * @atom: The atom that has been released + */ +static inline void kbase_kinstr_jm_atom_hw_release( + struct kbase_jd_atom *const atom) +{ + if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key)) + kbasep_kinstr_jm_atom_hw_release(atom); +} + +#endif /* _KBASE_KINSTR_JM_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h index 003ac9e..1d8d196 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_linux.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2014, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,15 +17,10 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - - /** - * @file mali_kbase_linux.h - * Base kernel APIs, Linux implementation. + * DOC: Base kernel APIs, Linux implementation. 
*/ #ifndef _KBASE_LINUX_H_ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c index 4a1004b..a68e4ea 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ /** @@ -29,7 +28,7 @@ #include #include #include -#ifdef CONFIG_OF +#if IS_ENABLED(CONFIG_OF) #include #endif @@ -43,6 +42,7 @@ #include #include #include +#include /* * Alignment of objects allocated by the GPU inside a just-in-time memory @@ -89,7 +89,7 @@ static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx) #error "Unknown CPU VA width for this architecture" #endif -#ifdef CONFIG_64BIT +#if IS_ENABLED(CONFIG_64BIT) if (kbase_ctx_flag(kctx, KCTX_COMPAT)) cpu_va_bits = 32; #endif @@ -98,27 +98,34 @@ static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx) } /* This function finds out which RB tree the given pfn from the GPU VA belongs - * to based on the memory zone the pfn refers to */ + * to based on the memory zone the pfn refers to + */ static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx, u64 gpu_pfn) { struct rb_root *rbtree = NULL; + struct kbase_reg_zone *exec_va_zone = + kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); /* The gpu_pfn can only be greater than the starting pfn of the EXEC_VA * zone if this has been initialized. 
*/ - if (gpu_pfn >= kctx->exec_va_start) + if (gpu_pfn >= exec_va_zone->base_pfn) rbtree = &kctx->reg_rbtree_exec; else { u64 same_va_end; -#ifdef CONFIG_64BIT - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) +#if IS_ENABLED(CONFIG_64BIT) + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { #endif /* CONFIG_64BIT */ same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE; -#ifdef CONFIG_64BIT - else - same_va_end = kctx->same_va_end; +#if IS_ENABLED(CONFIG_64BIT) + } else { + struct kbase_reg_zone *same_va_zone = + kbase_ctx_reg_zone_get(kctx, + KBASE_REG_ZONE_SAME_VA); + same_va_end = kbase_reg_zone_end_pfn(same_va_zone); + } #endif /* CONFIG_64BIT */ if (gpu_pfn >= same_va_end) @@ -228,7 +235,7 @@ struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address( u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; struct rb_root *rbtree = NULL; - KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(kctx != NULL); lockdep_assert_held(&kctx->reg_lock); @@ -288,7 +295,8 @@ static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs( struct rb_root *rbtree = NULL; /* Note that this search is a linear search, as we do not have a target - address in mind, so does not benefit from the rbtree search */ + * address in mind, so does not benefit from the rbtree search + */ rbtree = reg_reqs->rbtree; for (rbnode = rb_first(rbtree); rbnode; rbnode = rb_next(rbnode)) { @@ -303,7 +311,8 @@ static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs( * (start_pfn + align_mask) & ~(align_mask) * * Otherwise, it aligns to n*align + offset, for the - * lowest value n that makes this still >start_pfn */ + * lowest value n that makes this still >start_pfn + */ start_pfn += align_mask; start_pfn -= (start_pfn - align_offset) & (align_mask); @@ -341,7 +350,8 @@ static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs( } /** - * @brief Remove a region object from the global list. + * Remove a region object from the global list. 
+ * @reg: Region object to remove * * The region reg is removed, possibly by merging with other free and * compatible adjacent regions. It must be called with the context @@ -367,8 +377,9 @@ int kbase_remove_va_region(struct kbase_va_region *reg) if (rbprev) { prev = rb_entry(rbprev, struct kbase_va_region, rblink); if (prev->flags & KBASE_REG_FREE) { - /* We're compatible with the previous VMA, - * merge with it */ + /* We're compatible with the previous VMA, merge with + * it + */ WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) != (reg->flags & KBASE_REG_ZONE_MASK)); prev->nr_pages += reg->nr_pages; @@ -511,8 +522,8 @@ int kbase_add_va_region(struct kbase_context *kctx, int gpu_pc_bits = kbdev->gpu_props.props.core_props.log2_program_counter_size; - KBASE_DEBUG_ASSERT(NULL != kctx); - KBASE_DEBUG_ASSERT(NULL != reg); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(reg != NULL); lockdep_assert_held(&kctx->reg_lock); @@ -614,13 +625,15 @@ int kbase_add_va_region_rbtree(struct kbase_device *kbdev, size_t align_offset = align; size_t align_mask = align - 1; +#if !MALI_USE_CSF if ((reg->flags & KBASE_REG_TILER_ALIGN_TOP)) { WARN(align > 1, "%s with align %lx might not be honored for KBASE_REG_TILER_ALIGN_TOP memory", __func__, (unsigned long)align); - align_mask = reg->extent - 1; - align_offset = reg->extent - reg->initial_commit; + align_mask = reg->extension - 1; + align_offset = reg->extension - reg->initial_commit; } +#endif /* !MALI_USE_CSF */ tmp = kbase_region_tracker_find_region_meeting_reqs(reg, nr_pages, align_offset, align_mask, @@ -643,7 +656,7 @@ exit: return err; } -/** +/* * @brief Initialize the internal region tracker data structure. 
*/ static void kbase_region_tracker_ds_init(struct kbase_context *kctx, @@ -698,6 +711,9 @@ void kbase_region_tracker_term(struct kbase_context *kctx) kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec); +#if MALI_USE_CSF + WARN_ON(!list_empty(&kctx->csf.event_pages_head)); +#endif kbase_gpu_vm_unlock(kctx); } @@ -720,23 +736,26 @@ int kbase_region_tracker_init(struct kbase_context *kctx) u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE; u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT; u64 same_va_pages; + u64 same_va_base = 1u; int err; /* Take the lock as kbase_free_alloced_region requires it */ kbase_gpu_vm_lock(kctx); - same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1; + same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - same_va_base; /* all have SAME_VA */ - same_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 1, - same_va_pages, - KBASE_REG_ZONE_SAME_VA); + same_va_reg = + kbase_alloc_free_region(&kctx->reg_rbtree_same, same_va_base, + same_va_pages, KBASE_REG_ZONE_SAME_VA); if (!same_va_reg) { err = -ENOMEM; goto fail_unlock; } + kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_SAME_VA, same_va_base, + same_va_pages); -#ifdef CONFIG_64BIT +#if IS_ENABLED(CONFIG_64BIT) /* 32-bit clients have custom VA zones */ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { #endif @@ -760,19 +779,28 @@ int kbase_region_tracker_init(struct kbase_context *kctx) err = -ENOMEM; goto fail_free_same_va; } -#ifdef CONFIG_64BIT + kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA, + KBASE_REG_ZONE_CUSTOM_VA_BASE, + custom_va_size); +#if IS_ENABLED(CONFIG_64BIT) } else { custom_va_size = 0; } #endif + /* EXEC_VA zone's codepaths are slightly easier when its base_pfn is + * initially U64_MAX + */ + kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, U64_MAX, 0u); + /* Other zones are 0: kbase_create_context() 
uses vzalloc */ kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg); - kctx->same_va_end = same_va_pages + 1; - kctx->gpu_va_end = kctx->same_va_end + custom_va_size; - kctx->exec_va_start = U64_MAX; + kctx->gpu_va_end = same_va_base + same_va_pages + custom_va_size; kctx->jit_va = false; +#if MALI_USE_CSF + INIT_LIST_HEAD(&kctx->csf.event_pages_head); +#endif kbase_gpu_vm_unlock(kctx); return 0; @@ -784,44 +812,147 @@ fail_unlock: return err; } -#ifdef CONFIG_64BIT +static bool kbase_has_exec_va_zone_locked(struct kbase_context *kctx) +{ + struct kbase_reg_zone *exec_va_zone; + + lockdep_assert_held(&kctx->reg_lock); + exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); + + return (exec_va_zone->base_pfn != U64_MAX); +} + +bool kbase_has_exec_va_zone(struct kbase_context *kctx) +{ + bool has_exec_va_zone; + + kbase_gpu_vm_lock(kctx); + has_exec_va_zone = kbase_has_exec_va_zone_locked(kctx); + kbase_gpu_vm_unlock(kctx); + + return has_exec_va_zone; +} + +/** + * Determine if any allocations have been made on a context's region tracker + * @kctx: KBase context + * + * Check the context to determine if any allocations have been made yet from + * any of its zones. This check should be done before resizing a zone, e.g. to + * make space to add a second zone. + * + * Whilst a zone without allocations can be resized whilst other zones have + * allocations, we still check all of @kctx 's zones anyway: this is a stronger + * guarantee and should be adhered to when creating new zones anyway. + * + * Allocations from kbdev zones are not counted. 
+ * + * Return: true if any allocs exist on any zone, false otherwise + */ +static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx) +{ + unsigned int zone_idx; + + lockdep_assert_held(&kctx->reg_lock); + + for (zone_idx = 0; zone_idx < KBASE_REG_ZONE_MAX; ++zone_idx) { + struct kbase_reg_zone *zone; + struct kbase_va_region *reg; + u64 zone_base_addr; + unsigned long zone_bits = KBASE_REG_ZONE(zone_idx); + unsigned long reg_zone; + + zone = kbase_ctx_reg_zone_get(kctx, zone_bits); + zone_base_addr = zone->base_pfn << PAGE_SHIFT; + + reg = kbase_region_tracker_find_region_base_address( + kctx, zone_base_addr); + + if (!zone->va_size_pages) { + WARN(reg, + "Should not have found a region that starts at 0x%.16llx for zone 0x%lx", + (unsigned long long)zone_base_addr, zone_bits); + continue; + } + + if (WARN(!reg, + "There should always be a region that starts at 0x%.16llx for zone 0x%lx, couldn't find it", + (unsigned long long)zone_base_addr, zone_bits)) + return true; /* Safest return value */ + + reg_zone = reg->flags & KBASE_REG_ZONE_MASK; + if (WARN(reg_zone != zone_bits, + "The region that starts at 0x%.16llx should be in zone 0x%lx but was found in the wrong zone 0x%lx", + (unsigned long long)zone_base_addr, zone_bits, + reg_zone)) + return true; /* Safest return value */ + + /* Unless the region is completely free, of the same size as + * the original zone, then it has allocs + */ + if ((!(reg->flags & KBASE_REG_FREE)) || + (reg->nr_pages != zone->va_size_pages)) + return true; + } + + /* All zones are the same size as originally made, so there are no + * allocs + */ + return false; +} + +#if IS_ENABLED(CONFIG_64BIT) static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, u64 jit_va_pages) { - struct kbase_va_region *same_va; + struct kbase_va_region *same_va_reg; + struct kbase_reg_zone *same_va_zone; + u64 same_va_zone_base_addr; + const unsigned long same_va_zone_bits = KBASE_REG_ZONE_SAME_VA; struct kbase_va_region 
*custom_va_reg; + u64 jit_va_start; lockdep_assert_held(&kctx->reg_lock); - /* First verify that a JIT_VA zone has not been created already. */ - if (kctx->jit_va) - return -EINVAL; - /* - * Modify the same VA free region after creation. Be careful to ensure - * that allocations haven't been made as they could cause an overlap - * to happen with existing same VA allocations and the custom VA zone. + * Modify the same VA free region after creation. The caller has + * ensured that allocations haven't been made, as any allocations could + * cause an overlap to happen with existing same VA allocations and the + * custom VA zone. */ - same_va = kbase_region_tracker_find_region_base_address(kctx, - PAGE_SIZE); - if (!same_va) + same_va_zone = kbase_ctx_reg_zone_get(kctx, same_va_zone_bits); + same_va_zone_base_addr = same_va_zone->base_pfn << PAGE_SHIFT; + + same_va_reg = kbase_region_tracker_find_region_base_address( + kctx, same_va_zone_base_addr); + if (WARN(!same_va_reg, + "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone 0x%lx", + (unsigned long long)same_va_zone_base_addr, same_va_zone_bits)) return -ENOMEM; - if (same_va->nr_pages < jit_va_pages || kctx->same_va_end < jit_va_pages) + /* kbase_region_tracker_has_allocs() in the caller has already ensured + * that all of the zones have no allocs, so no need to check that again + * on same_va_reg + */ + WARN_ON((!(same_va_reg->flags & KBASE_REG_FREE)) || + same_va_reg->nr_pages != same_va_zone->va_size_pages); + + if (same_va_reg->nr_pages < jit_va_pages || + same_va_zone->va_size_pages < jit_va_pages) return -ENOMEM; /* It's safe to adjust the same VA zone now */ - same_va->nr_pages -= jit_va_pages; - kctx->same_va_end -= jit_va_pages; + same_va_reg->nr_pages -= jit_va_pages; + same_va_zone->va_size_pages -= jit_va_pages; + jit_va_start = kbase_reg_zone_end_pfn(same_va_zone); /* * Create a custom VA zone at the end of the VA for allocations which * 
JIT can use so it doesn't have to allocate VA from the kernel. */ - custom_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom, - kctx->same_va_end, - jit_va_pages, - KBASE_REG_ZONE_CUSTOM_VA); + custom_va_reg = + kbase_alloc_free_region(&kctx->reg_rbtree_custom, jit_va_start, + jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA); /* * The context will be destroyed if we fail here so no point @@ -829,6 +960,11 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, */ if (!custom_va_reg) return -ENOMEM; + /* Since this is 64-bit, the custom zone will not have been + * initialized, so initialize it now + */ + kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA, jit_va_start, + jit_va_pages); kbase_region_tracker_insert(custom_va_reg); return 0; @@ -847,16 +983,34 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, if (group_id < 0 || group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) return -EINVAL; -#if MALI_JIT_PRESSURE_LIMIT if (phys_pages_limit > jit_va_pages) -#else - if (phys_pages_limit != jit_va_pages) -#endif /* MALI_JIT_PRESSURE_LIMIT */ return -EINVAL; +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (phys_pages_limit != jit_va_pages) + kbase_ctx_flag_set(kctx, KCTX_JPL_ENABLED); +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + kbase_gpu_vm_lock(kctx); -#ifdef CONFIG_64BIT + /* Verify that a JIT_VA zone has not been created already. */ + if (kctx->jit_va) { + err = -EINVAL; + goto exit_unlock; + } + + /* If in 64-bit, we always lookup the SAME_VA zone. To ensure it has no + * allocs, we can ensure there are no allocs anywhere. + * + * This check is also useful in 32-bit, just to make sure init of the + * zone is always done before any allocs. 
+ */ + if (kbase_region_tracker_has_allocs(kctx)) { + err = -ENOMEM; + goto exit_unlock; + } + +#if IS_ENABLED(CONFIG_64BIT) if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) err = kbase_region_tracker_init_jit_64(kctx, jit_va_pages); #endif @@ -870,13 +1024,14 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, kctx->trim_level = trim_level; kctx->jit_va = true; kctx->jit_group_id = group_id; -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE kctx->jit_phys_pages_limit = phys_pages_limit; dev_dbg(kctx->kbdev->dev, "phys_pages_limit set to %llu\n", phys_pages_limit); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ } +exit_unlock: kbase_gpu_vm_unlock(kctx); return err; @@ -884,24 +1039,33 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages) { - struct kbase_va_region *shrinking_va_reg; struct kbase_va_region *exec_va_reg; - u64 exec_va_start, exec_va_base_addr; + struct kbase_reg_zone *exec_va_zone; + struct kbase_reg_zone *target_zone; + struct kbase_va_region *target_reg; + u64 target_zone_base_addr; + unsigned long target_zone_bits; + u64 exec_va_start; int err; - /* The EXEC_VA zone shall be created by making space at the end of the - * address space. Firstly, verify that the number of EXEC_VA pages - * requested by the client is reasonable and then make sure that it is - * not greater than the address space itself before calculating the base - * address of the new zone. + /* The EXEC_VA zone shall be created by making space either: + * - for 64-bit clients, at the end of the process's address space + * - for 32-bit clients, in the CUSTOM zone + * + * Firstly, verify that the number of EXEC_VA pages requested by the + * client is reasonable and then make sure that it is not greater than + * the address space itself before calculating the base address of the + * new zone. 
*/ if (exec_va_pages == 0 || exec_va_pages > KBASE_REG_ZONE_EXEC_VA_MAX_PAGES) return -EINVAL; kbase_gpu_vm_lock(kctx); - /* First verify that a JIT_VA zone has not been created already. */ - if (kctx->jit_va) { + /* Verify that we've not already created a EXEC_VA zone, and that the + * EXEC_VA zone must come before JIT's CUSTOM_VA. + */ + if (kbase_has_exec_va_zone_locked(kctx) || kctx->jit_va) { err = -EPERM; goto exit_unlock; } @@ -911,28 +1075,50 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages goto exit_unlock; } - exec_va_start = kctx->gpu_va_end - exec_va_pages; - exec_va_base_addr = exec_va_start << PAGE_SHIFT; - - shrinking_va_reg = kbase_region_tracker_find_region_enclosing_address(kctx, - exec_va_base_addr); - if (!shrinking_va_reg) { + /* Verify no allocations have already been made */ + if (kbase_region_tracker_has_allocs(kctx)) { err = -ENOMEM; goto exit_unlock; } - /* Make sure that the EXEC_VA region is still uninitialized */ - if ((shrinking_va_reg->flags & KBASE_REG_ZONE_MASK) == - KBASE_REG_ZONE_EXEC_VA) { - err = -EPERM; +#if IS_ENABLED(CONFIG_64BIT) + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { +#endif + /* 32-bit client: take from CUSTOM_VA zone */ + target_zone_bits = KBASE_REG_ZONE_CUSTOM_VA; +#if IS_ENABLED(CONFIG_64BIT) + } else { + /* 64-bit client: take from SAME_VA zone */ + target_zone_bits = KBASE_REG_ZONE_SAME_VA; + } +#endif + target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_bits); + target_zone_base_addr = target_zone->base_pfn << PAGE_SHIFT; + + target_reg = kbase_region_tracker_find_region_base_address( + kctx, target_zone_base_addr); + if (WARN(!target_reg, + "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone 0x%lx", + (unsigned long long)target_zone_base_addr, target_zone_bits)) { + err = -ENOMEM; goto exit_unlock; } + /* kbase_region_tracker_has_allocs() above has already ensured that all + * of the zones have no allocs, so 
no need to check that again on + * target_reg + */ + WARN_ON((!(target_reg->flags & KBASE_REG_FREE)) || + target_reg->nr_pages != target_zone->va_size_pages); - if (shrinking_va_reg->nr_pages <= exec_va_pages) { + if (target_reg->nr_pages <= exec_va_pages || + target_zone->va_size_pages <= exec_va_pages) { err = -ENOMEM; goto exit_unlock; } + /* Taken from the end of the target zone */ + exec_va_start = kbase_reg_zone_end_pfn(target_zone) - exec_va_pages; + exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec, exec_va_start, exec_va_pages, @@ -941,13 +1127,17 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages err = -ENOMEM; goto exit_unlock; } + /* Update EXEC_VA zone + * + * not using kbase_ctx_reg_zone_init() - it was already initialized + */ + exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); + exec_va_zone->base_pfn = exec_va_start; + exec_va_zone->va_size_pages = exec_va_pages; - shrinking_va_reg->nr_pages -= exec_va_pages; -#ifdef CONFIG_64BIT - if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) - kctx->same_va_end -= exec_va_pages; -#endif - kctx->exec_va_start = exec_va_start; + /* Update target zone and corresponding region */ + target_reg->nr_pages -= exec_va_pages; + target_zone->va_size_pages -= exec_va_pages; kbase_region_tracker_insert(exec_va_reg); err = 0; @@ -957,12 +1147,40 @@ exit_unlock: return err; } +#if MALI_USE_CSF +void kbase_mcu_shared_interface_region_tracker_term(struct kbase_device *kbdev) +{ + kbase_region_tracker_term_rbtree(&kbdev->csf.shared_reg_rbtree); +} + +int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev) +{ + struct kbase_va_region *shared_reg; + u64 shared_reg_start_pfn; + u64 shared_reg_size; + + shared_reg_start_pfn = KBASE_REG_ZONE_MCU_SHARED_BASE; + shared_reg_size = KBASE_REG_ZONE_MCU_SHARED_SIZE; + + kbdev->csf.shared_reg_rbtree = RB_ROOT; + + shared_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, + shared_reg_start_pfn, + 
shared_reg_size, + KBASE_REG_ZONE_MCU_SHARED); + if (!shared_reg) + return -ENOMEM; + + kbase_region_tracker_insert(shared_reg); + return 0; +} +#endif int kbase_mem_init(struct kbase_device *kbdev) { int err = 0; struct kbasep_mem_device *memdev; -#ifdef CONFIG_OF +#if IS_ENABLED(CONFIG_OF) struct device_node *mgm_node = NULL; #endif @@ -976,6 +1194,12 @@ int kbase_mem_init(struct kbase_device *kbdev) /* Initialize memory usage */ atomic_set(&memdev->used_pages, 0); + spin_lock_init(&kbdev->gpu_mem_usage_lock); + kbdev->total_gpu_pages = 0; + kbdev->process_root = RB_ROOT; + kbdev->dma_buf_root = RB_ROOT; + mutex_init(&kbdev->dma_buf_lock); + #ifdef IR_THRESHOLD atomic_set(&memdev->ir_threshold, IR_THRESHOLD); #else @@ -984,7 +1208,7 @@ int kbase_mem_init(struct kbase_device *kbdev) kbdev->mgm_dev = &kbase_native_mgm_dev; -#ifdef CONFIG_OF +#if IS_ENABLED(CONFIG_OF) /* Check to see whether or not a platform-specific memory group manager * is configured and available. */ @@ -1053,13 +1277,22 @@ void kbase_mem_term(struct kbase_device *kbdev) kbase_mem_pool_group_term(&kbdev->mem_pools); + WARN_ON(kbdev->total_gpu_pages); + WARN_ON(!RB_EMPTY_ROOT(&kbdev->process_root)); + WARN_ON(!RB_EMPTY_ROOT(&kbdev->dma_buf_root)); + mutex_destroy(&kbdev->dma_buf_lock); + if (kbdev->mgm_dev) module_put(kbdev->mgm_dev->owner); } KBASE_EXPORT_TEST_API(kbase_mem_term); /** - * @brief Allocate a free region object. + * Allocate a free region object. + * @rbtree: Backlink to the red-black tree of memory regions. + * @start_pfn: The Page Frame Number in GPU virtual address space. + * @nr_pages: The size of the region in pages. + * @zone: KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA * * The allocated object is not part of any list yet, and is flagged as * KBASE_REG_FREE. No mapping is allocated yet. @@ -1132,7 +1365,8 @@ static struct kbase_context *kbase_reg_flags_to_kctx( } /** - * @brief Free a region object. + * Free a region object. 
+ * @reg: Region * * The described region must be freed of any mapping. * @@ -1143,6 +1377,13 @@ static struct kbase_context *kbase_reg_flags_to_kctx( */ void kbase_free_alloced_region(struct kbase_va_region *reg) { +#if MALI_USE_CSF + if ((reg->flags & KBASE_REG_ZONE_MASK) == + KBASE_REG_ZONE_MCU_SHARED) { + kfree(reg); + return; + } +#endif if (!(reg->flags & KBASE_REG_FREE)) { struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg); @@ -1152,8 +1393,12 @@ void kbase_free_alloced_region(struct kbase_va_region *reg) if (WARN_ON(kbase_is_region_invalid(reg))) return; - dev_dbg(kctx->kbdev->dev, "Freeing memory region %p\n", + dev_dbg(kctx->kbdev->dev, "Freeing memory region %pK\n", (void *)reg); +#if MALI_USE_CSF + if (reg->flags & KBASE_REG_CSF_EVENT) + kbase_unlink_event_mem_page(kctx, reg); +#endif mutex_lock(&kctx->jit_evict_lock); @@ -1233,8 +1478,8 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 else attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC); - KBASE_DEBUG_ASSERT(NULL != kctx); - KBASE_DEBUG_ASSERT(NULL != reg); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(reg != NULL); err = kbase_add_va_region(kctx, reg, addr, nr_pages, align); if (err) @@ -1260,7 +1505,9 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 if (err) goto bad_insert; - kbase_mem_phy_alloc_gpu_mapped(alloc->imported.alias.aliased[i].alloc); + /* Note: mapping count is tracked at alias + * creation time + */ } else { err = kbase_mmu_insert_single_page(kctx, reg->start_pfn + i * stride, @@ -1319,13 +1566,6 @@ bad_insert: reg->start_pfn, reg->nr_pages, kctx->as_nr); - if (alloc->type == KBASE_MEM_TYPE_ALIAS) { - KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased); - while (i--) - if (alloc->imported.alias.aliased[i].alloc) - kbase_mem_phy_alloc_gpu_unmapped(alloc->imported.alias.aliased[i].alloc); - } - kbase_remove_va_region(reg); return err; @@ -1339,7 +1579,6 @@ static void 
kbase_jd_user_buf_unmap(struct kbase_context *kctx, int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) { int err = 0; - size_t i; if (reg->start_pfn == 0) return 0; @@ -1364,10 +1603,9 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) /* Update tracking, and other cleanup, depending on memory type. */ switch (reg->gpu_alloc->type) { case KBASE_MEM_TYPE_ALIAS: - KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased); - for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++) - if (reg->gpu_alloc->imported.alias.aliased[i].alloc) - kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc); + /* We mark the source allocs as unmapped from the GPU when + * putting reg's allocs + */ break; case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { struct kbase_alloc_import_user_buf *user_buf = @@ -1404,7 +1642,7 @@ static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping( unsigned long map_start; size_t map_size; - lockdep_assert_held(&current->mm->mmap_sem); + lockdep_assert_held(kbase_mem_get_process_mmap_lock()); if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */ return NULL; @@ -1676,9 +1914,9 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re { int err; - KBASE_DEBUG_ASSERT(NULL != kctx); - KBASE_DEBUG_ASSERT(NULL != reg); - dev_dbg(kctx->kbdev->dev, "%s %p in kctx %p\n", + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(reg != NULL); + dev_dbg(kctx->kbdev->dev, "%s %pK in kctx %pK\n", __func__, (void *)reg, (void *)kctx); lockdep_assert_held(&kctx->reg_lock); @@ -1724,7 +1962,9 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re KBASE_EXPORT_TEST_API(kbase_mem_free_region); /** - * @brief Free the region from the GPU and unregister it. + * Free the region from the GPU and unregister it.
+ * @kctx: KBase context + * @gpu_addr: GPU address to free * * This function implements the free operation on a memory segment. * It will loudly fail if called with outstanding mappings. @@ -1735,7 +1975,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) struct kbase_va_region *reg; KBASE_DEBUG_ASSERT(kctx != NULL); - dev_dbg(kctx->kbdev->dev, "%s 0x%llx in kctx %p\n", + dev_dbg(kctx->kbdev->dev, "%s 0x%llx in kctx %pK\n", __func__, gpu_addr, (void *)kctx); if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) { @@ -1743,7 +1983,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) return -EINVAL; } - if (0 == gpu_addr) { + if (gpu_addr == 0) { dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n"); return -EINVAL; } @@ -1796,7 +2036,7 @@ KBASE_EXPORT_TEST_API(kbase_mem_free); int kbase_update_region_flags(struct kbase_context *kctx, struct kbase_va_region *reg, unsigned long flags) { - KBASE_DEBUG_ASSERT(NULL != reg); + KBASE_DEBUG_ASSERT(reg != NULL); KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0); reg->flags |= kbase_cache_enabled(flags, reg->nr_pages); @@ -1835,9 +2075,25 @@ int kbase_update_region_flags(struct kbase_context *kctx, reg->flags |= KBASE_REG_SHARE_IN; } +#if !MALI_USE_CSF if (flags & BASE_MEM_TILER_ALIGN_TOP) reg->flags |= KBASE_REG_TILER_ALIGN_TOP; +#endif /* !MALI_USE_CSF */ + +#if MALI_USE_CSF + if (flags & BASE_MEM_CSF_EVENT) { + reg->flags |= KBASE_REG_CSF_EVENT; + reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING; + if (!(reg->flags & KBASE_REG_SHARE_BOTH)) { + /* On non coherent platforms need to map as uncached on + * both sides. 
+ */ + reg->flags &= ~KBASE_REG_CPU_CACHED; + reg->flags &= ~KBASE_REG_GPU_CACHED; + } + } +#endif /* Set up default MEMATTR usage */ if (!(reg->flags & KBASE_REG_GPU_CACHED)) { @@ -1851,6 +2107,13 @@ int kbase_update_region_flags(struct kbase_context *kctx, "Can't allocate GPU uncached memory due to MMU in Legacy Mode\n"); return -EINVAL; } +#if MALI_USE_CSF + } else if (reg->flags & KBASE_REG_CSF_EVENT) { + WARN_ON(!(reg->flags & KBASE_REG_SHARE_BOTH)); + + reg->flags |= + KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED); +#endif } else if (kctx->kbdev->system_coherency == COHERENCY_ACE && (reg->flags & KBASE_REG_SHARE_BOTH)) { reg->flags |= @@ -1905,7 +2168,8 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, &kctx->kbdev->memdev.used_pages); /* Increase mm counters before we allocate pages so that this - * allocation is visible to the OOM killer */ + * allocation is visible to the OOM killer + */ kbase_process_page_usage_inc(kctx, nr_pages_requested); tp = alloc->pages + alloc->nents; @@ -2033,6 +2297,9 @@ no_new_partial: (u64)new_page_count); alloc->nents += nr_pages_requested; + + kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); + done: return 0; @@ -2209,6 +2476,9 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( (u64)new_page_count); alloc->nents += nr_pages_requested; + + kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); + done: return new_pages; @@ -2303,7 +2573,7 @@ int kbase_free_phy_pages_helper( } /* early out if nothing to do */ - if (0 == nr_pages_to_free) + if (nr_pages_to_free == 0) return 0; start_free = alloc->pages + alloc->nents - nr_pages_to_free; @@ -2374,6 +2644,8 @@ int kbase_free_phy_pages_helper( kbdev, kctx->id, (u64)new_page_count); + + kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, freed); } return 0; @@ -2496,9 +2768,19 @@ void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, kbdev, kctx->id, (u64)new_page_count); + + 
kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, freed); } } +KBASE_EXPORT_TEST_API(kbase_free_phy_pages_helper_locked); +#if MALI_USE_CSF +/** + * kbase_jd_user_buf_unpin_pages - Release the pinned pages of a user buffer. + * @alloc: The allocation for the imported user buffer. + */ +static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc); +#endif void kbase_mem_kref_free(struct kref *kref) { @@ -2540,8 +2822,10 @@ void kbase_mem_kref_free(struct kref *kref) aliased = alloc->imported.alias.aliased; if (aliased) { for (i = 0; i < alloc->imported.alias.nents; i++) - if (aliased[i].alloc) + if (aliased[i].alloc) { + kbase_mem_phy_alloc_gpu_unmapped(aliased[i].alloc); kbase_mem_phy_alloc_put(aliased[i].alloc); + } vfree(aliased); } break; @@ -2558,12 +2842,17 @@ void kbase_mem_kref_free(struct kref *kref) alloc->imported.umm.dma_attachment, alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); + kbase_remove_dma_buf_usage(alloc->imported.umm.kctx, + alloc); } dma_buf_detach(alloc->imported.umm.dma_buf, alloc->imported.umm.dma_attachment); dma_buf_put(alloc->imported.umm.dma_buf); break; case KBASE_MEM_TYPE_IMPORTED_USER_BUF: +#if MALI_USE_CSF + kbase_jd_user_buf_unpin_pages(alloc); +#endif if (alloc->imported.user_buf.mm) mmdrop(alloc->imported.user_buf.mm); if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE) @@ -2587,7 +2876,7 @@ KBASE_EXPORT_TEST_API(kbase_mem_kref_free); int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size) { - KBASE_DEBUG_ASSERT(NULL != reg); + KBASE_DEBUG_ASSERT(reg != NULL); KBASE_DEBUG_ASSERT(vsize > 0); /* validate user provided arguments */ @@ -2600,7 +2889,7 @@ int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->cpu_alloc->pages))) goto out_term; - KBASE_DEBUG_ASSERT(0 != vsize); + KBASE_DEBUG_ASSERT(vsize != 0); if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0) goto out_term; @@ -2643,22 +2932,37 @@ bool 
kbase_check_alloc_flags(unsigned long flags) /* GPU executable memory cannot: * - Be written by the GPU * - Be grown on GPU page fault - * - Have the top of its initial commit aligned to 'extent' */ + */ + if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & + (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF))) + return false; + +#if !MALI_USE_CSF + /* GPU executable memory also cannot have the top of its initial + * commit aligned to 'extension' + */ if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & - (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF | - BASE_MEM_TILER_ALIGN_TOP))) + BASE_MEM_TILER_ALIGN_TOP)) return false; +#endif /* !MALI_USE_CSF */ /* To have an allocation lie within a 4GB chunk is required only for - * TLS memory, which will never be used to contain executable code - * and also used for Tiler heap. + * TLS memory, which will never be used to contain executable code. */ if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags & - (BASE_MEM_PROT_GPU_EX | BASE_MEM_TILER_ALIGN_TOP))) + BASE_MEM_PROT_GPU_EX)) return false; +#if !MALI_USE_CSF + /* TLS memory should also not be used for tiler heap */ + if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags & + BASE_MEM_TILER_ALIGN_TOP)) + return false; +#endif /* !MALI_USE_CSF */ + /* GPU should have at least read or write access otherwise there is no - reason for allocating. */ + * reason for allocating. 
+ */ if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0) return false; @@ -2666,14 +2970,15 @@ bool kbase_check_alloc_flags(unsigned long flags) if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED) return false; - /* BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP is only valid for imported - * memory */ + /* BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP is only valid for imported memory + */ if ((flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) == BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) return false; /* Should not combine BASE_MEM_COHERENT_LOCAL with - * BASE_MEM_COHERENT_SYSTEM */ + * BASE_MEM_COHERENT_SYSTEM + */ if ((flags & (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) == (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) return false; @@ -2699,12 +3004,15 @@ bool kbase_check_import_flags(unsigned long flags) if (flags & BASE_MEM_GROW_ON_GPF) return false; +#if !MALI_USE_CSF /* Imported memory cannot be aligned to the end of its initial commit */ if (flags & BASE_MEM_TILER_ALIGN_TOP) return false; +#endif /* !MALI_USE_CSF */ /* GPU should have at least read or write access otherwise there is no - reason for importing. */ + * reason for importing. 
+ */ if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0) return false; @@ -2716,19 +3024,19 @@ bool kbase_check_import_flags(unsigned long flags) } int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, - u64 va_pages, u64 commit_pages, u64 large_extent) + u64 va_pages, u64 commit_pages, u64 large_extension) { struct device *dev = kctx->kbdev->dev; int gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size; u64 gpu_pc_pages_max = 1ULL << gpu_pc_bits >> PAGE_SHIFT; struct kbase_va_region test_reg; - /* kbase_va_region's extent member can be of variable size, so check against that type */ - test_reg.extent = large_extent; + /* kbase_va_region's extension member can be of variable size, so check against that type */ + test_reg.extension = large_extension; #define KBASE_MSG_PRE "GPU allocation attempted with " - if (0 == va_pages) { + if (va_pages == 0) { dev_warn(dev, KBASE_MSG_PRE "0 va_pages!"); return -EINVAL; } @@ -2740,7 +3048,8 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, } /* Note: commit_pages is checked against va_pages during - * kbase_alloc_phy_pages() */ + * kbase_alloc_phy_pages() + */ /* Limit GPU executable allocs to GPU PC size */ if ((flags & BASE_MEM_PROT_GPU_EX) && (va_pages > gpu_pc_pages_max)) { @@ -2751,47 +3060,73 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, return -EINVAL; } - if ((flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) && - test_reg.extent == 0) { - dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GROW_ON_GPF or BASE_MEM_TILER_ALIGN_TOP but extent == 0\n"); + if ((flags & BASE_MEM_GROW_ON_GPF) && (test_reg.extension == 0)) { + dev_warn(dev, KBASE_MSG_PRE + "BASE_MEM_GROW_ON_GPF but extension == 0\n"); + return -EINVAL; + } + +#if !MALI_USE_CSF + if ((flags & BASE_MEM_TILER_ALIGN_TOP) && (test_reg.extension == 0)) { + dev_warn(dev, KBASE_MSG_PRE + "BASE_MEM_TILER_ALIGN_TOP but extension == 0\n"); return 
-EINVAL; } if (!(flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) && - test_reg.extent != 0) { - dev_warn(dev, KBASE_MSG_PRE "neither BASE_MEM_GROW_ON_GPF nor BASE_MEM_TILER_ALIGN_TOP set but extent != 0\n"); + test_reg.extension != 0) { + dev_warn( + dev, KBASE_MSG_PRE + "neither BASE_MEM_GROW_ON_GPF nor BASE_MEM_TILER_ALIGN_TOP set but extension != 0\n"); + return -EINVAL; + } +#else + if (!(flags & BASE_MEM_GROW_ON_GPF) && test_reg.extension != 0) { + dev_warn(dev, KBASE_MSG_PRE + "BASE_MEM_GROW_ON_GPF not set but extension != 0\n"); return -EINVAL; } +#endif /* !MALI_USE_CSF */ +#if !MALI_USE_CSF /* BASE_MEM_TILER_ALIGN_TOP memory has a number of restrictions */ if (flags & BASE_MEM_TILER_ALIGN_TOP) { #define KBASE_MSG_PRE_FLAG KBASE_MSG_PRE "BASE_MEM_TILER_ALIGN_TOP and " - unsigned long small_extent; - - if (large_extent > BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES) { - dev_warn(dev, KBASE_MSG_PRE_FLAG "extent==%lld pages exceeds limit %lld", - (unsigned long long)large_extent, - BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES); + unsigned long small_extension; + + if (large_extension > + BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES) { + dev_warn(dev, + KBASE_MSG_PRE_FLAG + "extension==%lld pages exceeds limit %lld", + (unsigned long long)large_extension, + BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES); return -EINVAL; } /* For use with is_power_of_2, which takes unsigned long, so - * must ensure e.g. on 32-bit kernel it'll fit in that type */ - small_extent = (unsigned long)large_extent; + * must ensure e.g. 
on 32-bit kernel it'll fit in that type + */ + small_extension = (unsigned long)large_extension; - if (!is_power_of_2(small_extent)) { - dev_warn(dev, KBASE_MSG_PRE_FLAG "extent==%ld not a non-zero power of 2", - small_extent); + if (!is_power_of_2(small_extension)) { + dev_warn(dev, + KBASE_MSG_PRE_FLAG + "extension==%ld not a non-zero power of 2", + small_extension); return -EINVAL; } - if (commit_pages > large_extent) { - dev_warn(dev, KBASE_MSG_PRE_FLAG "commit_pages==%ld exceeds extent==%ld", - (unsigned long)commit_pages, - (unsigned long)large_extent); + if (commit_pages > large_extension) { + dev_warn(dev, + KBASE_MSG_PRE_FLAG + "commit_pages==%ld exceeds extension==%ld", + (unsigned long)commit_pages, + (unsigned long)large_extension); return -EINVAL; } #undef KBASE_MSG_PRE_FLAG } +#endif /* !MALI_USE_CSF */ if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (va_pages > (BASE_MEM_PFN_MASK_4GB + 1))) { @@ -2805,7 +3140,8 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, } /** - * @brief Acquire the per-context region list lock + * Acquire the per-context region list lock + * @kctx: KBase context */ void kbase_gpu_vm_lock(struct kbase_context *kctx) { @@ -2816,7 +3152,8 @@ void kbase_gpu_vm_lock(struct kbase_context *kctx) KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock); /** - * @brief Release the per-context region list lock + * Release the per-context region list lock + * @kctx: KBase context */ void kbase_gpu_vm_unlock(struct kbase_context *kctx) { @@ -2826,7 +3163,7 @@ void kbase_gpu_vm_unlock(struct kbase_context *kctx) KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock); -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) struct kbase_jit_debugfs_data { int (*func)(struct kbase_jit_debugfs_data *); struct mutex lock; @@ -2879,7 +3216,7 @@ static ssize_t kbase_jit_debugfs_common_read(struct file *file, } size = scnprintf(data->buffer, sizeof(data->buffer), - "%llu,%llu,%llu", data->active_value, + "%llu,%llu,%llu\n", data->active_value, 
data->pool_value, data->destroy_value); } @@ -2983,19 +3320,23 @@ static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data) KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops, kbase_jit_debugfs_phys_get); -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE static int kbase_jit_debugfs_used_get(struct kbase_jit_debugfs_data *data) { struct kbase_context *kctx = data->kctx; struct kbase_va_region *reg; +#if !MALI_USE_CSF mutex_lock(&kctx->jctx.lock); +#endif /* !MALI_USE_CSF */ mutex_lock(&kctx->jit_evict_lock); list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { data->active_value += reg->used_pages; } mutex_unlock(&kctx->jit_evict_lock); +#if !MALI_USE_CSF mutex_unlock(&kctx->jctx.lock); +#endif /* !MALI_USE_CSF */ return 0; } @@ -3012,7 +3353,9 @@ static int kbase_jit_debugfs_trim_get(struct kbase_jit_debugfs_data *data) struct kbase_context *kctx = data->kctx; struct kbase_va_region *reg; +#if !MALI_USE_CSF mutex_lock(&kctx->jctx.lock); +#endif /* !MALI_USE_CSF */ kbase_gpu_vm_lock(kctx); mutex_lock(&kctx->jit_evict_lock); list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { @@ -3031,14 +3374,16 @@ static int kbase_jit_debugfs_trim_get(struct kbase_jit_debugfs_data *data) } mutex_unlock(&kctx->jit_evict_lock); kbase_gpu_vm_unlock(kctx); +#if !MALI_USE_CSF mutex_unlock(&kctx->jctx.lock); +#endif /* !MALI_USE_CSF */ return 0; } KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_trim_fops, kbase_jit_debugfs_trim_get); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ void kbase_jit_debugfs_init(struct kbase_context *kctx) { @@ -3078,7 +3423,7 @@ void kbase_jit_debugfs_init(struct kbase_context *kctx) */ debugfs_create_file("mem_jit_phys", mode, kctx->kctx_dentry, kctx, &kbase_jit_debugfs_phys_fops); -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /* * Debugfs entry for getting the number of pages used * by JIT allocations for estimating the physical pressure @@ -3093,7 +3438,7 @@ 
void kbase_jit_debugfs_init(struct kbase_context *kctx) */ debugfs_create_file("mem_jit_trim", mode, kctx->kctx_dentry, kctx, &kbase_jit_debugfs_trim_fops); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ } #endif /* CONFIG_DEBUG_FS */ @@ -3138,8 +3483,13 @@ int kbase_jit_init(struct kbase_context *kctx) INIT_LIST_HEAD(&kctx->jit_destroy_head); INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker); +#if MALI_USE_CSF + INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_cmds_head); + INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_blocked_queues); +#else /* !MALI_USE_CSF */ INIT_LIST_HEAD(&kctx->jctx.jit_atoms_head); INIT_LIST_HEAD(&kctx->jctx.jit_pending_alloc); +#endif /* MALI_USE_CSF */ mutex_unlock(&kctx->jit_evict_lock); kctx->jit_max_allocations = 0; @@ -3153,25 +3503,29 @@ int kbase_jit_init(struct kbase_context *kctx) * allocation and also, if BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP is set, meets * the alignment requirements. */ -static bool meet_size_and_tiler_align_top_requirements(struct kbase_context *kctx, - struct kbase_va_region *walker, const struct base_jit_alloc_info *info) +static bool meet_size_and_tiler_align_top_requirements( + const struct kbase_va_region *walker, + const struct base_jit_alloc_info *info) { bool meet_reqs = true; if (walker->nr_pages != info->va_pages) meet_reqs = false; - else if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) { - size_t align = info->extent; + +#if !MALI_USE_CSF + if (meet_reqs && (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP)) { + size_t align = info->extension; size_t align_mask = align - 1; if ((walker->start_pfn + info->commit_pages) & align_mask) meet_reqs = false; } +#endif /* !MALI_USE_CSF */ return meet_reqs; } -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /* Function will guarantee *@freed will not exceed @pages_needed */ static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, @@ -3185,7 +3539,9 @@ static int 
kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, size_t to_free = 0u; size_t max_allowed_pages = old_pages; +#if !MALI_USE_CSF lockdep_assert_held(&kctx->jctx.lock); +#endif /* !MALI_USE_CSF */ lockdep_assert_held(&kctx->reg_lock); /* Is this a JIT allocation that has been reported on? */ @@ -3213,20 +3569,20 @@ static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES); } else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { /* The GPU could report being ready to write to the next - * 'extent' sized chunk, but didn't actually write to it, so we - * can report up to 'extent' size pages more than the backed + * 'extension' sized chunk, but didn't actually write to it, so we + * can report up to 'extension' size pages more than the backed * size. * * Note, this is allowed to exceed reg->nr_pages. */ - max_allowed_pages += reg->extent; + max_allowed_pages += reg->extension; /* Also note that in these GPUs, the GPU may make a large (>1 * page) initial allocation but not actually write out to all * of it. Hence it might report that a much higher amount of * memory was used than actually was written to. This does not * result in a real warning because on growing this memory we - * round up the size of the allocation up to an 'extent' sized + * round up the size of the allocation up to an 'extension' sized * chunk, hence automatically bringing the backed size up to * the reported size. 
*/ @@ -3308,8 +3664,12 @@ static size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx, struct kbase_va_region *reg, *tmp; size_t total_freed = 0; - kbase_gpu_vm_lock(kctx); - mutex_lock(&kctx->jit_evict_lock); +#if !MALI_USE_CSF + lockdep_assert_held(&kctx->jctx.lock); +#endif /* !MALI_USE_CSF */ + lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->jit_evict_lock); + list_for_each_entry_safe(reg, tmp, &kctx->jit_active_head, jit_node) { int err; size_t freed = 0u; @@ -3328,18 +3688,17 @@ static size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx, if (!pages_needed) break; } - mutex_unlock(&kctx->jit_evict_lock); - kbase_gpu_vm_unlock(kctx); trace_mali_jit_trim(total_freed); return total_freed; } -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ static int kbase_jit_grow(struct kbase_context *kctx, - const struct base_jit_alloc_info *info, - struct kbase_va_region *reg) + const struct base_jit_alloc_info *info, + struct kbase_va_region *reg, + struct kbase_sub_alloc **prealloc_sas) { size_t delta; size_t pages_required; @@ -3347,15 +3706,13 @@ static int kbase_jit_grow(struct kbase_context *kctx, struct kbase_mem_pool *pool; int ret = -ENOMEM; struct tagged_addr *gpu_pages; - struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; - int i; if (info->commit_pages > reg->nr_pages) { /* Attempted to grow larger than maximum size */ return -EINVAL; } - kbase_gpu_vm_lock(kctx); + lockdep_assert_held(&kctx->reg_lock); /* Make the physical backing no longer reclaimable */ if (!kbase_mem_evictable_unmake(reg->gpu_alloc)) @@ -3372,14 +3729,6 @@ static int kbase_jit_grow(struct kbase_context *kctx, pages_required = delta; #ifdef CONFIG_MALI_2MB_ALLOC - /* Preallocate memory for the sub-allocation structs */ - for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { - prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), - GFP_KERNEL); - if (!prealloc_sas[i]) - goto update_failed; - } - if (pages_required >= (SZ_2M / SZ_4K)) 
{ pool = &kctx->mem_pools.large[kctx->jit_group_id]; /* Round up to number of 2 MB pages required */ @@ -3405,15 +3754,18 @@ static int kbase_jit_grow(struct kbase_context *kctx, */ while (kbase_mem_pool_size(pool) < pages_required) { int pool_delta = pages_required - kbase_mem_pool_size(pool); + int ret; kbase_mem_pool_unlock(pool); spin_unlock(&kctx->mem_partials_lock); + kbase_gpu_vm_unlock(kctx); + ret = kbase_mem_pool_grow(pool, pool_delta); + kbase_gpu_vm_lock(kctx); - if (kbase_mem_pool_grow(pool, pool_delta)) - goto update_failed_unlocked; + if (ret) + goto update_failed; - kbase_gpu_vm_lock(kctx); spin_lock(&kctx->mem_partials_lock); kbase_mem_pool_lock(pool); } @@ -3456,14 +3808,9 @@ done: /* Update attributes of JIT allocation taken from the pool */ reg->initial_commit = info->commit_pages; - reg->extent = info->extent; + reg->extension = info->extension; update_failed: - kbase_gpu_vm_unlock(kctx); -update_failed_unlocked: - for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) - kfree(prealloc_sas[i]); - return ret; } @@ -3492,9 +3839,9 @@ static void trace_jit_stats(struct kbase_context *kctx, max_allocations, alloc_count, va_pages, ph_pages); } -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /** - * get_jit_backed_pressure() - calculate the physical backing of all JIT + * get_jit_phys_backing() - calculate the physical backing of all JIT * allocations * * @kctx: Pointer to the kbase context whose active JIT allocations will be @@ -3502,83 +3849,50 @@ static void trace_jit_stats(struct kbase_context *kctx, * * Return: number of pages that are committed by JIT allocations */ -static size_t get_jit_backed_pressure(struct kbase_context *kctx) +static size_t get_jit_phys_backing(struct kbase_context *kctx) { - size_t backed_pressure = 0; - int jit_id; - - lockdep_assert_held(&kctx->jctx.lock); + struct kbase_va_region *walker; + size_t backing = 0; - kbase_gpu_vm_lock(kctx); - for (jit_id = 0; jit_id <= BASE_JIT_ALLOC_COUNT; jit_id++) { - struct 
kbase_va_region *reg = kctx->jit_alloc[jit_id]; + lockdep_assert_held(&kctx->jit_evict_lock); - if (reg && (reg != KBASE_RESERVED_REG_JIT_ALLOC)) { - /* If region has no report, be pessimistic */ - if (reg->used_pages == reg->nr_pages) { - backed_pressure += reg->nr_pages; - } else { - backed_pressure += - kbase_reg_current_backed_size(reg); - } - } + list_for_each_entry(walker, &kctx->jit_active_head, jit_node) { + backing += kbase_reg_current_backed_size(walker); } - kbase_gpu_vm_unlock(kctx); - return backed_pressure; + return backing; } -/** - * jit_trim_necessary_pages() - calculate and trim the least pages possible to - * satisfy a new JIT allocation - * - * @kctx: Pointer to the kbase context - * @info: Pointer to JIT allocation information for the new allocation - * - * Before allocating a new just-in-time memory region or reusing a previous - * one, ensure that the total JIT physical page usage also will not exceed the - * pressure limit. - * - * If there are no reported-on allocations, then we already guarantee this will - * be the case - because our current pressure then only comes from the va_pages - * of each JIT region, hence JIT physical page usage is guaranteed to be - * bounded by this. - * - * However as soon as JIT allocations become "reported on", the pressure is - * lowered to allow new JIT regions to be allocated. It is after such a point - * that the total JIT physical page usage could (either now or in the future on - * a grow-on-GPU-page-fault) exceed the pressure limit, but only on newly - * allocated JIT regions. Hence, trim any "reported on" regions. - * - * Any pages freed will go into the pool and be allocated from there in - * kbase_mem_alloc(). 
- */ -static void jit_trim_necessary_pages(struct kbase_context *kctx, - const struct base_jit_alloc_info *info) +void kbase_jit_trim_necessary_pages(struct kbase_context *kctx, + size_t needed_pages) { - size_t backed_pressure = 0; - size_t needed_pages = 0; + size_t jit_backing = 0; + size_t pages_to_trim = 0; - backed_pressure = get_jit_backed_pressure(kctx); +#if !MALI_USE_CSF + lockdep_assert_held(&kctx->jctx.lock); +#endif /* !MALI_USE_CSF */ + lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->jit_evict_lock); + + jit_backing = get_jit_phys_backing(kctx); /* It is possible that this is the case - if this is the first * allocation after "ignore_pressure_limit" allocation. */ - if (backed_pressure > kctx->jit_phys_pages_limit) { - needed_pages += - (backed_pressure - kctx->jit_phys_pages_limit) - + info->va_pages; + if (jit_backing > kctx->jit_phys_pages_limit) { + pages_to_trim += (jit_backing - kctx->jit_phys_pages_limit) + + needed_pages; } else { - size_t backed_diff = - kctx->jit_phys_pages_limit - backed_pressure; + size_t backed_diff = kctx->jit_phys_pages_limit - jit_backing; - if (info->va_pages > backed_diff) - needed_pages += info->va_pages - backed_diff; + if (needed_pages > backed_diff) + pages_to_trim += needed_pages - backed_diff; } - if (needed_pages) { - size_t trimmed_pages = kbase_mem_jit_trim_pages(kctx, - needed_pages); + if (pages_to_trim) { + size_t trimmed_pages = + kbase_mem_jit_trim_pages(kctx, pages_to_trim); /* This should never happen - we already asserted that * we are not violating JIT pressure limit in earlier @@ -3586,10 +3900,10 @@ static void jit_trim_necessary_pages(struct kbase_context *kctx, * must have enough unused pages to satisfy the new * allocation */ - WARN_ON(trimmed_pages < needed_pages); + WARN_ON(trimmed_pages < pages_to_trim); } } -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ /** * jit_allow_allocate() - check whether basic conditions are satisfied to allow @@ 
-3606,10 +3920,14 @@ static bool jit_allow_allocate(struct kbase_context *kctx, const struct base_jit_alloc_info *info, bool ignore_pressure_limit) { +#if MALI_USE_CSF + lockdep_assert_held(&kctx->csf.kcpu_queues.lock); +#else lockdep_assert_held(&kctx->jctx.lock); +#endif -#if MALI_JIT_PRESSURE_LIMIT - if (likely(!ignore_pressure_limit) && +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (!ignore_pressure_limit && ((kctx->jit_phys_pages_limit <= kctx->jit_current_phys_pressure) || (info->va_pages > (kctx->jit_phys_pages_limit - kctx->jit_current_phys_pressure)))) { dev_dbg(kctx->kbdev->dev, @@ -3618,7 +3936,7 @@ static bool jit_allow_allocate(struct kbase_context *kctx, kctx->jit_phys_pages_limit); return false; } -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ if (kctx->jit_current_allocations >= kctx->jit_max_allocations) { /* Too many current allocations */ @@ -3644,123 +3962,156 @@ static bool jit_allow_allocate(struct kbase_context *kctx, return true; } +static struct kbase_va_region * +find_reasonable_region(const struct base_jit_alloc_info *info, + struct list_head *pool_head, bool ignore_usage_id) +{ + struct kbase_va_region *closest_reg = NULL; + struct kbase_va_region *walker; + size_t current_diff = SIZE_MAX; + + list_for_each_entry(walker, pool_head, jit_node) { + if ((ignore_usage_id || + walker->jit_usage_id == info->usage_id) && + walker->jit_bin_id == info->bin_id && + meet_size_and_tiler_align_top_requirements(walker, info)) { + size_t min_size, max_size, diff; + + /* + * The JIT allocations VA requirements have been met, + * it's suitable but other allocations might be a + * better fit. 
+ */ + min_size = min_t(size_t, walker->gpu_alloc->nents, + info->commit_pages); + max_size = max_t(size_t, walker->gpu_alloc->nents, + info->commit_pages); + diff = max_size - min_size; + + if (current_diff > diff) { + current_diff = diff; + closest_reg = walker; + } + + /* The allocation is an exact match */ + if (current_diff == 0) + break; + } + } + + return closest_reg; +} + struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, const struct base_jit_alloc_info *info, bool ignore_pressure_limit) { struct kbase_va_region *reg = NULL; + struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; + int i; +#if MALI_USE_CSF + lockdep_assert_held(&kctx->csf.kcpu_queues.lock); +#else lockdep_assert_held(&kctx->jctx.lock); +#endif if (!jit_allow_allocate(kctx, info, ignore_pressure_limit)) return NULL; -#if MALI_JIT_PRESSURE_LIMIT - if (!ignore_pressure_limit) - jit_trim_necessary_pages(kctx, info); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#ifdef CONFIG_MALI_2MB_ALLOC + /* Preallocate memory for the sub-allocation structs */ + for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { + prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); + if (!prealloc_sas[i]) + goto end; + } +#endif + kbase_gpu_vm_lock(kctx); mutex_lock(&kctx->jit_evict_lock); /* * Scan the pool for an existing allocation which meets our * requirements and remove it. */ - if (info->usage_id != 0) { + if (info->usage_id != 0) /* First scan for an allocation with the same usage ID */ - struct kbase_va_region *walker; - size_t current_diff = SIZE_MAX; - - list_for_each_entry(walker, &kctx->jit_pool_head, jit_node) { - - if (walker->jit_usage_id == info->usage_id && - walker->jit_bin_id == info->bin_id && - meet_size_and_tiler_align_top_requirements( - kctx, walker, info)) { - size_t min_size, max_size, diff; - - /* - * The JIT allocations VA requirements have been - * met, it's suitable but other allocations - * might be a better fit. 
- */ - min_size = min_t(size_t, - walker->gpu_alloc->nents, - info->commit_pages); - max_size = max_t(size_t, - walker->gpu_alloc->nents, - info->commit_pages); - diff = max_size - min_size; - - if (current_diff > diff) { - current_diff = diff; - reg = walker; - } - - /* The allocation is an exact match */ - if (current_diff == 0) - break; - } - } - } + reg = find_reasonable_region(info, &kctx->jit_pool_head, false); - if (!reg) { + if (!reg) /* No allocation with the same usage ID, or usage IDs not in * use. Search for an allocation we can reuse. */ - struct kbase_va_region *walker; - size_t current_diff = SIZE_MAX; - - list_for_each_entry(walker, &kctx->jit_pool_head, jit_node) { - - if (walker->jit_bin_id == info->bin_id && - meet_size_and_tiler_align_top_requirements( - kctx, walker, info)) { - size_t min_size, max_size, diff; - - /* - * The JIT allocations VA requirements have been - * met, it's suitable but other allocations - * might be a better fit. - */ - min_size = min_t(size_t, - walker->gpu_alloc->nents, - info->commit_pages); - max_size = max_t(size_t, - walker->gpu_alloc->nents, - info->commit_pages); - diff = max_size - min_size; - - if (current_diff > diff) { - current_diff = diff; - reg = walker; - } - - /* The allocation is an exact match, so stop - * looking. - */ - if (current_diff == 0) - break; - } - } - } + reg = find_reasonable_region(info, &kctx->jit_pool_head, true); if (reg) { +#if MALI_JIT_PRESSURE_LIMIT_BASE + size_t needed_pages = 0; +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + int ret; + /* * Remove the found region from the pool and add it to the * active list. */ list_move(®->jit_node, &kctx->jit_active_head); + WARN_ON(reg->gpu_alloc->evicted); + /* * Remove the allocation from the eviction list as it's no * longer eligible for eviction. 
This must be done before * dropping the jit_evict_lock */ list_del_init(®->gpu_alloc->evict_node); + +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (!ignore_pressure_limit) { + if (info->commit_pages > reg->gpu_alloc->nents) + needed_pages = info->commit_pages - + reg->gpu_alloc->nents; + + /* Update early the recycled JIT region's estimate of + * used_pages to ensure it doesn't get trimmed + * undesirably. This is needed as the recycled JIT + * region has been added to the active list but the + * number of used pages for it would be zero, so it + * could get trimmed instead of other allocations only + * to be regrown later resulting in a breach of the JIT + * physical pressure limit. + * Also that trimming would disturb the accounting of + * physical pages, i.e. the VM stats, as the number of + * backing pages would have changed when the call to + * kbase_mem_evictable_unmark_reclaim is made. + * + * The second call to update pressure at the end of + * this function would effectively be a nop. + */ + kbase_jit_report_update_pressure( + kctx, reg, info->va_pages, + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); + + kbase_jit_request_phys_increase_locked(kctx, + needed_pages); + } +#endif mutex_unlock(&kctx->jit_evict_lock); - if (kbase_jit_grow(kctx, info, reg) < 0) { + /* kbase_jit_grow() can release & reacquire 'kctx->reg_lock', + * so any state protected by that lock might need to be + * re-evaluated if more code is added here in future. 
+ */ + ret = kbase_jit_grow(kctx, info, reg, prealloc_sas); + +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (!ignore_pressure_limit) + kbase_jit_done_phys_increase(kctx, needed_pages); +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + kbase_gpu_vm_unlock(kctx); + + if (ret < 0) { /* * An update to an allocation from the pool failed, * chances are slim a new allocation would fair any @@ -3770,10 +4121,21 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, dev_dbg(kctx->kbdev->dev, "JIT allocation resize failed: va_pages 0x%llx, commit_pages 0x%llx\n", info->va_pages, info->commit_pages); +#if MALI_JIT_PRESSURE_LIMIT_BASE + /* Undo the early change made to the recycled JIT + * region's estimate of used_pages. + */ + if (!ignore_pressure_limit) { + kbase_jit_report_update_pressure( + kctx, reg, 0, + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); + } +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ mutex_lock(&kctx->jit_evict_lock); list_move(®->jit_node, &kctx->jit_pool_head); mutex_unlock(&kctx->jit_evict_lock); - return NULL; + reg = NULL; + goto end; } } else { /* No suitable JIT allocation was found so create a new one */ @@ -3783,15 +4145,28 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, BASEP_MEM_NO_USER_FREE; u64 gpu_addr; - mutex_unlock(&kctx->jit_evict_lock); - +#if !MALI_USE_CSF if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) flags |= BASE_MEM_TILER_ALIGN_TOP; +#endif /* !MALI_USE_CSF */ flags |= base_mem_group_id_set(kctx->jit_group_id); +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (!ignore_pressure_limit) { + flags |= BASEP_MEM_PERFORM_JIT_TRIM; + /* The corresponding call to 'done_phys_increase' would + * be made inside the kbase_mem_alloc(). 
+ */ + kbase_jit_request_phys_increase_locked( + kctx, info->commit_pages); + } +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + mutex_unlock(&kctx->jit_evict_lock); + kbase_gpu_vm_unlock(kctx); reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, - info->extent, &flags, &gpu_addr); + info->extension, &flags, &gpu_addr); if (!reg) { /* Most likely not enough GPU virtual space left for * the new JIT allocation. @@ -3799,12 +4174,22 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, dev_dbg(kctx->kbdev->dev, "Failed to allocate JIT memory: va_pages 0x%llx, commit_pages 0x%llx\n", info->va_pages, info->commit_pages); - return NULL; + goto end; } - mutex_lock(&kctx->jit_evict_lock); - list_add(®->jit_node, &kctx->jit_active_head); - mutex_unlock(&kctx->jit_evict_lock); + if (!ignore_pressure_limit) { + /* Due to enforcing of pressure limit, kbase_mem_alloc + * was instructed to perform the trimming which in turn + * would have ensured that the new JIT allocation is + * already in the jit_active_head list, so nothing to + * do here. 
+ */ + WARN_ON(list_empty(®->jit_node)); + } else { + mutex_lock(&kctx->jit_evict_lock); + list_add(®->jit_node, &kctx->jit_active_head); + mutex_unlock(&kctx->jit_evict_lock); + } } trace_mali_jit_alloc(reg, info->id); @@ -3816,13 +4201,18 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, reg->jit_usage_id = info->usage_id; reg->jit_bin_id = info->bin_id; -#if MALI_JIT_PRESSURE_LIMIT + reg->flags |= KBASE_REG_ACTIVE_JIT_ALLOC; +#if MALI_JIT_PRESSURE_LIMIT_BASE if (info->flags & BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) reg->flags = reg->flags | KBASE_REG_HEAP_INFO_IS_SIZE; reg->heap_info_gpu_addr = info->heap_info_gpu_addr; kbase_jit_report_update_pressure(kctx, reg, info->va_pages, KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + +end: + for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) + kfree(prealloc_sas[i]); return reg; } @@ -3844,15 +4234,18 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) div_u64(old_pages * (100 - kctx->trim_level), 100)); u64 delta = old_pages - new_size; - if (delta) + if (delta) { + mutex_lock(&kctx->reg_lock); kbase_mem_shrink(kctx, reg, old_pages - delta); + mutex_unlock(&kctx->reg_lock); + } } -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE reg->heap_info_gpu_addr = 0; kbase_jit_report_update_pressure(kctx, reg, 0, KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ kctx->jit_current_allocations--; kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--; @@ -3863,6 +4256,7 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) kbase_gpu_vm_lock(kctx); reg->flags |= KBASE_REG_DONT_NEED; + reg->flags &= ~KBASE_REG_ACTIVE_JIT_ALLOC; kbase_mem_shrink_cpu_mapping(kctx, reg, 0, reg->gpu_alloc->nents); kbase_gpu_vm_unlock(kctx); @@ -3875,6 +4269,7 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region 
*reg) /* This allocation can't already be on a list. */ WARN_ON(!list_empty(®->gpu_alloc->evict_node)); list_add(®->gpu_alloc->evict_node, &kctx->evict_list); + atomic_add(reg->gpu_alloc->nents, &kctx->evict_nents); list_move(®->jit_node, &kctx->jit_pool_head); @@ -3962,6 +4357,9 @@ void kbase_jit_term(struct kbase_context *kctx) kbase_mem_free_region(kctx, walker); mutex_lock(&kctx->jit_evict_lock); } +#if MALI_JIT_PRESSURE_LIMIT_BASE + WARN_ON(kctx->jit_phys_pages_to_be_allocated); +#endif mutex_unlock(&kctx->jit_evict_lock); kbase_gpu_vm_unlock(kctx); @@ -3972,7 +4370,7 @@ void kbase_jit_term(struct kbase_context *kctx) cancel_work_sync(&kctx->jit_work); } -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, struct kbase_va_region *reg, unsigned int flags) { @@ -4015,16 +4413,18 @@ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, out: return; } -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE void kbase_jit_report_update_pressure(struct kbase_context *kctx, struct kbase_va_region *reg, u64 new_used_pages, unsigned int flags) { u64 diff; +#if !MALI_USE_CSF lockdep_assert_held(&kctx->jctx.lock); +#endif /* !MALI_USE_CSF */ trace_mali_jit_report_pressure(reg, new_used_pages, kctx->jit_current_phys_pressure + new_used_pages - @@ -4053,19 +4453,22 @@ void kbase_jit_report_update_pressure(struct kbase_context *kctx, } } -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ -bool kbase_has_exec_va_zone(struct kbase_context *kctx) +#if MALI_USE_CSF +static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc) { - bool has_exec_va_zone; + if (alloc->nents) { + struct page **pages = alloc->imported.user_buf.pages; + long i; - kbase_gpu_vm_lock(kctx); - has_exec_va_zone = (kctx->exec_va_start != U64_MAX); - 
kbase_gpu_vm_unlock(kctx); + WARN_ON(alloc->nents != alloc->imported.user_buf.nr_pages); - return has_exec_va_zone; + for (i = 0; i < alloc->nents; i++) + put_page(pages[i]); + } } - +#endif int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx, struct kbase_va_region *reg) @@ -4090,7 +4493,7 @@ int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx, if (WARN_ON(reg->gpu_alloc->imported.user_buf.mm != current->mm)) return -EINVAL; -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) +#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE pinned_pages = get_user_pages(NULL, mm, address, alloc->imported.user_buf.nr_pages, @@ -4102,24 +4505,30 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL); #endif -#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) +#elif KERNEL_VERSION(4, 9, 0) > LINUX_VERSION_CODE pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages, reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL); -#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) +#elif KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages, reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, pages, NULL); -#else +#elif KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages, reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, pages, NULL, NULL); +#else + pinned_pages = get_user_pages_remote(mm, + address, + alloc->imported.user_buf.nr_pages, + reg->flags & KBASE_REG_GPU_WR ? 
FOLL_WRITE : 0, + pages, NULL, NULL); #endif if (pinned_pages <= 0) @@ -4232,12 +4641,16 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, DMA_BIDIRECTIONAL); if (writeable) set_page_dirty_lock(pages[i]); +#if !MALI_USE_CSF put_page(pages[i]); pages[i] = NULL; +#endif size -= local_size; } +#if !MALI_USE_CSF alloc->nents = 0; +#endif } int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, @@ -4296,7 +4709,8 @@ struct kbase_mem_phy_alloc *kbase_map_external_resource( goto exit; reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++; - if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) { + if (reg->gpu_alloc->imported.user_buf + .current_mapping_usage_count == 1) { err = kbase_jd_user_buf_map(kctx, reg); if (err) { reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--; @@ -4331,7 +4745,7 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { alloc->imported.user_buf.current_mapping_usage_count--; - if (0 == alloc->imported.user_buf.current_mapping_usage_count) { + if (alloc->imported.user_buf.current_mapping_usage_count == 0) { bool writeable = true; if (!kbase_is_region_invalid_or_free(reg) && diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h index 6e921ec..e9ac809 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,15 +17,10 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - - /** - * @file mali_kbase_mem.h - * Base kernel memory APIs + * DOC: Base kernel memory APIs */ #ifndef _KBASE_MEM_H_ @@ -35,7 +31,7 @@ #endif #include -#include "mali_base_kernel.h" +#include #include #include "mali_kbase_pm.h" #include "mali_kbase_defs.h" @@ -48,10 +44,13 @@ static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, /* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */ #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2) /* round to 4 pages */ -/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by 8 pages. -The MMU reads in 8 page table entries from memory at a time, if we have more than one page fault within the same 8 pages and -page tables are updated accordingly, the MMU does not re-read the page table entries from memory for the subsequent page table -updates and generates duplicate page faults as the page table information used by the MMU is not valid. */ +/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by + * 8 pages. The MMU reads in 8 page table entries from memory at a time, if we + * have more than one page fault within the same 8 pages and page tables are + * updated accordingly, the MMU does not re-read the page table entries from + * memory for the subsequent page table updates and generates duplicate page + * faults as the page table information used by the MMU is not valid. 
+ */ #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3) /* round to 8 pages */ #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0) /* round to 1 page */ @@ -60,7 +59,8 @@ updates and generates duplicate page faults as the page table information used b #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2) #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316) #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630) -/** + +/* * A CPU mapping */ struct kbase_cpu_mapping { @@ -81,16 +81,15 @@ enum kbase_memory_type { }; /* internal structure, mirroring base_mem_aliasing_info, - * but with alloc instead of a gpu va (handle) */ + * but with alloc instead of a gpu va (handle) + */ struct kbase_aliased { struct kbase_mem_phy_alloc *alloc; /* NULL for special, non-NULL for native */ u64 offset; /* in pages */ u64 length; /* in pages */ }; -/** - * @brief Physical pages tracking object properties - */ +/* Physical pages tracking object properties */ #define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED (1u << 0) #define KBASE_MEM_PHY_ALLOC_LARGE (1u << 1) @@ -105,7 +104,13 @@ struct kbase_aliased { * updated as part of the change. * * @kref: number of users of this alloc - * @gpu_mappings: count number of times mapped on the GPU + * @gpu_mappings: count number of times mapped on the GPU. Indicates the number + * of references there are to the physical pages from different + * GPU VA regions. + * @kernel_mappings: count number of times mapped on the CPU, specifically in + * the kernel. Indicates the number of references there are + * to the physical pages to prevent flag changes or shrink + * while maps are still held. * @nents: 0..N * @pages: N elements, only 0..nents are valid * @mappings: List of CPU mappings of this physical memory allocation. 
@@ -128,6 +133,7 @@ struct kbase_aliased { struct kbase_mem_phy_alloc { struct kref kref; atomic_t gpu_mappings; + atomic_t kernel_mappings; size_t nents; struct tagged_addr *pages; struct list_head mappings; @@ -141,6 +147,7 @@ struct kbase_mem_phy_alloc { union { struct { + struct kbase_context *kctx; struct dma_buf *dma_buf; struct dma_buf_attachment *dma_attachment; unsigned int current_mapping_usage_count; @@ -210,12 +217,36 @@ static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc * KBASE_DEBUG_ASSERT(alloc); /* we only track mappings of NATIVE buffers */ if (alloc->type == KBASE_MEM_TYPE_NATIVE) - if (0 > atomic_dec_return(&alloc->gpu_mappings)) { + if (atomic_dec_return(&alloc->gpu_mappings) < 0) { pr_err("Mismatched %s:\n", __func__); dump_stack(); } } +/** + * kbase_mem_phy_alloc_kernel_mapped - Increment kernel_mappings + * counter for a memory region to prevent commit and flag changes + * + * @alloc: Pointer to physical pages tracking object + */ +static inline void +kbase_mem_phy_alloc_kernel_mapped(struct kbase_mem_phy_alloc *alloc) +{ + atomic_inc(&alloc->kernel_mappings); +} + +/** + * kbase_mem_phy_alloc_kernel_unmapped - Decrement kernel_mappings + * counter for a memory region to allow commit and flag changes + * + * @alloc: Pointer to physical pages tracking object + */ +static inline void +kbase_mem_phy_alloc_kernel_unmapped(struct kbase_mem_phy_alloc *alloc) +{ + WARN_ON(atomic_dec_return(&alloc->kernel_mappings) < 0); +} + /** * kbase_mem_is_imported - Indicate whether a memory type is imported * @@ -248,7 +279,7 @@ static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_m } /** - * A GPU memory region, and attributes for CPU mappings. + * struct kbase_va_region - A GPU memory region, and attributes for CPU mappings * * @rblink: Node in a red-black tree of memory regions within the same zone of * the GPU's virtual address space. 
@@ -262,13 +293,31 @@ static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_m * @threshold_pages: If non-zero and the amount of memory committed to a region * that can grow on page fault exceeds this number of pages * then the driver switches to incremental rendering. - * @extent: Number of pages allocated on page fault. + * @flags: Flags + * @extension: Number of pages allocated on page fault. * @cpu_alloc: The physical memory we mmap to the CPU when mapping this region. * @gpu_alloc: The physical memory we mmap to the GPU when mapping this region. * @jit_node: Links to neighboring regions in the just-in-time memory pool. * @jit_usage_id: The last just-in-time memory usage ID for this region. * @jit_bin_id: The just-in-time memory bin this region came from. * @va_refcnt: Number of users of this region. Protected by reg_lock. + * @heap_info_gpu_addr: Pointer to an object in GPU memory defining an end of + * an allocated region + * The object can be one of: + * - u32 value defining the size of the region + * - u64 pointer first unused byte in the region + * The interpretation of the object depends on + * BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE flag in + * jit_info_flags - if it is set, the heap info object + * should be interpreted as size. + * @used_pages: The current estimate of the number of pages used, which in + * normal use is either: + * - the initial estimate == va_pages + * - the actual pages used, as found by a JIT usage report + * Note that since the value is calculated from GPU memory after a + * JIT usage report, at any point in time it is allowed to take a + * random value that is no greater than va_pages (e.g. 
it may be + * greater than gpu_alloc->nents) */ struct kbase_va_region { struct rb_node rblink; @@ -308,8 +357,13 @@ struct kbase_va_region { #define KBASE_REG_SHARE_BOTH (1ul << 10) /* Space for 4 different zones */ -#define KBASE_REG_ZONE_MASK (3ul << 11) -#define KBASE_REG_ZONE(x) (((x) & 3) << 11) +#define KBASE_REG_ZONE_MASK ((KBASE_REG_ZONE_MAX - 1ul) << 11) +#define KBASE_REG_ZONE(x) (((x) & (KBASE_REG_ZONE_MAX - 1ul)) << 11) +#define KBASE_REG_ZONE_IDX(x) (((x) & KBASE_REG_ZONE_MASK) >> 11) + +#if ((KBASE_REG_ZONE_MAX - 1) & 0x3) != (KBASE_REG_ZONE_MAX - 1) +#error KBASE_REG_ZONE_MAX too large for allocation of KBASE_REG_<...> bits +#endif /* GPU read access */ #define KBASE_REG_GPU_RD (1ul<<13) @@ -328,14 +382,29 @@ struct kbase_va_region { /* Imported buffer is padded? */ #define KBASE_REG_IMPORT_PAD (1ul << 21) +#if MALI_USE_CSF +/* CSF event memory */ +#define KBASE_REG_CSF_EVENT (1ul << 22) +#else /* Bit 22 is reserved. * - * Do not remove, use the next unreserved bit for new flags */ + * Do not remove, use the next unreserved bit for new flags + */ #define KBASE_REG_RESERVED_BIT_22 (1ul << 22) +#endif -/* The top of the initial commit is aligned to extent pages. - * Extent must be a power of 2 */ +#if !MALI_USE_CSF +/* The top of the initial commit is aligned to extension pages. + * Extent must be a power of 2 + */ #define KBASE_REG_TILER_ALIGN_TOP (1ul << 23) +#else +/* Bit 23 is reserved. + * + * Do not remove, use the next unreserved bit for new flags + */ +#define KBASE_REG_RESERVED_BIT_23 (1ul << 23) +#endif /* !MALI_USE_CSF */ /* Whilst this flag is set the GPU allocation is not supposed to be freed by * user space. The flag will remain set for the lifetime of JIT allocations. 
@@ -367,6 +436,9 @@ struct kbase_va_region { */ #define KBASE_REG_HEAP_INFO_IS_SIZE (1ul << 27) +/* Allocation is actively used for JIT memory */ +#define KBASE_REG_ACTIVE_JIT_ALLOC (1ul << 28) + #define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0) /* only used with 32-bit clients */ @@ -390,15 +462,21 @@ struct kbase_va_region { #define KBASE_REG_ZONE_EXEC_VA KBASE_REG_ZONE(2) #define KBASE_REG_ZONE_EXEC_VA_MAX_PAGES ((1ULL << 32) >> PAGE_SHIFT) /* 4 GB */ +#if MALI_USE_CSF +#define KBASE_REG_ZONE_MCU_SHARED KBASE_REG_ZONE(3) +#define KBASE_REG_ZONE_MCU_SHARED_BASE (0x04000000ULL >> PAGE_SHIFT) +#define KBASE_REG_ZONE_MCU_SHARED_SIZE (((0x08000000ULL) >> PAGE_SHIFT) - \ + KBASE_REG_ZONE_MCU_SHARED_BASE) +#endif unsigned long flags; - size_t extent; + size_t extension; struct kbase_mem_phy_alloc *cpu_alloc; struct kbase_mem_phy_alloc *gpu_alloc; struct list_head jit_node; u16 jit_usage_id; u8 jit_bin_id; -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /* Pointer to an object in GPU memory defining an end of an allocated * region * @@ -423,7 +501,7 @@ struct kbase_va_region { * gpu_alloc->nents) */ size_t used_pages; -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ int va_refcnt; }; @@ -471,7 +549,7 @@ static inline struct kbase_va_region *kbase_va_region_alloc_get( WARN_ON(!region->va_refcnt); /* non-atomic as kctx->reg_lock is held */ - dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %p\n", + dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %pK\n", region->va_refcnt, (void *)region); region->va_refcnt++; @@ -488,7 +566,7 @@ static inline struct kbase_va_region *kbase_va_region_alloc_put( /* non-atomic as kctx->reg_lock is held */ region->va_refcnt--; - dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %p\n", + dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %pK\n", region->va_refcnt, (void *)region); if (!region->va_refcnt) kbase_region_refcnt_free(region); @@ -580,6 +658,7 @@ static inline struct 
kbase_mem_phy_alloc *kbase_alloc_create( kref_init(&alloc->kref); atomic_set(&alloc->gpu_mappings, 0); + atomic_set(&alloc->kernel_mappings, 0); alloc->nents = 0; alloc->pages = (void *)(alloc + 1); INIT_LIST_HEAD(&alloc->mappings); @@ -1019,7 +1098,9 @@ struct kbase_va_region *kbase_find_region_enclosing_address( struct rb_root *rbtree, u64 gpu_addr); /** - * @brief Check that a pointer is actually a valid region. + * Check that a pointer is actually a valid region. + * @kctx: kbase context containing the region + * @gpu_addr: pointer to check * * Must be called with context lock held. */ @@ -1048,7 +1129,7 @@ bool kbase_check_import_flags(unsigned long flags); * @flags: The flags passed from user space * @va_pages: The size of the requested region, in pages. * @commit_pages: Number of pages to commit initially. - * @extent: Number of pages to grow by on GPU page fault and/or alignment + * @extension: Number of pages to grow by on GPU page fault and/or alignment * (depending on flags) * * Makes checks on the size parameters passed in from user space for a memory @@ -1057,7 +1138,7 @@ bool kbase_check_import_flags(unsigned long flags); * Return: 0 if sizes are valid for these flags, negative error code otherwise */ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, - u64 va_pages, u64 commit_pages, u64 extent); + u64 va_pages, u64 commit_pages, u64 extension); /** * kbase_update_region_flags - Convert user space flags to kernel region flags @@ -1080,14 +1161,21 @@ void kbase_gpu_vm_unlock(struct kbase_context *kctx); int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size); /** - * @brief Register region and map it on the GPU. + * Register region and map it on the GPU. 
+ * @kctx: kbase context containing the region + * @reg: the region to add + * @addr: the address to insert the region at + * @nr_pages: the number of pages in the region + * @align: the minimum alignment in pages * * Call kbase_add_va_region() and map the region on the GPU. */ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align); /** - * @brief Remove the region from the GPU and unregister it. + * Remove the region from the GPU and unregister it. + * @kctx: KBase context + * @reg: The region to remove * * Must be called with context lock held. */ @@ -1136,20 +1224,23 @@ void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr); void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); -/** Dump the MMU tables to a buffer +/** + * kbase_mmu_dump() - Dump the MMU tables to a buffer. * - * This function allocates a buffer (of @c nr_pages pages) to hold a dump of the MMU tables and fills it. If the - * buffer is too small then the return value will be NULL. + * This function allocates a buffer (of @c nr_pages pages) to hold a dump + * of the MMU tables and fills it. If the buffer is too small + * then the return value will be NULL. * * The GPU vm lock must be held when calling this function. * - * The buffer returned should be freed with @ref vfree when it is no longer required. + * The buffer returned should be freed with @ref vfree when it is no longer + * required. * - * @param[in] kctx The kbase context to dump - * @param[in] nr_pages The number of pages to allocate for the buffer. + * @kctx: The kbase context to dump + * @nr_pages: The number of pages to allocate for the buffer. 
* - * @return The address of the buffer containing the MMU dump or NULL on error (including if the @c nr_pages is too - * small) + * Return: The address of the buffer containing the MMU dump or NULL on error + * (including if the @c nr_pages is too small) */ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages); @@ -1174,25 +1265,27 @@ void kbase_os_mem_map_lock(struct kbase_context *kctx); void kbase_os_mem_map_unlock(struct kbase_context *kctx); /** - * @brief Update the memory allocation counters for the current process + * kbasep_os_process_page_usage_update() - Update the memory allocation + * counters for the current process. * - * OS specific call to updates the current memory allocation counters for the current process with - * the supplied delta. + * OS specific call to update the current memory allocation counters + * for the current process with the supplied delta. * - * @param[in] kctx The kbase context - * @param[in] pages The desired delta to apply to the memory usage counters. + * @kctx: The kbase context + * @pages: The desired delta to apply to the memory usage counters. */ void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages); /** - * @brief Add to the memory allocation counters for the current process + * kbase_process_page_usage_inc() - Add to the memory allocation counters for + * the current process * - * OS specific call to add to the current memory allocation counters for the current process by - * the supplied amount. + * OS specific call to add to the current memory allocation counters for + * the current process by the supplied amount. * - * @param[in] kctx The kernel base context used for the allocation. - * @param[in] pages The desired delta to apply to the memory usage counters. + * @kctx: The kernel base context used for the allocation. + * @pages: The desired delta to apply to the memory usage counters. 
*/ static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages) @@ -1201,13 +1294,14 @@ static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int } /** - * @brief Subtract from the memory allocation counters for the current process + * kbase_process_page_usage_dec() - Subtract from the memory allocation + * counters for the current process. * - * OS specific call to subtract from the current memory allocation counters for the current process by - * the supplied amount. + * OS specific call to subtract from the current memory allocation counters + * for the current process by the supplied amount. * - * @param[in] kctx The kernel base context used for the allocation. - * @param[in] pages The desired delta to apply to the memory usage counters. + * @kctx: The kernel base context used for the allocation. + * @pages: The desired delta to apply to the memory usage counters. */ static inline void kbase_process_page_usage_dec(struct kbase_context *kctx, int pages) @@ -1332,15 +1426,15 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( struct kbase_sub_alloc **prealloc_sa); /** -* @brief Free physical pages. -* -* Frees \a nr_pages and updates the alloc object. -* -* @param[in] alloc allocation object to free pages from -* @param[in] nr_pages_to_free number of physical pages to free -* -* Return: 0 on success, otherwise a negative error code -*/ + * kbase_free_phy_pages_helper() - Free physical pages. + * + * Frees \a nr_pages and updates the alloc object. + * + * @alloc: allocation object to free pages from + * @nr_pages_to_free: number of physical pages to free + * + * Return: 0 on success, otherwise a negative error code + */ int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free); /** @@ -1370,7 +1464,8 @@ static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr) /* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the * private field stays the same. 
So we have to be clever and * use the fact that we only store DMA addresses of whole pages, - * so the low bits should be zero */ + * so the low bits should be zero + */ KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1))); set_page_private(p, dma_addr >> PAGE_SHIFT); } else { @@ -1392,26 +1487,11 @@ static inline void kbase_clear_dma_addr(struct page *p) } /** - * @brief Process a page fault. - * - * @param[in] data work_struct passed by queue_work() - */ -void page_fault_worker(struct work_struct *data); - -/** - * @brief Process a bus fault. - * - * @param[in] data work_struct passed by queue_work() - */ -void bus_fault_worker(struct work_struct *data); - -/** - * @brief Flush MMU workqueues. + * kbase_flush_mmu_wqs() - Flush MMU workqueues. + * @kbdev: Device pointer. * * This function will cause any outstanding page or bus faults to be processed. * It should be called prior to powering off the GPU. - * - * @param[in] kbdev Device pointer */ void kbase_flush_mmu_wqs(struct kbase_device *kbdev); @@ -1437,7 +1517,7 @@ void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size, enum dma_data_direction dir); -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) /** * kbase_jit_debugfs_init - Add per context debugfs entry for JIT. 
* @kctx: kbase context @@ -1497,7 +1577,7 @@ bool kbase_jit_evict(struct kbase_context *kctx); */ void kbase_jit_term(struct kbase_context *kctx); -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /** * kbase_trace_jit_report_gpu_mem_trace_enabled - variant of * kbase_trace_jit_report_gpu_mem() that should only be called once the @@ -1508,7 +1588,7 @@ void kbase_jit_term(struct kbase_context *kctx); */ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, struct kbase_va_region *reg, unsigned int flags); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ /** * kbase_trace_jit_report_gpu_mem - Trace information about the GPU memory used @@ -1530,7 +1610,7 @@ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, * been included. Also gives no opportunity for the compiler to mess up * inlining it. */ -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE #define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) \ do { \ if (trace_mali_jit_report_gpu_mem_enabled()) \ @@ -1540,9 +1620,9 @@ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, #else #define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) \ CSTD_NOP(kctx, reg, flags) -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /** * kbase_jit_report_update_pressure - safely update the JIT physical page * pressure and JIT region's estimate of used_pages @@ -1562,7 +1642,127 @@ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, void kbase_jit_report_update_pressure(struct kbase_context *kctx, struct kbase_va_region *reg, u64 new_used_pages, unsigned int flags); -#endif /* MALI_JIT_PRESSURE_LIMIT */ + +/** + * kbase_jit_trim_necessary_pages() - calculate and trim the least pages possible to + * satisfy a new JIT allocation + * + * @kctx: Pointer to the kbase context + * @needed_pages: 
Number of JIT physical pages by which trimming is requested. + * The actual number of pages trimmed could differ. + * + * Before allocating a new just-in-time memory region or reusing a previous + * one, ensure that the total JIT physical page usage also will not exceed the + * pressure limit. + * + * If there are no reported-on allocations, then we already guarantee this will + * be the case - because our current pressure then only comes from the va_pages + * of each JIT region, hence JIT physical page usage is guaranteed to be + * bounded by this. + * + * However as soon as JIT allocations become "reported on", the pressure is + * lowered to allow new JIT regions to be allocated. It is after such a point + * that the total JIT physical page usage could (either now or in the future on + * a grow-on-GPU-page-fault) exceed the pressure limit, but only on newly + * allocated JIT regions. Hence, trim any "reported on" regions. + * + * Any pages freed will go into the pool and be allocated from there in + * kbase_mem_alloc(). + */ +void kbase_jit_trim_necessary_pages(struct kbase_context *kctx, + size_t needed_pages); + +/* + * Same as kbase_jit_request_phys_increase(), except that Caller is supposed + * to take jit_evict_lock also on @kctx before calling this function. + */ +static inline void +kbase_jit_request_phys_increase_locked(struct kbase_context *kctx, + size_t needed_pages) +{ +#if !MALI_USE_CSF + lockdep_assert_held(&kctx->jctx.lock); +#endif /* !MALI_USE_CSF */ + lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->jit_evict_lock); + + kctx->jit_phys_pages_to_be_allocated += needed_pages; + + kbase_jit_trim_necessary_pages(kctx, + kctx->jit_phys_pages_to_be_allocated); +} + +/** + * kbase_jit_request_phys_increase() - Increment the backing pages count and do + * the required trimming before allocating pages for a JIT allocation. 
+ * + * @kctx: Pointer to the kbase context + * @needed_pages: Number of pages to be allocated for the JIT allocation. + * + * This function needs to be called before allocating backing pages for a + * just-in-time memory region. The backing pages are currently allocated when, + * + * - A new JIT region is created. + * - An old JIT region is reused from the cached pool. + * - GPU page fault occurs for the active JIT region. + * - Backing is grown for the JIT region through the commit ioctl. + * + * This function would ensure that the total JIT physical page usage does not + * exceed the pressure limit even when the backing pages get allocated + * simultaneously for multiple JIT allocations from different threads. + * + * There should be a matching call to kbase_jit_done_phys_increase(), after + * the pages have been allocated and accounted against the active JIT + * allocation. + * + * Caller is supposed to take reg_lock on @kctx before calling this function. + */ +static inline void kbase_jit_request_phys_increase(struct kbase_context *kctx, + size_t needed_pages) +{ +#if !MALI_USE_CSF + lockdep_assert_held(&kctx->jctx.lock); +#endif /* !MALI_USE_CSF */ + lockdep_assert_held(&kctx->reg_lock); + + mutex_lock(&kctx->jit_evict_lock); + kbase_jit_request_phys_increase_locked(kctx, needed_pages); + mutex_unlock(&kctx->jit_evict_lock); +} + +/** + * kbase_jit_done_phys_increase() - Decrement the backing pages count after the + * allocation of pages for a JIT allocation. + * + * @kctx: Pointer to the kbase context + * @needed_pages: Number of pages that were allocated for the JIT allocation. + * + * This function should be called after backing pages have been allocated and + * accounted against the active JIT allocation. + * The call should be made when the following have been satisfied: + * when the allocation is on the jit_active_head. + * when additional needed_pages have been allocated. + * kctx->reg_lock was held during the above and has not yet been unlocked. 
+ * Failure to call this function before unlocking the kctx->reg_lock when + * either the above have changed may result in over-accounting the memory. + * This ensures kbase_jit_trim_necessary_pages() gets a consistent count of + * the memory. + * + * A matching call to kbase_jit_request_phys_increase() should have been made, + * before the allocation of backing pages. + * + * Caller is supposed to take reg_lock on @kctx before calling this function. + */ +static inline void kbase_jit_done_phys_increase(struct kbase_context *kctx, + size_t needed_pages) +{ + lockdep_assert_held(&kctx->reg_lock); + + WARN_ON(kctx->jit_phys_pages_to_be_allocated < needed_pages); + + kctx->jit_phys_pages_to_be_allocated -= needed_pages; +} +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ /** * kbase_has_exec_va_zone - EXEC_VA zone predicate @@ -1693,6 +1893,63 @@ static inline void kbase_mem_pool_unlock(struct kbase_mem_pool *pool) */ void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc); +#if MALI_USE_CSF +/** + * kbase_link_event_mem_page - Add the new event memory region to the per + * context list of event pages. + * @kctx: Pointer to kbase context + * @reg: Pointer to the region allocated for event memory. + * + * The region being linked shouldn't have been marked as free and should + * have KBASE_REG_CSF_EVENT flag set for it. + */ +static inline void kbase_link_event_mem_page(struct kbase_context *kctx, + struct kbase_va_region *reg) +{ + lockdep_assert_held(&kctx->reg_lock); + + WARN_ON(reg->flags & KBASE_REG_FREE); + WARN_ON(!(reg->flags & KBASE_REG_CSF_EVENT)); + + list_add(&reg->link, &kctx->csf.event_pages_head); +} + +/** + * kbase_unlink_event_mem_page - Remove the event memory region from the per + * context list of event pages. + * @kctx: Pointer to kbase context + * @reg: Pointer to the region allocated for event memory. + * + * The region being un-linked shouldn't have been marked as free and should + * have KBASE_REG_CSF_EVENT flag set for it. 
+ */ +static inline void kbase_unlink_event_mem_page(struct kbase_context *kctx, + struct kbase_va_region *reg) +{ + lockdep_assert_held(&kctx->reg_lock); + + WARN_ON(reg->flags & KBASE_REG_FREE); + WARN_ON(!(reg->flags & KBASE_REG_CSF_EVENT)); + + list_del(&reg->link); +} + +/** + * kbase_mcu_shared_interface_region_tracker_init - Initialize the rb tree to + * manage the shared interface segment of MCU firmware address space. + * @kbdev: Pointer to the kbase device + * + * Returns zero on success or negative error number on failure. + */ +int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev); + +/** + * kbase_mcu_shared_interface_region_tracker_term - Teardown the rb tree + * managing the shared interface segment of MCU firmware address space. + * @kbdev: Pointer to the kbase device + */ +void kbase_mcu_shared_interface_region_tracker_term(struct kbase_device *kbdev); +#endif /** * kbase_mem_umm_map - Map dma-buf @@ -1742,7 +1999,6 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx, int kbase_mem_do_sync_imported(struct kbase_context *kctx, struct kbase_va_region *reg, enum kbase_sync_type sync_fn); - /** * kbase_mem_copy_to_pinned_user_pages - Memcpy from source input page to * an unaligned address at a given offset from the start of a target page. 
@@ -1768,4 +2024,76 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, void *src_page, size_t *to_copy, unsigned int nr_pages, unsigned int *target_page_nr, size_t offset); +/** + * kbase_reg_zone_end_pfn - return the end Page Frame Number of @zone + * @zone: zone to query + * + * Return: The end of the zone corresponding to @zone + */ +static inline u64 kbase_reg_zone_end_pfn(struct kbase_reg_zone *zone) +{ + return zone->base_pfn + zone->va_size_pages; +} + +/** + * kbase_ctx_reg_zone_init - initialize a zone in @kctx + * @kctx: Pointer to kbase context + * @zone_bits: A KBASE_REG_ZONE_<...> to initialize + * @base_pfn: Page Frame Number in GPU virtual address space for the start of + * the Zone + * @va_size_pages: Size of the Zone in pages + */ +static inline void kbase_ctx_reg_zone_init(struct kbase_context *kctx, + unsigned long zone_bits, + u64 base_pfn, u64 va_size_pages) +{ + struct kbase_reg_zone *zone; + + lockdep_assert_held(&kctx->reg_lock); + WARN_ON((zone_bits & KBASE_REG_ZONE_MASK) != zone_bits); + + zone = &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)]; + *zone = (struct kbase_reg_zone){ + .base_pfn = base_pfn, .va_size_pages = va_size_pages, + }; +} + +/** + * kbase_ctx_reg_zone_get_nolock - get a zone from @kctx where the caller does + * not have @kctx 's region lock + * @kctx: Pointer to kbase context + * @zone_bits: A KBASE_REG_ZONE_<...> to retrieve + * + * This should only be used in performance-critical paths where the code is + * resilient to a race with the zone changing. 
+ * + * Return: The zone corresponding to @zone_bits + */ +static inline struct kbase_reg_zone * +kbase_ctx_reg_zone_get_nolock(struct kbase_context *kctx, + unsigned long zone_bits) +{ + WARN_ON((zone_bits & KBASE_REG_ZONE_MASK) != zone_bits); + + return &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)]; +} + +/** + * kbase_ctx_reg_zone_get - get a zone from @kctx + * @kctx: Pointer to kbase context + * @zone_bits: A KBASE_REG_ZONE_<...> to retrieve + * + * The get is not refcounted - there is no corresponding 'put' operation + * + * Return: The zone corresponding to @zone_bits + */ +static inline struct kbase_reg_zone * +kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits) +{ + lockdep_assert_held(&kctx->reg_lock); + WARN_ON((zone_bits & KBASE_REG_ZONE_MASK) != zone_bits); + + return &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)]; +} + #endif /* _KBASE_MEM_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c index b669f2a..21302c1 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,15 +17,10 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ - - /** - * @file mali_kbase_mem_linux.c - * Base kernel memory APIs, Linux implementation. + * DOC: Base kernel memory APIs, Linux implementation. */ #include @@ -35,10 +31,9 @@ #include #include #include -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \ - (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)) +#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE) #include -#endif /* LINUX_VERSION_CODE >= 3.5.0 && < 4.8.0 */ +#endif /* LINUX_VERSION_CODE < 4.8.0 */ #include #include #include @@ -47,8 +42,11 @@ #include #include #include -#include +#include #include +#include +#include +#include #if ((KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) || \ (KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE)) @@ -87,6 +85,12 @@ #define IR_THRESHOLD_STEPS (256u) +#if MALI_USE_CSF +static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, + struct vm_area_struct *vma); +static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, + struct vm_area_struct *vma); +#endif static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_region *reg, u64 offset_bytes, size_t size, @@ -107,6 +111,25 @@ static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, static struct kbase_va_region *kbase_find_event_mem_region( struct kbase_context *kctx, u64 gpu_addr) { +#if MALI_USE_CSF + u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; + struct kbase_va_region *reg; + + lockdep_assert_held(&kctx->reg_lock); + + list_for_each_entry(reg, &kctx->csf.event_pages_head, link) { + if ((reg->start_pfn <= gpu_pfn) && + (gpu_pfn < (reg->start_pfn + reg->nr_pages))) { + if (WARN_ON(reg->flags & KBASE_REG_FREE)) + return NULL; + + if (WARN_ON(!(reg->flags & KBASE_REG_CSF_EVENT))) + return NULL; + + return reg; + } + } +#endif return NULL; } @@ -269,8 +292,8 @@ void kbase_phy_alloc_mapping_put(struct kbase_context *kctx, } struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, - u64 va_pages, u64 commit_pages, u64 
extent, u64 *flags, - u64 *gpu_va) + u64 va_pages, u64 commit_pages, + u64 extension, u64 *flags, u64 *gpu_va) { int zone; struct kbase_va_region *reg; @@ -282,15 +305,20 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, KBASE_DEBUG_ASSERT(gpu_va); dev = kctx->kbdev->dev; - dev_dbg(dev, "Allocating %lld va_pages, %lld commit_pages, %lld extent, 0x%llX flags\n", - va_pages, commit_pages, extent, *flags); + dev_dbg(dev, + "Allocating %lld va_pages, %lld commit_pages, %lld extension, 0x%llX flags\n", + va_pages, commit_pages, extension, *flags); +#if MALI_USE_CSF + *gpu_va = 0; /* return 0 on failure */ +#else if (!(*flags & BASE_MEM_FLAG_MAP_FIXED)) *gpu_va = 0; /* return 0 on failure */ else dev_err(dev, "Keeping requested GPU VA of 0x%llx\n", (unsigned long long)*gpu_va); +#endif if (!kbase_check_alloc_flags(*flags)) { dev_warn(dev, @@ -299,7 +327,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, goto bad_flags; } -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) if (unlikely(kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE))) { /* Mask coherency flags if infinite cache is enabled to prevent * the skipping of syncs from BASE side. 
@@ -325,7 +353,8 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, *flags &= ~BASE_MEM_COHERENT_SYSTEM; } - if (kbase_check_alloc_sizes(kctx, *flags, va_pages, commit_pages, extent)) + if (kbase_check_alloc_sizes(kctx, *flags, va_pages, commit_pages, + extension)) goto bad_sizes; #ifdef CONFIG_MALI_MEMORY_FULLY_BACKED @@ -363,6 +392,15 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, goto prepare_failed; } + if (unlikely(reg->cpu_alloc != reg->gpu_alloc)) + *flags |= BASE_MEM_KERNEL_SYNC; + + /* make sure base knows if the memory is actually cached or not */ + if (reg->flags & KBASE_REG_CPU_CACHED) + *flags |= BASE_MEM_CACHED_CPU; + else + *flags &= ~BASE_MEM_CACHED_CPU; + if (*flags & BASE_MEM_GROW_ON_GPF) { unsigned int const ir_threshold = atomic_read( &kctx->kbdev->memdev.ir_threshold); @@ -372,12 +410,17 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, } else reg->threshold_pages = 0; - if (*flags & (BASE_MEM_GROW_ON_GPF|BASE_MEM_TILER_ALIGN_TOP)) { - /* kbase_check_alloc_sizes() already checks extent is valid for - * assigning to reg->extent */ - reg->extent = extent; + if (*flags & BASE_MEM_GROW_ON_GPF) { + /* kbase_check_alloc_sizes() already checks extension is valid for + * assigning to reg->extension + */ + reg->extension = extension; +#if !MALI_USE_CSF + } else if (*flags & BASE_MEM_TILER_ALIGN_TOP) { + reg->extension = extension; +#endif /* !MALI_USE_CSF */ } else { - reg->extent = 0; + reg->extension = 0; } if (kbase_alloc_phy_pages(reg, va_pages, commit_pages) != 0) { @@ -404,7 +447,6 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, } } - /* mmap needed to setup VA? 
*/ if (*flags & BASE_MEM_SAME_VA) { unsigned long cookie, cookie_nr; @@ -436,6 +478,17 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, *gpu_va = reg->start_pfn << PAGE_SHIFT; } +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (*flags & BASEP_MEM_PERFORM_JIT_TRIM) { + kbase_jit_done_phys_increase(kctx, commit_pages); + + mutex_lock(&kctx->jit_evict_lock); + WARN_ON(!list_empty(&reg->jit_node)); + list_add(&reg->jit_node, &kctx->jit_active_head); + mutex_unlock(&kctx->jit_evict_lock); + } +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + kbase_gpu_vm_unlock(kctx); return reg; @@ -443,6 +496,13 @@ no_mmap: no_cookie: no_kern_mapping: no_mem: +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (*flags & BASEP_MEM_PERFORM_JIT_TRIM) { + kbase_gpu_vm_lock(kctx); + kbase_jit_done_phys_increase(kctx, commit_pages); + kbase_gpu_vm_unlock(kctx); + } +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ kbase_mem_phy_alloc_put(reg->cpu_alloc); kbase_mem_phy_alloc_put(reg->gpu_alloc); invalid_flags: @@ -511,21 +571,36 @@ int kbase_mem_query(struct kbase_context *kctx, *out |= BASE_MEM_COHERENT_SYSTEM; if (KBASE_REG_SHARE_IN & reg->flags) *out |= BASE_MEM_COHERENT_LOCAL; - if (kctx->api_version >= KBASE_API_VERSION(11, 2)) { - /* Prior to 11.2, these were known about by user-side - * but we did not return them. Returning some of these - * caused certain clients that were not expecting them - * to fail, so we omit all of them as a special-case - * for compatibility reasons */ + if (mali_kbase_supports_mem_grow_on_gpf(kctx->api_version)) { + /* Prior to this version, this was known about by + * user-side but we did not return them. Returning + * it caused certain clients that were not expecting + * it to fail, so we omit it as a special-case for + * compatibility reasons + */ if (KBASE_REG_PF_GROW & reg->flags) *out |= BASE_MEM_GROW_ON_GPF; + } + if (mali_kbase_supports_mem_protected(kctx->api_version)) { + /* Prior to this version, this was known about by + * user-side but we did not return them.
Returning + * it caused certain clients that were not expecting + * it to fail, so we omit it as a special-case for + * compatibility reasons + */ if (KBASE_REG_PROTECTED & reg->flags) *out |= BASE_MEM_PROTECTED; } +#if !MALI_USE_CSF if (KBASE_REG_TILER_ALIGN_TOP & reg->flags) *out |= BASE_MEM_TILER_ALIGN_TOP; +#endif /* !MALI_USE_CSF */ if (!(KBASE_REG_GPU_CACHED & reg->flags)) *out |= BASE_MEM_UNCACHED_GPU; +#if MALI_USE_CSF + if (KBASE_REG_CSF_EVENT & reg->flags) + *out |= BASE_MEM_CSF_EVENT; +#endif if (KBASE_REG_GPU_VA_SAME_4GB_PAGE & reg->flags) *out |= BASE_MEM_GPU_VA_SAME_4GB_PAGE; @@ -561,18 +636,17 @@ unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s, struct shrink_control *sc) { struct kbase_context *kctx; - struct kbase_mem_phy_alloc *alloc; - unsigned long pages = 0; kctx = container_of(s, struct kbase_context, reclaim); - mutex_lock(&kctx->jit_evict_lock); - - list_for_each_entry(alloc, &kctx->evict_list, evict_node) - pages += alloc->nents; + WARN((sc->gfp_mask & __GFP_ATOMIC), + "Shrinkers cannot be called for GFP_ATOMIC allocations. Check kernel mm for problems. gfp_mask==%x\n", + sc->gfp_mask); + WARN(in_atomic(), + "Shrinker called whilst in atomic context. The caller must switch to using GFP_ATOMIC or similar. 
gfp_mask==%x\n", + sc->gfp_mask); - mutex_unlock(&kctx->jit_evict_lock); - return pages; + return atomic_read(&kctx->evict_nents); } /** @@ -604,6 +678,7 @@ unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s, unsigned long freed = 0; kctx = container_of(s, struct kbase_context, reclaim); + mutex_lock(&kctx->jit_evict_lock); list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) { @@ -630,6 +705,7 @@ unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s, kbase_free_phy_pages_helper(alloc, alloc->evicted); freed += alloc->evicted; + WARN_ON(atomic_sub_return(alloc->evicted, &kctx->evict_nents) < 0); list_del_init(&alloc->evict_node); /* @@ -648,35 +724,20 @@ out_unlock: return freed; } -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) -static int kbase_mem_evictable_reclaim_shrink(struct shrinker *s, - struct shrink_control *sc) -{ - if (sc->nr_to_scan == 0) - return kbase_mem_evictable_reclaim_count_objects(s, sc); - - return kbase_mem_evictable_reclaim_scan_objects(s, sc); -} -#endif - int kbase_mem_evictable_init(struct kbase_context *kctx) { INIT_LIST_HEAD(&kctx->evict_list); mutex_init(&kctx->jit_evict_lock); - /* Register shrinker */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) - kctx->reclaim.shrink = kbase_mem_evictable_reclaim_shrink; -#else + atomic_set(&kctx->evict_nents, 0); + kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects; kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects; -#endif kctx->reclaim.seeks = DEFAULT_SEEKS; /* Kernel versions prior to 3.1 : - * struct shrinker does not define batch */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) + * struct shrinker does not define batch + */ kctx->reclaim.batch = 0; -#endif register_shrinker(&kctx->reclaim); return 0; } @@ -705,6 +766,7 @@ void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc) kbdev, kctx->id, (u64)new_page_count); + kbase_trace_gpu_mem_usage_dec(kbdev, kctx, 
alloc->nents); } /** @@ -731,6 +793,7 @@ void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc) kbdev, kctx->id, (u64)new_page_count); + kbase_trace_gpu_mem_usage_inc(kbdev, kctx, alloc->nents); } int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) @@ -751,6 +814,7 @@ int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) * can reclaim it. */ list_add(&gpu_alloc->evict_node, &kctx->evict_list); + atomic_add(gpu_alloc->nents, &kctx->evict_nents); mutex_unlock(&kctx->jit_evict_lock); kbase_mem_evictable_mark_reclaim(gpu_alloc); @@ -770,6 +834,7 @@ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) * First remove the allocation from the eviction list as it's no * longer eligible for eviction. */ + WARN_ON(atomic_sub_return(gpu_alloc->nents, &kctx->evict_nents) < 0); list_del_init(&gpu_alloc->evict_node); mutex_unlock(&kctx->jit_evict_lock); @@ -840,7 +905,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in real_flags |= KBASE_REG_SHARE_IN; /* now we can lock down the context, and find the region */ - down_write(&current->mm->mmap_sem); + down_write(kbase_mem_get_process_mmap_lock()); kbase_gpu_vm_lock(kctx); /* Validate the region */ @@ -852,10 +917,18 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED; new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED; if (prev_needed != new_needed) { - /* Aliased allocations can't be made ephemeral */ + /* Aliased allocations can't be shrunk as the code doesn't + * support looking up: + * - all physical pages assigned to different GPU VAs + * - CPU mappings for the physical pages at different vm_pgoff + * (==GPU VA) locations.
+ */ if (atomic_read(&reg->cpu_alloc->gpu_mappings) > 1) goto out_unlock; + if (atomic_read(&reg->cpu_alloc->kernel_mappings) > 0) + goto out_unlock; + if (new_needed) { /* Only native allocations can be marked not needed */ if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) { @@ -938,7 +1011,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in out_unlock: kbase_gpu_vm_unlock(kctx); - up_write(&current->mm->mmap_sem); + up_write(kbase_mem_get_process_mmap_lock()); out: return ret; } @@ -1029,7 +1102,7 @@ int kbase_mem_do_sync_imported(struct kbase_context *kctx, dir); #endif /* KBASE_MEM_ION_SYNC_WORKAROUND */ break; - }; + } if (unlikely(ret)) dev_warn(kctx->kbdev->dev, @@ -1056,6 +1129,8 @@ static void kbase_mem_umm_unmap_attachment(struct kbase_context *kctx, alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); alloc->imported.umm.sgt = NULL; + kbase_remove_dma_buf_usage(kctx, alloc); + memset(pa, 0xff, sizeof(*pa) * alloc->nents); alloc->nents = 0; } @@ -1123,6 +1198,7 @@ static int kbase_mem_umm_map_attachment(struct kbase_context *kctx, /* Update nents as we now have pages to map */ alloc->nents = count; + kbase_add_dma_buf_usage(kctx, alloc); return 0; @@ -1327,7 +1403,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, if (*flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) need_sync = true; -#ifdef CONFIG_64BIT +#if IS_ENABLED(CONFIG_64BIT) if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { /* * 64-bit tasks require us to reserve VA on the CPU that we use @@ -1383,7 +1459,8 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment; reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0; reg->gpu_alloc->imported.umm.need_sync = need_sync; - reg->extent = 0; + reg->gpu_alloc->imported.umm.kctx = kctx; + reg->extension = 0; if (!IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) { int err; @@ -1436,6 +1513,7 @@ static struct kbase_va_region
*kbase_mem_from_user_buffer( u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev); struct kbase_alloc_import_user_buf *user_buf; struct page **pages = NULL; + int write; /* Flag supported only for dma-buf imported memory */ if (*flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) @@ -1478,7 +1556,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( if (*flags & BASE_MEM_IMPORT_SHARED) shared_zone = true; -#ifdef CONFIG_64BIT +#if IS_ENABLED(CONFIG_64BIT) if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { /* * 64-bit tasks require us to reserve VA on the CPU that we use @@ -1547,33 +1625,33 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( *flags |= KBASE_MEM_IMPORT_HAVE_PAGES; } - down_read(&current->mm->mmap_sem); + down_read(kbase_mem_get_process_mmap_lock()); + + write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR); -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) +#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE faulted_pages = get_user_pages(current, current->mm, address, *va_pages, #if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE - reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, - pages, NULL); + write ? FOLL_WRITE : 0, pages, NULL); #else - reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL); + write, 0, pages, NULL); #endif -#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) +#elif KERNEL_VERSION(4, 9, 0) > LINUX_VERSION_CODE faulted_pages = get_user_pages(address, *va_pages, - reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL); + write, 0, pages, NULL); #else faulted_pages = get_user_pages(address, *va_pages, - reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, - pages, NULL); + write ?
FOLL_WRITE : 0, pages, NULL); #endif - up_read(&current->mm->mmap_sem); + up_read(kbase_mem_get_process_mmap_lock()); if (faulted_pages != *va_pages) goto fault_mismatch; reg->gpu_alloc->nents = 0; - reg->extent = 0; + reg->extension = 0; if (pages) { struct device *dev = kctx->kbdev->dev; @@ -1672,10 +1750,11 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, /* calculate the number of pages this alias will cover */ *num_pages = nents * stride; -#ifdef CONFIG_64BIT +#if IS_ENABLED(CONFIG_64BIT) if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { /* 64-bit tasks must MMAP anyway, but not expose this address to - * clients */ + * clients + */ *flags |= BASE_MEM_NEED_MMAP; reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, *num_pages, @@ -1721,7 +1800,8 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, goto bad_handle; /* must be > 0 */ if (ai[i].length > stride) goto bad_handle; /* can't be larger than the - stride */ + * stride + */ reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length; } else { struct kbase_va_region *aliasing_reg; @@ -1736,6 +1816,15 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, goto bad_handle; /* Not found/already free */ if (aliasing_reg->flags & KBASE_REG_DONT_NEED) goto bad_handle; /* Ephemeral region */ + if (aliasing_reg->flags & KBASE_REG_NO_USER_FREE) + goto bad_handle; /* JIT regions can't be + * aliased. NO_USER_FREE flag + * covers the entire lifetime + * of JIT regions. The other + * types of regions covered + * by this flag also shall + * not be aliased.
+ */ if (!(aliasing_reg->flags & KBASE_REG_GPU_CACHED)) goto bad_handle; /* GPU uncached memory */ if (!aliasing_reg->gpu_alloc) @@ -1743,16 +1832,18 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) goto bad_handle; /* Not a native alloc */ if (coherent != ((aliasing_reg->flags & KBASE_REG_SHARE_BOTH) != 0)) - goto bad_handle; - /* Non-coherent memory cannot alias - coherent memory, and vice versa.*/ + goto bad_handle; /* Non-coherent memory cannot + * alias coherent memory, and + * vice versa. + */ /* check size against stride */ if (!ai[i].length) goto bad_handle; /* must be > 0 */ if (ai[i].length > stride) goto bad_handle; /* can't be larger than the - stride */ + * stride + */ alloc = aliasing_reg->gpu_alloc; @@ -1765,10 +1856,22 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, reg->gpu_alloc->imported.alias.aliased[i].alloc = kbase_mem_phy_alloc_get(alloc); reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length; reg->gpu_alloc->imported.alias.aliased[i].offset = ai[i].offset; + + /* Ensure the underlying alloc is marked as being + * mapped at >1 different GPU VA immediately, even + * though mapping might not happen until later. + * + * Otherwise, we would (incorrectly) allow shrinking of + * the source region (aliasing_reg) and so freeing the + * physical pages (without freeing the entire alloc) + * whilst we still hold an implicit reference on those + * physical pages. 
+ */ + kbase_mem_phy_alloc_gpu_mapped(alloc); } } -#ifdef CONFIG_64BIT +#if IS_ENABLED(CONFIG_64BIT) if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { /* Bind to a cookie */ if (bitmap_empty(kctx->cookies, BITS_PER_LONG)) { @@ -1803,11 +1906,15 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, return gpu_va; -#ifdef CONFIG_64BIT +#if IS_ENABLED(CONFIG_64BIT) no_cookie: #endif no_mmap: bad_handle: + /* Marking the source allocs as not being mapped on the GPU and putting + * them is handled by putting reg's allocs, so no rollback of those + * actions is done here. + */ kbase_gpu_vm_unlock(kctx); no_aliased_array: invalid_flags: @@ -1887,7 +1994,7 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, sizeof(user_buffer))) { reg = NULL; } else { -#ifdef CONFIG_COMPAT +#if IS_ENABLED(CONFIG_COMPAT) if (kbase_ctx_flag(kctx, KCTX_COMPAT)) uptr = compat_ptr(user_buffer.ptr); else @@ -2024,7 +2131,7 @@ static int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx, int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) { u64 old_pages; - u64 delta; + u64 delta = 0; int res = -EINVAL; struct kbase_va_region *reg; bool read_locked = false; @@ -2037,7 +2144,7 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) return -EINVAL; } - down_write(&current->mm->mmap_sem); + down_write(kbase_mem_get_process_mmap_lock()); kbase_gpu_vm_lock(kctx); /* Validate the region */ @@ -2054,13 +2161,27 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) if (0 == (reg->flags & KBASE_REG_GROWABLE)) goto out_unlock; + if (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC) + goto out_unlock; + /* Would overflow the VA region */ if (new_pages > reg->nr_pages) goto out_unlock; - /* can't be mapped more than once on the GPU */ + /* Can't shrink when physical pages are mapped to different GPU + * VAs.
The code doesn't support looking up: + * - all physical pages assigned to different GPU VAs + * - CPU mappings for the physical pages at different vm_pgoff + * (==GPU VA) locations. + * + * Note that for Native allocs mapped at multiple GPU VAs, growth of + * such allocs is not a supported use-case. + */ if (atomic_read(&reg->gpu_alloc->gpu_mappings) > 1) goto out_unlock; + + if (atomic_read(&reg->cpu_alloc->kernel_mappings) > 0) + goto out_unlock; /* can't grow regions which are ephemeral */ if (reg->flags & KBASE_REG_DONT_NEED) goto out_unlock; @@ -2085,7 +2206,7 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) * No update to the mm so downgrade the writer lock to a read * lock so other readers aren't blocked after this point. */ - downgrade_write(&current->mm->mmap_sem); + downgrade_write(kbase_mem_get_process_mmap_lock()); read_locked = true; /* Allocate some more pages */ @@ -2127,9 +2248,9 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) out_unlock: kbase_gpu_vm_unlock(kctx); if (read_locked) - up_read(&current->mm->mmap_sem); + up_read(kbase_mem_get_process_mmap_lock()); else - up_write(&current->mm->mmap_sem); + up_write(kbase_mem_get_process_mmap_lock()); return res; } @@ -2360,11 +2481,7 @@ static int kbase_cpu_mmap(struct kbase_context *kctx, * See MIDBASE-1057 */ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO; -#else - vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO; -#endif vma->vm_ops = &kbase_vm_ops; vma->vm_private_data = map; @@ -2510,16 +2627,14 @@ out: void kbase_os_mem_map_lock(struct kbase_context *kctx) { - struct mm_struct *mm = current->mm; (void)kctx; - down_read(&mm->mmap_sem); + down_read(kbase_mem_get_process_mmap_lock()); } void kbase_os_mem_map_unlock(struct kbase_context *kctx) { - struct mm_struct *mm = current->mm; (void)kctx; - up_read(&mm->mmap_sem); + up_read(kbase_mem_get_process_mmap_lock()); }
static int kbasep_reg_mmap(struct kbase_context *kctx, @@ -2547,7 +2662,8 @@ static int kbasep_reg_mmap(struct kbase_context *kctx, /* incorrect mmap size */ /* leave the cookie for a potential later * mapping, or to be reclaimed later when the - * context is freed */ + * context is freed + */ err = -ENOMEM; goto out; } @@ -2576,6 +2692,11 @@ static int kbasep_reg_mmap(struct kbase_context *kctx, kctx->pending_regions[cookie] = NULL; bitmap_set(kctx->cookies, cookie, 1); +#if MALI_USE_CSF + if (reg->flags & KBASE_REG_CSF_EVENT) + kbase_link_event_mem_page(kctx, reg); +#endif + /* * Overwrite the offset with the region start_pfn, so we effectively * map from offset 0 in the region. However subtract the aligned @@ -2595,7 +2716,7 @@ int kbase_context_mmap(struct kbase_context *const kctx, { struct kbase_va_region *reg = NULL; void *kaddr = NULL; - size_t nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + size_t nr_pages = vma_pages(vma); int err = 0; int free_on_close = 0; struct device *dev = kctx->kbdev->dev; @@ -2608,7 +2729,7 @@ int kbase_context_mmap(struct kbase_context *const kctx, if (!(vma->vm_flags & VM_WRITE)) vma->vm_flags &= ~VM_MAYWRITE; - if (0 == nr_pages) { + if (nr_pages == 0) { err = -EINVAL; goto out; } @@ -2629,7 +2750,8 @@ int kbase_context_mmap(struct kbase_context *const kctx, /* if not the MTP, verify that the MTP has been mapped */ rcu_read_lock(); /* catches both when the special page isn't present or - * when we've forked */ + * when we've forked + */ if (rcu_dereference(kctx->process_mm) != current->mm) { err = -EINVAL; rcu_read_unlock(); @@ -2646,16 +2768,30 @@ int kbase_context_mmap(struct kbase_context *const kctx, case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE): /* MMU dump */ err = kbase_mmu_dump_mmap(kctx, vma, &reg, &kaddr); - if (0 != err) + if (err != 0) goto out_unlock; /* free the region on munmap */ free_on_close = 1; break; +#if MALI_USE_CSF + case PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE): + kbase_gpu_vm_unlock(kctx); + err =
kbase_csf_cpu_mmap_user_reg_page(kctx, vma); + goto out; + case PFN_DOWN(BASEP_MEM_CSF_USER_IO_PAGES_HANDLE) ... + PFN_DOWN(BASE_MEM_COOKIE_BASE) - 1: { + kbase_gpu_vm_unlock(kctx); + mutex_lock(&kctx->csf.lock); + err = kbase_csf_cpu_mmap_user_io_pages(kctx, vma); + mutex_unlock(&kctx->csf.lock); + goto out; + } +#endif case PFN_DOWN(BASE_MEM_COOKIE_BASE) ... PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: { err = kbasep_reg_mmap(kctx, vma, &reg, &nr_pages, &aligned_offset); - if (0 != err) + if (err != 0) goto out_unlock; /* free the region on munmap */ free_on_close = 1; @@ -2728,8 +2864,21 @@ int kbase_context_mmap(struct kbase_context *const kctx, if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) { /* MMU dump - userspace should now have a reference on - * the pages, so we can now free the kernel mapping */ + * the pages, so we can now free the kernel mapping + */ vfree(kaddr); + /* CPU mapping of GPU allocations have GPU VA as the vm_pgoff + * and that is used to shrink the mapping when the commit size + * is reduced. So vm_pgoff for CPU mapping created to get the + * snapshot of GPU page tables shall not match with any GPU VA. + * That can be ensured by setting vm_pgoff as vma->vm_start + * because, + * - GPU VA of any SAME_VA allocation cannot match with + * vma->vm_start, as CPU VAs are unique. + * - GPU VA of CUSTOM_VA allocations are outside the CPU + * virtual address space.
+ */ + vma->vm_pgoff = PFN_DOWN(vma->vm_start); } out_unlock: @@ -2824,8 +2973,8 @@ static int kbase_vmap_phy_pages(struct kbase_context *kctx, /* Note: enforcing a RO prot_request onto prot is not done, since: * - CPU-arch-specific integration required - * - kbase_vmap() requires no access checks to be made/enforced */ - + * - kbase_vmap() requires no access checks to be made/enforced + */ cpu_addr = vmap(pages, page_count, VM_MAP, prot); kfree(pages); @@ -2846,6 +2995,7 @@ static int kbase_vmap_phy_pages(struct kbase_context *kctx, if (map->sync_needed) kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_CPU); + kbase_mem_phy_alloc_kernel_mapped(reg->cpu_alloc); return 0; } @@ -2901,7 +3051,8 @@ void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, * be made. * * As mentioned in kbase_vmap_prot() this means that a kernel-side - * CPU-RO mapping is not enforced to allow this to work */ + * CPU-RO mapping is not enforced to allow this to work + */ return kbase_vmap_prot(kctx, gpu_addr, size, 0u, map); } KBASE_EXPORT_TEST_API(kbase_vmap); @@ -2915,6 +3066,7 @@ static void kbase_vunmap_phy_pages(struct kbase_context *kctx, if (map->sync_needed) kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE); + kbase_mem_phy_alloc_kernel_unmapped(map->cpu_alloc); map->offset_in_page = 0; map->cpu_pages = NULL; map->gpu_pages = NULL; @@ -2933,7 +3085,7 @@ KBASE_EXPORT_TEST_API(kbase_vunmap); static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value) { -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0)) +#if (KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE) /* To avoid the build breakage due to an unexported kernel symbol * 'mm_trace_rss_stat' from later kernels, i.e. 
from V4.19.0 onwards, * we inline here the equivalent of 'add_mm_counter()' from linux @@ -3017,14 +3169,319 @@ static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_ /* no real access */ vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC); -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO; + vma->vm_ops = &kbase_vm_special_ops; + vma->vm_private_data = kctx; + + return 0; +} + +#if MALI_USE_CSF +static unsigned long get_queue_doorbell_pfn(struct kbase_device *kbdev, + struct kbase_queue *queue) +{ + lockdep_assert_held(&kbdev->csf.reg_lock); + + /* Return the real Hw doorbell page if queue has been + * assigned one, otherwise a dummy page. Always return the + * dummy page in no mali builds. + */ + if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID) + return PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_db_page)); + return (PFN_DOWN(kbdev->reg_start + CSF_HW_DOORBELL_PAGE_OFFSET + + (u64)queue->doorbell_nr * CSF_HW_DOORBELL_PAGE_SIZE)); +} + +static void kbase_csf_user_io_pages_vm_open(struct vm_area_struct *vma) +{ + WARN(1, "Unexpected attempt to clone private vma\n"); + vma->vm_private_data = NULL; +} + +static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma) +{ + struct kbase_queue *queue = vma->vm_private_data; + struct kbase_context *kctx; + struct kbase_device *kbdev; + int err; + bool reset_prevented = false; + + if (WARN_ON(!queue)) + return; + + kctx = queue->kctx; + kbdev = kctx->kbdev; + + err = kbase_reset_gpu_prevent_and_wait(kbdev); + if (err) + dev_warn( + kbdev->dev, + "Unsuccessful GPU reset detected when unbinding queue (csi_index=%d), attempting to unbind regardless", + queue->csi_index); + else + reset_prevented = true; + + mutex_lock(&kctx->csf.lock); + kbase_csf_queue_unbind(queue); + mutex_unlock(&kctx->csf.lock); + + if (reset_prevented) + kbase_reset_gpu_allow(kbdev); + + /* Now as the vma is 
closed, drop the reference on mali device file */ + fput(kctx->filp); +} + +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) +static vm_fault_t kbase_csf_user_io_pages_vm_fault(struct vm_area_struct *vma, + struct vm_fault *vmf) +{ #else - vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO; +static vm_fault_t kbase_csf_user_io_pages_vm_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; #endif - vma->vm_ops = &kbase_vm_special_ops; + struct kbase_queue *queue = vma->vm_private_data; + unsigned long doorbell_cpu_addr, input_cpu_addr, output_cpu_addr; + unsigned long doorbell_page_pfn, input_page_pfn, output_page_pfn; + pgprot_t doorbell_pgprot, input_page_pgprot, output_page_pgprot; + size_t nr_pages = PFN_DOWN(vma->vm_end - vma->vm_start); + vm_fault_t ret; + struct kbase_device *kbdev; + struct memory_group_manager_device *mgm_dev; + + /* Few sanity checks up front */ + if ((nr_pages != BASEP_QUEUE_NR_MMAP_USER_PAGES) || + (vma->vm_pgoff != queue->db_file_offset)) + return VM_FAULT_SIGBUS; + + mutex_lock(&queue->kctx->csf.lock); + kbdev = queue->kctx->kbdev; + mgm_dev = kbdev->mgm_dev; + + /* Always map the doorbell page as uncached */ + doorbell_pgprot = pgprot_device(vma->vm_page_prot); + +#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \ + ((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \ + (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE))) + vma->vm_page_prot = doorbell_pgprot; + input_page_pgprot = doorbell_pgprot; + output_page_pgprot = doorbell_pgprot; +#else + if (kbdev->system_coherency == COHERENCY_NONE) { + input_page_pgprot = pgprot_writecombine(vma->vm_page_prot); + output_page_pgprot = pgprot_writecombine(vma->vm_page_prot); + } else { + input_page_pgprot = vma->vm_page_prot; + output_page_pgprot = vma->vm_page_prot; + } +#endif + + doorbell_cpu_addr = vma->vm_start; + +#if KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE + if ((unsigned long)vmf->virtual_address == doorbell_cpu_addr) { +#else + if 
(vmf->address == doorbell_cpu_addr) { +#endif + mutex_lock(&kbdev->csf.reg_lock); + doorbell_page_pfn = get_queue_doorbell_pfn(kbdev, queue); + ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, + KBASE_MEM_GROUP_CSF_IO, vma, doorbell_cpu_addr, + doorbell_page_pfn, doorbell_pgprot); + mutex_unlock(&kbdev->csf.reg_lock); + } else { + /* Map the Input page */ + input_cpu_addr = doorbell_cpu_addr + PAGE_SIZE; + input_page_pfn = PFN_DOWN(as_phys_addr_t(queue->phys[0])); + ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, + KBASE_MEM_GROUP_CSF_IO, vma, input_cpu_addr, + input_page_pfn, input_page_pgprot); + if (ret != VM_FAULT_NOPAGE) + goto exit; + + /* Map the Output page */ + output_cpu_addr = input_cpu_addr + PAGE_SIZE; + output_page_pfn = PFN_DOWN(as_phys_addr_t(queue->phys[1])); + ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, + KBASE_MEM_GROUP_CSF_IO, vma, output_cpu_addr, + output_page_pfn, output_page_pgprot); + } + +exit: + mutex_unlock(&queue->kctx->csf.lock); + return ret; +} + +static const struct vm_operations_struct kbase_csf_user_io_pages_vm_ops = { + .open = kbase_csf_user_io_pages_vm_open, + .close = kbase_csf_user_io_pages_vm_close, + .fault = kbase_csf_user_io_pages_vm_fault +}; + +/* Program the client process's page table entries to map the pair of + * input/output pages & Hw doorbell page. The caller should have validated that + * vma->vm_pgoff maps to the range of csf cookies. 
+ */ +static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, + struct vm_area_struct *vma) +{ + unsigned long cookie = + vma->vm_pgoff - PFN_DOWN(BASEP_MEM_CSF_USER_IO_PAGES_HANDLE); + size_t nr_pages = vma_pages(vma); + struct kbase_queue *queue; + int err = 0; + + lockdep_assert_held(&kctx->csf.lock); + + queue = kctx->csf.user_pages_info[cookie]; + + /* Looks like the bind has been aborted */ + if (!queue) + return -EINVAL; + + if (WARN_ON(test_bit(cookie, kctx->csf.cookies))) + return -EINVAL; + + /* no need for the cookie anymore */ + kctx->csf.user_pages_info[cookie] = NULL; + bitmap_set(kctx->csf.cookies, cookie, 1); + + /* Reset the handle to avoid (re)freeing the cookie (which can + * now get re-assigned) on unbind. + */ + queue->handle = BASEP_MEM_INVALID_HANDLE; + + if (nr_pages != BASEP_QUEUE_NR_MMAP_USER_PAGES) { + err = -EINVAL; + goto map_failed; + } + + err = kbase_csf_alloc_command_stream_user_pages(kctx, queue); + if (err) + goto map_failed; + + vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO; + /* TODO use VM_MIXEDMAP, since it is more appropriate as both types of + * memory with and without "struct page" backing are being inserted here. + * Hw Doorbell pages comes from the device register area so kernel does + * not use "struct page" for them. + */ + vma->vm_flags |= VM_PFNMAP; + + vma->vm_ops = &kbase_csf_user_io_pages_vm_ops; + vma->vm_private_data = queue; + + /* Make vma point to the special internal file, but don't drop the + * reference on mali device file (that would be done later when the + * vma is closed). 
+ */ + vma->vm_file = kctx->kbdev->csf.db_filp; + get_file(vma->vm_file); + /* Also adjust the vm_pgoff */ + vma->vm_pgoff = queue->db_file_offset; + + return 0; + +map_failed: + /* The queue cannot have got to KBASE_CSF_QUEUE_BOUND state if we + * reached here, so safe to use a variant of unbind that only works on + * stopped queues + * + * This is so we don't enter the CSF scheduler from this path. + */ + kbase_csf_queue_unbind_stopped(queue); + + return err; +} + +static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma) +{ + struct kbase_context *kctx = vma->vm_private_data; + + WARN_ON(!kctx->csf.user_reg_vma); + + kctx->csf.user_reg_vma = NULL; +} + +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) +static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_area_struct *vma, + struct vm_fault *vmf) +{ +#else +static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; +#endif + struct kbase_context *kctx = vma->vm_private_data; + struct kbase_device *kbdev = kctx->kbdev; + struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev; + unsigned long pfn = PFN_DOWN(kbdev->reg_start + USER_BASE); + size_t nr_pages = PFN_DOWN(vma->vm_end - vma->vm_start); + vm_fault_t ret = VM_FAULT_SIGBUS; + + /* Few sanity checks up front */ + if (WARN_ON(nr_pages != 1) || + WARN_ON(vma != kctx->csf.user_reg_vma) || + WARN_ON(vma->vm_pgoff != + PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE))) + return VM_FAULT_SIGBUS; + + mutex_lock(&kbdev->pm.lock); + + /* Don't map in the actual register page if GPU is powered down. + * Always map in the dummy page in no mali builds. 
+ */ + if (!kbdev->pm.backend.gpu_powered) + pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_user_reg_page)); + + ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev, + KBASE_MEM_GROUP_CSF_FW, vma, + vma->vm_start, pfn, + vma->vm_page_prot); + + mutex_unlock(&kbdev->pm.lock); + + return ret; +} + +static const struct vm_operations_struct kbase_csf_user_reg_vm_ops = { + .close = kbase_csf_user_reg_vm_close, + .fault = kbase_csf_user_reg_vm_fault +}; + +static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, + struct vm_area_struct *vma) +{ + size_t nr_pages = PFN_DOWN(vma->vm_end - vma->vm_start); + + /* Few sanity checks */ + if (kctx->csf.user_reg_vma) + return -EBUSY; + + if (nr_pages != 1) + return -EINVAL; + + if (vma->vm_flags & (VM_WRITE | VM_MAYWRITE)) + return -EPERM; + + /* Map uncached */ + vma->vm_page_prot = pgprot_device(vma->vm_page_prot); + + vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO; + + /* User register page comes from the device register area so + * "struct page" isn't available for it. + */ + vma->vm_flags |= VM_PFNMAP; + + kctx->csf.user_reg_vma = vma; + + vma->vm_ops = &kbase_csf_user_reg_vm_ops; vma->vm_private_data = kctx; return 0; } +#endif /* MALI_USE_CSF */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h index cd094b3..36159c1 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010, 2012-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010, 2012-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,21 +17,16 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - - /** - * @file mali_kbase_mem_linux.h * Base kernel memory APIs, Linux implementation. */ #ifndef _KBASE_MEM_LINUX_H_ #define _KBASE_MEM_LINUX_H_ -/** A HWC dump mapping */ +/* A HWC dump mapping */ struct kbase_hwc_dma_mapping { void *cpu_va; dma_addr_t dma_pa; @@ -43,7 +39,7 @@ struct kbase_hwc_dma_mapping { * @kctx: The kernel context * @va_pages: The number of pages of virtual address space to reserve * @commit_pages: The number of physical pages to allocate upfront - * @extent: The number of extra pages to allocate on each GPU fault which + * @extension: The number of extra pages to allocate on each GPU fault which * grows the region. * @flags: bitmask of BASE_MEM_* flags to convey special requirements & * properties for the new allocation. @@ -53,8 +49,8 @@ struct kbase_hwc_dma_mapping { * Return: 0 on success or error code */ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, - u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, - u64 *gpu_va); + u64 va_pages, u64 commit_pages, + u64 extension, u64 *flags, u64 *gpu_va); /** * kbase_mem_query - Query properties of a GPU memory region @@ -194,8 +190,8 @@ int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, * Take the provided region and make all the physical pages within it * reclaimable by the kernel, updating the per-process VM stats as well. 
* Remove any CPU mappings (as these can't be removed in the shrinker callback - * as mmap_sem might already be taken) but leave the GPU mapping intact as - * and until the shrinker reclaims the allocation. + * as mmap_sem/mmap_lock might already be taken) but leave the GPU mapping + * intact as and until the shrinker reclaims the allocation. * * Note: Must be called with the region lock of the containing context. */ @@ -461,4 +457,18 @@ static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, } #endif +/** + * kbase_mem_get_process_mmap_lock - Return the mmap lock for the current process + * + * Return: the mmap lock for the current process + */ +static inline struct rw_semaphore *kbase_mem_get_process_mmap_lock(void) +{ +#if KERNEL_VERSION(5, 8, 0) > LINUX_VERSION_CODE + return &current->mm->mmap_sem; +#else /* KERNEL_VERSION(5, 8, 0) > LINUX_VERSION_CODE */ + return &current->mm->mmap_lock; +#endif /* KERNEL_VERSION(5, 8, 0) > LINUX_VERSION_CODE */ +} + #endif /* _KBASE_MEM_LINUX_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h index 7011603..3f260bf 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2012-2014,2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2014, 2016-2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license.
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - - #ifndef _KBASE_MEM_LOWLEVEL_H #define _KBASE_MEM_LOWLEVEL_H @@ -31,9 +28,7 @@ #include -/** - * @brief Flags for kbase_phy_allocator_pages_alloc - */ +/* Flags for kbase_phy_allocator_pages_alloc */ #define KBASE_PHY_PAGES_FLAG_DEFAULT (0) /** Default allocation flag */ #define KBASE_PHY_PAGES_FLAG_CLEAR (1 << 0) /** Clear the pages after allocation */ #define KBASE_PHY_PAGES_FLAG_POISON (1 << 1) /** Fill the memory with a poison value */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c index 0723e32..a11da82 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #include @@ -154,20 +153,12 @@ static void kbase_mem_pool_spill(struct kbase_mem_pool *next_pool, struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool) { struct page *p; - gfp_t gfp; + gfp_t gfp = GFP_HIGHUSER | __GFP_ZERO; struct kbase_device *const kbdev = pool->kbdev; struct device *const dev = kbdev->dev; dma_addr_t dma_addr; int i; -#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && \ - LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0) - /* DMA cache sync fails for HIGHMEM before 3.5 on ARM */ - gfp = GFP_USER | __GFP_ZERO; -#else - gfp = GFP_HIGHUSER | __GFP_ZERO; -#endif - /* don't warn on higher order failures */ if (pool->order) gfp |= __GFP_NOWARN; @@ -318,7 +309,7 @@ void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size) kbase_mem_pool_unlock(pool); } - +KBASE_EXPORT_TEST_API(kbase_mem_pool_set_max_size); static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s, struct shrink_control *sc) @@ -364,17 +355,6 @@ static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s, return freed; } -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) -static int kbase_mem_pool_reclaim_shrink(struct shrinker *s, - struct shrink_control *sc) -{ - if (sc->nr_to_scan == 0) - return kbase_mem_pool_reclaim_count_objects(s, sc); - - return kbase_mem_pool_reclaim_scan_objects(s, sc); -} -#endif - int kbase_mem_pool_init(struct kbase_mem_pool *pool, const struct kbase_mem_pool_config *config, unsigned int order, @@ -398,19 +378,13 @@ int kbase_mem_pool_init(struct kbase_mem_pool *pool, spin_lock_init(&pool->pool_lock); INIT_LIST_HEAD(&pool->page_list); - /* Register shrinker */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) - pool->reclaim.shrink = kbase_mem_pool_reclaim_shrink; -#else pool->reclaim.count_objects = kbase_mem_pool_reclaim_count_objects; pool->reclaim.scan_objects = kbase_mem_pool_reclaim_scan_objects; -#endif pool->reclaim.seeks = 
DEFAULT_SEEKS; /* Kernel versions prior to 3.1 : - * struct shrinker does not define batch */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) + * struct shrinker does not define batch + */ pool->reclaim.batch = 0; -#endif register_shrinker(&pool->reclaim); pool_dbg(pool, "initialized\n"); @@ -830,8 +804,8 @@ void kbase_mem_pool_free_pages_locked(struct kbase_mem_pool *pool, nr_to_pool = kbase_mem_pool_capacity(pool); nr_to_pool = min(nr_pages, nr_to_pool); - kbase_mem_pool_add_array_locked(pool, nr_pages, pages, false, - dirty); + kbase_mem_pool_add_array_locked(pool, nr_to_pool, pages, false, + dirty); i += nr_to_pool; } diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c index 5879fdf..cfb43b0 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #include diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h index 2932945..207b585 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KBASE_MEM_POOL_DEBUGFS_H_ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.c index aa25548..8d7bb4d 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. 
+ * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #include diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.h index 0484f59..38fd4ca 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KBASE_MEM_POOL_GROUP_H_ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c index 5752d4a..ea8e34b 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2017, 2019 ARM Limited. All rights reserved. 
+ * (C) COPYRIGHT 2012-2017, 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,23 +17,23 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #include -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) -/** Show callback for the @c mem_profile debugfs file. +/** + * Show callback for the @c mem_profile debugfs file. * * This function is called to get the contents of the @c mem_profile debugfs * file. This is a report of current memory usage and distribution in userspace. * - * @param sfile The debugfs entry - * @param data Data associated with the entry + * @sfile: The debugfs entry + * @data: Data associated with the entry * - * @return 0 if it successfully prints data in debugfs entry file, non-zero otherwise + * Return: 0 if it successfully prints data in debugfs entry file, non-zero + * otherwise */ static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data) { @@ -71,6 +72,11 @@ static const struct file_operations kbasep_mem_profile_debugfs_fops = { int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, size_t size) { +#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE) + const mode_t mode = 0444; +#else + const mode_t mode = 0400; +#endif int err = 0; if (IS_ERR_OR_NULL(kctx->kctx_dentry)) /* not initialized */ @@ -84,7 +90,7 @@ int kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, if (!kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) { if (IS_ERR_OR_NULL(kctx->kctx_dentry)) { err = 
-ENOMEM; - } else if (!debugfs_create_file("mem_profile", 0444, + } else if (!debugfs_create_file("mem_profile", mode, kctx->kctx_dentry, kctx, &kbasep_mem_profile_debugfs_fops)) { err = -EAGAIN; diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h index 1462247..093a65e 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,14 +17,9 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ - - /** - * @file mali_kbase_mem_profile_debugfs.h * Header file for mem profiles entries in debugfs * */ @@ -35,12 +31,17 @@ #include /** - * @brief Remove entry from Mali memory profile debugfs + * Remove entry from Mali memory profile debugfs + * @kctx: The context whose debugfs file @p data should be removed from */ void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx); /** - * @brief Insert @p data to the debugfs file so it can be read by userspace + * Insert @p data to the debugfs file so it can be read by userspace + * @kctx: The context whose debugfs file @p data should be inserted to + * @data: A NULL-terminated string to be inserted to the debugfs file, + * without the trailing new line character + * @size: The length of the @p data string * * The function takes ownership of @p data and frees it later when new data * is inserted. @@ -48,10 +49,6 @@ void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx); * If the debugfs entry corresponding to the @p kctx doesn't exist, * an attempt will be made to create it. * - * @param kctx The context whose debugfs file @p data should be inserted to - * @param data A NULL-terminated string to be inserted to the debugfs file, - * without the trailing new line character - * @param size The length of the @p data string * @return 0 if @p data inserted correctly * -EAGAIN in case of error * @post @ref mem_profile_initialized will be set to @c true diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h index d55cc85..3184a98 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014, 2018-2019 ARM Limited. 
All rights reserved. + * (C) COPYRIGHT 2014, 2017-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,9 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /** - * @file mali_kbase_mem_profile_debugfs_buf_size.h * Header file for the size of the buffer to accumulate the histogram report text in */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_gen_header.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_gen_header.h index ec52122..f0b385e 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_gen_header.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_gen_header.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /* THIS FILE IS AUTOGENERATED BY mali_trace_generator.py. 
@@ -40,14 +39,14 @@ * defined. See documentation below: */ -/** +/* * The name of the variable where the result BLOB will be stored. */ #if !defined(MIPE_HEADER_BLOB_VAR_NAME) #error "MIPE_HEADER_BLOB_VAR_NAME must be defined!" #endif -/** +/* * A compiler attribute for the BLOB variable. * * e.g. __attribute__((section("my_section"))) @@ -58,6 +57,17 @@ #define MIPE_HEADER_BLOB_VAR_ATTRIBUTE #endif +/** + * A compiler attribute for packing structures + * + * e.g. __packed + * + * Default value is __attribute__((__packed__)) + */ +#if !defined(MIPE_HEADER_PACKED_ATTRIBUTE) +#define MIPE_HEADER_PACKED_ATTRIBUTE __attribute__((__packed__)) +#endif + /** * MIPE stream id. * @@ -67,7 +77,7 @@ #error "MIPE_HEADER_STREAM_ID must be defined!" #endif -/** +/* * MIPE packet class. * * See enum tl_packet_class. @@ -76,10 +86,11 @@ #error "MIPE_HEADER_PKT_CLASS must be defined!" #endif -/** +/* * The list of tracepoints to process. * * It should be defined as follows: + * * #define MIPE_HEADER_TRACEPOINT_LIST \ * TRACEPOINT_DESC(FIRST_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \ * TRACEPOINT_DESC(SECOND_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \ @@ -94,17 +105,18 @@ #error "MIPE_HEADER_TRACEPOINT_LIST must be defined!" #endif -/** +/* * The number of entries in MIPE_HEADER_TRACEPOINT_LIST. */ #if !defined(MIPE_HEADER_TRACEPOINT_LIST_SIZE) #error "MIPE_HEADER_TRACEPOINT_LIST_SIZE must be defined!" #endif -/** +/* * The list of enums to process. * * It should be defined as follows: + * * #define MIPE_HEADER_ENUM_LIST \ * ENUM_DESC(enum_arg_name, enum_value) \ * ENUM_DESC(enum_arg_name, enum_value) \ @@ -117,7 +129,7 @@ */ #if defined(MIPE_HEADER_ENUM_LIST) -/** +/* * Tracepoint message ID used for enums declaration. 
*/ #if !defined(MIPE_HEADER_ENUM_MSG_ID) @@ -149,7 +161,7 @@ const struct char _arg_types[sizeof(arg_types)]; \ u32 _size_arg_names; \ char _arg_names[sizeof(arg_names)]; \ - } __attribute__ ((__packed__)) __ ## name; + } MIPE_HEADER_PACKED_ATTRIBUTE __ ## name; #define ENUM_DESC(arg_name, value) \ struct { \ @@ -159,13 +171,13 @@ const struct u32 _value; \ u32 _value_str_len; \ char _value_str[sizeof(#value)]; \ - } __attribute__ ((__packed__)) __ ## arg_name ## _ ## value; + } MIPE_HEADER_PACKED_ATTRIBUTE __ ## arg_name ## _ ## value; MIPE_HEADER_TRACEPOINT_LIST MIPE_HEADER_ENUM_LIST #undef TRACEPOINT_DESC #undef ENUM_DESC -} __attribute__((packed)) MIPE_HEADER_BLOB_VAR_NAME MIPE_HEADER_BLOB_VAR_ATTRIBUTE = { +} MIPE_HEADER_PACKED_ATTRIBUTE MIPE_HEADER_BLOB_VAR_NAME MIPE_HEADER_BLOB_VAR_ATTRIBUTE = { ._mipe_w0 = MIPE_PACKET_HEADER_W0( TL_PACKET_FAMILY_TL, MIPE_HEADER_PKT_CLASS, diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_proto.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_proto.h index 54667cf..c35ee61 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_proto.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mipe_proto.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ /* THIS FILE IS AUTOGENERATED BY mali_trace_generator.py. diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.c index 38ae46e..4554bee 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #include diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.h index 431b1f4..1eae2fc 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_native_mgm.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KBASE_NATIVE_MGM_H_ @@ -25,7 +24,7 @@ #include -/** +/* * kbase_native_mgm_dev - Native memory group manager device * * An implementation of the memory group manager interface that is intended for diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c index fbb090e..bf525ed 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2014, 2016-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2014, 2016-2017, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #include @@ -26,7 +25,6 @@ #include #include - /* * This file is included only for type definitions and functions belonging to * specific platform folders. 
Do not add dependencies with symbols that are @@ -41,14 +39,13 @@ static struct platform_device *mali_device; #ifndef CONFIG_OF /** - * @brief Convert data in struct kbase_io_resources struct to Linux-specific resources + * Convert data in struct kbase_io_resources struct to Linux-specific resources + * @io_resources: Input IO resource data + * @linux_resources: Pointer to output array of Linux resource structures * * Function converts data in struct kbase_io_resources struct to an array of Linux resource structures. Note that function * assumes that size of linux_resource array is at least PLATFORM_CONFIG_RESOURCE_COUNT. * Resources are put in fixed order: I/O memory region, job IRQ, MMU IRQ, GPU IRQ. - * - * @param[in] io_resource Input IO resource data - * @param[out] linux_resources Pointer to output array of Linux resource structures */ static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io_resources, struct resource *const linux_resources) { diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c index b9ed8c3..de100dd 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,15 +17,10 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - - /** - * @file mali_kbase_pm.c - * Base kernel power management APIs + * DOC: Base kernel power management APIs */ #include @@ -33,12 +29,14 @@ #include #include -#include +#include #ifdef CONFIG_MALI_ARBITER_SUPPORT #include #endif /* CONFIG_MALI_ARBITER_SUPPORT */ +#include + int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags) { return kbase_hwaccess_pm_powerup(kbdev, flags); @@ -66,14 +64,14 @@ int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, kbase_pm_lock(kbdev); #ifdef CONFIG_MALI_ARBITER_SUPPORT - if (kbase_arbiter_pm_ctx_active_handle_suspend(kbdev, suspend_handler)) + if (kbase_arbiter_pm_ctx_active_handle_suspend(kbdev, + suspend_handler)) { + kbase_pm_unlock(kbdev); return 1; + } +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ - if (kbase_pm_is_suspending(kbdev) || - kbase_pm_is_gpu_lost(kbdev)) { -#else if (kbase_pm_is_suspending(kbdev)) { -#endif /* CONFIG_MALI_ARBITER_SUPPORT */ switch (suspend_handler) { case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE: if (kbdev->pm.active_count != 0) @@ -101,6 +99,7 @@ int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, #ifdef CONFIG_MALI_ARBITER_SUPPORT kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_REF_EVENT); #endif /* CONFIG_MALI_ARBITER_SUPPORT */ + kbase_clk_rate_trace_manager_gpu_active(kbdev); } kbase_pm_unlock(kbdev); @@ -128,6 +127,7 @@ void kbase_pm_context_idle(struct kbase_device *kbdev) if (c == 0) { /* Last context has gone idle */ kbase_hwaccess_pm_gpu_idle(kbdev); + kbase_clk_rate_trace_manager_gpu_idle(kbdev); /* Wake up anyone waiting for this to become 0 (e.g. suspend). 
* The waiters must synchronize with us by locking the pm.lock @@ -171,6 +171,7 @@ void kbase_pm_driver_suspend(struct kbase_device *kbdev) unsigned long flags; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbdev->js_data.runpool_irq.submit_allowed = 0; kbase_disjoint_state_up(kbdev); for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) kbase_job_slot_softstop(kbdev, i, NULL); @@ -184,9 +185,14 @@ void kbase_pm_driver_suspend(struct kbase_device *kbdev) * all pm references */ +#if !MALI_USE_CSF /* Suspend job scheduler and associated components, so that it releases all - * the PM active count references */ + * the PM active count references + */ kbasep_js_suspend(kbdev); +#else + kbase_csf_scheduler_pm_suspend(kbdev); +#endif /* Wait for the active count to reach zero. This is not the same as * waiting for a power down, since not all policies power down when this @@ -221,14 +227,16 @@ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start) /* Initial active call, to power on the GPU/cores if needed */ #ifdef CONFIG_MALI_ARBITER_SUPPORT - (void)kbase_pm_context_active_handle_suspend(kbdev, - (arb_gpu_start ? - KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED : - KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE)); + if (kbase_pm_context_active_handle_suspend(kbdev, + (arb_gpu_start ? 
+ KBASE_PM_SUSPEND_HANDLER_VM_GPU_GRANTED : + KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE))) + return; #else kbase_pm_context_active(kbdev); #endif +#if !MALI_USE_CSF /* Resume any blocked atoms (which may cause contexts to be scheduled in * and dependent atoms to run) */ @@ -238,6 +246,9 @@ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start) * atoms */ kbasep_js_resume(kbdev); +#else + kbase_csf_scheduler_pm_resume(kbdev); +#endif /* Matching idle call, to power off the GPU/cores if we didn't actually * need it and the policy doesn't want it on @@ -245,9 +256,15 @@ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start) kbase_pm_context_idle(kbdev); /* Re-enable GPU hardware counters */ +#if MALI_USE_CSF + kbase_csf_scheduler_spin_lock(kbdev, &flags); + kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); + kbase_csf_scheduler_spin_unlock(kbdev, flags); +#else spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +#endif /* Resume vinstr */ kbase_vinstr_resume(kbdev->vinstr_ctx); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h index 257f959..980a8d1 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_pm.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,14 +17,9 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - - /** - * @file mali_kbase_pm.h * Power management API definitions */ @@ -35,6 +31,13 @@ #define PM_ENABLE_IRQS 0x01 #define PM_HW_ISSUES_DETECT 0x02 +#ifdef CONFIG_MALI_ARBITER_SUPPORT +/* In the case that the GPU was granted by the Arbiter, it will have + * already been reset. The following flag ensures it is not reset + * twice. + */ +#define PM_NO_RESET 0x04 +#endif /** Initialize the power management framework. * @@ -59,12 +62,12 @@ int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags); /** * Halt the power management framework. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * * Should ensure that no new interrupts are generated, * but allow any currently running interrupt handlers to complete successfully. * The GPU is forced off by the time this function returns, regardless of * whether or not the active power policy asks for the GPU to be powered off. - * - * @param kbdev The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_halt(struct kbase_device *kbdev); @@ -154,6 +157,7 @@ void kbase_pm_context_idle(struct kbase_device *kbdev); /** * Suspend the GPU and prevent any further register accesses to it from Kernel * threads. + * @kbdev: The kbase device structure for the device (must be a valid pointer) * * This is called in response to an OS suspend event, and calls into the various * kbase components to complete the suspend. @@ -161,21 +165,18 @@ void kbase_pm_context_idle(struct kbase_device *kbdev); * @note the mechanisms used here rely on all user-space threads being frozen * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up * the GPU e.g. 
via atom submission. - * - * @param kbdev The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_suspend(struct kbase_device *kbdev); /** * Resume the GPU, allow register accesses to it, and resume running atoms on * the GPU. + * @kbdev: The kbase device structure for the device (must be a valid pointer) * * This is called in response to an OS resume event, and calls into the various * kbase components to complete the resume. * * Also called when using VM arbiter, when GPU access has been granted. - * - * @param kbdev The kbase device structure for the device (must be a valid pointer) */ void kbase_pm_resume(struct kbase_device *kbdev); @@ -192,8 +193,7 @@ void kbase_pm_vsync_callback(int buffer_updated, void *data); /** * kbase_pm_driver_suspend() - Put GPU and driver in suspend state - * @param kbdev The kbase device structure for the device - * (must be a valid pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) * * Suspend the GPU and prevent any further register accesses to it from Kernel * threads. @@ -212,8 +212,8 @@ void kbase_pm_driver_suspend(struct kbase_device *kbdev); /** * kbase_pm_driver_resume() - Put GPU and driver in resume - * @param kbdev The kbase device structure for the device - * (must be a valid pointer) + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @arb_gpu_start: Arbiter has notified we can use GPU * * Resume the GPU, allow register accesses to it, and resume running atoms on * the GPU. 
diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c index 1d114a6..1e807d7 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2016, 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2016, 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,18 +17,130 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #include "mali_kbase.h" - #include "mali_kbase_regs_history_debugfs.h" -#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) +#if defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_NO_MALI) #include +/** + * kbase_io_history_resize - resize the register access history buffer. + * + * @h: Pointer to a valid register history to resize + * @new_size: Number of accesses the buffer could hold + * + * A successful resize will clear all recent register accesses. + * If resizing fails for any reason (e.g., could not allocate memory, invalid + * buffer size) then the original buffer will be kept intact. 
+ * + * @return 0 if the buffer was resized, failure otherwise + */ +static int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size) +{ + struct kbase_io_access *old_buf; + struct kbase_io_access *new_buf; + unsigned long flags; + + if (!new_size) + goto out_err; /* The new size must not be 0 */ + + new_buf = vmalloc(new_size * sizeof(*h->buf)); + if (!new_buf) + goto out_err; + + spin_lock_irqsave(&h->lock, flags); + + old_buf = h->buf; + + /* Note: we won't bother with copying the old data over. The dumping + * logic wouldn't work properly as it relies on 'count' both as a + * counter and as an index to the buffer which would have changed with + * the new array. This is a corner case that we don't need to support. + */ + h->count = 0; + h->size = new_size; + h->buf = new_buf; + + spin_unlock_irqrestore(&h->lock, flags); + + vfree(old_buf); + + return 0; + +out_err: + return -1; +} + +int kbase_io_history_init(struct kbase_io_history *h, u16 n) +{ + h->enabled = false; + spin_lock_init(&h->lock); + h->count = 0; + h->size = 0; + h->buf = NULL; + if (kbase_io_history_resize(h, n)) + return -1; + + return 0; +} + +void kbase_io_history_term(struct kbase_io_history *h) +{ + vfree(h->buf); + h->buf = NULL; +} + +void kbase_io_history_add(struct kbase_io_history *h, + void __iomem const *addr, u32 value, u8 write) +{ + struct kbase_io_access *io; + unsigned long flags; + + spin_lock_irqsave(&h->lock, flags); + + io = &h->buf[h->count % h->size]; + io->addr = (uintptr_t)addr | write; + io->value = value; + ++h->count; + /* If count overflows, move the index by the buffer size so the entire + * buffer will still be dumped later + */ + if (unlikely(!h->count)) + h->count = h->size; + + spin_unlock_irqrestore(&h->lock, flags); +} + +void kbase_io_history_dump(struct kbase_device *kbdev) +{ + struct kbase_io_history *const h = &kbdev->io_history; + size_t i; + size_t iters; + unsigned long flags; + + if (!unlikely(h->enabled)) + return; + + 
spin_lock_irqsave(&h->lock, flags); + + dev_err(kbdev->dev, "Register IO History:"); + iters = (h->size > h->count) ? h->count : h->size; + dev_err(kbdev->dev, "Last %zu register accesses of %zu total:\n", iters, + h->count); + for (i = 0; i < iters; ++i) { + struct kbase_io_access *io = + &h->buf[(h->count - iters + i) % h->size]; + char const access = (io->addr & 1) ? 'w' : 'r'; + + dev_err(kbdev->dev, "%6zu: %c: reg 0x%016lx val %08x\n", i, + access, (unsigned long)(io->addr & ~0x1), io->value); + } + + spin_unlock_irqrestore(&h->lock, flags); +} static int regs_history_size_get(void *data, u64 *val) { @@ -66,7 +179,7 @@ DEFINE_SIMPLE_ATTRIBUTE(regs_history_size_fops, static int regs_history_show(struct seq_file *sfile, void *data) { struct kbase_io_history *const h = sfile->private; - u16 i; + size_t i; size_t iters; unsigned long flags; @@ -85,8 +198,8 @@ static int regs_history_show(struct seq_file *sfile, void *data) &h->buf[(h->count - iters + i) % h->size]; char const access = (io->addr & 1) ? 
'w' : 'r'; - seq_printf(sfile, "%6i: %c: reg 0x%016lx val %08x\n", i, access, - (unsigned long)(io->addr & ~0x1), io->value); + seq_printf(sfile, "%6zu: %c: reg 0x%016lx val %08x\n", i, + access, (unsigned long)(io->addr & ~0x1), io->value); } spin_unlock_irqrestore(&h->lock, flags); @@ -95,7 +208,6 @@ out: return 0; } - /** * regs_history_open - open operation for regs_history debugfs file * @@ -109,7 +221,6 @@ static int regs_history_open(struct inode *in, struct file *file) return single_open(file, ®s_history_show, in->i_private); } - static const struct file_operations regs_history_fops = { .owner = THIS_MODULE, .open = ®s_history_open, @@ -118,7 +229,6 @@ static const struct file_operations regs_history_fops = { .release = single_release, }; - void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev) { debugfs_create_bool("regs_history_enabled", S_IRUGO | S_IWUSR, @@ -131,6 +241,4 @@ void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev) kbdev->mali_debugfs_directory, &kbdev->io_history, ®s_history_fops); } - - -#endif /* CONFIG_DEBUG_FS */ +#endif /* defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_NO_MALI) */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h index a0078cb..3b181d3 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2016, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /** @@ -37,7 +36,31 @@ struct kbase_device; -#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_MALI_NO_MALI) +#if defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_NO_MALI) + +/** + * kbase_io_history_init - initialize data struct for register access history + * + * @h: The register history to initialize + * @n: The number of register accesses that the buffer could hold + * + * @return 0 if successfully initialized, failure otherwise + */ +int kbase_io_history_init(struct kbase_io_history *h, u16 n); + +/** + * kbase_io_history_term - uninit all resources for the register access history + * + * @h: The register history to terminate + */ +void kbase_io_history_term(struct kbase_io_history *h); + +/** + * kbase_io_history_dump - print the register history to the kernel ring buffer + * + * @kbdev: Pointer to kbase_device containing the register history to dump + */ +void kbase_io_history_dump(struct kbase_device *kbdev); /** * kbasep_regs_history_debugfs_init - add debugfs entries for register history @@ -46,10 +69,7 @@ struct kbase_device; */ void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev); -#else /* CONFIG_DEBUG_FS */ - -#define kbasep_regs_history_debugfs_init CSTD_NOP - -#endif /* CONFIG_DEBUG_FS */ +#else /* defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_NO_MALI) */ +#endif /* defined(CONFIG_DEBUG_FS) && !IS_ENABLED(CONFIG_MALI_NO_MALI) */ #endif /*_KBASE_REGS_HISTORY_DEBUGFS_H*/ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_reset_gpu.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_reset_gpu.h index df72eec..292a29c 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_reset_gpu.h +++ 
b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_reset_gpu.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,16 +17,142 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KBASE_RESET_GPU_H_ #define _KBASE_RESET_GPU_H_ +/** + * kbase_reset_gpu_prevent_and_wait - Prevent GPU resets from starting whilst + * the current thread is accessing the GPU, + * and wait for any in-flight reset to + * finish. + * @kbdev: Device pointer + * + * This should be used when a potential access to the HW is going to be made + * from a non-atomic context. + * + * It will wait for any in-flight reset to finish before returning. Hence, + * correct lock ordering must be observed with respect to the calling thread + * and the reset worker thread. + * + * This does not synchronize general access to the HW, and so multiple threads + * can prevent GPU reset concurrently, whilst not being serialized. This is + * advantageous as the threads can make this call at points where they do not + * know for sure yet whether they will indeed access the GPU (for example, to + * respect lock ordering), without unnecessarily blocking others. + * + * Threads must still use other synchronization to ensure they access the HW + * consistently, at a point where they are certain it needs to be accessed. 
+ * + * On success, ensure that when access to the GPU by the caller thread has + * finished, that it calls kbase_reset_gpu_allow() again to allow resets to + * happen. + * + * This may return a failure in cases such as a previous failure to reset the + * GPU within a reasonable time. If that happens, the GPU might be + * non-operational and the caller should not attempt any further access. + * + * Note: + * For atomic context, instead check kbase_reset_gpu_is_active(). + * + * Return: 0 on success, or negative error code on failure. + */ +int kbase_reset_gpu_prevent_and_wait(struct kbase_device *kbdev); + +/** + * kbase_reset_gpu_try_prevent - Attempt to prevent GPU resets from starting + * whilst the current thread is accessing the + * GPU, unless a reset is already in progress. + * @kbdev: Device pointer + * + * Similar to kbase_reset_gpu_prevent_and_wait(), but it does not wait for an + * existing reset to complete. This can be used on codepaths that the Reset + * worker waits on, where use of kbase_reset_gpu_prevent_and_wait() would + * otherwise deadlock. + * + * Instead, a reset that is currently happening will cause this function to + * return an error code indicating that, and further resets will not have been + * prevented. + * + * In such cases, the caller must check for -EAGAIN, and take similar actions + * as for handling reset in atomic context. That is, they must cancel any + * actions that depended on reset being prevented, possibly deferring them + * until after the reset. + * + * Otherwise a successful return means that the caller can continue its actions + * safely in the knowledge that reset is prevented, and the reset worker will + * correctly wait instead of deadlocking against this thread. + * + * On success, ensure that when access to the GPU by the caller thread has + * finished, that it calls kbase_reset_gpu_allow() again to allow resets to + * happen. + * + * Refer to kbase_reset_gpu_prevent_and_wait() for more information. 
+ * + * Return: 0 on success. -EAGAIN if a reset is currently happening. Other + * negative error codes on failure. + */ +int kbase_reset_gpu_try_prevent(struct kbase_device *kbdev); + +/** + * kbase_reset_gpu_allow - Allow GPU resets to happen again after having been + * previously prevented. + * @kbdev: Device pointer + * + * This should be used when a potential access to the HW has finished from a + * non-atomic context. + * + * It must be used from the same thread that originally made a previous call + * to kbase_reset_gpu_prevent_and_wait(). It must not be deferred to another + * thread. + */ +void kbase_reset_gpu_allow(struct kbase_device *kbdev); + +/** + * kbase_reset_gpu_assert_prevented - Make debugging checks that GPU reset is + * currently prevented by the current + * thread. + * @kbdev: Device pointer + * + * Make debugging checks that the current thread has made a call to + * kbase_reset_gpu_prevent_and_wait(), but has yet to make a subsequent call to + * kbase_reset_gpu_allow(). + * + * CONFIG_LOCKDEP is required to prove that reset is indeed + * prevented. Otherwise only limited debugging checks can be made. + */ +void kbase_reset_gpu_assert_prevented(struct kbase_device *kbdev); + +/** + * kbase_reset_gpu_assert_failed_or_prevented - Make debugging checks that + * either GPU reset previously + * failed, or is currently + * prevented. + * + * @kbdev: Device pointer + * + * As with kbase_reset_gpu_assert_prevented(), but also allow for paths where + * reset was not prevented due to a failure, yet we still need to execute the + * cleanup code following. + * + * Cleanup code following this call must handle any inconsistent state modified + * by the failed GPU reset, and must timeout any blocking operations instead of + * waiting forever.
+ */ +void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev); + +/** + * Flags for kbase_prepare_to_reset_gpu + */ +#define RESET_FLAGS_NONE ((unsigned int)0) +/* This reset should be treated as an unrecoverable error by HW counter logic */ +#define RESET_FLAGS_HWC_UNRECOVERABLE_ERROR ((unsigned int)(1 << 0)) + /** * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU. * @kbdev: Device pointer + * @flags: Bitfield indicating impact of reset (see flag defines) * * Caller is expected to hold the kbdev->hwaccess_lock. * @@ -34,18 +161,20 @@ * - false - Another thread is performing a reset, kbase_reset_gpu should * not be called. */ -bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev); +bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev, + unsigned int flags); /** * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU. * @kbdev: Device pointer - * + * @flags: Bitfield indicating impact of reset (see flag defines) + * Return: a boolean which should be interpreted as follows: * - true - Prepared for reset, kbase_reset_gpu should be called. * - false - Another thread is performing a reset, kbase_reset_gpu should * not be called. */ -bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev); +bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags); /** * kbase_reset_gpu - Reset the GPU @@ -95,8 +224,13 @@ int kbase_reset_gpu_silent(struct kbase_device *kbdev); * kbase_reset_gpu_is_active - Reports if the GPU is being reset * @kbdev: Device pointer * - * Return: True if the GPU is in the process of being reset (or if the reset of - * GPU failed, not applicable to Job Manager GPUs). + * Any changes made to the HW when this returns true may be lost, overwritten + * or corrupted. + * + * Note that unless appropriate locks are held when using this function, the + * state could change immediately afterwards. + * + * Return: True if the GPU is in the process of being reset. 
*/ bool kbase_reset_gpu_is_active(struct kbase_device *kbdev); @@ -126,14 +260,4 @@ int kbase_reset_gpu_init(struct kbase_device *kbdev); */ void kbase_reset_gpu_term(struct kbase_device *kbdev); -/** - * kbase_reset_gpu_register_complete_cb - Register the callback function to be - * invoked on completion of GPU reset. - * - * @kbdev: Device pointer - * @complete_callback: Pointer to the callback function - */ -void kbase_reset_gpu_register_complete_cb(struct kbase_device *kbdev, - int (*complete_callback)(struct kbase_device *kbdev)); - #endif diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c index b5c7b12..abbe8d5 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2015, 2018, 2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015, 2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,11 +17,9 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ -#ifdef CONFIG_ARM64 +#if IS_ENABLED(CONFIG_ARM64) #include #include diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h index 221eb21..d0086db 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_smc.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,16 +17,12 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - - #ifndef _KBASE_SMC_H_ #define _KBASE_SMC_H_ -#ifdef CONFIG_ARM64 +#if IS_ENABLED(CONFIG_ARM64) #include diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c index cbb0c76..bee3513 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_softjobs.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - - #include #include @@ -30,8 +27,9 @@ #include #endif #include -#include +#include #include +#include #include #include #include @@ -42,10 +40,9 @@ #include #include +#if !MALI_USE_CSF /** - * @file mali_kbase_softjobs.c - * - * This file implements the logic behind software only jobs that are + * DOC: This file implements the logic behind software only jobs that are * executed within the driver rather than being handed over to the GPU. 
*/ @@ -136,7 +133,7 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) void *user_result; struct timespec64 ts; struct base_dump_cpu_gpu_counters data; - u64 system_time; + u64 system_time = 0ULL; u64 cycle_counter; u64 jc = katom->jc; struct kbase_context *kctx = katom->kctx; @@ -146,7 +143,11 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) /* Take the PM active reference as late as possible - otherwise, it could * delay suspend until we process the atom (which may be at the end of a - * long chain of dependencies */ + * long chain of dependencies + */ +#ifdef CONFIG_MALI_ARBITER_SUPPORT + atomic_inc(&kctx->kbdev->pm.gpu_users_waiting); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); if (pm_active_err) { struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data; @@ -164,6 +165,10 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) return pm_active_err; } +#ifdef CONFIG_MALI_ARBITER_SUPPORT + else + atomic_dec(&kctx->kbdev->pm.gpu_users_waiting); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time, &ts); @@ -181,7 +186,8 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) /* GPU_WR access is checked on the range for returning the result to * userspace for the following reasons: * - security, this is currently how imported user bufs are checked. 
- * - userspace ddk guaranteed to assume region was mapped as GPU_WR */ + * - userspace ddk guaranteed to assume region was mapped as GPU_WR + */ user_result = kbase_vmap_prot(kctx, jc, sizeof(data), KBASE_REG_GPU_WR, &map); if (!user_result) return 0; @@ -292,7 +298,7 @@ static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom) if (!kbase_sync_fence_in_info_get(dep, &info)) { dev_warn(dev, - "\tVictim trigger atom %d fence [%p] %s: %s\n", + "\tVictim trigger atom %d fence [%pK] %s: %s\n", kbase_jd_atom_id(kctx, dep), info.fence, info.name, @@ -321,11 +327,11 @@ static void kbase_fence_debug_wait_timeout(struct kbase_jd_atom *katom) return; } - dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%p] after %dms\n", + dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%pK] after %dms\n", kctx->tgid, kctx->id, kbase_jd_atom_id(kctx, katom), info.fence, timeout_ms); - dev_warn(dev, "\tGuilty fence [%p] %s: %s\n", + dev_warn(dev, "\tGuilty fence [%pK] %s: %s\n", info.fence, info.name, kbase_sync_status_string(info.status)); @@ -713,14 +719,16 @@ out_unlock: out_cleanup: /* Frees allocated memory for kbase_debug_copy_job struct, including - * members, and sets jc to 0 */ + * members, and sets jc to 0 + */ kbase_debug_copy_finish(katom); kfree(user_buffers); return ret; } +#endif /* !MALI_USE_CSF */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) +#if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc, unsigned long page_num, struct page **page) { @@ -801,16 +809,16 @@ int kbase_mem_copy_from_extres(struct kbase_context *kctx, dma_to_copy = min(dma_buf->size, (size_t)(buf_data->nr_extres_pages * PAGE_SIZE)); ret = dma_buf_begin_cpu_access(dma_buf, -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS) - 0, dma_to_copy, +#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS) + 0, dma_to_copy, #endif - DMA_FROM_DEVICE); + 
DMA_FROM_DEVICE); if (ret) goto out_unlock; for (i = 0; i < dma_to_copy/PAGE_SIZE && target_page_nr < buf_data->nr_pages; i++) { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) +#if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE struct page *pg; void *extres_page = dma_buf_kmap_page(gpu_alloc, i, &pg); #else @@ -822,20 +830,20 @@ int kbase_mem_copy_from_extres(struct kbase_context *kctx, buf_data->nr_pages, &target_page_nr, offset); -#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0) +#if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE kunmap(pg); #else dma_buf_kunmap(dma_buf, i, extres_page); #endif if (ret) - goto out_unlock; + break; } } dma_buf_end_cpu_access(dma_buf, -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && !defined(CONFIG_CHROMEOS) - 0, dma_to_copy, +#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS) + 0, dma_to_copy, #endif - DMA_FROM_DEVICE); + DMA_FROM_DEVICE); break; } default: @@ -846,6 +854,7 @@ out_unlock: return ret; } +#if !MALI_USE_CSF static int kbase_debug_copy(struct kbase_jd_atom *katom) { struct kbase_debug_copy_buffer *buffers = katom->softjob_data; @@ -863,6 +872,7 @@ static int kbase_debug_copy(struct kbase_jd_atom *katom) return 0; } +#endif /* !MALI_USE_CSF */ #define KBASEP_JIT_ALLOC_GPU_ADDR_ALIGNMENT ((u32)0x7) @@ -899,7 +909,7 @@ int kbasep_jit_alloc_validate(struct kbase_context *kctx, if (info->flags & ~(BASE_JIT_ALLOC_VALID_FLAGS)) return -EINVAL; -#if !MALI_JIT_PRESSURE_LIMIT +#if !MALI_JIT_PRESSURE_LIMIT_BASE /* If just-in-time memory allocation pressure limit feature is disabled, * heap_info_gpu_addr must be zeroed-out */ @@ -907,21 +917,19 @@ int kbasep_jit_alloc_validate(struct kbase_context *kctx, return -EINVAL; #endif +#if !MALI_USE_CSF /* If BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE is set, heap_info_gpu_addr * cannot be 0 */ if ((info->flags & BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) && !info->heap_info_gpu_addr) return -EINVAL; +#endif /* !MALI_USE_CSF */ return 0; } - -#if (KERNEL_VERSION(3, 18, 63) 
> LINUX_VERSION_CODE) -#define offsetofend(TYPE, MEMBER) \ - (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER)) -#endif +#if !MALI_USE_CSF /* * Sizes of user data to copy for each just-in-time memory interface version @@ -998,10 +1006,10 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) ret = kbasep_jit_alloc_validate(kctx, info); if (ret) goto free_info; - KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO(kbdev, katom, - info->va_pages, info->commit_pages, info->extent, - info->id, info->bin_id, info->max_allocations, - info->flags, info->usage_id); + KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO( + kbdev, katom, info->va_pages, info->commit_pages, + info->extension, info->id, info->bin_id, + info->max_allocations, info->flags, info->usage_id); } katom->jit_blocked = false; @@ -1016,7 +1024,7 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) * though the region is valid it doesn't represent the * same thing it used to. * - * Complete validation of va_pages, commit_pages and extent + * Complete validation of va_pages, commit_pages and extension * isn't done here as it will be done during the call to * kbase_mem_alloc. */ @@ -1091,14 +1099,19 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) } } -#if MALI_JIT_PRESSURE_LIMIT - /** - * If this is the only JIT_ALLOC atom in-flight then allow it to exceed - * the defined pressure limit. +#if MALI_JIT_PRESSURE_LIMIT_BASE + /* + * If this is the only JIT_ALLOC atom in-flight or if JIT pressure limit + * is disabled at the context scope, then bypass JIT pressure limit + * logic in kbase_jit_allocate(). 
*/ - if (kctx->jit_current_allocations == 0) + if (!kbase_ctx_flag(kctx, KCTX_JPL_ENABLED) + || (kctx->jit_current_allocations == 0)) { ignore_pressure_limit = true; -#endif /* MALI_JIT_PRESSURE_LIMIT */ + } +#else + ignore_pressure_limit = true; +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ for (i = 0, info = katom->softjob_data; i < count; i++, info++) { if (kctx->jit_alloc[info->id]) { @@ -1215,10 +1228,10 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) MIDGARD_MMU_BOTTOMLEVEL, kctx->jit_group_id); #endif - KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(kbdev, katom, - info->gpu_alloc_addr, new_addr, info->flags, - entry_mmu_flags, info->id, info->commit_pages, - info->extent, info->va_pages); + KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT( + kbdev, katom, info->gpu_alloc_addr, new_addr, + info->flags, entry_mmu_flags, info->id, + info->commit_pages, info->extension, info->va_pages); kbase_vunmap(kctx, &mapping); kbase_trace_jit_report_gpu_mem(kctx, reg, @@ -1358,12 +1371,16 @@ void kbase_jit_retry_pending_alloc(struct kbase_context *kctx) list_for_each_safe(i, tmp, &jit_pending_alloc_list) { struct kbase_jd_atom *pending_atom = list_entry(i, struct kbase_jd_atom, queue); + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(kctx->kbdev, pending_atom); + kbase_kinstr_jm_atom_sw_start(pending_atom); if (kbase_jit_allocate_process(pending_atom) == 0) { /* Atom has completed */ INIT_WORK(&pending_atom->work, kbasep_jit_finish_worker); queue_work(kctx->jctx.job_done_wq, &pending_atom->work); } + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(kctx->kbdev, pending_atom); + kbase_kinstr_jm_atom_sw_stop(pending_atom); } } @@ -1412,41 +1429,27 @@ static int kbase_ext_res_prepare(struct kbase_jd_atom *katom) struct base_external_resource_list *ext_res; u64 count = 0; size_t copy_size; - int ret; user_ext_res = (__user struct base_external_resource_list *) (uintptr_t) katom->jc; /* Fail the job if there is no info structure */ - if (!user_ext_res) { - ret = -EINVAL; - goto fail; - } + if 
(!user_ext_res) + return -EINVAL; - if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) { - ret = -EINVAL; - goto fail; - } + if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) + return -EINVAL; /* Is the number of external resources in range? */ - if (!count || count > BASE_EXT_RES_COUNT_MAX) { - ret = -EINVAL; - goto fail; - } + if (!count || count > BASE_EXT_RES_COUNT_MAX) + return -EINVAL; /* Copy the information for safe access and future storage */ copy_size = sizeof(*ext_res); copy_size += sizeof(struct base_external_resource) * (count - 1); - ext_res = kzalloc(copy_size, GFP_KERNEL); - if (!ext_res) { - ret = -ENOMEM; - goto fail; - } - - if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) { - ret = -EINVAL; - goto free_info; - } + ext_res = memdup_user(user_ext_res, copy_size); + if (IS_ERR(ext_res)) + return PTR_ERR(ext_res); /* * Overwrite the count with the first value incase it was changed @@ -1457,11 +1460,6 @@ static int kbase_ext_res_prepare(struct kbase_jd_atom *katom) katom->softjob_data = ext_res; return 0; - -free_info: - kfree(ext_res); -fail: - return ret; } static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map) @@ -1538,6 +1536,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) struct kbase_device *kbdev = kctx->kbdev; KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(kbdev, katom); + kbase_kinstr_jm_atom_sw_start(katom); trace_sysgraph(SGR_SUBMIT, kctx->id, kbase_jd_atom_id(kctx, katom)); @@ -1600,6 +1599,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) /* Atom is complete */ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(kbdev, katom); + kbase_kinstr_jm_atom_sw_stop(katom); return ret; } @@ -1635,7 +1635,9 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom) struct base_fence fence; int fd; - if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence))) + if (copy_from_user(&fence, + (__user void *)(uintptr_t)katom->jc, + sizeof(fence)) != 0) 
return -EINVAL; fd = kbase_sync_fence_out_create(katom, @@ -1644,7 +1646,8 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom) return -EINVAL; fence.basep.fd = fd; - if (0 != copy_to_user((__user void *)(uintptr_t) katom->jc, &fence, sizeof(fence))) { + if (copy_to_user((__user void *)(uintptr_t)katom->jc, + &fence, sizeof(fence)) != 0) { kbase_sync_fence_out_remove(katom); kbase_sync_fence_close_fd(fd); fence.basep.fd = -EINVAL; @@ -1657,7 +1660,9 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom) struct base_fence fence; int ret; - if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence))) + if (copy_from_user(&fence, + (__user void *)(uintptr_t)katom->jc, + sizeof(fence)) != 0) return -EINVAL; /* Get a reference to the fence object */ @@ -1776,6 +1781,9 @@ void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev) if (kbase_process_soft_job(katom_iter) == 0) { kbase_finish_soft_job(katom_iter); resched |= jd_done_nolock(katom_iter, NULL); +#ifdef CONFIG_MALI_ARBITER_SUPPORT + atomic_dec(&kbdev->pm.gpu_users_waiting); +#endif /* CONFIG_MALI_ARBITER_SUPPORT */ } mutex_unlock(&kctx->jctx.lock); } @@ -1783,3 +1791,4 @@ void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev) if (resched) kbase_js_sched_all(kbdev); } +#endif /* !MALI_USE_CSF */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c index 22caa4a..84784be 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.c @@ -1,11 +1,12 @@ - /* +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016, 2020-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,9 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ + #include "mali_kbase_strings.h" #define KBASE_DRV_NAME "mali" diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h index d2f1825..c3f94f9 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_strings.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2016, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ extern const char kbase_drv_name[]; diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h index 80b54d0..ad05cdf 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2012-2016, 2018-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,14 +17,10 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /** - * @file mali_kbase_sync.h - * - * This file contains our internal "API" for explicit fences. + * DOC: This file contains our internal "API" for explicit fences. * It hides the implementation details of the actual explicit fence mechanism * used (Android fences or sync file with DMA fences). 
*/ @@ -31,11 +28,12 @@ #ifndef MALI_KBASE_SYNC_H #define MALI_KBASE_SYNC_H +#include #include -#ifdef CONFIG_SYNC +#if IS_ENABLED(CONFIG_SYNC) #include #endif -#ifdef CONFIG_SYNC_FILE +#if IS_ENABLED(CONFIG_SYNC_FILE) #include "mali_kbase_fence_defs.h" #include #endif @@ -72,6 +70,7 @@ struct kbase_sync_fence_info { */ int kbase_sync_fence_stream_create(const char *name, int *const out_fd); +#if !MALI_USE_CSF /** * kbase_sync_fence_out_create Create an explicit output fence to specified atom * @katom: Atom to assign the new explicit fence to @@ -92,6 +91,7 @@ int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd); * return: 0 on success, < 0 on error */ int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd); +#endif /* !MALI_USE_CSF */ /** * kbase_sync_fence_validate() - Validate a fd to be a valid fence @@ -104,6 +104,7 @@ int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd); */ int kbase_sync_fence_validate(int fd); +#if !MALI_USE_CSF /** * kbase_sync_fence_out_trigger - Signal explicit output fence attached on katom * @katom: Atom with an explicit fence to signal @@ -154,6 +155,7 @@ void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom); * This will also release the corresponding reference. 
*/ void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom); +#endif /* !MALI_USE_CSF */ /** * kbase_sync_fence_close_fd() - Close a file descriptor representing a fence @@ -161,13 +163,16 @@ void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom); */ static inline void kbase_sync_fence_close_fd(int fd) { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) +#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE + close_fd(fd); +#elif KERNEL_VERSION(4, 17, 0) <= LINUX_VERSION_CODE ksys_close(fd); #else sys_close(fd); #endif } +#if !MALI_USE_CSF /** * kbase_sync_fence_in_info_get() - Retrieves information about input fence * @katom: Atom to get fence information from @@ -187,6 +192,7 @@ int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, */ int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, struct kbase_sync_fence_info *info); +#endif /* !MALI_USE_CSF */ #if defined(CONFIG_SYNC_FILE) #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) @@ -207,6 +213,7 @@ void kbase_sync_fence_info_get(struct dma_fence *fence, const char *kbase_sync_status_string(int status); +#if !MALI_USE_CSF /* * Internal worker used to continue processing of atom. */ @@ -219,5 +226,6 @@ void kbase_sync_fence_wait_worker(struct work_struct *data); */ void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom); #endif +#endif /* !MALI_USE_CSF */ #endif /* MALI_KBASE_SYNC_H */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_android.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_android.c index 75940fb..8af2584 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_android.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_android.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2017, 2020-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /* @@ -50,15 +49,6 @@ struct mali_sync_pt { int result; }; -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) -/* For backwards compatibility with kernels before 3.17. After 3.17 - * sync_pt_parent is included in the kernel. */ -static inline struct sync_timeline *sync_pt_parent(struct sync_pt *pt) -{ - return pt->parent; -} -#endif - static struct mali_sync_timeline *to_mali_sync_timeline( struct sync_timeline *timeline) { @@ -196,6 +186,7 @@ int kbase_sync_fence_stream_create(const char *name, int *const out_fd) return 0; } +#if !MALI_USE_CSF /* Allocates a sync point within the timeline. 
* * The timeline must be the one allocated by kbase_sync_timeline_alloc @@ -225,10 +216,6 @@ int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int tl_fd) struct sync_timeline *tl; struct sync_pt *pt; struct sync_fence *fence; -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0) - struct files_struct *files; - struct fdtable *fdt; -#endif int fd; struct file *tl_file; @@ -259,29 +246,11 @@ int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int tl_fd) /* from here the fence owns the sync_pt */ /* create a fd representing the fence */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC); if (fd < 0) { sync_fence_put(fence); goto out; } -#else - fd = get_unused_fd(); - if (fd < 0) { - sync_fence_put(fence); - goto out; - } - - files = current->files; - spin_lock(&files->file_lock); - fdt = files_fdtable(files); -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) - __set_close_on_exec(fd, fdt); -#else - FD_SET(fd, fdt->close_on_exec); -#endif - spin_unlock(&files->file_lock); -#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */ /* bind fence to the new fd */ sync_fence_install(fence, fd); @@ -289,7 +258,8 @@ int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int tl_fd) katom->fence = sync_fence_fdget(fd); if (katom->fence == NULL) { /* The only way the fence can be NULL is if userspace closed it - * for us, so we don't need to clear it up */ + * for us, so we don't need to clear it up + */ fd = -EINVAL; goto out; } @@ -305,6 +275,7 @@ int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd) katom->fence = sync_fence_fdget(fd); return katom->fence ? 
0 : -ENOENT; } +#endif /* !MALI_USE_CSF */ int kbase_sync_fence_validate(int fd) { @@ -318,6 +289,7 @@ int kbase_sync_fence_validate(int fd) return 0; } +#if !MALI_USE_CSF /* Returns true if the specified timeline is allocated by Mali */ static int kbase_sync_timeline_is_ours(struct sync_timeline *timeline) { @@ -376,22 +348,14 @@ kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result) if (!katom->fence) return BASE_JD_EVENT_JOB_CANCELLED; -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) - if (!list_is_singular(&katom->fence->pt_list_head)) { -#else if (katom->fence->num_fences != 1) { -#endif /* Not exactly one item in the list - so it didn't (directly) - * come from us */ + * come from us + */ return BASE_JD_EVENT_JOB_CANCELLED; } -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) - pt = list_first_entry(&katom->fence->pt_list_head, - struct sync_pt, pt_list); -#else pt = container_of(katom->fence->cbs[0].sync_pt, struct sync_pt, base); -#endif timeline = sync_pt_parent(pt); if (!kbase_sync_timeline_is_ours(timeline)) { @@ -413,11 +377,7 @@ static inline int kbase_fence_get_status(struct sync_fence *fence) if (!fence) return -ENOENT; -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) - return fence->status; -#else return atomic_read(&fence->status); -#endif } static void kbase_fence_wait_callback(struct sync_fence *fence, @@ -461,7 +421,8 @@ int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom) if (ret < 0) { katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; /* We should cause the dependent jobs in the bag to be failed, - * to do this we schedule the work queue to complete this job */ + * to do this we schedule the work queue to complete this job + */ INIT_WORK(&katom->work, kbase_sync_fence_wait_worker); queue_work(katom->kctx->jctx.job_done_wq, &katom->work); } @@ -473,7 +434,8 @@ void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom) { if (sync_fence_cancel_async(katom->fence, &katom->sync_waiter) != 0) { /* The wait wasn't 
cancelled - leave the cleanup for - * kbase_fence_wait_callback */ + * kbase_fence_wait_callback + */ return; } @@ -540,3 +502,4 @@ void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom) sync_fence_wait(katom->fence, 1); } #endif +#endif /* !MALI_USE_CSF */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_common.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_common.c index 2e1ede5..5ee7fc3 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_common.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_common.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2016, 2018-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,10 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ /* - * @file mali_kbase_sync_common.c + * @file * * Common code for our explicit fence functionality */ @@ -30,6 +29,7 @@ #include "mali_kbase.h" #include "mali_kbase_sync.h" +#if !MALI_USE_CSF void kbase_sync_fence_wait_worker(struct work_struct *data) { struct kbase_jd_atom *katom; @@ -37,6 +37,7 @@ void kbase_sync_fence_wait_worker(struct work_struct *data) katom = container_of(data, struct kbase_jd_atom, work); kbase_soft_event_wait_callback(katom); } +#endif /* !MALI_USE_CSF */ const char *kbase_sync_status_string(int status) { diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_file.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_file.c index 0679c48..25670c4 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_file.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_sync_file.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ /* @@ -59,9 +58,10 @@ int kbase_sync_fence_stream_create(const char *name, int *const out_fd) return 0; } +#if !MALI_USE_CSF int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd) { -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence; #else struct dma_fence *fence; @@ -106,7 +106,7 @@ int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd) int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd) { -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence = sync_file_get_fence(fd); #else struct dma_fence *fence = sync_file_get_fence(fd); @@ -119,10 +119,11 @@ int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd) return 0; } +#endif /* !MALI_USE_CSF */ int kbase_sync_fence_validate(int fd) { -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence = sync_file_get_fence(fd); #else struct dma_fence *fence = sync_file_get_fence(fd); @@ -136,6 +137,7 @@ int kbase_sync_fence_validate(int fd) return 0; /* valid */ } +#if !MALI_USE_CSF enum base_jd_event_code kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result) { @@ -157,7 +159,7 @@ kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result) return (result != 0) ? 
BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE; } -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) static void kbase_fence_wait_callback(struct fence *fence, struct fence_cb *cb) #else @@ -175,7 +177,7 @@ static void kbase_fence_wait_callback(struct dma_fence *fence, #if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE || \ (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)) - if (dma_fence_is_signaled(kcb->fence) && kcb->fence->error) + if (dma_fence_is_signaled(kcb->fence) && kcb->fence->error < 0) #else if (dma_fence_is_signaled(kcb->fence) && kcb->fence->status < 0) #endif @@ -200,7 +202,7 @@ static void kbase_fence_wait_callback(struct dma_fence *fence, int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom) { int err; -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence; #else struct dma_fence *fence; @@ -233,8 +235,8 @@ int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom) katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; /* We should cause the dependent jobs in the bag to be failed, - * to do this we schedule the work queue to complete this job */ - + * to do this we schedule the work queue to complete this job + */ INIT_WORK(&katom->work, kbase_sync_fence_wait_worker); queue_work(katom->kctx->jctx.job_done_wq, &katom->work); } @@ -246,7 +248,8 @@ void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom) { if (!kbase_fence_free_callbacks(katom)) { /* The wait wasn't cancelled - - * leave the cleanup for kbase_fence_wait_callback */ + * leave the cleanup for kbase_fence_wait_callback + */ return; } @@ -273,6 +276,7 @@ void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom) kbase_fence_free_callbacks(katom); kbase_fence_in_remove(katom); } +#endif /* !MALI_USE_CSF */ #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) void kbase_sync_fence_info_get(struct fence 
*fence, @@ -317,10 +321,11 @@ void kbase_sync_fence_info_get(struct dma_fence *fence, #endif } +#if !MALI_USE_CSF int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, struct kbase_sync_fence_info *info) { -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence; #else struct dma_fence *fence; @@ -340,7 +345,7 @@ int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom, int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom, struct kbase_sync_fence_info *info) { -#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)) +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence; #else struct dma_fence *fence; @@ -364,3 +369,4 @@ void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom) /* Not implemented */ } #endif +#endif /* !MALI_USE_CSF*/ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_gpu_mem.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_gpu_mem.c new file mode 100644 index 0000000..3088c41 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_gpu_mem.c @@ -0,0 +1,221 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include +#include +#include +#include + +/** + * struct kbase_dma_buf - Object instantiated when a dma-buf imported allocation + * is mapped to GPU for the first time within a process. + * Another instantiation is done for the case when that + * allocation is mapped for the first time to GPU. + * + * @dma_buf: Reference to dma_buf been imported. + * @dma_buf_node: Link node to maintain a rb_tree of kbase_dma_buf. + * @import_count: The number of times the dma_buf was imported. + */ +struct kbase_dma_buf { + struct dma_buf *dma_buf; + struct rb_node dma_buf_node; + u32 import_count; +}; + +/** + * kbase_delete_dma_buf_mapping - Delete a dma buffer mapping. + * + * @kctx: Pointer to kbase context. + * @dma_buf: Pointer to a dma buffer mapping. + * @tree: Pointer to root of rb_tree containing the dma_buf's mapped. + * + * when we un-map any dma mapping we need to remove them from rb_tree, + * rb_tree is maintained at kbase_device level and kbase_process level + * by passing the root of kbase_device or kbase_process we can remove + * the node from the tree. + */ +static bool kbase_delete_dma_buf_mapping(struct kbase_context *kctx, + struct dma_buf *dma_buf, + struct rb_root *tree) +{ + struct kbase_dma_buf *buf_node = NULL; + struct rb_node *node = tree->rb_node; + bool mapping_removed = false; + + lockdep_assert_held(&kctx->kbdev->dma_buf_lock); + + while (node) { + buf_node = rb_entry(node, struct kbase_dma_buf, dma_buf_node); + + if (dma_buf == buf_node->dma_buf) { + WARN_ON(!buf_node->import_count); + + buf_node->import_count--; + + if (!buf_node->import_count) { + rb_erase(&buf_node->dma_buf_node, tree); + kfree(buf_node); + mapping_removed = true; + } + + break; + } + + if (dma_buf < buf_node->dma_buf) + node = node->rb_left; + else + node = node->rb_right; + } + + WARN_ON(!buf_node); + return mapping_removed; +} + +/** + * kbase_capture_dma_buf_mapping - capture a dma buffer mapping. + * + * @kctx: Pointer to kbase context. 
+ * @dma_buf: Pointer to a dma buffer mapping. + * @root: Pointer to root of rb_tree containing the dma_buf's. + * + * We maintain a kbase_device level and kbase_process level rb_tree + * of all unique dma_buf's mapped to gpu memory. So when attach any + * dma_buf add it the rb_tree's. To add the unique mapping we need + * check if the mapping is not a duplicate and then add them. + */ +static bool kbase_capture_dma_buf_mapping(struct kbase_context *kctx, + struct dma_buf *dma_buf, + struct rb_root *root) +{ + struct kbase_dma_buf *buf_node = NULL; + struct rb_node *node = root->rb_node; + bool unique_buf_imported = true; + + lockdep_assert_held(&kctx->kbdev->dma_buf_lock); + + while (node) { + buf_node = rb_entry(node, struct kbase_dma_buf, dma_buf_node); + + if (dma_buf == buf_node->dma_buf) { + unique_buf_imported = false; + break; + } + + if (dma_buf < buf_node->dma_buf) + node = node->rb_left; + else + node = node->rb_right; + } + + if (unique_buf_imported) { + struct kbase_dma_buf *new_buf_node = + kzalloc(sizeof(*new_buf_node), GFP_KERNEL); + + if (new_buf_node == NULL) { + dev_err(kctx->kbdev->dev, "Error allocating memory for kbase_dma_buf\n"); + /* Dont account for it if we fail to allocate memory */ + unique_buf_imported = false; + } else { + struct rb_node **new = &(root->rb_node), *parent = NULL; + + new_buf_node->dma_buf = dma_buf; + new_buf_node->import_count = 1; + while (*new) { + struct kbase_dma_buf *new_node; + + parent = *new; + new_node = rb_entry(parent, struct kbase_dma_buf, + dma_buf_node); + if (dma_buf < new_node->dma_buf) + new = &(*new)->rb_left; + else + new = &(*new)->rb_right; + } + rb_link_node(&new_buf_node->dma_buf_node, parent, new); + rb_insert_color(&new_buf_node->dma_buf_node, root); + } + } else if (!WARN_ON(!buf_node)) { + buf_node->import_count++; + } + + return unique_buf_imported; +} + +void kbase_remove_dma_buf_usage(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc) +{ + struct kbase_device *kbdev = 
kctx->kbdev; + bool dev_mapping_removed, prcs_mapping_removed; + + mutex_lock(&kbdev->dma_buf_lock); + + dev_mapping_removed = kbase_delete_dma_buf_mapping( + kctx, alloc->imported.umm.dma_buf, &kbdev->dma_buf_root); + + prcs_mapping_removed = kbase_delete_dma_buf_mapping( + kctx, alloc->imported.umm.dma_buf, &kctx->kprcs->dma_buf_root); + + WARN_ON(dev_mapping_removed && !prcs_mapping_removed); + + spin_lock(&kbdev->gpu_mem_usage_lock); + if (dev_mapping_removed) + kbdev->total_gpu_pages -= alloc->nents; + + if (prcs_mapping_removed) + kctx->kprcs->total_gpu_pages -= alloc->nents; + + if (dev_mapping_removed || prcs_mapping_removed) + kbase_trace_gpu_mem_usage(kbdev, kctx); + spin_unlock(&kbdev->gpu_mem_usage_lock); + + mutex_unlock(&kbdev->dma_buf_lock); +} + +void kbase_add_dma_buf_usage(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc) +{ + struct kbase_device *kbdev = kctx->kbdev; + bool unique_dev_dmabuf, unique_prcs_dmabuf; + + mutex_lock(&kbdev->dma_buf_lock); + + /* add dma_buf to device and process. 
*/ + unique_dev_dmabuf = kbase_capture_dma_buf_mapping( + kctx, alloc->imported.umm.dma_buf, &kbdev->dma_buf_root); + + unique_prcs_dmabuf = kbase_capture_dma_buf_mapping( + kctx, alloc->imported.umm.dma_buf, &kctx->kprcs->dma_buf_root); + + WARN_ON(unique_dev_dmabuf && !unique_prcs_dmabuf); + + spin_lock(&kbdev->gpu_mem_usage_lock); + if (unique_dev_dmabuf) + kbdev->total_gpu_pages += alloc->nents; + + if (unique_prcs_dmabuf) + kctx->kprcs->total_gpu_pages += alloc->nents; + + if (unique_prcs_dmabuf || unique_dev_dmabuf) + kbase_trace_gpu_mem_usage(kbdev, kctx); + spin_unlock(&kbdev->gpu_mem_usage_lock); + + mutex_unlock(&kbdev->dma_buf_lock); +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_gpu_mem.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_gpu_mem.h new file mode 100644 index 0000000..fd871fc --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_trace_gpu_mem.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#ifndef _KBASE_TRACE_GPU_MEM_H_ +#define _KBASE_TRACE_GPU_MEM_H_ + +#if IS_ENABLED(CONFIG_TRACE_GPU_MEM) +#include +#endif + +#define DEVICE_TGID ((u32) 0U) + +static void kbase_trace_gpu_mem_usage(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ +#if IS_ENABLED(CONFIG_TRACE_GPU_MEM) + lockdep_assert_held(&kbdev->gpu_mem_usage_lock); + + trace_gpu_mem_total(kbdev->id, DEVICE_TGID, + kbdev->total_gpu_pages << PAGE_SHIFT); + + if (likely(kctx)) + trace_gpu_mem_total(kbdev->id, kctx->kprcs->tgid, + kctx->kprcs->total_gpu_pages << PAGE_SHIFT); +#endif +} + +static inline void kbase_trace_gpu_mem_usage_dec(struct kbase_device *kbdev, + struct kbase_context *kctx, size_t pages) +{ + spin_lock(&kbdev->gpu_mem_usage_lock); + + if (likely(kctx)) + kctx->kprcs->total_gpu_pages -= pages; + + kbdev->total_gpu_pages -= pages; + + kbase_trace_gpu_mem_usage(kbdev, kctx); + + spin_unlock(&kbdev->gpu_mem_usage_lock); +} + +static inline void kbase_trace_gpu_mem_usage_inc(struct kbase_device *kbdev, + struct kbase_context *kctx, size_t pages) +{ + spin_lock(&kbdev->gpu_mem_usage_lock); + + if (likely(kctx)) + kctx->kprcs->total_gpu_pages += pages; + + kbdev->total_gpu_pages += pages; + + kbase_trace_gpu_mem_usage(kbdev, kctx); + + spin_unlock(&kbdev->gpu_mem_usage_lock); +} + +/** + * kbase_remove_dma_buf_usage - Remove a dma-buf entry captured. + * + * @kctx: Pointer to the kbase context + * @alloc: Pointer to the alloc to unmap + * + * Remove reference to dma buf been unmapped from kbase_device level + * rb_tree and Kbase_process level dma buf rb_tree. + */ +void kbase_remove_dma_buf_usage(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc); + +/** + * kbase_add_dma_buf_usage - Add a dma-buf entry captured. + * + * @kctx: Pointer to the kbase context + * @alloc: Pointer to the alloc to map in + * + * Add reference to dma buf been mapped to kbase_device level + * rb_tree and Kbase_process level dma buf rb_tree. 
+ */ +void kbase_add_dma_buf_usage(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc); + +#endif /* _KBASE_TRACE_GPU_MEM_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h index 8d4f044..2dad49b 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_utility.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2012-2013, 2015, 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2013, 2015, 2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ - - #ifndef _KBASE_UTILITY_H #define _KBASE_UTILITY_H @@ -32,7 +29,7 @@ static inline void kbase_timer_setup(struct timer_list *timer, void (*callback)(struct timer_list *timer)) { -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0) +#if KERNEL_VERSION(4, 14, 0) > LINUX_VERSION_CODE setup_timer(timer, (void (*)(unsigned long)) callback, (unsigned long) timer); #else diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c index d96b565..64405af 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,16 +17,14 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #include "mali_kbase_vinstr.h" #include "mali_kbase_hwcnt_virtualizer.h" #include "mali_kbase_hwcnt_types.h" -#include "mali_kbase_hwcnt_reader.h" +#include #include "mali_kbase_hwcnt_gpu.h" -#include "mali_kbase_ioctl.h" +#include #include "mali_malisw.h" #include "mali_kbase_debug.h" @@ -33,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -53,6 +53,10 @@ * counters. * @hvirt: Hardware counter virtualizer used by vinstr. 
* @metadata: Hardware counter metadata provided by virtualizer. + * @metadata_user: API compatible hardware counter metadata provided by vinstr. + * For compatibility with the user driver interface, this + * contains a "truncated" version of the HWCNT metadata limited + * to 64 entries per block. NULL when not required. * @lock: Lock protecting all vinstr state. * @suspend_count: Suspend reference count. If non-zero, timer and worker are * prevented from being re-scheduled. @@ -64,6 +68,7 @@ struct kbase_vinstr_context { struct kbase_hwcnt_virtualizer *hvirt; const struct kbase_hwcnt_metadata *metadata; + const struct kbase_hwcnt_metadata *metadata_user; struct mutex lock; size_t suspend_count; size_t client_count; @@ -83,6 +88,7 @@ struct kbase_vinstr_context { * @next_dump_time_ns: Time in ns when this client's next periodic dump must * occur. If 0, not a periodic client. * @enable_map: Counters enable map. + * @tmp_buf: Temporary buffer to use before handing dump to client. * @dump_bufs: Array of dump buffers allocated by this client. * @dump_bufs_meta: Metadata of dump buffers. * @meta_idx: Index of metadata being accessed by userspace. 
@@ -97,6 +103,7 @@ struct kbase_vinstr_client { u64 next_dump_time_ns; u32 dump_interval_ns; struct kbase_hwcnt_enable_map enable_map; + struct kbase_hwcnt_dump_buffer tmp_buf; struct kbase_hwcnt_dump_buffer_array dump_bufs; struct kbase_hwcnt_reader_metadata *dump_bufs_meta; atomic_t meta_idx; @@ -182,8 +189,10 @@ static int kbasep_vinstr_client_dump( u64 ts_end_ns; unsigned int write_idx; unsigned int read_idx; + struct kbase_hwcnt_dump_buffer *tmp_buf; struct kbase_hwcnt_dump_buffer *dump_buf; struct kbase_hwcnt_reader_metadata *meta; + u8 clk_cnt; WARN_ON(!vcli); lockdep_assert_held(&vcli->vctx->lock); @@ -198,23 +207,42 @@ static int kbasep_vinstr_client_dump( dump_buf = &vcli->dump_bufs.bufs[write_idx]; meta = &vcli->dump_bufs_meta[write_idx]; + tmp_buf = &vcli->tmp_buf; errcode = kbase_hwcnt_virtualizer_client_dump( - vcli->hvcli, &ts_start_ns, &ts_end_ns, dump_buf); + vcli->hvcli, &ts_start_ns, &ts_end_ns, tmp_buf); if (errcode) return errcode; /* Patch the dump buf headers, to hide the counters that other hwcnt * clients are using. */ - kbase_hwcnt_gpu_patch_dump_headers(dump_buf, &vcli->enable_map); + kbase_hwcnt_gpu_patch_dump_headers(tmp_buf, &vcli->enable_map); + + /* Copy the temp buffer to the userspace visible buffer. The strict + * variant will explicitly zero any non-enabled counters to ensure + * nothing except exactly what the user asked for is made visible. + * + * If the metadata in vinstr (vctx->metadata_user) is not NULL, it means + * vinstr has the truncated metadata, so do a narrow copy since + * virtualizer has a bigger buffer but user only needs part of it. + * otherwise we do a full copy. 
+ */ + if (vcli->vctx->metadata_user) + kbase_hwcnt_dump_buffer_copy_strict_narrow(dump_buf, tmp_buf, + &vcli->enable_map); + else + kbase_hwcnt_dump_buffer_copy_strict(dump_buf, tmp_buf, + &vcli->enable_map); - /* Zero all non-enabled counters (current values are undefined) */ - kbase_hwcnt_dump_buffer_zero_non_enabled(dump_buf, &vcli->enable_map); + clk_cnt = vcli->vctx->metadata->clk_cnt; meta->timestamp = ts_end_ns; meta->event_id = event_id; meta->buffer_idx = write_idx; + meta->cycles.top = (clk_cnt > 0) ? dump_buf->clk_cnt_buf[0] : 0; + meta->cycles.shader_cores = + (clk_cnt > 1) ? dump_buf->clk_cnt_buf[1] : 0; /* Notify client. Make sure all changes to memory are visible. */ wmb(); @@ -345,11 +373,7 @@ static enum hrtimer_restart kbasep_vinstr_dump_timer(struct hrtimer *timer) * cancelled, and the worker itself won't reschedule this timer if * suspend_count != 0. */ -#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE - queue_work(system_wq, &vctx->dump_work); -#else - queue_work(system_highpri_wq, &vctx->dump_work); -#endif + kbase_hwcnt_virtualizer_queue_work(vctx->hvirt, &vctx->dump_work); return HRTIMER_NORESTART; } @@ -365,6 +389,7 @@ static void kbasep_vinstr_client_destroy(struct kbase_vinstr_client *vcli) kbase_hwcnt_virtualizer_client_destroy(vcli->hvcli); kfree(vcli->dump_bufs_meta); kbase_hwcnt_dump_buffer_array_free(&vcli->dump_bufs); + kbase_hwcnt_dump_buffer_free(&vcli->tmp_buf); kbase_hwcnt_enable_map_free(&vcli->enable_map); kfree(vcli); } @@ -374,7 +399,7 @@ static void kbasep_vinstr_client_destroy(struct kbase_vinstr_client *vcli) * the vinstr context. * @vctx: Non-NULL pointer to vinstr context. * @setup: Non-NULL pointer to hardware counter ioctl setup structure. - * setup->buffer_count must not be 0. + * setup->buffer_count must not be 0 and must be a power of 2. * @out_vcli: Non-NULL pointer to where created client will be stored on * success. 
* @@ -392,6 +417,7 @@ static int kbasep_vinstr_client_create( WARN_ON(!vctx); WARN_ON(!setup); WARN_ON(setup->buffer_count == 0); + WARN_ON(!is_power_of_2(setup->buffer_count)); vcli = kzalloc(sizeof(*vcli), GFP_KERNEL); if (!vcli) @@ -404,14 +430,36 @@ static int kbasep_vinstr_client_create( if (errcode) goto error; - phys_em.jm_bm = setup->jm_bm; + phys_em.fe_bm = setup->fe_bm; phys_em.shader_bm = setup->shader_bm; phys_em.tiler_bm = setup->tiler_bm; phys_em.mmu_l2_bm = setup->mmu_l2_bm; kbase_hwcnt_gpu_enable_map_from_physical(&vcli->enable_map, &phys_em); - errcode = kbase_hwcnt_dump_buffer_array_alloc( - vctx->metadata, setup->buffer_count, &vcli->dump_bufs); + /* Use virtualizer's metadata to alloc tmp buffer which interacts with + * the HWC virtualizer. + */ + errcode = kbase_hwcnt_dump_buffer_alloc(vctx->metadata, &vcli->tmp_buf); + if (errcode) + goto error; + + /* Enable all the available clk_enable_map. */ + vcli->enable_map.clk_enable_map = (1ull << vctx->metadata->clk_cnt) - 1; + + if (vctx->metadata_user) + /* Use vinstr's truncated metadata to alloc dump buffers which + * interact with clients. + */ + errcode = + kbase_hwcnt_dump_buffer_array_alloc(vctx->metadata_user, + setup->buffer_count, + &vcli->dump_bufs); + else + /* Use metadata from virtualizer to allocate dump buffers if + * vinstr doesn't have the truncated metadata. 
+ */ + errcode = kbase_hwcnt_dump_buffer_array_alloc( + vctx->metadata, setup->buffer_count, &vcli->dump_bufs); if (errcode) goto error; @@ -439,6 +487,7 @@ int kbase_vinstr_init( struct kbase_hwcnt_virtualizer *hvirt, struct kbase_vinstr_context **out_vctx) { + int errcode; struct kbase_vinstr_context *vctx; const struct kbase_hwcnt_metadata *metadata; @@ -455,6 +504,11 @@ int kbase_vinstr_init( vctx->hvirt = hvirt; vctx->metadata = metadata; + vctx->metadata_user = NULL; + errcode = kbase_hwcnt_gpu_metadata_create_truncate_64( + &vctx->metadata_user, metadata); + if (errcode) + goto err_metadata_create; mutex_init(&vctx->lock); INIT_LIST_HEAD(&vctx->clients); @@ -464,6 +518,11 @@ int kbase_vinstr_init( *out_vctx = vctx; return 0; + +err_metadata_create: + kfree(vctx); + + return errcode; } void kbase_vinstr_term(struct kbase_vinstr_context *vctx) @@ -484,6 +543,9 @@ void kbase_vinstr_term(struct kbase_vinstr_context *vctx) } } + if (vctx->metadata_user) + kbase_hwcnt_metadata_destroy(vctx->metadata_user); + WARN_ON(vctx->client_count != 0); kfree(vctx); } @@ -543,11 +605,8 @@ void kbase_vinstr_resume(struct kbase_vinstr_context *vctx) } if (has_periodic_clients) -#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE - queue_work(system_wq, &vctx->dump_work); -#else - queue_work(system_highpri_wq, &vctx->dump_work); -#endif + kbase_hwcnt_virtualizer_queue_work( + vctx->hvirt, &vctx->dump_work); } } @@ -564,7 +623,8 @@ int kbase_vinstr_hwcnt_reader_setup( if (!vctx || !setup || (setup->buffer_count == 0) || - (setup->buffer_count > MAX_BUFFER_COUNT)) + (setup->buffer_count > MAX_BUFFER_COUNT) || + !is_power_of_2(setup->buffer_count)) return -EINVAL; errcode = kbasep_vinstr_client_create(vctx, setup, &vcli); @@ -675,26 +735,31 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( unsigned int idx = meta_idx % cli->dump_bufs.buf_cnt; struct kbase_hwcnt_reader_metadata *meta = &cli->dump_bufs_meta[idx]; + const size_t meta_size = sizeof(struct 
kbase_hwcnt_reader_metadata); + const size_t min_size = min(size, meta_size); /* Metadata sanity check. */ WARN_ON(idx != meta->buffer_idx); - if (sizeof(struct kbase_hwcnt_reader_metadata) != size) - return -EINVAL; - /* Check if there is any buffer available. */ - if (atomic_read(&cli->write_idx) == meta_idx) + if (unlikely(atomic_read(&cli->write_idx) == meta_idx)) return -EAGAIN; /* Check if previously taken buffer was put back. */ - if (atomic_read(&cli->read_idx) != meta_idx) + if (unlikely(atomic_read(&cli->read_idx) != meta_idx)) return -EBUSY; + /* Clear user buffer to zero. */ + if (unlikely(meta_size < size && clear_user(buffer, size))) + return -EFAULT; + /* Copy next available buffer's metadata to user. */ - if (copy_to_user(buffer, meta, size)) + if (unlikely(copy_to_user(buffer, meta, min_size))) return -EFAULT; - atomic_inc(&cli->meta_idx); + /* Compare exchange meta idx to protect against concurrent getters */ + if (meta_idx != atomic_cmpxchg(&cli->meta_idx, meta_idx, meta_idx + 1)) + return -EBUSY; return 0; } @@ -715,24 +780,68 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( unsigned int read_idx = atomic_read(&cli->read_idx); unsigned int idx = read_idx % cli->dump_bufs.buf_cnt; - struct kbase_hwcnt_reader_metadata meta; - - if (sizeof(struct kbase_hwcnt_reader_metadata) != size) - return -EINVAL; + struct kbase_hwcnt_reader_metadata *meta; + const size_t meta_size = sizeof(struct kbase_hwcnt_reader_metadata); + const size_t max_size = max(size, meta_size); + int ret = 0; + u8 stack_kbuf[64]; + u8 *kbuf = NULL; + size_t i; /* Check if any buffer was taken. */ - if (atomic_read(&cli->meta_idx) == read_idx) + if (unlikely(atomic_read(&cli->meta_idx) == read_idx)) return -EPERM; + if (likely(max_size <= sizeof(stack_kbuf))) { + /* Use stack buffer when the size is small enough. 
*/ + if (unlikely(meta_size > size)) + memset(stack_kbuf, 0, sizeof(stack_kbuf)); + kbuf = stack_kbuf; + } else { + kbuf = kzalloc(max_size, GFP_KERNEL); + if (unlikely(!kbuf)) + return -ENOMEM; + } + + /* + * Copy user buffer to zero cleared kernel buffer which has enough + * space for both user buffer and kernel metadata. + */ + if (unlikely(copy_from_user(kbuf, buffer, size))) { + ret = -EFAULT; + goto out; + } + + /* + * Make sure any "extra" data passed from userspace is zero. + * It's meaningful only in case meta_size < size. + */ + for (i = meta_size; i < size; i++) { + /* Check if user data beyond meta size is zero. */ + if (unlikely(kbuf[i] != 0)) { + ret = -EINVAL; + goto out; + } + } + /* Check if correct buffer is put back. */ - if (copy_from_user(&meta, buffer, size)) - return -EFAULT; - if (idx != meta.buffer_idx) - return -EINVAL; + meta = (struct kbase_hwcnt_reader_metadata *)kbuf; + if (unlikely(idx != meta->buffer_idx)) { + ret = -EINVAL; + goto out; + } - atomic_inc(&cli->read_idx); + /* Compare exchange read idx to protect against concurrent putters */ + if (read_idx != + atomic_cmpxchg(&cli->read_idx, read_idx, read_idx + 1)) { + ret = -EPERM; + goto out; + } - return 0; +out: + if (unlikely(kbuf != stack_kbuf)) + kfree(kbuf); + return ret; } /** @@ -760,11 +869,8 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval( * worker is already queued. 
*/ if ((interval != 0) && (cli->vctx->suspend_count == 0)) -#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE - queue_work(system_wq, &cli->vctx->dump_work); -#else - queue_work(system_highpri_wq, &cli->vctx->dump_work); -#endif + kbase_hwcnt_virtualizer_queue_work(cli->vctx->hvirt, + &cli->vctx->dump_work); mutex_unlock(&cli->vctx->lock); @@ -813,26 +919,58 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( struct kbase_vinstr_client *cli, u32 __user *hwver) { - u32 ver = 0; + u32 ver = 5; const enum kbase_hwcnt_gpu_group_type type = kbase_hwcnt_metadata_group_type(cli->vctx->metadata, 0); - switch (type) { - case KBASE_HWCNT_GPU_GROUP_TYPE_V4: - ver = 4; - break; - case KBASE_HWCNT_GPU_GROUP_TYPE_V5: - ver = 5; - break; - default: - WARN_ON(true); - } - - if (ver != 0) { - return put_user(ver, hwver); - } else { + if (WARN_ON(type != KBASE_HWCNT_GPU_GROUP_TYPE_V5)) return -EINVAL; + + return put_user(ver, hwver); +} + +/** + * The hwcnt reader's ioctl command - get API version. + * @cli: The non-NULL pointer to the client + * @arg: Command's argument. + * @size: Size of arg. + * + * @return 0 on success, else error code. 
+ */ +static long kbasep_vinstr_hwcnt_reader_ioctl_get_api_version( + struct kbase_vinstr_client *cli, unsigned long arg, size_t size) +{ + long ret = -EINVAL; + + if (size == sizeof(u32)) { + ret = put_user(HWCNT_READER_API, (u32 __user *)arg); + } else if (size == sizeof(struct kbase_hwcnt_reader_api_version)) { + u8 clk_cnt = cli->vctx->metadata->clk_cnt; + unsigned long bytes = 0; + struct kbase_hwcnt_reader_api_version api_version = { + .version = HWCNT_READER_API, + .features = KBASE_HWCNT_READER_API_VERSION_NO_FEATURE, + }; + + if (clk_cnt > 0) + api_version.features |= + KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_TOP; + if (clk_cnt > 1) + api_version.features |= + KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES; + + bytes = copy_to_user( + (void __user *)arg, &api_version, sizeof(api_version)); + + /* copy_to_user returns zero in case of success. + * If it fails, it returns the number of bytes that could NOT be copied + */ + if (bytes == 0) + ret = 0; + else + ret = -EFAULT; } + return ret; } /** @@ -858,42 +996,48 @@ static long kbasep_vinstr_hwcnt_reader_ioctl( if (!cli) return -EINVAL; - switch (cmd) { - case KBASE_HWCNT_READER_GET_API_VERSION: - rcode = put_user(HWCNT_READER_API, (u32 __user *)arg); + switch (_IOC_NR(cmd)) { + case _IOC_NR(KBASE_HWCNT_READER_GET_API_VERSION): + rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_api_version( + cli, arg, _IOC_SIZE(cmd)); break; - case KBASE_HWCNT_READER_GET_HWVER: + case _IOC_NR(KBASE_HWCNT_READER_GET_HWVER): rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( cli, (u32 __user *)arg); break; - case KBASE_HWCNT_READER_GET_BUFFER_SIZE: - rcode = put_user( - (u32)cli->vctx->metadata->dump_buf_bytes, - (u32 __user *)arg); + case _IOC_NR(KBASE_HWCNT_READER_GET_BUFFER_SIZE): + if (cli->vctx->metadata_user) + rcode = put_user( + (u32)cli->vctx->metadata_user->dump_buf_bytes, + (u32 __user *)arg); + else + rcode = put_user( + (u32)cli->vctx->metadata->dump_buf_bytes, + (u32 __user *)arg); break; - case 
KBASE_HWCNT_READER_DUMP: + case _IOC_NR(KBASE_HWCNT_READER_DUMP): rcode = kbasep_vinstr_hwcnt_reader_ioctl_dump(cli); break; - case KBASE_HWCNT_READER_CLEAR: + case _IOC_NR(KBASE_HWCNT_READER_CLEAR): rcode = kbasep_vinstr_hwcnt_reader_ioctl_clear(cli); break; - case KBASE_HWCNT_READER_GET_BUFFER: + case _IOC_NR(KBASE_HWCNT_READER_GET_BUFFER): rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( cli, (void __user *)arg, _IOC_SIZE(cmd)); break; - case KBASE_HWCNT_READER_PUT_BUFFER: + case _IOC_NR(KBASE_HWCNT_READER_PUT_BUFFER): rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( cli, (void __user *)arg, _IOC_SIZE(cmd)); break; - case KBASE_HWCNT_READER_SET_INTERVAL: + case _IOC_NR(KBASE_HWCNT_READER_SET_INTERVAL): rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval( cli, (u32)arg); break; - case KBASE_HWCNT_READER_ENABLE_EVENT: + case _IOC_NR(KBASE_HWCNT_READER_ENABLE_EVENT): rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event( cli, (enum base_hwcnt_reader_event)arg); break; - case KBASE_HWCNT_READER_DISABLE_EVENT: + case _IOC_NR(KBASE_HWCNT_READER_DISABLE_EVENT): rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event( cli, (enum base_hwcnt_reader_event)arg); break; @@ -955,7 +1099,16 @@ static int kbasep_vinstr_hwcnt_reader_mmap( return -EINVAL; vm_size = vma->vm_end - vma->vm_start; - size = cli->dump_bufs.buf_cnt * cli->vctx->metadata->dump_buf_bytes; + + /* The mapping is allowed to span the entirety of the page allocation, + * not just the chunk where the dump buffers are allocated. + * This accommodates the corner case where the combined size of the + * dump buffers is smaller than a single page. + * This does not pose a security risk as the pages are zeroed on + * allocation, and anything out of bounds of the dump buffers is never + * written to. 
+ */ + size = (1ull << cli->dump_bufs.page_order) * PAGE_SIZE; if (vma->vm_pgoff > (size >> PAGE_SHIFT)) return -EINVAL; diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h index 81d315f..6747ec7 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_vinstr.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /* diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h index f618755..2a243dd 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_linux_trace.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2016, 2018-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2016, 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. 
+ * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #undef TRACE_SYSTEM @@ -31,7 +30,7 @@ #if defined(CONFIG_MALI_GATOR_SUPPORT) #define MALI_JOB_SLOTS_EVENT_CHANGED -/** +/* * mali_job_slots_event - Reports change of job slot status. * @gpu_id: Kbase device id * @event_id: ORed together bitfields representing a type of event, @@ -181,6 +180,23 @@ TRACE_EVENT(mali_total_alloc_pages_change, __print_symbolic(KBASE_MMU_FAULT_STATUS_ACCESS(status), \ KBASE_MMU_FAULT_ACCESS_SYMBOLIC_STRINGS) +#if MALI_USE_CSF +#define KBASE_MMU_FAULT_CODE_VALID(code) \ + ((code >= 0xC0 && code <= 0xEB) && \ + (!(code >= 0xC5 && code <= 0xC7)) && \ + (!(code >= 0xCC && code <= 0xD8)) && \ + (!(code >= 0xDC && code <= 0xDF)) && \ + (!(code >= 0xE1 && code <= 0xE3))) +#define KBASE_MMU_FAULT_CODE_SYMBOLIC_STRINGS _ENSURE_PARENTHESIS(\ + {0xC0, "TRANSLATION_FAULT_" }, \ + {0xC4, "TRANSLATION_FAULT_" }, \ + {0xC8, "PERMISSION_FAULT_" }, \ + {0xD0, "TRANSTAB_BUS_FAULT_" }, \ + {0xD8, "ACCESS_FLAG_" }, \ + {0xE0, "ADDRESS_SIZE_FAULT_IN" }, \ + {0xE4, "ADDRESS_SIZE_FAULT_OUT" }, \ + {0xE8, "MEMORY_ATTRIBUTES_FAULT_" }) +#else /* MALI_USE_CSF */ #define KBASE_MMU_FAULT_CODE_VALID(code) \ ((code >= 0xC0 && code <= 0xEF) && \ (!(code >= 0xC5 && code <= 0xC6)) && \ @@ -197,6 +213,7 @@ TRACE_EVENT(mali_total_alloc_pages_change, {0xE4, "ADDRESS_SIZE_FAULT_OUT" }, \ {0xE8, "MEMORY_ATTRIBUTES_FAULT_" }, \ {0xEC, "MEMORY_ATTRIBUTES_NONCACHEABLE_" }) +#endif /* MALI_USE_CSF */ #endif /* __TRACE_MALI_MMU_HELPERS */ /* trace_mali_mmu_page_fault_grow @@ -288,7 +305,8 @@ DEFINE_EVENT_PRINT(mali_jit_softjob_template, mali_jit_free, TP_printk("start=0x%llx va_pages=0x%zx backed_size=0x%zx", __entry->start_addr, __entry->nr_pages, 
__entry->backed_pages)); -#if MALI_JIT_PRESSURE_LIMIT +#if !MALI_USE_CSF +#if MALI_JIT_PRESSURE_LIMIT_BASE /* trace_mali_jit_report * * Tracepoint about the GPU data structure read to form a just-in-time memory @@ -326,13 +344,11 @@ TRACE_EVENT(mali_jit_report, ), __entry->read_val, __entry->used_pages) ); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ +#endif /* !MALI_USE_CSF */ -#if (KERNEL_VERSION(4, 1, 0) <= LINUX_VERSION_CODE) TRACE_DEFINE_ENUM(KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); -#endif - -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /* trace_mali_jit_report_pressure * * Tracepoint about change in physical memory pressure, due to the information @@ -366,14 +382,13 @@ TRACE_EVENT(mali_jit_report_pressure, { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE, "HAPPENED_ON_ALLOC_OR_FREE" })) ); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ #ifndef __TRACE_SYSGRAPH_ENUM #define __TRACE_SYSGRAPH_ENUM /* Enum of sysgraph message IDs */ enum sysgraph_msg { SGR_ARRIVE, - SGR_DEP_RES, SGR_SUBMIT, SGR_COMPLETE, SGR_POST, @@ -401,7 +416,7 @@ TRACE_EVENT(sysgraph, __entry->message = message; __entry->atom_id = atom_id; ), - TP_printk("msg=%u proc_id=%u, param1=%d\n", __entry->message, + TP_printk("msg=%u proc_id=%u, param1=%d", __entry->message, __entry->proc_id, __entry->atom_id) ); @@ -427,7 +442,7 @@ TRACE_EVENT(sysgraph_gpu, __entry->atom_id = atom_id; __entry->js = js; ), - TP_printk("msg=%u proc_id=%u, param1=%d, param2=%d\n", + TP_printk("msg=%u proc_id=%u, param1=%d, param2=%d", __entry->message, __entry->proc_id, __entry->atom_id, __entry->js) ); @@ -516,7 +531,7 @@ TRACE_EVENT(mali_jit_trim, TP_printk("freed_pages=%zu", __entry->freed_pages) ); -#include "mali_kbase_debug_linux_ktrace.h" +#include "debug/mali_kbase_debug_linux_ktrace.h" #endif /* _TRACE_MALI_H */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_malisw.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_malisw.h index 
3a4db10..c0649f2 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_malisw.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_malisw.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2015, 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015, 2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /** @@ -28,26 +27,11 @@ #define _MALISW_H_ #include -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) -#define U8_MAX ((u8)~0U) -#define S8_MAX ((s8)(U8_MAX>>1)) -#define S8_MIN ((s8)(-S8_MAX - 1)) -#define U16_MAX ((u16)~0U) -#define S16_MAX ((s16)(U16_MAX>>1)) -#define S16_MIN ((s16)(-S16_MAX - 1)) -#define U32_MAX ((u32)~0U) -#define S32_MAX ((s32)(U32_MAX>>1)) -#define S32_MIN ((s32)(-S32_MAX - 1)) -#define U64_MAX ((u64)~0ULL) -#define S64_MAX ((s64)(U64_MAX>>1)) -#define S64_MIN ((s64)(-S64_MAX - 1)) -#endif /* LINUX_VERSION_CODE */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0) -#define SIZE_MAX (~(size_t)0) -#endif /* LINUX_VERSION_CODE */ /** * MIN - Return the lesser of two values. + * @x: value1 + * @y: value2 * * As a macro it may evaluate its arguments more than once. * Refer to MAX macro for more details @@ -55,7 +39,9 @@ #define MIN(x, y) ((x) < (y) ? (x) : (y)) /** - * MAX - Return the greater of two values. + * MAX - Return the greater of two values. 
+ * @x: value1 + * @y: value2 * * As a macro it may evaluate its arguments more than once. * If called on the same two arguments as MIN it is guaranteed to return @@ -67,24 +53,27 @@ #define MAX(x, y) ((x) < (y) ? (y) : (x)) /** - * @hideinitializer - * Function-like macro for suppressing unused variable warnings. Where possible - * such variables should be removed; this macro is present for cases where we - * much support API backwards compatibility. + * Function-like macro for suppressing unused variable warnings. + * @x: unused variable + * + * Where possible such variables should be removed; this macro is present for + * cases where we much support API backwards compatibility. */ #define CSTD_UNUSED(x) ((void)(x)) /** - * @hideinitializer - * Function-like macro for use where "no behavior" is desired. This is useful - * when compile time macros turn a function-like macro in to a no-op, but - * where having no statement is otherwise invalid. + * Function-like macro for use where "no behavior" is desired. + * @...: no-op + * + * This is useful when compile time macros turn a function-like macro in to a + * no-op, but where having no statement is otherwise invalid. */ #define CSTD_NOP(...) ((void)#__VA_ARGS__) /** - * @hideinitializer * Function-like macro for stringizing a single level macro. + * @x: macro's value + * * @code * #define MY_MACRO 32 * CSTD_STR1( MY_MACRO ) @@ -94,10 +83,11 @@ #define CSTD_STR1(x) #x /** - * @hideinitializer - * Function-like macro for stringizing a macro's value. This should not be used - * if the macro is defined in a way which may have no value; use the - * alternative @c CSTD_STR2N macro should be used instead. + * Function-like macro for stringizing a macro's value. + * @x: macro's value + * + * This should not be used if the macro is defined in a way which may have no + * value; use the alternative @c CSTD_STR2N macro should be used instead. 
* @code * #define MY_MACRO 32 * CSTD_STR2( MY_MACRO ) diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_power_gpu_frequency_trace.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_power_gpu_frequency_trace.c new file mode 100644 index 0000000..1db3abe --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_power_gpu_frequency_trace.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* Create the trace point if not configured in kernel */ +#ifndef CONFIG_TRACE_POWER_GPU_FREQUENCY +#define CREATE_TRACE_POINTS +#include "mali_power_gpu_frequency_trace.h" +#endif diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_power_gpu_frequency_trace.h b/dvalin/kernel/drivers/gpu/arm/midgard/mali_power_gpu_frequency_trace.h new file mode 100644 index 0000000..ea18fcd --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_power_gpu_frequency_trace.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _TRACE_POWER_GPU_FREQUENCY_MALI +#define _TRACE_POWER_GPU_FREQUENCY_MALI +#endif + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM power +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE mali_power_gpu_frequency_trace +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . + +#if !defined(_TRACE_POWER_GPU_FREQUENCY_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_POWER_GPU_FREQUENCY_H + +#include + +DECLARE_EVENT_CLASS(gpu, + + TP_PROTO(unsigned int state, unsigned int gpu_id), + + TP_ARGS(state, gpu_id), + + TP_STRUCT__entry( + __field( u32, state ) + __field( u32, gpu_id ) + ), + + TP_fast_assign( + __entry->state = state; + __entry->gpu_id = gpu_id; + ), + + TP_printk("state=%lu gpu_id=%lu", (unsigned long)__entry->state, + (unsigned long)__entry->gpu_id) +); + +DEFINE_EVENT(gpu, gpu_frequency, + + TP_PROTO(unsigned int frequency, unsigned int gpu_id), + + TP_ARGS(frequency, gpu_id) +); + +#endif /* _TRACE_POWER_GPU_FREQUENCY_H */ + +/* This part must be outside protection */ +#include diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/Kbuild new file mode 100644 index 0000000..7f2bb26 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/Kbuild @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: 
GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +mali_kbase-y += \ + mmu/mali_kbase_mmu.o \ + mmu/mali_kbase_mmu_hw_direct.o \ + mmu/mali_kbase_mmu_mode_aarch64.o + +ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) + mali_kbase-y += mmu/backend/mali_kbase_mmu_csf.o +else + mali_kbase-y += mmu/backend/mali_kbase_mmu_jm.o +endif diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/backend/mali_kbase_mmu_csf.c b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/backend/mali_kbase_mmu_csf.c new file mode 100644 index 0000000..05253ae --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/backend/mali_kbase_mmu_csf.c @@ -0,0 +1,565 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/** + * DOC: Base kernel MMU management specific for CSF GPU. + */ + +#include +#include +#include +#include +#include +#include + +void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, + struct kbase_mmu_setup * const setup) +{ + /* Set up the required caching policies at the correct indices + * in the memattr register. + */ + setup->memattr = + (AS_MEMATTR_IMPL_DEF_CACHE_POLICY << + (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | + (AS_MEMATTR_FORCE_TO_CACHE_ALL << + (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | + (AS_MEMATTR_WRITE_ALLOC << + (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | + (AS_MEMATTR_AARCH64_OUTER_IMPL_DEF << + (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | + (AS_MEMATTR_AARCH64_OUTER_WA << + (AS_MEMATTR_INDEX_OUTER_WA * 8)) | + (AS_MEMATTR_AARCH64_NON_CACHEABLE << + (AS_MEMATTR_INDEX_NON_CACHEABLE * 8)) | + (AS_MEMATTR_AARCH64_SHARED << + (AS_MEMATTR_INDEX_SHARED * 8)); + + setup->transtab = (u64)mmut->pgd & AS_TRANSTAB_BASE_MASK; + setup->transcfg = AS_TRANSCFG_ADRMODE_AARCH64_4K; +} + +/** + * submit_work_pagefault() - Submit a work for MMU page fault. + * + * @kbdev: Kbase device pointer + * @as_nr: Faulty address space + * @fault: Data relating to the fault + * + * This function submits a work for reporting the details of MMU fault. 
+ */
+static void submit_work_pagefault(struct kbase_device *kbdev, u32 as_nr,
+		struct kbase_fault *fault)
+{
+	unsigned long flags;
+	struct kbase_as *const as = &kbdev->as[as_nr];
+	struct kbase_context *kctx;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kctx = kbase_ctx_sched_as_to_ctx_nolock(kbdev, as_nr);
+
+	if (kctx) {
+		kbase_ctx_sched_retain_ctx_refcount(kctx);
+
+		as->pf_data = (struct kbase_fault) {
+			.status = fault->status,
+			.addr = fault->addr,
+		};
+
+		/*
+		 * A page fault work item could already be pending for this
+		 * address space when the MCU fault occurs; queue_work() then
+		 * returns false and the reference taken above is dropped.
+		 */
+		if (!queue_work(as->pf_wq, &as->work_pagefault)) {
+			dev_dbg(kbdev->dev,
+				"Page fault is already pending for as %u\n",
+				as_nr);
+			kbase_ctx_sched_release_ctx(kctx);
+		} else {
+			atomic_inc(&kbdev->faults_pending);
+		}
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_mmu_report_mcu_as_fault_and_reset(struct kbase_device *kbdev,
+		struct kbase_fault *fault)
+{
+	/* decode status: exception [7:0], access [9:8], source [31:16] */
+	u32 exception_type = fault->status & 0xFF;
+	u32 access_type = (fault->status >> 8) & 0x3;
+	u32 source_id = (fault->status >> 16);
+	int as_no;
+
+	/* terminal fault, print info about the fault */
+	dev_err(kbdev->dev,
+		"Unexpected Page fault in firmware address space at VA 0x%016llX\n"
+		"raw fault status: 0x%X\n"
+		"exception type 0x%X: %s\n"
+		"access type 0x%X: %s\n"
+		"source id 0x%X\n",
+		fault->addr,
+		fault->status,
+		exception_type, kbase_gpu_exception_name(exception_type),
+		access_type, kbase_gpu_access_type_name(fault->status),
+		source_id);
+
+	/* Report MMU fault for all address spaces (except MCU_AS_NR) */
+	for (as_no = 1; as_no < kbdev->nr_hw_address_spaces; as_no++)
+		submit_work_pagefault(kbdev, as_no, fault);
+
+	/* GPU reset is required to recover */
+	if (kbase_prepare_to_reset_gpu(kbdev,
+			RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+		kbase_reset_gpu(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_report_mcu_as_fault_and_reset); + +void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, + struct kbase_as *as, struct kbase_fault *fault) +{ + struct kbase_device *kbdev = kctx->kbdev; + u32 const status = fault->status; + int exception_type = (status & GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> + GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT; + int access_type = (status & GPU_FAULTSTATUS_ACCESS_TYPE_MASK) >> + GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT; + int source_id = (status & GPU_FAULTSTATUS_SOURCE_ID_MASK) >> + GPU_FAULTSTATUS_SOURCE_ID_SHIFT; + const char *addr_valid = (status & GPU_FAULTSTATUS_ADDR_VALID_FLAG) ? + "true" : "false"; + int as_no = as->number; + unsigned long flags; + + /* terminal fault, print info about the fault */ + dev_err(kbdev->dev, + "GPU bus fault in AS%d at VA 0x%016llX\n" + "VA_VALID: %s\n" + "raw fault status: 0x%X\n" + "exception type 0x%X: %s\n" + "access type 0x%X: %s\n" + "source id 0x%X\n" + "pid: %d\n", + as_no, fault->addr, + addr_valid, + status, + exception_type, kbase_gpu_exception_name(exception_type), + access_type, kbase_gpu_access_type_name(access_type), + source_id, + kctx->pid); + + /* AS transaction begin */ + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_mmu_disable(kctx); + kbase_ctx_flag_set(kctx, KCTX_AS_DISABLED_ON_FAULT); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); + + /* Switching to UNMAPPED mode above would have enabled the firmware to + * recover from the fault (if the memory access was made by firmware) + * and it can then respond to CSG termination requests to be sent now. + * All GPU command queue groups associated with the context would be + * affected as they use the same GPU address space. 
+ */ + kbase_csf_ctx_handle_fault(kctx, fault); + + /* Now clear the GPU fault */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CLEAR_FAULT); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +/* + * The caller must ensure it's retained the ctx to prevent it from being + * scheduled out whilst it's being worked on. + */ +void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, + struct kbase_as *as, const char *reason_str, + struct kbase_fault *fault) +{ + unsigned long flags; + unsigned int exception_type; + unsigned int access_type; + unsigned int source_id; + int as_no; + struct kbase_device *kbdev; + const u32 status = fault->status; + + as_no = as->number; + kbdev = kctx->kbdev; + + /* Make sure the context was active */ + if (WARN_ON(atomic_read(&kctx->refcount) <= 0)) + return; + + /* decode the fault status */ + exception_type = AS_FAULTSTATUS_EXCEPTION_TYPE_GET(status); + access_type = AS_FAULTSTATUS_ACCESS_TYPE_GET(status); + source_id = AS_FAULTSTATUS_SOURCE_ID_GET(status); + + /* terminal fault, print info about the fault */ + dev_err(kbdev->dev, + "Unhandled Page fault in AS%d at VA 0x%016llX\n" + "Reason: %s\n" + "raw fault status: 0x%X\n" + "exception type 0x%X: %s\n" + "access type 0x%X: %s\n" + "source id 0x%X\n" + "pid: %d\n", + as_no, fault->addr, + reason_str, + status, + exception_type, kbase_gpu_exception_name(exception_type), + access_type, kbase_gpu_access_type_name(status), + source_id, + kctx->pid); + + /* AS transaction begin */ + mutex_lock(&kbdev->mmu_hw_mutex); + + /* switch to UNMAPPED mode, + * will abort all jobs and stop any hw counter dumping + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_mmu_disable(kctx); + kbase_ctx_flag_set(kctx, KCTX_AS_DISABLED_ON_FAULT); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + mutex_unlock(&kbdev->mmu_hw_mutex); + /* AS transaction end */ + + /* Switching to UNMAPPED mode above 
would have enabled the firmware to + * recover from the fault (if the memory access was made by firmware) + * and it can then respond to CSG termination requests to be sent now. + * All GPU command queue groups associated with the context would be + * affected as they use the same GPU address space. + */ + kbase_csf_ctx_handle_fault(kctx, fault); + + /* Clear down the fault */ + kbase_mmu_hw_clear_fault(kbdev, as, + KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); + kbase_mmu_hw_enable_fault(kbdev, as, + KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); +} + +/** + * kbase_mmu_interrupt_process() - Process a bus or page fault. + * @kbdev: The kbase_device the fault happened on + * @kctx: The kbase_context for the faulting address space if one was + * found. + * @as: The address space that has the fault + * @fault: Data relating to the fault + * + * This function will process a fault on a specific address space + */ +static void kbase_mmu_interrupt_process(struct kbase_device *kbdev, + struct kbase_context *kctx, struct kbase_as *as, + struct kbase_fault *fault) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (!kctx) { + dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Spurious IRQ or SW Design Error?\n", + kbase_as_has_bus_fault(as, fault) ? + "Bus error" : "Page fault", + as->number, fault->addr); + + /* Since no ctx was found, the MMU must be disabled. 
*/ + WARN_ON(as->current_setup.transtab); + + if (kbase_as_has_bus_fault(as, fault)) + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CLEAR_FAULT); + else if (kbase_as_has_page_fault(as, fault)) { + kbase_mmu_hw_clear_fault(kbdev, as, + KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); + kbase_mmu_hw_enable_fault(kbdev, as, + KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); + } + + return; + } + + if (kbase_as_has_bus_fault(as, fault)) { + /* + * We need to switch to UNMAPPED mode - but we do this in a + * worker so that we can sleep + */ + WARN_ON(!queue_work(as->pf_wq, &as->work_busfault)); + atomic_inc(&kbdev->faults_pending); + } else { + WARN_ON(!queue_work(as->pf_wq, &as->work_pagefault)); + atomic_inc(&kbdev->faults_pending); + } +} + +int kbase_mmu_bus_fault_interrupt(struct kbase_device *kbdev, + u32 status, u32 as_nr) +{ + struct kbase_context *kctx; + unsigned long flags; + struct kbase_as *as; + struct kbase_fault *fault; + + if (WARN_ON(as_nr == MCU_AS_NR)) + return -EINVAL; + + if (WARN_ON(as_nr >= BASE_MAX_NR_AS)) + return -EINVAL; + + as = &kbdev->as[as_nr]; + fault = &as->bf_data; + fault->status = status; + fault->addr = (u64) kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_FAULTADDRESS_HI)) << 32; + fault->addr |= kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_FAULTADDRESS_LO)); + fault->protected_mode = false; + + /* report the fault to debugfs */ + kbase_as_fault_debugfs_new(kbdev, as_nr); + + kctx = kbase_ctx_sched_as_to_ctx_refcount(kbdev, as_nr); + + /* Process the bus fault interrupt for this address space */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_mmu_interrupt_process(kbdev, kctx, as, fault); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return 0; +} + +void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) +{ + const int num_as = 16; + const int pf_shift = 0; + const unsigned long as_bit_mask = (1UL << num_as) - 1; + unsigned long flags; + u32 new_mask; + u32 tmp; + u32 pf_bits = ((irq_stat >> 
pf_shift) & as_bit_mask); + + /* remember current mask */ + spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); + /* mask interrupts for now */ + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); + spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); + + while (pf_bits) { + struct kbase_context *kctx; + int as_no = ffs(pf_bits) - 1; + struct kbase_as *as = &kbdev->as[as_no]; + struct kbase_fault *fault = &as->pf_data; + + /* find faulting address */ + fault->addr = kbase_reg_read(kbdev, MMU_AS_REG(as_no, + AS_FAULTADDRESS_HI)); + fault->addr <<= 32; + fault->addr |= kbase_reg_read(kbdev, MMU_AS_REG(as_no, + AS_FAULTADDRESS_LO)); + + /* Mark the fault protected or not */ + fault->protected_mode = false; + + /* report the fault to debugfs */ + kbase_as_fault_debugfs_new(kbdev, as_no); + + /* record the fault status */ + fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, + AS_FAULTSTATUS)); + + fault->extra_addr = kbase_reg_read(kbdev, + MMU_AS_REG(as_no, AS_FAULTEXTRA_HI)); + fault->extra_addr <<= 32; + fault->extra_addr |= kbase_reg_read(kbdev, + MMU_AS_REG(as_no, AS_FAULTEXTRA_LO)); + + /* Mark page fault as handled */ + pf_bits &= ~(1UL << as_no); + + /* remove the queued PF from the mask */ + new_mask &= ~MMU_PAGE_FAULT(as_no); + + if (as_no == MCU_AS_NR) { + kbase_mmu_report_mcu_as_fault_and_reset(kbdev, fault); + /* Pointless to handle remaining faults */ + break; + } + + /* + * Refcount the kctx - it shouldn't disappear anyway, since + * Page faults _should_ only occur whilst GPU commands are + * executing, and a command causing the Page fault shouldn't + * complete until the MMU is updated. + * Reference is released at the end of bottom half of page + * fault handling. 
+ */ + kctx = kbase_ctx_sched_as_to_ctx_refcount(kbdev, as_no); + + /* Process the interrupt for this address space */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_mmu_interrupt_process(kbdev, kctx, as, fault); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } + + /* reenable interrupts */ + spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); + new_mask |= tmp; + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask); + spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); +} + +int kbase_mmu_switch_to_ir(struct kbase_context *const kctx, + struct kbase_va_region *const reg) +{ + /* Can't soft-stop the provoking job */ + return -EPERM; +} + +/** + * kbase_mmu_gpu_fault_worker() - Process a GPU fault for the device. + * + * @data: work_struct passed by queue_work() + * + * Report a GPU fatal error for all GPU command queue groups that are + * using the address space and terminate them. + */ +static void kbase_mmu_gpu_fault_worker(struct work_struct *data) +{ + struct kbase_as *const faulting_as = container_of(data, struct kbase_as, + work_gpufault); + const u32 as_nr = faulting_as->number; + struct kbase_device *const kbdev = container_of(faulting_as, struct + kbase_device, as[as_nr]); + struct kbase_fault *fault; + struct kbase_context *kctx; + u32 status; + u64 address; + u32 as_valid; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + fault = &faulting_as->gf_data; + status = fault->status; + as_valid = status & GPU_FAULTSTATUS_JASID_VALID_FLAG; + address = fault->addr; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + dev_warn(kbdev->dev, + "GPU Fault 0x%08x (%s) in AS%u at 0x%016llx\n" + "ASID_VALID: %s, ADDRESS_VALID: %s\n", + status, + kbase_gpu_exception_name( + GPU_FAULTSTATUS_EXCEPTION_TYPE_GET(status)), + as_nr, address, + as_valid ? "true" : "false", + status & GPU_FAULTSTATUS_ADDR_VALID_FLAG ? 
"true" : "false"); + + kctx = kbase_ctx_sched_as_to_ctx(kbdev, as_nr); + kbase_csf_ctx_handle_fault(kctx, fault); + kbase_ctx_sched_release_ctx_lock(kctx); + + atomic_dec(&kbdev->faults_pending); + + /* A work for GPU fault is complete. + * Till reaching here, no further GPU fault will be reported. + * Now clear the GPU fault to allow next GPU fault interrupt report. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CLEAR_FAULT); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +/** + * submit_work_gpufault() - Submit a work for GPU fault. + * + * @kbdev: Kbase device pointer + * @status: GPU fault status + * @as_nr: Faulty address space + * @address: GPU fault address + * + * This function submits a work for reporting the details of GPU fault. + */ +static void submit_work_gpufault(struct kbase_device *kbdev, u32 status, + u32 as_nr, u64 address) +{ + unsigned long flags; + struct kbase_as *const as = &kbdev->as[as_nr]; + struct kbase_context *kctx; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kctx = kbase_ctx_sched_as_to_ctx_nolock(kbdev, as_nr); + + if (kctx) { + kbase_ctx_sched_retain_ctx_refcount(kctx); + + as->gf_data = (struct kbase_fault) { + .status = status, + .addr = address, + }; + + if (WARN_ON(!queue_work(as->pf_wq, &as->work_gpufault))) + kbase_ctx_sched_release_ctx(kctx); + else + atomic_inc(&kbdev->faults_pending); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); +} + +void kbase_mmu_gpu_fault_interrupt(struct kbase_device *kbdev, u32 status, + u32 as_nr, u64 address, bool as_valid) +{ + if (!as_valid || (as_nr == MCU_AS_NR)) { + int as; + + /* Report GPU fault for all contexts (except MCU_AS_NR) in case either + * the address space is invalid or it's MCU address space. 
+ */ + for (as = 1; as < kbdev->nr_hw_address_spaces; as++) + submit_work_gpufault(kbdev, status, as, address); + } else + submit_work_gpufault(kbdev, status, as_nr, address); +} +KBASE_EXPORT_TEST_API(kbase_mmu_gpu_fault_interrupt); + +int kbase_mmu_as_init(struct kbase_device *kbdev, int i) +{ + kbdev->as[i].number = i; + kbdev->as[i].bf_data.addr = 0ULL; + kbdev->as[i].pf_data.addr = 0ULL; + kbdev->as[i].gf_data.addr = 0ULL; + + kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", 0, 1, i); + if (!kbdev->as[i].pf_wq) + return -ENOMEM; + + INIT_WORK(&kbdev->as[i].work_pagefault, kbase_mmu_page_fault_worker); + INIT_WORK(&kbdev->as[i].work_busfault, kbase_mmu_bus_fault_worker); + INIT_WORK(&kbdev->as[i].work_gpufault, kbase_mmu_gpu_fault_worker); + + return 0; +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/backend/mali_kbase_mmu_jm.c b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/backend/mali_kbase_mmu_jm.c index 2d8fb51..01ca419 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/backend/mali_kbase_mmu_jm.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/backend/mali_kbase_mmu_jm.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,21 +17,18 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /** - * Base kernel MMU management specific for Job Manager GPU. 
+ * DOC: Base kernel MMU management specific for Job Manager GPU. */ #include #include #include -#include +#include #include -#include "../mali_kbase_mmu_internal.h" -#include "mali_kbase_device_internal.h" +#include void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, struct kbase_mmu_setup * const setup) @@ -98,7 +96,7 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED); } -/** +/* * The caller must ensure it's retained the ctx to prevent it from being * scheduled out whilst it's being worked on. */ @@ -145,6 +143,7 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, kctx->pid); /* hardware counters dump fault handling */ + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) && (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING)) { @@ -153,6 +152,7 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, kbdev->hwcnt.addr_bytes))) kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT; } + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); /* Stop the kctx from submitting more jobs and cause it to be scheduled * out/rescheduled - this will occur on releasing the context's refcount @@ -187,14 +187,26 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED); } -void kbase_mmu_interrupt_process(struct kbase_device *kbdev, +/** + * kbase_mmu_interrupt_process() - Process a bus or page fault. + * @kbdev: The kbase_device the fault happened on + * @kctx: The kbase_context for the faulting address space if one was + * found. 
+ * @as: The address space that has the fault + * @fault: Data relating to the fault + * + * This function will process a fault on a specific address space + */ +static void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_as *as, struct kbase_fault *fault) { + unsigned long flags; + lockdep_assert_held(&kbdev->hwaccess_lock); dev_dbg(kbdev->dev, - "Entering %s kctx %p, as %p\n", + "Entering %s kctx %pK, as %pK\n", __func__, (void *)kctx, (void *)as); if (!kctx) { @@ -228,11 +240,13 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev, * hw counters dumping in progress, signal the * other thread that it failed */ + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); if ((kbdev->hwcnt.kctx == kctx) && (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING)) kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT; + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); /* * Stop the kctx from submitting more jobs and cause it @@ -241,14 +255,10 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev, */ kbasep_js_clear_submit_allowed(js_devdata, kctx); - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - dev_warn(kbdev->dev, - "Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n", - as->number, fault->addr, - fault->extra_addr); - else - dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n", - as->number, fault->addr); + dev_warn(kbdev->dev, + "Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n", + as->number, fault->addr, + fault->extra_addr); /* * We need to switch to UNMAPPED mode - but we do this in a @@ -262,7 +272,7 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev, } dev_dbg(kbdev->dev, - "Leaving %s kctx %p, as %p\n", + "Leaving %s kctx %pK, as %pK\n", __func__, (void *)kctx, (void *)as); } @@ -296,7 +306,6 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) unsigned long flags; u32 new_mask; u32 tmp, bf_bits, pf_bits; - bool gpu_lost = false; 
dev_dbg(kbdev->dev, "Entering %s irq_stat %u\n", __func__, irq_stat); @@ -362,22 +371,11 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) /* record the fault status */ fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, AS_FAULTSTATUS)); - - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) { - fault->extra_addr = kbase_reg_read(kbdev, - MMU_AS_REG(as_no, AS_FAULTEXTRA_HI)); - fault->extra_addr <<= 32; - fault->extra_addr |= kbase_reg_read(kbdev, - MMU_AS_REG(as_no, AS_FAULTEXTRA_LO)); - } - - /* check if we still have GPU */ - gpu_lost = kbase_is_gpu_lost(kbdev); - if (gpu_lost) { - if (kctx) - kbasep_js_runpool_release_ctx(kbdev, kctx); - return; - } + fault->extra_addr = kbase_reg_read(kbdev, + MMU_AS_REG(as_no, AS_FAULTEXTRA_HI)); + fault->extra_addr <<= 32; + fault->extra_addr |= kbase_reg_read(kbdev, + MMU_AS_REG(as_no, AS_FAULTEXTRA_LO)); if (kbase_as_has_bus_fault(as, fault)) { /* Mark bus fault as handled. @@ -418,7 +416,23 @@ int kbase_mmu_switch_to_ir(struct kbase_context *const kctx, struct kbase_va_region *const reg) { dev_dbg(kctx->kbdev->dev, - "Switching to incremental rendering for region %p\n", + "Switching to incremental rendering for region %pK\n", (void *)reg); return kbase_job_slot_softstop_start_rp(kctx, reg); } + +int kbase_mmu_as_init(struct kbase_device *kbdev, int i) +{ + kbdev->as[i].number = i; + kbdev->as[i].bf_data.addr = 0ULL; + kbdev->as[i].pf_data.addr = 0ULL; + + kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", 0, 1, i); + if (!kbdev->as[i].pf_wq) + return -ENOMEM; + + INIT_WORK(&kbdev->as[i].work_pagefault, kbase_mmu_page_fault_worker); + INIT_WORK(&kbdev->as[i].work_busfault, kbase_mmu_bus_fault_worker); + + return 0; +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.c b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.c index c2c7c4b..41ee07f 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.c +++ 
b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,13 +17,10 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /** - * @file mali_kbase_mmu.c - * Base kernel MMU management. + * DOC: Base kernel MMU management. */ #include @@ -31,20 +29,20 @@ #include #include #include -#include +#include #include #include #include #include #include -#include -#include #include #include #include #include #include +#include +#include #define KBASE_MMU_PAGE_ENTRIES 512 /** @@ -82,21 +80,20 @@ static void kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev, u64 vpfn, size_t nr, bool sync, int as_nr); /** - * kbase_mmu_sync_pgd - sync page directory to memory + * kbase_mmu_sync_pgd() - sync page directory to memory when needed. * @kbdev: Device pointer. * @handle: Address of DMA region. * @size: Size of the region to sync. * * This should be called after each page directory update. 
*/ - static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, dma_addr_t handle, size_t size) { - /* If page table is not coherent then ensure the gpu can read + /* In non-coherent system, ensure the GPU can read * the pages from memory */ - if (kbdev->system_coherency != COHERENCY_ACE) + if (kbdev->system_coherency == COHERENCY_NONE) dma_sync_single_for_device(kbdev->dev, handle, size, DMA_TO_DEVICE); } @@ -117,7 +114,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, /** * reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to * a region on a GPU page fault - * + * @kbdev: KBase device * @reg: The region that will be backed with more pages * @fault_rel_pfn: PFN of the fault relative to the start of the region * @@ -133,23 +130,32 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev, struct kbase_va_region *reg, size_t fault_rel_pfn) { - size_t multiple = reg->extent; + size_t multiple = reg->extension; size_t reg_current_size = kbase_reg_current_backed_size(reg); size_t minimum_extra = fault_rel_pfn - reg_current_size + 1; size_t remainder; if (!multiple) { - dev_warn(kbdev->dev, - "VA Region 0x%llx extent was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n", + dev_warn( + kbdev->dev, + "VA Region 0x%llx extension was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n", ((unsigned long long)reg->start_pfn) << PAGE_SHIFT); return minimum_extra; } /* Calculate the remainder to subtract from minimum_extra to make it - * the desired (rounded down) multiple of the extent. + * the desired (rounded down) multiple of the extension. 
* Depending on reg's flags, the base used for calculating multiples is * different */ + + /* multiple is based from the current backed size, even if the + * current backed size/pfn for end of committed memory are not + * themselves aligned to multiple + */ + remainder = minimum_extra % multiple; + +#if !MALI_USE_CSF if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { /* multiple is based from the top of the initial commit, which * has been allocated in such a way that (start_pfn + @@ -175,13 +181,8 @@ static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev, remainder = pages_after_initial % multiple; } - } else { - /* multiple is based from the current backed size, even if the - * current backed size/pfn for end of committed memory are not - * themselves aligned to multiple - */ - remainder = minimum_extra % multiple; } +#endif /* !MALI_USE_CSF */ if (remainder == 0) return minimum_extra; @@ -522,10 +523,15 @@ static bool page_fault_try_alloc(struct kbase_context *kctx, static void release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) { +#if MALI_USE_CSF + CSTD_UNUSED(kbdev); + kbase_ctx_sched_release_ctx_lock(kctx); +#else /* MALI_USE_CSF */ kbasep_js_runpool_release_ctx(kbdev, kctx); +#endif /* MALI_USE_CSF */ } -void page_fault_worker(struct work_struct *data) +void kbase_mmu_page_fault_worker(struct work_struct *data) { u64 fault_pfn; u32 fault_status; @@ -544,7 +550,9 @@ void page_fault_worker(struct work_struct *data) struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; int i; size_t current_backed_size; - +#if MALI_JIT_PRESSURE_LIMIT_BASE + size_t pages_trimmed = 0; +#endif faulting_as = container_of(data, struct kbase_as, work_pagefault); fault = &faulting_as->pf_data; @@ -553,7 +561,7 @@ void page_fault_worker(struct work_struct *data) kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); dev_dbg(kbdev->dev, - "Entering %s %p, fault_pfn %lld, as_no %d\n", + "Entering %s %pK, fault_pfn %lld, as_no %d\n", __func__, (void 
*)data, fault_pfn, as_no); /* Grab the context that was already refcounted in kbase_mmu_interrupt() @@ -568,6 +576,21 @@ void page_fault_worker(struct work_struct *data) KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev); +#if MALI_JIT_PRESSURE_LIMIT_BASE +#if !MALI_USE_CSF + mutex_lock(&kctx->jctx.lock); +#endif +#endif + +#ifdef CONFIG_MALI_ARBITER_SUPPORT + /* check if we still have GPU */ + if (unlikely(kbase_is_gpu_removed(kbdev))) { + dev_dbg(kbdev->dev, + "%s: GPU has been removed\n", __func__); + goto fault_done; + } +#endif + if (unlikely(fault->protected_mode)) { kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Protected mode fault", fault); @@ -611,21 +634,13 @@ void page_fault_worker(struct work_struct *data) goto fault_done; case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT: - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Address size fault", fault); - else - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Unknown fault code", fault); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Address size fault", fault); goto fault_done; case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT: - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Memory attributes fault", fault); - else - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Unknown fault code", fault); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Memory attributes fault", fault); goto fault_done; default: @@ -691,6 +706,10 @@ page_fault_retry: goto fault_done; } + if (AS_FAULTSTATUS_ACCESS_TYPE_GET(fault_status) == + AS_FAULTSTATUS_ACCESS_TYPE_READ) + dev_warn(kbdev->dev, "Grow on pagefault while reading"); + /* find the size we need to grow it by * we know the result fit in a size_t due to * kbase_region_tracker_find_region_enclosing_address @@ -758,6 +777,13 @@ page_fault_retry: pages_to_grow = 0; +#if MALI_JIT_PRESSURE_LIMIT_BASE + 
if ((region->flags & KBASE_REG_ACTIVE_JIT_ALLOC) && !pages_trimmed) { + kbase_jit_request_phys_increase(kctx, new_pages); + pages_trimmed = new_pages; + } +#endif + spin_lock(&kctx->mem_partials_lock); grown = page_fault_try_alloc(kctx, region, new_pages, &pages_to_grow, &grow_2mb_pool, prealloc_sas); @@ -818,7 +844,7 @@ page_fault_retry: if (kbase_mmu_switch_to_ir(kctx, region) >= 0) { dev_dbg(kctx->kbdev->dev, - "Get region %p for IR\n", + "Get region %pK for IR\n", (void *)region); kbase_va_region_alloc_get(kctx, region); } @@ -872,6 +898,13 @@ page_fault_retry: } } #endif + +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (pages_trimmed) { + kbase_jit_done_phys_increase(kctx, pages_trimmed); + pages_trimmed = 0; + } +#endif kbase_gpu_vm_unlock(kctx); } else { int ret = -ENOMEM; @@ -918,6 +951,17 @@ page_fault_retry: } fault_done: +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (pages_trimmed) { + kbase_gpu_vm_lock(kctx); + kbase_jit_done_phys_increase(kctx, pages_trimmed); + kbase_gpu_vm_unlock(kctx); + } +#if !MALI_USE_CSF + mutex_unlock(&kctx->jctx.lock); +#endif +#endif + for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) kfree(prealloc_sas[i]); @@ -928,7 +972,7 @@ fault_done: release_ctx(kbdev, kctx); atomic_dec(&kbdev->faults_pending); - dev_dbg(kbdev->dev, "Leaving page_fault_worker %p\n", (void *)data); + dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK\n", (void *)data); } static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, @@ -938,7 +982,11 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, int i; struct page *p; +#ifdef CONFIG_MALI_2MB_ALLOC + p = kbase_mem_pool_alloc(&kbdev->mem_pools.large[mmut->group_id]); +#else /* CONFIG_MALI_2MB_ALLOC */ p = kbase_mem_pool_alloc(&kbdev->mem_pools.small[mmut->group_id]); +#endif /* CONFIG_MALI_2MB_ALLOC */ if (!p) return 0; @@ -964,6 +1012,8 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, atomic_add(1, &kbdev->memdev.used_pages); + kbase_trace_gpu_mem_usage_inc(kbdev, 
mmut->kctx, 1); + for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) kbdev->mmu_mode->entry_invalidate(&page[i]); @@ -973,8 +1023,12 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, return page_to_phys(p); alloc_free: - kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, - false); + +#ifdef CONFIG_MALI_2MB_ALLOC + kbase_mem_pool_free(&kbdev->mem_pools.large[mmut->group_id], p, false); +#else /* CONFIG_MALI_2MB_ALLOC */ + kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, false); +#endif /* CONFIG_MALI_2MB_ALLOC */ return 0; } @@ -1200,7 +1254,11 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, */ mutex_unlock(&kctx->mmu.mmu_lock); err = kbase_mem_pool_grow( +#ifdef CONFIG_MALI_2MB_ALLOC + &kbdev->mem_pools.large[ +#else &kbdev->mem_pools.small[ +#endif kctx->mmu.group_id], MIDGARD_MMU_BOTTOMLEVEL); mutex_lock(&kctx->mmu.mmu_lock); @@ -1279,7 +1337,11 @@ static inline void cleanup_empty_pte(struct kbase_device *kbdev, tmp_pgd = kbdev->mmu_mode->pte_to_phy_addr(*pte); tmp_p = phys_to_page(tmp_pgd); +#ifdef CONFIG_MALI_2MB_ALLOC + kbase_mem_pool_free(&kbdev->mem_pools.large[mmut->group_id], +#else kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], +#endif tmp_p, false); /* If the MMU tables belong to a context then we accounted the memory @@ -1290,6 +1352,8 @@ static inline void cleanup_empty_pte(struct kbase_device *kbdev, atomic_sub(1, &mmut->kctx->used_pages); } atomic_sub(1, &kbdev->memdev.used_pages); + + kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); } u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, @@ -1361,7 +1425,11 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, */ mutex_unlock(&mmut->mmu_lock); err = kbase_mem_pool_grow( +#ifdef CONFIG_MALI_2MB_ALLOC + &kbdev->mem_pools.large[mmut->group_id], +#else &kbdev->mem_pools.small[mmut->group_id], +#endif cur_level); mutex_lock(&mmut->mmu_lock); } while (!err); @@ -1509,7 +1577,7 @@ static void 
kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx, */ dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); - if (kbase_prepare_to_reset_gpu_locked(kbdev)) + if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu_locked(kbdev); } } @@ -1522,10 +1590,29 @@ static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev, { int err; u32 op; + bool gpu_powered; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + gpu_powered = kbdev->pm.backend.gpu_powered; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + /* GPU is off so there's no need to perform flush/invalidate. + * But even if GPU is not actually powered down, after gpu_powered flag + * was set to false, it is still safe to skip the flush/invalidate. + * The TLB invalidation will anyways be performed due to AS_COMMAND_UPDATE + * which is sent when address spaces are restored after gpu_powered flag + * is set to true. Flushing of L2 cache is certainly not required as L2 + * cache is definitely off if gpu_powered is false. + */ + if (!gpu_powered) + return; if (kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { - /* GPU is off so there's no need to perform flush/invalidate */ + /* GPU has just been powered off due to system suspend. + * So again, no need to perform flush/invalidate. + */ return; } @@ -1544,9 +1631,10 @@ static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev, /* Flush failed to complete, assume the GPU has hung and * perform a reset to recover */ - dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n"); + dev_err(kbdev->dev, "Flush for GPU page table update did not complete. 
Issuing GPU soft-reset to recover\n"); - if (kbase_prepare_to_reset_gpu(kbdev)) + if (kbase_prepare_to_reset_gpu( + kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); } @@ -1577,9 +1665,13 @@ static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, return; kbdev = kctx->kbdev; +#if !MALI_USE_CSF mutex_lock(&kbdev->js_data.queue_mutex); ctx_is_in_runpool = kbase_ctx_sched_inc_refcount(kctx); mutex_unlock(&kbdev->js_data.queue_mutex); +#else + ctx_is_in_runpool = kbase_ctx_sched_inc_refcount_if_as_valid(kctx); +#endif /* !MALI_USE_CSF */ if (ctx_is_in_runpool) { KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); @@ -1621,6 +1713,7 @@ void kbase_mmu_disable(struct kbase_context *kctx) KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex); /* * The address space is being disabled, drain all knowledge of it out @@ -1832,7 +1925,11 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn, */ mutex_unlock(&kctx->mmu.mmu_lock); err = kbase_mem_pool_grow( +#ifdef CONFIG_MALI_2MB_ALLOC + &kbdev->mem_pools.large[ +#else &kbdev->mem_pools.small[ +#endif kctx->mmu.group_id], MIDGARD_MMU_BOTTOMLEVEL); mutex_lock(&kctx->mmu.mmu_lock); @@ -1927,8 +2024,11 @@ static void mmu_teardown_level(struct kbase_device *kbdev, } p = pfn_to_page(PFN_DOWN(pgd)); - +#ifdef CONFIG_MALI_2MB_ALLOC + kbase_mem_pool_free(&kbdev->mem_pools.large[mmut->group_id], +#else kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], +#endif p, true); atomic_sub(1, &kbdev->memdev.used_pages); @@ -1940,6 +2040,8 @@ static void mmu_teardown_level(struct kbase_device *kbdev, kbase_process_page_usage_dec(mmut->kctx, 1); atomic_sub(1, &mmut->kctx->used_pages); } + + kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); } int kbase_mmu_init(struct kbase_device *const kbdev, @@ -1969,7 +2071,11 @@ int kbase_mmu_init(struct kbase_device *const kbdev, int 
err; err = kbase_mem_pool_grow( +#ifdef CONFIG_MALI_2MB_ALLOC + &kbdev->mem_pools.large[mmut->group_id], +#else &kbdev->mem_pools.small[mmut->group_id], +#endif MIDGARD_MMU_BOTTOMLEVEL); if (err) { kbase_mmu_term(kbdev, mmut); @@ -2000,6 +2106,11 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) mutex_destroy(&mmut->mmu_lock); } +void kbase_mmu_as_term(struct kbase_device *kbdev, int i) +{ + destroy_workqueue(kbdev->as[i].pf_wq); +} + static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left) { @@ -2140,7 +2251,7 @@ fail_free: } KBASE_EXPORT_TEST_API(kbase_mmu_dump); -void bus_fault_worker(struct work_struct *data) +void kbase_mmu_bus_fault_worker(struct work_struct *data) { struct kbase_as *faulting_as; int as_no; @@ -2168,6 +2279,17 @@ void bus_fault_worker(struct work_struct *data) return; } +#ifdef CONFIG_MALI_ARBITER_SUPPORT + /* check if we still have GPU */ + if (unlikely(kbase_is_gpu_removed(kbdev))) { + dev_dbg(kbdev->dev, + "%s: GPU has been removed\n", __func__); + release_ctx(kbdev, kctx); + atomic_dec(&kbdev->faults_pending); + return; + } +#endif + if (unlikely(fault->protected_mode)) { kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Permission failure", fault); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.h b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.h index c9e27b1..a2d1a8e 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,13 +17,35 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KBASE_MMU_H_ #define _KBASE_MMU_H_ +/** + * kbase_mmu_as_init() - Initialising GPU address space object. + * + * This is called from device probe to initialise an address space object + * of the device. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer). + * @i: Array index of address space object. + * + * Return: 0 on success and non-zero value on failure. + */ +int kbase_mmu_as_init(struct kbase_device *kbdev, int i); + +/** + * kbase_mmu_as_term() - Terminate address space object. + * + * This is called upon device termination to destroy + * the address space object of the device. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer). + * @i: Array index of address space object. + */ +void kbase_mmu_as_term(struct kbase_device *kbdev, int i); + /** * kbase_mmu_init - Initialise an object representing GPU page tables * @@ -115,4 +138,18 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, int kbase_mmu_bus_fault_interrupt(struct kbase_device *kbdev, u32 status, u32 as_nr); +/** + * kbase_mmu_gpu_fault_interrupt() - Report a GPU fault. 
+ * @kbdev: Kbase device pointer + * @status: GPU fault status + * @as_nr: Faulty address space + * @address: GPU fault address + * @as_valid: true if address space is valid + * + * This function builds GPU fault information to submit a work + * for reporting the details of the fault. + */ +void kbase_mmu_gpu_fault_interrupt(struct kbase_device *kbdev, u32 status, + u32 as_nr, u64 address, bool as_valid); + #endif /* _KBASE_MMU_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_hw.h b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_hw.h index e6eef86..d1f1ff2 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_hw.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_hw.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2015, 2018-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015, 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /** @@ -39,6 +38,11 @@ struct kbase_context; /** * enum kbase_mmu_fault_type - MMU fault type descriptor. 
+ * @KBASE_MMU_FAULT_TYPE_UNKNOWN: unknown fault + * @KBASE_MMU_FAULT_TYPE_PAGE: page fault + * @KBASE_MMU_FAULT_TYPE_BUS: bus fault + * @KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED: page_unexpected fault + * @KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED: bus_unexpected fault */ enum kbase_mmu_fault_type { KBASE_MMU_FAULT_TYPE_UNKNOWN = 0, diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_hw_direct.c b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_hw_direct.c index f22e73e..a99b988 100755 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_hw_direct.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_hw_direct.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html.
* - * SPDX-License-Identifier: GPL-2.0 - * */ #include @@ -25,8 +24,7 @@ #include #include #include -#include -#include +#include /** * lock_region() - Generate lockaddr to lock memory region in MMU @@ -126,38 +124,33 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as) struct kbase_mmu_setup *current_setup = &as->current_setup; u64 transcfg = 0; - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) { - transcfg = current_setup->transcfg; + transcfg = current_setup->transcfg; - /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK - * Clear PTW_MEMATTR bits - */ - transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK; - /* Enable correct PTW_MEMATTR bits */ - transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK; - /* Ensure page-tables reads use read-allocate cache-policy in - * the L2 - */ - transcfg |= AS_TRANSCFG_R_ALLOCATE; - - if (kbdev->system_coherency == COHERENCY_ACE) { - /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) - * Clear PTW_SH bits - */ - transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK); - /* Enable correct PTW_SH bits */ - transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS); - } + /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK + * Clear PTW_MEMATTR bits + */ + transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK; + /* Enable correct PTW_MEMATTR bits */ + transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK; + /* Ensure page-tables reads use read-allocate cache-policy in + * the L2 + */ + transcfg |= AS_TRANSCFG_R_ALLOCATE; - kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO), - transcfg); - kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI), - (transcfg >> 32) & 0xFFFFFFFFUL); - } else { - if (kbdev->system_coherency == COHERENCY_ACE) - current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER; + if (kbdev->system_coherency != COHERENCY_NONE) { + /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) + * Clear PTW_SH bits + */ + transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK); + /* Enable correct PTW_SH bits */ + transcfg = (transcfg | 
AS_TRANSCFG_PTW_SH_OS); } + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO), + transcfg); + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI), + (transcfg >> 32) & 0xFFFFFFFFUL); + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO), current_setup->transtab & 0xFFFFFFFFUL); kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI), @@ -230,10 +223,11 @@ void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, /* Clear the page (and bus fault IRQ as well in case one occurred) */ pf_bf_mask = MMU_PAGE_FAULT(as->number); +#if !MALI_USE_CSF if (type == KBASE_MMU_FAULT_TYPE_BUS || type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) pf_bf_mask |= MMU_BUS_ERROR(as->number); - +#endif kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask); unlock: @@ -261,10 +255,11 @@ void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)) | MMU_PAGE_FAULT(as->number); +#if !MALI_USE_CSF if (type == KBASE_MMU_FAULT_TYPE_BUS || type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED) irq_mask |= MMU_BUS_ERROR(as->number); - +#endif kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask); unlock: diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_internal.h b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_internal.h index 28bd341..b8cd55f 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_internal.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_internal.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KBASE_MMU_INTERNAL_H_ @@ -26,6 +25,15 @@ void kbase_mmu_get_as_setup(struct kbase_mmu_table *mmut, struct kbase_mmu_setup * const setup); +/** + * kbase_mmu_report_mcu_as_fault_and_reset - Report page fault for all + * address spaces and reset the GPU. + * @kbdev: The kbase_device the fault happened on + * @fault: Data relating to the fault + */ +void kbase_mmu_report_mcu_as_fault_and_reset(struct kbase_device *kbdev, + struct kbase_fault *fault); + void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, struct kbase_as *as, struct kbase_fault *fault); @@ -33,24 +41,11 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, struct kbase_as *as, const char *reason_str, struct kbase_fault *fault); -/** - * kbase_mmu_interrupt_process - Process a bus or page fault. - * @kbdev The kbase_device the fault happened on - * @kctx The kbase_context for the faulting address space if one was found. - * @as The address space that has the fault - * @fault Data relating to the fault - * - * This function will process a fault on a specific address space - */ -void kbase_mmu_interrupt_process(struct kbase_device *kbdev, - struct kbase_context *kctx, struct kbase_as *as, - struct kbase_fault *fault); - /** * kbase_mmu_switch_to_ir() - Switch to incremental rendering if possible - * @kctx The kbase_context for the faulting address space. 
- * @reg Reference of a growable GPU memory region in the same context. - * Takes ownership of the reference if successful. + * @kctx: The kbase_context for the faulting address space. + * @reg: Reference of a growable GPU memory region in the same context. + * Takes ownership of the reference if successful. * * Used to switch to incremental rendering if we have nearly run out of * virtual address space in a growable memory region. @@ -60,4 +55,18 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev, int kbase_mmu_switch_to_ir(struct kbase_context *kctx, struct kbase_va_region *reg); +/** + * kbase_mmu_page_fault_worker() - Process a page fault. + * + * @data: work_struct passed by queue_work() + */ +void kbase_mmu_page_fault_worker(struct work_struct *data); + +/** + * kbase_mmu_bus_fault_worker() - Process a bus fault. + * + * @data: work_struct passed by queue_work() + */ +void kbase_mmu_bus_fault_worker(struct work_struct *data); + #endif /* _KBASE_MMU_INTERNAL_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_mode_aarch64.c b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_mode_aarch64.c index 02493e9..16b928d 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_mode_aarch64.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mmu/mali_kbase_mmu_mode_aarch64.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2014, 2016-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2014, 2016-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license.
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #include "mali_kbase.h" @@ -48,25 +47,7 @@ */ static inline void page_table_entry_set(u64 *pte, u64 phy) { -#if KERNEL_VERSION(3, 18, 13) <= LINUX_VERSION_CODE WRITE_ONCE(*pte, phy); -#else -#ifdef CONFIG_64BIT - barrier(); - *pte = phy; - barrier(); -#elif defined(CONFIG_ARM) - barrier(); - asm volatile("ldrd r0, [%1]\n\t" - "strd r0, %0\n\t" - : "=m" (*pte) - : "r" (&phy) - : "r0", "r1"); - barrier(); -#else -#error "64-bit atomic write must be implemented for your architecture" -#endif -#endif } static void mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/Kconfig b/dvalin/kernel/drivers/gpu/arm/midgard/platform/Kconfig index ef9fb96..5f0118d 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/Kconfig +++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/Kconfig @@ -1,10 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2012-2013, 2017 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2013, 2017, 2021 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software # Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. +# of such GNU license. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -15,11 +16,7 @@ # along with this program; if not, you can access it online at # http://www.gnu.org/licenses/gpl-2.0.html. 
# -# SPDX-License-Identifier: GPL-2.0 # -# - - # Add your platform specific Kconfig file here # @@ -27,4 +24,3 @@ # # Where xxx is the platform name is the name set in MALI_PLATFORM_NAME # - diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild old mode 100644 new mode 100755 index 0a82eaf..da0fe0c --- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild +++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/Kbuild @@ -1,10 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2017, 2020-2021 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software # Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. +# of such GNU license. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -15,8 +16,6 @@ # along with this program; if not, you can access it online at # http://www.gnu.org/licenses/gpl-2.0.html. 
# -# SPDX-License-Identifier: GPL-2.0 -# # @@ -28,12 +27,12 @@ endif ccflags-y += -DAMLOGIC_GPU_USE_GPPLL=$(USE_GPPLL) mali_kbase-y += \ - $(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o \ - $(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o \ - $(MALI_PLATFORM_DIR)/mali_clock.o \ - $(MALI_PLATFORM_DIR)/mpgpu.o \ - $(MALI_PLATFORM_DIR)/meson_main2.o \ - $(MALI_PLATFORM_DIR)/platform_gx.o \ - $(MALI_PLATFORM_DIR)/scaling.o \ - $(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o \ - $(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o + platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o \ + platform/$(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o \ + platform/$(MALI_PLATFORM_DIR)/mali_clock.o \ + platform/$(MALI_PLATFORM_DIR)/mpgpu.o \ + platform/$(MALI_PLATFORM_DIR)/meson_main2.o \ + platform/$(MALI_PLATFORM_DIR)/platform_gx.o \ + platform/$(MALI_PLATFORM_DIR)/scaling.o \ + platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_platform.o \ + platform/$(MALI_PLATFORM_DIR)/mali_kbase_clk_rate_trace.o diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_clk_rate_trace.c b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_clk_rate_trace.c new file mode 100644 index 0000000..4bcd585 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_clk_rate_trace.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2015, 2017-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include +#include +#include "mali_kbase_config_platform.h" + +#if MALI_USE_CSF +#include +#endif + +static void *enumerate_gpu_clk(struct kbase_device *kbdev, + unsigned int index) +{ + if (index >= kbdev->nr_clocks) + return NULL; + +#if MALI_USE_CSF + if (of_machine_is_compatible("arm,juno")) + WARN_ON(kbdev->nr_clocks != 1); +#endif + + return kbdev->clocks[index]; +} + +static unsigned long get_gpu_clk_rate(struct kbase_device *kbdev, + void *gpu_clk_handle) +{ +#if MALI_USE_CSF + /* On Juno fpga platforms, the GPU clock rate is reported as 600 MHZ at + * the boot time. Then after the first call to kbase_devfreq_target() + * the clock rate is reported as 450 MHZ and the frequency does not + * change after that. But the actual frequency at which GPU operates + * is always 50 MHz, which is equal to the frequency of system counter + * and HW counters also increment at the same rate. + * DVFS, which is a client of kbase_ipa_control, needs normalization of + * GPU_ACTIVE counter to calculate the time for which GPU has been busy. + * So for the correct normalization need to return the system counter + * frequency value. + * This is a reasonable workaround as the frequency value remains same + * throughout. It can be removed after GPUCORE-25693. 
+ */ + if (of_machine_is_compatible("arm,juno")) + return arch_timer_get_cntfrq(); +#endif + + return clk_get_rate((struct clk *)gpu_clk_handle); +} + +static int gpu_clk_notifier_register(struct kbase_device *kbdev, + void *gpu_clk_handle, struct notifier_block *nb) +{ + compiletime_assert(offsetof(struct clk_notifier_data, clk) == + offsetof(struct kbase_gpu_clk_notifier_data, gpu_clk_handle), + "mismatch in the offset of clk member"); + + compiletime_assert(sizeof(((struct clk_notifier_data *)0)->clk) == + sizeof(((struct kbase_gpu_clk_notifier_data *)0)->gpu_clk_handle), + "mismatch in the size of clk member"); + +#if MALI_USE_CSF + /* Frequency is fixed on Juno platforms */ + if (of_machine_is_compatible("arm,juno")) + return 0; +#endif + + return clk_notifier_register((struct clk *)gpu_clk_handle, nb); +} + +static void gpu_clk_notifier_unregister(struct kbase_device *kbdev, + void *gpu_clk_handle, struct notifier_block *nb) +{ +#if MALI_USE_CSF + if (of_machine_is_compatible("arm,juno")) + return; +#endif + + clk_notifier_unregister((struct clk *)gpu_clk_handle, nb); +} + +struct kbase_clk_rate_trace_op_conf clk_rate_trace_ops = { + .get_gpu_clk_rate = get_gpu_clk_rate, + .enumerate_gpu_clk = enumerate_gpu_clk, + .gpu_clk_notifier_register = gpu_clk_notifier_register, + .gpu_clk_notifier_unregister = gpu_clk_notifier_unregister, +}; diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c old mode 100644 new mode 100755 index 55a50c4..0e78128 --- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c @@ -1,20 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* - * mali_kbase_config_devicetree.c * - * Copyright (C) 2017 Amlogic, Inc. All rights reserved. 
+ * (C) COPYRIGHT 2015, 2017, 2020-2021 ARM Limited. All rights reserved. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. * */ +#include +#include +#include + #ifdef CONFIG_DEVFREQ_THERMAL #include #include diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.c b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.c new file mode 100644 index 0000000..63aa33f --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include +#include +#include "mali_kbase_config_platform.h" +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +struct kbase_platform_funcs_conf platform_funcs = { + .platform_init_func = NULL, + .platform_term_func = NULL, + .platform_late_init_func = NULL, + .platform_late_term_func = NULL, +}; diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h old mode 100644 new mode 100755 index 233a18e..6cb3b53 --- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h @@ -1,45 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* - * mali_kbase_config_platform.h * - * Copyright (C) 2017 Amlogic, Inc. All rights reserved. + * (C) COPYRIGHT 2014-2017, 2020-2021 ARM Limited. All rights reserved. 
* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * - */ - -/** - * Maximum frequency GPU will be clocked at. Given in kHz. - * This must be specified as there is no default value. - * - * Attached value: number in kHz - * Default value: NA - */ -#define GPU_FREQ_KHZ_MAX (750000) -/** - * Minimum frequency GPU will be clocked at. Given in kHz. - * This must be specified as there is no default value. + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. * - * Attached value: number in kHz - * Default value: NA - */ -#define GPU_FREQ_KHZ_MIN (100000) - -/** - * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock - * - * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func - * for the function prototype. - * - * Attached value: A kbase_cpu_clk_speed_func. 
- * Default Value: NA */ #define CPU_SPEED_FUNC (NULL) @@ -81,7 +58,12 @@ extern struct devfreq_cooling_ops t83x_model_ops; #else #define POWER_MODEL_CALLBACKS (NULL) #endif + +#define CLK_RATE_TRACE_OPS (&clk_rate_trace_ops) + extern struct kbase_pm_callback_conf pm_callbacks; +extern struct kbase_clk_rate_trace_op_conf clk_rate_trace_ops; +extern struct kbase_platform_funcs_conf platform_funcs; void mali_dev_freeze(void); void mali_dev_restore(void); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c old mode 100644 new mode 100755 index ee18718..4a0b72c --- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c @@ -1,21 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* - * mali_kbase_runtime_pm.c * - * Copyright (C) 2017 Amlogic, Inc. All rights reserved. + * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
+ * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. * */ -//#define DEBUG #include #include #include @@ -25,6 +28,7 @@ #include #include #include + #include "mali_kbase_config_platform.h" #include "mali_scaling.h" diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild index 6780e4c..b547366 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild +++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress/Kbuild @@ -1,10 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2012-2013, 2016-2017 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2013, 2016-2017, 2020-2021 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software # Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. +# of such GNU license. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -15,10 +16,8 @@ # along with this program; if not, you can access it online at # http://www.gnu.org/licenses/gpl-2.0.html. 
# -# SPDX-License-Identifier: GPL-2.0 -# # mali_kbase-y += \ - $(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \ - mali_kbase_platform_fake.o + platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \ + mali_kbase_platform_fake.o diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h index fac3cd5..d8682db 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2017, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /** diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c index d165ce2..ff1ee65 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved. 
+ * (C) COPYRIGHT 2011-2017, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,18 +17,16 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - - #include #include #include #include #include "mali_kbase_config_platform.h" +#include + #ifndef CONFIG_OF static struct kbase_io_resources io_resources = { .job_irq_number = 68, @@ -67,3 +66,14 @@ struct kbase_platform_config *kbase_get_platform_config(void) { return &versatile_platform_config; } + +#ifdef CONFIG_MALI_MIDGARD_DVFS +#if MALI_USE_CSF +int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation) +#else +int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, u32 util_cl_share[2]) +#endif +{ + return 1; +} +#endif /* CONFIG_MALI_MIDGARD_DVFS */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild index 51b408e..b547366 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild +++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild @@ -1,10 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2013-2014, 2016-2017 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2013, 2016-2017, 2020-2021 ARM Limited. All rights reserved. 
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software # Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. +# of such GNU license. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -15,10 +16,8 @@ # along with this program; if not, you can access it online at # http://www.gnu.org/licenses/gpl-2.0.html. # -# SPDX-License-Identifier: GPL-2.0 -# # mali_kbase-y += \ - $(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \ - mali_kbase_platform_fake.o + platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \ + mali_kbase_platform_fake.o diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h index fac3cd5..d8682db 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2017, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ /** diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c index efca0a5..fee6a36 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2014, 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2014, 2017, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #include @@ -25,6 +24,8 @@ #include #include +#include + #ifndef CONFIG_OF static struct kbase_io_resources io_resources = { .job_irq_number = 68, @@ -63,3 +64,14 @@ struct kbase_platform_config *kbase_get_platform_config(void) { return &versatile_platform_config; } + +#ifdef CONFIG_MALI_MIDGARD_DVFS +#if MALI_USE_CSF +int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation) +#else +int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, u32 util_cl_share[2]) +#endif +{ + return 1; +} +#endif /* CONFIG_MALI_MIDGARD_DVFS */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild index e07709c..bb92c47 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild +++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild @@ -1,10 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2012-2013, 2016-2017 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2013, 2016-2017, 2021 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software # Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. +# of such GNU license. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -15,11 +16,9 @@ # along with this program; if not, you can access it online at # http://www.gnu.org/licenses/gpl-2.0.html. 
# -# SPDX-License-Identifier: GPL-2.0 -# # mali_kbase-y += \ - $(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \ - $(MALI_PLATFORM_DIR)/mali_kbase_cpu_vexpress.o \ - mali_kbase_platform_fake.o + platform/$(MALI_PLATFORM_DIR)/mali_kbase_config_vexpress.o \ + platform/$(MALI_PLATFORM_DIR)/mali_kbase_cpu_vexpress.o \ + mali_kbase_platform_fake.o diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h index fac3cd5..d8682db 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2017, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ /** diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c index b6714b9..f6fb9aa 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2014, 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2014, 2017, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,17 +17,15 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ - - #include #include #include #include +#include + #ifndef CONFIG_OF static struct kbase_io_resources io_resources = { .job_irq_number = 75, @@ -65,3 +64,14 @@ struct kbase_platform_config *kbase_get_platform_config(void) { return &versatile_platform_config; } + +#ifdef CONFIG_MALI_MIDGARD_DVFS +#if MALI_USE_CSF +int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation) +#else +int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation, u32 util_gl_share, u32 util_cl_share[2]) +#endif +{ + return 1; +} +#endif /* CONFIG_MALI_MIDGARD_DVFS */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h b/dvalin/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h index 8778d81..9dd9253 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/protected_mode_switcher.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _PROTECTED_MODE_SWITCH_H_ @@ -28,35 +27,23 @@ struct protected_mode_device; /** * struct protected_mode_ops - Callbacks for protected mode switch operations * - * @protected_mode_enable: Callback to enable protected mode for device + * @protected_mode_enable: Callback to enable protected mode for device, and + * reset device + * Returns 0 on success, non-zero on error * @protected_mode_disable: Callback to disable protected mode for device + * Returns 0 on success, non-zero on error */ struct protected_mode_ops { - /** - * protected_mode_enable() - Enable protected mode on device - * @dev: The struct device - * - * Return: 0 on success, non-zero on error - */ int (*protected_mode_enable)( struct protected_mode_device *protected_dev); - - /** - * protected_mode_disable() - Disable protected mode on device, and - * reset device - * @dev: The struct device - * - * Return: 0 on success, non-zero on error - */ int (*protected_mode_disable)( struct protected_mode_device *protected_dev); }; /** * struct protected_mode_device - Device structure for protected mode devices - * - * @ops - Callbacks associated with this device - * @data - Pointer to device private data + * @ops: Callbacks associated with this device + * @data: Pointer to device private data * * This structure should be registered with the platform device using * platform_set_drvdata(). diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/tests/Kbuild index df16a77..ee3de7b 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/Kbuild +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/Kbuild @@ -1,10 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. 
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software # Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. +# of such GNU license. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -15,9 +16,15 @@ # along with this program; if not, you can access it online at # http://www.gnu.org/licenses/gpl-2.0.html. # -# SPDX-License-Identifier: GPL-2.0 -# # +ccflags-y += -I$(src)/include \ + -I$(src) + +subdir-ccflags-y += -I$(src)/include \ + -I$(src) + obj-$(CONFIG_MALI_KUTF) += kutf/ -obj-$(CONFIG_MALI_IRQ_LATENCY) += mali_kutf_irq_test/ +obj-$(CONFIG_MALI_KUTF_IRQ_TEST) += mali_kutf_irq_test/ +obj-$(CONFIG_MALI_KUTF_CLK_RATE_TRACE) += mali_kutf_clk_rate_trace/kernel/ + diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/Kconfig b/dvalin/kernel/drivers/gpu/arm/midgard/tests/Kconfig index fa91aea..a86e1ce 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/Kconfig +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/Kconfig @@ -1,10 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software # Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. +# of such GNU license. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -15,9 +16,42 @@ # along with this program; if not, you can access it online at # http://www.gnu.org/licenses/gpl-2.0.html. 
# -# SPDX-License-Identifier: GPL-2.0 -# # -source "drivers/gpu/arm/midgard/tests/kutf/Kconfig" -source "drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig" +menuconfig MALI_KUTF + bool "Build Mali Kernel Unit Test Framework modules" + depends on MALI_MIDGARD && MALI_DEBUG + default y if MALI_DEBUG + help + This option will build the Mali testing framework modules. + + Modules: + - kutf.ko + - kutf_test.ko + +config MALI_KUTF_IRQ_TEST + bool "Build Mali KUTF IRQ test module" + depends on MALI_KUTF + default y + help + This option will build the IRQ latency measurement test module. + It can determine the latency of the Mali GPU IRQ on your system. + + Modules: + - mali_kutf_irq_test.ko + +config MALI_KUTF_CLK_RATE_TRACE + bool "Build Mali KUTF Clock rate trace test module" + depends on MALI_KUTF + default y + help + This option will build the clock rate trace portal test module. + It can test the clocks integration into the platform and exercise some + basic trace test in the system. + + Modules: + - mali_kutf_clk_rate_trace_test_portal.ko + + +comment "Enable MALI_DEBUG for KUTF modules support" + depends on MALI_MIDGARD && !MALI_DEBUG && MALI_KUTF diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/Mconfig b/dvalin/kernel/drivers/gpu/arm/midgard/tests/Mconfig index be3fedb..167facd 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/Mconfig +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/Mconfig @@ -1,38 +1,61 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software # Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. +# of such GNU license. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. # -# A copy of the licence is included with the program, and can also be obtained -# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -# Boston, MA 02110-1301, USA. # -config UNIT_TEST_KERNEL_MODULES - bool - default y if UNIT_TEST_CODE && BUILD_KERNEL_MODULES - default n - -config BUILD_IPA_TESTS - bool - default y if UNIT_TEST_KERNEL_MODULES && MALI_DEVFREQ - default n - -config BUILD_IPA_UNIT_TESTS - bool - default y if NO_MALI && BUILD_IPA_TESTS - default n - -config BUILD_CSF_TESTS - bool - default y if UNIT_TEST_KERNEL_MODULES && GPU_HAS_CSF - default n - -config BUILD_ARBIF_TESTS - bool - default y if UNIT_TEST_KERNEL_MODULES && MALI_ARBITER_SUPPORT - default n +menuconfig MALI_KUTF + bool "Build Mali Kernel Unit Test Framework modules" + depends on MALI_MIDGARD && MALI_DEBUG + default y if BACKEND_KERNEL && MALI_DEBUG + help + This option will build the Mali testing framework modules. + + Modules: + - kutf.ko + - kutf_test.ko + +config MALI_KUTF_IRQ_TEST + bool "Build Mali KUTF IRQ test module" + depends on MALI_KUTF + default y + help + This option will build the IRQ latency measurement test module. + It can determine the latency of the Mali GPU IRQ on your system. + + Modules: + - mali_kutf_irq_test.ko +config MALI_KUTF_CLK_RATE_TRACE + bool "Build Mali KUTF Clock rate trace test module" + depends on MALI_KUTF + default y + help + This option will build the clock rate trace portal test module. + It can test the clocks integration into the platform and exercise some + basic trace test in the system. 
+ + Modules: + - mali_kutf_clk_rate_trace_test_portal.ko + + +# Enable MALI_DEBUG for KUTF modules support + +config UNIT_TEST_KERNEL_MODULES + bool + default y if UNIT_TEST_CODE && BACKEND_KERNEL + default n diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/build.bp b/dvalin/kernel/drivers/gpu/arm/midgard/tests/build.bp new file mode 100644 index 0000000..9d6137d --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/build.bp @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +bob_defaults { + name: "kernel_test_includes", + local_include_dirs: [ + "include", + "./../../", + "./../", + "./" + ], +} + +bob_defaults { + name: "kernel_test_configs", + mali_kutf: { + kbuild_options: ["CONFIG_MALI_KUTF=y"], + }, + unit_test_kernel_modules: { + kbuild_options: ["CONFIG_UNIT_TEST_KERNEL_MODULES=y"], + }, +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers.h b/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers.h index 15e168c..c4c713c 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KERNEL_UTF_HELPERS_H_ @@ -33,6 +32,14 @@ #include +/** + * kutf_helper_pending_input() - Check any pending lines sent by user space + * @context: KUTF context + * + * Return: true if there are pending lines, otherwise false + */ +bool kutf_helper_pending_input(struct kutf_context *context); + /** * kutf_helper_input_dequeue() - Dequeue a line sent by user space * @context: KUTF context diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers_user.h b/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers_user.h index 3b1300e..e147cbb 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers_user.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_helpers_user.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KERNEL_UTF_HELPERS_USER_H_ @@ -63,7 +62,8 @@ struct kutf_helper_named_val { * unrecoverable) * * Positive values indicate correct access but invalid parsing (can be - * recovered from assuming data in the future is correct) */ + * recovered from assuming data in the future is correct) + */ enum kutf_helper_err { /* No error - must be zero */ KUTF_HELPER_ERR_NONE = 0, @@ -71,14 +71,16 @@ enum kutf_helper_err { KUTF_HELPER_ERR_INVALID_NAME, /* Named value parsing of string or u64 type encountered extra * characters after the value (after the last digit for a u64 type or - * after the string end delimiter for string type) */ + * after the string end delimiter for string type) + */ KUTF_HELPER_ERR_CHARS_AFTER_VAL, /* Named value parsing of string type couldn't find the string end * delimiter. * * This cannot be encountered when the NAME="value" message exceeds the * textbuf's maximum line length, because such messages are not checked - * for an end string delimiter */ + * for an end string delimiter + */ KUTF_HELPER_ERR_NO_END_DELIMITER, /* Named value didn't parse as any of the known types */ KUTF_HELPER_ERR_INVALID_VALUE, @@ -122,7 +124,8 @@ int kutf_helper_max_str_len_for_kern(const char *val_name, int kern_buf_sz); * * Any failure will be logged on the suite's current test fixture * - * Returns 0 on success, non-zero on failure */ + * Returns 0 on success, non-zero on failure + */ int kutf_helper_send_named_str(struct kutf_context *context, const char *val_name, const char *val_str); @@ -138,7 +141,8 @@ int kutf_helper_send_named_str(struct kutf_context *context, * * Returns 0 on success. 
Negative value on failure to receive from the 'run' * file, positive value indicates an enum kutf_helper_err value for correct - * reception of data but invalid parsing */ + * reception of data but invalid parsing + */ int kutf_helper_receive_named_val( struct kutf_context *context, struct kutf_helper_named_val *named_val); @@ -165,7 +169,8 @@ int kutf_helper_receive_named_val( * - return value will be 0 to indicate success * * The rationale behind this is that we'd prefer to continue the rest of the - * test with failures propagated, rather than hitting a timeout */ + * test with failures propagated, rather than hitting a timeout + */ int kutf_helper_receive_check_val( struct kutf_helper_named_val *named_val, struct kutf_context *context, diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h b/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h index 988559d..5d4d96e 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_mem.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KERNEL_UTF_MEM_H_ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h b/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h index 49ebeb4..2fb1a47 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_resultset.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KERNEL_UTF_RESULTSET_H_ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h b/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h index 8d75f50..b9c333b 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_suite.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KERNEL_UTF_SUITE_H_ @@ -264,9 +263,10 @@ struct kutf_suite { struct list_head test_list; }; -/* ============================================================================ - Application functions -============================================================================ */ +/** =========================================================================== + * Application functions + * ============================================================================ + */ /** * kutf_create_application() - Create an in kernel test application. @@ -284,9 +284,10 @@ struct kutf_application *kutf_create_application(const char *name); */ void kutf_destroy_application(struct kutf_application *app); -/* ============================================================================ - Suite functions -============================================================================ */ +/**============================================================================ + * Suite functions + * ============================================================================ + */ /** * kutf_create_suite() - Create a kernel test suite. 
@@ -416,10 +417,10 @@ void kutf_add_test_with_filters_and_data( unsigned int filters, union kutf_callback_data test_data); - -/* ============================================================================ - Test functions -============================================================================ */ +/** =========================================================================== + * Test functions + * ============================================================================ + */ /** * kutf_test_log_result_external() - Log a result which has been created * externally into a in a standard form diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h b/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h index 25b8285..18dcc3d 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/include/kutf/kutf_utils.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _KERNEL_UTF_UTILS_H_ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/Kbuild index 2531d41..c4790bc 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/Kbuild +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/Kbuild @@ -1,10 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software # Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. +# of such GNU license. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -15,12 +16,16 @@ # along with this program; if not, you can access it online at # http://www.gnu.org/licenses/gpl-2.0.html. # -# SPDX-License-Identifier: GPL-2.0 # -# - -ccflags-y += -I$(src)/../include -obj-$(CONFIG_MALI_KUTF) += kutf.o +ifeq ($(CONFIG_MALI_KUTF),y) +obj-m += kutf.o -kutf-y := kutf_mem.o kutf_resultset.o kutf_suite.o kutf_utils.o kutf_helpers.o kutf_helpers_user.o +kutf-y := \ + kutf_mem.o \ + kutf_resultset.o \ + kutf_suite.o \ + kutf_utils.o \ + kutf_helpers.o \ + kutf_helpers_user.o +endif diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/build.bp b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/build.bp index 32eab14..89edae9 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/build.bp +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/build.bp @@ -1,23 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * - * A copy of the licence is included with the program, and can also be obtained - * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. * */ bob_kernel_module { name: "kutf", defaults: [ - "kernel_defaults", - "kutf_includes", + "mali_kbase_shared_config_defaults", + "kernel_test_configs", + "kernel_test_includes", ], srcs: [ "Kbuild", @@ -28,9 +35,8 @@ bob_kernel_module { "kutf_suite.c", "kutf_utils.c", ], - kbuild_options: ["CONFIG_MALI_KUTF=m"], enabled: false, - base_build_kutf: { + mali_kutf: { enabled: true, }, } diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers.c b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers.c index cab5add..c075428 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /* Kernel UTF test helpers */ @@ -29,10 +28,11 @@ #include #include #include +#include static DEFINE_SPINLOCK(kutf_input_lock); -static bool pending_input(struct kutf_context *context) +bool kutf_helper_pending_input(struct kutf_context *context) { bool input_pending; @@ -44,6 +44,7 @@ static bool pending_input(struct kutf_context *context) return input_pending; } +EXPORT_SYMBOL(kutf_helper_pending_input); char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size) { @@ -59,7 +60,7 @@ char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size) spin_unlock(&kutf_input_lock); err = wait_event_interruptible(context->userdata.input_waitq, - pending_input(context)); + kutf_helper_pending_input(context)); if (err) return ERR_PTR(-EINTR); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers_user.c b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers_user.c index 108fa82..a8b59f7 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers_user.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_helpers_user.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /* Kernel UTF test helpers that mirror those for kutf-userside */ @@ -42,7 +41,8 @@ static const char *get_val_type_name(enum kutf_helper_valtype valtype) * a) "<0 comparison on unsigned type" warning - if we did both upper * and lower bound check * b) incorrect range checking if it was a signed type - if we did - * upper bound check only */ + * upper bound check only + */ unsigned int type_idx = (unsigned int)valtype; if (type_idx >= (unsigned int)KUTF_HELPER_VALTYPE_COUNT) @@ -54,7 +54,8 @@ static const char *get_val_type_name(enum kutf_helper_valtype valtype) /* Check up to str_len chars of val_str to see if it's a valid value name: * * - Has between 1 and KUTF_HELPER_MAX_VAL_NAME_LEN characters before the \0 terminator - * - And, each char is in the character set [A-Z0-9_] */ + * - And, each char is in the character set [A-Z0-9_] + */ static int validate_val_name(const char *val_str, int str_len) { int i = 0; @@ -87,7 +88,8 @@ static int validate_val_name(const char *val_str, int str_len) * e.g. "str" * * That is, before any '\\', '\n' or '"' characters. 
This is so we don't have - * to escape the string */ + * to escape the string + */ static int find_quoted_string_valid_len(const char *str) { char *ptr; @@ -207,7 +209,8 @@ int kutf_helper_send_named_str(struct kutf_context *context, str_buf_sz = val_name_len + start_delim_len + val_str_len + end_delim_len + 1; /* Using kmalloc() here instead of mempool since we know we need to free - * before we return */ + * before we return + */ str_buf = kmalloc(str_buf_sz, GFP_KERNEL); if (!str_buf) { errmsg = kutf_dsprintf(&context->fixture_pool, @@ -218,7 +221,8 @@ int kutf_helper_send_named_str(struct kutf_context *context, copy_ptr = str_buf; /* Manually copy each string component instead of snprintf because - * val_str may need to end early, and less error path handling */ + * val_str may need to end early, and less error path handling + */ /* name */ memcpy(copy_ptr, val_name, val_name_len); @@ -331,7 +335,8 @@ int kutf_helper_receive_named_val( /* possibly a number value - strtoull will parse it */ err = kstrtoull(recv_str, 0, &u64val); /* unlike userspace can't get an end ptr, but if kstrtoull() - * reads characters after the number it'll report -EINVAL */ + * reads characters after the number it'll report -EINVAL + */ if (!err) { int len_remain = strnlen(recv_str, recv_sz); @@ -399,7 +404,8 @@ int kutf_helper_receive_check_val( goto out_fail_and_fixup; } - if (strcmp(named_val->val_name, expect_val_name) != 0) { + if (named_val->val_name != NULL && + strcmp(named_val->val_name, expect_val_name) != 0) { const char *msg = kutf_dsprintf(&context->fixture_pool, "Expecting to receive value named '%s' but got '%s'", expect_val_name, named_val->val_name); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c index fd98bea..716970a 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_mem.c @@ -1,11 +1,12 @@ +// 
SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /* Kernel UTF memory management functions */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c index 94ecfa4..c7572bd 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_resultset.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ /* Kernel UTF result management functions */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c index 3f15669..6745299 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014, 2017-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2017-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,12 +17,11 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ /* Kernel UTF suite, test and fixture management including user to kernel - * interaction */ + * interaction + */ #include #include @@ -598,7 +598,7 @@ static int create_fixture_variant(struct kutf_test_function *test_func, goto fail_file; } -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) +#if KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE tmp = debugfs_create_file_unsafe( #else tmp = debugfs_create_file( @@ -634,7 +634,7 @@ static void kutf_remove_test_variant(struct kutf_test_fixture *test_fix) kfree(test_fix); } -#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0) +#if KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE /* Adapting to the upstream debugfs_create_x32() change */ static int ktufp_u32_get(void *data, u64 *val) { @@ -679,7 +679,7 @@ void kutf_add_test_with_filters_and_data( } test_func->filters = filters; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0) +#if KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE tmp = debugfs_create_file_unsafe("filters", S_IROTH, test_func->dir, &test_func->filters, &kutfp_fops_x32_ro); #else @@ -692,12 +692,17 @@ void kutf_add_test_with_filters_and_data( } test_func->test_id = id; +#if KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE + debugfs_create_u32("test_id", S_IROTH, test_func->dir, + &test_func->test_id); +#else tmp = debugfs_create_u32("test_id", S_IROTH, test_func->dir, &test_func->test_id); if (!tmp) { pr_err("Failed to create debugfs file \"test_id\" when adding test %s\n", name); goto fail_file; } +#endif for (i = 0; i < suite->fixture_variants; i++) { if (create_fixture_variant(test_func, i)) { @@ -1153,7 +1158,7 @@ void kutf_test_abort(struct kutf_context *context) } EXPORT_SYMBOL(kutf_test_abort); -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) /** * init_kutf_core() - Module entry point. 
diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c index 7f5ac51..c0fb3ba 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/kutf/kutf_utils.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /* Kernel UTF utility functions */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/kernel/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/kernel/Kbuild new file mode 100644 index 0000000..027bc27 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/kernel/Kbuild @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +ifeq ($(CONFIG_MALI_KUTF_CLK_RATE_TRACE),y) +obj-m += mali_kutf_clk_rate_trace_test_portal.o + +mali_kutf_clk_rate_trace_test_portal-y := mali_kutf_clk_rate_trace_test.o +endif diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/kernel/build.bp b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/kernel/build.bp new file mode 100644 index 0000000..225ad69 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/kernel/build.bp @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +bob_kernel_module { + name: "mali_kutf_clk_rate_trace_test_portal", + defaults: [ + "mali_kbase_shared_config_defaults", + "kernel_test_configs", + "kernel_test_includes", + ], + srcs: [ + "Kbuild", + "mali_kutf_clk_rate_trace_test.c", + "../mali_kutf_clk_rate_trace_test.h", + ], + extra_symbols: [ + "mali_kbase", + "kutf", + ], + enabled: false, + mali_kutf_clk_rate_trace: { + kbuild_options: ["CONFIG_MALI_KUTF_CLK_RATE_TRACE=y"], + enabled: true, + }, +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c new file mode 100644 index 0000000..f9410a5 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c @@ -0,0 +1,957 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
 *
 */

/*
 * NOTE(review): the operands of the angle-bracket #include directives below
 * are missing in this copy of the file (apparently stripped during
 * extraction). Restore the header names from the upstream driver source
 * before building; they are deliberately not guessed here.
 */
#include
#include

#include
#include
#include
#include
#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE)
#include
#else
#include
#endif
#include "mali_kbase.h"
#include "backend/gpu/mali_kbase_irq_internal.h"
#include "backend/gpu/mali_kbase_pm_internal.h"
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"

#include
#include
#include
#include

#include "../mali_kutf_clk_rate_trace_test.h"

/* Minor number passed to kbase_find_device() when creating the fixture */
#define MINOR_FOR_FIRST_KBASE_DEV	(-1)

/* KUTF test application pointer for this test */
struct kutf_application *kutf_app;

/**
 * enum portal_server_state - Operational state of the portal service.
 * @PORTAL_STATE_NO_CLK:  Selected at fixture creation when no clock was found
 *                        in the clock rate trace manager.
 * @PORTAL_STATE_LIVE:    The portal is servicing input requests.
 * @PORTAL_STATE_CLOSING: The portal stops servicing requests; entered on a
 *                        CLOSE_PORTAL request, on end of the input stream, or
 *                        when a result worse than a warning is flagged.
 */
enum portal_server_state {
	PORTAL_STATE_NO_CLK,
	PORTAL_STATE_LIVE,
	PORTAL_STATE_CLOSING,
};

/**
 * struct clk_trace_snapshot - Trace info data on a clock.
 * @previous_rate:   Snapshot start point clock rate.
 * @current_rate:    End point clock rate. It becomes the start rate of the
 *                   next trace snapshot.
 * @rate_up_cnt:     Count in the snapshot duration when the clock trace
 *                   write is a rate of higher value than the last.
 * @rate_down_cnt:   Count in the snapshot duration when the clock trace write
 *                   is a rate of lower value than the last.
 */
struct clk_trace_snapshot {
	unsigned long previous_rate;
	unsigned long current_rate;
	u32 rate_up_cnt;
	u32 rate_down_cnt;
};

/**
 * struct kutf_clk_rate_trace_fixture_data - Fixture data for the test.
 * @kbdev:            kbase device for the GPU.
 * @listener:         Clock rate change listener structure.
 * @invoke_notify:    When true, invoke notify command is being executed.
 * @snapshot:         Clock trace update snapshot data array. A snapshot
 *                    for each clock contains info accumulated between two
 *                    GET_TRACE_SNAPSHOT requests.
 * @nclks:            Number of clocks visible to the trace portal.
 * @pm_ctx_cnt:       Net count of PM (Power Management) context INC/DEC
 *                    PM_CTX_CNT requests made to the portal. On change from
 *                    0 to 1 (INC), or, 1 to 0 (DEC), a PM context action is
 *                    triggered.
 * @total_update_cnt: Total number of received trace write callbacks.
 * @server_state:     Portal server operational state.
 * @result_msg:       Message for the test result.
 * @test_status:      Portal test result status.
 */
struct kutf_clk_rate_trace_fixture_data {
	struct kbase_device *kbdev;
	struct kbase_clk_rate_listener listener;
	bool invoke_notify;
	struct clk_trace_snapshot snapshot[BASE_MAX_NR_CLOCKS_REGULATORS];
	unsigned int nclks;
	unsigned int pm_ctx_cnt;
	unsigned int total_update_cnt;
	enum portal_server_state server_state;
	char const *result_msg;
	enum kutf_result_status test_status;
};

/**
 * struct clk_trace_portal_input - One request read from the portal input.
 * @cmd_input:     Named value filled in by kutf_helper_receive_named_val().
 * @portal_cmd:    Command decoded from the value name, or PORTAL_CMD_INVALID
 *                 when the input could not be decoded.
 * @named_val_err: Return code of kutf_helper_receive_named_val().
 */
struct clk_trace_portal_input {
	struct kutf_helper_named_val cmd_input;
	enum kbasep_clk_rate_trace_req portal_cmd;
	int named_val_err;
};

/**
 * struct kbasep_cmd_name_pair - Associates a portal command with its name.
 * @cmd:  Portal command enumeration value.
 * @name: Name string the command is requested with.
 */
struct kbasep_cmd_name_pair {
	enum kbasep_clk_rate_trace_req cmd;
	const char *name;
};

/* Lookup table searched by name when decoding a request. Its size is checked
 * at build time against PORTAL_TOTAL_CMDS by the command processing code.
 */
struct kbasep_cmd_name_pair kbasep_portal_cmd_name_map[] = {
	{ PORTAL_CMD_GET_PLATFORM, GET_PLATFORM },
	{ PORTAL_CMD_GET_CLK_RATE_MGR, GET_CLK_RATE_MGR },
	{ PORTAL_CMD_GET_CLK_RATE_TRACE, GET_CLK_RATE_TRACE },
	{ PORTAL_CMD_GET_TRACE_SNAPSHOT, GET_TRACE_SNAPSHOT },
	{ PORTAL_CMD_INC_PM_CTX_CNT, INC_PM_CTX_CNT },
	{ PORTAL_CMD_DEC_PM_CTX_CNT, DEC_PM_CTX_CNT },
	{ PORTAL_CMD_CLOSE_PORTAL, CLOSE_PORTAL },
	{ PORTAL_CMD_INVOKE_NOTIFY_42KHZ, INVOKE_NOTIFY_42KHZ },
};

/* Global pointer for the kutf_portal_trace_write() to use. When
 * this pointer is engaged, new requests for create fixture will fail
 * hence limiting the use of the portal at any time to a singleton.
+ */ +struct kutf_clk_rate_trace_fixture_data *g_ptr_portal_data; + +#define PORTAL_MSG_LEN (KUTF_MAX_LINE_LENGTH - MAX_REPLY_NAME_LEN) +static char portal_msg_buf[PORTAL_MSG_LEN]; + +static void kutf_portal_trace_write( + struct kbase_clk_rate_listener *listener, + u32 index, u32 new_rate) +{ + struct clk_trace_snapshot *snapshot; + struct kutf_clk_rate_trace_fixture_data *data; + + if (listener == NULL) { + pr_err("%s - index: %u, new_rate: %u, listener is NULL\n", + __func__, index, new_rate); + return; + } + + data = container_of(listener, struct kutf_clk_rate_trace_fixture_data, + listener); + + lockdep_assert_held(&data->kbdev->pm.clk_rtm.lock); + + if (WARN_ON(g_ptr_portal_data == NULL)) + return; + if (WARN_ON(index >= g_ptr_portal_data->nclks)) + return; + + /* This callback is triggered by invoke notify command, skipping */ + if (data->invoke_notify) + return; + + snapshot = &g_ptr_portal_data->snapshot[index]; + if (new_rate > snapshot->current_rate) + snapshot->rate_up_cnt++; + else + snapshot->rate_down_cnt++; + snapshot->current_rate = new_rate; + g_ptr_portal_data->total_update_cnt++; +} + +static void kutf_set_pm_ctx_active(struct kutf_context *context) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + + if (WARN_ON(data->pm_ctx_cnt != 1)) + return; + + kbase_pm_context_active(data->kbdev); + kbase_pm_wait_for_desired_state(data->kbdev); +#if !MALI_USE_CSF + kbase_pm_request_gpu_cycle_counter(data->kbdev); +#endif +} + +static void kutf_set_pm_ctx_idle(struct kutf_context *context) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + + if (WARN_ON(data->pm_ctx_cnt > 0)) + return; +#if !MALI_USE_CSF + kbase_pm_release_gpu_cycle_counter(data->kbdev); +#endif + kbase_pm_context_idle(data->kbdev); +} + +static char const *kutf_clk_trace_do_change_pm_ctx(struct kutf_context *context, + struct clk_trace_portal_input *cmd) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + int seq = 
cmd->cmd_input.u.val_u64 & 0xFF; + const unsigned int cnt = data->pm_ctx_cnt; + const enum kbasep_clk_rate_trace_req req = cmd->portal_cmd; + char const *errmsg = NULL; + + WARN_ON(req != PORTAL_CMD_INC_PM_CTX_CNT && + req != PORTAL_CMD_DEC_PM_CTX_CNT); + + if (req == PORTAL_CMD_INC_PM_CTX_CNT && cnt < UINT_MAX) { + data->pm_ctx_cnt++; + if (data->pm_ctx_cnt == 1) + kutf_set_pm_ctx_active(context); + } + + if (req == PORTAL_CMD_DEC_PM_CTX_CNT && cnt > 0) { + data->pm_ctx_cnt--; + if (data->pm_ctx_cnt == 0) + kutf_set_pm_ctx_idle(context); + } + + /* Skip the length check, no chance of overflow for two ints */ + snprintf(portal_msg_buf, PORTAL_MSG_LEN, + "{SEQ:%d, PM_CTX_CNT:%u}", seq, data->pm_ctx_cnt); + + if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { + pr_warn("Error in sending ack for adjusting pm_ctx_cnt\n"); + errmsg = kutf_dsprintf(&context->fixture_pool, + "Error in sending ack for adjusting pm_ctx_cnt"); + } + + return errmsg; +} + +static char const *kutf_clk_trace_do_get_rate(struct kutf_context *context, + struct clk_trace_portal_input *cmd) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + struct kbase_device *kbdev = data->kbdev; + int seq = cmd->cmd_input.u.val_u64 & 0xFF; + unsigned long rate; + bool idle; + int ret; + int i; + char const *errmsg = NULL; + + WARN_ON((cmd->portal_cmd != PORTAL_CMD_GET_CLK_RATE_MGR) && + (cmd->portal_cmd != PORTAL_CMD_GET_CLK_RATE_TRACE)); + + ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN, + "{SEQ:%d, RATE:[", seq); + + for (i = 0; i < data->nclks; i++) { + spin_lock(&kbdev->pm.clk_rtm.lock); + if (cmd->portal_cmd == PORTAL_CMD_GET_CLK_RATE_MGR) + rate = kbdev->pm.clk_rtm.clks[i]->clock_val; + else + rate = data->snapshot[i].current_rate; + idle = kbdev->pm.clk_rtm.gpu_idle; + spin_unlock(&kbdev->pm.clk_rtm.lock); + + if ((i + 1) == data->nclks) + ret += snprintf(portal_msg_buf + ret, + PORTAL_MSG_LEN - ret, "0x%lx], GPU_IDLE:%d}", + rate, idle); + else + ret += 
snprintf(portal_msg_buf + ret, + PORTAL_MSG_LEN - ret, "0x%lx, ", rate); + + if (ret >= PORTAL_MSG_LEN) { + pr_warn("Message buf overflow with rate array data\n"); + return kutf_dsprintf(&context->fixture_pool, + "Message buf overflow with rate array data"); + } + } + + if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { + pr_warn("Error in sending back rate array\n"); + errmsg = kutf_dsprintf(&context->fixture_pool, + "Error in sending rate array"); + } + + return errmsg; +} + +/** + * kutf_clk_trace_do_get_snapshot() - Send back the current snapshot + * @context: KUTF context + * @cmd: The decoded portal input request + * + * The accumulated clock rate trace information is kept inside as an snapshot + * record. A user request of getting the snapshot marks the closure of the + * current snapshot record, and the start of the next one. The response + * message contains the current snapshot record, with each clock's + * data sequentially placed inside (array marker) [ ]. + */ +static char const *kutf_clk_trace_do_get_snapshot(struct kutf_context *context, + struct clk_trace_portal_input *cmd) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + struct clk_trace_snapshot snapshot; + int seq = cmd->cmd_input.u.val_u64 & 0xFF; + int ret; + int i; + char const *fmt; + char const *errmsg = NULL; + + WARN_ON(cmd->portal_cmd != PORTAL_CMD_GET_TRACE_SNAPSHOT); + + ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN, + "{SEQ:%d, SNAPSHOT_ARRAY:[", seq); + + for (i = 0; i < data->nclks; i++) { + spin_lock(&data->kbdev->pm.clk_rtm.lock); + /* copy out the snapshot of the clock */ + snapshot = data->snapshot[i]; + /* Set the next snapshot start condition */ + data->snapshot[i].previous_rate = snapshot.current_rate; + data->snapshot[i].rate_up_cnt = 0; + data->snapshot[i].rate_down_cnt = 0; + spin_unlock(&data->kbdev->pm.clk_rtm.lock); + + /* Check i corresponding to the last clock */ + if ((i + 1) == data->nclks) + fmt = "(0x%lx, 0x%lx, %u, %u)]}"; + 
else + fmt = "(0x%lx, 0x%lx, %u, %u), "; + ret += snprintf(portal_msg_buf + ret, PORTAL_MSG_LEN - ret, + fmt, snapshot.previous_rate, snapshot.current_rate, + snapshot.rate_up_cnt, snapshot.rate_down_cnt); + if (ret >= PORTAL_MSG_LEN) { + pr_warn("Message buf overflow with snapshot data\n"); + return kutf_dsprintf(&context->fixture_pool, + "Message buf overflow with snapshot data"); + } + } + + if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { + pr_warn("Error in sending back snapshot array\n"); + errmsg = kutf_dsprintf(&context->fixture_pool, + "Error in sending snapshot array"); + } + + return errmsg; +} + +/** + * kutf_clk_trace_do_invoke_notify_42k() - Invokes the stored notification callback + * @context: KUTF context + * @cmd: The decoded portal input request + * + * Invokes frequency change notification callbacks with a fake + * GPU frequency 42 kHz for the top clock domain. + */ +static char const *kutf_clk_trace_do_invoke_notify_42k( + struct kutf_context *context, + struct clk_trace_portal_input *cmd) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + int seq = cmd->cmd_input.u.val_u64 & 0xFF; + const unsigned long new_rate_hz = 42000; + int ret; + char const *errmsg = NULL; + struct kbase_clk_rate_trace_manager *clk_rtm = &data->kbdev->pm.clk_rtm; + + WARN_ON(cmd->portal_cmd != PORTAL_CMD_INVOKE_NOTIFY_42KHZ); + + spin_lock(&clk_rtm->lock); + + data->invoke_notify = true; + kbase_clk_rate_trace_manager_notify_all( + clk_rtm, 0, new_rate_hz); + data->invoke_notify = false; + + spin_unlock(&clk_rtm->lock); + + ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN, + "{SEQ:%d, HZ:%lu}", seq, new_rate_hz); + + if (ret >= PORTAL_MSG_LEN) { + pr_warn("Message buf overflow with invoked data\n"); + return kutf_dsprintf(&context->fixture_pool, + "Message buf overflow with invoked data"); + } + + if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { + pr_warn("Error in sending ack for " INVOKE_NOTIFY_42KHZ "request\n"); + 
errmsg = kutf_dsprintf(&context->fixture_pool, + "Error in sending ack for " INVOKE_NOTIFY_42KHZ "request"); + } + + return errmsg; +} + +static char const *kutf_clk_trace_do_close_portal(struct kutf_context *context, + struct clk_trace_portal_input *cmd) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + int seq = cmd->cmd_input.u.val_u64 & 0xFF; + char const *errmsg = NULL; + + WARN_ON(cmd->portal_cmd != PORTAL_CMD_CLOSE_PORTAL); + + data->server_state = PORTAL_STATE_CLOSING; + + /* Skip the length check, no chance of overflow for two ints */ + snprintf(portal_msg_buf, PORTAL_MSG_LEN, + "{SEQ:%d, PM_CTX_CNT:%u}", seq, data->pm_ctx_cnt); + + if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { + pr_warn("Error in sending ack for " CLOSE_PORTAL "reuquest\n"); + errmsg = kutf_dsprintf(&context->fixture_pool, + "Error in sending ack for " CLOSE_PORTAL "reuquest"); + } + + return errmsg; +} + +/** + * kutf_clk_trace_do_get_platform() - Gets platform information + * @context: KUTF context + * @cmd: The decoded portal input request + * + * Checks the gpu node in the device tree to see if arbitration is enabled + * If so determines device tree whether platform is PV or PTM + * + * Return: A string to indicate the platform (PV/PTM/GPU/UNKNOWN) + */ +static char const *kutf_clk_trace_do_get_platform( + struct kutf_context *context, + struct clk_trace_portal_input *cmd) +{ + int seq = cmd->cmd_input.u.val_u64 & 0xFF; + char const *errmsg = NULL; + const void *arbiter_if_node = NULL; + const void *power_node = NULL; + const char *platform = "GPU"; +#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF) + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + + arbiter_if_node = + of_get_property(data->kbdev->dev->of_node, "arbiter_if", NULL); +#endif + if (arbiter_if_node) { + power_node = of_find_compatible_node(NULL, NULL, + "arm,mali-gpu-power"); + if (power_node) { + platform = "PV"; + } else { + power_node = 
of_find_compatible_node(NULL, NULL, + "arm,mali-ptm"); + if (power_node) + platform = "PTM"; + else + platform = "UNKNOWN"; + } + } else { + platform = "GPU"; + } + + pr_debug("%s - platform is %s\n", __func__, platform); + snprintf(portal_msg_buf, PORTAL_MSG_LEN, + "{SEQ:%d, PLATFORM:%s}", seq, platform); + + WARN_ON(cmd->portal_cmd != PORTAL_CMD_GET_PLATFORM); + + if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { + pr_warn("Error in sending ack for " CLOSE_PORTAL "reuquest\n"); + errmsg = kutf_dsprintf(&context->fixture_pool, + "Error in sending ack for " GET_PLATFORM "request"); + } + + return errmsg; +} + +static bool kutf_clk_trace_dequeue_portal_cmd(struct kutf_context *context, + struct clk_trace_portal_input *cmd) +{ + int i; + int err = kutf_helper_receive_named_val(context, &cmd->cmd_input); + + cmd->named_val_err = err; + if (err == KUTF_HELPER_ERR_NONE && + cmd->cmd_input.type == KUTF_HELPER_VALTYPE_U64) { + /* All portal request commands are of format (named u64): + * CMD_NAME=1234 + * where, 1234 is a (variable) sequence number tag. 
+ */ + for (i = 0; i < PORTAL_TOTAL_CMDS; i++) { + if (strcmp(cmd->cmd_input.val_name, + kbasep_portal_cmd_name_map[i].name)) + continue; + + cmd->portal_cmd = kbasep_portal_cmd_name_map[i].cmd; + return true; + } + } + + cmd->portal_cmd = PORTAL_CMD_INVALID; + return false; +} + +static void kutf_clk_trace_flag_result(struct kutf_context *context, + enum kutf_result_status result, char const *msg) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + + if (result > data->test_status) { + data->test_status = result; + if (msg) + data->result_msg = msg; + if (data->server_state == PORTAL_STATE_LIVE && + result > KUTF_RESULT_WARN) { + data->server_state = PORTAL_STATE_CLOSING; + } + } +} + +static bool kutf_clk_trace_process_portal_cmd(struct kutf_context *context, + struct clk_trace_portal_input *cmd) +{ + char const *errmsg = NULL; + + BUILD_BUG_ON(ARRAY_SIZE(kbasep_portal_cmd_name_map) != + PORTAL_TOTAL_CMDS); + WARN_ON(cmd->portal_cmd == PORTAL_CMD_INVALID); + + switch (cmd->portal_cmd) { + case PORTAL_CMD_GET_PLATFORM: + errmsg = kutf_clk_trace_do_get_platform(context, cmd); + break; + case PORTAL_CMD_GET_CLK_RATE_MGR: + /* Fall through */ + case PORTAL_CMD_GET_CLK_RATE_TRACE: + errmsg = kutf_clk_trace_do_get_rate(context, cmd); + break; + case PORTAL_CMD_GET_TRACE_SNAPSHOT: + errmsg = kutf_clk_trace_do_get_snapshot(context, cmd); + break; + case PORTAL_CMD_INC_PM_CTX_CNT: + /* Fall through */ + case PORTAL_CMD_DEC_PM_CTX_CNT: + errmsg = kutf_clk_trace_do_change_pm_ctx(context, cmd); + break; + case PORTAL_CMD_CLOSE_PORTAL: + errmsg = kutf_clk_trace_do_close_portal(context, cmd); + break; + case PORTAL_CMD_INVOKE_NOTIFY_42KHZ: + errmsg = kutf_clk_trace_do_invoke_notify_42k(context, cmd); + break; + default: + pr_warn("Don't know how to handle portal_cmd: %d, abort session.\n", + cmd->portal_cmd); + errmsg = kutf_dsprintf(&context->fixture_pool, + "Don't know how to handle portal_cmd: %d", + cmd->portal_cmd); + break; + } + + if (errmsg) + 
kutf_clk_trace_flag_result(context, KUTF_RESULT_FAIL, errmsg); + + return (errmsg == NULL); +} + +/** + * kutf_clk_trace_do_nack_response() - respond a NACK to erroneous input + * @context: KUTF context + * @cmd: The erroneous input request + * + * This function deal with an erroneous input request, and respond with + * a proper 'NACK' message. + */ +static int kutf_clk_trace_do_nack_response(struct kutf_context *context, + struct clk_trace_portal_input *cmd) +{ + int seq; + int err; + char const *errmsg = NULL; + + WARN_ON(cmd->portal_cmd != PORTAL_CMD_INVALID); + + if (cmd->named_val_err == KUTF_HELPER_ERR_NONE && + cmd->cmd_input.type == KUTF_HELPER_VALTYPE_U64) { + /* Keep seq number as % 256 */ + seq = cmd->cmd_input.u.val_u64 & 255; + snprintf(portal_msg_buf, PORTAL_MSG_LEN, + "{SEQ:%d, MSG: Unknown command '%s'.}", seq, + cmd->cmd_input.val_name); + err = kutf_helper_send_named_str(context, "NACK", + portal_msg_buf); + } else + err = kutf_helper_send_named_str(context, "NACK", + "Wrong portal cmd format (Ref example: CMD_NAME=0X16)"); + + if (err) { + errmsg = kutf_dsprintf(&context->fixture_pool, + "Failed to send portal NACK response"); + kutf_clk_trace_flag_result(context, KUTF_RESULT_FAIL, errmsg); + } + + return err; +} + +/** + * kutf_clk_trace_barebone_check() - Sanity test on the clock tracing + * @context: KUTF context + * + * This function carries out some basic test on the tracing operation: + * 1). GPU idle on test start, trace rate should be 0 (low power state) + * 2). Make sure GPU is powered up, the trace rate should match + * that from the clcok manager's internal recorded rate + * 3). If the GPU active transition occurs following 2), there + * must be rate change event from tracing. 
+ */ +void kutf_clk_trace_barebone_check(struct kutf_context *context) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + struct kbase_device *kbdev = data->kbdev; + bool fail = false; + bool idle[2] = { false }; + char const *msg = NULL; + int i; + + /* Check consistency if gpu happens to be idle */ + spin_lock(&kbdev->pm.clk_rtm.lock); + idle[0] = kbdev->pm.clk_rtm.gpu_idle; + if (kbdev->pm.clk_rtm.gpu_idle) { + for (i = 0; i < data->nclks; i++) { + if (data->snapshot[i].current_rate) { + /* Idle should have a rate 0 */ + fail = true; + break; + } + } + } + spin_unlock(&kbdev->pm.clk_rtm.lock); + if (fail) { + msg = kutf_dsprintf(&context->fixture_pool, + "GPU Idle not yielding 0-rate"); + pr_err("Trace did not see idle rate\n"); + } else { + /* Make local PM active if not done so yet */ + if (data->pm_ctx_cnt == 0) { + /* Ensure the GPU is powered */ + data->pm_ctx_cnt++; + kutf_set_pm_ctx_active(context); + } + /* Checking the rate is consistent */ + spin_lock(&kbdev->pm.clk_rtm.lock); + idle[1] = kbdev->pm.clk_rtm.gpu_idle; + for (i = 0; i < data->nclks; i++) { + /* Rate match between the manager and the trace */ + if (kbdev->pm.clk_rtm.clks[i]->clock_val != + data->snapshot[i].current_rate) { + fail = true; + break; + } + } + spin_unlock(&kbdev->pm.clk_rtm.lock); + + if (idle[1]) { + msg = kutf_dsprintf(&context->fixture_pool, + "GPU still idle after set_pm_ctx_active"); + pr_err("GPU still idle after set_pm_ctx_active\n"); + } + + if (!msg && fail) { + msg = kutf_dsprintf(&context->fixture_pool, + "Trace rate not matching Clk manager's read"); + pr_err("Trace rate not matching Clk manager's read\n"); + } + } + + if (!msg && idle[0] && !idle[1] && !data->total_update_cnt) { + msg = kutf_dsprintf(&context->fixture_pool, + "Trace update did not occur"); + pr_err("Trace update did not occur\n"); + } + if (msg) + kutf_clk_trace_flag_result(context, KUTF_RESULT_FAIL, msg); + else if (!data->total_update_cnt) { + msg = 
kutf_dsprintf(&context->fixture_pool, + "No trace update seen during the test!"); + kutf_clk_trace_flag_result(context, KUTF_RESULT_WARN, msg); + } +} + +static bool kutf_clk_trace_end_of_stream(struct clk_trace_portal_input *cmd) +{ + return (cmd->named_val_err == -EBUSY); +} + +void kutf_clk_trace_no_clks_dummy(struct kutf_context *context) +{ + struct clk_trace_portal_input cmd; + unsigned long timeout = jiffies + HZ * 2; + bool has_cmd; + + while (time_before(jiffies, timeout)) { + if (kutf_helper_pending_input(context)) { + has_cmd = kutf_clk_trace_dequeue_portal_cmd(context, + &cmd); + if (!has_cmd && kutf_clk_trace_end_of_stream(&cmd)) + break; + + kutf_helper_send_named_str(context, "NACK", + "Fatal! No clocks visible, aborting"); + } + msleep(20); + } + + kutf_clk_trace_flag_result(context, KUTF_RESULT_FATAL, + "No clocks visble to the portal"); +} + +/** + * mali_kutf_clk_rate_trace_test_portal() - Service portal input + * @context: KUTF context + * + * The test portal operates on input requests. If the input request is one + * of the recognized portal commands, it handles it accordingly. Otherwise + * a negative response 'NACK' is returned. The portal service terminates + * when a 'CLOSE_PORTAL' request is received, or due to an internal error. + * Both case would result in the server_state transitioned to CLOSING. 
+ * + * If the portal is closed on request, a sanity test on the clock rate + * trace operation is undertaken via function: + * kutf_clk_trace_barebone_check(); + */ +static void mali_kutf_clk_rate_trace_test_portal(struct kutf_context *context) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + struct clk_trace_portal_input new_cmd; + + pr_debug("Test portal service start\n"); + + while (data->server_state == PORTAL_STATE_LIVE) { + if (kutf_clk_trace_dequeue_portal_cmd(context, &new_cmd)) + kutf_clk_trace_process_portal_cmd(context, &new_cmd); + else if (kutf_clk_trace_end_of_stream(&new_cmd)) + /* Dequeue on portal input, end of stream */ + data->server_state = PORTAL_STATE_CLOSING; + else + kutf_clk_trace_do_nack_response(context, &new_cmd); + } + + /* Closing, exhausting all the pending inputs with NACKs. */ + if (data->server_state == PORTAL_STATE_CLOSING) { + while (kutf_helper_pending_input(context) && + (kutf_clk_trace_dequeue_portal_cmd(context, &new_cmd) || + !kutf_clk_trace_end_of_stream(&new_cmd))) { + kutf_helper_send_named_str(context, "NACK", + "Portal closing down"); + } + } + + /* If no portal error, do a barebone test here irrespective + * whatever the portal live session has been testing, which + * is entirely driven by the user-side via portal requests. + */ + if (data->test_status <= KUTF_RESULT_WARN) { + if (data->server_state != PORTAL_STATE_NO_CLK) + kutf_clk_trace_barebone_check(context); + else { + /* No clocks case, NACK 2-sec for the fatal situation */ + kutf_clk_trace_no_clks_dummy(context); + } + } + + /* If we have changed pm_ctx count, drop it back */ + if (data->pm_ctx_cnt) { + /* Although we count on portal requests, it only has material + * impact when from 0 -> 1. So the reverse is a simple one off. 
+ */ + data->pm_ctx_cnt = 0; + kutf_set_pm_ctx_idle(context); + } + + /* Finally log the test result line */ + if (data->test_status < KUTF_RESULT_WARN) + kutf_test_pass(context, data->result_msg); + else if (data->test_status == KUTF_RESULT_WARN) + kutf_test_warn(context, data->result_msg); + else if (data->test_status == KUTF_RESULT_FATAL) + kutf_test_fatal(context, data->result_msg); + else + kutf_test_fail(context, data->result_msg); + + pr_debug("Test end\n"); +} + +/** + * mali_kutf_clk_rate_trace_create_fixture() - Creates the fixture data + * required for mali_kutf_clk_rate_trace_test_portal. + * @context: KUTF context. + * + * Return: Fixture data created on success or NULL on failure + */ +static void *mali_kutf_clk_rate_trace_create_fixture( + struct kutf_context *context) +{ + struct kutf_clk_rate_trace_fixture_data *data; + struct kbase_device *kbdev; + unsigned long rate; + int i; + + /* Acquire the kbase device */ + pr_debug("Finding device\n"); + kbdev = kbase_find_device(MINOR_FOR_FIRST_KBASE_DEV); + if (kbdev == NULL) { + kutf_test_fail(context, "Failed to find kbase device"); + return NULL; + } + + pr_debug("Creating fixture\n"); + data = kutf_mempool_alloc(&context->fixture_pool, + sizeof(struct kutf_clk_rate_trace_fixture_data)); + if (!data) + return NULL; + + *data = (const struct kutf_clk_rate_trace_fixture_data) { 0 }; + pr_debug("Hooking up the test portal to kbdev clk rate trace\n"); + spin_lock(&kbdev->pm.clk_rtm.lock); + + if (g_ptr_portal_data != NULL) { + pr_warn("Test portal is already in use, run aborted\n"); + kutf_test_fail(context, "Portal allows single session only"); + spin_unlock(&kbdev->pm.clk_rtm.lock); + return NULL; + } + + for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { + if (kbdev->pm.clk_rtm.clks[i]) { + data->nclks++; + if (kbdev->pm.clk_rtm.gpu_idle) + rate = 0; + else + rate = kbdev->pm.clk_rtm.clks[i]->clock_val; + data->snapshot[i].previous_rate = rate; + data->snapshot[i].current_rate = rate; + } + } + + 
spin_unlock(&kbdev->pm.clk_rtm.lock); + + if (data->nclks) { + /* Subscribe this test server portal */ + data->listener.notify = kutf_portal_trace_write; + data->invoke_notify = false; + + kbase_clk_rate_trace_manager_subscribe( + &kbdev->pm.clk_rtm, &data->listener); + /* Update the kutf_server_portal fixture_data pointer */ + g_ptr_portal_data = data; + } + + data->kbdev = kbdev; + data->result_msg = NULL; + data->test_status = KUTF_RESULT_PASS; + + if (data->nclks == 0) { + data->server_state = PORTAL_STATE_NO_CLK; + pr_debug("Kbdev has no clocks for rate trace"); + } else + data->server_state = PORTAL_STATE_LIVE; + + pr_debug("Created fixture\n"); + + return data; +} + +/** + * Destroy fixture data previously created by + * mali_kutf_clk_rate_trace_create_fixture. + * + * @context: KUTF context. + */ +static void mali_kutf_clk_rate_trace_remove_fixture( + struct kutf_context *context) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + struct kbase_device *kbdev = data->kbdev; + + if (data->nclks) { + /* Clean up the portal trace write arrangement */ + g_ptr_portal_data = NULL; + + kbase_clk_rate_trace_manager_unsubscribe( + &kbdev->pm.clk_rtm, &data->listener); + } + pr_debug("Destroying fixture\n"); + kbase_release_device(kbdev); + pr_debug("Destroyed fixture\n"); +} + +/** + * mali_kutf_clk_rate_trace_test_module_init() - Entry point for test mdoule. 
+ */ +int mali_kutf_clk_rate_trace_test_module_init(void) +{ + struct kutf_suite *suite; + unsigned int filters; + union kutf_callback_data suite_data = { 0 }; + + pr_debug("Creating app\n"); + + g_ptr_portal_data = NULL; + kutf_app = kutf_create_application(CLK_RATE_TRACE_APP_NAME); + + if (!kutf_app) { + pr_warn("Creation of app " CLK_RATE_TRACE_APP_NAME + " failed!\n"); + return -ENOMEM; + } + + pr_debug("Create suite %s\n", CLK_RATE_TRACE_SUITE_NAME); + suite = kutf_create_suite_with_filters_and_data( + kutf_app, CLK_RATE_TRACE_SUITE_NAME, 1, + mali_kutf_clk_rate_trace_create_fixture, + mali_kutf_clk_rate_trace_remove_fixture, + KUTF_F_TEST_GENERIC, + suite_data); + + if (!suite) { + pr_warn("Creation of suite %s failed!\n", + CLK_RATE_TRACE_SUITE_NAME); + kutf_destroy_application(kutf_app); + return -ENOMEM; + } + + filters = suite->suite_default_flags; + kutf_add_test_with_filters( + suite, 0x0, CLK_RATE_TRACE_PORTAL, + mali_kutf_clk_rate_trace_test_portal, + filters); + + pr_debug("Init complete\n"); + return 0; +} + +/** + * mali_kutf_clk_rate_trace_test_module_exit() - Module exit point for this + * test. + */ +void mali_kutf_clk_rate_trace_test_module_exit(void) +{ + pr_debug("Exit start\n"); + kutf_destroy_application(kutf_app); + pr_debug("Exit complete\n"); +} + + +module_init(mali_kutf_clk_rate_trace_test_module_init); +module_exit(mali_kutf_clk_rate_trace_test_module_exit); + +MODULE_LICENSE("GPL"); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h new file mode 100644 index 0000000..f37efa8 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. 
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

#ifndef _KUTF_CLK_RATE_TRACE_TEST_H_
#define _KUTF_CLK_RATE_TRACE_TEST_H_

/* Names under which the test module registers its KUTF application, its
 * suite and its single (portal) test.
 */
#define CLK_RATE_TRACE_APP_NAME "clk_rate_trace"
#define CLK_RATE_TRACE_SUITE_NAME "rate_trace"
#define CLK_RATE_TRACE_PORTAL "portal"

/**
 * enum kbasep_clk_rate_trace_req - request command to the clock rate trace
 *                                  service portal.
 *
 * @PORTAL_CMD_GET_PLATFORM:       Request the platform that the tests are
 *                                 to be run on.
 * @PORTAL_CMD_GET_CLK_RATE_MGR:   Request the clock trace manager internal
 *                                 data record. On a positive acknowledgement
 *                                 the prevailing clock rates and the GPU idle
 *                                 condition flag are returned.
 * @PORTAL_CMD_GET_CLK_RATE_TRACE: Request the clock trace portal to return its
 *                                 data record. On a positive acknowledgement
 *                                 the last trace recorded clock rates and the
 *                                 GPU idle condition flag are returned.
 * @PORTAL_CMD_GET_TRACE_SNAPSHOT: Request the clock trace portal to return its
 *                                 current snapshot data record. On a positive
 *                                 acknowledgement the snapshot array matching
 *                                 the number of clocks are returned. It also
 *                                 starts a fresh snapshot inside the clock
 *                                 trace portal.
 * @PORTAL_CMD_INC_PM_CTX_CNT:     Request the clock trace portal to increase
 *                                 its internal PM_CTX_COUNT. If this increase
 *                                 yielded a count of 0 -> 1 change, the portal
 *                                 will initiate a PM_CTX_ACTIVE call to the
 *                                 Kbase power management. Further increase
 *                                 requests will limit to only affect the
 *                                 portal internal count value.
 * @PORTAL_CMD_DEC_PM_CTX_CNT:     Request the clock trace portal to decrease
 *                                 its internal PM_CTX_COUNT. If this decrease
 *                                 yielded a count of 1 -> 0 change, the portal
 *                                 will initiate a PM_CTX_IDLE call to the
 *                                 Kbase power management.
 * @PORTAL_CMD_CLOSE_PORTAL:       Inform the clock trace portal service the
 *                                 client has completed its session. The portal
 *                                 will start the close down action. If no
 *                                 error has occurred during the dynamic
 *                                 interactive session, an inherent basic test
 *                                 carrying out some sanity check on the clock
 *                                 trace is undertaken.
 * @PORTAL_CMD_INVOKE_NOTIFY_42KHZ: Invokes all clock rate trace manager callbacks
 *                                 for the top clock domain with a new GPU frequency
 *                                 set to 42 kHz.
 * @PORTAL_CMD_INVALID:            Valid commands termination marker. Must be
 *                                 the highest enumeration value, as it
 *                                 represents valid command array size.
 * @PORTAL_TOTAL_CMDS:             Alias of PORTAL_CMD_INVALID.
 */
/* PORTAL_CMD_INVALID must be the last one, serving the size */
enum kbasep_clk_rate_trace_req {
	PORTAL_CMD_GET_PLATFORM,
	PORTAL_CMD_GET_CLK_RATE_MGR,
	PORTAL_CMD_GET_CLK_RATE_TRACE,
	PORTAL_CMD_GET_TRACE_SNAPSHOT,
	PORTAL_CMD_INC_PM_CTX_CNT,
	PORTAL_CMD_DEC_PM_CTX_CNT,
	PORTAL_CMD_CLOSE_PORTAL,
	PORTAL_CMD_INVOKE_NOTIFY_42KHZ,
	PORTAL_CMD_INVALID,
	PORTAL_TOTAL_CMDS = PORTAL_CMD_INVALID,
};

/**
 * Portal service request command names. The portal request consists of a kutf
 * named u64-value. For those above enumerated PORTAL_CMD, the names defined
 * here are used to mark the name and then followed with a sequence number
 * value.
Example (manual script here for illustration): + * exec 5<>run # open the portal kutf run as fd-5 + * echo GET_CLK_RATE_MGR=1 >&5 # send the cmd and sequence number 1 + * head -n 1 <&5 # read back the 1-line server response + * ACK="{SEQ:1, RATE:[0x1ad27480], GPU_IDLE:1}" # response string + * echo GET_TRACE_SNAPSHOT=1 >&5 # send the cmd and sequence number 1 + * head -n 1 <&5 # read back the 1-line server response + * ACK="{SEQ:1, SNAPSHOT_ARRAY:[(0x0, 0x1ad27480, 1, 0)]}" + * echo CLOSE_PORTAL=1 >&5 # close the portal + * cat <&5 # read back all the response lines + * ACK="{SEQ:1, PM_CTX_CNT:0}" # response to close command + * KUTF_RESULT_PASS:(explicit pass) # internal sanity test passed. + * exec 5>&- # close the service portal fd. + * + * Expected request command return format: + * GET_CLK_RATE_MGR: ACK="{SEQ:12, RATE:[1080, 1280], GPU_IDLE:1}" + * Note, the above contains 2-clock with rates in [], GPU idle + * GET_CLK_RATE_TRACE: ACK="{SEQ:6, RATE:[0x1ad27480], GPU_IDLE:0}" + * Note, 1-clock with rate in [], GPU not idle + * GET_TRACE_SNAPSHOT: ACK="{SEQ:8, SNAPSHOT_ARRAY:[(0x0, 0x1ad27480, 1, 0)]}" + * Note, 1-clock, (start_rate : 0, last_rate : 0x1ad27480, + * trace_rate_up_count: 1, trace_rate_down_count : 0) + * For the specific sample case here, there is a single rate_trace event + * that yielded a rate increase change. No rate drop event recorded in the + * reporting snapshot duration. + * INC_PM_CTX_CNT: ACK="{SEQ:1, PM_CTX_CNT:1}" + * Note, after the increment, PM_CTX_CNT is 1. (i.e. 0 -> 1) + * DEC_PM_CTX_CNT: ACK="{SEQ:3, PM_CTX_CNT:0}" + * Note, after the decrement, PM_CTX_CNT is 0. (i.e. 1 -> 0) + * CLOSE_PORTAL: ACK="{SEQ:1, PM_CTX_CNT:1}" + * Note, at the close, PM_CTX_CNT is 1. The PM_CTX_CNT will internally be + * dropped down to 0 as part of the portal close clean up.
+ */ +#define GET_PLATFORM "GET_PLATFORM" +#define GET_CLK_RATE_MGR "GET_CLK_RATE_MGR" +#define GET_CLK_RATE_TRACE "GET_CLK_RATE_TRACE" +#define GET_TRACE_SNAPSHOT "GET_TRACE_SNAPSHOT" +#define INC_PM_CTX_CNT "INC_PM_CTX_CNT" +#define DEC_PM_CTX_CNT "DEC_PM_CTX_CNT" +#define CLOSE_PORTAL "CLOSE_PORTAL" +#define INVOKE_NOTIFY_42KHZ "INVOKE_NOTIFY_42KHZ" + +/** + * Portal service response tag names. The response consists of a kutf + * named string-value. In case of a 'NACK' (negative acknowledgement), it + * can be one of the two formats: + * 1. NACK="{SEQ:2, MSG:xyzed}" # NACK on command with sequence tag-2. + * Note, the portal has received a valid name and valid sequence number + * but can't carry-out the request, reason in the MSG field. + * 2. NACK="Failing-message" + * Note, unable to parse a valid name or valid sequence number, + * or some internal error condition. Reason in the quoted string. + */ +#define ACK "ACK" +#define NACK "NACK" +#define MAX_REPLY_NAME_LEN 32 + +#endif /* _KUTF_CLK_RATE_TRACE_TEST_H_ */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild index ca8c512..213d6d5 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kbuild @@ -1,10 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software # Foundation, and any use by you of this program is subject to the terms -# of such GNU licence. +# of such GNU license. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -15,12 +16,10 @@ # along with this program; if not, you can access it online at # http://www.gnu.org/licenses/gpl-2.0.html. # -# SPDX-License-Identifier: GPL-2.0 # -# - -ccflags-y += -I$(src)/../include -I$(src)/../../../ -I$(src)/../../ -I$(src)/../../backend/gpu -I$(srctree)/drivers/staging/android -obj-$(CONFIG_MALI_IRQ_LATENCY) += mali_kutf_irq_test.o +ifeq ($(CONFIG_MALI_KUTF_IRQ_TEST),y) +obj-m += mali_kutf_irq_test.o mali_kutf_irq_test-y := mali_kutf_irq_test_main.o +endif diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp index 90efdcf..155875b 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp @@ -1,15 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * - * A copy of the licence is included with the program, and can also be obtained - * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. * */ @@ -17,6 +23,7 @@ bob_kernel_module { name: "mali_kutf_irq_test", defaults: [ "mali_kbase_shared_config_defaults", + "kernel_test_configs", "kernel_test_includes", ], srcs: [ @@ -28,8 +35,8 @@ bob_kernel_module { "kutf", ], enabled: false, - base_build_kutf: { + mali_kutf_irq_test: { + kbuild_options: ["CONFIG_MALI_KUTF_IRQ_TEST=y"], enabled: true, - kbuild_options: ["CONFIG_MALI_IRQ_LATENCY=m"], }, } diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c index 26b442a..fdc5437 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2016-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2016-2018, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #include @@ -25,8 +24,8 @@ #include #include "mali_kbase.h" -#include -#include +#include +#include #include #include @@ -242,7 +241,7 @@ int mali_kutf_irq_test_main_init(void) irq_app = kutf_create_application("irq"); - if (NULL == irq_app) { + if (irq_app == NULL) { pr_warn("Creation of test application failed!\n"); return -ENOMEM; } @@ -251,7 +250,7 @@ int mali_kutf_irq_test_main_init(void) 1, mali_kutf_irq_default_create_fixture, mali_kutf_irq_default_remove_fixture); - if (NULL == suite) { + if (suite == NULL) { pr_warn("Creation of test suite failed!\n"); kutf_destroy_application(irq_app); return -ENOMEM; diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/thirdparty/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/thirdparty/Kbuild new file mode 100644 index 0000000..c723f3a --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/thirdparty/Kbuild @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# (C) COPYRIGHT 2021 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. 
+# +# + +mali_kbase-y += thirdparty/mali_kbase_mmap.o diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c b/dvalin/kernel/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c index f266d8e..de1199a 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c @@ -1,24 +1,4 @@ /* - * - * (C) COPYRIGHT ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - *//* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms @@ -30,7 +10,7 @@ */ #include "linux/mman.h" -#include "../mali_kbase.h" +#include /* mali_kbase_mmap.c * @@ -209,7 +189,8 @@ check_current: return -ENOMEM; if (gap_start <= high_limit && gap_end - gap_start >= length) { /* We found a suitable gap. Clip it with the original - * high_limit. */ + * high_limit. 
+ */ if (gap_end > info->high_limit) gap_end = info->high_limit; @@ -270,19 +251,38 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx, bool is_same_4gb_page = false; unsigned long ret; + /* the 'nolock' form is used here: + * - the base_pfn of the SAME_VA zone does not change + * - in normal use, va_size_pages is constant once the first allocation + * begins + * + * However, in abnormal use this function could be processing whilst + * another new zone is being setup in a different thread (e.g. to + * borrow part of the SAME_VA zone). In the worst case, this path may + * witness a higher SAME_VA end_pfn than the code setting up the new + * zone. + * + * This is safe because once we reach the main allocation functions, + * we'll see the updated SAME_VA end_pfn and will determine that there + * is no free region at the address found originally by too large a + * same_va_end_addr here, and will fail the allocation gracefully. + */ + struct kbase_reg_zone *zone = + kbase_ctx_reg_zone_get_nolock(kctx, KBASE_REG_ZONE_SAME_VA); + u64 same_va_end_addr = kbase_reg_zone_end_pfn(zone) << PAGE_SHIFT; + /* err on fixed address */ if ((flags & MAP_FIXED) || addr) return -EINVAL; -#ifdef CONFIG_64BIT +#if IS_ENABLED(CONFIG_64BIT) /* too big? */ if (len > TASK_SIZE - SZ_2M) return -ENOMEM; if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) { - - high_limit = min_t(unsigned long, mm->mmap_base, - (kctx->same_va_end << PAGE_SHIFT)); + high_limit = + min_t(unsigned long, mm->mmap_base, same_va_end_addr); /* If there's enough (> 33 bits) of GPU VA space, align * to 2MB boundaries. 
@@ -317,19 +317,25 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx, align_mask = align_offset - 1; is_shader_code = true; } +#if !MALI_USE_CSF } else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { - unsigned long extent_bytes = - (unsigned long)(reg->extent << PAGE_SHIFT); + unsigned long extension_bytes = + (unsigned long)(reg->extension + << PAGE_SHIFT); /* kbase_check_alloc_sizes() already satisfies * these checks, but they're here to avoid * maintenance hazards due to the assumptions - * involved */ - WARN_ON(reg->extent > (ULONG_MAX >> PAGE_SHIFT)); + * involved + */ + WARN_ON(reg->extension > + (ULONG_MAX >> PAGE_SHIFT)); WARN_ON(reg->initial_commit > (ULONG_MAX >> PAGE_SHIFT)); - WARN_ON(!is_power_of_2(extent_bytes)); - align_mask = extent_bytes - 1; + WARN_ON(!is_power_of_2(extension_bytes)); + align_mask = extension_bytes - 1; align_offset = - extent_bytes - (reg->initial_commit << PAGE_SHIFT); + extension_bytes - + (reg->initial_commit << PAGE_SHIFT); +#endif /* !MALI_USE_CSF */ } else if (reg->flags & KBASE_REG_GPU_VA_SAME_4GB_PAGE) { is_same_4gb_page = true; } @@ -352,11 +358,10 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx, is_same_4gb_page); if (IS_ERR_VALUE(ret) && high_limit == mm->mmap_base && - high_limit < (kctx->same_va_end << PAGE_SHIFT)) { + high_limit < same_va_end_addr) { /* Retry above mmap_base */ info.low_limit = mm->mmap_base; - info.high_limit = min_t(u64, TASK_SIZE, - (kctx->same_va_end << PAGE_SHIFT)); + info.high_limit = min_t(u64, TASK_SIZE, same_va_end_addr); ret = kbase_unmapped_area_topdown(&info, is_shader_code, is_same_4gb_page); diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tl/Kbuild b/dvalin/kernel/drivers/gpu/arm/midgard/tl/Kbuild new file mode 100644 index 0000000..4344850 --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tl/Kbuild @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2021 ARM 
Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +mali_kbase-y += \ + tl/mali_kbase_timeline.o \ + tl/mali_kbase_timeline_io.o \ + tl/mali_kbase_tlstream.o \ + tl/mali_kbase_tracepoints.o + + +ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) + mali_kbase-y += tl/backend/mali_kbase_timeline_csf.o +else + mali_kbase-y += tl/backend/mali_kbase_timeline_jm.o +endif diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tl/backend/mali_kbase_timeline_csf.c b/dvalin/kernel/drivers/gpu/arm/midgard/tl/backend/mali_kbase_timeline_csf.c new file mode 100644 index 0000000..a2868da --- /dev/null +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tl/backend/mali_kbase_timeline_csf.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include +#include +#include + +#include + +void kbase_create_timeline_objects(struct kbase_device *kbdev) +{ + unsigned int as_nr; + unsigned int slot_i; + struct kbase_context *kctx; + struct kbase_timeline *timeline = kbdev->timeline; + struct kbase_tlstream *summary = + &kbdev->timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY]; + + /* Summarize the Address Space objects. */ + for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) + __kbase_tlstream_tl_new_as(summary, &kbdev->as[as_nr], as_nr); + + /* Create Legacy GPU object to track in AOM for dumping */ + __kbase_tlstream_tl_new_gpu(summary, + kbdev, + kbdev->gpu_props.props.raw_props.gpu_id, + kbdev->gpu_props.num_cores); + + + for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++) + __kbase_tlstream_tl_lifelink_as_gpu(summary, + &kbdev->as[as_nr], + kbdev); + + /* Trace the creation of a new kbase device and set its properties. */ + __kbase_tlstream_tl_kbase_new_device(summary, + kbdev->gpu_props.props.raw_props.gpu_id, + kbdev->gpu_props.num_cores, kbdev->csf.global_iface.group_num, + kbdev->nr_hw_address_spaces); + + /* Lock the context list, to ensure no changes to the list are made + * while we're summarizing the contexts and their contents. 
+ */ + mutex_lock(&timeline->tl_kctx_list_lock); + + /* Hold the scheduler lock while we emit the current state + * We also need to continue holding the lock until after the first body + * stream tracepoints are emitted to ensure we don't change the + * scheduler until after then + */ + mutex_lock(&kbdev->csf.scheduler.lock); + + for (slot_i = 0; slot_i < kbdev->csf.global_iface.group_num; slot_i++) { + + struct kbase_queue_group *group = + kbdev->csf.scheduler.csg_slots[slot_i].resident_group; + + if (group) + __kbase_tlstream_tl_kbase_device_program_csg(summary, + kbdev->gpu_props.props.raw_props.gpu_id, + group->handle, slot_i); + } + + /* Reset body stream buffers while holding the kctx lock. + * As we are holding the lock, we can guarantee that no kctx creation or + * deletion tracepoints can be fired from outside of this function by + * some other thread. + */ + kbase_timeline_streams_body_reset(timeline); + + mutex_unlock(&kbdev->csf.scheduler.lock); + + /* For each context in the device... */ + list_for_each_entry(kctx, &timeline->tl_kctx_list, tl_kctx_list_node) { + size_t i; + struct kbase_tlstream *body = + &timeline->streams[TL_STREAM_TYPE_OBJ]; + + /* Lock the context's KCPU queues, to ensure no KCPU-queue + * related actions can occur in this context from now on. + */ + mutex_lock(&kctx->csf.kcpu_queues.lock); + + /* Acquire the MMU lock, to ensure we don't get a concurrent + * address space assignment while summarizing this context's + * address space. + */ + mutex_lock(&kbdev->mmu_hw_mutex); + + /* Trace the context itself into the body stream, not the + * summary stream. + * We place this in the body to ensure it is ordered after any + * other tracepoints related to the contents of the context that + * might have been fired before acquiring all of the per-context + * locks. + * This ensures that those tracepoints will not actually affect + * the object model state, as they reference a context that + * hasn't been traced yet. 
They may, however, cause benign + * errors to be emitted. + */ + __kbase_tlstream_tl_kbase_new_ctx(body, kctx->id, + kbdev->gpu_props.props.raw_props.gpu_id); + + /* Also trace with the legacy AOM tracepoint for dumping */ + __kbase_tlstream_tl_new_ctx(body, + kctx, + kctx->id, + (u32)(kctx->tgid)); + + /* Trace the currently assigned address space */ + if (kctx->as_nr != KBASEP_AS_NR_INVALID) + __kbase_tlstream_tl_kbase_ctx_assign_as(body, kctx->id, + kctx->as_nr); + + + /* Trace all KCPU queues in the context into the body stream. + * As we acquired the KCPU lock after resetting the body stream, + * it's possible that some KCPU-related events for this context + * occurred between that reset and now. + * These will cause errors to be emitted when parsing the + * timeline, but they will not affect the correctness of the + * object model. + */ + for (i = 0; i < KBASEP_MAX_KCPU_QUEUES; i++) { + const struct kbase_kcpu_command_queue *kcpu_queue = + kctx->csf.kcpu_queues.array[i]; + + if (kcpu_queue) + __kbase_tlstream_tl_kbase_new_kcpuqueue( + body, kcpu_queue, kcpu_queue->kctx->id, + kcpu_queue->num_pending_cmds); + } + + mutex_unlock(&kbdev->mmu_hw_mutex); + mutex_unlock(&kctx->csf.kcpu_queues.lock); + + /* Now that all per-context locks for this context have been + * released, any per-context tracepoints that are fired from + * any other threads will go into the body stream after + * everything that was just summarised into the body stream in + * this iteration of the loop, so will start to correctly update + * the object model state. + */ + } + + mutex_unlock(&timeline->tl_kctx_list_lock); + + /* Static object are placed into summary packet that needs to be + * transmitted first. Flush all streams to make it available to + * user space. 
+ */ + kbase_timeline_streams_flush(timeline); +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tl/backend/mali_kbase_timeline_jm.c b/dvalin/kernel/drivers/gpu/arm/midgard/tl/backend/mali_kbase_timeline_jm.c index c368ac7..9ba89f5 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tl/backend/mali_kbase_timeline_jm.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tl/backend/mali_kbase_timeline_jm.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,13 +17,11 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ -#include "../mali_kbase_tracepoints.h" -#include "../mali_kbase_timeline.h" -#include "../mali_kbase_timeline_priv.h" +#include +#include +#include #include @@ -66,16 +65,16 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev) /* Lock the context list, to ensure no changes to the list are made * while we're summarizing the contexts and their contents. */ - mutex_lock(&kbdev->kctx_list_lock); + mutex_lock(&timeline->tl_kctx_list_lock); /* For each context in the device... */ - list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { + list_for_each_entry(kctx, &timeline->tl_kctx_list, tl_kctx_list_node) { /* Summarize the context itself */ __kbase_tlstream_tl_new_ctx(summary, kctx, kctx->id, (u32)(kctx->tgid)); - }; + } /* Reset body stream buffers while holding the kctx lock. 
* This ensures we can't fire both summary and normal tracepoints for @@ -87,11 +86,11 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev) */ kbase_timeline_streams_body_reset(timeline); - mutex_unlock(&kbdev->kctx_list_lock); + mutex_unlock(&timeline->tl_kctx_list_lock); /* Static object are placed into summary packet that needs to be * transmitted first. Flush all streams to make it available to * user space. */ kbase_timeline_streams_flush(timeline); -} \ No newline at end of file +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline.c b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline.c index 88fba83..09818a5 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #include "mali_kbase_timeline.h" @@ -109,11 +108,14 @@ int kbase_timeline_init(struct kbase_timeline **timeline, { enum tl_stream_type i; struct kbase_timeline *result; +#if MALI_USE_CSF + struct kbase_tlstream *csffw_stream; +#endif if (!timeline || !timeline_flags) return -EINVAL; - result = kzalloc(sizeof(*result), GFP_KERNEL); + result = vzalloc(sizeof(*result)); if (!result) return -ENOMEM; @@ -125,12 +127,20 @@ int kbase_timeline_init(struct kbase_timeline **timeline, kbase_tlstream_init(&result->streams[i], i, &result->event_queue); + /* Initialize the kctx list */ + mutex_init(&result->tl_kctx_list_lock); + INIT_LIST_HEAD(&result->tl_kctx_list); + /* Initialize autoflush timer. */ atomic_set(&result->autoflush_timer_active, 0); kbase_timer_setup(&result->autoflush_timer, kbasep_timeline_autoflush_timer_callback); result->timeline_flags = timeline_flags; +#if MALI_USE_CSF + csffw_stream = &result->streams[TL_STREAM_TYPE_CSFFW]; + kbase_csf_tl_reader_init(&result->csf_tl_reader, csffw_stream); +#endif *timeline = result; return 0; @@ -143,11 +153,16 @@ void kbase_timeline_term(struct kbase_timeline *timeline) if (!timeline) return; +#if MALI_USE_CSF + kbase_csf_tl_reader_term(&timeline->csf_tl_reader); +#endif + + WARN_ON(!list_empty(&timeline->tl_kctx_list)); for (i = (enum tl_stream_type)0; i < TL_STREAM_TYPE_COUNT; i++) kbase_tlstream_term(&timeline->streams[i]); - kfree(timeline); + vfree(timeline); } #ifdef CONFIG_MALI_DEVFREQ @@ -162,11 +177,7 @@ static void kbase_tlstream_current_devfreq_target(struct kbase_device *kbdev) unsigned long cur_freq = 0; mutex_lock(&devfreq->lock); -#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE - cur_freq = kbdev->current_nominal_freq; -#else cur_freq = devfreq->last_status.current_frequency; -#endif KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(kbdev, (u64)cur_freq); mutex_unlock(&devfreq->lock); } @@ -175,13 +186,24 @@ static void kbase_tlstream_current_devfreq_target(struct 
kbase_device *kbdev) int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags) { - int ret; + int ret = 0; u32 timeline_flags = TLSTREAM_ENABLED | flags; struct kbase_timeline *timeline = kbdev->timeline; if (!atomic_cmpxchg(timeline->timeline_flags, 0, timeline_flags)) { int rcode; +#if MALI_USE_CSF + if (flags & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) { + ret = kbase_csf_tl_reader_start( + &timeline->csf_tl_reader, kbdev); + if (ret) + { + atomic_set(timeline->timeline_flags, 0); + return ret; + } + } +#endif ret = anon_inode_getfd( "[mali_tlstream]", &kbasep_tlstream_fops, @@ -189,6 +211,9 @@ int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags) O_RDONLY | O_CLOEXEC); if (ret < 0) { atomic_set(timeline->timeline_flags, 0); +#if MALI_USE_CSF + kbase_csf_tl_reader_stop(&timeline->csf_tl_reader); +#endif return ret; } @@ -206,6 +231,7 @@ int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags) jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); CSTD_UNUSED(rcode); +#if !MALI_USE_CSF /* If job dumping is enabled, readjust the software event's * timeout as the default value of 3 seconds is often * insufficient. @@ -216,6 +242,7 @@ int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags) atomic_set(&kbdev->js_data.soft_job_timeout_ms, 1800000); } +#endif /* !MALI_USE_CSF */ /* Summary stream was cleared during acquire. 
* Create static timeline objects that will be @@ -235,15 +262,30 @@ int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags) ret = -EBUSY; } + if (ret >= 0) + timeline->last_acquire_time = ktime_get(); + return ret; } -void kbase_timeline_streams_flush(struct kbase_timeline *timeline) +int kbase_timeline_streams_flush(struct kbase_timeline *timeline) { enum tl_stream_type stype; + bool has_bytes = false; + size_t nbytes = 0; +#if MALI_USE_CSF + int ret = kbase_csf_tl_reader_flush_buffer(&timeline->csf_tl_reader); + + if (ret > 0) + has_bytes = true; +#endif - for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) - kbase_tlstream_flush_stream(&timeline->streams[stype]); + for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) { + nbytes = kbase_tlstream_flush_stream(&timeline->streams[stype]); + if (nbytes > 0) + has_bytes = true; + } + return has_bytes ? 0 : -EIO; } void kbase_timeline_streams_body_reset(struct kbase_timeline *timeline) @@ -252,6 +294,78 @@ void kbase_timeline_streams_body_reset(struct kbase_timeline *timeline) &timeline->streams[TL_STREAM_TYPE_OBJ]); kbase_tlstream_reset( &timeline->streams[TL_STREAM_TYPE_AUX]); +#if MALI_USE_CSF + kbase_tlstream_reset( + &timeline->streams[TL_STREAM_TYPE_CSFFW]); +#endif +} + +void kbase_timeline_pre_kbase_context_destroy(struct kbase_context *kctx) +{ + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_timeline *timeline = kbdev->timeline; + + /* Remove the context from the list to ensure we don't try and + * summarize a context that is being destroyed. + * + * It's unsafe to try and summarize a context being destroyed as the + * locks we might normally attempt to acquire, and the data structures + * we would normally attempt to traverse could already be destroyed. 
+ * + * In the case where the tlstream is acquired between this pre destroy + * call and the post destroy call, we will get a context destroy + * tracepoint without the corresponding context create tracepoint, + * but this will not affect the correctness of the object model. + */ + mutex_lock(&timeline->tl_kctx_list_lock); + list_del_init(&kctx->tl_kctx_list_node); + mutex_unlock(&timeline->tl_kctx_list_lock); +} + +void kbase_timeline_post_kbase_context_create(struct kbase_context *kctx) +{ + struct kbase_device *const kbdev = kctx->kbdev; + struct kbase_timeline *timeline = kbdev->timeline; + + /* On context create, add the context to the list to ensure it is + * summarized when timeline is acquired + */ + mutex_lock(&timeline->tl_kctx_list_lock); + + list_add(&kctx->tl_kctx_list_node, &timeline->tl_kctx_list); + + /* Fire the tracepoints with the lock held to ensure the tracepoints + * are either fired before or after the summarization, + * never in parallel with it. If fired in parallel, we could get + * duplicate creation tracepoints. + */ +#if MALI_USE_CSF + KBASE_TLSTREAM_TL_KBASE_NEW_CTX( + kbdev, kctx->id, kbdev->gpu_props.props.raw_props.gpu_id); +#endif + /* Trace with the AOM tracepoint even in CSF for dumping */ + KBASE_TLSTREAM_TL_NEW_CTX(kbdev, kctx, kctx->id, 0); + + mutex_unlock(&timeline->tl_kctx_list_lock); +} + +void kbase_timeline_post_kbase_context_destroy(struct kbase_context *kctx) +{ + struct kbase_device *const kbdev = kctx->kbdev; + + /* Trace with the AOM tracepoint even in CSF for dumping */ + KBASE_TLSTREAM_TL_DEL_CTX(kbdev, kctx); +#if MALI_USE_CSF + KBASE_TLSTREAM_TL_KBASE_DEL_CTX(kbdev, kctx->id); +#endif + + /* Flush the timeline stream, so the user can see the termination + * tracepoints being fired. + * The "if" statement below is for optimization. It is safe to call + * kbase_timeline_streams_flush when timeline is disabled. 
+ */ + if (atomic_read(&kbdev->timeline_flags) != 0) + kbase_timeline_streams_flush(kbdev->timeline); } #if MALI_UNIT_TEST diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline.h b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline.h index cd48411..63926eb 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #if !defined(_KBASE_TIMELINE_H) @@ -70,8 +69,10 @@ int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags); * @timeline: Timeline instance * * Function will flush pending data in all timeline streams. + * + * Return: Zero on success, errno on failure. */ -void kbase_timeline_streams_flush(struct kbase_timeline *timeline); +int kbase_timeline_streams_flush(struct kbase_timeline *timeline); /** * kbase_timeline_streams_body_reset - reset timeline body streams. 
@@ -81,33 +82,31 @@ void kbase_timeline_streams_flush(struct kbase_timeline *timeline); */ void kbase_timeline_streams_body_reset(struct kbase_timeline *timeline); -#if MALI_UNIT_TEST /** - * kbase_timeline_test - start timeline stream data generator - * @kbdev: Kernel common context - * @tpw_count: Number of trace point writers in each context - * @msg_delay: Time delay in milliseconds between trace points written by one - * writer - * @msg_count: Number of trace points written by one writer - * @aux_msg: If non-zero aux messages will be included + * kbase_timeline_post_kbase_context_create - Inform timeline that a new KBase + * Context has been created. + * @kctx: KBase Context + */ +void kbase_timeline_post_kbase_context_create(struct kbase_context *kctx); + +/** + * kbase_timeline_pre_kbase_context_destroy - Inform timeline that a KBase + * Context is about to be destroyed. + * @kctx: KBase Context + */ +void kbase_timeline_pre_kbase_context_destroy(struct kbase_context *kctx); + +/** + * kbase_timeline_post_kbase_context_destroy - Inform timeline that a KBase + * Context has been destroyed. + * @kctx: KBase Context * - * This test starts a requested number of asynchronous writers in both IRQ and - * thread context. Each writer will generate required number of test - * tracepoints (tracepoints with embedded information about writer that - * should be verified by user space reader). Tracepoints will be emitted in - * all timeline body streams. If aux_msg is non-zero writer will also - * generate not testable tracepoints (tracepoints without information about - * writer). These tracepoints are used to check correctness of remaining - * timeline message generating functions. Writer will wait requested time - * between generating another set of messages. This call blocks until all - * writers finish. + * Should be called immediately before the memory is freed, and the context ID + * and kbdev pointer should still be valid. 
*/ -void kbase_timeline_test( - struct kbase_device *kbdev, - unsigned int tpw_count, - unsigned int msg_delay, - unsigned int msg_count, - int aux_msg); +void kbase_timeline_post_kbase_context_destroy(struct kbase_context *kctx); + +#if MALI_UNIT_TEST /** * kbase_timeline_stats - read timeline stream statistics diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline_io.c b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline_io.c index cdde928..eff30a0 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline_io.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline_io.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,35 +17,38 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #include "mali_kbase_timeline_priv.h" #include "mali_kbase_tlstream.h" #include "mali_kbase_tracepoints.h" +#include "mali_kbase_timeline.h" +#include #include /* The timeline stream file operations functions. 
*/ -static ssize_t kbasep_timeline_io_read( - struct file *filp, - char __user *buffer, - size_t size, - loff_t *f_pos); -static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait); +static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, + size_t size, loff_t *f_pos); +static unsigned int kbasep_timeline_io_poll(struct file *filp, + poll_table *wait); static int kbasep_timeline_io_release(struct inode *inode, struct file *filp); +static int kbasep_timeline_io_fsync(struct file *filp, loff_t start, loff_t end, + int datasync); /* The timeline stream file operations structure. */ const struct file_operations kbasep_tlstream_fops = { .owner = THIS_MODULE, .release = kbasep_timeline_io_release, - .read = kbasep_timeline_io_read, - .poll = kbasep_timeline_io_poll, + .read = kbasep_timeline_io_read, + .poll = kbasep_timeline_io_poll, + .fsync = kbasep_timeline_io_fsync, }; /** - * kbasep_timeline_io_packet_pending - check timeline streams for pending packets + * kbasep_timeline_io_packet_pending - check timeline streams for pending + * packets + * * @timeline: Timeline instance * @ready_stream: Pointer to variable where stream will be placed * @rb_idx_raw: Pointer to variable where read buffer index will be placed @@ -56,10 +60,10 @@ const struct file_operations kbasep_tlstream_fops = { * * Return: non-zero if any of timeline streams has at last one packet ready */ -static int kbasep_timeline_io_packet_pending( - struct kbase_timeline *timeline, - struct kbase_tlstream **ready_stream, - unsigned int *rb_idx_raw) +static int +kbasep_timeline_io_packet_pending(struct kbase_timeline *timeline, + struct kbase_tlstream **ready_stream, + unsigned int *rb_idx_raw) { enum tl_stream_type i; @@ -78,12 +82,28 @@ static int kbasep_timeline_io_packet_pending( *ready_stream = stream; return 1; } - } return 0; } +/** + * kbasep_timeline_has_header_data() - check timeline headers for pending + * packets + * + * @timeline: Timeline instance + 
* + * Return: non-zero if any of timeline headers has at least one packet ready. + */ +static int kbasep_timeline_has_header_data(struct kbase_timeline *timeline) +{ + return timeline->obj_header_btc || timeline->aux_header_btc +#if MALI_USE_CSF + || timeline->csf_tl_reader.tl_header.btc +#endif + ; +} + /** * copy_stream_header() - copy timeline stream header. * @@ -97,11 +117,9 @@ static int kbasep_timeline_io_packet_pending( * * Returns: 0 if success, -1 otherwise. */ -static inline int copy_stream_header( - char __user *buffer, size_t size, ssize_t *copy_len, - const char *hdr, - size_t hdr_size, - size_t *hdr_btc) +static inline int copy_stream_header(char __user *buffer, size_t size, + ssize_t *copy_len, const char *hdr, + size_t hdr_size, size_t *hdr_btc) { const size_t offset = hdr_size - *hdr_btc; const size_t copy_size = MIN(size - *copy_len, *hdr_btc); @@ -123,6 +141,7 @@ static inline int copy_stream_header( /** * kbasep_timeline_copy_header - copy timeline headers to the user + * * @timeline: Timeline instance * @buffer: Pointer to the buffer provided by user * @size: Maximum amount of data that can be stored in the buffer @@ -135,29 +154,30 @@ static inline int copy_stream_header( * * Returns: 0 if success, -1 if copy_to_user has failed. 
*/ -static inline int kbasep_timeline_copy_headers( - struct kbase_timeline *timeline, - char __user *buffer, - size_t size, - ssize_t *copy_len) +static inline int kbasep_timeline_copy_headers(struct kbase_timeline *timeline, + char __user *buffer, size_t size, + ssize_t *copy_len) { - if (copy_stream_header(buffer, size, copy_len, - obj_desc_header, - obj_desc_header_size, - &timeline->obj_header_btc)) + if (copy_stream_header(buffer, size, copy_len, obj_desc_header, + obj_desc_header_size, &timeline->obj_header_btc)) return -1; + if (copy_stream_header(buffer, size, copy_len, aux_desc_header, + aux_desc_header_size, &timeline->aux_header_btc)) + return -1; +#if MALI_USE_CSF if (copy_stream_header(buffer, size, copy_len, - aux_desc_header, - aux_desc_header_size, - &timeline->aux_header_btc)) + timeline->csf_tl_reader.tl_header.data, + timeline->csf_tl_reader.tl_header.size, + &timeline->csf_tl_reader.tl_header.btc)) return -1; +#endif return 0; } - /** * kbasep_timeline_io_read - copy data from streams to buffer provided by user + * * @filp: Pointer to file structure * @buffer: Pointer to the buffer provided by user * @size: Maximum amount of data that can be stored in the buffer @@ -165,11 +185,8 @@ static inline int kbasep_timeline_copy_headers( * * Return: number of bytes stored in the buffer */ -static ssize_t kbasep_timeline_io_read( - struct file *filp, - char __user *buffer, - size_t size, - loff_t *f_pos) +static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, + size_t size, loff_t *f_pos) { ssize_t copy_len = 0; struct kbase_timeline *timeline; @@ -180,25 +197,25 @@ static ssize_t kbasep_timeline_io_read( if (WARN_ON(!filp->private_data)) return -EFAULT; - timeline = (struct kbase_timeline *) filp->private_data; + timeline = (struct kbase_timeline *)filp->private_data; if (!buffer) return -EINVAL; - if ((*f_pos < 0) || (size < PACKET_SIZE)) + if (*f_pos < 0) return -EINVAL; mutex_lock(&timeline->reader_lock); while (copy_len < 
size) { struct kbase_tlstream *stream = NULL; - unsigned int rb_idx_raw = 0; - unsigned int wb_idx_raw; - unsigned int rb_idx; - size_t rb_size; + unsigned int rb_idx_raw = 0; + unsigned int wb_idx_raw; + unsigned int rb_idx; + size_t rb_size; - if (kbasep_timeline_copy_headers( - timeline, buffer, size, &copy_len)) { + if (kbasep_timeline_copy_headers(timeline, buffer, size, + &copy_len)) { copy_len = -EFAULT; break; } @@ -210,17 +227,13 @@ static ssize_t kbasep_timeline_io_read( */ if (copy_len > 0) { if (!kbasep_timeline_io_packet_pending( - timeline, - &stream, - &rb_idx_raw)) + timeline, &stream, &rb_idx_raw)) break; } else { if (wait_event_interruptible( - timeline->event_queue, - kbasep_timeline_io_packet_pending( - timeline, - &stream, - &rb_idx_raw))) { + timeline->event_queue, + kbasep_timeline_io_packet_pending( + timeline, &stream, &rb_idx_raw))) { copy_len = -ERESTARTSYS; break; } @@ -238,10 +251,8 @@ static ssize_t kbasep_timeline_io_read( rb_size = atomic_read(&stream->buffer[rb_idx].size); if (rb_size > size - copy_len) break; - if (copy_to_user( - &buffer[copy_len], - stream->buffer[rb_idx].data, - rb_size)) { + if (copy_to_user(&buffer[copy_len], stream->buffer[rb_idx].data, + rb_size)) { copy_len = -EFAULT; break; } @@ -283,7 +294,7 @@ static ssize_t kbasep_timeline_io_read( static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait) { struct kbase_tlstream *stream; - unsigned int rb_idx; + unsigned int rb_idx; struct kbase_timeline *timeline; KBASE_DEBUG_ASSERT(filp); @@ -292,7 +303,11 @@ static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait) if (WARN_ON(!filp->private_data)) return -EFAULT; - timeline = (struct kbase_timeline *) filp->private_data; + timeline = (struct kbase_timeline *)filp->private_data; + + /* If there are header bytes to copy, read will not block */ + if (kbasep_timeline_has_header_data(timeline)) + return POLLIN; poll_wait(filp, &timeline->event_queue, wait); if 
(kbasep_timeline_io_packet_pending(timeline, &stream, &rb_idx)) @@ -310,6 +325,8 @@ static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait) static int kbasep_timeline_io_release(struct inode *inode, struct file *filp) { struct kbase_timeline *timeline; + ktime_t elapsed_time; + s64 elapsed_time_ms, time_to_sleep; KBASE_DEBUG_ASSERT(inode); KBASE_DEBUG_ASSERT(filp); @@ -317,8 +334,23 @@ static int kbasep_timeline_io_release(struct inode *inode, struct file *filp) CSTD_UNUSED(inode); - timeline = (struct kbase_timeline *) filp->private_data; + timeline = (struct kbase_timeline *)filp->private_data; + + /* Get the amount of time passed since the timeline was acquired and ensure + * we sleep for long enough such that it has been at least + * TIMELINE_HYSTERESIS_TIMEOUT_MS amount of time between acquire and release. + * This prevents userspace from spamming acquire and release too quickly. + */ + elapsed_time = ktime_sub(ktime_get(), timeline->last_acquire_time); + elapsed_time_ms = ktime_to_ms(elapsed_time); + time_to_sleep = MIN(TIMELINE_HYSTERESIS_TIMEOUT_MS, + TIMELINE_HYSTERESIS_TIMEOUT_MS - elapsed_time_ms); + if (time_to_sleep > 0) + msleep(time_to_sleep); +#if MALI_USE_CSF + kbase_csf_tl_reader_stop(&timeline->csf_tl_reader); +#endif /* Stop autoflush timer before releasing access to streams. 
*/ atomic_set(&timeline->autoflush_timer_active, 0); @@ -327,3 +359,20 @@ static int kbasep_timeline_io_release(struct inode *inode, struct file *filp) atomic_set(timeline->timeline_flags, 0); return 0; } + +static int kbasep_timeline_io_fsync(struct file *filp, loff_t start, loff_t end, + int datasync) +{ + struct kbase_timeline *timeline; + + CSTD_UNUSED(start); + CSTD_UNUSED(end); + CSTD_UNUSED(datasync); + + if (WARN_ON(!filp->private_data)) + return -EFAULT; + + timeline = (struct kbase_timeline *)filp->private_data; + + return kbase_timeline_streams_flush(timeline); +} diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline_priv.h b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline_priv.h index 3596584..bf2c385 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline_priv.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_timeline_priv.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #if !defined(_KBASE_TIMELINE_PRIV_H) @@ -26,14 +25,25 @@ #include #include "mali_kbase_tlstream.h" +#if MALI_USE_CSF +#include "csf/mali_kbase_csf_tl_reader.h" +#include "csf/mali_kbase_csf_trace_buffer.h" +#endif #include #include #include +/* The minimum amount of time timeline must be acquired for before release is + * allowed, to prevent DoS attacks. + */ +#define TIMELINE_HYSTERESIS_TIMEOUT_MS ((s64)500) + /** * struct kbase_timeline - timeline state structure * @streams: The timeline streams generated by kernel + * @tl_kctx_list: List of contexts for timeline. + * @tl_kctx_list_lock: Lock to protect @tl_kctx_list. * @autoflush_timer: Autoflush timer * @autoflush_timer_active: If non-zero autoflush timer is active * @reader_lock: Reader lock. Only one reader is allowed to @@ -44,9 +54,13 @@ * otherwise. See kbase_timeline_io_acquire(). * @obj_header_btc: Remaining bytes to copy for the object stream header * @aux_header_btc: Remaining bytes to copy for the aux stream header + * @last_acquire_time: The time at which timeline was last acquired. 
+ * @csf_tl_reader: CSFFW timeline reader */ struct kbase_timeline { struct kbase_tlstream streams[TL_STREAM_TYPE_COUNT]; + struct list_head tl_kctx_list; + struct mutex tl_kctx_list_lock; struct timer_list autoflush_timer; atomic_t autoflush_timer_active; struct mutex reader_lock; @@ -57,6 +71,10 @@ struct kbase_timeline { atomic_t *timeline_flags; size_t obj_header_btc; size_t aux_header_btc; + ktime_t last_acquire_time; +#if MALI_USE_CSF + struct kbase_csf_tl_reader csf_tl_reader; +#endif }; extern const struct file_operations kbasep_tlstream_fops; diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tl_serialize.h b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tl_serialize.h index 3e37827..30d120d 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tl_serialize.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tl_serialize.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ #if !defined(_KBASE_TL_SERIALIZE_H) diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tlstream.c b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tlstream.c index bec4be7..0275bad 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tlstream.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tlstream.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #include "mali_kbase_tlstream.h" @@ -57,20 +56,19 @@ static void kbasep_packet_header_setup( * @numbered: non-zero if the stream is numbered * * Function updates mutable part of packet header in the given buffer. - * Note that value of data_size must not including size of the header. + * Note that value of data_size must not include size of the header. 
*/ static void kbasep_packet_header_update( char *buffer, size_t data_size, int numbered) { - u32 word0; u32 word1 = MIPE_PACKET_HEADER_W1((u32)data_size, !!numbered); KBASE_DEBUG_ASSERT(buffer); - CSTD_UNUSED(word0); - memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1)); + /* we copy the contents of word1 to its respective position in the buffer */ + memcpy(&buffer[sizeof(u32)], &word1, sizeof(word1)); } /** @@ -131,6 +129,14 @@ static const struct { TL_PACKET_TYPE_BODY, TL_STREAM_ID_KERNEL, }, +#if MALI_USE_CSF + { + TL_PACKET_FAMILY_TL, + TL_PACKET_CLASS_OBJ, + TL_PACKET_TYPE_BODY, + TL_STREAM_ID_CSFFW, + }, +#endif }; void kbase_tlstream_init( @@ -141,12 +147,12 @@ void kbase_tlstream_init( unsigned int i; KBASE_DEBUG_ASSERT(stream); - KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type); + KBASE_DEBUG_ASSERT(stream_type < TL_STREAM_TYPE_COUNT); spin_lock_init(&stream->lock); /* All packets carrying tracepoints shall be numbered. */ - if (TL_PACKET_TYPE_BODY == tl_stream_cfg[stream_type].pkt_type) + if (tl_stream_cfg[stream_type].pkt_type == TL_PACKET_TYPE_BODY) stream->numbered = 1; else stream->numbered = 0; @@ -209,7 +215,8 @@ static size_t kbasep_tlstream_msgbuf_submit( /* Increasing write buffer index will expose this packet to the reader. * As stream->lock is not taken on reader side we must make sure memory - * is updated correctly before this will happen. */ + * is updated correctly before this will happen. + */ smp_wmb(); atomic_inc(&stream->wbi); @@ -243,7 +250,7 @@ char *kbase_tlstream_msgbuf_acquire( wb_size = atomic_read(&stream->buffer[wb_idx].size); /* Select next buffer if data will not fit into current one. 
*/ - if (PACKET_SIZE < wb_size + msg_size) { + if (wb_size + msg_size > PACKET_SIZE) { wb_size = kbasep_tlstream_msgbuf_submit( stream, wb_idx_raw, wb_size); wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; @@ -269,7 +276,7 @@ void kbase_tlstream_msgbuf_release( spin_unlock_irqrestore(&stream->lock, flags); } -void kbase_tlstream_flush_stream( +size_t kbase_tlstream_flush_stream( struct kbase_tlstream *stream) { unsigned long flags; @@ -278,6 +285,7 @@ void kbase_tlstream_flush_stream( size_t wb_size; size_t min_size = PACKET_HEADER_SIZE; + if (stream->numbered) min_size += PACKET_NUMBER_SIZE; @@ -292,7 +300,14 @@ void kbase_tlstream_flush_stream( stream, wb_idx_raw, wb_size); wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; atomic_set(&stream->buffer[wb_idx].size, wb_size); + } else { + /* we return that there are no bytes to be read.*/ + /* Timeline io fsync will use this info to decide whether + * fsync should return an error + */ + wb_size = 0; } + spin_unlock_irqrestore(&stream->lock, flags); + return wb_size; } - diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tlstream.h b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tlstream.h index 427bb09..6660cf5 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tlstream.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tlstream.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2015-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #if !defined(_KBASE_TLSTREAM_H) @@ -44,6 +43,8 @@ * struct kbase_tlstream - timeline stream structure * @lock: Message order lock * @buffer: Array of buffers + * @buffer.size: Number of bytes in buffer + * @buffer.data: Buffer's data * @wbi: Write buffer index * @rbi: Read buffer index * @numbered: If non-zero stream's packets are sequentially numbered @@ -76,8 +77,8 @@ struct kbase_tlstream { spinlock_t lock; struct { - atomic_t size; /* number of bytes in buffer */ - char data[PACKET_SIZE]; /* buffer's data */ + atomic_t size; + char data[PACKET_SIZE]; } buffer[PACKET_COUNT]; atomic_t wbi; @@ -97,6 +98,9 @@ enum tl_stream_type { TL_STREAM_TYPE_OBJ_SUMMARY = TL_STREAM_TYPE_FIRST, TL_STREAM_TYPE_OBJ, TL_STREAM_TYPE_AUX, +#if MALI_USE_CSF + TL_STREAM_TYPE_CSFFW, +#endif TL_STREAM_TYPE_COUNT }; @@ -159,8 +163,10 @@ void kbase_tlstream_msgbuf_release(struct kbase_tlstream *stream, * @stream: Pointer to the stream structure * * Flush pending data in the timeline stream. + * + * Return: Number of bytes flushed and available to be read + * */ -void kbase_tlstream_flush_stream(struct kbase_tlstream *stream); +size_t kbase_tlstream_flush_stream(struct kbase_tlstream *stream); #endif /* _KBASE_TLSTREAM_H */ - diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tracepoints.c b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tracepoints.c index e445a3a..2c0de01 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tracepoints.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tracepoints.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. 
All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /* @@ -66,10 +65,11 @@ enum tl_msg_id_obj { KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, KBASE_TL_EVENT_ATOM_SOFTJOB_START, KBASE_TL_EVENT_ATOM_SOFTJOB_END, - KBASE_TL_EVENT_ARB_GRANTED, - KBASE_TL_EVENT_ARB_STARTED, - KBASE_TL_EVENT_ARB_STOP_REQUESTED, - KBASE_TL_EVENT_ARB_STOPPED, + KBASE_TL_ARBITER_GRANTED, + KBASE_TL_ARBITER_STARTED, + KBASE_TL_ARBITER_STOP_REQUESTED, + KBASE_TL_ARBITER_STOPPED, + KBASE_TL_ARBITER_REQUESTED, KBASE_JD_GPU_SOFT_RESET, KBASE_TL_KBASE_NEW_DEVICE, KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, @@ -82,18 +82,13 @@ enum tl_msg_id_obj { KBASE_TL_KBASE_DEL_KCPUQUEUE, KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL, KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT, - KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT, - KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT, - KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT, - KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET, - KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET, - KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET, - KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY, - KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY, - KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY, + KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT, + KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET, KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT, 
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT, KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE, + KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER, + KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND, KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC, KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC, KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC, @@ -107,8 +102,6 @@ enum tl_msg_id_obj { KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START, KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END, KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET, - KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START, - KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END, KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END, KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START, @@ -123,8 +116,11 @@ enum tl_msg_id_obj { KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END, KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END, KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END, - KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START, + KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END, KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW, + KBASE_TL_KBASE_CSFFW_RESET, KBASE_OBJ_MSG_COUNT, }; @@ -139,6 +135,7 @@ enum tl_msg_id_aux { KBASE_AUX_PROTECTED_LEAVE_START, KBASE_AUX_PROTECTED_LEAVE_END, KBASE_AUX_JIT_STATS, + KBASE_AUX_TILER_HEAP_STATS, KBASE_AUX_EVENT_JOB_SLOT, KBASE_AUX_MSG_COUNT, }; @@ -276,22 +273,26 @@ enum tl_msg_id_aux { "atom soft job has completed", \ "@p", \ "atom") \ - TRACEPOINT_DESC(KBASE_TL_EVENT_ARB_GRANTED, \ + TRACEPOINT_DESC(KBASE_TL_ARBITER_GRANTED, \ "Arbiter has granted gpu access", \ "@p", \ "gpu") \ - TRACEPOINT_DESC(KBASE_TL_EVENT_ARB_STARTED, \ + TRACEPOINT_DESC(KBASE_TL_ARBITER_STARTED, \ "Driver is running again and able to process jobs", \ "@p", \ "gpu") \ - TRACEPOINT_DESC(KBASE_TL_EVENT_ARB_STOP_REQUESTED, \ + 
TRACEPOINT_DESC(KBASE_TL_ARBITER_STOP_REQUESTED, \ "Arbiter has requested driver to stop using gpu", \ "@p", \ "gpu") \ - TRACEPOINT_DESC(KBASE_TL_EVENT_ARB_STOPPED, \ + TRACEPOINT_DESC(KBASE_TL_ARBITER_STOPPED, \ "Driver has stopped using gpu", \ "@p", \ "gpu") \ + TRACEPOINT_DESC(KBASE_TL_ARBITER_REQUESTED, \ + "Driver has requested the arbiter for gpu access", \ + "@p", \ + "gpu") \ TRACEPOINT_DESC(KBASE_JD_GPU_SOFT_RESET, \ "gpu soft reset", \ "@p", \ @@ -340,42 +341,14 @@ enum tl_msg_id_aux { "KCPU Queue enqueues Wait on Fence", \ "@pp", \ "kcpu_queue,fence") \ - TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ - "Begin array of KCPU Queue enqueues Wait on Cross Queue Sync Object", \ - "@p", \ - "kcpu_queue") \ - TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ - "Array item of KCPU Queue enqueues Wait on Cross Queue Sync Object", \ - "@pLI", \ - "kcpu_queue,cqs_obj_gpu_addr,cqs_obj_compare_value") \ - TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ - "End array of KCPU Queue enqueues Wait on Cross Queue Sync Object", \ - "@p", \ - "kcpu_queue") \ - TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET, \ - "Begin array of KCPU Queue enqueues Set on Cross Queue Sync Object", \ - "@p", \ - "kcpu_queue") \ - TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET, \ - "Array item of KCPU Queue enqueues Set on Cross Queue Sync Object", \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT, \ + "KCPU Queue enqueues Wait on Cross Queue Sync Object", \ + "@pLII", \ + "kcpu_queue,cqs_obj_gpu_addr,cqs_obj_compare_value,cqs_obj_inherit_error") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET, \ + "KCPU Queue enqueues Set on Cross Queue Sync Object", \ "@pL", \ "kcpu_queue,cqs_obj_gpu_addr") \ - TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET, \ - "End array of KCPU Queue enqueues Set on Cross Queue Sync Object", \ - "@p", \ - 
"kcpu_queue") \ - TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \ - "Begin array of KCPU Queue enqueues Debug Copy", \ - "@p", \ - "kcpu_queue") \ - TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \ - "Array item of KCPU Queue enqueues Debug Copy", \ - "@pL", \ - "kcpu_queue,debugcopy_dst_size") \ - TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \ - "End array of KCPU Queue enqueues Debug Copy", \ - "@p", \ - "kcpu_queue") \ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT, \ "KCPU Queue enqueues Map Import", \ "@pL", \ @@ -388,6 +361,14 @@ enum tl_msg_id_aux { "KCPU Queue enqueues Unmap Import ignoring reference count", \ "@pL", \ "kcpu_queue,map_import_buf_gpu_addr") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER, \ + "KCPU Queue enqueues Error Barrier", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND, \ + "KCPU Queue enqueues Group Suspend", \ + "@ppI", \ + "kcpu_queue,group_suspend_buf,gpu_cmdq_grp_handle") \ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \ "Begin array of KCPU Queue enqueues JIT Alloc", \ "@p", \ @@ -418,60 +399,52 @@ enum tl_msg_id_aux { "kcpu_queue") \ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END, \ "KCPU Queue ends a Signal on Fence", \ - "@p", \ - "kcpu_queue") \ + "@pI", \ + "kcpu_queue,execute_error") \ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START, \ "KCPU Queue starts a Wait on Fence", \ "@p", \ "kcpu_queue") \ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END, \ "KCPU Queue ends a Wait on Fence", \ - "@p", \ - "kcpu_queue") \ + "@pI", \ + "kcpu_queue,execute_error") \ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START, \ "KCPU Queue starts a Wait on an array of Cross Queue Sync Objects", \ "@p", \ "kcpu_queue") \ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END, \ "KCPU Queue ends a 
Wait on an array of Cross Queue Sync Objects", \ - "@p", \ - "kcpu_queue") \ + "@pI", \ + "kcpu_queue,execute_error") \ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET, \ "KCPU Queue executes a Set on an array of Cross Queue Sync Objects", \ - "@p", \ - "kcpu_queue") \ - TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START, \ - "KCPU Queue starts an array of Debug Copys", \ - "@p", \ - "kcpu_queue") \ - TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END, \ - "KCPU Queue ends an array of Debug Copys", \ - "@p", \ - "kcpu_queue") \ + "@pI", \ + "kcpu_queue,execute_error") \ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, \ "KCPU Queue starts a Map Import", \ "@p", \ "kcpu_queue") \ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END, \ "KCPU Queue ends a Map Import", \ - "@p", \ - "kcpu_queue") \ + "@pI", \ + "kcpu_queue,execute_error") \ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START, \ "KCPU Queue starts an Unmap Import", \ "@p", \ "kcpu_queue") \ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END, \ "KCPU Queue ends an Unmap Import", \ - "@p", \ - "kcpu_queue") \ + "@pI", \ + "kcpu_queue,execute_error") \ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START, \ "KCPU Queue starts an Unmap Import ignoring reference count", \ "@p", \ "kcpu_queue") \ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END, \ "KCPU Queue ends an Unmap Import ignoring reference count", \ - "@p", \ - "kcpu_queue") \ + "@pI", \ + "kcpu_queue,execute_error") \ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START, \ "KCPU Queue starts an array of JIT Allocs", \ "@p", \ @@ -482,8 +455,8 @@ enum tl_msg_id_aux { "kcpu_queue") \ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ "Array item of KCPU Queue ends an array of JIT Allocs", \ - "@pLL", \ - "kcpu_queue,jit_alloc_gpu_alloc_addr,jit_alloc_mmu_flags") \ + "@pILL", \ 
+ "kcpu_queue,execute_error,jit_alloc_gpu_alloc_addr,jit_alloc_mmu_flags") \ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \ "End array of KCPU Queue ends an array of JIT Allocs", \ "@p", \ @@ -498,20 +471,32 @@ enum tl_msg_id_aux { "kcpu_queue") \ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ "Array item of KCPU Queue ends an array of JIT Frees", \ - "@pL", \ - "kcpu_queue,jit_free_pages_used") \ + "@pIL", \ + "kcpu_queue,execute_error,jit_free_pages_used") \ TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END, \ "End array of KCPU Queue ends an array of JIT Frees", \ "@p", \ "kcpu_queue") \ - TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER, \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER, \ "KCPU Queue executes an Error Barrier", \ "@p", \ "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START, \ + "KCPU Queue starts a group suspend", \ + "@p", \ + "kcpu_queue") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END, \ + "KCPU Queue ends a group suspend", \ + "@pI", \ + "kcpu_queue,execute_error") \ TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW, \ "An overflow has happened with the CSFFW Timeline stream", \ "@LL", \ "csffw_timestamp,csffw_cycle") \ + TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_RESET, \ + "A reset has happened with the CSFFW", \ + "@L", \ + "csffw_cycle") \ #define MIPE_HEADER_BLOB_VAR_NAME __obj_desc_header #define MIPE_HEADER_STREAM_ID TL_STREAM_ID_KERNEL @@ -561,6 +546,10 @@ const size_t obj_desc_header_size = sizeof(__obj_desc_header); "per-bin JIT statistics", \ "@IIIIII", \ "ctx_nr,bid,max_allocs,allocs,va_pages,ph_pages") \ + TRACEPOINT_DESC(KBASE_AUX_TILER_HEAP_STATS, \ + "Tiler Heap statistics", \ + "@ILIIIIIII", \ + "ctx_nr,heap_id,va_pages,ph_pages,max_chunks,chunk_size,chunk_count,target_in_flight,nr_in_flight") \ TRACEPOINT_DESC(KBASE_AUX_EVENT_JOB_SLOT, \ "event on a given 
job slot", \ "@pIII", \ @@ -1493,11 +1482,33 @@ void __kbase_tlstream_tl_event_atom_softjob_end( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_arb_granted( +void __kbase_tlstream_tl_arbiter_granted( + struct kbase_tlstream *stream, + const void *gpu) +{ + const u32 msg_id = KBASE_TL_ARBITER_GRANTED; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(gpu) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu, sizeof(gpu)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_arbiter_started( struct kbase_tlstream *stream, const void *gpu) { - const u32 msg_id = KBASE_TL_EVENT_ARB_GRANTED; + const u32 msg_id = KBASE_TL_ARBITER_STARTED; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(gpu) ; @@ -1515,11 +1526,11 @@ void __kbase_tlstream_tl_event_arb_granted( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_arb_started( +void __kbase_tlstream_tl_arbiter_stop_requested( struct kbase_tlstream *stream, const void *gpu) { - const u32 msg_id = KBASE_TL_EVENT_ARB_STARTED; + const u32 msg_id = KBASE_TL_ARBITER_STOP_REQUESTED; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(gpu) ; @@ -1537,11 +1548,11 @@ void __kbase_tlstream_tl_event_arb_started( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_arb_stop_requested( +void __kbase_tlstream_tl_arbiter_stopped( struct kbase_tlstream *stream, const void *gpu) { - const u32 msg_id = KBASE_TL_EVENT_ARB_STOP_REQUESTED; + const u32 msg_id = KBASE_TL_ARBITER_STOPPED; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(gpu) ; @@ -1559,11 +1570,11 @@ void 
__kbase_tlstream_tl_event_arb_stop_requested( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_event_arb_stopped( +void __kbase_tlstream_tl_arbiter_requested( struct kbase_tlstream *stream, const void *gpu) { - const u32 msg_id = KBASE_TL_EVENT_ARB_STOPPED; + const u32 msg_id = KBASE_TL_ARBITER_REQUESTED; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(gpu) ; @@ -1837,6 +1848,60 @@ void __kbase_tlstream_aux_jit_stats( kbase_tlstream_msgbuf_release(stream, acq_flags); } +void __kbase_tlstream_aux_tiler_heap_stats( + struct kbase_tlstream *stream, + u32 ctx_nr, + u64 heap_id, + u32 va_pages, + u32 ph_pages, + u32 max_chunks, + u32 chunk_size, + u32 chunk_count, + u32 target_in_flight, + u32 nr_in_flight) +{ + const u32 msg_id = KBASE_AUX_TILER_HEAP_STATS; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(ctx_nr) + + sizeof(heap_id) + + sizeof(va_pages) + + sizeof(ph_pages) + + sizeof(max_chunks) + + sizeof(chunk_size) + + sizeof(chunk_count) + + sizeof(target_in_flight) + + sizeof(nr_in_flight) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &ctx_nr, sizeof(ctx_nr)); + pos = kbasep_serialize_bytes(buffer, + pos, &heap_id, sizeof(heap_id)); + pos = kbasep_serialize_bytes(buffer, + pos, &va_pages, sizeof(va_pages)); + pos = kbasep_serialize_bytes(buffer, + pos, &ph_pages, sizeof(ph_pages)); + pos = kbasep_serialize_bytes(buffer, + pos, &max_chunks, sizeof(max_chunks)); + pos = kbasep_serialize_bytes(buffer, + pos, &chunk_size, sizeof(chunk_size)); + pos = kbasep_serialize_bytes(buffer, + pos, &chunk_count, sizeof(chunk_count)); + pos = kbasep_serialize_bytes(buffer, + pos, &target_in_flight, sizeof(target_in_flight)); + pos = kbasep_serialize_bytes(buffer, + 
pos, &nr_in_flight, sizeof(nr_in_flight)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + void __kbase_tlstream_aux_event_job_slot( struct kbase_tlstream *stream, const void *ctx, @@ -2161,39 +2226,19 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_wait( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_cqs_wait( - struct kbase_tlstream *stream, - const void *kcpu_queue) -{ - const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT; - const size_t msg_size = sizeof(msg_id) + sizeof(u64) - + sizeof(kcpu_queue) - ; - char *buffer; - unsigned long acq_flags; - size_t pos = 0; - - buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); - - pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_serialize_timestamp(buffer, pos); - pos = kbasep_serialize_bytes(buffer, - pos, &kcpu_queue, sizeof(kcpu_queue)); - - kbase_tlstream_msgbuf_release(stream, acq_flags); -} - -void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_cqs_wait( +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait( struct kbase_tlstream *stream, const void *kcpu_queue, u64 cqs_obj_gpu_addr, - u32 cqs_obj_compare_value) + u32 cqs_obj_compare_value, + u32 cqs_obj_inherit_error) { - const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) + sizeof(cqs_obj_gpu_addr) + sizeof(cqs_obj_compare_value) + + sizeof(cqs_obj_inherit_error) ; char *buffer; unsigned long acq_flags; @@ -2209,60 +2254,18 @@ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_cqs_wait( pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr)); pos = kbasep_serialize_bytes(buffer, pos, &cqs_obj_compare_value, sizeof(cqs_obj_compare_value)); - - kbase_tlstream_msgbuf_release(stream, acq_flags); -} - -void 
__kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_cqs_wait( - struct kbase_tlstream *stream, - const void *kcpu_queue) -{ - const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT; - const size_t msg_size = sizeof(msg_id) + sizeof(u64) - + sizeof(kcpu_queue) - ; - char *buffer; - unsigned long acq_flags; - size_t pos = 0; - - buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); - - pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_serialize_timestamp(buffer, pos); - pos = kbasep_serialize_bytes(buffer, - pos, &kcpu_queue, sizeof(kcpu_queue)); - - kbase_tlstream_msgbuf_release(stream, acq_flags); -} - -void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_cqs_set( - struct kbase_tlstream *stream, - const void *kcpu_queue) -{ - const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET; - const size_t msg_size = sizeof(msg_id) + sizeof(u64) - + sizeof(kcpu_queue) - ; - char *buffer; - unsigned long acq_flags; - size_t pos = 0; - - buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); - - pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_serialize_timestamp(buffer, pos); pos = kbasep_serialize_bytes(buffer, - pos, &kcpu_queue, sizeof(kcpu_queue)); + pos, &cqs_obj_inherit_error, sizeof(cqs_obj_inherit_error)); kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_cqs_set( +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set( struct kbase_tlstream *stream, const void *kcpu_queue, u64 cqs_obj_gpu_addr) { - const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) + sizeof(cqs_obj_gpu_addr) @@ -2283,13 +2286,15 @@ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_cqs_set( kbase_tlstream_msgbuf_release(stream, 
acq_flags); } -void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_cqs_set( +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import( struct kbase_tlstream *stream, - const void *kcpu_queue) + const void *kcpu_queue, + u64 map_import_buf_gpu_addr) { - const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) + + sizeof(map_import_buf_gpu_addr) ; char *buffer; unsigned long acq_flags; @@ -2301,41 +2306,21 @@ void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_cqs_set( pos = kbasep_serialize_timestamp(buffer, pos); pos = kbasep_serialize_bytes(buffer, pos, &kcpu_queue, sizeof(kcpu_queue)); - - kbase_tlstream_msgbuf_release(stream, acq_flags); -} - -void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_debugcopy( - struct kbase_tlstream *stream, - const void *kcpu_queue) -{ - const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY; - const size_t msg_size = sizeof(msg_id) + sizeof(u64) - + sizeof(kcpu_queue) - ; - char *buffer; - unsigned long acq_flags; - size_t pos = 0; - - buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); - - pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_serialize_timestamp(buffer, pos); pos = kbasep_serialize_bytes(buffer, - pos, &kcpu_queue, sizeof(kcpu_queue)); + pos, &map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr)); kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_debugcopy( +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import( struct kbase_tlstream *stream, const void *kcpu_queue, - u64 debugcopy_dst_size) + u64 map_import_buf_gpu_addr) { - const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT; const size_t msg_size = 
sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) - + sizeof(debugcopy_dst_size) + + sizeof(map_import_buf_gpu_addr) ; char *buffer; unsigned long acq_flags; @@ -2348,39 +2333,17 @@ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_debugcopy( pos = kbasep_serialize_bytes(buffer, pos, &kcpu_queue, sizeof(kcpu_queue)); pos = kbasep_serialize_bytes(buffer, - pos, &debugcopy_dst_size, sizeof(debugcopy_dst_size)); - - kbase_tlstream_msgbuf_release(stream, acq_flags); -} - -void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_debugcopy( - struct kbase_tlstream *stream, - const void *kcpu_queue) -{ - const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY; - const size_t msg_size = sizeof(msg_id) + sizeof(u64) - + sizeof(kcpu_queue) - ; - char *buffer; - unsigned long acq_flags; - size_t pos = 0; - - buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); - - pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_serialize_timestamp(buffer, pos); - pos = kbasep_serialize_bytes(buffer, - pos, &kcpu_queue, sizeof(kcpu_queue)); + pos, &map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr)); kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import( +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force( struct kbase_tlstream *stream, const void *kcpu_queue, u64 map_import_buf_gpu_addr) { - const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) + sizeof(map_import_buf_gpu_addr) @@ -2401,15 +2364,13 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import( +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier( struct kbase_tlstream *stream, - const 
void *kcpu_queue, - u64 map_import_buf_gpu_addr) + const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) - + sizeof(map_import_buf_gpu_addr) ; char *buffer; unsigned long acq_flags; @@ -2421,21 +2382,21 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import( pos = kbasep_serialize_timestamp(buffer, pos); pos = kbasep_serialize_bytes(buffer, pos, &kcpu_queue, sizeof(kcpu_queue)); - pos = kbasep_serialize_bytes(buffer, - pos, &map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr)); kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force( +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend( struct kbase_tlstream *stream, const void *kcpu_queue, - u64 map_import_buf_gpu_addr) + const void *group_suspend_buf, + u32 gpu_cmdq_grp_handle) { - const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE; + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) - + sizeof(map_import_buf_gpu_addr) + + sizeof(group_suspend_buf) + + sizeof(gpu_cmdq_grp_handle) ; char *buffer; unsigned long acq_flags; @@ -2448,7 +2409,9 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force( pos = kbasep_serialize_bytes(buffer, pos, &kcpu_queue, sizeof(kcpu_queue)); pos = kbasep_serialize_bytes(buffer, - pos, &map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr)); + pos, &group_suspend_buf, sizeof(group_suspend_buf)); + pos = kbasep_serialize_bytes(buffer, + pos, &gpu_cmdq_grp_handle, sizeof(gpu_cmdq_grp_handle)); kbase_tlstream_msgbuf_release(stream, acq_flags); } @@ -2649,11 +2612,13 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start( void 
__kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end( struct kbase_tlstream *stream, - const void *kcpu_queue) + const void *kcpu_queue, + u32 execute_error) { const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) + + sizeof(execute_error) ; char *buffer; unsigned long acq_flags; @@ -2665,6 +2630,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end( pos = kbasep_serialize_timestamp(buffer, pos); pos = kbasep_serialize_bytes(buffer, pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &execute_error, sizeof(execute_error)); kbase_tlstream_msgbuf_release(stream, acq_flags); } @@ -2693,11 +2660,13 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_start( void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end( struct kbase_tlstream *stream, - const void *kcpu_queue) + const void *kcpu_queue, + u32 execute_error) { const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) + + sizeof(execute_error) ; char *buffer; unsigned long acq_flags; @@ -2709,6 +2678,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end( pos = kbasep_serialize_timestamp(buffer, pos); pos = kbasep_serialize_bytes(buffer, pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &execute_error, sizeof(execute_error)); kbase_tlstream_msgbuf_release(stream, acq_flags); } @@ -2737,11 +2708,13 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_start( void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end( struct kbase_tlstream *stream, - const void *kcpu_queue) + const void *kcpu_queue, + u32 execute_error) { const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) + + sizeof(execute_error) ; char *buffer; unsigned 
long acq_flags; @@ -2753,17 +2726,21 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end( pos = kbasep_serialize_timestamp(buffer, pos); pos = kbasep_serialize_bytes(buffer, pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &execute_error, sizeof(execute_error)); kbase_tlstream_msgbuf_release(stream, acq_flags); } void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set( struct kbase_tlstream *stream, - const void *kcpu_queue) + const void *kcpu_queue, + u32 execute_error) { const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) + + sizeof(execute_error) ; char *buffer; unsigned long acq_flags; @@ -2775,50 +2752,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set( pos = kbasep_serialize_timestamp(buffer, pos); pos = kbasep_serialize_bytes(buffer, pos, &kcpu_queue, sizeof(kcpu_queue)); - - kbase_tlstream_msgbuf_release(stream, acq_flags); -} - -void __kbase_tlstream_tl_kbase_kcpuqueue_execute_debugcopy_start( - struct kbase_tlstream *stream, - const void *kcpu_queue) -{ - const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START; - const size_t msg_size = sizeof(msg_id) + sizeof(u64) - + sizeof(kcpu_queue) - ; - char *buffer; - unsigned long acq_flags; - size_t pos = 0; - - buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); - - pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_serialize_timestamp(buffer, pos); - pos = kbasep_serialize_bytes(buffer, - pos, &kcpu_queue, sizeof(kcpu_queue)); - - kbase_tlstream_msgbuf_release(stream, acq_flags); -} - -void __kbase_tlstream_tl_kbase_kcpuqueue_execute_debugcopy_end( - struct kbase_tlstream *stream, - const void *kcpu_queue) -{ - const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END; - const size_t msg_size = sizeof(msg_id) + sizeof(u64) - + sizeof(kcpu_queue) - ; - char *buffer; - unsigned long acq_flags; - size_t 
pos = 0; - - buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); - - pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_serialize_timestamp(buffer, pos); pos = kbasep_serialize_bytes(buffer, - pos, &kcpu_queue, sizeof(kcpu_queue)); + pos, &execute_error, sizeof(execute_error)); kbase_tlstream_msgbuf_release(stream, acq_flags); } @@ -2847,11 +2782,13 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start( void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end( struct kbase_tlstream *stream, - const void *kcpu_queue) + const void *kcpu_queue, + u32 execute_error) { const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) + + sizeof(execute_error) ; char *buffer; unsigned long acq_flags; @@ -2863,6 +2800,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end( pos = kbasep_serialize_timestamp(buffer, pos); pos = kbasep_serialize_bytes(buffer, pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &execute_error, sizeof(execute_error)); kbase_tlstream_msgbuf_release(stream, acq_flags); } @@ -2891,11 +2830,13 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_start( void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end( struct kbase_tlstream *stream, - const void *kcpu_queue) + const void *kcpu_queue, + u32 execute_error) { const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) + + sizeof(execute_error) ; char *buffer; unsigned long acq_flags; @@ -2907,6 +2848,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end( pos = kbasep_serialize_timestamp(buffer, pos); pos = kbasep_serialize_bytes(buffer, pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &execute_error, sizeof(execute_error)); 
kbase_tlstream_msgbuf_release(stream, acq_flags); } @@ -2935,11 +2878,13 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_start( void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end( struct kbase_tlstream *stream, - const void *kcpu_queue) + const void *kcpu_queue, + u32 execute_error) { const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) + + sizeof(execute_error) ; char *buffer; unsigned long acq_flags; @@ -2951,6 +2896,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end( pos = kbasep_serialize_timestamp(buffer, pos); pos = kbasep_serialize_bytes(buffer, pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &execute_error, sizeof(execute_error)); kbase_tlstream_msgbuf_release(stream, acq_flags); } @@ -3002,12 +2949,14 @@ void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_alloc_end( void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end( struct kbase_tlstream *stream, const void *kcpu_queue, + u32 execute_error, u64 jit_alloc_gpu_alloc_addr, u64 jit_alloc_mmu_flags) { const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) + + sizeof(execute_error) + sizeof(jit_alloc_gpu_alloc_addr) + sizeof(jit_alloc_mmu_flags) ; @@ -3021,6 +2970,8 @@ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end( pos = kbasep_serialize_timestamp(buffer, pos); pos = kbasep_serialize_bytes(buffer, pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &execute_error, sizeof(execute_error)); pos = kbasep_serialize_bytes(buffer, pos, &jit_alloc_gpu_alloc_addr, sizeof(jit_alloc_gpu_alloc_addr)); pos = kbasep_serialize_bytes(buffer, @@ -3098,11 +3049,13 @@ void 
__kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_free_end( void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end( struct kbase_tlstream *stream, const void *kcpu_queue, + u32 execute_error, u64 jit_free_pages_used) { const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) + + sizeof(execute_error) + sizeof(jit_free_pages_used) ; char *buffer; @@ -3115,6 +3068,8 @@ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end( pos = kbasep_serialize_timestamp(buffer, pos); pos = kbasep_serialize_bytes(buffer, pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &execute_error, sizeof(execute_error)); pos = kbasep_serialize_bytes(buffer, pos, &jit_free_pages_used, sizeof(jit_free_pages_used)); @@ -3143,13 +3098,59 @@ void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end( kbase_tlstream_msgbuf_release(stream, acq_flags); } -void __kbase_tlstream_tl_kbase_kcpuqueue_execute_errorbarrier( +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_error_barrier( + struct kbase_tlstream *stream, + const void *kcpu_queue) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_start( struct kbase_tlstream *stream, const void *kcpu_queue) { - const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER; + const u32 msg_id = 
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(kcpu_queue) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &kcpu_queue, sizeof(kcpu_queue)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); +} + +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_end( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u32 execute_error) +{ + const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END; const size_t msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(kcpu_queue) + + sizeof(execute_error) ; char *buffer; unsigned long acq_flags; @@ -3161,6 +3162,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_errorbarrier( pos = kbasep_serialize_timestamp(buffer, pos); pos = kbasep_serialize_bytes(buffer, pos, &kcpu_queue, sizeof(kcpu_queue)); + pos = kbasep_serialize_bytes(buffer, + pos, &execute_error, sizeof(execute_error)); kbase_tlstream_msgbuf_release(stream, acq_flags); } @@ -3191,4 +3194,26 @@ void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( kbase_tlstream_msgbuf_release(stream, acq_flags); } +void __kbase_tlstream_tl_kbase_csffw_reset( + struct kbase_tlstream *stream, + u64 csffw_cycle) +{ + const u32 msg_id = KBASE_TL_KBASE_CSFFW_RESET; + const size_t msg_size = sizeof(msg_id) + sizeof(u64) + + sizeof(csffw_cycle) + ; + char *buffer; + unsigned long acq_flags; + size_t pos = 0; + + buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags); + + pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_serialize_timestamp(buffer, pos); + pos = kbasep_serialize_bytes(buffer, + pos, &csffw_cycle, sizeof(csffw_cycle)); + + kbase_tlstream_msgbuf_release(stream, acq_flags); 
+} + /* clang-format on */ diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tracepoints.h b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tracepoints.h index 7ea8ba8..887a1aa 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tracepoints.h +++ b/dvalin/kernel/drivers/gpu/arm/midgard/tl/mali_kbase_tracepoints.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. 
* - * SPDX-License-Identifier: GPL-2.0 - * */ /* @@ -226,16 +225,19 @@ void __kbase_tlstream_tl_event_atom_softjob_start( void __kbase_tlstream_tl_event_atom_softjob_end( struct kbase_tlstream *stream, const void *atom); -void __kbase_tlstream_tl_event_arb_granted( +void __kbase_tlstream_tl_arbiter_granted( + struct kbase_tlstream *stream, + const void *gpu); +void __kbase_tlstream_tl_arbiter_started( struct kbase_tlstream *stream, const void *gpu); -void __kbase_tlstream_tl_event_arb_started( +void __kbase_tlstream_tl_arbiter_stop_requested( struct kbase_tlstream *stream, const void *gpu); -void __kbase_tlstream_tl_event_arb_stop_requested( +void __kbase_tlstream_tl_arbiter_stopped( struct kbase_tlstream *stream, const void *gpu); -void __kbase_tlstream_tl_event_arb_stopped( +void __kbase_tlstream_tl_arbiter_requested( struct kbase_tlstream *stream, const void *gpu); void __kbase_tlstream_jd_gpu_soft_reset( @@ -277,6 +279,17 @@ void __kbase_tlstream_aux_jit_stats( u32 allocs, u32 va_pages, u32 ph_pages); +void __kbase_tlstream_aux_tiler_heap_stats( + struct kbase_tlstream *stream, + u32 ctx_nr, + u64 heap_id, + u32 va_pages, + u32 ph_pages, + u32 max_chunks, + u32 chunk_size, + u32 chunk_count, + u32 target_in_flight, + u32 nr_in_flight); void __kbase_tlstream_aux_event_job_slot( struct kbase_tlstream *stream, const void *ctx, @@ -328,37 +341,16 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_wait( struct kbase_tlstream *stream, const void *kcpu_queue, const void *fence); -void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_cqs_wait( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_cqs_wait( +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait( struct kbase_tlstream *stream, const void *kcpu_queue, u64 cqs_obj_gpu_addr, - u32 cqs_obj_compare_value); -void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_cqs_wait( - struct kbase_tlstream *stream, - const void 
*kcpu_queue); -void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_cqs_set( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_cqs_set( + u32 cqs_obj_compare_value, + u32 cqs_obj_inherit_error); +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set( struct kbase_tlstream *stream, const void *kcpu_queue, u64 cqs_obj_gpu_addr); -void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_cqs_set( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_debugcopy( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_debugcopy( - struct kbase_tlstream *stream, - const void *kcpu_queue, - u64 debugcopy_dst_size); -void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_debugcopy( - struct kbase_tlstream *stream, - const void *kcpu_queue); void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import( struct kbase_tlstream *stream, const void *kcpu_queue, @@ -371,6 +363,14 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force( struct kbase_tlstream *stream, const void *kcpu_queue, u64 map_import_buf_gpu_addr); +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend( + struct kbase_tlstream *stream, + const void *kcpu_queue, + const void *group_suspend_buf, + u32 gpu_cmdq_grp_handle); void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_alloc( struct kbase_tlstream *stream, const void *kcpu_queue); @@ -404,46 +404,47 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start( const void *kcpu_queue); void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end( struct kbase_tlstream *stream, - const void *kcpu_queue); + const void *kcpu_queue, + u32 execute_error); void 
__kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_start( struct kbase_tlstream *stream, const void *kcpu_queue); void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end( struct kbase_tlstream *stream, - const void *kcpu_queue); + const void *kcpu_queue, + u32 execute_error); void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_start( struct kbase_tlstream *stream, const void *kcpu_queue); void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end( struct kbase_tlstream *stream, - const void *kcpu_queue); + const void *kcpu_queue, + u32 execute_error); void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set( struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_kbase_kcpuqueue_execute_debugcopy_start( - struct kbase_tlstream *stream, - const void *kcpu_queue); -void __kbase_tlstream_tl_kbase_kcpuqueue_execute_debugcopy_end( - struct kbase_tlstream *stream, - const void *kcpu_queue); + const void *kcpu_queue, + u32 execute_error); void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start( struct kbase_tlstream *stream, const void *kcpu_queue); void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end( struct kbase_tlstream *stream, - const void *kcpu_queue); + const void *kcpu_queue, + u32 execute_error); void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_start( struct kbase_tlstream *stream, const void *kcpu_queue); void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end( struct kbase_tlstream *stream, - const void *kcpu_queue); + const void *kcpu_queue, + u32 execute_error); void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_start( struct kbase_tlstream *stream, const void *kcpu_queue); void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end( struct kbase_tlstream *stream, - const void *kcpu_queue); + const void *kcpu_queue, + u32 execute_error); void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_alloc_start( struct kbase_tlstream *stream, const 
void *kcpu_queue); @@ -453,6 +454,7 @@ void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_alloc_end( void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end( struct kbase_tlstream *stream, const void *kcpu_queue, + u32 execute_error, u64 jit_alloc_gpu_alloc_addr, u64 jit_alloc_mmu_flags); void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_alloc_end( @@ -467,17 +469,28 @@ void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_free_end( void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end( struct kbase_tlstream *stream, const void *kcpu_queue, + u32 execute_error, u64 jit_free_pages_used); void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end( struct kbase_tlstream *stream, const void *kcpu_queue); -void __kbase_tlstream_tl_kbase_kcpuqueue_execute_errorbarrier( +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_error_barrier( + struct kbase_tlstream *stream, + const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_start( struct kbase_tlstream *stream, const void *kcpu_queue); +void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_end( + struct kbase_tlstream *stream, + const void *kcpu_queue, + u32 execute_error); void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( struct kbase_tlstream *stream, u64 csffw_timestamp, u64 csffw_cycle); +void __kbase_tlstream_tl_kbase_csffw_reset( + struct kbase_tlstream *stream, + u64 csffw_cycle); struct kbase_tlstream; @@ -1215,77 +1228,96 @@ struct kbase_tlstream; } while (0) /** - * KBASE_TLSTREAM_TL_EVENT_ARB_GRANTED - + * KBASE_TLSTREAM_TL_ARBITER_GRANTED - * Arbiter has granted gpu access * * @kbdev: Kbase device * @gpu: Name of the GPU object */ -#define KBASE_TLSTREAM_TL_EVENT_ARB_GRANTED( \ +#define KBASE_TLSTREAM_TL_ARBITER_GRANTED( \ kbdev, \ gpu \ ) \ do { \ int enabled = atomic_read(&kbdev->timeline_flags); \ if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_arb_granted( \ + 
__kbase_tlstream_tl_arbiter_granted( \ __TL_DISPATCH_STREAM(kbdev, obj), \ gpu); \ } while (0) /** - * KBASE_TLSTREAM_TL_EVENT_ARB_STARTED - + * KBASE_TLSTREAM_TL_ARBITER_STARTED - * Driver is running again and able to process jobs * * @kbdev: Kbase device * @gpu: Name of the GPU object */ -#define KBASE_TLSTREAM_TL_EVENT_ARB_STARTED( \ +#define KBASE_TLSTREAM_TL_ARBITER_STARTED( \ kbdev, \ gpu \ ) \ do { \ int enabled = atomic_read(&kbdev->timeline_flags); \ if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_arb_started( \ + __kbase_tlstream_tl_arbiter_started( \ __TL_DISPATCH_STREAM(kbdev, obj), \ gpu); \ } while (0) /** - * KBASE_TLSTREAM_TL_EVENT_ARB_STOP_REQUESTED - + * KBASE_TLSTREAM_TL_ARBITER_STOP_REQUESTED - * Arbiter has requested driver to stop using gpu * * @kbdev: Kbase device * @gpu: Name of the GPU object */ -#define KBASE_TLSTREAM_TL_EVENT_ARB_STOP_REQUESTED( \ +#define KBASE_TLSTREAM_TL_ARBITER_STOP_REQUESTED( \ kbdev, \ gpu \ ) \ do { \ int enabled = atomic_read(&kbdev->timeline_flags); \ if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_arb_stop_requested( \ + __kbase_tlstream_tl_arbiter_stop_requested( \ __TL_DISPATCH_STREAM(kbdev, obj), \ gpu); \ } while (0) /** - * KBASE_TLSTREAM_TL_EVENT_ARB_STOPPED - + * KBASE_TLSTREAM_TL_ARBITER_STOPPED - * Driver has stopped using gpu * * @kbdev: Kbase device * @gpu: Name of the GPU object */ -#define KBASE_TLSTREAM_TL_EVENT_ARB_STOPPED( \ +#define KBASE_TLSTREAM_TL_ARBITER_STOPPED( \ kbdev, \ gpu \ ) \ do { \ int enabled = atomic_read(&kbdev->timeline_flags); \ if (enabled & TLSTREAM_ENABLED) \ - __kbase_tlstream_tl_event_arb_stopped( \ + __kbase_tlstream_tl_arbiter_stopped( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + gpu); \ + } while (0) + +/** + * KBASE_TLSTREAM_TL_ARBITER_REQUESTED - + * Driver has requested the arbiter for gpu access + * + * @kbdev: Kbase device + * @gpu: Name of the GPU object + */ +#define KBASE_TLSTREAM_TL_ARBITER_REQUESTED( \ + kbdev, \ + gpu \ + ) \ + do { \ 
+ int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_tl_arbiter_requested( \ __TL_DISPATCH_STREAM(kbdev, obj), \ gpu); \ } while (0) @@ -1499,6 +1531,42 @@ struct kbase_tlstream; ctx_nr, bid, max_allocs, allocs, va_pages, ph_pages); \ } while (0) +/** + * KBASE_TLSTREAM_AUX_TILER_HEAP_STATS - + * Tiler Heap statistics + * + * @kbdev: Kbase device + * @ctx_nr: Kernel context number + * @heap_id: Unique id used to represent a heap under a context + * @va_pages: Number of virtual pages allocated in this bin + * @ph_pages: Number of physical pages allocated in this bin + * @max_chunks: The maximum number of chunks that the heap should be allowed to use + * @chunk_size: Size of each chunk in tiler heap, in bytes + * @chunk_count: The number of chunks currently allocated in the tiler heap + * @target_in_flight: Number of render-passes that the driver should attempt + * to keep in flight for which allocation of new chunks is allowed + * @nr_in_flight: Number of render-passes that are in flight + */ +#define KBASE_TLSTREAM_AUX_TILER_HEAP_STATS( \ + kbdev, \ + ctx_nr, \ + heap_id, \ + va_pages, \ + ph_pages, \ + max_chunks, \ + chunk_size, \ + chunk_count, \ + target_in_flight, \ + nr_in_flight \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & TLSTREAM_ENABLED) \ + __kbase_tlstream_aux_tiler_heap_stats( \ + __TL_DISPATCH_STREAM(kbdev, aux), \ + ctx_nr, heap_id, va_pages, ph_pages, max_chunks, chunk_size, chunk_count, target_in_flight, nr_in_flight); \ + } while (0) + /** * KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT - * event on a given job slot @@ -1534,6 +1602,22 @@ struct kbase_tlstream; * @kbase_device_max_num_csgs: The max number of CSGs the physical hardware supports * @kbase_device_as_count: The number of address spaces the physical hardware has available */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_NEW_DEVICE( \ + kbdev, \ + kbase_device_id, \ + kbase_device_gpu_core_count, 
\ + kbase_device_max_num_csgs, \ + kbase_device_as_count \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_new_device( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kbase_device_id, kbase_device_gpu_core_count, kbase_device_max_num_csgs, kbase_device_as_count); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_NEW_DEVICE( \ kbdev, \ kbase_device_id, \ @@ -1542,6 +1626,7 @@ struct kbase_tlstream; kbase_device_as_count \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG - @@ -1552,6 +1637,21 @@ struct kbase_tlstream; * @gpu_cmdq_grp_handle: GPU Command Queue Group handle which will match userspace * @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( \ + kbdev, \ + kbase_device_id, \ + gpu_cmdq_grp_handle, \ + kbase_device_csg_slot_index \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_device_program_csg( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kbase_device_id, gpu_cmdq_grp_handle, kbase_device_csg_slot_index); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( \ kbdev, \ kbase_device_id, \ @@ -1559,6 +1659,7 @@ struct kbase_tlstream; kbase_device_csg_slot_index \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG - @@ -1568,12 +1669,27 @@ struct kbase_tlstream; * @kbase_device_id: The id of the physical hardware * @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG( \ + kbdev, \ + kbase_device_id, \ + kbase_device_csg_slot_index \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & 
BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_device_deprogram_csg( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kbase_device_id, kbase_device_csg_slot_index); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG( \ kbdev, \ kbase_device_id, \ kbase_device_csg_slot_index \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_NEW_CTX - @@ -1583,12 +1699,27 @@ struct kbase_tlstream; * @kernel_ctx_id: Unique ID for the KBase Context * @kbase_device_id: The id of the physical hardware */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_NEW_CTX( \ + kbdev, \ + kernel_ctx_id, \ + kbase_device_id \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_new_ctx( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kernel_ctx_id, kbase_device_id); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_NEW_CTX( \ kbdev, \ kernel_ctx_id, \ kbase_device_id \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_DEL_CTX - @@ -1597,11 +1728,25 @@ struct kbase_tlstream; * @kbdev: Kbase device * @kernel_ctx_id: Unique ID for the KBase Context */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_DEL_CTX( \ + kbdev, \ + kernel_ctx_id \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_del_ctx( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kernel_ctx_id); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_DEL_CTX( \ kbdev, \ kernel_ctx_id \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS - @@ -1611,12 +1756,27 @@ struct kbase_tlstream; * @kernel_ctx_id: Unique ID for the KBase Context * @kbase_device_as_index: The index of the device address space being assigned */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS( \ + kbdev, \ + kernel_ctx_id, 
\ + kbase_device_as_index \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_ctx_assign_as( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kernel_ctx_id, kbase_device_as_index); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS( \ kbdev, \ kernel_ctx_id, \ kbase_device_as_index \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS - @@ -1625,11 +1785,25 @@ struct kbase_tlstream; * @kbdev: Kbase device * @kernel_ctx_id: Unique ID for the KBase Context */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( \ + kbdev, \ + kernel_ctx_id \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_ctx_unassign_as( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kernel_ctx_id); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( \ kbdev, \ kernel_ctx_id \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE - @@ -1641,6 +1815,21 @@ struct kbase_tlstream; * @kcpuq_num_pending_cmds: Number of commands already enqueued * in the KCPU queue */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE( \ + kbdev, \ + kcpu_queue, \ + kernel_ctx_id, \ + kcpuq_num_pending_cmds \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_new_kcpuqueue( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue, kernel_ctx_id, kcpuq_num_pending_cmds); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE( \ kbdev, \ kcpu_queue, \ @@ -1648,6 +1837,7 @@ struct kbase_tlstream; kcpuq_num_pending_cmds \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE - @@ -1656,11 +1846,25 @@ struct kbase_tlstream; * @kbdev: Kbase 
device * @kcpu_queue: KCPU queue */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE( \ + kbdev, \ + kcpu_queue \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_del_kcpuqueue( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE( \ kbdev, \ kcpu_queue \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL - @@ -1670,12 +1874,27 @@ struct kbase_tlstream; * @kcpu_queue: KCPU queue * @fence: Fence object handle */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL( \ + kbdev, \ + kcpu_queue, \ + fence \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_signal( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue, fence); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL( \ kbdev, \ kcpu_queue, \ fence \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT - @@ -1685,183 +1904,244 @@ struct kbase_tlstream; * @kcpu_queue: KCPU queue * @fence: Fence object handle */ +#if MALI_USE_CSF #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT( \ kbdev, \ kcpu_queue, \ fence \ ) \ - do { } while (0) - -/** - * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT - - * Begin array of KCPU Queue enqueues Wait on Cross Queue Sync Object - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT( \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_wait( \ + 
__TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue, fence); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT( \ kbdev, \ - kcpu_queue \ + kcpu_queue, \ + fence \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** - * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT - - * Array item of KCPU Queue enqueues Wait on Cross Queue Sync Object + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT - + * KCPU Queue enqueues Wait on Cross Queue Sync Object * * @kbdev: Kbase device * @kcpu_queue: KCPU queue - * @cqs_obj_gpu_addr: CQS Object GPU ptr + * @cqs_obj_gpu_addr: CQS Object GPU pointer * @cqs_obj_compare_value: Semaphore value that should be exceeded * for the WAIT to pass + * @cqs_obj_inherit_error: Indicates the error state should be inherited into the queue or not */ -#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT( \ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT( \ + kbdev, \ + kcpu_queue, \ + cqs_obj_gpu_addr, \ + cqs_obj_compare_value, \ + cqs_obj_inherit_error \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue, cqs_obj_gpu_addr, cqs_obj_compare_value, cqs_obj_inherit_error); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT( \ kbdev, \ kcpu_queue, \ cqs_obj_gpu_addr, \ - cqs_obj_compare_value \ + cqs_obj_compare_value, \ + cqs_obj_inherit_error \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** - * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT - - * End array of KCPU Queue enqueues Wait on Cross Queue Sync Object + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET - + * KCPU Queue enqueues Set on Cross Queue Sync Object * * @kbdev: Kbase device * @kcpu_queue: KCPU queue + * @cqs_obj_gpu_addr: CQS Object GPU pointer */ -#define 
KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT( \ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET( \ kbdev, \ - kcpu_queue \ + kcpu_queue, \ + cqs_obj_gpu_addr \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue, cqs_obj_gpu_addr); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET( \ + kbdev, \ + kcpu_queue, \ + cqs_obj_gpu_addr \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** - * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET - - * Begin array of KCPU Queue enqueues Set on Cross Queue Sync Object + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT - + * KCPU Queue enqueues Map Import * * @kbdev: Kbase device * @kcpu_queue: KCPU queue + * @map_import_buf_gpu_addr: Map import buffer GPU pointer */ -#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET( \ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT( \ kbdev, \ - kcpu_queue \ + kcpu_queue, \ + map_import_buf_gpu_addr \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue, map_import_buf_gpu_addr); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT( \ + kbdev, \ + kcpu_queue, \ + map_import_buf_gpu_addr \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** - * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET - - * Array item of KCPU Queue enqueues Set on Cross Queue Sync Object + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT - + * KCPU Queue enqueues Unmap Import * * @kbdev: Kbase device * @kcpu_queue: KCPU queue - * @cqs_obj_gpu_addr: CQS Object GPU 
ptr + * @map_import_buf_gpu_addr: Map import buffer GPU pointer */ -#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET( \ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT( \ kbdev, \ kcpu_queue, \ - cqs_obj_gpu_addr \ + map_import_buf_gpu_addr \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue, map_import_buf_gpu_addr); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT( \ + kbdev, \ + kcpu_queue, \ + map_import_buf_gpu_addr \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** - * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET - - * End array of KCPU Queue enqueues Set on Cross Queue Sync Object + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE - + * KCPU Queue enqueues Unmap Import ignoring reference count * * @kbdev: Kbase device * @kcpu_queue: KCPU queue + * @map_import_buf_gpu_addr: Map import buffer GPU pointer */ -#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET( \ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE( \ kbdev, \ - kcpu_queue \ + kcpu_queue, \ + map_import_buf_gpu_addr \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue, map_import_buf_gpu_addr); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE( \ + kbdev, \ + kcpu_queue, \ + map_import_buf_gpu_addr \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** - * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY - - * Begin array of KCPU Queue enqueues Debug Copy + * 
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER - + * KCPU Queue enqueues Error Barrier * * @kbdev: Kbase device * @kcpu_queue: KCPU queue */ -#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY( \ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER( \ + kbdev, \ + kcpu_queue \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER( \ kbdev, \ kcpu_queue \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** - * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY - - * Array item of KCPU Queue enqueues Debug Copy + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND - + * KCPU Queue enqueues Group Suspend * * @kbdev: Kbase device * @kcpu_queue: KCPU queue - * @debugcopy_dst_size: Debug Copy destination size - */ -#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY( \ - kbdev, \ - kcpu_queue, \ - debugcopy_dst_size \ - ) \ - do { } while (0) - -/** - * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY - - * End array of KCPU Queue enqueues Debug Copy - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY( \ - kbdev, \ - kcpu_queue \ - ) \ - do { } while (0) - -/** - * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT - - * KCPU Queue enqueues Map Import - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - * @map_import_buf_gpu_addr: Map import buffer GPU ptr - */ -#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT( \ - kbdev, \ - kcpu_queue, \ - map_import_buf_gpu_addr \ - ) \ - do { } while (0) - -/** - * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT - - * KCPU Queue enqueues Unmap 
Import - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - * @map_import_buf_gpu_addr: Map import buffer GPU ptr + * @group_suspend_buf: Pointer to the suspend buffer structure + * @gpu_cmdq_grp_handle: GPU Command Queue Group handle which will match userspace */ -#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT( \ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND( \ kbdev, \ kcpu_queue, \ - map_import_buf_gpu_addr \ + group_suspend_buf, \ + gpu_cmdq_grp_handle \ ) \ - do { } while (0) - -/** - * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE - - * KCPU Queue enqueues Unmap Import ignoring reference count - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - * @map_import_buf_gpu_addr: Map import buffer GPU ptr - */ -#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE( \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue, group_suspend_buf, gpu_cmdq_grp_handle); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND( \ kbdev, \ kcpu_queue, \ - map_import_buf_gpu_addr \ + group_suspend_buf, \ + gpu_cmdq_grp_handle \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC - @@ -1870,11 +2150,25 @@ struct kbase_tlstream; * @kbdev: Kbase device * @kcpu_queue: KCPU queue */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ + kbdev, \ + kcpu_queue \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_alloc( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue); \ + } while (0) +#else #define 
KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ kbdev, \ kcpu_queue \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC - @@ -1902,6 +2196,28 @@ struct kbase_tlstream; * reused. The kernel should attempt to use a previous allocation with the same * usage_id */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ + kbdev, \ + kcpu_queue, \ + jit_alloc_gpu_alloc_addr_dest, \ + jit_alloc_va_pages, \ + jit_alloc_commit_pages, \ + jit_alloc_extent, \ + jit_alloc_jit_id, \ + jit_alloc_bin_id, \ + jit_alloc_max_allocations, \ + jit_alloc_flags, \ + jit_alloc_usage_id \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_alloc( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue, jit_alloc_gpu_alloc_addr_dest, jit_alloc_va_pages, jit_alloc_commit_pages, jit_alloc_extent, jit_alloc_jit_id, jit_alloc_bin_id, jit_alloc_max_allocations, jit_alloc_flags, jit_alloc_usage_id); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ kbdev, \ kcpu_queue, \ @@ -1916,6 +2232,7 @@ struct kbase_tlstream; jit_alloc_usage_id \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC - @@ -1924,11 +2241,25 @@ struct kbase_tlstream; * @kbdev: Kbase device * @kcpu_queue: KCPU queue */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \ + kbdev, \ + kcpu_queue \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_alloc( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC( 
\ kbdev, \ kcpu_queue \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE - @@ -1937,11 +2268,25 @@ struct kbase_tlstream; * @kbdev: Kbase device * @kcpu_queue: KCPU queue */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE( \ + kbdev, \ + kcpu_queue \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_free( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE( \ kbdev, \ kcpu_queue \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE - @@ -1952,12 +2297,27 @@ struct kbase_tlstream; * @jit_alloc_jit_id: Unique ID provided by the caller, this is used * to pair allocation and free requests. 
Zero is not a valid value */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE( \ + kbdev, \ + kcpu_queue, \ + jit_alloc_jit_id \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_free( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue, jit_alloc_jit_id); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE( \ kbdev, \ kcpu_queue, \ jit_alloc_jit_id \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE - @@ -1966,11 +2326,25 @@ struct kbase_tlstream; * @kbdev: Kbase device * @kcpu_queue: KCPU queue */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE( \ + kbdev, \ + kcpu_queue \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_free( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE( \ kbdev, \ kcpu_queue \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START - @@ -1979,11 +2353,25 @@ struct kbase_tlstream; * @kbdev: Kbase device * @kcpu_queue: KCPU queue */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START( \ kbdev, \ kcpu_queue \ ) \ do { } while (0) +#endif /* 
MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END - @@ -1991,12 +2379,29 @@ struct kbase_tlstream; * * @kbdev: Kbase device * @kcpu_queue: KCPU queue + * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero */ +#if MALI_USE_CSF #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END( \ kbdev, \ - kcpu_queue \ + kcpu_queue, \ + execute_error \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue, execute_error); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END( \ + kbdev, \ + kcpu_queue, \ + execute_error \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START - @@ -2005,11 +2410,25 @@ struct kbase_tlstream; * @kbdev: Kbase device * @kcpu_queue: KCPU queue */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_start( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START( \ kbdev, \ kcpu_queue \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END - @@ -2017,12 +2436,29 @@ struct kbase_tlstream; * * @kbdev: Kbase device * @kcpu_queue: KCPU queue + * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero */ +#if MALI_USE_CSF #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END( \ kbdev, \ - kcpu_queue \ + kcpu_queue, \ + execute_error \ + ) \ + do { \ + int enabled = 
atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue, execute_error); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END( \ + kbdev, \ + kcpu_queue, \ + execute_error \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START - @@ -2031,11 +2467,25 @@ struct kbase_tlstream; * @kbdev: Kbase device * @kcpu_queue: KCPU queue */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_start( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START( \ kbdev, \ kcpu_queue \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END - @@ -2043,12 +2493,29 @@ struct kbase_tlstream; * * @kbdev: Kbase device * @kcpu_queue: KCPU queue + * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero */ +#if MALI_USE_CSF #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END( \ kbdev, \ - kcpu_queue \ + kcpu_queue, \ + execute_error \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue, execute_error); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END( \ + kbdev, \ + kcpu_queue, \ + execute_error \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET - @@ -2056,51 
+2523,56 @@ struct kbase_tlstream; * * @kbdev: Kbase device * @kcpu_queue: KCPU queue + * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero */ +#if MALI_USE_CSF #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET( \ kbdev, \ - kcpu_queue \ + kcpu_queue, \ + execute_error \ ) \ - do { } while (0) - -/** - * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START - - * KCPU Queue starts an array of Debug Copys - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ -#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_START( \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue, execute_error); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET( \ kbdev, \ - kcpu_queue \ + kcpu_queue, \ + execute_error \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** - * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END - - * KCPU Queue ends an array of Debug Copys + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START - + * KCPU Queue starts a Map Import * * @kbdev: Kbase device * @kcpu_queue: KCPU queue */ -#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_DEBUGCOPY_END( \ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START( \ kbdev, \ kcpu_queue \ ) \ - do { } while (0) - -/** - * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START - - * KCPU Queue starts a Map Import - * - * @kbdev: Kbase device - * @kcpu_queue: KCPU queue - */ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START( \ kbdev, 
\ kcpu_queue \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END - @@ -2108,12 +2580,29 @@ struct kbase_tlstream; * * @kbdev: Kbase device * @kcpu_queue: KCPU queue + * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero */ +#if MALI_USE_CSF #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END( \ kbdev, \ - kcpu_queue \ + kcpu_queue, \ + execute_error \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue, execute_error); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END( \ + kbdev, \ + kcpu_queue, \ + execute_error \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START - @@ -2122,11 +2611,25 @@ struct kbase_tlstream; * @kbdev: Kbase device * @kcpu_queue: KCPU queue */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_start( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START( \ kbdev, \ kcpu_queue \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END - @@ -2134,12 +2637,29 @@ struct kbase_tlstream; * * @kbdev: Kbase device * @kcpu_queue: KCPU queue + * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero */ +#if MALI_USE_CSF #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END( \ kbdev, \ - kcpu_queue \ + kcpu_queue, \ + 
execute_error \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue, execute_error); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END( \ + kbdev, \ + kcpu_queue, \ + execute_error \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START - @@ -2148,11 +2668,25 @@ struct kbase_tlstream; * @kbdev: Kbase device * @kcpu_queue: KCPU queue */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_start( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START( \ kbdev, \ kcpu_queue \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END - @@ -2160,12 +2694,29 @@ struct kbase_tlstream; * * @kbdev: Kbase device * @kcpu_queue: KCPU queue + * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero */ +#if MALI_USE_CSF #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END( \ kbdev, \ - kcpu_queue \ + kcpu_queue, \ + execute_error \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue, execute_error); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END( \ + kbdev, \ + kcpu_queue, \ + 
execute_error \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START - @@ -2174,11 +2725,25 @@ struct kbase_tlstream; * @kbdev: Kbase device * @kcpu_queue: KCPU queue */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_alloc_start( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START( \ kbdev, \ kcpu_queue \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END - @@ -2187,11 +2752,25 @@ struct kbase_tlstream; * @kbdev: Kbase device * @kcpu_queue: KCPU queue */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_alloc_end( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ kbdev, \ kcpu_queue \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END - @@ -2199,16 +2778,35 @@ struct kbase_tlstream; * * @kbdev: Kbase device * @kcpu_queue: KCPU queue + * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero * @jit_alloc_gpu_alloc_addr: The JIT allocated GPU virtual address * @jit_alloc_mmu_flags: The MMU flags for the JIT allocation */ +#if MALI_USE_CSF #define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ kbdev, \ kcpu_queue, \ + 
execute_error, \ + jit_alloc_gpu_alloc_addr, \ + jit_alloc_mmu_flags \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue, execute_error, jit_alloc_gpu_alloc_addr, jit_alloc_mmu_flags); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ + kbdev, \ + kcpu_queue, \ + execute_error, \ jit_alloc_gpu_alloc_addr, \ jit_alloc_mmu_flags \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END - @@ -2217,11 +2815,25 @@ struct kbase_tlstream; * @kbdev: Kbase device * @kcpu_queue: KCPU queue */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_alloc_end( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \ kbdev, \ kcpu_queue \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START - @@ -2230,11 +2842,25 @@ struct kbase_tlstream; * @kbdev: Kbase device * @kcpu_queue: KCPU queue */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_free_start( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START( \ kbdev, \ kcpu_queue \ ) \ do { } while 
(0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END - @@ -2243,11 +2869,25 @@ struct kbase_tlstream; * @kbdev: Kbase device * @kcpu_queue: KCPU queue */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_free_end( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ kbdev, \ kcpu_queue \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END - @@ -2255,15 +2895,33 @@ struct kbase_tlstream; * * @kbdev: Kbase device * @kcpu_queue: KCPU queue + * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero * @jit_free_pages_used: The actual number of pages used by the JIT * allocation */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ + kbdev, \ + kcpu_queue, \ + execute_error, \ + jit_free_pages_used \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue, execute_error, jit_free_pages_used); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ kbdev, \ kcpu_queue, \ + execute_error, \ jit_free_pages_used \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END - @@ -2272,24 +2930,109 @@ struct kbase_tlstream; * @kbdev: Kbase device * @kcpu_queue: KCPU queue */ +#if MALI_USE_CSF +#define 
KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ + kbdev, \ + kcpu_queue \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue); \ + } while (0) +#else #define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END( \ kbdev, \ kcpu_queue \ ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /** - * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER - + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER - * KCPU Queue executes an Error Barrier * * @kbdev: Kbase device * @kcpu_queue: KCPU queue */ -#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER( \ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER( \ + kbdev, \ + kcpu_queue \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_execute_error_barrier( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER( \ + kbdev, \ + kcpu_queue \ + ) \ + do { } while (0) +#endif /* MALI_USE_CSF */ + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START - + * KCPU Queue starts a group suspend + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START( \ + kbdev, \ + kcpu_queue \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_start( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START( \ kbdev, \ kcpu_queue \ ) \ do { } 
while (0) +#endif /* MALI_USE_CSF */ + +/** + * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END - + * KCPU Queue ends a group suspend + * + * @kbdev: Kbase device + * @kcpu_queue: KCPU queue + * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero + */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END( \ + kbdev, \ + kcpu_queue, \ + execute_error \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_end( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + kcpu_queue, execute_error); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END( \ + kbdev, \ + kcpu_queue, \ + execute_error \ + ) \ + do { } while (0) +#endif /* MALI_USE_CSF */ /** * KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW - @@ -2299,12 +3042,54 @@ struct kbase_tlstream; * @csffw_timestamp: Timestamp of a CSFFW event * @csffw_cycle: Cycle number of a CSFFW event */ +#if MALI_USE_CSF #define KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW( \ kbdev, \ csffw_timestamp, \ csffw_cycle \ ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_csffw_tlstream_overflow( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + csffw_timestamp, csffw_cycle); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW( \ + kbdev, \ + csffw_timestamp, \ + csffw_cycle \ + ) \ + do { } while (0) +#endif /* MALI_USE_CSF */ + +/** + * KBASE_TLSTREAM_TL_KBASE_CSFFW_RESET - + * A reset has happened with the CSFFW + * + * @kbdev: Kbase device + * @csffw_cycle: Cycle number of a CSFFW event + */ +#if MALI_USE_CSF +#define KBASE_TLSTREAM_TL_KBASE_CSFFW_RESET( \ + kbdev, \ + csffw_cycle \ + ) \ + do { \ + int enabled = atomic_read(&kbdev->timeline_flags); \ + if (enabled & 
BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \ + __kbase_tlstream_tl_kbase_csffw_reset( \ + __TL_DISPATCH_STREAM(kbdev, obj), \ + csffw_cycle); \ + } while (0) +#else +#define KBASE_TLSTREAM_TL_KBASE_CSFFW_RESET( \ + kbdev, \ + csffw_cycle \ + ) \ do { } while (0) +#endif /* MALI_USE_CSF */ /* Gator tracepoints are hooked into TLSTREAM interface. diff --git a/dvalin/kernel/include/linux/dma-buf-test-exporter.h b/dvalin/kernel/include/linux/dma-buf-test-exporter.h index 95bc6f8..5a310f6 100644 --- a/dvalin/kernel/include/linux/dma-buf-test-exporter.h +++ b/dvalin/kernel/include/linux/dma-buf-test-exporter.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2012-2013, 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2013, 2017, 2020-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,11 +17,8 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ - #ifndef _LINUX_DMA_BUF_TEST_EXPORTER_H_ #define _LINUX_DMA_BUF_TEST_EXPORTER_H_ diff --git a/dvalin/kernel/include/linux/memory_group_manager.h b/dvalin/kernel/include/linux/memory_group_manager.h index b1ac253..c0e808b 100644 --- a/dvalin/kernel/include/linux/memory_group_manager.h +++ b/dvalin/kernel/include/linux/memory_group_manager.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _MEMORY_GROUP_MANAGER_H_ diff --git a/dvalin/kernel/include/linux/priority_control_manager.h b/dvalin/kernel/include/linux/priority_control_manager.h new file mode 100644 index 0000000..002e78b --- /dev/null +++ b/dvalin/kernel/include/linux/priority_control_manager.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#ifndef _PRIORITY_CONTROL_MANAGER_H_ +#define _PRIORITY_CONTROL_MANAGER_H_ + +#include <linux/mm.h> +#include <linux/of.h> +#include <linux/version.h> + +struct priority_control_manager_device; + +/** + * struct priority_control_manager_ops - Callbacks for priority control manager operations + * + * @pcm_scheduler_priority_check: Callback to check if scheduling priority level can be requested + */ +struct priority_control_manager_ops { + /** + * pcm_scheduler_priority_check: This function can be used to check what priority its work + * would be treated as based on the requested_priority value. + * + * @pcm_dev: The priority control manager through which the request is + * being made. + * @task: The task struct of the process requesting the priority check. + * @requested_priority: The priority level being requested. + * + * The returned value will be: + * The same as requested_priority if the process has permission to use requested_priority + * A lower priority value if the process does not have permission to use requested_priority + * + * requested_priority has the following value range: + * 0-3 : Priority level, 0 being highest and 3 being lowest + * + * Return: The priority that would actually be given, could be lower than requested_priority + */ + int (*pcm_scheduler_priority_check)( + struct priority_control_manager_device *pcm_dev, + struct task_struct *task, int requested_priority); +}; + +/** + * struct priority_control_manager_device - Device structure for priority + * control manager + * + * @ops: Callbacks associated with this device + * @data: Pointer to device private data + * @owner: Pointer to the module owner + * + * This structure should be registered with the platform device using + * platform_set_drvdata().
+ */ +struct priority_control_manager_device { + struct priority_control_manager_ops ops; + void *data; + struct module *owner; +}; + +#endif /* _PRIORITY_CONTROL_MANAGER_H_ */ diff --git a/dvalin/kernel/include/linux/protected_memory_allocator.h b/dvalin/kernel/include/linux/protected_memory_allocator.h index 3b9205b..1e05bb8 100644 --- a/dvalin/kernel/include/linux/protected_memory_allocator.h +++ b/dvalin/kernel/include/linux/protected_memory_allocator.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _PROTECTED_MEMORY_ALLOCATOR_H_ diff --git a/dvalin/kernel/include/linux/protected_mode_switcher.h b/dvalin/kernel/include/linux/protected_mode_switcher.h index 8778d81..1a3c5f7 100644 --- a/dvalin/kernel/include/linux/protected_mode_switcher.h +++ b/dvalin/kernel/include/linux/protected_mode_switcher.h @@ -1,11 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ #ifndef _PROTECTED_MODE_SWITCH_H_ diff --git a/dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h new file mode 100644 index 0000000..72572e5 --- /dev/null +++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h @@ -0,0 +1,765 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _UAPI_BASE_CSF_KERNEL_H_ +#define _UAPI_BASE_CSF_KERNEL_H_ + +#include <linux/types.h> + +/* Memory allocation, access/hint flags. + * + * See base_mem_alloc_flags.
+ */ + +/* IN */ +/* Read access CPU side + */ +#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0) + +/* Write access CPU side + */ +#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1) + +/* Read access GPU side + */ +#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2) + +/* Write access GPU side + */ +#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3) + +/* Execute allowed on the GPU side + */ +#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4) + +/* Will be permanently mapped in kernel space. + * Flag is only allowed on allocations originating from kbase. + */ +#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5) + +/* The allocation will completely reside within the same 4GB chunk in the GPU + * virtual space. + * Since this flag is primarily required only for the TLS memory which will + * not be used to contain executable code and also not used for Tiler heap, + * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags. + */ +#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6) + +/* Userspace is not allowed to free this memory. + * Flag is only allowed on allocations originating from kbase. 
+ */ +#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7) + +#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8) + +/* Grow backing store on GPU Page Fault + */ +#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9) + +/* Page coherence Outer shareable, if available + */ +#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10) + +/* Page coherence Inner shareable + */ +#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11) + +/* IN/OUT */ +/* Should be cached on the CPU, returned if actually cached + */ +#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12) + +/* IN/OUT */ +/* Must have same VA on both the GPU and the CPU + */ +#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13) + +/* OUT */ +/* Must call mmap to acquire a GPU address for the alloc + */ +#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14) + +/* IN */ +/* Page coherence Outer shareable, required. + */ +#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15) + +/* Protected memory + */ +#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16) + +/* Not needed physical memory + */ +#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17) + +/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the + * addresses to be the same + */ +#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18) + +/* CSF event memory + * + * If Outer shareable coherence is not specified or not available, then on + * allocation kbase will automatically use the uncached GPU mapping. + * There is no need for the client to specify BASE_MEM_UNCACHED_GPU + * themselves when allocating memory with the BASE_MEM_CSF_EVENT flag. 
+ * + * This memory requires a permanent mapping + * + * See also kbase_reg_needs_kernel_mapping() + */ +#define BASE_MEM_CSF_EVENT ((base_mem_alloc_flags)1 << 19) + +#define BASE_MEM_RESERVED_BIT_20 ((base_mem_alloc_flags)1 << 20) + +/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu + * mode. Some components within the GPU might only be able to access memory + * that is GPU cacheable. Refer to the specific GPU implementation for more + * details. The 3 shareability flags will be ignored for GPU uncached memory. + * If used while importing USER_BUFFER type memory, then the import will fail + * if the memory is not aligned to GPU and CPU cache line width. + */ +#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21) + +/* + * Bits [22:25] for group_id (0~15). + * + * base_mem_group_id_set() should be used to pack a memory group ID into a + * base_mem_alloc_flags value instead of accessing the bits directly. + * base_mem_group_id_get() should be used to extract the memory group ID from + * a base_mem_alloc_flags value. + */ +#define BASEP_MEM_GROUP_ID_SHIFT 22 +#define BASE_MEM_GROUP_ID_MASK \ + ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT) + +/* Must do CPU cache maintenance when imported memory is mapped/unmapped + * on GPU. Currently applicable to dma-buf type only. + */ +#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26) + +/* OUT */ +/* Kernel side cache sync ops required */ +#define BASE_MEM_KERNEL_SYNC ((base_mem_alloc_flags)1 << 28) + +/* Number of bits used as flags for base memory management + * + * Must be kept in sync with the base_mem_alloc_flags flags + */ +#define BASE_MEM_FLAGS_NR_BITS 29 + +/* A mask of all the flags which are only valid for allocations within kbase, + * and may not be passed from user space. + */ +#define BASEP_MEM_FLAGS_KERNEL_ONLY \ + (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE) + +/* A mask for all output bits, excluding IN/OUT bits. 
+ */ +#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP + +/* A mask for all input bits, including IN/OUT bits. + */ +#define BASE_MEM_FLAGS_INPUT_MASK \ + (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK) + +/* A mask of all currently reserved flags + */ +#define BASE_MEM_FLAGS_RESERVED \ + BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_20 + +#define BASEP_MEM_INVALID_HANDLE (0ull << 12) +#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12) +#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12) +#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12) +#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12) +/* reserved handles ..-47<<PAGE_SHIFT> for future special handles */ +#define BASEP_MEM_CSF_USER_REG_PAGE_HANDLE (47ul << 12) +#define BASEP_MEM_CSF_USER_IO_PAGES_HANDLE (48ul << 12) +#define BASE_MEM_COOKIE_BASE (64ul << 12) +#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \ + BASE_MEM_COOKIE_BASE) + +#define KBASE_CSF_NUM_USER_IO_PAGES_HANDLE \ + ((BASE_MEM_COOKIE_BASE - BASEP_MEM_CSF_USER_IO_PAGES_HANDLE) >> \ + LOCAL_PAGE_SHIFT) + +/** + * Valid set of just-in-time memory allocation flags + */ +#define BASE_JIT_ALLOC_VALID_FLAGS ((__u8)0) + +/* Flags to pass to ::base_context_init. + * Flags can be ORed together to enable multiple things. + * + * These share the same space as BASEP_CONTEXT_FLAG_*, and so must + * not collide with them. + */ +typedef __u32 base_context_create_flags; + +/* No flags set */ +#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0) + +/* Base context is embedded in a cctx object (flag used for CINSTR + * software counter macros) + */ +#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0) + +/* Base context is a 'System Monitor' context for Hardware counters. + * + * One important side effect of this is that job submission is disabled. + */ +#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \ + ((base_context_create_flags)1 << 1) + +/* Base context creates a CSF event notification thread.
+ * + * The creation of a CSF event notification thread is conditional but + * mandatory for the handling of CSF events. + */ +#define BASE_CONTEXT_CSF_EVENT_THREAD ((base_context_create_flags)1 << 2) + +/* Bit-shift used to encode a memory group ID in base_context_create_flags + */ +#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3) + +/* Bitmask used to encode a memory group ID in base_context_create_flags + */ +#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \ + ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) + +/* Bitpattern describing the base_context_create_flags that can be + * passed to the kernel + */ +#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \ + (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \ + BASEP_CONTEXT_MMU_GROUP_ID_MASK) + +/* Bitpattern describing the ::base_context_create_flags that can be + * passed to base_context_init() + */ +#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS \ + (BASE_CONTEXT_CCTX_EMBEDDED | \ + BASE_CONTEXT_CSF_EVENT_THREAD | \ + BASEP_CONTEXT_CREATE_KERNEL_FLAGS) + +/* Enable additional tracepoints for latency measurements (TL_ATOM_READY, + * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) + */ +#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0) + +/* Indicate that job dumping is enabled. This could affect certain timers + * to account for the performance impact. + */ +#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1) + +/* Enable KBase tracepoints for CSF builds */ +#define BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS (1 << 2) + +/* Enable additional CSF Firmware side tracepoints */ +#define BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS (1 << 3) + +#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \ + BASE_TLSTREAM_JOB_DUMPING_ENABLED | \ + BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS | \ + BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) + +/* Number of pages mapped into the process address space for a bound GPU + * command queue. 
A pair of input/output pages and a Hw doorbell page + * are mapped to enable direct submission of commands to Hw. + */ +#define BASEP_QUEUE_NR_MMAP_USER_PAGES ((size_t)3) + +#define BASE_QUEUE_MAX_PRIORITY (15U) + +/* CQS Sync object is an array of __u32 event_mem[2], error field index is 1 */ +#define BASEP_EVENT_VAL_INDEX (0U) +#define BASEP_EVENT_ERR_INDEX (1U) + +/* The upper limit for number of objects that could be waited/set per command. + * This limit is now enforced as internally the error inherit inputs are + * converted to 32-bit flags in a __u32 variable occupying a previously padding + * field. + */ +#define BASEP_KCPU_CQS_MAX_NUM_OBJS ((size_t)32) + +#if MALI_UNIT_TEST +/** + * enum base_kcpu_command_type - Kernel CPU queue command type. + * @BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: fence_signal, + * @BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: fence_wait, + * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT: cqs_wait, + * @BASE_KCPU_COMMAND_TYPE_CQS_SET: cqs_set, + * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: cqs_wait_operation, + * @BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: cqs_set_operation, + * @BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: map_import, + * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: unmap_import, + * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: unmap_import_force, + * @BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: jit_alloc, + * @BASE_KCPU_COMMAND_TYPE_JIT_FREE: jit_free, + * @BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: group_suspend, + * @BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: error_barrier, + * @BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME: sample_time, + */ +enum base_kcpu_command_type { + BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL, + BASE_KCPU_COMMAND_TYPE_FENCE_WAIT, + BASE_KCPU_COMMAND_TYPE_CQS_WAIT, + BASE_KCPU_COMMAND_TYPE_CQS_SET, + BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION, + BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION, + BASE_KCPU_COMMAND_TYPE_MAP_IMPORT, + BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT, + BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE, + BASE_KCPU_COMMAND_TYPE_JIT_ALLOC, + 
BASE_KCPU_COMMAND_TYPE_JIT_FREE, + BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND, + BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER, + BASE_KCPU_COMMAND_TYPE_SAMPLE_TIME, +}; +#else +/** + * enum base_kcpu_command_type - Kernel CPU queue command type. + * @BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: fence_signal, + * @BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: fence_wait, + * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT: cqs_wait, + * @BASE_KCPU_COMMAND_TYPE_CQS_SET: cqs_set, + * @BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: cqs_wait_operation, + * @BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: cqs_set_operation, + * @BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: map_import, + * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: unmap_import, + * @BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: unmap_import_force, + * @BASE_KCPU_COMMAND_TYPE_JIT_ALLOC: jit_alloc, + * @BASE_KCPU_COMMAND_TYPE_JIT_FREE: jit_free, + * @BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: group_suspend, + * @BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER: error_barrier, + */ +enum base_kcpu_command_type { + BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL, + BASE_KCPU_COMMAND_TYPE_FENCE_WAIT, + BASE_KCPU_COMMAND_TYPE_CQS_WAIT, + BASE_KCPU_COMMAND_TYPE_CQS_SET, + BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION, + BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION, + BASE_KCPU_COMMAND_TYPE_MAP_IMPORT, + BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT, + BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE, + BASE_KCPU_COMMAND_TYPE_JIT_ALLOC, + BASE_KCPU_COMMAND_TYPE_JIT_FREE, + BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND, + BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER, +}; +#endif /* MALI_UNIT_TEST */ + +/** + * enum base_queue_group_priority - Priority of a GPU Command Queue Group. + * @BASE_QUEUE_GROUP_PRIORITY_HIGH: GPU Command Queue Group is of high + * priority. + * @BASE_QUEUE_GROUP_PRIORITY_MEDIUM: GPU Command Queue Group is of medium + * priority. + * @BASE_QUEUE_GROUP_PRIORITY_LOW: GPU Command Queue Group is of low + * priority. + * @BASE_QUEUE_GROUP_PRIORITY_REALTIME: GPU Command Queue Group is of real-time + * priority. 
+ * @BASE_QUEUE_GROUP_PRIORITY_COUNT: Number of GPU Command Queue Group + * priority levels. + * + * Currently this is in order of highest to lowest, but if new levels are added + * then those new levels may be out of order to preserve the ABI compatibility + * with previous releases. At that point, ensure assignment to + * the 'priority' member in &kbase_queue_group is updated to ensure it remains + * a linear ordering. + * + * There should be no gaps in the enum, otherwise use of + * BASE_QUEUE_GROUP_PRIORITY_COUNT in kbase must be updated. + */ +enum base_queue_group_priority { + BASE_QUEUE_GROUP_PRIORITY_HIGH = 0, + BASE_QUEUE_GROUP_PRIORITY_MEDIUM, + BASE_QUEUE_GROUP_PRIORITY_LOW, + BASE_QUEUE_GROUP_PRIORITY_REALTIME, + BASE_QUEUE_GROUP_PRIORITY_COUNT +}; + +struct base_kcpu_command_fence_info { + __u64 fence; +}; + +struct base_cqs_wait_info { + __u64 addr; + __u32 val; + __u32 padding; +}; + +struct base_kcpu_command_cqs_wait_info { + __u64 objs; + __u32 nr_objs; + __u32 inherit_err_flags; +}; + +struct base_cqs_set { + __u64 addr; +}; + +struct base_kcpu_command_cqs_set_info { + __u64 objs; + __u32 nr_objs; + __u32 padding; +}; + +/** + * basep_cqs_data_type - Enumeration of CQS Data Types + * + * @BASEP_CQS_DATA_TYPE_U32: The Data Type of a CQS Object's value + * is an unsigned 32-bit integer + * @BASEP_CQS_DATA_TYPE_U64: The Data Type of a CQS Object's value + * is an unsigned 64-bit integer + */ +typedef enum PACKED { + BASEP_CQS_DATA_TYPE_U32 = 0, + BASEP_CQS_DATA_TYPE_U64 = 1, +} basep_cqs_data_type; + +/** + * basep_cqs_wait_operation_op - Enumeration of CQS Object Wait + * Operation conditions + * + * @BASEP_CQS_WAIT_OPERATION_LE: CQS Wait Operation indicating that a + * wait will be satisfied when a CQS Object's + * value is Less than or Equal to + * the Wait Operation value + * @BASEP_CQS_WAIT_OPERATION_GT: CQS Wait Operation indicating that a + * wait will be satisfied when a CQS Object's + * value is Greater than the Wait Operation value + */ 
+typedef enum { + BASEP_CQS_WAIT_OPERATION_LE = 0, + BASEP_CQS_WAIT_OPERATION_GT = 1, +} basep_cqs_wait_operation_op; + +struct base_cqs_wait_operation_info { + __u64 addr; + __u64 val; + __u8 operation; + __u8 data_type; + __u8 padding[6]; +}; + +/** + * struct base_kcpu_command_cqs_wait_operation_info - structure which contains information + * about the Timeline CQS wait objects + * + * @objs: An array of Timeline CQS waits. + * @nr_objs: Number of Timeline CQS waits in the array. + * @inherit_err_flags: Bit-pattern for the CQSs in the array whose error field + * is to be used as the source for importing into the + * queue's error-state. + */ +struct base_kcpu_command_cqs_wait_operation_info { + __u64 objs; + __u32 nr_objs; + __u32 inherit_err_flags; +}; + +/** + * basep_cqs_set_operation_op - Enumeration of CQS Set Operations + * + * @BASEP_CQS_SET_OPERATION_ADD: CQS Set operation for adding a value + * to a synchronization object + * @BASEP_CQS_SET_OPERATION_SET: CQS Set operation for setting the value + * of a synchronization object + */ +typedef enum { + BASEP_CQS_SET_OPERATION_ADD = 0, + BASEP_CQS_SET_OPERATION_SET = 1, +} basep_cqs_set_operation_op; + +struct base_cqs_set_operation_info { + __u64 addr; + __u64 val; + __u8 operation; + __u8 data_type; + __u8 padding[6]; +}; + +/** + * struct base_kcpu_command_cqs_set_operation_info - structure which contains information + * about the Timeline CQS set objects + * + * @objs: An array of Timeline CQS sets. + * @nr_objs: Number of Timeline CQS sets in the array. + * @padding: Structure padding, unused bytes. + */ +struct base_kcpu_command_cqs_set_operation_info { + __u64 objs; + __u32 nr_objs; + __u32 padding; +}; + +/** + * struct base_kcpu_command_import_info - structure which contains information + * about the imported buffer. + * + * @handle: Address of imported user buffer.
+ */ +struct base_kcpu_command_import_info { + __u64 handle; +}; + +/** + * struct base_kcpu_command_jit_alloc_info - structure which contains + * information about jit memory allocation. + * + * @info: An array of elements of the + * struct base_jit_alloc_info type. + * @count: The number of elements in the info array. + * @padding: Padding to a multiple of 64 bits. + */ +struct base_kcpu_command_jit_alloc_info { + __u64 info; + __u8 count; + __u8 padding[7]; +}; + +/** + * struct base_kcpu_command_jit_free_info - structure which contains + * information about jit memory which is to be freed. + * + * @ids: An array containing the JIT IDs to free. + * @count: The number of elements in the ids array. + * @padding: Padding to a multiple of 64 bits. + */ +struct base_kcpu_command_jit_free_info { + __u64 ids; + __u8 count; + __u8 padding[7]; +}; + +/** + * struct base_kcpu_command_group_suspend_info - structure which contains + * suspend buffer data captured for a suspended queue group. + * + * @buffer: Pointer to an array of elements of the type char. + * @size: Number of elements in the @buffer array. + * @group_handle: Handle to the mapping of CSG. + * @padding: padding to a multiple of 64 bits. + */ +struct base_kcpu_command_group_suspend_info { + __u64 buffer; + __u32 size; + __u8 group_handle; + __u8 padding[3]; +}; + +#if MALI_UNIT_TEST +struct base_kcpu_command_sample_time_info { + __u64 time; +}; +#endif /* MALI_UNIT_TEST */ + +/** + * struct base_kcpu_command - kcpu command. 
+ * @type: type of the kcpu command, one enum base_kcpu_command_type + * @padding: padding to a multiple of 64 bits + * @info: structure which contains information about the kcpu command; + * actual type is determined by @p type + * @info.fence: Fence + * @info.cqs_wait: CQS wait + * @info.cqs_set: CQS set + * @info.import: import + * @info.jit_alloc: jit allocation + * @info.jit_free: jit deallocation + * @info.suspend_buf_copy: suspend buffer copy + * @info.sample_time: sample time + * @info.padding: padding + */ +struct base_kcpu_command { + __u8 type; + __u8 padding[sizeof(__u64) - sizeof(__u8)]; + union { + struct base_kcpu_command_fence_info fence; + struct base_kcpu_command_cqs_wait_info cqs_wait; + struct base_kcpu_command_cqs_set_info cqs_set; + struct base_kcpu_command_cqs_wait_operation_info cqs_wait_operation; + struct base_kcpu_command_cqs_set_operation_info cqs_set_operation; + struct base_kcpu_command_import_info import; + struct base_kcpu_command_jit_alloc_info jit_alloc; + struct base_kcpu_command_jit_free_info jit_free; + struct base_kcpu_command_group_suspend_info suspend_buf_copy; +#if MALI_UNIT_TEST + struct base_kcpu_command_sample_time_info sample_time; +#endif /* MALI_UNIT_TEST */ + __u64 padding[2]; /* No sub-struct should be larger */ + } info; +}; + +/** + * struct basep_cs_stream_control - CSI capabilities. + * + * @features: Features of this stream + * @padding: Padding to a multiple of 64 bits. + */ +struct basep_cs_stream_control { + __u32 features; + __u32 padding; +}; + +/** + * struct basep_cs_group_control - CSG interface capabilities. + * + * @features: Features of this group + * @stream_num: Number of streams in this group + * @suspend_size: Size in bytes of the suspend buffer for this group + * @padding: Padding to a multiple of 64 bits. 
+ */ +struct basep_cs_group_control { + __u32 features; + __u32 stream_num; + __u32 suspend_size; + __u32 padding; +}; + +/** + * struct base_gpu_queue_group_error_fatal_payload - Unrecoverable fault + * error information associated with GPU command queue group. + * + * @sideband: Additional information of the unrecoverable fault. + * @status: Unrecoverable fault information. + * This consists of exception type (least significant byte) and + * data (remaining bytes). One example of exception type is + * CS_INVALID_INSTRUCTION (0x49). + * @padding: Padding to make multiple of 64bits + */ +struct base_gpu_queue_group_error_fatal_payload { + __u64 sideband; + __u32 status; + __u32 padding; +}; + +/** + * struct base_gpu_queue_error_fatal_payload - Unrecoverable fault + * error information related to GPU command queue. + * + * @sideband: Additional information about this unrecoverable fault. + * @status: Unrecoverable fault information. + * This consists of exception type (least significant byte) and + * data (remaining bytes). One example of exception type is + * CS_INVALID_INSTRUCTION (0x49). + * @csi_index: Index of the CSF interface the queue is bound to. + * @padding: Padding to make multiple of 64bits + */ +struct base_gpu_queue_error_fatal_payload { + __u64 sideband; + __u32 status; + __u8 csi_index; + __u8 padding[3]; +}; + +/** + * enum base_gpu_queue_group_error_type - GPU Fatal error type. + * + * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL: Fatal error associated with GPU + * command queue group. + * @BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL: Fatal error associated with GPU + * command queue. + * @BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT: Fatal error associated with + * progress timeout. + * @BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM: Fatal error due to running out + * of tiler heap memory. + * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT: The number of fatal error types + * + * This type is used for &struct_base_gpu_queue_group_error.error_type. 
+ */ +enum base_gpu_queue_group_error_type { + BASE_GPU_QUEUE_GROUP_ERROR_FATAL = 0, + BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL, + BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT, + BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM, + BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT +}; + +/** + * struct base_gpu_queue_group_error - Unrecoverable fault information + * @error_type: Error type of @base_gpu_queue_group_error_type + * indicating which field in union payload is filled + * @padding: Unused bytes for 64bit boundary + * @payload: Input Payload + * @payload.fatal_group: Unrecoverable fault error associated with + * GPU command queue group + * @payload.fatal_queue: Unrecoverable fault error associated with command queue + */ +struct base_gpu_queue_group_error { + __u8 error_type; + __u8 padding[7]; + union { + struct base_gpu_queue_group_error_fatal_payload fatal_group; + struct base_gpu_queue_error_fatal_payload fatal_queue; + } payload; +}; + +/** + * enum base_csf_notification_type - Notification type + * + * @BASE_CSF_NOTIFICATION_EVENT: Notification with kernel event + * @BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR: Notification with GPU fatal + * error + * @BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP: Notification with dumping cpu + * queue + * @BASE_CSF_NOTIFICATION_COUNT: The number of notification type + * + * This type is used for &struct_base_csf_notification.type. 
+ */ +enum base_csf_notification_type { + BASE_CSF_NOTIFICATION_EVENT = 0, + BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, + BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP, + BASE_CSF_NOTIFICATION_COUNT +}; + +/** + * struct base_csf_notification - Event or error notification + * + * @type: Notification type of @base_csf_notification_type + * @padding: Padding for 64bit boundary + * @payload: Input Payload + * @payload.align: To fit the struct into a 64-byte cache line + * @payload.csg_error: CSG error + * @payload.csg_error.handle: Handle of GPU command queue group associated with + * fatal error + * @payload.csg_error.padding: Padding + * @payload.csg_error.error: Unrecoverable fault error + * + */ +struct base_csf_notification { + __u8 type; + __u8 padding[7]; + union { + struct { + __u8 handle; + __u8 padding[7]; + struct base_gpu_queue_group_error error; + } csg_error; + + __u8 align[56]; + } payload; +}; + +#endif /* _UAPI_BASE_CSF_KERNEL_H_ */ diff --git a/dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_control_registers.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_control_registers.h new file mode 100644 index 0000000..b62a8b0 --- /dev/null +++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_control_registers.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * This header was autogenerated, it should not be edited. + */ + +#ifndef _UAPI_GPU_CSF_CONTROL_REGISTERS_H_ +#define _UAPI_GPU_CSF_CONTROL_REGISTERS_H_ + +/* GPU_REGISTERS register offsets */ +#define GPU_CONTROL_MCU 0x3000 /* () MCU control registers */ + +#endif /* _UAPI_GPU_CSF_CONTROL_REGISTERS_H_ */ diff --git a/dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h new file mode 100644 index 0000000..06cc4c2 --- /dev/null +++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h @@ -0,0 +1,1488 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * This header was autogenerated, it should not be edited. 
+ */ + +#ifndef _UAPI_GPU_CSF_REGISTERS_H_ +#define _UAPI_GPU_CSF_REGISTERS_H_ + +/* + * Begin register sets + */ + +/* DOORBELLS base address */ +#define DOORBELLS_BASE 0x0080000 +#define DOORBELLS_REG(r) (DOORBELLS_BASE + (r)) + +/* CS_KERNEL_INPUT_BLOCK base address */ +#define CS_KERNEL_INPUT_BLOCK_BASE 0x0000 +#define CS_KERNEL_INPUT_BLOCK_REG(r) (CS_KERNEL_INPUT_BLOCK_BASE + (r)) + +/* CS_KERNEL_OUTPUT_BLOCK base address */ +#define CS_KERNEL_OUTPUT_BLOCK_BASE 0x0000 +#define CS_KERNEL_OUTPUT_BLOCK_REG(r) (CS_KERNEL_OUTPUT_BLOCK_BASE + (r)) + +/* CS_USER_INPUT_BLOCK base address */ +#define CS_USER_INPUT_BLOCK_BASE 0x0000 +#define CS_USER_INPUT_BLOCK_REG(r) (CS_USER_INPUT_BLOCK_BASE + (r)) + +/* CS_USER_OUTPUT_BLOCK base address */ +#define CS_USER_OUTPUT_BLOCK_BASE 0x0000 +#define CS_USER_OUTPUT_BLOCK_REG(r) (CS_USER_OUTPUT_BLOCK_BASE + (r)) + +/* CSG_INPUT_BLOCK base address */ +#define CSG_INPUT_BLOCK_BASE 0x0000 +#define CSG_INPUT_BLOCK_REG(r) (CSG_INPUT_BLOCK_BASE + (r)) + +/* CSG_OUTPUT_BLOCK base address */ +#define CSG_OUTPUT_BLOCK_BASE 0x0000 +#define CSG_OUTPUT_BLOCK_REG(r) (CSG_OUTPUT_BLOCK_BASE + (r)) + +/* GLB_CONTROL_BLOCK base address */ +#define GLB_CONTROL_BLOCK_BASE 0x04000000 +#define GLB_CONTROL_BLOCK_REG(r) (GLB_CONTROL_BLOCK_BASE + (r)) + +/* GLB_INPUT_BLOCK base address */ +#define GLB_INPUT_BLOCK_BASE 0x0000 +#define GLB_INPUT_BLOCK_REG(r) (GLB_INPUT_BLOCK_BASE + (r)) + +/* GLB_OUTPUT_BLOCK base address */ +#define GLB_OUTPUT_BLOCK_BASE 0x0000 +#define GLB_OUTPUT_BLOCK_REG(r) (GLB_OUTPUT_BLOCK_BASE + (r)) + +/* USER base address */ +#define USER_BASE 0x0010000 +#define USER_REG(r) (USER_BASE + (r)) + +/* End register sets */ + +/* + * Begin register offsets + */ + +/* DOORBELLS register offsets */ +#define DOORBELL_0 0x0000 /* () Doorbell 0 register */ +#define DOORBELL(n) (DOORBELL_0 + (n)*65536) +#define DOORBELL_REG(n, r) (DOORBELL(n) + DOORBELL_BLOCK_REG(r)) +#define DOORBELL_COUNT 1024 + +/* DOORBELL_BLOCK register offsets */ 
+#define DB_BLK_DOORBELL 0x0000 /* (WO) Doorbell request */ + +/* CS_KERNEL_INPUT_BLOCK register offsets */ +#define CS_REQ 0x0000 /* () CS request flags */ +#define CS_CONFIG 0x0004 /* () CS configuration */ +#define CS_ACK_IRQ_MASK 0x000C /* () Command stream interrupt mask */ +#define CS_BASE_LO 0x0010 /* () Base pointer for the ring buffer, low word */ +#define CS_BASE_HI 0x0014 /* () Base pointer for the ring buffer, high word */ +#define CS_SIZE 0x0018 /* () Size of the ring buffer */ +#define CS_TILER_HEAP_START_LO 0x0020 /* () Pointer to heap start, low word */ +#define CS_TILER_HEAP_START_HI 0x0024 /* () Pointer to heap start, high word */ +#define CS_TILER_HEAP_END_LO 0x0028 /* () Tiler heap descriptor address, low word */ +#define CS_TILER_HEAP_END_HI 0x002C /* () Tiler heap descriptor address, high word */ +#define CS_USER_INPUT_LO 0x0030 /* () CS user mode input page address, low word */ +#define CS_USER_INPUT_HI 0x0034 /* () CS user mode input page address, high word */ +#define CS_USER_OUTPUT_LO 0x0038 /* () CS user mode output page address, low word */ +#define CS_USER_OUTPUT_HI 0x003C /* () CS user mode output page address, high word */ +#define CS_INSTR_CONFIG 0x0040 /* () Instrumentation buffer configuration */ +#define CS_INSTR_BUFFER_SIZE 0x0044 /* () Instrumentation buffer size */ +#define CS_INSTR_BUFFER_BASE_LO 0x0048 /* () Instrumentation buffer base pointer, low word */ +#define CS_INSTR_BUFFER_BASE_HI 0x004C /* () Instrumentation buffer base pointer, high word */ +#define CS_INSTR_BUFFER_OFFSET_POINTER_LO 0x0050 /* () Instrumentation buffer pointer to insert offset, low word */ +#define CS_INSTR_BUFFER_OFFSET_POINTER_HI 0x0054 /* () Instrumentation buffer pointer to insert offset, high word */ + +/* CS_KERNEL_OUTPUT_BLOCK register offsets */ +#define CS_ACK 0x0000 /* () CS acknowledge flags */ +#define CS_STATUS_CMD_PTR_LO 0x0040 /* () Program pointer current value, low word */ +#define CS_STATUS_CMD_PTR_HI 0x0044 /* () Program pointer
current value, high word */ +#define CS_STATUS_WAIT 0x0048 /* () Wait condition status register */ +#define CS_STATUS_REQ_RESOURCE 0x004C /* () Indicates the resources requested by the CS */ +#define CS_STATUS_WAIT_SYNC_POINTER_LO 0x0050 /* () Sync object pointer, low word */ +#define CS_STATUS_WAIT_SYNC_POINTER_HI 0x0054 /* () Sync object pointer, high word */ +#define CS_STATUS_WAIT_SYNC_VALUE 0x0058 /* () Sync object test value */ +#define CS_STATUS_SCOREBOARDS 0x005C /* () Scoreboard status */ +#define CS_STATUS_BLOCKED_REASON 0x0060 /* () Blocked reason */ +#define CS_FAULT 0x0080 /* () Recoverable fault information */ +#define CS_FATAL 0x0084 /* () Unrecoverable fault information */ +#define CS_FAULT_INFO_LO 0x0088 /* () Additional information about a recoverable fault, low word */ +#define CS_FAULT_INFO_HI 0x008C /* () Additional information about a recoverable fault, high word */ +#define CS_FATAL_INFO_LO 0x0090 /* () Additional information about a non-recoverable fault, low word */ +#define CS_FATAL_INFO_HI 0x0094 /* () Additional information about a non-recoverable fault, high word */ +#define CS_HEAP_VT_START 0x00C0 /* () Number of vertex/tiling operations started */ +#define CS_HEAP_VT_END 0x00C4 /* () Number of vertex/tiling operations completed */ +#define CS_HEAP_FRAG_END 0x00CC /* () Number of fragment completed */ +#define CS_HEAP_ADDRESS_LO 0x00D0 /* () Heap address, low word */ +#define CS_HEAP_ADDRESS_HI 0x00D4 /* () Heap address, high word */ + +/* CS_USER_INPUT_BLOCK register offsets */ +#define CS_INSERT_LO 0x0000 /* () Current insert offset for ring buffer, low word */ +#define CS_INSERT_HI 0x0004 /* () Current insert offset for ring buffer, high word */ +#define CS_EXTRACT_INIT_LO 0x0008 /* () Initial extract offset for ring buffer, low word */ +#define CS_EXTRACT_INIT_HI 0x000C /* () Initial extract offset for ring buffer, high word */ + +/* CS_USER_OUTPUT_BLOCK register offsets */ +#define CS_EXTRACT_LO 0x0000 /* () Current extract offset 
for ring buffer, low word */ +#define CS_EXTRACT_HI 0x0004 /* () Current extract offset for ring buffer, high word */ +#define CS_ACTIVE 0x0008 /* () Initial extract offset when the CS is started */ + +/* CSG_INPUT_BLOCK register offsets */ +#define CSG_REQ 0x0000 /* () CSG request */ +#define CSG_ACK_IRQ_MASK 0x0004 /* () Global acknowledge interrupt mask */ +#define CSG_DB_REQ 0x0008 /* () Global doorbell request */ +#define CSG_IRQ_ACK 0x000C /* () CS IRQ acknowledge */ +#define CSG_ALLOW_COMPUTE_LO 0x0020 /* () Allowed compute endpoints, low word */ +#define CSG_ALLOW_COMPUTE_HI 0x0024 /* () Allowed compute endpoints, high word */ +#define CSG_ALLOW_FRAGMENT_LO 0x0028 /* () Allowed fragment endpoints, low word */ +#define CSG_ALLOW_FRAGMENT_HI 0x002C /* () Allowed fragment endpoints, high word */ +#define CSG_ALLOW_OTHER 0x0030 /* () Allowed other endpoints */ +#define CSG_EP_REQ 0x0034 /* () Maximum number of endpoints allowed */ +#define CSG_SUSPEND_BUF_LO 0x0040 /* () Normal mode suspend buffer, low word */ +#define CSG_SUSPEND_BUF_HI 0x0044 /* () Normal mode suspend buffer, high word */ +#define CSG_PROTM_SUSPEND_BUF_LO 0x0048 /* () Protected mode suspend buffer, low word */ +#define CSG_PROTM_SUSPEND_BUF_HI 0x004C /* () Protected mode suspend buffer, high word */ +#define CSG_CONFIG 0x0050 /* () CSG configuration options */ +#define CSG_ITER_TRACE_CONFIG 0x0054 /* () CSG trace configuration */ + +/* CSG_OUTPUT_BLOCK register offsets */ +#define CSG_ACK 0x0000 /* () CSG acknowledge flags */ +#define CSG_DB_ACK 0x0008 /* () CS kernel doorbell acknowledge flags */ +#define CSG_IRQ_REQ 0x000C /* () CS interrupt request flags */ +#define CSG_STATUS_EP_CURRENT 0x0010 /* () Endpoint allocation status register */ +#define CSG_STATUS_EP_REQ 0x0014 /* () Endpoint request status register */ +#define CSG_RESOURCE_DEP 0x001C /* () Current resource dependencies */ + +/* GLB_CONTROL_BLOCK register offsets */ +#define GLB_VERSION 0x0000 /* () Global interface version */ 
+#define GLB_FEATURES 0x0004 /* () Global interface features */ +#define GLB_INPUT_VA 0x0008 /* () Address of GLB_INPUT_BLOCK */ +#define GLB_OUTPUT_VA 0x000C /* () Address of GLB_OUTPUT_BLOCK */ +#define GLB_GROUP_NUM 0x0010 /* () Number of CSG interfaces */ +#define GLB_GROUP_STRIDE 0x0014 /* () Stride between CSG interfaces */ +#define GLB_PRFCNT_SIZE 0x0018 /* () Size of CSF performance counters */ +#define GLB_INSTR_FEATURES \ + 0x001C /* () TRACE_POINT instrumentation. (csf >= 1.1.0) */ +#define GROUP_CONTROL_0 0x1000 /* () CSG control and capabilities */ +#define GROUP_CONTROL(n) (GROUP_CONTROL_0 + (n)*256) +#define GROUP_CONTROL_REG(n, r) (GROUP_CONTROL(n) + GROUP_CONTROL_BLOCK_REG(r)) +#define GROUP_CONTROL_COUNT 16 + +/* STREAM_CONTROL_BLOCK register offsets */ +#define STREAM_FEATURES 0x0000 /* () CSI features */ +#define STREAM_INPUT_VA 0x0004 /* () Address of CS_KERNEL_INPUT_BLOCK */ +#define STREAM_OUTPUT_VA 0x0008 /* () Address of CS_KERNEL_OUTPUT_BLOCK */ + +/* GROUP_CONTROL_BLOCK register offsets */ +#define GROUP_FEATURES 0x0000 /* () CSG interface features */ +#define GROUP_INPUT_VA 0x0004 /* () Address of CSG_INPUT_BLOCK */ +#define GROUP_OUTPUT_VA 0x0008 /* () Address of CSG_OUTPUT_BLOCK */ +#define GROUP_SUSPEND_SIZE 0x000C /* () Size of CSG suspend buffer */ +#define GROUP_PROTM_SUSPEND_SIZE 0x0010 /* () Size of CSG protected-mode suspend buffer */ +#define GROUP_STREAM_NUM 0x0014 /* () Number of CS interfaces */ +#define GROUP_STREAM_STRIDE 0x0018 /* () Stride between CS interfaces */ +#define STREAM_CONTROL_0 0x0040 /* () CS control and capabilities */ +#define STREAM_CONTROL(n) (STREAM_CONTROL_0 + (n)*12) +#define STREAM_CONTROL_REG(n, r) (STREAM_CONTROL(n) + STREAM_CONTROL_BLOCK_REG(r)) +#define STREAM_CONTROL_COUNT 16 + +/* GLB_INPUT_BLOCK register offsets */ +#define GLB_REQ 0x0000 /* () Global request */ +#define GLB_ACK_IRQ_MASK 0x0004 /* () Global acknowledge interrupt mask */ +#define GLB_DB_REQ 0x0008 /* () Global doorbell request 
*/ +#define GLB_PROGRESS_TIMER 0x0010 /* () Global progress timeout */ +#define GLB_PWROFF_TIMER 0x0014 /* () Global shader core power off timer */ +#define GLB_ALLOC_EN_LO 0x0018 /* () Global shader core allocation enable mask, low word */ +#define GLB_ALLOC_EN_HI 0x001C /* () Global shader core allocation enable mask, high word */ +#define GLB_PROTM_COHERENCY 0x0020 /* () Configure COHERENCY_ENABLE register value to use in protected mode execution */ + +#define GLB_PRFCNT_JASID 0x0024 /* () Performance counter address space */ +#define GLB_PRFCNT_BASE_LO 0x0028 /* () Performance counter buffer address, low word */ +#define GLB_PRFCNT_BASE_HI 0x002C /* () Performance counter buffer address, high word */ +#define GLB_PRFCNT_EXTRACT 0x0030 /* () Performance counter buffer extract index */ +#define GLB_PRFCNT_CONFIG 0x0040 /* () Performance counter configuration */ +#define GLB_PRFCNT_CSG_SELECT 0x0044 /* () CSG performance counting enable */ +#define GLB_PRFCNT_FW_EN 0x0048 /* () Performance counter enable for firmware */ +#define GLB_PRFCNT_CSG_EN 0x004C /* () Performance counter enable for CSG */ +#define GLB_PRFCNT_CSF_EN 0x0050 /* () Performance counter enable for CSF */ +#define GLB_PRFCNT_SHADER_EN 0x0054 /* () Performance counter enable for shader cores */ +#define GLB_PRFCNT_TILER_EN 0x0058 /* () Performance counter enable for tiler */ +#define GLB_PRFCNT_MMU_L2_EN 0x005C /* () Performance counter enable for MMU/L2 cache */ + +#define GLB_DEBUG_FWUTF_DESTROY 0x0FE0 /* () Test fixture destroy function address */ +#define GLB_DEBUG_FWUTF_TEST 0x0FE4 /* () Test index */ +#define GLB_DEBUG_FWUTF_FIXTURE 0x0FE8 /* () Test fixture index */ +#define GLB_DEBUG_FWUTF_CREATE 0x0FEC /* () Test fixture create function address */ +#define GLB_DEBUG_ACK_IRQ_MASK 0x0FF8 /* () Global debug acknowledge interrupt mask */ +#define GLB_DEBUG_REQ 0x0FFC /* () Global debug request */ + +/* GLB_OUTPUT_BLOCK register offsets */ +#define GLB_ACK 0x0000 /* () Global acknowledge */ 
+#define GLB_DB_ACK 0x0008 /* () Global doorbell acknowledge */ +#define GLB_HALT_STATUS 0x0010 /* () Global halt status */ +#define GLB_PRFCNT_STATUS 0x0014 /* () Performance counter status */ +#define GLB_PRFCNT_INSERT 0x0018 /* () Performance counter buffer insert index */ +#define GLB_DEBUG_FWUTF_RESULT 0x0FE0 /* () Firmware debug test result */ +#define GLB_DEBUG_ACK 0x0FFC /* () Global debug acknowledge */ + +/* USER register offsets */ +#define LATEST_FLUSH 0x0000 /* () Flush ID of latest clean-and-invalidate operation */ + +/* End register offsets */ + +/* CS_KERNEL_INPUT_BLOCK register set definitions */ +/* GLB_VERSION register */ +#define GLB_VERSION_PATCH_SHIFT (0) +#define GLB_VERSION_MINOR_SHIFT (16) +#define GLB_VERSION_MAJOR_SHIFT (24) + +/* CS_REQ register */ +#define CS_REQ_STATE_SHIFT 0 +#define CS_REQ_STATE_MASK (0x7 << CS_REQ_STATE_SHIFT) +#define CS_REQ_STATE_GET(reg_val) (((reg_val)&CS_REQ_STATE_MASK) >> CS_REQ_STATE_SHIFT) +#define CS_REQ_STATE_SET(reg_val, value) \ + (((reg_val) & ~CS_REQ_STATE_MASK) | (((value) << CS_REQ_STATE_SHIFT) & CS_REQ_STATE_MASK)) +/* CS_REQ_STATE values */ +#define CS_REQ_STATE_STOP 0x0 +#define CS_REQ_STATE_START 0x1 +/* End of CS_REQ_STATE values */ +#define CS_REQ_EXTRACT_EVENT_SHIFT 4 +#define CS_REQ_EXTRACT_EVENT_MASK (0x1 << CS_REQ_EXTRACT_EVENT_SHIFT) +#define CS_REQ_EXTRACT_EVENT_GET(reg_val) (((reg_val)&CS_REQ_EXTRACT_EVENT_MASK) >> CS_REQ_EXTRACT_EVENT_SHIFT) +#define CS_REQ_EXTRACT_EVENT_SET(reg_val, value) \ + (((reg_val) & ~CS_REQ_EXTRACT_EVENT_MASK) | (((value) << CS_REQ_EXTRACT_EVENT_SHIFT) & CS_REQ_EXTRACT_EVENT_MASK)) + +#define CS_REQ_IDLE_SYNC_WAIT_SHIFT 8 +#define CS_REQ_IDLE_SYNC_WAIT_MASK (0x1 << CS_REQ_IDLE_SYNC_WAIT_SHIFT) +#define CS_REQ_IDLE_SYNC_WAIT_GET(reg_val) (((reg_val)&CS_REQ_IDLE_SYNC_WAIT_MASK) >> CS_REQ_IDLE_SYNC_WAIT_SHIFT) +#define CS_REQ_IDLE_SYNC_WAIT_SET(reg_val, value) \ + (((reg_val) & ~CS_REQ_IDLE_SYNC_WAIT_MASK) | \ + (((value) << CS_REQ_IDLE_SYNC_WAIT_SHIFT) & 
CS_REQ_IDLE_SYNC_WAIT_MASK))
+#define CS_REQ_IDLE_PROTM_PEND_SHIFT 9
+#define CS_REQ_IDLE_PROTM_PEND_MASK (0x1 << CS_REQ_IDLE_PROTM_PEND_SHIFT)
+#define CS_REQ_IDLE_PROTM_PEND_GET(reg_val) (((reg_val) & CS_REQ_IDLE_PROTM_PEND_MASK) >> CS_REQ_IDLE_PROTM_PEND_SHIFT)
+#define CS_REQ_IDLE_PROTM_PEND_SET(reg_val, value) \
+	(((reg_val) & ~CS_REQ_IDLE_PROTM_PEND_MASK) | \
+	(((value) << CS_REQ_IDLE_PROTM_PEND_SHIFT) & CS_REQ_IDLE_PROTM_PEND_MASK))
+#define CS_REQ_IDLE_EMPTY_SHIFT 10
+#define CS_REQ_IDLE_EMPTY_MASK (0x1 << CS_REQ_IDLE_EMPTY_SHIFT)
+#define CS_REQ_IDLE_EMPTY_GET(reg_val) (((reg_val) & CS_REQ_IDLE_EMPTY_MASK) >> CS_REQ_IDLE_EMPTY_SHIFT)
+#define CS_REQ_IDLE_EMPTY_SET(reg_val, value) \
+	(((reg_val) & ~CS_REQ_IDLE_EMPTY_MASK) | (((value) << CS_REQ_IDLE_EMPTY_SHIFT) & CS_REQ_IDLE_EMPTY_MASK))
+#define CS_REQ_IDLE_RESOURCE_REQ_SHIFT 11
+#define CS_REQ_IDLE_RESOURCE_REQ_MASK (0x1 << CS_REQ_IDLE_RESOURCE_REQ_SHIFT)
+#define CS_REQ_IDLE_RESOURCE_REQ_GET(reg_val) \
+	(((reg_val) & CS_REQ_IDLE_RESOURCE_REQ_MASK) >> CS_REQ_IDLE_RESOURCE_REQ_SHIFT)
+#define CS_REQ_IDLE_RESOURCE_REQ_SET(reg_val, value) \
+	(((reg_val) & ~CS_REQ_IDLE_RESOURCE_REQ_MASK) | \
+	(((value) << CS_REQ_IDLE_RESOURCE_REQ_SHIFT) & CS_REQ_IDLE_RESOURCE_REQ_MASK))
+#define CS_REQ_TILER_OOM_SHIFT 26
+#define CS_REQ_TILER_OOM_MASK (0x1 << CS_REQ_TILER_OOM_SHIFT)
+#define CS_REQ_TILER_OOM_GET(reg_val) (((reg_val) & CS_REQ_TILER_OOM_MASK) >> CS_REQ_TILER_OOM_SHIFT)
+#define CS_REQ_TILER_OOM_SET(reg_val, value) \
+	(((reg_val) & ~CS_REQ_TILER_OOM_MASK) | (((value) << CS_REQ_TILER_OOM_SHIFT) & CS_REQ_TILER_OOM_MASK))
+#define CS_REQ_PROTM_PEND_SHIFT 27
+#define CS_REQ_PROTM_PEND_MASK (0x1 << CS_REQ_PROTM_PEND_SHIFT)
+#define CS_REQ_PROTM_PEND_GET(reg_val) (((reg_val) & CS_REQ_PROTM_PEND_MASK) >> CS_REQ_PROTM_PEND_SHIFT)
+#define CS_REQ_PROTM_PEND_SET(reg_val, value) \
+	(((reg_val) & ~CS_REQ_PROTM_PEND_MASK) | (((value) << CS_REQ_PROTM_PEND_SHIFT) & CS_REQ_PROTM_PEND_MASK))
+#define CS_REQ_FATAL_SHIFT 30
+#define
CS_REQ_FATAL_MASK (0x1 << CS_REQ_FATAL_SHIFT)
+#define CS_REQ_FATAL_GET(reg_val) (((reg_val) & CS_REQ_FATAL_MASK) >> CS_REQ_FATAL_SHIFT)
+#define CS_REQ_FATAL_SET(reg_val, value) \
+	(((reg_val) & ~CS_REQ_FATAL_MASK) | (((value) << CS_REQ_FATAL_SHIFT) & CS_REQ_FATAL_MASK))
+#define CS_REQ_FAULT_SHIFT 31
+#define CS_REQ_FAULT_MASK ((u32)0x1 << CS_REQ_FAULT_SHIFT) /* u32: shifting a signed 1 into bit 31 is undefined */
+#define CS_REQ_FAULT_GET(reg_val) (((reg_val) & CS_REQ_FAULT_MASK) >> CS_REQ_FAULT_SHIFT)
+#define CS_REQ_FAULT_SET(reg_val, value) \
+	(((reg_val) & ~CS_REQ_FAULT_MASK) | (((value) << CS_REQ_FAULT_SHIFT) & CS_REQ_FAULT_MASK))
+
+/* CS_CONFIG register */
+#define CS_CONFIG_PRIORITY_SHIFT 0
+#define CS_CONFIG_PRIORITY_MASK (0xF << CS_CONFIG_PRIORITY_SHIFT)
+#define CS_CONFIG_PRIORITY_GET(reg_val) (((reg_val) & CS_CONFIG_PRIORITY_MASK) >> CS_CONFIG_PRIORITY_SHIFT)
+#define CS_CONFIG_PRIORITY_SET(reg_val, value) \
+	(((reg_val) & ~CS_CONFIG_PRIORITY_MASK) | (((value) << CS_CONFIG_PRIORITY_SHIFT) & CS_CONFIG_PRIORITY_MASK))
+#define CS_CONFIG_USER_DOORBELL_SHIFT 8
+#define CS_CONFIG_USER_DOORBELL_MASK (0xFF << CS_CONFIG_USER_DOORBELL_SHIFT)
+#define CS_CONFIG_USER_DOORBELL_GET(reg_val) (((reg_val) & CS_CONFIG_USER_DOORBELL_MASK) >> CS_CONFIG_USER_DOORBELL_SHIFT)
+#define CS_CONFIG_USER_DOORBELL_SET(reg_val, value) \
+	(((reg_val) & ~CS_CONFIG_USER_DOORBELL_MASK) | \
+	(((value) << CS_CONFIG_USER_DOORBELL_SHIFT) & CS_CONFIG_USER_DOORBELL_MASK))
+
+/* CS_ACK_IRQ_MASK register */
+#define CS_ACK_IRQ_MASK_STATE_SHIFT 0
+#define CS_ACK_IRQ_MASK_STATE_MASK (0x7 << CS_ACK_IRQ_MASK_STATE_SHIFT)
+#define CS_ACK_IRQ_MASK_STATE_GET(reg_val) (((reg_val) & CS_ACK_IRQ_MASK_STATE_MASK) >> CS_ACK_IRQ_MASK_STATE_SHIFT)
+#define CS_ACK_IRQ_MASK_STATE_SET(reg_val, value) \
+	(((reg_val) & ~CS_ACK_IRQ_MASK_STATE_MASK) | \
+	(((value) << CS_ACK_IRQ_MASK_STATE_SHIFT) & CS_ACK_IRQ_MASK_STATE_MASK))
+/* CS_ACK_IRQ_MASK_STATE values */
+#define CS_ACK_IRQ_MASK_STATE_DISABLED 0x0
+#define CS_ACK_IRQ_MASK_STATE_ENABLED 0x7
+/* End of
CS_ACK_IRQ_MASK_STATE values */
+#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT 4
+#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK (0x1 << CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT)
+#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_GET(reg_val) \
+	(((reg_val) & CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK) >> CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT)
+#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_SET(reg_val, value) \
+	(((reg_val) & ~CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK) | \
+	(((value) << CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT) & CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK))
+#define CS_ACK_IRQ_MASK_TILER_OOM_SHIFT 26
+#define CS_ACK_IRQ_MASK_TILER_OOM_MASK (0x1 << CS_ACK_IRQ_MASK_TILER_OOM_SHIFT)
+#define CS_ACK_IRQ_MASK_TILER_OOM_GET(reg_val) \
+	(((reg_val) & CS_ACK_IRQ_MASK_TILER_OOM_MASK) >> CS_ACK_IRQ_MASK_TILER_OOM_SHIFT)
+#define CS_ACK_IRQ_MASK_TILER_OOM_SET(reg_val, value) \
+	(((reg_val) & ~CS_ACK_IRQ_MASK_TILER_OOM_MASK) | \
+	(((value) << CS_ACK_IRQ_MASK_TILER_OOM_SHIFT) & CS_ACK_IRQ_MASK_TILER_OOM_MASK))
+#define CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT 27
+#define CS_ACK_IRQ_MASK_PROTM_PEND_MASK (0x1 << CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT)
+#define CS_ACK_IRQ_MASK_PROTM_PEND_GET(reg_val) \
+	(((reg_val) & CS_ACK_IRQ_MASK_PROTM_PEND_MASK) >> CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT)
+#define CS_ACK_IRQ_MASK_PROTM_PEND_SET(reg_val, value) \
+	(((reg_val) & ~CS_ACK_IRQ_MASK_PROTM_PEND_MASK) | \
+	(((value) << CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT) & CS_ACK_IRQ_MASK_PROTM_PEND_MASK))
+#define CS_ACK_IRQ_MASK_FATAL_SHIFT 30
+#define CS_ACK_IRQ_MASK_FATAL_MASK (0x1 << CS_ACK_IRQ_MASK_FATAL_SHIFT)
+#define CS_ACK_IRQ_MASK_FATAL_GET(reg_val) (((reg_val) & CS_ACK_IRQ_MASK_FATAL_MASK) >> CS_ACK_IRQ_MASK_FATAL_SHIFT)
+#define CS_ACK_IRQ_MASK_FATAL_SET(reg_val, value) \
+	(((reg_val) & ~CS_ACK_IRQ_MASK_FATAL_MASK) | \
+	(((value) << CS_ACK_IRQ_MASK_FATAL_SHIFT) & CS_ACK_IRQ_MASK_FATAL_MASK))
+#define CS_ACK_IRQ_MASK_FAULT_SHIFT 31
+#define CS_ACK_IRQ_MASK_FAULT_MASK ((u32)0x1 << CS_ACK_IRQ_MASK_FAULT_SHIFT) /* u32: shifting a signed 1 into bit 31 is undefined */
+#define CS_ACK_IRQ_MASK_FAULT_GET(reg_val)
(((reg_val)&CS_ACK_IRQ_MASK_FAULT_MASK) >> CS_ACK_IRQ_MASK_FAULT_SHIFT)
+#define CS_ACK_IRQ_MASK_FAULT_SET(reg_val, value) \
+	(((reg_val) & ~CS_ACK_IRQ_MASK_FAULT_MASK) | \
+	(((value) << CS_ACK_IRQ_MASK_FAULT_SHIFT) & CS_ACK_IRQ_MASK_FAULT_MASK))
+
+/* CS_BASE register */
+#define CS_BASE_POINTER_SHIFT 0
+#define CS_BASE_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_BASE_POINTER_SHIFT)
+#define CS_BASE_POINTER_GET(reg_val) (((reg_val) & CS_BASE_POINTER_MASK) >> CS_BASE_POINTER_SHIFT)
+#define CS_BASE_POINTER_SET(reg_val, value) \
+	(((reg_val) & ~CS_BASE_POINTER_MASK) | (((value) << CS_BASE_POINTER_SHIFT) & CS_BASE_POINTER_MASK))
+
+/* CS_SIZE register */
+#define CS_SIZE_SIZE_SHIFT 0
+#define CS_SIZE_SIZE_MASK (0xFFFFFFFF << CS_SIZE_SIZE_SHIFT)
+#define CS_SIZE_SIZE_GET(reg_val) (((reg_val) & CS_SIZE_SIZE_MASK) >> CS_SIZE_SIZE_SHIFT)
+#define CS_SIZE_SIZE_SET(reg_val, value) \
+	(((reg_val) & ~CS_SIZE_SIZE_MASK) | (((value) << CS_SIZE_SIZE_SHIFT) & CS_SIZE_SIZE_MASK))
+
+/* CS_TILER_HEAP_START register */
+#define CS_TILER_HEAP_START_POINTER_SHIFT 0
+#define CS_TILER_HEAP_START_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_TILER_HEAP_START_POINTER_SHIFT)
+#define CS_TILER_HEAP_START_POINTER_GET(reg_val) \
+	(((reg_val) & CS_TILER_HEAP_START_POINTER_MASK) >> CS_TILER_HEAP_START_POINTER_SHIFT)
+#define CS_TILER_HEAP_START_POINTER_SET(reg_val, value) \
+	(((reg_val) & ~CS_TILER_HEAP_START_POINTER_MASK) | \
+	(((value) << CS_TILER_HEAP_START_POINTER_SHIFT) & CS_TILER_HEAP_START_POINTER_MASK))
+/* HeapChunkPointer nested in CS_TILER_HEAP_START_POINTER */
+/* End of HeapChunkPointer nested in CS_TILER_HEAP_START_POINTER */
+
+/* CS_TILER_HEAP_END register */
+#define CS_TILER_HEAP_END_POINTER_SHIFT 0
+#define CS_TILER_HEAP_END_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_TILER_HEAP_END_POINTER_SHIFT)
+#define CS_TILER_HEAP_END_POINTER_GET(reg_val) \
+	(((reg_val) & CS_TILER_HEAP_END_POINTER_MASK) >> CS_TILER_HEAP_END_POINTER_SHIFT)
+#define CS_TILER_HEAP_END_POINTER_SET(reg_val, value) \
+	
(((reg_val) & ~CS_TILER_HEAP_END_POINTER_MASK) | \
+	(((value) << CS_TILER_HEAP_END_POINTER_SHIFT) & CS_TILER_HEAP_END_POINTER_MASK))
+/* HeapChunkPointer nested in CS_TILER_HEAP_END_POINTER */
+/* End of HeapChunkPointer nested in CS_TILER_HEAP_END_POINTER */
+
+/* CS_USER_INPUT register */
+#define CS_USER_INPUT_POINTER_SHIFT 0
+#define CS_USER_INPUT_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_USER_INPUT_POINTER_SHIFT)
+#define CS_USER_INPUT_POINTER_GET(reg_val) (((reg_val) & CS_USER_INPUT_POINTER_MASK) >> CS_USER_INPUT_POINTER_SHIFT)
+#define CS_USER_INPUT_POINTER_SET(reg_val, value) \
+	(((reg_val) & ~CS_USER_INPUT_POINTER_MASK) | \
+	(((value) << CS_USER_INPUT_POINTER_SHIFT) & CS_USER_INPUT_POINTER_MASK))
+
+/* CS_USER_OUTPUT register */
+#define CS_USER_OUTPUT_POINTER_SHIFT 0
+#define CS_USER_OUTPUT_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_USER_OUTPUT_POINTER_SHIFT)
+#define CS_USER_OUTPUT_POINTER_GET(reg_val) (((reg_val) & CS_USER_OUTPUT_POINTER_MASK) >> CS_USER_OUTPUT_POINTER_SHIFT)
+#define CS_USER_OUTPUT_POINTER_SET(reg_val, value) \
+	(((reg_val) & ~CS_USER_OUTPUT_POINTER_MASK) | \
+	(((value) << CS_USER_OUTPUT_POINTER_SHIFT) & CS_USER_OUTPUT_POINTER_MASK))
+
+/* CS_INSTR_CONFIG register */
+#define CS_INSTR_CONFIG_JASID_SHIFT (0)
+#define CS_INSTR_CONFIG_JASID_MASK ((u32)0xF << CS_INSTR_CONFIG_JASID_SHIFT)
+#define CS_INSTR_CONFIG_JASID_GET(reg_val) (((reg_val) & CS_INSTR_CONFIG_JASID_MASK) >> CS_INSTR_CONFIG_JASID_SHIFT)
+#define CS_INSTR_CONFIG_JASID_SET(reg_val, value) \
+	(((reg_val) & ~CS_INSTR_CONFIG_JASID_MASK) | \
+	(((value) << CS_INSTR_CONFIG_JASID_SHIFT) & CS_INSTR_CONFIG_JASID_MASK))
+#define CS_INSTR_CONFIG_EVENT_SIZE_SHIFT (4)
+#define CS_INSTR_CONFIG_EVENT_SIZE_MASK ((u32)0xF << CS_INSTR_CONFIG_EVENT_SIZE_SHIFT)
+#define CS_INSTR_CONFIG_EVENT_SIZE_GET(reg_val) \
+	(((reg_val) & CS_INSTR_CONFIG_EVENT_SIZE_MASK) >> CS_INSTR_CONFIG_EVENT_SIZE_SHIFT)
+#define CS_INSTR_CONFIG_EVENT_SIZE_SET(reg_val, value) \
+	(((reg_val) & ~CS_INSTR_CONFIG_EVENT_SIZE_MASK) 
| \
+	(((value) << CS_INSTR_CONFIG_EVENT_SIZE_SHIFT) & CS_INSTR_CONFIG_EVENT_SIZE_MASK))
+#define CS_INSTR_CONFIG_EVENT_STATE_SHIFT (16)
+#define CS_INSTR_CONFIG_EVENT_STATE_MASK ((u32)0xFF << CS_INSTR_CONFIG_EVENT_STATE_SHIFT)
+#define CS_INSTR_CONFIG_EVENT_STATE_GET(reg_val) \
+	(((reg_val) & CS_INSTR_CONFIG_EVENT_STATE_MASK) >> CS_INSTR_CONFIG_EVENT_STATE_SHIFT)
+#define CS_INSTR_CONFIG_EVENT_STATE_SET(reg_val, value) \
+	(((reg_val) & ~CS_INSTR_CONFIG_EVENT_STATE_MASK) | \
+	(((value) << CS_INSTR_CONFIG_EVENT_STATE_SHIFT) & CS_INSTR_CONFIG_EVENT_STATE_MASK))
+
+/* CS_INSTR_BUFFER_SIZE register */
+#define CS_INSTR_BUFFER_SIZE_SIZE_SHIFT (0)
+#define CS_INSTR_BUFFER_SIZE_SIZE_MASK ((u32)0xFFFFFFFF << CS_INSTR_BUFFER_SIZE_SIZE_SHIFT)
+#define CS_INSTR_BUFFER_SIZE_SIZE_GET(reg_val) \
+	(((reg_val) & CS_INSTR_BUFFER_SIZE_SIZE_MASK) >> CS_INSTR_BUFFER_SIZE_SIZE_SHIFT)
+#define CS_INSTR_BUFFER_SIZE_SIZE_SET(reg_val, value) \
+	(((reg_val) & ~CS_INSTR_BUFFER_SIZE_SIZE_MASK) | \
+	(((value) << CS_INSTR_BUFFER_SIZE_SIZE_SHIFT) & CS_INSTR_BUFFER_SIZE_SIZE_MASK))
+
+/* CS_INSTR_BUFFER_BASE register */
+#define CS_INSTR_BUFFER_BASE_POINTER_SHIFT (0)
+#define CS_INSTR_BUFFER_BASE_POINTER_MASK ((u64)0xFFFFFFFFFFFFFFFF << CS_INSTR_BUFFER_BASE_POINTER_SHIFT)
+#define CS_INSTR_BUFFER_BASE_POINTER_GET(reg_val) \
+	(((reg_val) & CS_INSTR_BUFFER_BASE_POINTER_MASK) >> CS_INSTR_BUFFER_BASE_POINTER_SHIFT)
+#define CS_INSTR_BUFFER_BASE_POINTER_SET(reg_val, value) \
+	(((reg_val) & ~CS_INSTR_BUFFER_BASE_POINTER_MASK) | \
+	(((value) << CS_INSTR_BUFFER_BASE_POINTER_SHIFT) & CS_INSTR_BUFFER_BASE_POINTER_MASK))
+
+/* CS_INSTR_BUFFER_OFFSET_POINTER register (MASK parentheses balanced so the macro expands to one expression) */
+#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT (0)
+#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK \
+	(((u64)0xFFFFFFFFFFFFFFFF) << CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT)
+#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_GET(reg_val) \
+	(((reg_val)&CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK) >> 
CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT)
+#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SET(reg_val, value) \
+	(((reg_val) & ~CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK) | \
+	(((value) << CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT) & CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK))
+
+/* End of CS_KERNEL_INPUT_BLOCK register set definitions */
+
+/* CS_KERNEL_OUTPUT_BLOCK register set definitions */
+
+/* CS_ACK register */
+#define CS_ACK_STATE_SHIFT 0
+#define CS_ACK_STATE_MASK (0x7 << CS_ACK_STATE_SHIFT)
+#define CS_ACK_STATE_GET(reg_val) (((reg_val) & CS_ACK_STATE_MASK) >> CS_ACK_STATE_SHIFT)
+#define CS_ACK_STATE_SET(reg_val, value) \
+	(((reg_val) & ~CS_ACK_STATE_MASK) | (((value) << CS_ACK_STATE_SHIFT) & CS_ACK_STATE_MASK))
+/* CS_ACK_STATE values */
+#define CS_ACK_STATE_STOP 0x0
+#define CS_ACK_STATE_START 0x1
+/* End of CS_ACK_STATE values */
+#define CS_ACK_EXTRACT_EVENT_SHIFT 4
+#define CS_ACK_EXTRACT_EVENT_MASK (0x1 << CS_ACK_EXTRACT_EVENT_SHIFT)
+#define CS_ACK_EXTRACT_EVENT_GET(reg_val) (((reg_val) & CS_ACK_EXTRACT_EVENT_MASK) >> CS_ACK_EXTRACT_EVENT_SHIFT)
+#define CS_ACK_EXTRACT_EVENT_SET(reg_val, value) \
+	(((reg_val) & ~CS_ACK_EXTRACT_EVENT_MASK) | (((value) << CS_ACK_EXTRACT_EVENT_SHIFT) & CS_ACK_EXTRACT_EVENT_MASK))
+#define CS_ACK_TILER_OOM_SHIFT 26
+#define CS_ACK_TILER_OOM_MASK (0x1 << CS_ACK_TILER_OOM_SHIFT)
+#define CS_ACK_TILER_OOM_GET(reg_val) (((reg_val) & CS_ACK_TILER_OOM_MASK) >> CS_ACK_TILER_OOM_SHIFT)
+#define CS_ACK_TILER_OOM_SET(reg_val, value) \
+	(((reg_val) & ~CS_ACK_TILER_OOM_MASK) | (((value) << CS_ACK_TILER_OOM_SHIFT) & CS_ACK_TILER_OOM_MASK))
+#define CS_ACK_PROTM_PEND_SHIFT 27
+#define CS_ACK_PROTM_PEND_MASK (0x1 << CS_ACK_PROTM_PEND_SHIFT)
+#define CS_ACK_PROTM_PEND_GET(reg_val) (((reg_val) & CS_ACK_PROTM_PEND_MASK) >> CS_ACK_PROTM_PEND_SHIFT)
+#define CS_ACK_PROTM_PEND_SET(reg_val, value) \
+	(((reg_val) & ~CS_ACK_PROTM_PEND_MASK) | (((value) << CS_ACK_PROTM_PEND_SHIFT) & CS_ACK_PROTM_PEND_MASK))
+#define
CS_ACK_FATAL_SHIFT 30
+#define CS_ACK_FATAL_MASK (0x1 << CS_ACK_FATAL_SHIFT)
+#define CS_ACK_FATAL_GET(reg_val) (((reg_val) & CS_ACK_FATAL_MASK) >> CS_ACK_FATAL_SHIFT)
+#define CS_ACK_FATAL_SET(reg_val, value) \
+	(((reg_val) & ~CS_ACK_FATAL_MASK) | (((value) << CS_ACK_FATAL_SHIFT) & CS_ACK_FATAL_MASK))
+#define CS_ACK_FAULT_SHIFT 31
+#define CS_ACK_FAULT_MASK ((u32)0x1 << CS_ACK_FAULT_SHIFT) /* u32: shifting a signed 1 into bit 31 is undefined */
+#define CS_ACK_FAULT_GET(reg_val) (((reg_val) & CS_ACK_FAULT_MASK) >> CS_ACK_FAULT_SHIFT)
+#define CS_ACK_FAULT_SET(reg_val, value) \
+	(((reg_val) & ~CS_ACK_FAULT_MASK) | (((value) << CS_ACK_FAULT_SHIFT) & CS_ACK_FAULT_MASK))
+
+/* CS_STATUS_CMD_PTR register */
+#define CS_STATUS_CMD_PTR_POINTER_SHIFT 0
+#define CS_STATUS_CMD_PTR_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_STATUS_CMD_PTR_POINTER_SHIFT)
+#define CS_STATUS_CMD_PTR_POINTER_GET(reg_val) \
+	(((reg_val) & CS_STATUS_CMD_PTR_POINTER_MASK) >> CS_STATUS_CMD_PTR_POINTER_SHIFT)
+#define CS_STATUS_CMD_PTR_POINTER_SET(reg_val, value) \
+	(((reg_val) & ~CS_STATUS_CMD_PTR_POINTER_MASK) | \
+	(((value) << CS_STATUS_CMD_PTR_POINTER_SHIFT) & CS_STATUS_CMD_PTR_POINTER_MASK))
+
+/* CS_STATUS_WAIT register */
+#define CS_STATUS_WAIT_SB_MASK_SHIFT 0
+#define CS_STATUS_WAIT_SB_MASK_MASK (0xFFFF << CS_STATUS_WAIT_SB_MASK_SHIFT)
+#define CS_STATUS_WAIT_SB_MASK_GET(reg_val) (((reg_val) & CS_STATUS_WAIT_SB_MASK_MASK) >> CS_STATUS_WAIT_SB_MASK_SHIFT)
+#define CS_STATUS_WAIT_SB_MASK_SET(reg_val, value) \
+	(((reg_val) & ~CS_STATUS_WAIT_SB_MASK_MASK) | \
+	(((value) << CS_STATUS_WAIT_SB_MASK_SHIFT) & CS_STATUS_WAIT_SB_MASK_MASK))
+#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT 24
+#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK (0xF << CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT)
+#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(reg_val) \
+	(((reg_val) & CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK) >> CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT)
+#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SET(reg_val, value) \
+	(((reg_val) & 
~CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK) | \
+	(((value) << CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK))
+/* CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */
+#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE 0x0
+#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT 0x1
+/* End of CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */
+#define CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT 28
+#define CS_STATUS_WAIT_PROGRESS_WAIT_MASK (0x1 << CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT)
+#define CS_STATUS_WAIT_PROGRESS_WAIT_GET(reg_val) \
+	(((reg_val) & CS_STATUS_WAIT_PROGRESS_WAIT_MASK) >> CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT)
+#define CS_STATUS_WAIT_PROGRESS_WAIT_SET(reg_val, value) \
+	(((reg_val) & ~CS_STATUS_WAIT_PROGRESS_WAIT_MASK) | \
+	(((value) << CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT) & CS_STATUS_WAIT_PROGRESS_WAIT_MASK))
+#define CS_STATUS_WAIT_PROTM_PEND_SHIFT 29
+#define CS_STATUS_WAIT_PROTM_PEND_MASK (0x1 << CS_STATUS_WAIT_PROTM_PEND_SHIFT)
+#define CS_STATUS_WAIT_PROTM_PEND_GET(reg_val) \
+	(((reg_val) & CS_STATUS_WAIT_PROTM_PEND_MASK) >> CS_STATUS_WAIT_PROTM_PEND_SHIFT)
+#define CS_STATUS_WAIT_PROTM_PEND_SET(reg_val, value) \
+	(((reg_val) & ~CS_STATUS_WAIT_PROTM_PEND_MASK) | \
+	(((value) << CS_STATUS_WAIT_PROTM_PEND_SHIFT) & CS_STATUS_WAIT_PROTM_PEND_MASK))
+#define CS_STATUS_WAIT_SYNC_WAIT_SHIFT 31
+#define CS_STATUS_WAIT_SYNC_WAIT_MASK ((u32)0x1 << CS_STATUS_WAIT_SYNC_WAIT_SHIFT) /* u32: shifting a signed 1 into bit 31 is undefined */
+#define CS_STATUS_WAIT_SYNC_WAIT_GET(reg_val) \
+	(((reg_val) & CS_STATUS_WAIT_SYNC_WAIT_MASK) >> CS_STATUS_WAIT_SYNC_WAIT_SHIFT)
+#define CS_STATUS_WAIT_SYNC_WAIT_SET(reg_val, value) \
+	(((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_MASK) | \
+	(((value) << CS_STATUS_WAIT_SYNC_WAIT_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_MASK))
+
+/* CS_STATUS_REQ_RESOURCE register */
+#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT 0
+#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT)
+#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_GET(reg_val) \
+	(((reg_val) & CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT)
+#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SET(reg_val, value) \
+	(((reg_val) & ~CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK) | \
+	(((value) << CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK))
+#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT 1
+#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT)
+#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_GET(reg_val) \
+	(((reg_val) & CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT)
+#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SET(reg_val, value) \
+	(((reg_val) & ~CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK) | \
+	(((value) << CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK))
+#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT 2
+#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT)
+#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_GET(reg_val) \
+	(((reg_val) & CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT)
+#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SET(reg_val, value) \
+	(((reg_val) & ~CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK) | \
+	(((value) << CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK))
+#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT 3
+#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT)
+#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_GET(reg_val) \
+	(((reg_val) & CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT)
+#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SET(reg_val, value) \
+	(((reg_val) & 
~CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK) | \
+	(((value) << CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK))
+
+/* CS_STATUS_WAIT_SYNC_POINTER register */
+#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT 0
+#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT)
+#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_GET(reg_val) \
+	(((reg_val) & CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK) >> CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT)
+#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SET(reg_val, value) \
+	(((reg_val) & ~CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK) | \
+	(((value) << CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT) & CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK))
+
+/* CS_STATUS_WAIT_SYNC_VALUE register */
+#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT 0
+#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK (0xFFFFFFFF << CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT)
+#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_GET(reg_val) \
+	(((reg_val) & CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK) >> CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT)
+#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_SET(reg_val, value) \
+	(((reg_val) & ~CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK) | \
+	(((value) << CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT) & CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK))
+
+/* CS_STATUS_SCOREBOARDS register */
+#define CS_STATUS_SCOREBOARDS_NONZERO_SHIFT (0)
+#define CS_STATUS_SCOREBOARDS_NONZERO_MASK \
+	((0xFFFF) << CS_STATUS_SCOREBOARDS_NONZERO_SHIFT)
+#define CS_STATUS_SCOREBOARDS_NONZERO_GET(reg_val) \
+	(((reg_val) & CS_STATUS_SCOREBOARDS_NONZERO_MASK) >> \
+	CS_STATUS_SCOREBOARDS_NONZERO_SHIFT)
+#define CS_STATUS_SCOREBOARDS_NONZERO_SET(reg_val, value) \
+	(((reg_val) & ~CS_STATUS_SCOREBOARDS_NONZERO_MASK) | \
+	(((value) << CS_STATUS_SCOREBOARDS_NONZERO_SHIFT) & \
+	CS_STATUS_SCOREBOARDS_NONZERO_MASK))
+
+/* CS_STATUS_BLOCKED_REASON register */
+#define CS_STATUS_BLOCKED_REASON_REASON_SHIFT (0)
+#define
CS_STATUS_BLOCKED_REASON_REASON_MASK \
+	((0xF) << CS_STATUS_BLOCKED_REASON_REASON_SHIFT)
+#define CS_STATUS_BLOCKED_REASON_REASON_GET(reg_val) \
+	(((reg_val) & CS_STATUS_BLOCKED_REASON_REASON_MASK) >> \
+	CS_STATUS_BLOCKED_REASON_REASON_SHIFT)
+#define CS_STATUS_BLOCKED_REASON_REASON_SET(reg_val, value) \
+	(((reg_val) & ~CS_STATUS_BLOCKED_REASON_REASON_MASK) | \
+	(((value) << CS_STATUS_BLOCKED_REASON_REASON_SHIFT) & \
+	CS_STATUS_BLOCKED_REASON_REASON_MASK))
+/* CS_STATUS_BLOCKED_REASON_REASON values */
+#define CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED 0x0
+#define CS_STATUS_BLOCKED_REASON_REASON_WAIT 0x1
+#define CS_STATUS_BLOCKED_REASON_REASON_PROGRESS_WAIT 0x2
+#define CS_STATUS_BLOCKED_REASON_REASON_SYNC_WAIT 0x3
+#define CS_STATUS_BLOCKED_REASON_REASON_DEFERRED 0x4
+#define CS_STATUS_BLOCKED_REASON_REASON_RESOURCE 0x5
+#define CS_STATUS_BLOCKED_REASON_REASON_FLUSH 0x6
+/* End of CS_STATUS_BLOCKED_REASON_REASON values */
+
+/* CS_FAULT register */
+#define CS_FAULT_EXCEPTION_TYPE_SHIFT 0
+#define CS_FAULT_EXCEPTION_TYPE_MASK (0xFF << CS_FAULT_EXCEPTION_TYPE_SHIFT)
+#define CS_FAULT_EXCEPTION_TYPE_GET(reg_val) (((reg_val) & CS_FAULT_EXCEPTION_TYPE_MASK) >> CS_FAULT_EXCEPTION_TYPE_SHIFT)
+#define CS_FAULT_EXCEPTION_TYPE_SET(reg_val, value) \
+	(((reg_val) & ~CS_FAULT_EXCEPTION_TYPE_MASK) | \
+	(((value) << CS_FAULT_EXCEPTION_TYPE_SHIFT) & CS_FAULT_EXCEPTION_TYPE_MASK))
+/* CS_FAULT_EXCEPTION_TYPE values */
+#define CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED 0x0F
+#define CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT 0x4B
+#define CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_PC 0x50
+#define CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_ENC 0x51
+#define CS_FAULT_EXCEPTION_TYPE_INSTR_BARRIER_FAULT 0x55
+#define CS_FAULT_EXCEPTION_TYPE_DATA_INVALID_FAULT 0x58
+#define CS_FAULT_EXCEPTION_TYPE_TILE_RANGE_FAULT 0x59
+#define CS_FAULT_EXCEPTION_TYPE_ADDR_RANGE_FAULT 0x5A
+#define CS_FAULT_EXCEPTION_TYPE_IMPRECISE_FAULT 0x5B
+#define CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT
0x69
+/* End of CS_FAULT_EXCEPTION_TYPE values */
+#define CS_FAULT_EXCEPTION_DATA_SHIFT 8
+#define CS_FAULT_EXCEPTION_DATA_MASK ((u32)0xFFFFFF << CS_FAULT_EXCEPTION_DATA_SHIFT) /* u32: 0xFFFFFF << 8 overflows a signed int */
+#define CS_FAULT_EXCEPTION_DATA_GET(reg_val) (((reg_val) & CS_FAULT_EXCEPTION_DATA_MASK) >> CS_FAULT_EXCEPTION_DATA_SHIFT)
+#define CS_FAULT_EXCEPTION_DATA_SET(reg_val, value) \
+	(((reg_val) & ~CS_FAULT_EXCEPTION_DATA_MASK) | \
+	(((value) << CS_FAULT_EXCEPTION_DATA_SHIFT) & CS_FAULT_EXCEPTION_DATA_MASK))
+
+/* CS_FATAL register */
+#define CS_FATAL_EXCEPTION_TYPE_SHIFT 0
+#define CS_FATAL_EXCEPTION_TYPE_MASK (0xFF << CS_FATAL_EXCEPTION_TYPE_SHIFT)
+#define CS_FATAL_EXCEPTION_TYPE_GET(reg_val) (((reg_val) & CS_FATAL_EXCEPTION_TYPE_MASK) >> CS_FATAL_EXCEPTION_TYPE_SHIFT)
+#define CS_FATAL_EXCEPTION_TYPE_SET(reg_val, value) \
+	(((reg_val) & ~CS_FATAL_EXCEPTION_TYPE_MASK) | \
+	(((value) << CS_FATAL_EXCEPTION_TYPE_SHIFT) & CS_FATAL_EXCEPTION_TYPE_MASK))
+/* CS_FATAL_EXCEPTION_TYPE values */
+#define CS_FATAL_EXCEPTION_TYPE_CS_CONFIG_FAULT 0x40
+#define CS_FATAL_EXCEPTION_TYPE_CS_ENDPOINT_FAULT 0x44
+#define CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT 0x48
+#define CS_FATAL_EXCEPTION_TYPE_CS_INVALID_INSTRUCTION 0x49
+#define CS_FATAL_EXCEPTION_TYPE_CS_CALL_STACK_OVERFLOW 0x4A
+#define CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR 0x68
+/* End of CS_FATAL_EXCEPTION_TYPE values */
+#define CS_FATAL_EXCEPTION_DATA_SHIFT 8
+#define CS_FATAL_EXCEPTION_DATA_MASK ((u32)0xFFFFFF << CS_FATAL_EXCEPTION_DATA_SHIFT) /* u32: 0xFFFFFF << 8 overflows a signed int */
+#define CS_FATAL_EXCEPTION_DATA_GET(reg_val) (((reg_val) & CS_FATAL_EXCEPTION_DATA_MASK) >> CS_FATAL_EXCEPTION_DATA_SHIFT)
+#define CS_FATAL_EXCEPTION_DATA_SET(reg_val, value) \
+	(((reg_val) & ~CS_FATAL_EXCEPTION_DATA_MASK) | \
+	(((value) << CS_FATAL_EXCEPTION_DATA_SHIFT) & CS_FATAL_EXCEPTION_DATA_MASK))
+
+/* CS_FAULT_INFO register */
+#define CS_FAULT_INFO_EXCEPTION_DATA_SHIFT 0
+#define CS_FAULT_INFO_EXCEPTION_DATA_MASK (0xFFFFFFFFFFFFFFFF << CS_FAULT_INFO_EXCEPTION_DATA_SHIFT)
+#define
CS_FAULT_INFO_EXCEPTION_DATA_GET(reg_val) \
+	(((reg_val) & CS_FAULT_INFO_EXCEPTION_DATA_MASK) >> CS_FAULT_INFO_EXCEPTION_DATA_SHIFT)
+#define CS_FAULT_INFO_EXCEPTION_DATA_SET(reg_val, value) \
+	(((reg_val) & ~CS_FAULT_INFO_EXCEPTION_DATA_MASK) | \
+	(((value) << CS_FAULT_INFO_EXCEPTION_DATA_SHIFT) & CS_FAULT_INFO_EXCEPTION_DATA_MASK))
+
+/* CS_FATAL_INFO register */
+#define CS_FATAL_INFO_EXCEPTION_DATA_SHIFT 0
+#define CS_FATAL_INFO_EXCEPTION_DATA_MASK (0xFFFFFFFFFFFFFFFF << CS_FATAL_INFO_EXCEPTION_DATA_SHIFT)
+#define CS_FATAL_INFO_EXCEPTION_DATA_GET(reg_val) \
+	(((reg_val) & CS_FATAL_INFO_EXCEPTION_DATA_MASK) >> CS_FATAL_INFO_EXCEPTION_DATA_SHIFT)
+#define CS_FATAL_INFO_EXCEPTION_DATA_SET(reg_val, value) \
+	(((reg_val) & ~CS_FATAL_INFO_EXCEPTION_DATA_MASK) | \
+	(((value) << CS_FATAL_INFO_EXCEPTION_DATA_SHIFT) & CS_FATAL_INFO_EXCEPTION_DATA_MASK))
+
+/* CS_HEAP_VT_START register */
+#define CS_HEAP_VT_START_VALUE_SHIFT 0
+#define CS_HEAP_VT_START_VALUE_MASK (0xFFFFFFFF << CS_HEAP_VT_START_VALUE_SHIFT)
+#define CS_HEAP_VT_START_VALUE_GET(reg_val) (((reg_val) & CS_HEAP_VT_START_VALUE_MASK) >> CS_HEAP_VT_START_VALUE_SHIFT)
+#define CS_HEAP_VT_START_VALUE_SET(reg_val, value) \
+	(((reg_val) & ~CS_HEAP_VT_START_VALUE_MASK) | \
+	(((value) << CS_HEAP_VT_START_VALUE_SHIFT) & CS_HEAP_VT_START_VALUE_MASK))
+
+/* CS_HEAP_VT_END register */
+#define CS_HEAP_VT_END_VALUE_SHIFT 0
+#define CS_HEAP_VT_END_VALUE_MASK (0xFFFFFFFF << CS_HEAP_VT_END_VALUE_SHIFT)
+#define CS_HEAP_VT_END_VALUE_GET(reg_val) (((reg_val) & CS_HEAP_VT_END_VALUE_MASK) >> CS_HEAP_VT_END_VALUE_SHIFT)
+#define CS_HEAP_VT_END_VALUE_SET(reg_val, value) \
+	(((reg_val) & ~CS_HEAP_VT_END_VALUE_MASK) | (((value) << CS_HEAP_VT_END_VALUE_SHIFT) & CS_HEAP_VT_END_VALUE_MASK))
+
+/* CS_HEAP_FRAG_END register */
+#define CS_HEAP_FRAG_END_VALUE_SHIFT 0
+#define CS_HEAP_FRAG_END_VALUE_MASK (0xFFFFFFFF << CS_HEAP_FRAG_END_VALUE_SHIFT)
+#define CS_HEAP_FRAG_END_VALUE_GET(reg_val) (((reg_val)&CS_HEAP_FRAG_END_VALUE_MASK) >> 
CS_HEAP_FRAG_END_VALUE_SHIFT)
+#define CS_HEAP_FRAG_END_VALUE_SET(reg_val, value) \
+	(((reg_val) & ~CS_HEAP_FRAG_END_VALUE_MASK) | \
+	(((value) << CS_HEAP_FRAG_END_VALUE_SHIFT) & CS_HEAP_FRAG_END_VALUE_MASK))
+
+/* CS_HEAP_ADDRESS register */
+#define CS_HEAP_ADDRESS_POINTER_SHIFT 0
+#define CS_HEAP_ADDRESS_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_HEAP_ADDRESS_POINTER_SHIFT)
+#define CS_HEAP_ADDRESS_POINTER_GET(reg_val) (((reg_val) & CS_HEAP_ADDRESS_POINTER_MASK) >> CS_HEAP_ADDRESS_POINTER_SHIFT)
+#define CS_HEAP_ADDRESS_POINTER_SET(reg_val, value) \
+	(((reg_val) & ~CS_HEAP_ADDRESS_POINTER_MASK) | \
+	(((value) << CS_HEAP_ADDRESS_POINTER_SHIFT) & CS_HEAP_ADDRESS_POINTER_MASK))
+/* End of CS_KERNEL_OUTPUT_BLOCK register set definitions */
+
+/* CS_USER_INPUT_BLOCK register set definitions */
+
+/* CS_INSERT register */
+#define CS_INSERT_VALUE_SHIFT 0
+#define CS_INSERT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_INSERT_VALUE_SHIFT)
+#define CS_INSERT_VALUE_GET(reg_val) (((reg_val) & CS_INSERT_VALUE_MASK) >> CS_INSERT_VALUE_SHIFT)
+#define CS_INSERT_VALUE_SET(reg_val, value) \
+	(((reg_val) & ~CS_INSERT_VALUE_MASK) | (((value) << CS_INSERT_VALUE_SHIFT) & CS_INSERT_VALUE_MASK))
+
+/* CS_EXTRACT_INIT register */
+#define CS_EXTRACT_INIT_VALUE_SHIFT 0
+#define CS_EXTRACT_INIT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_EXTRACT_INIT_VALUE_SHIFT)
+#define CS_EXTRACT_INIT_VALUE_GET(reg_val) (((reg_val) & CS_EXTRACT_INIT_VALUE_MASK) >> CS_EXTRACT_INIT_VALUE_SHIFT)
+#define CS_EXTRACT_INIT_VALUE_SET(reg_val, value) \
+	(((reg_val) & ~CS_EXTRACT_INIT_VALUE_MASK) | \
+	(((value) << CS_EXTRACT_INIT_VALUE_SHIFT) & CS_EXTRACT_INIT_VALUE_MASK))
+/* End of CS_USER_INPUT_BLOCK register set definitions */
+
+/* CS_USER_OUTPUT_BLOCK register set definitions */
+
+/* CS_EXTRACT register */
+#define CS_EXTRACT_VALUE_SHIFT 0
+#define CS_EXTRACT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_EXTRACT_VALUE_SHIFT)
+#define CS_EXTRACT_VALUE_GET(reg_val) (((reg_val) & CS_EXTRACT_VALUE_MASK) >> CS_EXTRACT_VALUE_SHIFT)
+#define CS_EXTRACT_VALUE_SET(reg_val, value) \
+	(((reg_val) & ~CS_EXTRACT_VALUE_MASK) | (((value) << CS_EXTRACT_VALUE_SHIFT) & CS_EXTRACT_VALUE_MASK))
+
+/* CS_ACTIVE register */
+#define CS_ACTIVE_HW_ACTIVE_SHIFT 0
+#define CS_ACTIVE_HW_ACTIVE_MASK (0x1 << CS_ACTIVE_HW_ACTIVE_SHIFT)
+#define CS_ACTIVE_HW_ACTIVE_GET(reg_val) (((reg_val) & CS_ACTIVE_HW_ACTIVE_MASK) >> CS_ACTIVE_HW_ACTIVE_SHIFT)
+#define CS_ACTIVE_HW_ACTIVE_SET(reg_val, value) \
+	(((reg_val) & ~CS_ACTIVE_HW_ACTIVE_MASK) | (((value) << CS_ACTIVE_HW_ACTIVE_SHIFT) & CS_ACTIVE_HW_ACTIVE_MASK))
+/* End of CS_USER_OUTPUT_BLOCK register set definitions */
+
+/* CSG_INPUT_BLOCK register set definitions */
+
+/* CSG_REQ register */
+#define CSG_REQ_STATE_SHIFT 0
+#define CSG_REQ_STATE_MASK (0x7 << CSG_REQ_STATE_SHIFT)
+#define CSG_REQ_STATE_GET(reg_val) (((reg_val) & CSG_REQ_STATE_MASK) >> CSG_REQ_STATE_SHIFT)
+#define CSG_REQ_STATE_SET(reg_val, value) \
+	(((reg_val) & ~CSG_REQ_STATE_MASK) | (((value) << CSG_REQ_STATE_SHIFT) & CSG_REQ_STATE_MASK))
+/* CSG_REQ_STATE values */
+#define CSG_REQ_STATE_TERMINATE 0x0
+#define CSG_REQ_STATE_START 0x1
+#define CSG_REQ_STATE_SUSPEND 0x2
+#define CSG_REQ_STATE_RESUME 0x3
+/* End of CSG_REQ_STATE values */
+#define CSG_REQ_EP_CFG_SHIFT 4
+#define CSG_REQ_EP_CFG_MASK (0x1 << CSG_REQ_EP_CFG_SHIFT)
+#define CSG_REQ_EP_CFG_GET(reg_val) (((reg_val) & CSG_REQ_EP_CFG_MASK) >> CSG_REQ_EP_CFG_SHIFT)
+#define CSG_REQ_EP_CFG_SET(reg_val, value) \
+	(((reg_val) & ~CSG_REQ_EP_CFG_MASK) | (((value) << CSG_REQ_EP_CFG_SHIFT) & CSG_REQ_EP_CFG_MASK))
+#define CSG_REQ_STATUS_UPDATE_SHIFT 5
+#define CSG_REQ_STATUS_UPDATE_MASK (0x1 << CSG_REQ_STATUS_UPDATE_SHIFT)
+#define CSG_REQ_STATUS_UPDATE_GET(reg_val) (((reg_val) & CSG_REQ_STATUS_UPDATE_MASK) >> CSG_REQ_STATUS_UPDATE_SHIFT)
+#define CSG_REQ_STATUS_UPDATE_SET(reg_val, value) \
+	(((reg_val) & ~CSG_REQ_STATUS_UPDATE_MASK) | \
+	(((value) << CSG_REQ_STATUS_UPDATE_SHIFT) & CSG_REQ_STATUS_UPDATE_MASK))
+#define CSG_REQ_SYNC_UPDATE_SHIFT 28
+#define CSG_REQ_SYNC_UPDATE_MASK (0x1 << CSG_REQ_SYNC_UPDATE_SHIFT)
+#define CSG_REQ_SYNC_UPDATE_GET(reg_val) (((reg_val) & CSG_REQ_SYNC_UPDATE_MASK) >> CSG_REQ_SYNC_UPDATE_SHIFT)
+#define CSG_REQ_SYNC_UPDATE_SET(reg_val, value) \
+	(((reg_val) & ~CSG_REQ_SYNC_UPDATE_MASK) | (((value) << CSG_REQ_SYNC_UPDATE_SHIFT) & CSG_REQ_SYNC_UPDATE_MASK))
+#define CSG_REQ_IDLE_SHIFT 29
+#define CSG_REQ_IDLE_MASK (0x1 << CSG_REQ_IDLE_SHIFT)
+#define CSG_REQ_IDLE_GET(reg_val) (((reg_val) & CSG_REQ_IDLE_MASK) >> CSG_REQ_IDLE_SHIFT)
+#define CSG_REQ_IDLE_SET(reg_val, value) \
+	(((reg_val) & ~CSG_REQ_IDLE_MASK) | (((value) << CSG_REQ_IDLE_SHIFT) & CSG_REQ_IDLE_MASK))
+#define CSG_REQ_DOORBELL_SHIFT 30
+#define CSG_REQ_DOORBELL_MASK (0x1 << CSG_REQ_DOORBELL_SHIFT)
+#define CSG_REQ_DOORBELL_GET(reg_val) (((reg_val) & CSG_REQ_DOORBELL_MASK) >> CSG_REQ_DOORBELL_SHIFT)
+#define CSG_REQ_DOORBELL_SET(reg_val, value) \
+	(((reg_val) & ~CSG_REQ_DOORBELL_MASK) | (((value) << CSG_REQ_DOORBELL_SHIFT) & CSG_REQ_DOORBELL_MASK))
+#define CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT 31
+#define CSG_REQ_PROGRESS_TIMER_EVENT_MASK ((u32)0x1 << CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT) /* u32: shifting a signed 1 into bit 31 is undefined */
+#define CSG_REQ_PROGRESS_TIMER_EVENT_GET(reg_val) \
+	(((reg_val) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK) >> CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT)
+#define CSG_REQ_PROGRESS_TIMER_EVENT_SET(reg_val, value) \
+	(((reg_val) & ~CSG_REQ_PROGRESS_TIMER_EVENT_MASK) | \
+	(((value) << CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK))
+
+/* CSG_ACK_IRQ_MASK register */
+#define CSG_ACK_IRQ_MASK_STATE_SHIFT 0
+#define CSG_ACK_IRQ_MASK_STATE_MASK (0x7 << CSG_ACK_IRQ_MASK_STATE_SHIFT)
+#define CSG_ACK_IRQ_MASK_STATE_GET(reg_val) (((reg_val) & CSG_ACK_IRQ_MASK_STATE_MASK) >> CSG_ACK_IRQ_MASK_STATE_SHIFT)
+#define CSG_ACK_IRQ_MASK_STATE_SET(reg_val, value) \
+	(((reg_val) & ~CSG_ACK_IRQ_MASK_STATE_MASK) | \
+	(((value) << CSG_ACK_IRQ_MASK_STATE_SHIFT) & CSG_ACK_IRQ_MASK_STATE_MASK))
+/* CSG_ACK_IRQ_MASK_STATE values */
+#define
CSG_ACK_IRQ_MASK_STATE_DISABLED 0x0 +#define CSG_ACK_IRQ_MASK_STATE_ENABLED 0x7 +/* End of CSG_ACK_IRQ_MASK_STATE values */ +#define CSG_ACK_IRQ_MASK_EP_CFG_SHIFT 4 +#define CSG_ACK_IRQ_MASK_EP_CFG_MASK (0x1 << CSG_ACK_IRQ_MASK_EP_CFG_SHIFT) +#define CSG_ACK_IRQ_MASK_EP_CFG_GET(reg_val) (((reg_val)&CSG_ACK_IRQ_MASK_EP_CFG_MASK) >> CSG_ACK_IRQ_MASK_EP_CFG_SHIFT) +#define CSG_ACK_IRQ_MASK_EP_CFG_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_IRQ_MASK_EP_CFG_MASK) | \ + (((value) << CSG_ACK_IRQ_MASK_EP_CFG_SHIFT) & CSG_ACK_IRQ_MASK_EP_CFG_MASK)) +#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT 5 +#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK (0x1 << CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT) +#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_GET(reg_val) \ + (((reg_val)&CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK) >> CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT) +#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK) | \ + (((value) << CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT) & CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK)) +#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT 28 +#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK (0x1 << CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT) +#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_GET(reg_val) \ + (((reg_val)&CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK) >> CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT) +#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK) | \ + (((value) << CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT) & CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK)) +#define CSG_ACK_IRQ_MASK_IDLE_SHIFT 29 +#define CSG_ACK_IRQ_MASK_IDLE_MASK (0x1 << CSG_ACK_IRQ_MASK_IDLE_SHIFT) +#define CSG_ACK_IRQ_MASK_IDLE_GET(reg_val) (((reg_val)&CSG_ACK_IRQ_MASK_IDLE_MASK) >> CSG_ACK_IRQ_MASK_IDLE_SHIFT) +#define CSG_ACK_IRQ_MASK_IDLE_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_IRQ_MASK_IDLE_MASK) | \ + (((value) << CSG_ACK_IRQ_MASK_IDLE_SHIFT) & CSG_ACK_IRQ_MASK_IDLE_MASK)) +#define CSG_ACK_IRQ_MASK_DOORBELL_SHIFT 30 +#define 
CSG_ACK_IRQ_MASK_DOORBELL_MASK (0x1 << CSG_ACK_IRQ_MASK_DOORBELL_SHIFT) +#define CSG_ACK_IRQ_MASK_DOORBELL_GET(reg_val) \ + (((reg_val)&CSG_ACK_IRQ_MASK_DOORBELL_MASK) >> CSG_ACK_IRQ_MASK_DOORBELL_SHIFT) +#define CSG_ACK_IRQ_MASK_DOORBELL_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_IRQ_MASK_DOORBELL_MASK) | \ + (((value) << CSG_ACK_IRQ_MASK_DOORBELL_SHIFT) & CSG_ACK_IRQ_MASK_DOORBELL_MASK)) +#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT 31 +#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT) +#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_GET(reg_val) \ + (((reg_val)&CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK) >> CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT) +#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK) | \ + (((value) << CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT) & CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK)) + +/* CSG_EP_REQ register */ +#define CSG_EP_REQ_COMPUTE_EP_SHIFT 0 +#define CSG_EP_REQ_COMPUTE_EP_MASK (0xFF << CSG_EP_REQ_COMPUTE_EP_SHIFT) +#define CSG_EP_REQ_COMPUTE_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_COMPUTE_EP_MASK) >> CSG_EP_REQ_COMPUTE_EP_SHIFT) +#define CSG_EP_REQ_COMPUTE_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_COMPUTE_EP_MASK) | \ + (((value) << CSG_EP_REQ_COMPUTE_EP_SHIFT) & CSG_EP_REQ_COMPUTE_EP_MASK)) +#define CSG_EP_REQ_FRAGMENT_EP_SHIFT 8 +#define CSG_EP_REQ_FRAGMENT_EP_MASK (0xFF << CSG_EP_REQ_FRAGMENT_EP_SHIFT) +#define CSG_EP_REQ_FRAGMENT_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_FRAGMENT_EP_MASK) >> CSG_EP_REQ_FRAGMENT_EP_SHIFT) +#define CSG_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_FRAGMENT_EP_MASK) | \ + (((value) << CSG_EP_REQ_FRAGMENT_EP_SHIFT) & CSG_EP_REQ_FRAGMENT_EP_MASK)) +#define CSG_EP_REQ_TILER_EP_SHIFT 16 +#define CSG_EP_REQ_TILER_EP_MASK (0xF << CSG_EP_REQ_TILER_EP_SHIFT) +#define CSG_EP_REQ_TILER_EP_GET(reg_val) 
(((reg_val)&CSG_EP_REQ_TILER_EP_MASK) >> CSG_EP_REQ_TILER_EP_SHIFT) +#define CSG_EP_REQ_TILER_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_TILER_EP_MASK) | (((value) << CSG_EP_REQ_TILER_EP_SHIFT) & CSG_EP_REQ_TILER_EP_MASK)) +#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT 20 +#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK (0x1 << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) +#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_GET(reg_val) \ + (((reg_val)&CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) >> CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) +#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \ + (((value) << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK)) +#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT 21 +#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK (0x1 << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) +#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_GET(reg_val) \ + (((reg_val)&CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) >> CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) +#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \ + (((value) << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK)) +#define CSG_EP_REQ_PRIORITY_SHIFT 28 +#define CSG_EP_REQ_PRIORITY_MASK (0xF << CSG_EP_REQ_PRIORITY_SHIFT) +#define CSG_EP_REQ_PRIORITY_GET(reg_val) (((reg_val)&CSG_EP_REQ_PRIORITY_MASK) >> CSG_EP_REQ_PRIORITY_SHIFT) +#define CSG_EP_REQ_PRIORITY_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_PRIORITY_MASK) | (((value) << CSG_EP_REQ_PRIORITY_SHIFT) & CSG_EP_REQ_PRIORITY_MASK)) + +/* CSG_SUSPEND_BUF register */ +#define CSG_SUSPEND_BUF_POINTER_SHIFT 0 +#define CSG_SUSPEND_BUF_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CSG_SUSPEND_BUF_POINTER_SHIFT) +#define CSG_SUSPEND_BUF_POINTER_GET(reg_val) (((reg_val)&CSG_SUSPEND_BUF_POINTER_MASK) >> CSG_SUSPEND_BUF_POINTER_SHIFT) +#define CSG_SUSPEND_BUF_POINTER_SET(reg_val, value) \ + (((reg_val) & ~CSG_SUSPEND_BUF_POINTER_MASK) | \ + (((value) << 
CSG_SUSPEND_BUF_POINTER_SHIFT) & CSG_SUSPEND_BUF_POINTER_MASK)) + +/* CSG_PROTM_SUSPEND_BUF register */ +#define CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT 0 +#define CSG_PROTM_SUSPEND_BUF_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) +#define CSG_PROTM_SUSPEND_BUF_POINTER_GET(reg_val) \ + (((reg_val)&CSG_PROTM_SUSPEND_BUF_POINTER_MASK) >> CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) +#define CSG_PROTM_SUSPEND_BUF_POINTER_SET(reg_val, value) \ + (((reg_val) & ~CSG_PROTM_SUSPEND_BUF_POINTER_MASK) | \ + (((value) << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) & CSG_PROTM_SUSPEND_BUF_POINTER_MASK)) + +/* End of CSG_INPUT_BLOCK register set definitions */ + +/* CSG_OUTPUT_BLOCK register set definitions */ + +/* CSG_ACK register */ +#define CSG_ACK_STATE_SHIFT 0 +#define CSG_ACK_STATE_MASK (0x7 << CSG_ACK_STATE_SHIFT) +#define CSG_ACK_STATE_GET(reg_val) (((reg_val)&CSG_ACK_STATE_MASK) >> CSG_ACK_STATE_SHIFT) +#define CSG_ACK_STATE_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_STATE_MASK) | (((value) << CSG_ACK_STATE_SHIFT) & CSG_ACK_STATE_MASK)) +/* CSG_ACK_STATE values */ +#define CSG_ACK_STATE_TERMINATE 0x0 +#define CSG_ACK_STATE_START 0x1 +#define CSG_ACK_STATE_SUSPEND 0x2 +#define CSG_ACK_STATE_RESUME 0x3 +/* End of CSG_ACK_STATE values */ +#define CSG_ACK_EP_CFG_SHIFT 4 +#define CSG_ACK_EP_CFG_MASK (0x1 << CSG_ACK_EP_CFG_SHIFT) +#define CSG_ACK_EP_CFG_GET(reg_val) (((reg_val)&CSG_ACK_EP_CFG_MASK) >> CSG_ACK_EP_CFG_SHIFT) +#define CSG_ACK_EP_CFG_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_EP_CFG_MASK) | (((value) << CSG_ACK_EP_CFG_SHIFT) & CSG_ACK_EP_CFG_MASK)) +#define CSG_ACK_STATUS_UPDATE_SHIFT 5 +#define CSG_ACK_STATUS_UPDATE_MASK (0x1 << CSG_ACK_STATUS_UPDATE_SHIFT) +#define CSG_ACK_STATUS_UPDATE_GET(reg_val) (((reg_val)&CSG_ACK_STATUS_UPDATE_MASK) >> CSG_ACK_STATUS_UPDATE_SHIFT) +#define CSG_ACK_STATUS_UPDATE_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_STATUS_UPDATE_MASK) | \ + (((value) << CSG_ACK_STATUS_UPDATE_SHIFT) & 
CSG_ACK_STATUS_UPDATE_MASK)) +#define CSG_ACK_SYNC_UPDATE_SHIFT 28 +#define CSG_ACK_SYNC_UPDATE_MASK (0x1 << CSG_ACK_SYNC_UPDATE_SHIFT) +#define CSG_ACK_SYNC_UPDATE_GET(reg_val) (((reg_val)&CSG_ACK_SYNC_UPDATE_MASK) >> CSG_ACK_SYNC_UPDATE_SHIFT) +#define CSG_ACK_SYNC_UPDATE_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_SYNC_UPDATE_MASK) | (((value) << CSG_ACK_SYNC_UPDATE_SHIFT) & CSG_ACK_SYNC_UPDATE_MASK)) +#define CSG_ACK_IDLE_SHIFT 29 +#define CSG_ACK_IDLE_MASK (0x1 << CSG_ACK_IDLE_SHIFT) +#define CSG_ACK_IDLE_GET(reg_val) (((reg_val)&CSG_ACK_IDLE_MASK) >> CSG_ACK_IDLE_SHIFT) +#define CSG_ACK_IDLE_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_IDLE_MASK) | (((value) << CSG_ACK_IDLE_SHIFT) & CSG_ACK_IDLE_MASK)) +#define CSG_ACK_DOORBELL_SHIFT 30 +#define CSG_ACK_DOORBELL_MASK (0x1 << CSG_ACK_DOORBELL_SHIFT) +#define CSG_ACK_DOORBELL_GET(reg_val) (((reg_val)&CSG_ACK_DOORBELL_MASK) >> CSG_ACK_DOORBELL_SHIFT) +#define CSG_ACK_DOORBELL_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_DOORBELL_MASK) | (((value) << CSG_ACK_DOORBELL_SHIFT) & CSG_ACK_DOORBELL_MASK)) +#define CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT 31 +#define CSG_ACK_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT) +#define CSG_ACK_PROGRESS_TIMER_EVENT_GET(reg_val) \ + (((reg_val)&CSG_ACK_PROGRESS_TIMER_EVENT_MASK) >> CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT) +#define CSG_ACK_PROGRESS_TIMER_EVENT_SET(reg_val, value) \ + (((reg_val) & ~CSG_ACK_PROGRESS_TIMER_EVENT_MASK) | \ + (((value) << CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT) & CSG_ACK_PROGRESS_TIMER_EVENT_MASK)) + +/* CSG_STATUS_EP_CURRENT register */ +#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT 0 +#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK (0xFF << CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT) +#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_GET(reg_val) \ + (((reg_val)&CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK) >> CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT) +#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_SET(reg_val, value) \ + (((reg_val) & 
~CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK) | \ + (((value) << CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT) & CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK)) +#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT 8 +#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK (0xFF << CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT) +#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_GET(reg_val) \ + (((reg_val)&CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK) >> CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT) +#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK) | \ + (((value) << CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT) & CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK)) +#define CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT 16 +#define CSG_STATUS_EP_CURRENT_TILER_EP_MASK (0xF << CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) +#define CSG_STATUS_EP_CURRENT_TILER_EP_GET(reg_val) \ + (((reg_val)&CSG_STATUS_EP_CURRENT_TILER_EP_MASK) >> CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) +#define CSG_STATUS_EP_CURRENT_TILER_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_STATUS_EP_CURRENT_TILER_EP_MASK) | \ + (((value) << CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) & CSG_STATUS_EP_CURRENT_TILER_EP_MASK)) + +/* CSG_STATUS_EP_REQ register */ +#define CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT 0 +#define CSG_STATUS_EP_REQ_COMPUTE_EP_MASK (0xFF << CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT) +#define CSG_STATUS_EP_REQ_COMPUTE_EP_GET(reg_val) \ + (((reg_val)&CSG_STATUS_EP_REQ_COMPUTE_EP_MASK) >> CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT) +#define CSG_STATUS_EP_REQ_COMPUTE_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_STATUS_EP_REQ_COMPUTE_EP_MASK) | \ + (((value) << CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT) & CSG_STATUS_EP_REQ_COMPUTE_EP_MASK)) +#define CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT 8 +#define CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK (0xFF << CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT) +#define CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(reg_val) \ + (((reg_val)&CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK) >> CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT) +#define 
CSG_STATUS_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK) | \ + (((value) << CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT) & CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK)) +#define CSG_STATUS_EP_REQ_TILER_EP_SHIFT 16 +#define CSG_STATUS_EP_REQ_TILER_EP_MASK (0xF << CSG_STATUS_EP_REQ_TILER_EP_SHIFT) +#define CSG_STATUS_EP_REQ_TILER_EP_GET(reg_val) \ + (((reg_val)&CSG_STATUS_EP_REQ_TILER_EP_MASK) >> CSG_STATUS_EP_REQ_TILER_EP_SHIFT) +#define CSG_STATUS_EP_REQ_TILER_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_STATUS_EP_REQ_TILER_EP_MASK) | \ + (((value) << CSG_STATUS_EP_REQ_TILER_EP_SHIFT) & CSG_STATUS_EP_REQ_TILER_EP_MASK)) +#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT 20 +#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK (0x1 << CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) +#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_GET(reg_val) \ + (((reg_val)&CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK) >> CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) +#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \ + (((reg_val) & ~CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \ + (((value) << CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK)) +#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT 21 +#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK (0x1 << CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) +#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_GET(reg_val) \ + (((reg_val)&CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) >> CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) +#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \ + (((reg_val) & ~CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \ + (((value) << CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK)) + +/* End of CSG_OUTPUT_BLOCK register set definitions */ + +/* STREAM_CONTROL_BLOCK register set definitions */ + +/* STREAM_FEATURES register */ +#define STREAM_FEATURES_WORK_REGISTERS_SHIFT 0 +#define 
STREAM_FEATURES_WORK_REGISTERS_MASK (0xFF << STREAM_FEATURES_WORK_REGISTERS_SHIFT) +#define STREAM_FEATURES_WORK_REGISTERS_GET(reg_val) \ + (((reg_val)&STREAM_FEATURES_WORK_REGISTERS_MASK) >> STREAM_FEATURES_WORK_REGISTERS_SHIFT) +#define STREAM_FEATURES_WORK_REGISTERS_SET(reg_val, value) \ + (((reg_val) & ~STREAM_FEATURES_WORK_REGISTERS_MASK) | \ + (((value) << STREAM_FEATURES_WORK_REGISTERS_SHIFT) & STREAM_FEATURES_WORK_REGISTERS_MASK)) +#define STREAM_FEATURES_SCOREBOARDS_SHIFT 8 +#define STREAM_FEATURES_SCOREBOARDS_MASK (0xFF << STREAM_FEATURES_SCOREBOARDS_SHIFT) +#define STREAM_FEATURES_SCOREBOARDS_GET(reg_val) \ + (((reg_val)&STREAM_FEATURES_SCOREBOARDS_MASK) >> STREAM_FEATURES_SCOREBOARDS_SHIFT) +#define STREAM_FEATURES_SCOREBOARDS_SET(reg_val, value) \ + (((reg_val) & ~STREAM_FEATURES_SCOREBOARDS_MASK) | \ + (((value) << STREAM_FEATURES_SCOREBOARDS_SHIFT) & STREAM_FEATURES_SCOREBOARDS_MASK)) +#define STREAM_FEATURES_COMPUTE_SHIFT 16 +#define STREAM_FEATURES_COMPUTE_MASK (0x1 << STREAM_FEATURES_COMPUTE_SHIFT) +#define STREAM_FEATURES_COMPUTE_GET(reg_val) (((reg_val)&STREAM_FEATURES_COMPUTE_MASK) >> STREAM_FEATURES_COMPUTE_SHIFT) +#define STREAM_FEATURES_COMPUTE_SET(reg_val, value) \ + (((reg_val) & ~STREAM_FEATURES_COMPUTE_MASK) | \ + (((value) << STREAM_FEATURES_COMPUTE_SHIFT) & STREAM_FEATURES_COMPUTE_MASK)) +#define STREAM_FEATURES_FRAGMENT_SHIFT 17 +#define STREAM_FEATURES_FRAGMENT_MASK (0x1 << STREAM_FEATURES_FRAGMENT_SHIFT) +#define STREAM_FEATURES_FRAGMENT_GET(reg_val) \ + (((reg_val)&STREAM_FEATURES_FRAGMENT_MASK) >> STREAM_FEATURES_FRAGMENT_SHIFT) +#define STREAM_FEATURES_FRAGMENT_SET(reg_val, value) \ + (((reg_val) & ~STREAM_FEATURES_FRAGMENT_MASK) | \ + (((value) << STREAM_FEATURES_FRAGMENT_SHIFT) & STREAM_FEATURES_FRAGMENT_MASK)) +#define STREAM_FEATURES_TILER_SHIFT 18 +#define STREAM_FEATURES_TILER_MASK (0x1 << STREAM_FEATURES_TILER_SHIFT) +#define STREAM_FEATURES_TILER_GET(reg_val) (((reg_val)&STREAM_FEATURES_TILER_MASK) >> 
STREAM_FEATURES_TILER_SHIFT) +#define STREAM_FEATURES_TILER_SET(reg_val, value) \ + (((reg_val) & ~STREAM_FEATURES_TILER_MASK) | \ + (((value) << STREAM_FEATURES_TILER_SHIFT) & STREAM_FEATURES_TILER_MASK)) + +/* STREAM_INPUT_VA register */ +#define STREAM_INPUT_VA_VALUE_SHIFT 0 +#define STREAM_INPUT_VA_VALUE_MASK (0xFFFFFFFF << STREAM_INPUT_VA_VALUE_SHIFT) +#define STREAM_INPUT_VA_VALUE_GET(reg_val) (((reg_val)&STREAM_INPUT_VA_VALUE_MASK) >> STREAM_INPUT_VA_VALUE_SHIFT) +#define STREAM_INPUT_VA_VALUE_SET(reg_val, value) \ + (((reg_val) & ~STREAM_INPUT_VA_VALUE_MASK) | \ + (((value) << STREAM_INPUT_VA_VALUE_SHIFT) & STREAM_INPUT_VA_VALUE_MASK)) + +/* STREAM_OUTPUT_VA register */ +#define STREAM_OUTPUT_VA_VALUE_SHIFT 0 +#define STREAM_OUTPUT_VA_VALUE_MASK (0xFFFFFFFF << STREAM_OUTPUT_VA_VALUE_SHIFT) +#define STREAM_OUTPUT_VA_VALUE_GET(reg_val) (((reg_val)&STREAM_OUTPUT_VA_VALUE_MASK) >> STREAM_OUTPUT_VA_VALUE_SHIFT) +#define STREAM_OUTPUT_VA_VALUE_SET(reg_val, value) \ + (((reg_val) & ~STREAM_OUTPUT_VA_VALUE_MASK) | \ + (((value) << STREAM_OUTPUT_VA_VALUE_SHIFT) & STREAM_OUTPUT_VA_VALUE_MASK)) +/* End of STREAM_CONTROL_BLOCK register set definitions */ + +/* GLB_INPUT_BLOCK register set definitions */ + +/* GLB_REQ register */ +#define GLB_REQ_HALT_SHIFT 0 +#define GLB_REQ_HALT_MASK (0x1 << GLB_REQ_HALT_SHIFT) +#define GLB_REQ_HALT_GET(reg_val) (((reg_val)&GLB_REQ_HALT_MASK) >> GLB_REQ_HALT_SHIFT) +#define GLB_REQ_HALT_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_HALT_MASK) | (((value) << GLB_REQ_HALT_SHIFT) & GLB_REQ_HALT_MASK)) +#define GLB_REQ_CFG_PROGRESS_TIMER_SHIFT 1 +#define GLB_REQ_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_REQ_CFG_PROGRESS_TIMER_SHIFT) +#define GLB_REQ_CFG_PROGRESS_TIMER_GET(reg_val) \ + (((reg_val)&GLB_REQ_CFG_PROGRESS_TIMER_MASK) >> GLB_REQ_CFG_PROGRESS_TIMER_SHIFT) +#define GLB_REQ_CFG_PROGRESS_TIMER_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_CFG_PROGRESS_TIMER_MASK) | \ + (((value) << GLB_REQ_CFG_PROGRESS_TIMER_SHIFT) & 
GLB_REQ_CFG_PROGRESS_TIMER_MASK)) +#define GLB_REQ_CFG_ALLOC_EN_SHIFT 2 +#define GLB_REQ_CFG_ALLOC_EN_MASK (0x1 << GLB_REQ_CFG_ALLOC_EN_SHIFT) +#define GLB_REQ_CFG_ALLOC_EN_GET(reg_val) (((reg_val)&GLB_REQ_CFG_ALLOC_EN_MASK) >> GLB_REQ_CFG_ALLOC_EN_SHIFT) +#define GLB_REQ_CFG_ALLOC_EN_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_CFG_ALLOC_EN_MASK) | (((value) << GLB_REQ_CFG_ALLOC_EN_SHIFT) & GLB_REQ_CFG_ALLOC_EN_MASK)) +#define GLB_REQ_CFG_PWROFF_TIMER_SHIFT 3 +#define GLB_REQ_CFG_PWROFF_TIMER_MASK (0x1 << GLB_REQ_CFG_PWROFF_TIMER_SHIFT) +#define GLB_REQ_CFG_PWROFF_TIMER_GET(reg_val) \ + (((reg_val)&GLB_REQ_CFG_PWROFF_TIMER_MASK) >> GLB_REQ_CFG_PWROFF_TIMER_SHIFT) +#define GLB_REQ_CFG_PWROFF_TIMER_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_CFG_PWROFF_TIMER_MASK) | \ + (((value) << GLB_REQ_CFG_PWROFF_TIMER_SHIFT) & GLB_REQ_CFG_PWROFF_TIMER_MASK)) +#define GLB_REQ_PROTM_ENTER_SHIFT 4 +#define GLB_REQ_PROTM_ENTER_MASK (0x1 << GLB_REQ_PROTM_ENTER_SHIFT) +#define GLB_REQ_PROTM_ENTER_GET(reg_val) (((reg_val)&GLB_REQ_PROTM_ENTER_MASK) >> GLB_REQ_PROTM_ENTER_SHIFT) +#define GLB_REQ_PROTM_ENTER_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_PROTM_ENTER_MASK) | (((value) << GLB_REQ_PROTM_ENTER_SHIFT) & GLB_REQ_PROTM_ENTER_MASK)) +#define GLB_REQ_PRFCNT_ENABLE_SHIFT 5 +#define GLB_REQ_PRFCNT_ENABLE_MASK (0x1 << GLB_REQ_PRFCNT_ENABLE_SHIFT) +#define GLB_REQ_PRFCNT_ENABLE_GET(reg_val) (((reg_val)&GLB_REQ_PRFCNT_ENABLE_MASK) >> GLB_REQ_PRFCNT_ENABLE_SHIFT) +#define GLB_REQ_PRFCNT_ENABLE_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_PRFCNT_ENABLE_MASK) | \ + (((value) << GLB_REQ_PRFCNT_ENABLE_SHIFT) & GLB_REQ_PRFCNT_ENABLE_MASK)) +#define GLB_REQ_PRFCNT_SAMPLE_SHIFT 6 +#define GLB_REQ_PRFCNT_SAMPLE_MASK (0x1 << GLB_REQ_PRFCNT_SAMPLE_SHIFT) +#define GLB_REQ_PRFCNT_SAMPLE_GET(reg_val) (((reg_val)&GLB_REQ_PRFCNT_SAMPLE_MASK) >> GLB_REQ_PRFCNT_SAMPLE_SHIFT) +#define GLB_REQ_PRFCNT_SAMPLE_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_PRFCNT_SAMPLE_MASK) | \ + (((value) << 
GLB_REQ_PRFCNT_SAMPLE_SHIFT) & GLB_REQ_PRFCNT_SAMPLE_MASK)) +#define GLB_REQ_COUNTER_ENABLE_SHIFT 7 +#define GLB_REQ_COUNTER_ENABLE_MASK (0x1 << GLB_REQ_COUNTER_ENABLE_SHIFT) +#define GLB_REQ_COUNTER_ENABLE_GET(reg_val) (((reg_val)&GLB_REQ_COUNTER_ENABLE_MASK) >> GLB_REQ_COUNTER_ENABLE_SHIFT) +#define GLB_REQ_COUNTER_ENABLE_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_COUNTER_ENABLE_MASK) | \ + (((value) << GLB_REQ_COUNTER_ENABLE_SHIFT) & GLB_REQ_COUNTER_ENABLE_MASK)) +#define GLB_REQ_PING_SHIFT 8 +#define GLB_REQ_PING_MASK (0x1 << GLB_REQ_PING_SHIFT) +#define GLB_REQ_PING_GET(reg_val) (((reg_val)&GLB_REQ_PING_MASK) >> GLB_REQ_PING_SHIFT) +#define GLB_REQ_PING_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_PING_MASK) | (((value) << GLB_REQ_PING_SHIFT) & GLB_REQ_PING_MASK)) +#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT 9 +#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK \ + (0x1 << GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) +#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_GET(reg_val) \ + (((reg_val)&GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK) >> \ + GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) +#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK) | \ + (((value) << GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) & \ + GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK)) +#define GLB_REQ_INACTIVE_COMPUTE_SHIFT 20 +#define GLB_REQ_INACTIVE_COMPUTE_MASK (0x1 << GLB_REQ_INACTIVE_COMPUTE_SHIFT) +#define GLB_REQ_INACTIVE_COMPUTE_GET(reg_val) \ + (((reg_val)&GLB_REQ_INACTIVE_COMPUTE_MASK) >> GLB_REQ_INACTIVE_COMPUTE_SHIFT) +#define GLB_REQ_INACTIVE_COMPUTE_SET(reg_val, value) \ + (((reg_val) & ~GLB_REQ_INACTIVE_COMPUTE_MASK) | \ + (((value) << GLB_REQ_INACTIVE_COMPUTE_SHIFT) & GLB_REQ_INACTIVE_COMPUTE_MASK)) +#define GLB_REQ_INACTIVE_FRAGMENT_SHIFT 21 +#define GLB_REQ_INACTIVE_FRAGMENT_MASK (0x1 << GLB_REQ_INACTIVE_FRAGMENT_SHIFT) +#define GLB_REQ_INACTIVE_FRAGMENT_GET(reg_val) \ + (((reg_val)&GLB_REQ_INACTIVE_FRAGMENT_MASK) >> GLB_REQ_INACTIVE_FRAGMENT_SHIFT) 
+/*
+ * Field accessors below follow one pattern: F_SHIFT is the position of the
+ * field's lowest bit, F_MASK selects the field in place, F_GET(reg_val)
+ * yields the field shifted down to bit 0 and F_SET(reg_val, value) yields
+ * reg_val with only that field replaced by value.
+ */
+#define GLB_REQ_INACTIVE_FRAGMENT_SET(reg_val, value) \
+	(((reg_val) & ~GLB_REQ_INACTIVE_FRAGMENT_MASK) | \
+	 (((value) << GLB_REQ_INACTIVE_FRAGMENT_SHIFT) & GLB_REQ_INACTIVE_FRAGMENT_MASK))
+#define GLB_REQ_INACTIVE_TILER_SHIFT 22
+#define GLB_REQ_INACTIVE_TILER_MASK (0x1 << GLB_REQ_INACTIVE_TILER_SHIFT)
+#define GLB_REQ_INACTIVE_TILER_GET(reg_val) \
+	(((reg_val) & GLB_REQ_INACTIVE_TILER_MASK) >> GLB_REQ_INACTIVE_TILER_SHIFT)
+#define GLB_REQ_INACTIVE_TILER_SET(reg_val, value) \
+	(((reg_val) & ~GLB_REQ_INACTIVE_TILER_MASK) | \
+	 (((value) << GLB_REQ_INACTIVE_TILER_SHIFT) & GLB_REQ_INACTIVE_TILER_MASK))
+#define GLB_REQ_PROTM_EXIT_SHIFT 23
+#define GLB_REQ_PROTM_EXIT_MASK (0x1 << GLB_REQ_PROTM_EXIT_SHIFT)
+#define GLB_REQ_PROTM_EXIT_GET(reg_val) \
+	(((reg_val) & GLB_REQ_PROTM_EXIT_MASK) >> GLB_REQ_PROTM_EXIT_SHIFT)
+#define GLB_REQ_PROTM_EXIT_SET(reg_val, value) \
+	(((reg_val) & ~GLB_REQ_PROTM_EXIT_MASK) | \
+	 (((value) << GLB_REQ_PROTM_EXIT_SHIFT) & GLB_REQ_PROTM_EXIT_MASK))
+#define GLB_REQ_PRFCNT_THRESHOLD_SHIFT 24
+#define GLB_REQ_PRFCNT_THRESHOLD_MASK (0x1 << GLB_REQ_PRFCNT_THRESHOLD_SHIFT)
+#define GLB_REQ_PRFCNT_THRESHOLD_GET(reg_val) \
+	(((reg_val) & GLB_REQ_PRFCNT_THRESHOLD_MASK) >> GLB_REQ_PRFCNT_THRESHOLD_SHIFT)
+#define GLB_REQ_PRFCNT_THRESHOLD_SET(reg_val, value) \
+	(((reg_val) & ~GLB_REQ_PRFCNT_THRESHOLD_MASK) | \
+	 (((value) << GLB_REQ_PRFCNT_THRESHOLD_SHIFT) & GLB_REQ_PRFCNT_THRESHOLD_MASK))
+#define GLB_REQ_PRFCNT_OVERFLOW_SHIFT 25
+#define GLB_REQ_PRFCNT_OVERFLOW_MASK (0x1 << GLB_REQ_PRFCNT_OVERFLOW_SHIFT)
+#define GLB_REQ_PRFCNT_OVERFLOW_GET(reg_val) \
+	(((reg_val) & GLB_REQ_PRFCNT_OVERFLOW_MASK) >> GLB_REQ_PRFCNT_OVERFLOW_SHIFT)
+#define GLB_REQ_PRFCNT_OVERFLOW_SET(reg_val, value) \
+	(((reg_val) & ~GLB_REQ_PRFCNT_OVERFLOW_MASK) | \
+	 (((value) << GLB_REQ_PRFCNT_OVERFLOW_SHIFT) & GLB_REQ_PRFCNT_OVERFLOW_MASK))
+#define GLB_REQ_DEBUG_CSF_REQ_SHIFT 30
+#define GLB_REQ_DEBUG_CSF_REQ_MASK (0x1 << GLB_REQ_DEBUG_CSF_REQ_SHIFT)
+#define GLB_REQ_DEBUG_CSF_REQ_GET(reg_val) \
+	(((reg_val) & GLB_REQ_DEBUG_CSF_REQ_MASK) >> GLB_REQ_DEBUG_CSF_REQ_SHIFT)
+#define GLB_REQ_DEBUG_CSF_REQ_SET(reg_val, value) \
+	(((reg_val) & ~GLB_REQ_DEBUG_CSF_REQ_MASK) | \
+	 (((value) << GLB_REQ_DEBUG_CSF_REQ_SHIFT) & GLB_REQ_DEBUG_CSF_REQ_MASK))
+#define GLB_REQ_DEBUG_HOST_REQ_SHIFT 31
+/*
+ * Bit 31: the literal is unsigned because left-shifting a signed int into
+ * its sign bit is undefined behaviour in C, and the negative result would
+ * sign-extend when combined with a 64-bit operand.
+ */
+#define GLB_REQ_DEBUG_HOST_REQ_MASK (0x1U << GLB_REQ_DEBUG_HOST_REQ_SHIFT)
+#define GLB_REQ_DEBUG_HOST_REQ_GET(reg_val) \
+	(((reg_val) & GLB_REQ_DEBUG_HOST_REQ_MASK) >> GLB_REQ_DEBUG_HOST_REQ_SHIFT)
+#define GLB_REQ_DEBUG_HOST_REQ_SET(reg_val, value) \
+	(((reg_val) & ~GLB_REQ_DEBUG_HOST_REQ_MASK) | \
+	 (((value) << GLB_REQ_DEBUG_HOST_REQ_SHIFT) & GLB_REQ_DEBUG_HOST_REQ_MASK))
+
+/* GLB_ACK_IRQ_MASK register */
+#define GLB_ACK_IRQ_MASK_HALT_SHIFT 0
+#define GLB_ACK_IRQ_MASK_HALT_MASK (0x1 << GLB_ACK_IRQ_MASK_HALT_SHIFT)
+#define GLB_ACK_IRQ_MASK_HALT_GET(reg_val) \
+	(((reg_val) & GLB_ACK_IRQ_MASK_HALT_MASK) >> GLB_ACK_IRQ_MASK_HALT_SHIFT)
+#define GLB_ACK_IRQ_MASK_HALT_SET(reg_val, value) \
+	(((reg_val) & ~GLB_ACK_IRQ_MASK_HALT_MASK) | \
+	 (((value) << GLB_ACK_IRQ_MASK_HALT_SHIFT) & GLB_ACK_IRQ_MASK_HALT_MASK))
+#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT 1
+#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT)
+#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_GET(reg_val) \
+	(((reg_val) & GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK) >> GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT)
+#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SET(reg_val, value) \
+	(((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK) | \
+	 (((value) << GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT) & GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK))
+#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT 2
+#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT)
+#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_GET(reg_val) \
+	(((reg_val) & GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK) >> GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT)
+#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SET(reg_val, value) \
+	(((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK) | \
+	 (((value) << GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT) & GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK))
+#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT 3
+#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT)
+#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_GET(reg_val) \
+	(((reg_val) & GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK) >> GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT)
+#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SET(reg_val, value) \
+	(((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK) | \
+	 (((value) << GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT) & GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK))
+#define GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT 4
+#define GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK (0x1 << GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT)
+#define GLB_ACK_IRQ_MASK_PROTM_ENTER_GET(reg_val) \
+	(((reg_val) & GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK) >> GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT)
+#define GLB_ACK_IRQ_MASK_PROTM_ENTER_SET(reg_val, value) \
+	(((reg_val) & ~GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK) | \
+	 (((value) << GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT) & GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK))
+#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT 5
+#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT)
+#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_GET(reg_val) \
+	(((reg_val) & GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT)
+#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SET(reg_val, value) \
+	(((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK) | \
+	 (((value) << GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT) & GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK))
+#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT 6
+#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT)
+#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_GET(reg_val) \
+	(((reg_val) & GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT)
+/*
+ * Field accessors below follow one pattern: F_SHIFT is the position of the
+ * field's lowest bit, F_MASK selects the field in place, F_GET(reg_val)
+ * yields the field shifted down to bit 0 and F_SET(reg_val, value) yields
+ * reg_val with only that field replaced by value.
+ * Masks that reach bit 31 use an unsigned literal: left-shifting a signed
+ * int into its sign bit is undefined behaviour in C, and the negative
+ * result would sign-extend when combined with a 64-bit operand.
+ */
+#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SET(reg_val, value) \
+	(((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK) | \
+	 (((value) << GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT) & GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK))
+#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT 7
+#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK (0x1 << GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT)
+#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_GET(reg_val) \
+	(((reg_val) & GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK) >> GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT)
+#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SET(reg_val, value) \
+	(((reg_val) & ~GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK) | \
+	 (((value) << GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT) & GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK))
+#define GLB_ACK_IRQ_MASK_PING_SHIFT 8
+#define GLB_ACK_IRQ_MASK_PING_MASK (0x1 << GLB_ACK_IRQ_MASK_PING_SHIFT)
+#define GLB_ACK_IRQ_MASK_PING_GET(reg_val) \
+	(((reg_val) & GLB_ACK_IRQ_MASK_PING_MASK) >> GLB_ACK_IRQ_MASK_PING_SHIFT)
+#define GLB_ACK_IRQ_MASK_PING_SET(reg_val, value) \
+	(((reg_val) & ~GLB_ACK_IRQ_MASK_PING_MASK) | \
+	 (((value) << GLB_ACK_IRQ_MASK_PING_SHIFT) & GLB_ACK_IRQ_MASK_PING_MASK))
+#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT 9
+#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK (0x1 << GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT)
+#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_GET(reg_val) \
+	(((reg_val) & GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK) >> GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT)
+#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SET(reg_val, value) \
+	(((reg_val) & ~GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK) | \
+	 (((value) << GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT) & GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK))
+#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT 20
+#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT)
+#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_GET(reg_val) \
+	(((reg_val) & GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT)
+#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SET(reg_val, value) \
+	(((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK) | \
+	 (((value) << GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK))
+#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT 21
+#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT)
+#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_GET(reg_val) \
+	(((reg_val) & GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT)
+#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SET(reg_val, value) \
+	(((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK) | \
+	 (((value) << GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK))
+#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT 22
+#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT)
+#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_GET(reg_val) \
+	(((reg_val) & GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT)
+#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_SET(reg_val, value) \
+	(((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK) | \
+	 (((value) << GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK))
+#define GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT 23
+#define GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK (0x1 << GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT)
+#define GLB_ACK_IRQ_MASK_PROTM_EXIT_GET(reg_val) \
+	(((reg_val) & GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK) >> GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT)
+#define GLB_ACK_IRQ_MASK_PROTM_EXIT_SET(reg_val, value) \
+	(((reg_val) & ~GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK) | \
+	 (((value) << GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT) & GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK))
+#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT 24
+#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT)
+#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_GET(reg_val) \
+	(((reg_val) & GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT)
+#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SET(reg_val, value) \
+	(((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK) | \
+	 (((value) << GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) & GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK))
+#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT 25
+#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT)
+#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_GET(reg_val) \
+	(((reg_val) & GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT)
+#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SET(reg_val, value) \
+	(((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK) | \
+	 (((value) << GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) & GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK))
+#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT 30
+#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK (0x1 << GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT)
+#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_GET(reg_val) \
+	(((reg_val) & GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK) >> GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT)
+#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SET(reg_val, value) \
+	(((reg_val) & ~GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK) | \
+	 (((value) << GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT) & GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK))
+#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT 31
+/* Bit 31: unsigned literal avoids a signed shift into the sign bit. */
+#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK (0x1U << GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT)
+#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_GET(reg_val) \
+	(((reg_val) & GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK) >> GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT)
+#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SET(reg_val, value) \
+	(((reg_val) & ~GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK) | \
+	 (((value) << GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT) & GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK))
+
+/* GLB_PROGRESS_TIMER register */
+#define GLB_PROGRESS_TIMER_TIMEOUT_SHIFT 0
+#define GLB_PROGRESS_TIMER_TIMEOUT_MASK (0xFFFFFFFF << GLB_PROGRESS_TIMER_TIMEOUT_SHIFT)
+#define GLB_PROGRESS_TIMER_TIMEOUT_GET(reg_val) \
+	(((reg_val) & GLB_PROGRESS_TIMER_TIMEOUT_MASK) >> GLB_PROGRESS_TIMER_TIMEOUT_SHIFT)
+#define GLB_PROGRESS_TIMER_TIMEOUT_SET(reg_val, value) \
+	(((reg_val) & ~GLB_PROGRESS_TIMER_TIMEOUT_MASK) | \
+	 (((value) << GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) & GLB_PROGRESS_TIMER_TIMEOUT_MASK))
+
+/* GLB_PWROFF_TIMER register */
+#define GLB_PWROFF_TIMER_TIMEOUT_SHIFT 0
+#define GLB_PWROFF_TIMER_TIMEOUT_MASK (0x7FFFFFFF << GLB_PWROFF_TIMER_TIMEOUT_SHIFT)
+#define GLB_PWROFF_TIMER_TIMEOUT_GET(reg_val) \
+	(((reg_val) & GLB_PWROFF_TIMER_TIMEOUT_MASK) >> GLB_PWROFF_TIMER_TIMEOUT_SHIFT)
+#define GLB_PWROFF_TIMER_TIMEOUT_SET(reg_val, value) \
+	(((reg_val) & ~GLB_PWROFF_TIMER_TIMEOUT_MASK) | \
+	 (((value) << GLB_PWROFF_TIMER_TIMEOUT_SHIFT) & GLB_PWROFF_TIMER_TIMEOUT_MASK))
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT 31
+/* Bit 31: unsigned literal avoids a signed shift into the sign bit. */
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_MASK (0x1U << GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT)
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_GET(reg_val) \
+	(((reg_val) & GLB_PWROFF_TIMER_TIMER_SOURCE_MASK) >> GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT)
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val, value) \
+	(((reg_val) & ~GLB_PWROFF_TIMER_TIMER_SOURCE_MASK) | \
+	 (((value) << GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT) & GLB_PWROFF_TIMER_TIMER_SOURCE_MASK))
+/* GLB_PWROFF_TIMER_TIMER_SOURCE values */
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP 0x0
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1
+/* End of GLB_PWROFF_TIMER_TIMER_SOURCE values */
+
+/* GLB_ALLOC_EN register */
+#define GLB_ALLOC_EN_MASK_SHIFT 0
+/* 64-bit field: explicit ULL suffix so the literal is 64 bits wide on every target. */
+#define GLB_ALLOC_EN_MASK_MASK (0xFFFFFFFFFFFFFFFFULL << GLB_ALLOC_EN_MASK_SHIFT)
+#define GLB_ALLOC_EN_MASK_GET(reg_val) \
+	(((reg_val) & GLB_ALLOC_EN_MASK_MASK) >> GLB_ALLOC_EN_MASK_SHIFT)
+#define GLB_ALLOC_EN_MASK_SET(reg_val, value) \
+	(((reg_val) & ~GLB_ALLOC_EN_MASK_MASK) | \
+	 (((value) << GLB_ALLOC_EN_MASK_SHIFT) & GLB_ALLOC_EN_MASK_MASK))
+
+/* GLB_PROTM_COHERENCY
register */ +#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT 0 +#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK \ + (0xFFFFFFFF << GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT) +#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_GET(reg_val) \ + (((reg_val)&GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK) >> \ + GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT) +#define GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SET(reg_val, value) \ + (((reg_val) & ~GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK) | \ + (((value) << GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_SHIFT) & \ + GLB_PROTM_COHERENCY_L2_CACHE_PROTOCOL_SELECT_MASK)) +/* End of GLB_INPUT_BLOCK register set definitions */ + +/* GLB_OUTPUT_BLOCK register set definitions */ + +/* GLB_ACK register */ +#define GLB_ACK_CFG_PROGRESS_TIMER_SHIFT 1 +#define GLB_ACK_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_ACK_CFG_PROGRESS_TIMER_SHIFT) +#define GLB_ACK_CFG_PROGRESS_TIMER_GET(reg_val) \ + (((reg_val)&GLB_ACK_CFG_PROGRESS_TIMER_MASK) >> GLB_ACK_CFG_PROGRESS_TIMER_SHIFT) +#define GLB_ACK_CFG_PROGRESS_TIMER_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_CFG_PROGRESS_TIMER_MASK) | \ + (((value) << GLB_ACK_CFG_PROGRESS_TIMER_SHIFT) & GLB_ACK_CFG_PROGRESS_TIMER_MASK)) +#define GLB_ACK_CFG_ALLOC_EN_SHIFT 2 +#define GLB_ACK_CFG_ALLOC_EN_MASK (0x1 << GLB_ACK_CFG_ALLOC_EN_SHIFT) +#define GLB_ACK_CFG_ALLOC_EN_GET(reg_val) (((reg_val)&GLB_ACK_CFG_ALLOC_EN_MASK) >> GLB_ACK_CFG_ALLOC_EN_SHIFT) +#define GLB_ACK_CFG_ALLOC_EN_SET(reg_val, value) \ + (((reg_val) & ~GLB_ACK_CFG_ALLOC_EN_MASK) | (((value) << GLB_ACK_CFG_ALLOC_EN_SHIFT) & GLB_ACK_CFG_ALLOC_EN_MASK)) +/* End of GLB_OUTPUT_BLOCK register set definitions */ + +/* The following register and fields are for headers before 10.x.7/11.x.4 */ +#define GLB_REQ_IDLE_ENABLE_SHIFT (10) +#define GLB_REQ_REQ_IDLE_ENABLE (1 << GLB_REQ_IDLE_ENABLE_SHIFT) +#define GLB_REQ_REQ_IDLE_DISABLE (0 << GLB_REQ_IDLE_ENABLE_SHIFT) +#define GLB_REQ_IDLE_ENABLE_MASK (0x1 
<< GLB_REQ_IDLE_ENABLE_SHIFT) +#define GLB_REQ_IDLE_DISABLE_MASK (0x1 << GLB_REQ_IDLE_ENABLE_SHIFT) +#define GLB_REQ_IDLE_EVENT_SHIFT (26) +#define GLB_REQ_IDLE_EVENT_MASK (0x1 << GLB_REQ_IDLE_EVENT_SHIFT) +#define GLB_ACK_IDLE_ENABLE_SHIFT (10) +#define GLB_ACK_ACK_IDLE_ENABLE (1 << GLB_ACK_IDLE_ENABLE_SHIFT) +#define GLB_ACK_ACK_IDLE_DISABLE (0 << GLB_ACK_IDLE_ENABLE_SHIFT) +#define GLB_ACK_IDLE_ENABLE_MASK (0x1 << GLB_ACK_IDLE_ENABLE_SHIFT) +#define GLB_ACK_IDLE_EVENT_SHIFT (26) +#define GLB_ACK_IDLE_EVENT_MASK (0x1 << GLB_REQ_IDLE_EVENT_SHIFT) + +#define GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT (26) +#define GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK (0x1 << GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT) + +#define GLB_IDLE_TIMER (0x0080) +/* GLB_IDLE_TIMER register */ +#define GLB_IDLE_TIMER_TIMEOUT_SHIFT (0) +#define GLB_IDLE_TIMER_TIMEOUT_MASK ((0x7FFFFFFF) << GLB_IDLE_TIMER_TIMEOUT_SHIFT) +#define GLB_IDLE_TIMER_TIMEOUT_GET(reg_val) (((reg_val)&GLB_IDLE_TIMER_TIMEOUT_MASK) >> GLB_IDLE_TIMER_TIMEOUT_SHIFT) +#define GLB_IDLE_TIMER_TIMEOUT_SET(reg_val, value) \ + (((reg_val) & ~GLB_IDLE_TIMER_TIMEOUT_MASK) | \ + (((value) << GLB_IDLE_TIMER_TIMEOUT_SHIFT) & GLB_IDLE_TIMER_TIMEOUT_MASK)) +#define GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT (31) +#define GLB_IDLE_TIMER_TIMER_SOURCE_MASK ((0x1) << GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT) +#define GLB_IDLE_TIMER_TIMER_SOURCE_GET(reg_val) \ + (((reg_val)&GLB_IDLE_TIMER_TIMER_SOURCE_MASK) >> GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT) +#define GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val, value) \ + (((reg_val) & ~GLB_IDLE_TIMER_TIMER_SOURCE_MASK) | \ + (((value) << GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT) & GLB_IDLE_TIMER_TIMER_SOURCE_MASK)) +/* GLB_IDLE_TIMER_TIMER_SOURCE values */ +#define GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP 0x0 +#define GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1 +/* End of GLB_IDLE_TIMER_TIMER_SOURCE values */ + +/* GLB_INSTR_FEATURES register */ +#define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT (0) +#define 
GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK ((u32)0xF << GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT) +#define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_GET(reg_val) \ + (((reg_val)&GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK) >> GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT) +#define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SET(reg_val, value) \ + (((reg_val) & ~GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK) | \ + (((value) << GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT) & GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK)) +#define GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SHIFT (4) +#define GLB_INSTR_FEATURES_EVENT_SIZE_MAX_MASK ((u32)0xF << GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SHIFT) +#define GLB_INSTR_FEATURES_EVENT_SIZE_MAX_GET(reg_val) \ + (((reg_val)&GLB_INSTR_FEATURES_EVENT_SIZE_MAX_MASK) >> GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SHIFT) +#define GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SET(reg_val, value) \ + (((reg_val) & ~GLB_INSTR_FEATURES_EVENT_SIZE_MAX_MASK) | \ + (((value) << GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SHIFT) & GLB_INSTR_FEATURES_EVENT_SIZE_MAX_MASK)) + +#define CSG_STATUS_STATE (0x0018) /* CSG state status register */ +/* CSG_STATUS_STATE register */ +#define CSG_STATUS_STATE_IDLE_SHIFT (0) +#define CSG_STATUS_STATE_IDLE_MASK ((0x1) << CSG_STATUS_STATE_IDLE_SHIFT) +#define CSG_STATUS_STATE_IDLE_GET(reg_val) \ + (((reg_val)&CSG_STATUS_STATE_IDLE_MASK) >> CSG_STATUS_STATE_IDLE_SHIFT) +#define CSG_STATUS_STATE_IDLE_SET(reg_val, value) \ + (((reg_val) & ~CSG_STATUS_STATE_IDLE_MASK) | \ + (((value) << CSG_STATUS_STATE_IDLE_SHIFT) & CSG_STATUS_STATE_IDLE_MASK)) + +#endif /* _UAPI_GPU_CSF_REGISTERS_H_ */ diff --git a/dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h new file mode 100644 index 0000000..d2d7ce2 --- /dev/null +++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h @@ -0,0 +1,433 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) 
COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _UAPI_KBASE_CSF_IOCTL_H_ +#define _UAPI_KBASE_CSF_IOCTL_H_ + +#include +#include + +/* + * 1.0: + * - CSF IOCTL header separated from JM + * 1.1: + * - Add a new priority level BASE_QUEUE_GROUP_PRIORITY_REALTIME + * - Add ioctl 54: This controls the priority setting. + * 1.2: + * - Add new CSF GPU_FEATURES register into the property structure + * returned by KBASE_IOCTL_GET_GPUPROPS + * 1.3: + * - Add __u32 group_uid member to + * &struct_kbase_ioctl_cs_queue_group_create.out + * 1.4: + * - Replace padding in kbase_ioctl_cs_get_glb_iface with + * instr_features member of same size + * 1.5: + * - Add ioctl 40: kbase_ioctl_cs_queue_register_ex, this is a new + * queue registration call with extended format for supporting CS + * trace configurations with CSF trace_command. 
+ */ + +#define BASE_UK_VERSION_MAJOR 1 +#define BASE_UK_VERSION_MINOR 5 + +/** + * struct kbase_ioctl_version_check - Check version compatibility between + * kernel and userspace + * + * @major: Major version number + * @minor: Minor version number + */ +struct kbase_ioctl_version_check { + __u16 major; + __u16 minor; +}; + +#define KBASE_IOCTL_VERSION_CHECK_RESERVED \ + _IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check) + + +/** + * struct kbase_ioctl_cs_queue_register - Register a GPU command queue with the + * base back-end + * + * @buffer_gpu_addr: GPU address of the buffer backing the queue + * @buffer_size: Size of the buffer in bytes + * @priority: Priority of the queue within a group when run within a process + * @padding: Currently unused, must be zero + * + * @Note: There is an identical sub-section in kbase_ioctl_cs_queue_register_ex. + * Any change of this struct should also be mirrored to the latter. + */ +struct kbase_ioctl_cs_queue_register { + __u64 buffer_gpu_addr; + __u32 buffer_size; + __u8 priority; + __u8 padding[3]; +}; + +#define KBASE_IOCTL_CS_QUEUE_REGISTER \ + _IOW(KBASE_IOCTL_TYPE, 36, struct kbase_ioctl_cs_queue_register) + +/** + * struct kbase_ioctl_cs_queue_kick - Kick the GPU command queue group scheduler + * to notify that a queue has been updated + * + * @buffer_gpu_addr: GPU address of the buffer backing the queue + */ +struct kbase_ioctl_cs_queue_kick { + __u64 buffer_gpu_addr; +}; + +#define KBASE_IOCTL_CS_QUEUE_KICK \ + _IOW(KBASE_IOCTL_TYPE, 37, struct kbase_ioctl_cs_queue_kick) + +/** + * union kbase_ioctl_cs_queue_bind - Bind a GPU command queue to a group + * + * @in: Input parameters + * @in.buffer_gpu_addr: GPU address of the buffer backing the queue + * @in.group_handle: Handle of the group to which the queue should be bound + * @in.csi_index: Index of the CSF interface the queue should be bound to + * @in.padding: Currently unused, must be zero + * @out: Output parameters + * @out.mmap_handle: Handle to be 
used for creating the mapping of CS + * input/output pages + */ +union kbase_ioctl_cs_queue_bind { + struct { + __u64 buffer_gpu_addr; + __u8 group_handle; + __u8 csi_index; + __u8 padding[6]; + } in; + struct { + __u64 mmap_handle; + } out; +}; + +#define KBASE_IOCTL_CS_QUEUE_BIND \ + _IOWR(KBASE_IOCTL_TYPE, 39, union kbase_ioctl_cs_queue_bind) + +/** + * struct kbase_ioctl_cs_queue_register_ex - Register a GPU command queue with the + * base back-end in extended format, + * involving trace buffer configuration + * + * @buffer_gpu_addr: GPU address of the buffer backing the queue + * @buffer_size: Size of the buffer in bytes + * @priority: Priority of the queue within a group when run within a process + * @padding: Currently unused, must be zero + * @ex_offset_var_addr: GPU address of the trace buffer write offset variable + * @ex_buffer_base: Trace buffer GPU base address for the queue + * @ex_buffer_size: Size of the trace buffer in bytes + * @ex_event_size: Trace event write size, in log2 designation + * @ex_event_state: Trace event states configuration + * @ex_padding: Currently unused, must be zero + * + * @Note: There is an identical sub-section at the start of this struct to that + * of @ref kbase_ioctl_cs_queue_register. Any change of this sub-section + * must also be mirrored to the latter. Following the said sub-section, + * the remaining fields form the extension, marked with ex_*. 
+ */ +struct kbase_ioctl_cs_queue_register_ex { + __u64 buffer_gpu_addr; + __u32 buffer_size; + __u8 priority; + __u8 padding[3]; + __u64 ex_offset_var_addr; + __u64 ex_buffer_base; + __u32 ex_buffer_size; + __u8 ex_event_size; + __u8 ex_event_state; + __u8 ex_padding[2]; +}; + +#define KBASE_IOCTL_CS_QUEUE_REGISTER_EX \ + _IOW(KBASE_IOCTL_TYPE, 40, struct kbase_ioctl_cs_queue_register_ex) + +/** + * struct kbase_ioctl_cs_queue_terminate - Terminate a GPU command queue + * + * @buffer_gpu_addr: GPU address of the buffer backing the queue + */ +struct kbase_ioctl_cs_queue_terminate { + __u64 buffer_gpu_addr; +}; + +#define KBASE_IOCTL_CS_QUEUE_TERMINATE \ + _IOW(KBASE_IOCTL_TYPE, 41, struct kbase_ioctl_cs_queue_terminate) + +/** + * union kbase_ioctl_cs_queue_group_create - Create a GPU command queue group + * @in: Input parameters + * @in.tiler_mask: Mask of tiler endpoints the group is allowed to use. + * @in.fragment_mask: Mask of fragment endpoints the group is allowed to use. + * @in.compute_mask: Mask of compute endpoints the group is allowed to use. + * @in.cs_min: Minimum number of CSs required. + * @in.priority: Queue group's priority within a process. + * @in.tiler_max: Maximum number of tiler endpoints the group is allowed + * to use. + * @in.fragment_max: Maximum number of fragment endpoints the group is + * allowed to use. + * @in.compute_max: Maximum number of compute endpoints the group is allowed + * to use. + * @in.padding: Currently unused, must be zero + * @out: Output parameters + * @out.group_handle: Handle of a newly created queue group. + * @out.padding: Currently unused, must be zero + * @out.group_uid: UID of the queue group available to base. 
+ */ +union kbase_ioctl_cs_queue_group_create { + struct { + __u64 tiler_mask; + __u64 fragment_mask; + __u64 compute_mask; + __u8 cs_min; + __u8 priority; + __u8 tiler_max; + __u8 fragment_max; + __u8 compute_max; + __u8 padding[3]; + + } in; + struct { + __u8 group_handle; + __u8 padding[3]; + __u32 group_uid; + } out; +}; + +#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE \ + _IOWR(KBASE_IOCTL_TYPE, 42, union kbase_ioctl_cs_queue_group_create) + +/** + * struct kbase_ioctl_cs_queue_group_term - Terminate a GPU command queue group + * + * @group_handle: Handle of the queue group to be terminated + * @padding: Padding to round up to a multiple of 8 bytes, must be zero + */ +struct kbase_ioctl_cs_queue_group_term { + __u8 group_handle; + __u8 padding[7]; +}; + +#define KBASE_IOCTL_CS_QUEUE_GROUP_TERMINATE \ + _IOW(KBASE_IOCTL_TYPE, 43, struct kbase_ioctl_cs_queue_group_term) + +#define KBASE_IOCTL_CS_EVENT_SIGNAL \ + _IO(KBASE_IOCTL_TYPE, 44) + +typedef __u8 base_kcpu_queue_id; /* We support up to 256 active KCPU queues */ + +/** + * struct kbase_ioctl_kcpu_queue_new - Create a KCPU command queue + * + * @id: ID of the new command queue returned by the kernel + * @padding: Padding to round up to a multiple of 8 bytes, must be zero + */ +struct kbase_ioctl_kcpu_queue_new { + base_kcpu_queue_id id; + __u8 padding[7]; +}; + +#define KBASE_IOCTL_KCPU_QUEUE_CREATE \ + _IOR(KBASE_IOCTL_TYPE, 45, struct kbase_ioctl_kcpu_queue_new) + +/** + * struct kbase_ioctl_kcpu_queue_delete - Destroy a KCPU command queue + * + * @id: ID of the command queue to be destroyed + * @padding: Padding to round up to a multiple of 8 bytes, must be zero + */ +struct kbase_ioctl_kcpu_queue_delete { + base_kcpu_queue_id id; + __u8 padding[7]; +}; + +#define KBASE_IOCTL_KCPU_QUEUE_DELETE \ + _IOW(KBASE_IOCTL_TYPE, 46, struct kbase_ioctl_kcpu_queue_delete) + +/** + * struct kbase_ioctl_kcpu_queue_enqueue - Enqueue commands into the KCPU queue + * + * @addr: Memory address of an array of struct 
base_kcpu_queue_command + * @nr_commands: Number of commands in the array + * @id: kcpu queue identifier, returned by KBASE_IOCTL_KCPU_QUEUE_CREATE ioctl + * @padding: Padding to round up to a multiple of 8 bytes, must be zero + */ +struct kbase_ioctl_kcpu_queue_enqueue { + __u64 addr; + __u32 nr_commands; + base_kcpu_queue_id id; + __u8 padding[3]; +}; + +#define KBASE_IOCTL_KCPU_QUEUE_ENQUEUE \ + _IOW(KBASE_IOCTL_TYPE, 47, struct kbase_ioctl_kcpu_queue_enqueue) + +/** + * union kbase_ioctl_cs_tiler_heap_init - Initialize chunked tiler memory heap + * @in: Input parameters + * @in.chunk_size: Size of each chunk. + * @in.initial_chunks: Initial number of chunks that heap will be created with. + * @in.max_chunks: Maximum number of chunks that the heap is allowed to use. + * @in.target_in_flight: Number of render-passes that the driver should attempt to + * keep in flight for which allocation of new chunks is + * allowed. + * @in.group_id: Group ID to be used for physical allocations. + * @in.padding: Padding + * @out: Output parameters + * @out.gpu_heap_va: GPU VA (virtual address) of Heap context that was set up + * for the heap. + * @out.first_chunk_va: GPU VA of the first chunk allocated for the heap, + * actually points to the header of heap chunk and not to + * the low address of free memory in the chunk. + */ +union kbase_ioctl_cs_tiler_heap_init { + struct { + __u32 chunk_size; + __u32 initial_chunks; + __u32 max_chunks; + __u16 target_in_flight; + __u8 group_id; + __u8 padding; + } in; + struct { + __u64 gpu_heap_va; + __u64 first_chunk_va; + } out; +}; + +#define KBASE_IOCTL_CS_TILER_HEAP_INIT \ + _IOWR(KBASE_IOCTL_TYPE, 48, union kbase_ioctl_cs_tiler_heap_init) + +/** + * struct kbase_ioctl_cs_tiler_heap_term - Terminate a chunked tiler heap + * instance + * + * @gpu_heap_va: GPU VA of Heap context that was set up for the heap. 
+ */ +struct kbase_ioctl_cs_tiler_heap_term { + __u64 gpu_heap_va; +}; + +#define KBASE_IOCTL_CS_TILER_HEAP_TERM \ + _IOW(KBASE_IOCTL_TYPE, 49, struct kbase_ioctl_cs_tiler_heap_term) + +/** + * union kbase_ioctl_cs_get_glb_iface - Request the global control block + * of CSF interface capabilities + * + * @in: Input parameters + * @in.max_group_num: The maximum number of groups to be read. Can be 0, in + * which case groups_ptr is unused. + * @in.max_total_stream_num: The maximum number of CSs to be read. Can be 0, in + * which case streams_ptr is unused. + * @in.groups_ptr: Pointer where to store all the group data (sequentially). + * @in.streams_ptr: Pointer where to store all the CS data (sequentially). + * @out: Output parameters + * @out.glb_version: Global interface version. + * @out.features: Bit mask of features (e.g. whether certain types of job + * can be suspended). + * @out.group_num: Number of CSGs supported. + * @out.prfcnt_size: Size of CSF performance counters, in bytes. Bits 31:16 + * hold the size of firmware performance counter data + * and 15:0 hold the size of hardware performance counter + * data. + * @out.total_stream_num: Total number of CSs, summed across all groups. + * @out.instr_features: Instrumentation features. Bits 7:4 hold the maximum + * size of events. Bits 3:0 hold the offset update rate. 
+ * (csf >= 1.1.0) + * + */ +union kbase_ioctl_cs_get_glb_iface { + struct { + __u32 max_group_num; + __u32 max_total_stream_num; + __u64 groups_ptr; + __u64 streams_ptr; + } in; + struct { + __u32 glb_version; + __u32 features; + __u32 group_num; + __u32 prfcnt_size; + __u32 total_stream_num; + __u32 instr_features; + } out; +}; + +#define KBASE_IOCTL_CS_GET_GLB_IFACE \ + _IOWR(KBASE_IOCTL_TYPE, 51, union kbase_ioctl_cs_get_glb_iface) + +struct kbase_ioctl_cs_cpu_queue_info { + __u64 buffer; + __u64 size; +}; + +#define KBASE_IOCTL_VERSION_CHECK \ + _IOWR(KBASE_IOCTL_TYPE, 52, struct kbase_ioctl_version_check) + +#define KBASE_IOCTL_CS_CPU_QUEUE_DUMP \ + _IOW(KBASE_IOCTL_TYPE, 53, struct kbase_ioctl_cs_cpu_queue_info) + +/*************** + * test ioctls * + ***************/ +#if MALI_UNIT_TEST +/* These ioctls are purely for test purposes and are not used in the production + * driver, they therefore may change without notice + */ + +/** + * struct kbase_ioctl_cs_event_memory_write - Write an event memory address + * @cpu_addr: Memory address to write + * @value: Value to write + * @padding: Currently unused, must be zero + */ +struct kbase_ioctl_cs_event_memory_write { + __u64 cpu_addr; + __u8 value; + __u8 padding[7]; +}; + +/** + * union kbase_ioctl_cs_event_memory_read - Read an event memory address + * @in: Input parameters + * @in.cpu_addr: Memory address to read + * @out: Output parameters + * @out.value: Value read + * @out.padding: Currently unused, must be zero + */ +union kbase_ioctl_cs_event_memory_read { + struct { + __u64 cpu_addr; + } in; + struct { + __u8 value; + __u8 padding[7]; + } out; +}; + +#endif /* MALI_UNIT_TEST */ + +#endif /* _UAPI_KBASE_CSF_IOCTL_H_ */ diff --git a/dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h new file mode 100644 index 0000000..2041739 --- /dev/null +++ 
b/dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h @@ -0,0 +1,335 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _UAPI_KBASE_GPU_REGMAP_CSF_H_ +#define _UAPI_KBASE_GPU_REGMAP_CSF_H_ + +#include + +#if !MALI_USE_CSF && defined(__KERNEL__) +#error "Cannot be compiled with JM" +#endif + +/* IPA control registers */ + +#define IPA_CONTROL_BASE 0x40000 +#define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE+(r)) +#define COMMAND 0x000 /* (WO) Command register */ +#define STATUS 0x004 /* (RO) Status register */ +#define TIMER 0x008 /* (RW) Timer control register */ + +#define SELECT_CSHW_LO 0x010 /* (RW) Counter select for CS hardware, low word */ +#define SELECT_CSHW_HI 0x014 /* (RW) Counter select for CS hardware, high word */ +#define SELECT_MEMSYS_LO 0x018 /* (RW) Counter select for Memory system, low word */ +#define SELECT_MEMSYS_HI 0x01C /* (RW) Counter select for Memory system, high word */ +#define SELECT_TILER_LO 0x020 /* (RW) Counter select for Tiler cores, low word */ +#define SELECT_TILER_HI 0x024 /* (RW) Counter select for Tiler cores, high word */ +#define SELECT_SHADER_LO 0x028 /* (RW) Counter select for Shader cores, low word */ +#define 
SELECT_SHADER_HI 0x02C /* (RW) Counter select for Shader cores, high word */ + +/* Accumulated counter values for CS hardware */ +#define VALUE_CSHW_BASE 0x100 +#define VALUE_CSHW_REG_LO(n) (VALUE_CSHW_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ +#define VALUE_CSHW_REG_HI(n) (VALUE_CSHW_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ + +/* Accumulated counter values for memory system */ +#define VALUE_MEMSYS_BASE 0x140 +#define VALUE_MEMSYS_REG_LO(n) (VALUE_MEMSYS_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ +#define VALUE_MEMSYS_REG_HI(n) (VALUE_MEMSYS_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ + +#define VALUE_TILER_BASE 0x180 +#define VALUE_TILER_REG_LO(n) (VALUE_TILER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ +#define VALUE_TILER_REG_HI(n) (VALUE_TILER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ + +#define VALUE_SHADER_BASE 0x1C0 +#define VALUE_SHADER_REG_LO(n) (VALUE_SHADER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ +#define VALUE_SHADER_REG_HI(n) (VALUE_SHADER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ + +#include "../../csf/mali_gpu_csf_control_registers.h" + +/* Set to implementation defined, outer caching */ +#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull +/* Set to write back memory, outer caching */ +#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull +/* Set to inner non-cacheable, outer-non-cacheable + * Setting defined by the alloc bits is ignored, but set to a valid encoding: + * - no-alloc on read + * - no alloc on write + */ +#define AS_MEMATTR_AARCH64_NON_CACHEABLE 0x4Cull +/* Set to shared memory, that is inner cacheable on ACE and inner or outer + * shared, otherwise inner non-cacheable. + * Outer cacheable if inner or outer shared, otherwise outer non-cacheable. 
+ */ +#define AS_MEMATTR_AARCH64_SHARED 0x8ull + +/* Symbols for default MEMATTR to use + * Default is - HW implementation defined caching + */ +#define AS_MEMATTR_INDEX_DEFAULT 0 +#define AS_MEMATTR_INDEX_DEFAULT_ACE 3 + +/* HW implementation defined caching */ +#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0 +/* Force cache on */ +#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1 +/* Write-alloc */ +#define AS_MEMATTR_INDEX_WRITE_ALLOC 2 +/* Outer coherent, inner implementation defined policy */ +#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3 +/* Outer coherent, write alloc inner */ +#define AS_MEMATTR_INDEX_OUTER_WA 4 +/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */ +#define AS_MEMATTR_INDEX_NON_CACHEABLE 5 +/* Normal memory, shared between MCU and Host */ +#define AS_MEMATTR_INDEX_SHARED 6 + +/* Configuration bits for the CSF. */ +#define CSF_CONFIG 0xF00 + +/* CSF_CONFIG register */ +#define CSF_CONFIG_FORCE_COHERENCY_FEATURES_SHIFT 2 + +/* GPU control registers */ +#define CORE_FEATURES 0x008 /* () Shader Core Features */ +#define MCU_CONTROL 0x700 +#define MCU_STATUS 0x704 + +#define MCU_CNTRL_ENABLE (1 << 0) +#define MCU_CNTRL_AUTO (1 << 1) +#define MCU_CNTRL_DISABLE (0) + +#define MCU_STATUS_HALTED (1 << 1) + +#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory + * region base address, low word + */ +#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory + * region base address, high word + */ +#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter + * configuration + */ + +#define PRFCNT_CSHW_EN 0x06C /* (RW) Performance counter + * enable for CS Hardware + */ + +#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable + * flags for shader cores + */ +#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable + * flags for tiler + */ +#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable + * flags for MMU/L2 cache + */ + +/* JOB IRQ flags */ +#define JOB_IRQ_GLOBAL_IF (1 << 31) /* 
Global interface interrupt received */ + +/* GPU_COMMAND codes */ +#define GPU_COMMAND_CODE_NOP 0x00 /* No operation, nothing happens */ +#define GPU_COMMAND_CODE_RESET 0x01 /* Reset the GPU */ +#define GPU_COMMAND_CODE_PRFCNT 0x02 /* Clear or sample performance counters */ +#define GPU_COMMAND_CODE_TIME 0x03 /* Configure time sources */ +#define GPU_COMMAND_CODE_FLUSH_CACHES 0x04 /* Flush caches */ +#define GPU_COMMAND_CODE_SET_PROTECTED_MODE 0x05 /* Places the GPU in protected mode */ +#define GPU_COMMAND_CODE_FINISH_HALT 0x06 /* Halt CSF */ +#define GPU_COMMAND_CODE_CLEAR_FAULT 0x07 /* Clear GPU_FAULTSTATUS and GPU_FAULTADDRESS, TODX */ + +/* GPU_COMMAND_RESET payloads */ + +/* This will leave the state of active jobs UNDEFINED, but will leave the external bus in a defined and idle state. + * Power domains will remain powered on. + */ +#define GPU_COMMAND_RESET_PAYLOAD_FAST_RESET 0x00 + +/* This will leave the state of active CSs UNDEFINED, but will leave the external bus in a defined and + * idle state. + */ +#define GPU_COMMAND_RESET_PAYLOAD_SOFT_RESET 0x01 + +/* This reset will leave the state of currently active streams UNDEFINED, will likely lose data, and may leave + * the system bus in an inconsistent state. Use only as a last resort when nothing else works. 
+ */ +#define GPU_COMMAND_RESET_PAYLOAD_HARD_RESET 0x02 + +/* GPU_COMMAND_PRFCNT payloads */ +#define GPU_COMMAND_PRFCNT_PAYLOAD_SAMPLE 0x01 /* Sample performance counters */ +#define GPU_COMMAND_PRFCNT_PAYLOAD_CLEAR 0x02 /* Clear performance counters */ + +/* GPU_COMMAND_TIME payloads */ +#define GPU_COMMAND_TIME_DISABLE 0x00 /* Disable cycle counter */ +#define GPU_COMMAND_TIME_ENABLE 0x01 /* Enable cycle counter */ + +/* GPU_COMMAND_FLUSH_CACHES payloads */ +#define GPU_COMMAND_FLUSH_PAYLOAD_NONE 0x00 /* No flush */ +#define GPU_COMMAND_FLUSH_PAYLOAD_CLEAN 0x01 /* Clean the caches */ +#define GPU_COMMAND_FLUSH_PAYLOAD_INVALIDATE 0x02 /* Invalidate the caches */ +#define GPU_COMMAND_FLUSH_PAYLOAD_CLEAN_INVALIDATE 0x03 /* Clean and invalidate the caches */ + +/* GPU_COMMAND command + payload */ +#define GPU_COMMAND_CODE_PAYLOAD(opcode, payload) \ + ((__u32)opcode | ((__u32)payload << 8)) + +/* Final GPU_COMMAND form */ +/* No operation, nothing happens */ +#define GPU_COMMAND_NOP \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_NOP, 0) + +/* Stop all external bus interfaces, and then reset the entire GPU. */ +#define GPU_COMMAND_SOFT_RESET \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_RESET, GPU_COMMAND_RESET_PAYLOAD_SOFT_RESET) + +/* Immediately reset the entire GPU. */ +#define GPU_COMMAND_HARD_RESET \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_RESET, GPU_COMMAND_RESET_PAYLOAD_HARD_RESET) + +/* Clear all performance counters, setting them all to zero. 
*/ +#define GPU_COMMAND_PRFCNT_CLEAR \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_PRFCNT, GPU_COMMAND_PRFCNT_PAYLOAD_CLEAR) + +/* Sample all performance counters, writing them out to memory */ +#define GPU_COMMAND_PRFCNT_SAMPLE \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_PRFCNT, GPU_COMMAND_PRFCNT_PAYLOAD_SAMPLE) + +/* Starts the cycle counter, and system timestamp propagation */ +#define GPU_COMMAND_CYCLE_COUNT_START \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_ENABLE) + +/* Stops the cycle counter, and system timestamp propagation */ +#define GPU_COMMAND_CYCLE_COUNT_STOP \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_DISABLE) + +/* Clean all caches */ +#define GPU_COMMAND_CLEAN_CACHES \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, GPU_COMMAND_FLUSH_PAYLOAD_CLEAN) + +/* Clean and invalidate all caches */ +#define GPU_COMMAND_CLEAN_INV_CACHES \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FLUSH_CACHES, GPU_COMMAND_FLUSH_PAYLOAD_CLEAN_INVALIDATE) + +/* Places the GPU in protected mode */ +#define GPU_COMMAND_SET_PROTECTED_MODE \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_SET_PROTECTED_MODE, 0) + +/* Halt CSF */ +#define GPU_COMMAND_FINISH_HALT \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FINISH_HALT, 0) + +/* Clear GPU faults */ +#define GPU_COMMAND_CLEAR_FAULT \ + GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_CLEAR_FAULT, 0) + +/* End Command Values */ + +/* GPU_FAULTSTATUS register */ +#define GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0 +#define GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFFul) +#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \ + (((reg_val)&GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK) \ + >> GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) +#define GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT 8 +#define GPU_FAULTSTATUS_ACCESS_TYPE_MASK \ + (0x3ul << GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT) + +#define GPU_FAULTSTATUS_ADDR_VALID_SHIFT 10 +#define GPU_FAULTSTATUS_ADDR_VALID_FLAG \ + (1ul << GPU_FAULTSTATUS_ADDR_VALID_SHIFT) + 
+#define GPU_FAULTSTATUS_JASID_VALID_SHIFT 11 +#define GPU_FAULTSTATUS_JASID_VALID_FLAG \ + (1ul << GPU_FAULTSTATUS_JASID_VALID_SHIFT) + +#define GPU_FAULTSTATUS_JASID_SHIFT 12 +#define GPU_FAULTSTATUS_JASID_MASK (0xF << GPU_FAULTSTATUS_JASID_SHIFT) +#define GPU_FAULTSTATUS_JASID_GET(reg_val) \ + (((reg_val)&GPU_FAULTSTATUS_JASID_MASK) >> GPU_FAULTSTATUS_JASID_SHIFT) +#define GPU_FAULTSTATUS_JASID_SET(reg_val, value) \ + (((reg_val) & ~GPU_FAULTSTATUS_JASID_MASK) | \ + (((value) << GPU_FAULTSTATUS_JASID_SHIFT) & GPU_FAULTSTATUS_JASID_MASK)) + +#define GPU_FAULTSTATUS_SOURCE_ID_SHIFT 16 +#define GPU_FAULTSTATUS_SOURCE_ID_MASK \ + (0xFFFFul << GPU_FAULTSTATUS_SOURCE_ID_SHIFT) +/* End GPU_FAULTSTATUS register */ + +/* GPU_FAULTSTATUS_ACCESS_TYPE values */ +#define GPU_FAULTSTATUS_ACCESS_TYPE_ATOMIC 0x0 +#define GPU_FAULTSTATUS_ACCESS_TYPE_EXECUTE 0x1 +#define GPU_FAULTSTATUS_ACCESS_TYPE_READ 0x2 +#define GPU_FAULTSTATUS_ACCESS_TYPE_WRITE 0x3 +/* End of GPU_FAULTSTATUS_ACCESS_TYPE values */ + +/* Implementation-dependent exception codes used to indicate CSG + * and CS errors that are not specified in the specs. 
+ */ +#define GPU_EXCEPTION_TYPE_SW_FAULT_0 ((__u8)0x70) +#define GPU_EXCEPTION_TYPE_SW_FAULT_1 ((__u8)0x71) +#define GPU_EXCEPTION_TYPE_SW_FAULT_2 ((__u8)0x72) + +/* GPU_FAULTSTATUS_EXCEPTION_TYPE values */ +#define GPU_FAULTSTATUS_EXCEPTION_TYPE_OK 0x00 +#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_BUS_FAULT 0x80 +#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_SHAREABILITY_FAULT 0x88 +#define GPU_FAULTSTATUS_EXCEPTION_TYPE_SYSTEM_SHAREABILITY_FAULT 0x89 +#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_CACHEABILITY_FAULT 0x8A +/* End of GPU_FAULTSTATUS_EXCEPTION_TYPE values */ + +#define GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT GPU_U(10) +#define GPU_FAULTSTATUS_ADDRESS_VALID_MASK (GPU_U(0x1) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) +#define GPU_FAULTSTATUS_ADDRESS_VALID_GET(reg_val) \ + (((reg_val)&GPU_FAULTSTATUS_ADDRESS_VALID_MASK) >> GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) +#define GPU_FAULTSTATUS_ADDRESS_VALID_SET(reg_val, value) \ + (((reg_val) & ~GPU_FAULTSTATUS_ADDRESS_VALID_MASK) | \ + (((value) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) & GPU_FAULTSTATUS_ADDRESS_VALID_MASK)) + +/* IRQ flags */ +#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ +#define GPU_PROTECTED_FAULT (1 << 1) /* A GPU fault has occurred in protected mode */ +#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */ +#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */ +#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */ +#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ +#define DOORBELL_MIRROR (1 << 18) /* Mirrors the doorbell interrupt line to the CPU */ +#define MCU_STATUS_GPU_IRQ (1 << 19) /* MCU requires attention */ + +/* + * In Debug build, + * GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE is used to clear and unmask interrupt sources of GPU_IRQ + * by writing it onto GPU_IRQ_CLEAR/MASK registers.
+ * + * In Release build, + * GPU_IRQ_REG_COMMON is used. + * + * Note: + * CLEAN_CACHES_COMPLETED - Used separately for cache operation. + * DOORBELL_MIRROR - Do not have it included for GPU_IRQ_REG_COMMON + * as it can't be cleared by GPU_IRQ_CLEAR, thus interrupt storm might happen + */ +#define GPU_IRQ_REG_COMMON (GPU_FAULT | GPU_PROTECTED_FAULT | RESET_COMPLETED \ + | POWER_CHANGED_ALL | MCU_STATUS_GPU_IRQ) + +/* GPU_CONTROL_MCU.GPU_IRQ_RAWSTAT */ +#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when performance count sample has completed */ + +#endif /* _UAPI_KBASE_GPU_REGMAP_CSF_H_ */ diff --git a/dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h new file mode 100644 index 0000000..1be3541 --- /dev/null +++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h @@ -0,0 +1,287 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#ifndef _UAPI_KBASE_GPU_REGMAP_JM_H_ +#define _UAPI_KBASE_GPU_REGMAP_JM_H_ + +#if MALI_USE_CSF && defined(__KERNEL__) +#error "Cannot be compiled with CSF" +#endif + +/* Set to implementation defined, outer caching */ +#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull +/* Set to write back memory, outer caching */ +#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull +/* Set to inner non-cacheable, outer-non-cacheable + * Setting defined by the alloc bits is ignored, but set to a valid encoding: + * - no-alloc on read + * - no alloc on write + */ +#define AS_MEMATTR_AARCH64_NON_CACHEABLE 0x4Cull + +/* Symbols for default MEMATTR to use + * Default is - HW implementation defined caching + */ +#define AS_MEMATTR_INDEX_DEFAULT 0 +#define AS_MEMATTR_INDEX_DEFAULT_ACE 3 + +/* HW implementation defined caching */ +#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0 +/* Force cache on */ +#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1 +/* Write-alloc */ +#define AS_MEMATTR_INDEX_WRITE_ALLOC 2 +/* Outer coherent, inner implementation defined policy */ +#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3 +/* Outer coherent, write alloc inner */ +#define AS_MEMATTR_INDEX_OUTER_WA 4 +/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */ +#define AS_MEMATTR_INDEX_NON_CACHEABLE 5 + +/* GPU control registers */ + +#define CORE_FEATURES 0x008 /* (RO) Shader Core Features */ +#define JS_PRESENT 0x01C /* (RO) Job slots present */ +#define LATEST_FLUSH 0x038 /* (RO) Flush ID of latest + * clean-and-invalidate operation + */ + +#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory + * region base address, low word + */ +#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory + * region base address, high word + */ +#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter + * configuration + */ +#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable + * flags for Job Manager + */ +#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable 
+ * flags for shader cores + */ +#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable + * flags for tiler + */ +#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable + * flags for MMU/L2 cache + */ + +#define JS0_FEATURES 0x0C0 /* (RO) Features of job slot 0 */ +#define JS1_FEATURES 0x0C4 /* (RO) Features of job slot 1 */ +#define JS2_FEATURES 0x0C8 /* (RO) Features of job slot 2 */ +#define JS3_FEATURES 0x0CC /* (RO) Features of job slot 3 */ +#define JS4_FEATURES 0x0D0 /* (RO) Features of job slot 4 */ +#define JS5_FEATURES 0x0D4 /* (RO) Features of job slot 5 */ +#define JS6_FEATURES 0x0D8 /* (RO) Features of job slot 6 */ +#define JS7_FEATURES 0x0DC /* (RO) Features of job slot 7 */ +#define JS8_FEATURES 0x0E0 /* (RO) Features of job slot 8 */ +#define JS9_FEATURES 0x0E4 /* (RO) Features of job slot 9 */ +#define JS10_FEATURES 0x0E8 /* (RO) Features of job slot 10 */ +#define JS11_FEATURES 0x0EC /* (RO) Features of job slot 11 */ +#define JS12_FEATURES 0x0F0 /* (RO) Features of job slot 12 */ +#define JS13_FEATURES 0x0F4 /* (RO) Features of job slot 13 */ +#define JS14_FEATURES 0x0F8 /* (RO) Features of job slot 14 */ +#define JS15_FEATURES 0x0FC /* (RO) Features of job slot 15 */ + +#define JS_FEATURES_REG(n) GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2)) + +#define JM_CONFIG 0xF00 /* (RW) Job manager configuration (implementation-specific) */ + +/* Job control registers */ + +#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */ +#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. 
*/ + +#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */ +#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */ +#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */ +#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */ +#define JOB_SLOT4 0xA00 /* Configuration registers for job slot 4 */ +#define JOB_SLOT5 0xA80 /* Configuration registers for job slot 5 */ +#define JOB_SLOT6 0xB00 /* Configuration registers for job slot 6 */ +#define JOB_SLOT7 0xB80 /* Configuration registers for job slot 7 */ +#define JOB_SLOT8 0xC00 /* Configuration registers for job slot 8 */ +#define JOB_SLOT9 0xC80 /* Configuration registers for job slot 9 */ +#define JOB_SLOT10 0xD00 /* Configuration registers for job slot 10 */ +#define JOB_SLOT11 0xD80 /* Configuration registers for job slot 11 */ +#define JOB_SLOT12 0xE00 /* Configuration registers for job slot 12 */ +#define JOB_SLOT13 0xE80 /* Configuration registers for job slot 13 */ +#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */ +#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */ + +#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r)) + +#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */ +#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */ +#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */ +#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */ +#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */ +#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */ +#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */ +/* (RO) Extended affinity mask for job slot n*/ +#define JS_XAFFINITY 0x1C + +#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */ +#define JS_STATUS 0x24 /* (RO) Status register for job 
slot n */ + +#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */ +#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */ + +#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */ +#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */ +#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */ +/* (RW) Next extended affinity mask for job slot n */ +#define JS_XAFFINITY_NEXT 0x5C + +#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */ + +#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */ + +/* No JM-specific MMU control registers */ +/* No JM-specific MMU address space control registers */ + +/* JS_COMMAND register commands */ +#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */ +#define JS_COMMAND_START 0x01 /* Start processing a job chain. 
Writing this value is ignored */ +#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */ +#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */ +#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */ +#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */ +#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */ +#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */ + +#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */ + +/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */ +#define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0) +#define JS_CONFIG_START_FLUSH_CLEAN (1u << 8) +#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8) +#define JS_CONFIG_START_MMU (1u << 10) +#define JS_CONFIG_JOB_CHAIN_FLAG (1u << 11) +#define JS_CONFIG_END_FLUSH_NO_ACTION JS_CONFIG_START_FLUSH_NO_ACTION +#define JS_CONFIG_END_FLUSH_CLEAN (1u << 12) +#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12) +#define JS_CONFIG_ENABLE_FLUSH_REDUCTION (1u << 14) +#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK (1u << 15) +#define JS_CONFIG_THREAD_PRI(n) ((n) << 16) + +/* JS_XAFFINITY register values */ +#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0) +#define JS_XAFFINITY_TILER_ENABLE (1u << 8) +#define JS_XAFFINITY_CACHE_ENABLE (1u << 16) + +/* JS_STATUS register values */ + +/* NOTE: Please keep these values in sync with enum base_jd_event_code in mali_base_kernel.h. + * The values are separated to avoid dependency of userspace and kernel code.
+ */ + +/* Group of values representing the job status instead of a particular fault */ +#define JS_STATUS_NO_EXCEPTION_BASE 0x00 +#define JS_STATUS_INTERRUPTED (JS_STATUS_NO_EXCEPTION_BASE + 0x02) /* 0x02 means INTERRUPTED */ +#define JS_STATUS_STOPPED (JS_STATUS_NO_EXCEPTION_BASE + 0x03) /* 0x03 means STOPPED */ +#define JS_STATUS_TERMINATED (JS_STATUS_NO_EXCEPTION_BASE + 0x04) /* 0x04 means TERMINATED */ + +/* General fault values */ +#define JS_STATUS_FAULT_BASE 0x40 +#define JS_STATUS_CONFIG_FAULT (JS_STATUS_FAULT_BASE) /* 0x40 means CONFIG FAULT */ +#define JS_STATUS_POWER_FAULT (JS_STATUS_FAULT_BASE + 0x01) /* 0x41 means POWER FAULT */ +#define JS_STATUS_READ_FAULT (JS_STATUS_FAULT_BASE + 0x02) /* 0x42 means READ FAULT */ +#define JS_STATUS_WRITE_FAULT (JS_STATUS_FAULT_BASE + 0x03) /* 0x43 means WRITE FAULT */ +#define JS_STATUS_AFFINITY_FAULT (JS_STATUS_FAULT_BASE + 0x04) /* 0x44 means AFFINITY FAULT */ +#define JS_STATUS_BUS_FAULT (JS_STATUS_FAULT_BASE + 0x08) /* 0x48 means BUS FAULT */ + +/* Instruction or data faults */ +#define JS_STATUS_INSTRUCTION_FAULT_BASE 0x50 +#define JS_STATUS_INSTR_INVALID_PC (JS_STATUS_INSTRUCTION_FAULT_BASE) /* 0x50 means INSTR INVALID PC */ +#define JS_STATUS_INSTR_INVALID_ENC (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01) /* 0x51 means INSTR INVALID ENC */ +#define JS_STATUS_INSTR_TYPE_MISMATCH (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02) /* 0x52 means INSTR TYPE MISMATCH */ +#define JS_STATUS_INSTR_OPERAND_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03) /* 0x53 means INSTR OPERAND FAULT */ +#define JS_STATUS_INSTR_TLS_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04) /* 0x54 means INSTR TLS FAULT */ +#define JS_STATUS_INSTR_BARRIER_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05) /* 0x55 means INSTR BARRIER FAULT */ +#define JS_STATUS_INSTR_ALIGN_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06) /* 0x56 means INSTR ALIGN FAULT */ +/* NOTE: No fault with 0x57 code defined in spec. 
*/ +#define JS_STATUS_DATA_INVALID_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08) /* 0x58 means DATA INVALID FAULT */ +#define JS_STATUS_TILE_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09) /* 0x59 means TILE RANGE FAULT */ +#define JS_STATUS_ADDRESS_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A) /* 0x5A means ADDRESS RANGE FAULT */ + +/* Other faults */ +#define JS_STATUS_MEMORY_FAULT_BASE 0x60 +#define JS_STATUS_OUT_OF_MEMORY (JS_STATUS_MEMORY_FAULT_BASE) /* 0x60 means OUT OF MEMORY */ +#define JS_STATUS_UNKNOWN 0x7F /* 0x7F means UNKNOWN */ + +/* JS_FEATURES register */ +#define JS_FEATURE_NULL_JOB (1u << 1) +#define JS_FEATURE_SET_VALUE_JOB (1u << 2) +#define JS_FEATURE_CACHE_FLUSH_JOB (1u << 3) +#define JS_FEATURE_COMPUTE_JOB (1u << 4) +#define JS_FEATURE_VERTEX_JOB (1u << 5) +#define JS_FEATURE_GEOMETRY_JOB (1u << 6) +#define JS_FEATURE_TILER_JOB (1u << 7) +#define JS_FEATURE_FUSED_JOB (1u << 8) +#define JS_FEATURE_FRAGMENT_JOB (1u << 9) + +/* JM_CONFIG register */ +#define JM_TIMESTAMP_OVERRIDE (1ul << 0) +#define JM_CLOCK_GATE_OVERRIDE (1ul << 1) +#define JM_JOB_THROTTLE_ENABLE (1ul << 2) +#define JM_JOB_THROTTLE_LIMIT_SHIFT (3) +#define JM_MAX_JOB_THROTTLE_LIMIT (0x3F) +#define JM_FORCE_COHERENCY_FEATURES_SHIFT (2) + +/* GPU_COMMAND values */ +#define GPU_COMMAND_NOP 0x00 /* No operation, nothing happens */ +#define GPU_COMMAND_SOFT_RESET 0x01 /* Stop all external bus interfaces, and then reset the entire GPU. */ +#define GPU_COMMAND_HARD_RESET 0x02 /* Immediately reset the entire GPU. */ +#define GPU_COMMAND_PRFCNT_CLEAR 0x03 /* Clear all performance counters, setting them all to zero. 
*/ +#define GPU_COMMAND_PRFCNT_SAMPLE 0x04 /* Sample all performance counters, writing them out to memory */ +#define GPU_COMMAND_CYCLE_COUNT_START 0x05 /* Starts the cycle counter, and system timestamp propagation */ +#define GPU_COMMAND_CYCLE_COUNT_STOP 0x06 /* Stops the cycle counter, and system timestamp propagation */ +#define GPU_COMMAND_CLEAN_CACHES 0x07 /* Clean all caches */ +#define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */ +#define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */ + +/* IRQ flags */ +#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */ +#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */ +#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */ +#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */ +#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */ +#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */ +#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */ + +/* + * In Debug build, + * GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE is used to clear and enable interrupt sources of GPU_IRQ + * by writing it onto GPU_IRQ_CLEAR/MASK registers. + * + * In Release build, + * GPU_IRQ_REG_COMMON is used. + * + * Note: + * CLEAN_CACHES_COMPLETED - Used separately for cache operation.
+ */ +#define GPU_IRQ_REG_COMMON (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \ + | POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED) + +#endif /* _UAPI_KBASE_GPU_REGMAP_JM_H_ */ diff --git a/dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h new file mode 100644 index 0000000..83d8413 --- /dev/null +++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#ifndef _UAPI_KBASE_GPU_COHERENCY_H_ +#define _UAPI_KBASE_GPU_COHERENCY_H_ + +#define COHERENCY_ACE_LITE 0 +#define COHERENCY_ACE 1 +#define COHERENCY_NONE 31 +#define COHERENCY_FEATURE_BIT(x) (1u << (x)) /* unsigned so that bit 31 (COHERENCY_NONE) does not shift into the sign bit */ + +#endif /* _UAPI_KBASE_GPU_COHERENCY_H_ */ diff --git a/dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h new file mode 100644 index 0000000..d093ce4 --- /dev/null +++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h @@ -0,0 +1,122 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html.
+ * + */ + +#ifndef _UAPI_KBASE_GPU_ID_H_ +#define _UAPI_KBASE_GPU_ID_H_ + +#include <linux/types.h> + +/* GPU_ID register: field shifts and masks for the GPU_ID_* layout and the new-format GPU_ID2_* layout */ +#define GPU_ID_VERSION_STATUS_SHIFT 0 +#define GPU_ID_VERSION_MINOR_SHIFT 4 +#define GPU_ID_VERSION_MAJOR_SHIFT 12 +#define GPU_ID_VERSION_PRODUCT_ID_SHIFT 16 +#define GPU_ID_VERSION_STATUS (0xFu << GPU_ID_VERSION_STATUS_SHIFT) +#define GPU_ID_VERSION_MINOR (0xFFu << GPU_ID_VERSION_MINOR_SHIFT) +#define GPU_ID_VERSION_MAJOR (0xFu << GPU_ID_VERSION_MAJOR_SHIFT) +#define GPU_ID_VERSION_PRODUCT_ID (0xFFFFu << GPU_ID_VERSION_PRODUCT_ID_SHIFT) + +#define GPU_ID2_VERSION_STATUS_SHIFT 0 +#define GPU_ID2_VERSION_MINOR_SHIFT 4 +#define GPU_ID2_VERSION_MAJOR_SHIFT 12 +#define GPU_ID2_PRODUCT_MAJOR_SHIFT 16 +#define GPU_ID2_ARCH_REV_SHIFT 20 +#define GPU_ID2_ARCH_MINOR_SHIFT 24 +#define GPU_ID2_ARCH_MAJOR_SHIFT 28 +#define GPU_ID2_VERSION_STATUS (0xFu << GPU_ID2_VERSION_STATUS_SHIFT) +#define GPU_ID2_VERSION_MINOR (0xFFu << GPU_ID2_VERSION_MINOR_SHIFT) +#define GPU_ID2_VERSION_MAJOR (0xFu << GPU_ID2_VERSION_MAJOR_SHIFT) +#define GPU_ID2_PRODUCT_MAJOR (0xFu << GPU_ID2_PRODUCT_MAJOR_SHIFT) +#define GPU_ID2_ARCH_REV (0xFu << GPU_ID2_ARCH_REV_SHIFT) +#define GPU_ID2_ARCH_MINOR (0xFu << GPU_ID2_ARCH_MINOR_SHIFT) +#define GPU_ID2_ARCH_MAJOR (0xFu << GPU_ID2_ARCH_MAJOR_SHIFT) +#define GPU_ID2_PRODUCT_MODEL (GPU_ID2_ARCH_MAJOR | GPU_ID2_PRODUCT_MAJOR) +#define GPU_ID2_VERSION (GPU_ID2_VERSION_MAJOR | \ + GPU_ID2_VERSION_MINOR | \ + GPU_ID2_VERSION_STATUS) + +/* Helper macro to create a partial GPU_ID (new format) that defines + * a product ignoring its version.
+ */ +#define GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, product_major) \ + ((((__u32)(arch_major)) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ + (((__u32)(arch_minor)) << GPU_ID2_ARCH_MINOR_SHIFT) | \ + (((__u32)(arch_rev)) << GPU_ID2_ARCH_REV_SHIFT) | \ + (((__u32)(product_major)) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) + +/* Helper macro to create a partial GPU_ID (new format) that specifies the + * revision (major, minor, status) of a product + */ +#define GPU_ID2_VERSION_MAKE(version_major, version_minor, version_status) \ + ((((__u32)(version_major)) << GPU_ID2_VERSION_MAJOR_SHIFT) | \ + (((__u32)(version_minor)) << GPU_ID2_VERSION_MINOR_SHIFT) | \ + (((__u32)(version_status)) << GPU_ID2_VERSION_STATUS_SHIFT)) + +/* Helper macro to create a complete GPU_ID (new format) */ +#define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \ + version_major, version_minor, version_status) \ + (GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \ + product_major) | \ + GPU_ID2_VERSION_MAKE(version_major, version_minor, \ + version_status)) + +/* Helper macro to create a partial GPU_ID (new format) that identifies + * a particular GPU model by its arch_major and product_major. + */ +#define GPU_ID2_MODEL_MAKE(arch_major, product_major) \ + ((((__u32)(arch_major)) << GPU_ID2_ARCH_MAJOR_SHIFT) | \ + (((__u32)(product_major)) << GPU_ID2_PRODUCT_MAJOR_SHIFT)) + +/* Strip off the non-relevant bits from a product_id value and make it suitable + * for comparison against the GPU_ID2_PRODUCT_xxx values which identify a GPU + * model.
+ */ +#define GPU_ID2_MODEL_MATCH_VALUE(product_id) \ + ((((__u32)(product_id)) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \ + GPU_ID2_PRODUCT_MODEL) + +#define GPU_ID2_PRODUCT_TMIX GPU_ID2_MODEL_MAKE(6, 0) +#define GPU_ID2_PRODUCT_THEX GPU_ID2_MODEL_MAKE(6, 1) +#define GPU_ID2_PRODUCT_TSIX GPU_ID2_MODEL_MAKE(7, 0) +#define GPU_ID2_PRODUCT_TDVX GPU_ID2_MODEL_MAKE(7, 3) +#define GPU_ID2_PRODUCT_TNOX GPU_ID2_MODEL_MAKE(7, 1) +#define GPU_ID2_PRODUCT_TGOX GPU_ID2_MODEL_MAKE(7, 2) +#define GPU_ID2_PRODUCT_TTRX GPU_ID2_MODEL_MAKE(9, 0) +#define GPU_ID2_PRODUCT_TNAX GPU_ID2_MODEL_MAKE(9, 1) +#define GPU_ID2_PRODUCT_TBEX GPU_ID2_MODEL_MAKE(9, 2) +#define GPU_ID2_PRODUCT_LBEX GPU_ID2_MODEL_MAKE(9, 4) +#define GPU_ID2_PRODUCT_TBAX GPU_ID2_MODEL_MAKE(9, 5) +#define GPU_ID2_PRODUCT_TDUX GPU_ID2_MODEL_MAKE(10, 1) +#define GPU_ID2_PRODUCT_TODX GPU_ID2_MODEL_MAKE(10, 2) +#define GPU_ID2_PRODUCT_TGRX GPU_ID2_MODEL_MAKE(10, 3) +#define GPU_ID2_PRODUCT_TVAX GPU_ID2_MODEL_MAKE(10, 4) +#define GPU_ID2_PRODUCT_LODX GPU_ID2_MODEL_MAKE(10, 7) + +/* Helper macro to create a GPU_ID assuming valid values for id, major, + * minor, status + */ +#define GPU_ID_MAKE(id, major, minor, status) \ + ((((__u32)(id)) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \ + (((__u32)(major)) << GPU_ID_VERSION_MAJOR_SHIFT) | \ + (((__u32)(minor)) << GPU_ID_VERSION_MINOR_SHIFT) | \ + (((__u32)(status)) << GPU_ID_VERSION_STATUS_SHIFT)) + +#endif /* _UAPI_KBASE_GPU_ID_H_ */ diff --git a/dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h new file mode 100644 index 0000000..84fad8d --- /dev/null +++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h @@ -0,0 +1,434 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _UAPI_KBASE_GPU_REGMAP_H_ +#define _UAPI_KBASE_GPU_REGMAP_H_ + +#include "mali_kbase_gpu_coherency.h" +#include "mali_kbase_gpu_id.h" +#if MALI_USE_CSF +#include "backend/mali_kbase_gpu_regmap_csf.h" +#else +#include "backend/mali_kbase_gpu_regmap_jm.h" +#endif + +/* Begin Register Offsets */ +/* GPU control registers */ + +#define GPU_CONTROL_BASE 0x0000 +#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r)) +#define GPU_ID 0x000 /* (RO) GPU and revision identifier */ +#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */ +#define TILER_FEATURES 0x00C /* (RO) Tiler Features */ +#define MEM_FEATURES 0x010 /* (RO) Memory system features */ +#define MMU_FEATURES 0x014 /* (RO) MMU features */ +#define AS_PRESENT 0x018 /* (RO) Address space slots present */ +#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */ +#define GPU_IRQ_CLEAR 0x024 /* (WO) */ +#define GPU_IRQ_MASK 0x028 /* (RW) */ +#define GPU_IRQ_STATUS 0x02C /* (RO) */ + +#define GPU_COMMAND 0x030 /* (WO) */ +#define GPU_STATUS 0x034 /* (RO) */ + +#define GPU_DBGEN (1 << 8) /* DBGEN wire status */ + +#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */ +#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */ +#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception 
fault address, high word */ + +#define L2_CONFIG 0x048 /* (RW) Level 2 cache configuration */ + +#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ +#define SUPER_L2_COHERENT (1 << 1) /* Shader cores within a core + * supergroup are l2 coherent + */ + +#define PWR_KEY 0x050 /* (WO) Power manager key register */ +#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */ +#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */ +#define GPU_FEATURES_LO 0x060 /* (RO) GPU features, low word */ +#define GPU_FEATURES_HI 0x064 /* (RO) GPU features, high word */ +#define PRFCNT_FEATURES 0x068 /* (RO) Performance counter features */ +#define TIMESTAMP_OFFSET_LO 0x088 /* (RW) Global time stamp offset, low word */ +#define TIMESTAMP_OFFSET_HI 0x08C /* (RW) Global time stamp offset, high word */ +#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */ +#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */ +#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */ +#define TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, high word */ + +#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */ +#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */ +#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */ +#define THREAD_FEATURES 0x0AC /* (RO) Thread features */ +#define THREAD_TLS_ALLOC 0x310 /* (RO) Number of threads per core that TLS must be allocated for */ + +#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */ +#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */ +#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */ +#define TEXTURE_FEATURES_3 0x0BC /* (RO) Support flags for texture order */ + +#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2)) + +#define SHADER_PRESENT_LO 
0x100 /* (RO) Shader core present bitmap, low word */ +#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */ + +#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */ +#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */ + +#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ +#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ + +#define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */ +#define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */ + +#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */ +#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */ + +#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */ +#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */ + +#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */ +#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */ + +#define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */ +#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */ + +#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */ +#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */ + +#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */ +#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */ + +#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */ +#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */ + +#define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */ +#define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */ + +#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */ +#define SHADER_PWROFF_HI 0x1C4 /* (WO) 
Shader core power off bitmap, high word */ + +#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */ +#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */ + +#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */ +#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */ + +#define STACK_PWROFF_LO 0xE30 /* (RO) Core stack power off bitmap, low word */ +#define STACK_PWROFF_HI 0xE34 /* (RO) Core stack power off bitmap, high word */ + +#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */ +#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */ + +#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */ +#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */ + +#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */ +#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */ + +#define ASN_HASH_0 0x02C0 /* (RW) ASN hash function argument 0 */ +#define ASN_HASH(n) (ASN_HASH_0 + (n)*4) +#define ASN_HASH_COUNT 3 + +#define STACK_PWRTRANS_LO 0xE40 /* (RO) Core stack power transition bitmap, low word */ +#define STACK_PWRTRANS_HI 0xE44 /* (RO) Core stack power transition bitmap, high word */ + +#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */ +#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */ + +#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */ +#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */ + +#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */ +#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */ + +#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ +#define COHERENCY_ENABLE 0x304 /* 
(RW) Coherency enable */ + +#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration (implementation-specific) */ +#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration (implementation-specific) */ +#define L2_MMU_CONFIG 0xF0C /* (RW) L2 cache and MMU configuration (implementation-specific) */ + +/* Job control registers */ + +#define JOB_CONTROL_BASE 0x1000 + +#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r)) + +#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */ +#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */ +#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */ +#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */ + +/* MMU control registers */ + +#define MEMORY_MANAGEMENT_BASE 0x2000 +#define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r)) + +#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */ +#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */ +#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */ +#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */ + +#define MMU_AS0 0x400 /* Configuration registers for address space 0 */ +#define MMU_AS1 0x440 /* Configuration registers for address space 1 */ +#define MMU_AS2 0x480 /* Configuration registers for address space 2 */ +#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */ +#define MMU_AS4 0x500 /* Configuration registers for address space 4 */ +#define MMU_AS5 0x540 /* Configuration registers for address space 5 */ +#define MMU_AS6 0x580 /* Configuration registers for address space 6 */ +#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */ +#define MMU_AS8 0x600 /* Configuration registers for address space 8 */ +#define MMU_AS9 0x640 /* Configuration registers for address space 9 */ +#define MMU_AS10 0x680 /* Configuration registers for address space 10 */ +#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */ +#define MMU_AS12 0x700 /* Configuration 
registers for address space 12 */ +#define MMU_AS13 0x740 /* Configuration registers for address space 13 */ +#define MMU_AS14 0x780 /* Configuration registers for address space 14 */ +#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */ + +/* MMU address space control registers */ + +#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r)) + +#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */ +#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */ +#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */ +#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. */ +#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */ +#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */ +#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */ +#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */ +#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */ +#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */ +#define AS_STATUS 0x28 /* (RO) Status flags for address space n */ + +/* (RW) Translation table configuration for address space n, low word */ +#define AS_TRANSCFG_LO 0x30 +/* (RW) Translation table configuration for address space n, high word */ +#define AS_TRANSCFG_HI 0x34 +/* (RO) Secondary fault address for address space n, low word */ +#define AS_FAULTEXTRA_LO 0x38 +/* (RO) Secondary fault address for address space n, high word */ +#define AS_FAULTEXTRA_HI 0x3C + +/* End Register Offsets */ + +#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON) + +/* + * MMU_IRQ_RAWSTAT register values. Values are valid also for + * MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers. 
+ */ + +#define MMU_PAGE_FAULT_FLAGS 16 + +/* Macros returning a bitmask to retrieve page fault or bus error flags from + * MMU registers + */ +#define MMU_PAGE_FAULT(n) (1UL << (n)) +#define MMU_BUS_ERROR(n) (1UL << ((n) + MMU_PAGE_FAULT_FLAGS)) + +/* + * Begin AARCH64 MMU TRANSTAB register values + */ +#define MMU_HW_OUTA_BITS 40 +#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4)) + +/* + * Begin MMU STATUS register values + */ +#define AS_STATUS_AS_ACTIVE 0x01 + +#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK (0x7<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT (0x0<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT (0x1<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3) +#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3) + +#define AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0 +#define AS_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFF << AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) +#define AS_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \ + (((reg_val)&AS_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT) +#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_0 0xC0 + +#define AS_FAULTSTATUS_ACCESS_TYPE_SHIFT 8 +#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) +#define AS_FAULTSTATUS_ACCESS_TYPE_GET(reg_val) \ + (((reg_val)&AS_FAULTSTATUS_ACCESS_TYPE_MASK) >> AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) + +#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0) +#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1) +#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2) +#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3) + +#define AS_FAULTSTATUS_SOURCE_ID_SHIFT 16 +#define AS_FAULTSTATUS_SOURCE_ID_MASK (0xFFFFu << AS_FAULTSTATUS_SOURCE_ID_SHIFT) /* unsigned: 0xFFFF << 16 overflows int */ +#define AS_FAULTSTATUS_SOURCE_ID_GET(reg_val) \ + (((reg_val)&AS_FAULTSTATUS_SOURCE_ID_MASK) >> AS_FAULTSTATUS_SOURCE_ID_SHIFT)
+ +#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT (0) +#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_MASK \ + ((0xFF) << PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT) +#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(reg_val) \ + (((reg_val)&PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_MASK) >> \ + PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT) + +/* + * Begin MMU TRANSCFG register values + */ +#define AS_TRANSCFG_ADRMODE_LEGACY 0 +#define AS_TRANSCFG_ADRMODE_UNMAPPED 1 +#define AS_TRANSCFG_ADRMODE_IDENTITY 2 +#define AS_TRANSCFG_ADRMODE_AARCH64_4K 6 +#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8 + +#define AS_TRANSCFG_ADRMODE_MASK 0xF + +/* + * Begin TRANSCFG register values + */ +#define AS_TRANSCFG_PTW_MEMATTR_MASK (3ull << 24) +#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1ull << 24) +#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2ull << 24) + +#define AS_TRANSCFG_PTW_SH_MASK ((3ull << 28)) +#define AS_TRANSCFG_PTW_SH_OS (2ull << 28) +#define AS_TRANSCFG_PTW_SH_IS (3ull << 28) +#define AS_TRANSCFG_R_ALLOCATE (1ull << 30) + +/* + * Begin Command Values + */ + +/* AS_COMMAND register commands */ +#define AS_COMMAND_NOP 0x00 /* NOP Operation */ +#define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */ +#define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */ +#define AS_COMMAND_UNLOCK 0x03 /* Issue a flush region command to all MMUs */ +/* Flush all L2 caches then issue a flush region command to all MMUs + * (deprecated - only for use with T60x) + */ +#define AS_COMMAND_FLUSH 0x04 +/* Flush all L2 caches then issue a flush region command to all MMUs */ +#define AS_COMMAND_FLUSH_PT 0x04 +/* Wait for memory accesses to complete, flush all the L1s cache then flush all + * L2 caches then issue a flush region command to all MMUs + */ +#define AS_COMMAND_FLUSH_MEM 0x05 + +/* GPU_STATUS values */ +#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. 
*/ +#define GPU_STATUS_CYCLE_COUNT_ACTIVE (1 << 6) /* Set if the cycle counter is active. */ +#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */ + +/* PRFCNT_CONFIG register values */ +#define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */ +#define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. */ +#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */ + +/* The performance counters are disabled. */ +#define PRFCNT_CONFIG_MODE_OFF 0 +/* The performance counters are enabled, but are only written out when a + * PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. + */ +#define PRFCNT_CONFIG_MODE_MANUAL 1 +/* The performance counters are enabled, and are written out each time a tile + * finishes rendering. + */ +#define PRFCNT_CONFIG_MODE_TILE 2 + +/* AS_MEMATTR values from MMU_MEMATTR_STAGE1: */ +/* Use GPU implementation-defined caching policy. */ +#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull +/* The attribute set to force all resources to be cached. */ +#define AS_MEMATTR_FORCE_TO_CACHE_ALL 0x8Full +/* Inner write-alloc cache setup, no outer caching */ +#define AS_MEMATTR_WRITE_ALLOC 0x8Dull + +/* Use GPU implementation-defined caching policy. */ +#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull +/* The attribute set to force all resources to be cached. */ +#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL 0x4Full +/* Inner write-alloc cache setup, no outer caching */ +#define AS_MEMATTR_LPAE_WRITE_ALLOC 0x4Dull +/* Set to implementation defined, outer caching */ +#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF 0x88ull +/* Set to write back memory, outer caching */ +#define AS_MEMATTR_LPAE_OUTER_WA 0x8Dull +/* There is no LPAE support for non-cacheable, since the memory type is always + * write-back. 
+ * Marking this setting as reserved for LPAE + */ +#define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED + +/* L2_MMU_CONFIG register */ +#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT (23) +#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT) + +/* End L2_MMU_CONFIG register */ + +/* THREAD_* registers */ + +/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */ +#define IMPLEMENTATION_UNSPECIFIED 0 +#define IMPLEMENTATION_SILICON 1 +#define IMPLEMENTATION_FPGA 2 +#define IMPLEMENTATION_MODEL 3 + +/* Default values when registers are not supported by the implemented hardware */ +#define THREAD_MT_DEFAULT 256 +#define THREAD_MWS_DEFAULT 256 +#define THREAD_MBS_DEFAULT 256 +#define THREAD_MR_DEFAULT 1024 +#define THREAD_MTQ_DEFAULT 4 +#define THREAD_MTGS_DEFAULT 10 + +/* End THREAD_* registers */ + +/* SHADER_CONFIG register */ +#define SC_LS_ALLOW_ATTR_TYPES (1ul << 16) +#define SC_TLS_HASH_ENABLE (1ul << 17) +#define SC_LS_ATTR_CHECK_DISABLE (1ul << 18) +#define SC_VAR_ALGORITHM (1ul << 29) +/* End SHADER_CONFIG register */ + +/* TILER_CONFIG register */ +#define TC_CLOCK_GATE_OVERRIDE (1ul << 0) +/* End TILER_CONFIG register */ + +/* L2_CONFIG register */ +#define L2_CONFIG_SIZE_SHIFT 16 +#define L2_CONFIG_SIZE_MASK (0xFFul << L2_CONFIG_SIZE_SHIFT) +#define L2_CONFIG_HASH_SHIFT 24 +#define L2_CONFIG_HASH_MASK (0xFFul << L2_CONFIG_HASH_SHIFT) +#define L2_CONFIG_ASN_HASH_ENABLE_SHIFT 24 +#define L2_CONFIG_ASN_HASH_ENABLE_MASK (1ul << L2_CONFIG_ASN_HASH_ENABLE_SHIFT) +/* End L2_CONFIG register */ + +/* IDVS_GROUP register */ +#define IDVS_GROUP_SIZE_SHIFT (16) +#define IDVS_GROUP_MAX_SIZE (0x3F) + +#endif /* _UAPI_KBASE_GPU_REGMAP_H_ */ diff --git a/dvalin/kernel/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h new file mode 100644 index 0000000..749e1fa --- /dev/null +++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h @@ -0,0 
+1,1200 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _UAPI_BASE_JM_KERNEL_H_ +#define _UAPI_BASE_JM_KERNEL_H_ + +#include <linux/types.h> + +/* Memory allocation, access/hint flags. + * + * See base_mem_alloc_flags. + */ + +/* IN */ +/* Read access CPU side + */ +#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0) + +/* Write access CPU side + */ +#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1) + +/* Read access GPU side + */ +#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2) + +/* Write access GPU side + */ +#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3) + +/* Execute allowed on the GPU side + */ +#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4) + +/* Will be permanently mapped in kernel space. + * Flag is only allowed on allocations originating from kbase. + */ +#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5) + +/* The allocation will completely reside within the same 4GB chunk in the GPU + * virtual space.
+ * Since this flag is primarily required only for the TLS memory which will + * not be used to contain executable code and also not used for Tiler heap, + * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags. + */ +#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6) + +/* Userspace is not allowed to free this memory. + * Flag is only allowed on allocations originating from kbase. + */ +#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7) + +#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8) + +/* Grow backing store on GPU Page Fault + */ +#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9) + +/* Page coherence Outer shareable, if available + */ +#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10) + +/* Page coherence Inner shareable + */ +#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11) + +/* IN/OUT */ +/* Should be cached on the CPU, returned if actually cached + */ +#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12) + +/* IN/OUT */ +/* Must have same VA on both the GPU and the CPU + */ +#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13) + +/* OUT */ +/* Must call mmap to acquire a GPU address for the allocation + */ +#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14) + +/* IN */ +/* Page coherence Outer shareable, required. + */ +#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15) + +/* Protected memory + */ +#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16) + +/* Not needed physical memory + */ +#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17) + +/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the + * addresses to be the same + */ +#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18) + +/** + * Bit 19 is reserved. 
+ * + * Do not remove, use the next unreserved bit for new flags + */ +#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19) + +/** + * Memory starting from the end of the initial commit is aligned to 'extension' + * pages, where 'extension' must be a power of 2 and no more than + * BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES + */ +#define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20) + +/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu + * mode. Some components within the GPU might only be able to access memory + * that is GPU cacheable. Refer to the specific GPU implementation for more + * details. The 3 shareability flags will be ignored for GPU uncached memory. + * If used while importing USER_BUFFER type memory, then the import will fail + * if the memory is not aligned to GPU and CPU cache line width. + */ +#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21) + +/* + * Bits [22:25] for group_id (0~15). + * + * base_mem_group_id_set() should be used to pack a memory group ID into a + * base_mem_alloc_flags value instead of accessing the bits directly. + * base_mem_group_id_get() should be used to extract the memory group ID from + * a base_mem_alloc_flags value. + */ +#define BASEP_MEM_GROUP_ID_SHIFT 22 +#define BASE_MEM_GROUP_ID_MASK \ + ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT) + +/* Must do CPU cache maintenance when imported memory is mapped/unmapped + * on GPU. Currently applicable to dma-buf type only. 
+ */ +#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26) + +/* Use the GPU VA chosen by the kernel client */ +#define BASE_MEM_FLAG_MAP_FIXED ((base_mem_alloc_flags)1 << 27) + +/* OUT */ +/* Kernel side cache sync ops required */ +#define BASE_MEM_KERNEL_SYNC ((base_mem_alloc_flags)1 << 28) + +/* Force trimming of JIT allocations when creating a new allocation */ +#define BASEP_MEM_PERFORM_JIT_TRIM ((base_mem_alloc_flags)1 << 29) + +/* Number of bits used as flags for base memory management + * + * Must be kept in sync with the base_mem_alloc_flags flags + */ +#define BASE_MEM_FLAGS_NR_BITS 30 + +/* A mask of all the flags which are only valid for allocations within kbase, + * and may not be passed from user space. + */ +#define BASEP_MEM_FLAGS_KERNEL_ONLY \ + (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE | \ + BASE_MEM_FLAG_MAP_FIXED | BASEP_MEM_PERFORM_JIT_TRIM) + +/* A mask for all output bits, excluding IN/OUT bits. + */ +#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP + +/* A mask for all input bits, including IN/OUT bits. 
+ */ +#define BASE_MEM_FLAGS_INPUT_MASK \ + (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK) + +/* A mask of all currently reserved flags + */ +#define BASE_MEM_FLAGS_RESERVED \ + (BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_19) + +#define BASEP_MEM_INVALID_HANDLE (0ull << 12) +#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12) +#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12) +#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12) +#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12) +/* reserved handles ..-47<<PAGE_SHIFT> for future special handles */ +#define BASE_MEM_COOKIE_BASE (64ul << 12) +#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \ + BASE_MEM_COOKIE_BASE) + +/* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the + * initial commit is aligned to 'extension' pages, where 'extension' must be a power + * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES + */ +#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP (1 << 0) + +/** + * If set, the heap info address points to a __u32 holding the used size in bytes; + * otherwise it points to a __u64 holding the lowest address of unused memory. + */ +#define BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE (1 << 1) + +/** + * Valid set of just-in-time memory allocation flags + * + * Note: BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE cannot be set if heap_info_gpu_addr + * in %base_jit_alloc_info is 0 (atom with BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE set + * and heap_info_gpu_addr being 0 will be rejected). + */ +#define BASE_JIT_ALLOC_VALID_FLAGS \ + (BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP | BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) + +/** + * typedef base_context_create_flags - Flags to pass to ::base_context_init. + * + * Flags can be ORed together to enable multiple things. + * + * These share the same space as BASEP_CONTEXT_FLAG_*, and so must + * not collide with them.
+ */ +typedef __u32 base_context_create_flags; + +/* No flags set */ +#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0) + +/* Base context is embedded in a cctx object (flag used for CINSTR + * software counter macros) + */ +#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0) + +/* Base context is a 'System Monitor' context for Hardware counters. + * + * One important side effect of this is that job submission is disabled. + */ +#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \ + ((base_context_create_flags)1 << 1) + +/* Bit-shift used to encode a memory group ID in base_context_create_flags + */ +#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3) + +/* Bitmask used to encode a memory group ID in base_context_create_flags + */ +#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \ + ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT) + +/* Bitpattern describing the base_context_create_flags that can be + * passed to the kernel + */ +#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \ + (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \ + BASEP_CONTEXT_MMU_GROUP_ID_MASK) + +/* Bitpattern describing the ::base_context_create_flags that can be + * passed to base_context_init() + */ +#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS \ + (BASE_CONTEXT_CCTX_EMBEDDED | BASEP_CONTEXT_CREATE_KERNEL_FLAGS) + +/* + * Private flags used on the base context + * + * These start at bit 31, and run down to zero. + * + * They share the same space as base_context_create_flags, and so must + * not collide with them. + */ + +/* Private flag tracking whether job descriptor dumping is disabled */ +#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED \ + ((base_context_create_flags)1 << 31) /* cast first: 1 << 31 overflows int */ + +/* Enable additional tracepoints for latency measurements (TL_ATOM_READY, + * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) + */ +#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0) + +/* Indicate that job dumping is enabled.
This could affect certain timers + * to account for the performance impact. + */ +#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1) + +#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \ + BASE_TLSTREAM_JOB_DUMPING_ENABLED) +/* + * Dependency stuff, keep it private for now. May want to expose it if + * we decide to make the number of semaphores a configurable + * option. + */ +#define BASE_JD_ATOM_COUNT 256 + +/* Maximum number of concurrent render passes. + */ +#define BASE_JD_RP_COUNT (256) + +/* Set/reset values for a software event */ +#define BASE_JD_SOFT_EVENT_SET ((unsigned char)1) +#define BASE_JD_SOFT_EVENT_RESET ((unsigned char)0) + +/** + * struct base_jd_udata - Per-job data + * + * This structure is used to store per-job data, and is completely unused + * by the Base driver. It can be used to store things such as callback + * function pointer, data to handle job completion. It is guaranteed to be + * untouched by the Base driver. + * + * @blob: per-job data array + */ +struct base_jd_udata { + __u64 blob[2]; +}; + +/** + * typedef base_jd_dep_type - Job dependency type. + * + * A flags field will be inserted into the atom structure to specify whether a + * dependency is a data or ordering dependency (by putting it before/after + * 'core_req' in the structure it should be possible to add without changing + * the structure size). + * When the flag is set for a particular dependency to signal that it is an + * ordering only dependency then errors will not be propagated. + */ +typedef __u8 base_jd_dep_type; + +#define BASE_JD_DEP_TYPE_INVALID (0) /**< Invalid dependency */ +#define BASE_JD_DEP_TYPE_DATA (1U << 0) /**< Data dependency */ +#define BASE_JD_DEP_TYPE_ORDER (1U << 1) /**< Order dependency */ + +/** + * typedef base_jd_core_req - Job chain hardware requirements. + * + * A job chain must specify what GPU features it needs to allow the + * driver to schedule the job correctly. 
Not specifying the + * correct settings can/will cause an early job termination. Multiple + * values can be ORed together to specify multiple requirements. + * A special case is ::BASE_JD_REQ_DEP, which is used to express complex + * dependencies, and that doesn't execute anything on the hardware. + */ +typedef __u32 base_jd_core_req; + +/* Requirements that come from the HW */ + +/* No requirement, dependency only + */ +#define BASE_JD_REQ_DEP ((base_jd_core_req)0) + +/* Requires fragment shaders + */ +#define BASE_JD_REQ_FS ((base_jd_core_req)1 << 0) + +/* Requires compute shaders + * + * This covers any of the following GPU job types: + * - Vertex Shader Job + * - Geometry Shader Job + * - An actual Compute Shader Job + * + * Compare this with BASE_JD_REQ_ONLY_COMPUTE, which specifies that the + * job is specifically just the "Compute Shader" job type, and not the "Vertex + * Shader" nor the "Geometry Shader" job type. + */ +#define BASE_JD_REQ_CS ((base_jd_core_req)1 << 1) + +/* Requires tiling */ +#define BASE_JD_REQ_T ((base_jd_core_req)1 << 2) + +/* Requires cache flushes */ +#define BASE_JD_REQ_CF ((base_jd_core_req)1 << 3) + +/* Requires value writeback */ +#define BASE_JD_REQ_V ((base_jd_core_req)1 << 4) + +/* SW-only requirements - the HW does not expose these as part of the job slot + * capabilities + */ + +/* Requires fragment job with AFBC encoding */ +#define BASE_JD_REQ_FS_AFBC ((base_jd_core_req)1 << 13) + +/* SW-only requirement: coalesce completion events. + * If this bit is set then completion of this atom will not cause an event to + * be sent to userspace, whether successful or not; completion events will be + * deferred until an atom completes which does not have this bit set. + * + * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES. + */ +#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5) + +/* SW Only requirement: the job chain requires a coherent core group. 
We don't + * mind which coherent core group is used. + */ +#define BASE_JD_REQ_COHERENT_GROUP ((base_jd_core_req)1 << 6) + +/* SW Only requirement: The performance counters should be enabled only when + * they are needed, to reduce power consumption. + */ +#define BASE_JD_REQ_PERMON ((base_jd_core_req)1 << 7) + +/* SW Only requirement: External resources are referenced by this atom. + * + * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE and + * BASE_JD_REQ_SOFT_EVENT_WAIT. + */ +#define BASE_JD_REQ_EXTERNAL_RESOURCES ((base_jd_core_req)1 << 8) + +/* SW Only requirement: Software defined job. Jobs with this bit set will not be + * submitted to the hardware but will cause some action to happen within the + * driver + */ +#define BASE_JD_REQ_SOFT_JOB ((base_jd_core_req)1 << 9) + +#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME (BASE_JD_REQ_SOFT_JOB | 0x1) +#define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2) +#define BASE_JD_REQ_SOFT_FENCE_WAIT (BASE_JD_REQ_SOFT_JOB | 0x3) + +/* 0x4 RESERVED for now */ + +/* SW only requirement: event wait/trigger job. + * + * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set. + * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the + * other waiting jobs. It completes immediately. + * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it + * possible for other jobs to wait upon. It completes immediately. + */ +#define BASE_JD_REQ_SOFT_EVENT_WAIT (BASE_JD_REQ_SOFT_JOB | 0x5) +#define BASE_JD_REQ_SOFT_EVENT_SET (BASE_JD_REQ_SOFT_JOB | 0x6) +#define BASE_JD_REQ_SOFT_EVENT_RESET (BASE_JD_REQ_SOFT_JOB | 0x7) + +#define BASE_JD_REQ_SOFT_DEBUG_COPY (BASE_JD_REQ_SOFT_JOB | 0x8) + +/* SW only requirement: Just In Time allocation + * + * This job requests a single or multiple just-in-time allocations through a + * list of base_jit_alloc_info structure which is passed via the jc element of + * the atom. 
The number of base_jit_alloc_info structures present in the + * list is passed via the nr_extres element of the atom + * + * It should be noted that the id entry in base_jit_alloc_info must not + * be reused until it has been released via BASE_JD_REQ_SOFT_JIT_FREE. + * + * Should this soft job fail it is expected that a BASE_JD_REQ_SOFT_JIT_FREE + * soft job to free the JIT allocation is still made. + * + * The job will complete immediately. + */ +#define BASE_JD_REQ_SOFT_JIT_ALLOC (BASE_JD_REQ_SOFT_JOB | 0x9) + +/* SW only requirement: Just In Time free + * + * This job requests a single or multiple just-in-time allocations created by + * BASE_JD_REQ_SOFT_JIT_ALLOC to be freed. The ID list of the just-in-time + * allocations is passed via the jc element of the atom. + * + * The job will complete immediately. + */ +#define BASE_JD_REQ_SOFT_JIT_FREE (BASE_JD_REQ_SOFT_JOB | 0xa) + +/* SW only requirement: Map external resource + * + * This job requests external resource(s) are mapped once the dependencies + * of the job have been satisfied. The list of external resources are + * passed via the jc element of the atom which is a pointer to a + * base_external_resource_list. + */ +#define BASE_JD_REQ_SOFT_EXT_RES_MAP (BASE_JD_REQ_SOFT_JOB | 0xb) + +/* SW only requirement: Unmap external resource + * + * This job requests external resource(s) are unmapped once the dependencies + * of the job has been satisfied. The list of external resources are + * passed via the jc element of the atom which is a pointer to a + * base_external_resource_list. + */ +#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP (BASE_JD_REQ_SOFT_JOB | 0xc) + +/* HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders) + * + * This indicates that the Job Chain contains GPU jobs of the 'Compute + * Shaders' type. + * + * In contrast to BASE_JD_REQ_CS, this does not indicate that the Job + * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs. 
+ */ +#define BASE_JD_REQ_ONLY_COMPUTE ((base_jd_core_req)1 << 10) + +/* HW Requirement: Use the base_jd_atom::device_nr field to specify a + * particular core group + * + * If both BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag + * takes priority + * + * This is only guaranteed to work for BASE_JD_REQ_ONLY_COMPUTE atoms. + * + * If the core availability policy is keeping the required core group turned + * off, then the job will fail with a BASE_JD_EVENT_PM_EVENT error code. + */ +#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11) + +/* SW Flag: If this bit is set then the successful completion of this atom + * will not cause an event to be sent to userspace + */ +#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE ((base_jd_core_req)1 << 12) + +/* SW Flag: If this bit is set then completion of this atom will not cause an + * event to be sent to userspace, whether successful or not. + */ +#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14) + +/* SW Flag: Skip GPU cache clean and invalidation before starting a GPU job. + * + * If this bit is set then the GPU's cache will not be cleaned and invalidated + * until a GPU job starts which does not have this bit set or a job completes + * which does not have the BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use + * if the CPU may have written to memory addressed by the job since the last job + * without this bit set was submitted. + */ +#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15) + +/* SW Flag: Skip GPU cache clean and invalidation after a GPU job completes. + * + * If this bit is set then the GPU's cache will not be cleaned and invalidated + * until a GPU job completes which does not have this bit set or a job starts + * which does not have the BASE_JD_REQ_SKIP_CACHE_START bit set. Do not use + * if the CPU may read from or partially overwrite memory addressed by the job + * before the next job without this bit set completes. 
+ */ +#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16) + +/* Request the atom be executed on a specific job slot. + * + * When this flag is specified, it takes precedence over any existing job slot + * selection logic. + */ +#define BASE_JD_REQ_JOB_SLOT ((base_jd_core_req)1 << 17) + +/* SW-only requirement: The atom is the start of a renderpass. + * + * If this bit is set then the job chain will be soft-stopped if it causes the + * GPU to write beyond the end of the physical pages backing the tiler heap, and + * committing more memory to the heap would exceed an internal threshold. It may + * be resumed after running one of the job chains attached to an atom with + * BASE_JD_REQ_END_RENDERPASS set and the same renderpass ID. It may be + * resumed multiple times until it completes without memory usage exceeding the + * threshold. + * + * Usually used with BASE_JD_REQ_T. + */ +#define BASE_JD_REQ_START_RENDERPASS ((base_jd_core_req)1 << 18) + +/* SW-only requirement: The atom is the end of a renderpass. + * + * If this bit is set then the atom incorporates the CPU address of a + * base_jd_fragment object instead of the GPU address of a job chain. + * + * Which job chain is run depends upon whether the atom with the same renderpass + * ID and the BASE_JD_REQ_START_RENDERPASS bit set completed normally or + * was soft-stopped when it exceeded an upper threshold for tiler heap memory + * usage. + * + * It also depends upon whether one of the job chains attached to the atom has + * already been run as part of the same renderpass (in which case it would have + * written unresolved multisampled and otherwise-discarded output to temporary + * buffers that need to be read back). The job chain for doing a forced read and + * forced write (from/to temporary buffers) is run as many times as necessary. + * + * Usually used with BASE_JD_REQ_FS. 
+ */ +#define BASE_JD_REQ_END_RENDERPASS ((base_jd_core_req)1 << 19) + +/* SW-only requirement: The atom needs to run on a limited core mask affinity. + * + * If this bit is set then the kbase_context.limited_core_mask will be applied + * to the affinity. + */ +#define BASE_JD_REQ_LIMITED_CORE_MASK ((base_jd_core_req)1 << 20) + +/* These requirement bits are currently unused in base_jd_core_req + */ +#define BASEP_JD_REQ_RESERVED \ + (~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \ + BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \ + BASE_JD_REQ_EVENT_COALESCE | \ + BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \ + BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \ + BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END | \ + BASE_JD_REQ_JOB_SLOT | BASE_JD_REQ_START_RENDERPASS | \ + BASE_JD_REQ_END_RENDERPASS | BASE_JD_REQ_LIMITED_CORE_MASK)) + +/* Mask of all bits in base_jd_core_req that control the type of the atom. + * + * This allows dependency only atoms to have flags set + */ +#define BASE_JD_REQ_ATOM_TYPE \ + (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \ + BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE) + +/** + * Mask of all bits in base_jd_core_req that control the type of a soft job. + */ +#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f) + +/* Returns non-zero value if core requirements passed define a soft job or + * a dependency only job. + */ +#define BASE_JD_REQ_SOFT_JOB_OR_DEP(core_req) \ + (((core_req) & BASE_JD_REQ_SOFT_JOB) || \ + ((core_req) & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) + +/** + * enum kbase_jd_atom_state + * + * @KBASE_JD_ATOM_STATE_UNUSED: Atom is not used. + * @KBASE_JD_ATOM_STATE_QUEUED: Atom is queued in JD. + * @KBASE_JD_ATOM_STATE_IN_JS: Atom has been given to JS (is runnable/running). 
+ * @KBASE_JD_ATOM_STATE_HW_COMPLETED: Atom has been completed, but not yet + * handed back to job dispatcher for + * dependency resolution. + * @KBASE_JD_ATOM_STATE_COMPLETED: Atom has been completed, but not yet handed + * back to userspace. + */ +enum kbase_jd_atom_state { + KBASE_JD_ATOM_STATE_UNUSED, + KBASE_JD_ATOM_STATE_QUEUED, + KBASE_JD_ATOM_STATE_IN_JS, + KBASE_JD_ATOM_STATE_HW_COMPLETED, + KBASE_JD_ATOM_STATE_COMPLETED +}; + +/** + * typedef base_atom_id - Type big enough to store an atom number in. + */ +typedef __u8 base_atom_id; + +/** + * struct base_dependency - Pairs an atom number with a dependency type. + * + * @atom_id: An atom number + * @dependency_type: Dependency type + */ +struct base_dependency { + base_atom_id atom_id; + base_jd_dep_type dependency_type; +}; + +/** + * struct base_jd_fragment - Set of GPU fragment job chains used for rendering. + * + * @norm_read_norm_write: Job chain for full rendering. + * GPU address of a fragment job chain to render in the + * circumstance where the tiler job chain did not exceed + * its memory usage threshold and no fragment job chain + * was previously run for the same renderpass. + * It is used no more than once per renderpass. + * @norm_read_forced_write: Job chain for starting incremental + * rendering. + * GPU address of a fragment job chain to render in + * the circumstance where the tiler job chain exceeded + * its memory usage threshold for the first time and + * no fragment job chain was previously run for the + * same renderpass. + * Writes unresolved multisampled and normally- + * discarded output to temporary buffers that must be + * read back by a subsequent forced_read job chain + * before the renderpass is complete. + * It is used no more than once per renderpass. + * @forced_read_forced_write: Job chain for continuing incremental + * rendering.
+ * GPU address of a fragment job chain to render in + * the circumstance where the tiler job chain + * exceeded its memory usage threshold again + * and a fragment job chain was previously run for + * the same renderpass. + * Reads unresolved multisampled and + * normally-discarded output from temporary buffers + * written by a previous forced_write job chain and + * writes the same to temporary buffers again. + * It is used as many times as required until + * rendering completes. + * @forced_read_norm_write: Job chain for ending incremental rendering. + * GPU address of a fragment job chain to render in the + * circumstance where the tiler job chain did not + * exceed its memory usage threshold this time and a + * fragment job chain was previously run for the same + * renderpass. + * Reads unresolved multisampled and normally-discarded + * output from temporary buffers written by a previous + * forced_write job chain in order to complete a + * renderpass. + * It is used no more than once per renderpass. + * + * This structure is referenced by the main atom structure if + * BASE_JD_REQ_END_RENDERPASS is set in the base_jd_core_req. + */ +struct base_jd_fragment { + __u64 norm_read_norm_write; + __u64 norm_read_forced_write; + __u64 forced_read_forced_write; + __u64 forced_read_norm_write; +}; + +/** + * typedef base_jd_prio - Base Atom priority. + * + * Only certain priority levels are actually implemented, as specified by the + * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority + * level that is not one of those defined below. + * + * Priority levels only affect scheduling after the atoms have had dependencies + * resolved. For example, a low priority atom that has had its dependencies + * resolved might run before a higher priority atom that has not had its + * dependencies resolved. + * + * In general, fragment atoms do not affect non-fragment atoms with + * lower priorities, and vice versa. 
One exception is that there is only one + * priority value for each context. So a high-priority (e.g.) fragment atom + * could increase its context priority, causing its non-fragment atoms to also + * be scheduled sooner. + * + * The atoms are scheduled as follows with respect to their priorities: + * * Let atoms 'X' and 'Y' be for the same job slot who have dependencies + * resolved, and atom 'X' has a higher priority than atom 'Y' + * * If atom 'Y' is currently running on the HW, then it is interrupted to + * allow atom 'X' to run soon after + * * If instead neither atom 'Y' nor atom 'X' are running, then when choosing + * the next atom to run, atom 'X' will always be chosen instead of atom 'Y' + * * Any two atoms that have the same priority could run in any order with + * respect to each other. That is, there is no ordering constraint between + * atoms of the same priority. + * + * The sysfs file 'js_ctx_scheduling_mode' is used to control how atoms are + * scheduled between contexts. The default value, 0, will cause higher-priority + * atoms to be scheduled first, regardless of their context. The value 1 will + * use a round-robin algorithm when deciding which context's atoms to schedule + * next, so higher-priority atoms can only preempt lower priority atoms within + * the same context. See KBASE_JS_SYSTEM_PRIORITY_MODE and + * KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE for more details. + */ +typedef __u8 base_jd_prio; + +/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */ +#define BASE_JD_PRIO_MEDIUM ((base_jd_prio)0) +/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and + * BASE_JD_PRIO_LOW + */ +#define BASE_JD_PRIO_HIGH ((base_jd_prio)1) +/* Low atom priority. */ +#define BASE_JD_PRIO_LOW ((base_jd_prio)2) +/* Real-Time atom priority. 
This is a priority higher than BASE_JD_PRIO_HIGH, + * BASE_JD_PRIO_MEDIUM, and BASE_JD_PRIO_LOW + */ +#define BASE_JD_PRIO_REALTIME ((base_jd_prio)3) + +/* Count of the number of priority levels. This itself is not a valid + * base_jd_prio setting + */ +#define BASE_JD_NR_PRIO_LEVELS 4 + +/** + * struct base_jd_atom_v2 - Node of a dependency graph used to submit a + * GPU job chain or soft-job to the kernel driver. + * + * @jc: GPU address of a job chain or (if BASE_JD_REQ_END_RENDERPASS + * is set in the base_jd_core_req) the CPU address of a + * base_jd_fragment object. + * @udata: User data. + * @extres_list: List of external resources. + * @nr_extres: Number of external resources or JIT allocations. + * @jit_id: Zero-terminated array of IDs of just-in-time memory + * allocations written to by the atom. When the atom + * completes, the value stored at the + * &struct_base_jit_alloc_info.heap_info_gpu_addr of + * each allocation is read in order to enforce an + * overall physical memory usage limit. + * @pre_dep: Pre-dependencies. One needs to use a SETTER function to assign + * this field; this is done in order to reduce the possibility of + * improper assignment of a dependency field. + * @atom_number: Unique number to identify the atom. + * @prio: Atom priority. Refer to base_jd_prio for more details. + * @device_nr: Core group when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP + * specified. + * @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified. + * @core_req: Core requirements. + * @renderpass_id: Renderpass identifier used to associate an atom that has + * BASE_JD_REQ_START_RENDERPASS set in its core requirements + * with an atom that has BASE_JD_REQ_END_RENDERPASS set. + * @padding: Unused. Must be zero. + * + * This structure has changed since UK 10.2 for which base_jd_core_req was a + * __u16 value.
+ * + * In UK 10.3 a core_req field of a __u32 type was added to the end of the + * structure, and the place in the structure previously occupied by __u16 + * core_req was kept but renamed to compat_core_req. + * + * From UK 11.20 - compat_core_req is now occupied by __u8 jit_id[2]. + * Compatibility with UK 10.x from UK 11.y is not handled because + * the major version increase prevents this. + * + * For UK 11.20 jit_id[2] must be initialized to zero. + */ +struct base_jd_atom_v2 { + __u64 jc; + struct base_jd_udata udata; + __u64 extres_list; + __u16 nr_extres; + __u8 jit_id[2]; + struct base_dependency pre_dep[2]; + base_atom_id atom_number; + base_jd_prio prio; + __u8 device_nr; + __u8 jobslot; + base_jd_core_req core_req; + __u8 renderpass_id; + __u8 padding[7]; +}; + +/** + * struct base_jd_atom - Same as base_jd_atom_v2, but has an extra seq_nr + * at the beginning. + * + * @seq_nr: Sequence number of logical grouping of atoms. + * @jc: GPU address of a job chain or (if BASE_JD_REQ_END_RENDERPASS + * is set in the base_jd_core_req) the CPU address of a + * base_jd_fragment object. + * @udata: User data. + * @extres_list: List of external resources. + * @nr_extres: Number of external resources or JIT allocations. + * @jit_id: Zero-terminated array of IDs of just-in-time memory + * allocations written to by the atom. When the atom + * completes, the value stored at the + * &struct_base_jit_alloc_info.heap_info_gpu_addr of + * each allocation is read in order to enforce an + * overall physical memory usage limit. + * @pre_dep: Pre-dependencies. One needs to use a SETTER function to assign + * this field; this is done in order to reduce the possibility of + * improper assignment of a dependency field. + * @atom_number: Unique number to identify the atom. + * @prio: Atom priority. Refer to base_jd_prio for more details. + * @device_nr: Core group when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP + * specified. + * @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified.
+ * @core_req: Core requirements. + * @renderpass_id: Renderpass identifier used to associate an atom that has + * BASE_JD_REQ_START_RENDERPASS set in its core requirements + * with an atom that has BASE_JD_REQ_END_RENDERPASS set. + * @padding: Unused. Must be zero. + */ +typedef struct base_jd_atom { + __u64 seq_nr; + __u64 jc; + struct base_jd_udata udata; + __u64 extres_list; + __u16 nr_extres; + __u8 jit_id[2]; + struct base_dependency pre_dep[2]; + base_atom_id atom_number; + base_jd_prio prio; + __u8 device_nr; + __u8 jobslot; + base_jd_core_req core_req; + __u8 renderpass_id; + __u8 padding[7]; +} base_jd_atom; + +/* Job chain event code bits + * Defines the bits used to create ::base_jd_event_code + */ +enum { + BASE_JD_SW_EVENT_KERNEL = (1u << 15), /* Kernel side event */ + BASE_JD_SW_EVENT = (1u << 14), /* SW defined event */ + /* Event indicates success (SW events only) */ + BASE_JD_SW_EVENT_SUCCESS = (1u << 13), + BASE_JD_SW_EVENT_JOB = (0u << 11), /* Job related event */ + BASE_JD_SW_EVENT_BAG = (1u << 11), /* Bag related event */ + BASE_JD_SW_EVENT_INFO = (2u << 11), /* Misc/info event */ + BASE_JD_SW_EVENT_RESERVED = (3u << 11), /* Reserved event type */ + /* Mask to extract the type from an event code */ + BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11) +}; + +/** + * enum base_jd_event_code - Job chain event codes + * + * @BASE_JD_EVENT_RANGE_HW_NONFAULT_START: Start of hardware non-fault status + * codes. + * Obscurely, BASE_JD_EVENT_TERMINATED + * indicates a real fault, because the + * job was hard-stopped. + * @BASE_JD_EVENT_NOT_STARTED: Can't be seen by userspace, treated as + * 'previous job done'. + * @BASE_JD_EVENT_STOPPED: Can't be seen by userspace, becomes + * TERMINATED, DONE or JOB_CANCELLED. + * @BASE_JD_EVENT_TERMINATED: This is actually a fault status code - the job + * was hard stopped. + * @BASE_JD_EVENT_ACTIVE: Can't be seen by userspace, jobs only returned on + * complete/fail/cancel. 
+ * @BASE_JD_EVENT_RANGE_HW_NONFAULT_END: End of hardware non-fault status codes. + * Obscurely, BASE_JD_EVENT_TERMINATED + * indicates a real fault, + * because the job was hard-stopped. + * @BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START: Start of hardware fault and + * software error status codes. + * @BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END: End of hardware fault and + * software error status codes. + * @BASE_JD_EVENT_RANGE_SW_SUCCESS_START: Start of software success status + * codes. + * @BASE_JD_EVENT_RANGE_SW_SUCCESS_END: End of software success status codes. + * @BASE_JD_EVENT_RANGE_KERNEL_ONLY_START: Start of kernel-only status codes. + * Such codes are never returned to + * user-space. + * @BASE_JD_EVENT_RANGE_KERNEL_ONLY_END: End of kernel-only status codes. + * @BASE_JD_EVENT_DONE: Atom has completed successfully + * @BASE_JD_EVENT_JOB_CONFIG_FAULT: Atom dependencies configuration error which + * shall result in a failed atom + * @BASE_JD_EVENT_JOB_POWER_FAULT: The job could not be executed because the + * part of the memory system required to access + * job descriptors was not powered on + * @BASE_JD_EVENT_JOB_READ_FAULT: Reading a job descriptor into the Job + * manager failed + * @BASE_JD_EVENT_JOB_WRITE_FAULT: Writing a job descriptor from the Job + * manager failed + * @BASE_JD_EVENT_JOB_AFFINITY_FAULT: The job could not be executed because the + * specified affinity mask does not intersect + * any available cores + * @BASE_JD_EVENT_JOB_BUS_FAULT: A bus access failed while executing a job + * @BASE_JD_EVENT_INSTR_INVALID_PC: A shader instruction with an illegal program + * counter was executed. + * @BASE_JD_EVENT_INSTR_INVALID_ENC: A shader instruction with an illegal + * encoding was executed. + * @BASE_JD_EVENT_INSTR_TYPE_MISMATCH: A shader instruction was executed where + * the instruction encoding did not match the + * instruction type encoded in the program + * counter.
+ * @BASE_JD_EVENT_INSTR_OPERAND_FAULT: A shader instruction was executed that + * contained invalid combinations of operands. + * @BASE_JD_EVENT_INSTR_TLS_FAULT: A shader instruction was executed that tried + * to access the thread local storage section + * of another thread. + * @BASE_JD_EVENT_INSTR_ALIGN_FAULT: A shader instruction was executed that + * tried to do an unsupported unaligned memory + * access. + * @BASE_JD_EVENT_INSTR_BARRIER_FAULT: A shader instruction was executed that + * failed to complete an instruction barrier. + * @BASE_JD_EVENT_DATA_INVALID_FAULT: Any data structure read as part of the job + * contains invalid combinations of data. + * @BASE_JD_EVENT_TILE_RANGE_FAULT: Tile or fragment shading was asked to + * process a tile that is entirely outside the + * bounding box of the frame. + * @BASE_JD_EVENT_STATE_FAULT: Matches ADDR_RANGE_FAULT. A virtual address + * has been found that exceeds the virtual + * address range. + * @BASE_JD_EVENT_OUT_OF_MEMORY: The tiler ran out of memory when executing a job. + * @BASE_JD_EVENT_UNKNOWN: If multiple jobs in a job chain fail, only + * the first one that reports an error will set + * and return full error information. + * Subsequent failing jobs will not update the + * error status registers, and may write an + * error status of UNKNOWN. + * @BASE_JD_EVENT_DELAYED_BUS_FAULT: The GPU received a bus fault for access to + * physical memory where the original virtual + * address is no longer available. + * @BASE_JD_EVENT_SHAREABILITY_FAULT: Matches GPU_SHAREABILITY_FAULT. A cache + * has detected that the same line has been + * accessed as both shareable and non-shareable + * memory from inside the GPU. + * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1: A memory access hit an invalid table + * entry at level 1 of the translation table. + * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2: A memory access hit an invalid table + * entry at level 2 of the translation table.
+ * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3: A memory access hit an invalid table + * entry at level 3 of the translation table. + * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4: A memory access hit an invalid table + * entry at level 4 of the translation table. + * @BASE_JD_EVENT_PERMISSION_FAULT: A memory access could not be allowed due to + * the permission flags set in translation + * table + * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1: A bus fault occurred while reading + * level 0 of the translation tables. + * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2: A bus fault occurred while reading + * level 1 of the translation tables. + * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3: A bus fault occurred while reading + * level 2 of the translation tables. + * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4: A bus fault occurred while reading + * level 3 of the translation tables. + * @BASE_JD_EVENT_ACCESS_FLAG: Matches ACCESS_FLAG_0. A memory access hit a + * translation table entry with the ACCESS_FLAG + * bit set to zero in level 0 of the + * page table, and the DISABLE_AF_FAULT flag + * was not set. + * @BASE_JD_EVENT_MEM_GROWTH_FAILED: raised for JIT_ALLOC atoms that failed to + * grow memory on demand + * @BASE_JD_EVENT_JOB_CANCELLED: raised when this atom was hard-stopped or its + * dependencies failed + * @BASE_JD_EVENT_JOB_INVALID: raised for many reasons, including invalid data + * in the atom which overlaps with + * BASE_JD_EVENT_JOB_CONFIG_FAULT, or if the + * platform doesn't support the feature specified in + * the atom. 
+ * @BASE_JD_EVENT_PM_EVENT: TODO: remove as it's not used + * @BASE_JD_EVENT_TIMED_OUT: TODO: remove as it's not used + * @BASE_JD_EVENT_BAG_INVALID: TODO: remove as it's not used + * @BASE_JD_EVENT_PROGRESS_REPORT: TODO: remove as it's not used + * @BASE_JD_EVENT_BAG_DONE: TODO: remove as it's not used + * @BASE_JD_EVENT_DRV_TERMINATED: this is a special event generated to indicate + * to userspace that the KBase context has been + * destroyed and Base should stop listening for + * further events + * @BASE_JD_EVENT_REMOVED_FROM_NEXT: raised when an atom that was configured in + * the GPU has to be retried (but it has not + * started) due to e.g., GPU reset + * @BASE_JD_EVENT_END_RP_DONE: this is used for incremental rendering to signal + * the completion of a renderpass. This value + * shouldn't be returned to userspace but I haven't + * seen where it is reset back to JD_EVENT_DONE. + * + * HW and low-level SW events are represented by event codes. + * The status of jobs which succeeded is also represented by + * an event code (see @BASE_JD_EVENT_DONE). + * Events are usually reported as part of a &struct base_jd_event. + * + * The event codes are encoded in the following way: + * * 10:0 - subtype + * * 12:11 - type + * * 13 - SW success (only valid if the SW bit is set) + * * 14 - SW event (HW event if not set) + * * 15 - Kernel event (should never be seen in userspace) + * + * Events are split up into ranges as follows: + * * BASE_JD_EVENT_RANGE_<description>_START + * * BASE_JD_EVENT_RANGE_<description>_END + * + * code is in <description>'s range when: + * BASE_JD_EVENT_RANGE_<description>_START <= code < + * BASE_JD_EVENT_RANGE_<description>_END + * + * Ranges can be asserted for adjacency by testing that the END of the previous + * is equal to the START of the next. This is useful for optimizing some tests + * for range. + * + * A limitation is that the last member of this enum must explicitly be handled + * (with an assert-unreachable statement) in switch statements that use + * variables of this type.
Otherwise, the compiler warns that we have not + * handled that enum value. + */ +enum base_jd_event_code { + /* HW defined exceptions */ + BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0, + + /* non-fatal exceptions */ + BASE_JD_EVENT_NOT_STARTED = 0x00, + BASE_JD_EVENT_DONE = 0x01, + BASE_JD_EVENT_STOPPED = 0x03, + BASE_JD_EVENT_TERMINATED = 0x04, + BASE_JD_EVENT_ACTIVE = 0x08, + + BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40, + BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40, + + /* job exceptions */ + BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40, + BASE_JD_EVENT_JOB_POWER_FAULT = 0x41, + BASE_JD_EVENT_JOB_READ_FAULT = 0x42, + BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43, + BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44, + BASE_JD_EVENT_JOB_BUS_FAULT = 0x48, + BASE_JD_EVENT_INSTR_INVALID_PC = 0x50, + BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51, + BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52, + BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53, + BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54, + BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55, + BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56, + BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58, + BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59, + BASE_JD_EVENT_STATE_FAULT = 0x5A, + BASE_JD_EVENT_OUT_OF_MEMORY = 0x60, + BASE_JD_EVENT_UNKNOWN = 0x7F, + + /* GPU exceptions */ + BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80, + BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88, + + /* MMU exceptions */ + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1, + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2, + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3, + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4, + BASE_JD_EVENT_PERMISSION_FAULT = 0xC8, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4, + BASE_JD_EVENT_ACCESS_FLAG = 0xD8, + + /* SW defined exceptions */ + BASE_JD_EVENT_MEM_GROWTH_FAILED = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000, + BASE_JD_EVENT_TIMED_OUT = + 
BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001, + BASE_JD_EVENT_JOB_CANCELLED = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002, + BASE_JD_EVENT_JOB_INVALID = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003, + BASE_JD_EVENT_PM_EVENT = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004, + + BASE_JD_EVENT_BAG_INVALID = + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003, + + BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_RESERVED | 0x3FF, + + BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_SUCCESS | 0x000, + + BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000, + BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | + BASE_JD_SW_EVENT_BAG | 0x000, + BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000, + + BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF, + + BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_KERNEL | 0x000, + BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000, + BASE_JD_EVENT_END_RP_DONE = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x001, + + BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | + BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF +}; + +/** + * struct base_jd_event_v2 - Event reporting structure + * + * @event_code: event code. + * @atom_number: the atom number that has completed. + * @udata: user data. + * + * This structure is used by the kernel driver to report information + * about GPU events. They can either be HW-specific events or low-level + * SW events, such as job-chain completion. + * + * The event code contains an event type field which can be extracted + * by ANDing with BASE_JD_SW_EVENT_TYPE_MASK. 
+ */ +struct base_jd_event_v2 { + enum base_jd_event_code event_code; + base_atom_id atom_number; + struct base_jd_udata udata; +}; + +/** + * struct base_dump_cpu_gpu_counters - Structure for + * BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS + * jobs. + * @system_time: gpu timestamp + * @cycle_counter: gpu cycle count + * @sec: cpu time(sec) + * @usec: cpu time(usec) + * @padding: padding + * + * This structure is stored into the memory pointed to by the @jc field + * of &struct base_jd_atom. + * + * It must not occupy the same CPU cache line(s) as any neighboring data. + * This is to avoid cases where access to pages containing the structure + * is shared between cached and un-cached memory regions, which would + * cause memory corruption. + */ + +struct base_dump_cpu_gpu_counters { + __u64 system_time; + __u64 cycle_counter; + __u64 sec; + __u32 usec; + __u8 padding[36]; +}; + +#endif /* _UAPI_BASE_JM_KERNEL_H_ */ diff --git a/dvalin/kernel/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h new file mode 100644 index 0000000..72d75cb --- /dev/null +++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h @@ -0,0 +1,223 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _UAPI_KBASE_JM_IOCTL_H_ +#define _UAPI_KBASE_JM_IOCTL_H_ + +#include +#include + +/* + * 11.1: + * - Add BASE_MEM_TILER_ALIGN_TOP under base_mem_alloc_flags + * 11.2: + * - KBASE_MEM_QUERY_FLAGS can return KBASE_REG_PF_GROW and KBASE_REG_PROTECTED, + * which some user-side clients prior to 11.2 might fault if they received + * them + * 11.3: + * - New ioctls KBASE_IOCTL_STICKY_RESOURCE_MAP and + * KBASE_IOCTL_STICKY_RESOURCE_UNMAP + * 11.4: + * - New ioctl KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET + * 11.5: + * - New ioctl: KBASE_IOCTL_MEM_JIT_INIT (old ioctl renamed to _OLD) + * 11.6: + * - Added flags field to base_jit_alloc_info structure, which can be used to + * specify pseudo chunked tiler alignment for JIT allocations. + * 11.7: + * - Removed UMP support + * 11.8: + * - Added BASE_MEM_UNCACHED_GPU under base_mem_alloc_flags + * 11.9: + * - Added BASE_MEM_PERMANENT_KERNEL_MAPPING and BASE_MEM_FLAGS_KERNEL_ONLY + * under base_mem_alloc_flags + * 11.10: + * - Enabled the use of nr_extres field of base_jd_atom_v2 structure for + * JIT_ALLOC and JIT_FREE type softjobs to enable multiple JIT allocations + * with one softjob. + * 11.11: + * - Added BASE_MEM_GPU_VA_SAME_4GB_PAGE under base_mem_alloc_flags + * 11.12: + * - Removed ioctl: KBASE_IOCTL_GET_PROFILING_CONTROLS + * 11.13: + * - New ioctl: KBASE_IOCTL_MEM_EXEC_INIT + * 11.14: + * - Add BASE_MEM_GROUP_ID_MASK, base_mem_group_id_get, base_mem_group_id_set + * under base_mem_alloc_flags + * 11.15: + * - Added BASEP_CONTEXT_MMU_GROUP_ID_MASK under base_context_create_flags. + * - Require KBASE_IOCTL_SET_FLAGS before BASE_MEM_MAP_TRACKING_HANDLE can be + * passed to mmap(). + * 11.16: + * - Extended ioctl KBASE_IOCTL_MEM_SYNC to accept imported dma-buf. 
+ * - Modified (backwards compatible) ioctl KBASE_IOCTL_MEM_IMPORT behavior for + * dma-buf. Now, buffers are mapped on GPU when first imported, no longer + * requiring external resource or sticky resource tracking, unless + * CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND is enabled. + * 11.17: + * - Added BASE_JD_REQ_JOB_SLOT. + * - Reused padding field in base_jd_atom_v2 to pass job slot number. + * - New ioctl: KBASE_IOCTL_GET_CPU_GPU_TIMEINFO + * 11.18: + * - Added BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP under base_mem_alloc_flags + * 11.19: + * - Extended base_jd_atom_v2 to allow a renderpass ID to be specified. + * 11.20: + * - Added new phys_pages member to kbase_ioctl_mem_jit_init for + * KBASE_IOCTL_MEM_JIT_INIT, previous variants of this renamed to use _10_2 + * (replacing '_OLD') and _11_5 suffixes + * - Replaced compat_core_req (deprecated in 10.3) with jit_id[2] in + * base_jd_atom_v2. It must currently be initialized to zero. + * - Added heap_info_gpu_addr to base_jit_alloc_info, and + * BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE allowable in base_jit_alloc_info's + * flags member. Previous variants of this structure are kept and given _10_2 + * and _11_5 suffixes. + * - The above changes are checked for safe values in usual builds + * 11.21: + * - v2.0 of mali_trace debugfs file, which now versions the file separately + * 11.22: + * - Added base_jd_atom (v3), which is seq_nr + base_jd_atom_v2. + * KBASE_IOCTL_JOB_SUBMIT supports both in parallel. + * 11.23: + * - Modified KBASE_IOCTL_MEM_COMMIT behavior to reject requests to modify + * the physical memory backing of JIT allocations. This was not supposed + * to be a valid use case, but it was allowed by the previous implementation. + * 11.24: + * - Added a sysfs file 'serialize_jobs' inside a new sub-directory + * 'scheduling'. + * 11.25: + * - Enabled JIT pressure limit in base/kbase by default + * 11.26: + * - Added kinstr_jm API + * 11.27: + * - Backwards compatible extension to HWC ioctl.
+ * 11.28: + * - Added kernel side cache ops needed hint + * 11.29: + * - Reserve ioctl 52 + * 11.30: + * - Add a new priority level BASE_JD_PRIO_REALTIME + * - Add ioctl 54: This controls the priority setting. + * 11.31: + * - Added BASE_JD_REQ_LIMITED_CORE_MASK. + * - Added ioctl 55: set_limited_core_count. + */ +#define BASE_UK_VERSION_MAJOR 11 +#define BASE_UK_VERSION_MINOR 31 + +/** + * struct kbase_ioctl_version_check - Check version compatibility between + * kernel and userspace + * + * @major: Major version number + * @minor: Minor version number + */ +struct kbase_ioctl_version_check { + __u16 major; + __u16 minor; +}; + +#define KBASE_IOCTL_VERSION_CHECK \ + _IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check) + + +/** + * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel + * + * @addr: Memory address of an array of struct base_jd_atom_v2 or v3 + * @nr_atoms: Number of entries in the array + * @stride: sizeof(struct base_jd_atom_v2) or sizeof(struct base_jd_atom) + */ +struct kbase_ioctl_job_submit { + __u64 addr; + __u32 nr_atoms; + __u32 stride; +}; + +#define KBASE_IOCTL_JOB_SUBMIT \ + _IOW(KBASE_IOCTL_TYPE, 2, struct kbase_ioctl_job_submit) + +#define KBASE_IOCTL_POST_TERM \ + _IO(KBASE_IOCTL_TYPE, 4) + +/** + * struct kbase_ioctl_soft_event_update - Update the status of a soft-event + * @event: GPU address of the event which has been updated + * @new_status: The new status to set + * @flags: Flags for future expansion + */ +struct kbase_ioctl_soft_event_update { + __u64 event; + __u32 new_status; + __u32 flags; +}; + +#define KBASE_IOCTL_SOFT_EVENT_UPDATE \ + _IOW(KBASE_IOCTL_TYPE, 28, struct kbase_ioctl_soft_event_update) + +/** + * struct kbase_kinstr_jm_fd_out - Explains the compatibility information for + * the `struct kbase_kinstr_jm_atom_state_change` structure returned from the + * kernel + * + * @size: The size of the `struct kbase_kinstr_jm_atom_state_change` + * @version: Represents a breaking change in the + * `struct 
kbase_kinstr_jm_atom_state_change` + * @padding: Explicit padding to get the structure up to 64bits. See + * https://www.kernel.org/doc/Documentation/ioctl/botching-up-ioctls.rst + * + * The `struct kbase_kinstr_jm_atom_state_change` may have extra members at the + * end of the structure that older user space might not understand. If the + * `version` is the same, the structure is still compatible with newer kernels. + * The `size` can be used to cast the opaque memory returned from the kernel. + */ +struct kbase_kinstr_jm_fd_out { + __u16 size; + __u8 version; + __u8 padding[5]; +}; + +/** + * struct kbase_kinstr_jm_fd_in - Options when creating the file descriptor + * + * @count: Number of atom states that can be stored in the kernel circular + * buffer. Must be a power of two + * @padding: Explicit padding to get the structure up to 64bits. See + * https://www.kernel.org/doc/Documentation/ioctl/botching-up-ioctls.rst + */ +struct kbase_kinstr_jm_fd_in { + __u16 count; + __u8 padding[6]; +}; + +union kbase_kinstr_jm_fd { + struct kbase_kinstr_jm_fd_in in; + struct kbase_kinstr_jm_fd_out out; +}; + +#define KBASE_IOCTL_KINSTR_JM_FD \ + _IOWR(KBASE_IOCTL_TYPE, 51, union kbase_kinstr_jm_fd) + + +#define KBASE_IOCTL_VERSION_CHECK_RESERVED \ + _IOWR(KBASE_IOCTL_TYPE, 52, struct kbase_ioctl_version_check) + +#endif /* _UAPI_KBASE_JM_IOCTL_H_ */ diff --git a/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_base_kernel.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_base_kernel.h new file mode 100644 index 0000000..a46c41f --- /dev/null +++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_base_kernel.h @@ -0,0 +1,826 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * Base structures shared with the kernel. + */ + +#ifndef _UAPI_BASE_KERNEL_H_ +#define _UAPI_BASE_KERNEL_H_ + +#include + +struct base_mem_handle { + struct { + __u64 handle; + } basep; +}; + +#include "mali_base_mem_priv.h" +#include "gpu/mali_kbase_gpu_id.h" +#include "gpu/mali_kbase_gpu_coherency.h" + +#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4 + +#define BASE_MAX_COHERENT_GROUPS 16 + +#if defined(CDBG_ASSERT) +#define LOCAL_ASSERT CDBG_ASSERT +#elif defined(KBASE_DEBUG_ASSERT) +#define LOCAL_ASSERT KBASE_DEBUG_ASSERT +#else +#if defined(__KERNEL__) +#error assert macro not defined! +#else +#define LOCAL_ASSERT(...) ((void)#__VA_ARGS__) +#endif +#endif + +#if defined(PAGE_MASK) && defined(PAGE_SHIFT) +#define LOCAL_PAGE_SHIFT PAGE_SHIFT +#define LOCAL_PAGE_LSB ~PAGE_MASK +#else +#ifndef OSU_CONFIG_CPU_PAGE_SIZE_LOG2 +#define OSU_CONFIG_CPU_PAGE_SIZE_LOG2 12 +#endif + +#if defined(OSU_CONFIG_CPU_PAGE_SIZE_LOG2) +#define LOCAL_PAGE_SHIFT OSU_CONFIG_CPU_PAGE_SIZE_LOG2 +#define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1) +#else +#error Failed to find page size +#endif +#endif + +/* Physical memory group ID for normal usage. + */ +#define BASE_MEM_GROUP_DEFAULT (0) + +/* Number of physical memory groups. 
+ */ +#define BASE_MEM_GROUP_COUNT (16) + +/** + * typedef base_mem_alloc_flags - Memory allocation, access/hint flags. + * + * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator + * in order to determine the best cache policy. Some combinations are + * of course invalid (e.g. MEM_PROT_CPU_WR | MEM_HINT_CPU_RD), + * which defines a write-only region on the CPU side, which is + * heavily read by the CPU... + * Other flags are only meaningful to a particular allocator. + * More flags can be added to this list, as long as they don't clash + * (see BASE_MEM_FLAGS_NR_BITS for the number of the first free bit). + */ +typedef __u32 base_mem_alloc_flags; + +/* A mask for all the flags which are modifiable via the base_mem_set_flags + * interface. + */ +#define BASE_MEM_FLAGS_MODIFIABLE \ + (BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \ + BASE_MEM_COHERENT_LOCAL) + +/* A mask of all the flags that can be returned via the base_mem_get_flags() + * interface. + */ +#define BASE_MEM_FLAGS_QUERYABLE \ + (BASE_MEM_FLAGS_INPUT_MASK & ~(BASE_MEM_SAME_VA | \ + BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_DONT_NEED | \ + BASE_MEM_IMPORT_SHARED | BASE_MEM_FLAGS_RESERVED | \ + BASEP_MEM_FLAGS_KERNEL_ONLY)) + +/** + * enum base_mem_import_type - Memory types supported by @a base_mem_import + * + * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type + * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int) + * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a + * base_mem_import_user_buffer + * + * Each type defines what the supported handle type is. + * + * If any new type is added here ARM must be contacted + * to allocate a numeric value for it. + * Do not just add a new type without synchronizing with ARM + * as future releases from ARM might include other new types + * which could clash with your custom types. 
+ */ +enum base_mem_import_type { + BASE_MEM_IMPORT_TYPE_INVALID = 0, + /* + * Import type with value 1 is deprecated. + */ + BASE_MEM_IMPORT_TYPE_UMM = 2, + BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3 +}; + +/** + * struct base_mem_import_user_buffer - Handle of an imported user buffer + * + * @ptr: address of imported user buffer + * @length: length of imported user buffer in bytes + * + * This structure is used to represent a handle of an imported user buffer. + */ + +struct base_mem_import_user_buffer { + __u64 ptr; + __u64 length; +}; + +/* Mask to detect 4GB boundary alignment */ +#define BASE_MEM_MASK_4GB 0xfffff000UL +/* Mask to detect 4GB boundary (in page units) alignment */ +#define BASE_MEM_PFN_MASK_4GB (BASE_MEM_MASK_4GB >> LOCAL_PAGE_SHIFT) + +/* Limit on the 'extension' parameter for an allocation with the + * BASE_MEM_TILER_ALIGN_TOP flag set + * + * This is the same as the maximum limit for a Buffer Descriptor's chunk size + */ +#define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2 \ + (21u - (LOCAL_PAGE_SHIFT)) +#define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES \ + (1ull << (BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2)) + +/* Bit mask of cookies used for memory allocation setup */ +#define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */ + +/* Maximum size allowed in a single KBASE_IOCTL_MEM_ALLOC call */ +#define KBASE_MEM_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */ + +/* + * struct base_fence - Cross-device synchronisation fence. + * + * A fence is used to signal when the GPU has finished accessing a resource that + * may be shared with other devices, and also to delay work done asynchronously + * by the GPU until other devices have finished accessing a shared resource. + */ +struct base_fence { + struct { + int fd; + int stream_fd; + } basep; +}; + +/** + * struct base_mem_aliasing_info - Memory aliasing info + * + * Describes a memory handle to be aliased.
+ * A subset of the handle can be chosen for aliasing, given an offset and a + * length. + * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a + * region where a special page is mapped with a write-alloc cache setup, + * typically used when the write result of the GPU isn't needed, but the GPU + * must write anyway. + * + * Offset and length are specified in pages. + * Offset must be within the size of the handle. + * Offset+length must not overrun the size of the handle. + * + * @handle: Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE + * @offset: Offset within the handle to start aliasing from, in pages. + * Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE. + * @length: Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE + * specifies the number of times the special page is needed. + */ +struct base_mem_aliasing_info { + struct base_mem_handle handle; + __u64 offset; + __u64 length; +}; + +/* Maximum percentage of just-in-time memory allocation trimming to perform + * on free. + */ +#define BASE_JIT_MAX_TRIM_LEVEL (100) + +/* Maximum number of concurrent just-in-time memory allocations. 
+ */ +#define BASE_JIT_ALLOC_COUNT (255) + +/* base_jit_alloc_info in use for kernel driver versions 10.2 to early 11.5 + * + * jit_version is 1 + * + * Due to the lack of padding specified, user clients between 32 and 64-bit + * may have assumed a different size of the struct + * + * An array of structures was not supported + */ +struct base_jit_alloc_info_10_2 { + __u64 gpu_alloc_addr; + __u64 va_pages; + __u64 commit_pages; + __u64 extension; + __u8 id; +}; + +/* base_jit_alloc_info introduced by kernel driver version 11.5, and in use up + * to 11.19 + * + * This structure had a number of modifications during and after kernel driver + * version 11.5, but remains size-compatible throughout its version history, and + * with earlier variants compatible with future variants by requiring + * zero-initialization to the unused space in the structure. + * + * jit_version is 2 + * + * Kernel driver version history: + * 11.5: Initial introduction with 'usage_id' and padding[5]. All padding bytes + * must be zero. Kbase minor version was not incremented, so some + * versions of 11.5 do not have this change. + * 11.5: Added 'bin_id' and 'max_allocations', replacing 2 padding bytes (Kbase + * minor version not incremented) + * 11.6: Added 'flags', replacing 1 padding byte + * 11.10: Arrays of this structure are supported + */ +struct base_jit_alloc_info_11_5 { + __u64 gpu_alloc_addr; + __u64 va_pages; + __u64 commit_pages; + __u64 extension; + __u8 id; + __u8 bin_id; + __u8 max_allocations; + __u8 flags; + __u8 padding[2]; + __u16 usage_id; +}; + +/** + * struct base_jit_alloc_info - Structure which describes a JIT allocation + * request. + * @gpu_alloc_addr: The GPU virtual address to write the JIT + * allocated GPU virtual address to. + * @va_pages: The minimum number of virtual pages required. + * @commit_pages: The minimum number of physical pages which + * should back the allocation. 
+ * @extension: Granularity of physical pages to grow the + * allocation by during a fault. + * @id: Unique ID provided by the caller, this is used + * to pair allocation and free requests. + * Zero is not a valid value. + * @bin_id: The JIT allocation bin, used in conjunction with + * @max_allocations to limit the number of each + * type of JIT allocation. + * @max_allocations: The maximum number of allocations allowed within + * the bin specified by @bin_id. Should be the same + * for all allocations within the same bin. + * @flags: flags specifying the special requirements for + * the JIT allocation, see + * %BASE_JIT_ALLOC_VALID_FLAGS + * @padding: Expansion space - should be initialised to zero + * @usage_id: A hint about which allocation should be reused. + * The kernel should attempt to use a previous + * allocation with the same usage_id + * @heap_info_gpu_addr: Pointer to an object in GPU memory describing + * the actual usage of the region. + * + * jit_version is 3. + * + * When modifications are made to this structure, it is still compatible with + * jit_version 3 when: a) the size is unchanged, and b) new members only + * replace the padding bytes. + * + * Previous jit_version history: + * jit_version == 1, refer to &base_jit_alloc_info_10_2 + * jit_version == 2, refer to &base_jit_alloc_info_11_5 + * + * Kbase version history: + * 11.20: added @heap_info_gpu_addr + */ +struct base_jit_alloc_info { + __u64 gpu_alloc_addr; + __u64 va_pages; + __u64 commit_pages; + __u64 extension; + __u8 id; + __u8 bin_id; + __u8 max_allocations; + __u8 flags; + __u8 padding[2]; + __u16 usage_id; + __u64 heap_info_gpu_addr; +}; + +enum base_external_resource_access { + BASE_EXT_RES_ACCESS_SHARED, + BASE_EXT_RES_ACCESS_EXCLUSIVE +}; + +struct base_external_resource { + __u64 ext_resource; +}; + + +/** + * The maximum number of external resources which can be mapped/unmapped + * in a single request. 
+ */ +#define BASE_EXT_RES_COUNT_MAX 10 + +/** + * struct base_external_resource_list - Structure which describes a list of + * external resources. + * @count: The number of resources. + * @ext_res: Array of external resources which is + * sized at allocation time. + */ +struct base_external_resource_list { + __u64 count; + struct base_external_resource ext_res[1]; +}; + +struct base_jd_debug_copy_buffer { + __u64 address; + __u64 size; + struct base_external_resource extres; +}; + +#define GPU_MAX_JOB_SLOTS 16 + +/** + * User-side Base GPU Property Queries + * + * The User-side Base GPU Property Query interface encapsulates two + * sub-modules: + * + * - "Dynamic GPU Properties" + * - "Base Platform Config GPU Properties" + * + * Base only deals with properties that vary between different GPU + * implementations - the Dynamic GPU properties and the Platform Config + * properties. + * + * For properties that are constant for the GPU Architecture, refer to the + * GPU module. However, we will discuss their relevance here just to + * provide background information. + * + * About the GPU Properties in Base and GPU modules + * + * The compile-time properties (Platform Config, GPU Compile-time + * properties) are exposed as pre-processor macros. + * + * Complementing the compile-time properties are the Dynamic GPU + * Properties, which act as a conduit for the GPU Configuration + * Discovery. + * + * In general, the dynamic properties are present to verify that the platform + * has been configured correctly with the right set of Platform Config + * Compile-time Properties. + * + * As a consistent guide across the entire DDK, the choice for dynamic or + * compile-time should consider the following, in order: + * 1. Can the code be written so that it doesn't need to know the + * implementation limits at all? + * 2. If you need the limits, get the information from the Dynamic Property + * lookup. 
This should be done once as you fetch the context, and then cached + * as part of the context data structure, so it's cheap to access. + * 3. If there's a clear and arguable inefficiency in using Dynamic Properties, + * then use a Compile-Time Property (Platform Config, or GPU Compile-time + * property). Examples of where this might be sensible follow: + * - Part of a critical inner-loop + * - Frequent re-use throughout the driver, causing significant extra load + * instructions or control flow that would be worthwhile optimizing out. + * + * We cannot provide an exhaustive set of examples, neither can we provide a + * rule for every possible situation. Use common sense, and think about: what + * the rest of the driver will be doing; how the compiler might represent the + * value if it is a compile-time constant; whether an OEM shipping multiple + * devices would benefit much more from a single DDK binary, instead of + * insignificant micro-optimizations. + * + * Dynamic GPU Properties + * + * Dynamic GPU properties are presented in two sets: + * 1. the commonly used properties in @ref base_gpu_props, which have been + * unpacked from GPU register bitfields. + * 2. The full set of raw, unprocessed properties in gpu_raw_gpu_props + * (also a member of base_gpu_props). All of these are presented in + * the packed form, as presented by the GPU registers themselves. + * + * The raw properties in gpu_raw_gpu_props are necessary to + * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device + * behaving differently?". In this case, all information about the + * configuration is potentially useful, but it does not need to be processed + * by the driver. Instead, the raw registers can be processed by the Mali + * Tools software on the host PC. + * + * The properties returned extend the GPU Configuration Discovery + * registers. 
For example, GPU clock speed is not specified in the GPU + * Architecture, but is necessary for OpenCL's clGetDeviceInfo() function. + * + * The GPU properties are obtained by a call to + * base_get_gpu_props(). This simply returns a pointer to a const + * base_gpu_props structure. It is constant for the life of a base + * context. Multiple calls to base_get_gpu_props() to a base context + * return the same pointer to a constant structure. This avoids cache pollution + * of the common data. + * + * This pointer must not be freed, because it does not point to the start of a + * region allocated by the memory allocator; instead, just close the @ref + * base_context. + * + * + * Kernel Operation + * + * During Base Context Create time, user-side makes a single kernel call: + * - A call to fill user memory with GPU information structures + * + * The kernel-side will fill the provided memory with the entire processed + * base_gpu_props structure, because this information is required in both + * user and kernel side; it does not make sense to decode it twice. + * + * Coherency groups must be derived from the bitmasks, but this can be done + * kernel side, and just once at kernel startup: Coherency groups must already + * be known kernel-side, to support chains that specify a 'Only Coherent Group' + * SW requirement, or 'Only Coherent Group with Tiler' SW requirement. + * + * Coherency Group calculation + * + * Creation of the coherent group data is done at device-driver startup, and so + * is one-time. This will most likely involve a loop with CLZ, shifting, and + * bit clearing on the L2_PRESENT mask, depending on whether the + * system is L2 Coherent. The number of shader cores is determined by a + * population count, since faulty cores may be disabled during production, + * producing a non-contiguous mask.
+ * + * The memory requirements for this algorithm can be determined either by a __u64 + * population count on the L2_PRESENT mask (a LUT helper already is + * required for the above), or by the simple assumption that there can be no more than + * 16 coherent groups, since core groups are typically 4 cores. + */ + +#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4 + +#define BASE_MAX_COHERENT_GROUPS 16 +/** + * struct mali_base_gpu_core_props - GPU core props info + * @product_id: Product specific value. + * @version_status: Status of the GPU release. No defined values, but starts at + * 0 and increases by one for each release status (alpha, beta, EAC, etc.). + * 4 bit values (0-15). + * @minor_revision: Minor release number of the GPU. "P" part of an "RnPn" + * release number. + * 8 bit values (0-255). + * @major_revision: Major release number of the GPU. "R" part of an "RnPn" + * release number. + * 4 bit values (0-15). + * @padding: padding to align to 8 bytes + * @gpu_freq_khz_max: The maximum GPU frequency. Reported to applications by + * clGetDeviceInfo() + * @log2_program_counter_size: Size of the shader program counter, in bits. + * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU. This + * is a bitpattern where a set bit indicates that the format is supported. + * Before using a texture format, it is recommended that the corresponding + * bit be checked. + * @gpu_available_memory_size: Theoretical maximum memory available to the GPU. + * It is unlikely that a client will be able to allocate all of this memory + * for their own purposes, but this at least provides an upper bound on the + * memory available to the GPU. + * This is required for OpenCL's clGetDeviceInfo() call when + * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The + * client will not be expecting to allocate anywhere near this value. + * @num_exec_engines: The number of execution engines.
+ */ +struct mali_base_gpu_core_props { + __u32 product_id; + __u16 version_status; + __u16 minor_revision; + __u16 major_revision; + __u16 padding; + __u32 gpu_freq_khz_max; + __u32 log2_program_counter_size; + __u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; + __u64 gpu_available_memory_size; + __u8 num_exec_engines; +}; + +/* + * More information is possible - but associativity and bus width are not + * required by upper-level apis. + */ +struct mali_base_gpu_l2_cache_props { + __u8 log2_line_size; + __u8 log2_cache_size; + __u8 num_l2_slices; /* Number of L2C slices. 1 or higher */ + __u8 padding[5]; +}; + +struct mali_base_gpu_tiler_props { + __u32 bin_size_bytes; /* Max is 4*2^15 */ + __u32 max_active_levels; /* Max is 2^15 */ +}; + +/** + * struct mali_base_gpu_thread_props - GPU threading system details. + * @max_threads: Max. number of threads per core + * @max_workgroup_size: Max. number of threads per workgroup + * @max_barrier_size: Max. number of threads that can synchronize on a + * simple barrier + * @max_registers: Total size [1..65535] of the register file available + * per core. + * @max_task_queue: Max. tasks [1..255] which may be sent to a core + * before it becomes blocked. + * @max_thread_group_split: Max. allowed value [1..15] of the Thread Group Split + * field. 
+ * @impl_tech: 0 = Not specified, 1 = Silicon, 2 = FPGA, + * 3 = SW Model/Emulation + * @padding: padding to align to 8 bytes + * @tls_alloc: Number of threads per core that TLS must be + * allocated for + */ +struct mali_base_gpu_thread_props { + __u32 max_threads; + __u32 max_workgroup_size; + __u32 max_barrier_size; + __u16 max_registers; + __u8 max_task_queue; + __u8 max_thread_group_split; + __u8 impl_tech; + __u8 padding[3]; + __u32 tls_alloc; +}; + +/** + * struct mali_base_gpu_coherent_group - descriptor for a coherent group + * @core_mask: Core restriction mask required for the group + * @num_cores: Number of cores in the group + * @padding: padding to align to 8 bytes + * + * \c core_mask exposes all cores in that coherent group, and \c num_cores + * provides a cached population-count for that mask. + * + * @note Whilst all cores are exposed in the mask, not all may be available to + * the application, depending on the Kernel Power policy. + * + * @note if u64s must be 8-byte aligned, then this structure has 32-bits of + * wastage. + */ +struct mali_base_gpu_coherent_group { + __u64 core_mask; + __u16 num_cores; + __u16 padding[3]; +}; + +/** + * struct mali_base_gpu_coherent_group_info - Coherency group information + * @num_groups: Number of coherent groups in the GPU. + * @num_core_groups: Number of core groups (coherent or not) in the GPU. + * Equivalent to the number of L2 Caches. + * The GPU Counter dumping writes 2048 bytes per core group, regardless + * of whether the core groups are coherent or not. Hence this member is + * needed to calculate how much memory is required for dumping. + * @note Do not use it to work out how many valid elements are in the + * group[] member. Use num_groups instead. + * @coherency: Coherency features of the memory, accessed by gpu_mem_features + * methods + * @padding: padding to align to 8 bytes + * @group: Descriptors of coherent groups + * + * Note that the sizes of the members could be reduced.
However, the \c group + * member might be 8-byte aligned to ensure the __u64 core_mask is 8-byte + * aligned, thus leading to wastage if the other members sizes were reduced. + * + * The groups are sorted by core mask. The core masks are non-repeating and do + * not intersect. + */ +struct mali_base_gpu_coherent_group_info { + __u32 num_groups; + __u32 num_core_groups; + __u32 coherency; + __u32 padding; + struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS]; +}; + +/** + * struct gpu_raw_gpu_props - A complete description of the GPU's Hardware + * Configuration Discovery registers. + * @shader_present: Shader core present bitmap + * @tiler_present: Tiler core present bitmap + * @l2_present: Level 2 cache present bitmap + * @stack_present: Core stack present bitmap + * @l2_features: L2 features + * @core_features: Core features + * @mem_features: Mem features + * @mmu_features: Mmu features + * @as_present: Bitmap of address spaces present + * @js_present: Job slots present + * @js_features: Array of job slot features. + * @tiler_features: Tiler features + * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU + * @gpu_id: GPU and revision identifier + * @thread_max_threads: Maximum number of threads per core + * @thread_max_workgroup_size: Maximum number of threads per workgroup + * @thread_max_barrier_size: Maximum number of threads per barrier + * @thread_features: Thread features + * @coherency_mode: Note: This is the _selected_ coherency mode rather than the + * available modes as exposed in the coherency_features register + * @thread_tls_alloc: Number of threads per core that TLS must be allocated for + * @gpu_features: GPU features + * + * The information is presented inefficiently for access. For frequent access, + * the values should be better expressed in an unpacked form in the + * base_gpu_props structure. + * + * The raw properties in gpu_raw_gpu_props are necessary to + * allow a user of the Mali Tools (e.g. 
PAT) to determine "Why is this device + * behaving differently?". In this case, all information about the + * configuration is potentially useful, but it does not need to be processed + * by the driver. Instead, the raw registers can be processed by the Mali + * Tools software on the host PC. + * + */ +struct gpu_raw_gpu_props { + __u64 shader_present; + __u64 tiler_present; + __u64 l2_present; + __u64 stack_present; + __u32 l2_features; + __u32 core_features; + __u32 mem_features; + __u32 mmu_features; + + __u32 as_present; + + __u32 js_present; + __u32 js_features[GPU_MAX_JOB_SLOTS]; + __u32 tiler_features; + __u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; + + __u32 gpu_id; + + __u32 thread_max_threads; + __u32 thread_max_workgroup_size; + __u32 thread_max_barrier_size; + __u32 thread_features; + + /* + * Note: This is the _selected_ coherency mode rather than the + * available modes as exposed in the coherency_features register. + */ + __u32 coherency_mode; + + __u32 thread_tls_alloc; + __u64 gpu_features; +}; + +/** + * struct base_gpu_props - Return structure for base_get_gpu_props(). + * @core_props: Core props. + * @l2_props: L2 props. + * @unused_1: Keep for backwards compatibility. + * @tiler_props: Tiler props. + * @thread_props: Thread props. + * @raw_props: This member is large, likely to be 128 bytes. + * @coherency_info: This must be last member of the structure. + * + * NOTE: the raw_props member in this data structure contains the register + * values from which the value of the other members are derived. The derived + * members exist to allow for efficient access and/or shielding the details + * of the layout of the registers. 
+ * */ +struct base_gpu_props { + struct mali_base_gpu_core_props core_props; + struct mali_base_gpu_l2_cache_props l2_props; + __u64 unused_1; + struct mali_base_gpu_tiler_props tiler_props; + struct mali_base_gpu_thread_props thread_props; + struct gpu_raw_gpu_props raw_props; + struct mali_base_gpu_coherent_group_info coherency_info; +}; + +#if MALI_USE_CSF +#include "csf/mali_base_csf_kernel.h" +#else +#include "jm/mali_base_jm_kernel.h" +#endif + +/** + * base_mem_group_id_get() - Get group ID from flags + * @flags: Flags to pass to base_mem_alloc + * + * This inline function extracts the encoded group ID from flags + * and converts it into numeric value (0~15). + * + * Return: group ID(0~15) extracted from the parameter + */ +static __inline__ int base_mem_group_id_get(base_mem_alloc_flags flags) +{ + LOCAL_ASSERT((flags & ~BASE_MEM_FLAGS_INPUT_MASK) == 0); + return (int)((flags & BASE_MEM_GROUP_ID_MASK) >> + BASEP_MEM_GROUP_ID_SHIFT); +} + +/** + * base_mem_group_id_set() - Set group ID into base_mem_alloc_flags + * @id: group ID(0~15) you want to encode + * + * This inline function encodes specific group ID into base_mem_alloc_flags. + * Parameter 'id' should lie in-between 0 to 15. + * + * Return: base_mem_alloc_flags with the group ID (id) encoded + * + * The return value can be combined with other flags against base_mem_alloc + * to identify a specific memory group. + */ +static __inline__ base_mem_alloc_flags base_mem_group_id_set(int id) +{ + if ((id < 0) || (id >= BASE_MEM_GROUP_COUNT)) { + /* Set to default value when id is out of range. */ + id = BASE_MEM_GROUP_DEFAULT; + } + + return ((base_mem_alloc_flags)id << BASEP_MEM_GROUP_ID_SHIFT) & + BASE_MEM_GROUP_ID_MASK; +} + +/** + * base_context_mmu_group_id_set - Encode a memory group ID in + * base_context_create_flags + * + * Memory allocated for GPU page tables will come from the specified group. + * + * @group_id: Physical memory group ID. Range is 0..(BASE_MEM_GROUP_COUNT-1). 
+ * + * Return: Bitmask of flags to pass to base_context_init. + */ +static __inline__ base_context_create_flags base_context_mmu_group_id_set( + int const group_id) +{ + LOCAL_ASSERT(group_id >= 0); + LOCAL_ASSERT(group_id < BASE_MEM_GROUP_COUNT); + return BASEP_CONTEXT_MMU_GROUP_ID_MASK & + ((base_context_create_flags)group_id << + BASEP_CONTEXT_MMU_GROUP_ID_SHIFT); +} + +/** + * base_context_mmu_group_id_get - Decode a memory group ID from + * base_context_create_flags + * + * Memory allocated for GPU page tables will come from the returned group. + * + * @flags: Bitmask of flags to pass to base_context_init. + * + * Return: Physical memory group ID. Valid range is 0..(BASE_MEM_GROUP_COUNT-1). + */ +static __inline__ int base_context_mmu_group_id_get( + base_context_create_flags const flags) +{ + LOCAL_ASSERT(flags == (flags & BASEP_CONTEXT_CREATE_ALLOWED_FLAGS)); + return (int)((flags & BASEP_CONTEXT_MMU_GROUP_ID_MASK) >> + BASEP_CONTEXT_MMU_GROUP_ID_SHIFT); +} + +/* + * A number of bit flags are defined for requesting cpu_gpu_timeinfo. These + * flags are also used, where applicable, for specifying which fields + * are valid following the request operation. + */ + +/* For monotonic (counter) timefield */ +#define BASE_TIMEINFO_MONOTONIC_FLAG (1UL << 0) +/* For system wide timestamp */ +#define BASE_TIMEINFO_TIMESTAMP_FLAG (1UL << 1) +/* For GPU cycle counter */ +#define BASE_TIMEINFO_CYCLE_COUNTER_FLAG (1UL << 2) +/* Specify kernel GPU register timestamp */ +#define BASE_TIMEINFO_KERNEL_SOURCE_FLAG (1UL << 30) +/* Specify userspace cntvct_el0 timestamp source */ +#define BASE_TIMEINFO_USER_SOURCE_FLAG (1UL << 31) + +#define BASE_TIMEREQUEST_ALLOWED_FLAGS (\ + BASE_TIMEINFO_MONOTONIC_FLAG | \ + BASE_TIMEINFO_TIMESTAMP_FLAG | \ + BASE_TIMEINFO_CYCLE_COUNTER_FLAG | \ + BASE_TIMEINFO_KERNEL_SOURCE_FLAG | \ + BASE_TIMEINFO_USER_SOURCE_FLAG) + +/* Maximum number of source allocations allowed to create an alias allocation. 
+ * This needs to be 4096 * 6 to allow cube map arrays with up to 4096 array + * layers, since each cube map in the array will have 6 faces. + */ +#define BASE_MEM_ALIAS_MAX_ENTS ((size_t)24576) + +#endif /* _UAPI_BASE_KERNEL_H_ */ diff --git a/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h new file mode 100644 index 0000000..304a334 --- /dev/null +++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_base_mem_priv.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2010-2015, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _UAPI_BASE_MEM_PRIV_H_ +#define _UAPI_BASE_MEM_PRIV_H_ + +#include + +#include "mali_base_kernel.h" + +#define BASE_SYNCSET_OP_MSYNC (1U << 0) +#define BASE_SYNCSET_OP_CSYNC (1U << 1) + +/* + * This structure describes a basic memory coherency operation. + * It can either be: + * @li a sync from CPU to Memory: + * - type = ::BASE_SYNCSET_OP_MSYNC + * - mem_handle = a handle to the memory object on which the operation + * is taking place + * - user_addr = the address of the range to be synced + * - size = the amount of data to be synced, in bytes + * - offset is ignored.
+ * @li a sync from Memory to CPU: + * - type = ::BASE_SYNCSET_OP_CSYNC + * - mem_handle = a handle to the memory object on which the operation + * is taking place + * - user_addr = the address of the range to be synced + * - size = the amount of data to be synced, in bytes. + * - offset is ignored. + */ +struct basep_syncset { + struct base_mem_handle mem_handle; + __u64 user_addr; + __u64 size; + __u8 type; + __u8 padding[7]; +}; + +#endif /* _UAPI_BASE_MEM_PRIV_H_ */ diff --git a/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h new file mode 100644 index 0000000..9baaec1 --- /dev/null +++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2015, 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _UAPI_KBASE_HWCNT_READER_H_ +#define _UAPI_KBASE_HWCNT_READER_H_ + +#include <stddef.h> +#include <linux/types.h> + +/* The ids of ioctl commands.
*/ +#define KBASE_HWCNT_READER 0xBE +#define KBASE_HWCNT_READER_GET_HWVER _IOR(KBASE_HWCNT_READER, 0x00, __u32) +#define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, __u32) +#define KBASE_HWCNT_READER_DUMP _IOW(KBASE_HWCNT_READER, 0x10, __u32) +#define KBASE_HWCNT_READER_CLEAR _IOW(KBASE_HWCNT_READER, 0x11, __u32) +#define KBASE_HWCNT_READER_GET_BUFFER _IOC(_IOC_READ, KBASE_HWCNT_READER, 0x20,\ + offsetof(struct kbase_hwcnt_reader_metadata, cycles)) +#define KBASE_HWCNT_READER_GET_BUFFER_WITH_CYCLES _IOR(KBASE_HWCNT_READER, 0x20,\ + struct kbase_hwcnt_reader_metadata) +#define KBASE_HWCNT_READER_PUT_BUFFER _IOC(_IOC_WRITE, KBASE_HWCNT_READER, 0x21,\ + offsetof(struct kbase_hwcnt_reader_metadata, cycles)) +#define KBASE_HWCNT_READER_PUT_BUFFER_WITH_CYCLES _IOW(KBASE_HWCNT_READER, 0x21,\ + struct kbase_hwcnt_reader_metadata) +#define KBASE_HWCNT_READER_SET_INTERVAL _IOW(KBASE_HWCNT_READER, 0x30, __u32) +#define KBASE_HWCNT_READER_ENABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x40, __u32) +#define KBASE_HWCNT_READER_DISABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x41, __u32) +#define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, __u32) +#define KBASE_HWCNT_READER_GET_API_VERSION_WITH_FEATURES \ + _IOW(KBASE_HWCNT_READER, 0xFF, \ + struct kbase_hwcnt_reader_api_version) + +/** + * struct kbase_hwcnt_reader_metadata_cycles - GPU clock cycles + * @top: the number of cycles associated with the main clock for the + * GPU + * @shader_cores: the cycles that have elapsed on the GPU shader cores + */ +struct kbase_hwcnt_reader_metadata_cycles { + __u64 top; + __u64 shader_cores; +}; + +/** + * struct kbase_hwcnt_reader_metadata - hwcnt reader sample buffer metadata + * @timestamp: time when sample was collected + * @event_id: id of an event that triggered sample collection + * @buffer_idx: position in sampling area where sample buffer was stored + * @cycles: the GPU cycles that occurred since the last sample + */ +struct kbase_hwcnt_reader_metadata { + 
__u64 timestamp; + __u32 event_id; + __u32 buffer_idx; + struct kbase_hwcnt_reader_metadata_cycles cycles; +}; + +/** + * enum base_hwcnt_reader_event - hwcnt dumping events + * @BASE_HWCNT_READER_EVENT_MANUAL: manual request for dump + * @BASE_HWCNT_READER_EVENT_PERIODIC: periodic dump + * @BASE_HWCNT_READER_EVENT_PREJOB: prejob dump request + * @BASE_HWCNT_READER_EVENT_POSTJOB: postjob dump request + * @BASE_HWCNT_READER_EVENT_COUNT: number of supported events + */ +enum base_hwcnt_reader_event { + BASE_HWCNT_READER_EVENT_MANUAL, + BASE_HWCNT_READER_EVENT_PERIODIC, + BASE_HWCNT_READER_EVENT_PREJOB, + BASE_HWCNT_READER_EVENT_POSTJOB, + BASE_HWCNT_READER_EVENT_COUNT +}; + +#define KBASE_HWCNT_READER_API_VERSION_NO_FEATURE (0) +#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_TOP (1 << 0) +#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES (1 << 1) +/** + * struct kbase_hwcnt_reader_api_version - hwcnt reader API version + * @version: API version + * @features: available features in this API version + */ +struct kbase_hwcnt_reader_api_version { + __u32 version; + __u32 features; +}; + +#endif /* _UAPI_KBASE_HWCNT_READER_H_ */ + diff --git a/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h new file mode 100644 index 0000000..29ff32a --- /dev/null +++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_kbase_ioctl.h @@ -0,0 +1,836 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2017-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _UAPI_KBASE_IOCTL_H_ +#define _UAPI_KBASE_IOCTL_H_ + +#ifdef __cpluscplus +extern "C" { +#endif + +#include +#include + +#if MALI_USE_CSF +#include "csf/mali_kbase_csf_ioctl.h" +#else +#include "jm/mali_kbase_jm_ioctl.h" +#endif /* MALI_USE_CSF */ + +#define KBASE_IOCTL_TYPE 0x80 + +/** + * struct kbase_ioctl_set_flags - Set kernel context creation flags + * + * @create_flags: Flags - see base_context_create_flags + */ +struct kbase_ioctl_set_flags { + __u32 create_flags; +}; + +#define KBASE_IOCTL_SET_FLAGS \ + _IOW(KBASE_IOCTL_TYPE, 1, struct kbase_ioctl_set_flags) + +/** + * struct kbase_ioctl_get_gpuprops - Read GPU properties from the kernel + * + * @buffer: Pointer to the buffer to store properties into + * @size: Size of the buffer + * @flags: Flags - must be zero for now + * + * The ioctl will return the number of bytes stored into @buffer or an error + * on failure (e.g. @size is too small). If @size is specified as 0 then no + * data will be written but the return value will be the number of bytes needed + * for all the properties. + * + * @flags may be used in the future to request a different format for the + * buffer. With @flags == 0 the following format is used. + * + * The buffer will be filled with pairs of values, a __u32 key identifying the + * property followed by the value. The size of the value is identified using + * the bottom bits of the key. The value then immediately followed the key and + * is tightly packed (there is no padding). All keys and values are + * little-endian. 
+ * + * 00 = __u8 + * 01 = __u16 + * 10 = __u32 + * 11 = __u64 + */ +struct kbase_ioctl_get_gpuprops { + __u64 buffer; + __u32 size; + __u32 flags; +}; + +#define KBASE_IOCTL_GET_GPUPROPS \ + _IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops) + +/** + * union kbase_ioctl_mem_alloc - Allocate memory on the GPU + * @in: Input parameters + * @in.va_pages: The number of pages of virtual address space to reserve + * @in.commit_pages: The number of physical pages to allocate + * @in.extension: The number of extra pages to allocate on each GPU fault which grows the region + * @in.flags: Flags + * @out: Output parameters + * @out.flags: Flags + * @out.gpu_va: The GPU virtual address which is allocated + */ +union kbase_ioctl_mem_alloc { + struct { + __u64 va_pages; + __u64 commit_pages; + __u64 extension; + __u64 flags; + } in; + struct { + __u64 flags; + __u64 gpu_va; + } out; +}; + +#define KBASE_IOCTL_MEM_ALLOC \ + _IOWR(KBASE_IOCTL_TYPE, 5, union kbase_ioctl_mem_alloc) + +/** + * struct kbase_ioctl_mem_query - Query properties of a GPU memory region + * @in: Input parameters + * @in.gpu_addr: A GPU address contained within the region + * @in.query: The type of query + * @out: Output parameters + * @out.value: The result of the query + * + * Use a %KBASE_MEM_QUERY_xxx flag as input for @query. 
+ */ +union kbase_ioctl_mem_query { + struct { + __u64 gpu_addr; + __u64 query; + } in; + struct { + __u64 value; + } out; +}; + +#define KBASE_IOCTL_MEM_QUERY \ + _IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query) + +#define KBASE_MEM_QUERY_COMMIT_SIZE ((__u64)1) +#define KBASE_MEM_QUERY_VA_SIZE ((__u64)2) +#define KBASE_MEM_QUERY_FLAGS ((__u64)3) + +/** + * struct kbase_ioctl_mem_free - Free a memory region + * @gpu_addr: Handle to the region to free + */ +struct kbase_ioctl_mem_free { + __u64 gpu_addr; +}; + +#define KBASE_IOCTL_MEM_FREE \ + _IOW(KBASE_IOCTL_TYPE, 7, struct kbase_ioctl_mem_free) + +/** + * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader + * @buffer_count: requested number of dumping buffers + * @fe_bm: counters selection bitmask (Front end) + * @shader_bm: counters selection bitmask (Shader) + * @tiler_bm: counters selection bitmask (Tiler) + * @mmu_l2_bm: counters selection bitmask (MMU_L2) + * + * A fd is returned from the ioctl if successful, or a negative value on error + */ +struct kbase_ioctl_hwcnt_reader_setup { + __u32 buffer_count; + __u32 fe_bm; + __u32 shader_bm; + __u32 tiler_bm; + __u32 mmu_l2_bm; +}; + +#define KBASE_IOCTL_HWCNT_READER_SETUP \ + _IOW(KBASE_IOCTL_TYPE, 8, struct kbase_ioctl_hwcnt_reader_setup) + +/** + * struct kbase_ioctl_hwcnt_enable - Enable hardware counter collection + * @dump_buffer: GPU address to write counters to + * @fe_bm: counters selection bitmask (Front end) + * @shader_bm: counters selection bitmask (Shader) + * @tiler_bm: counters selection bitmask (Tiler) + * @mmu_l2_bm: counters selection bitmask (MMU_L2) + */ +struct kbase_ioctl_hwcnt_enable { + __u64 dump_buffer; + __u32 fe_bm; + __u32 shader_bm; + __u32 tiler_bm; + __u32 mmu_l2_bm; +}; + +#define KBASE_IOCTL_HWCNT_ENABLE \ + _IOW(KBASE_IOCTL_TYPE, 9, struct kbase_ioctl_hwcnt_enable) + +#define KBASE_IOCTL_HWCNT_DUMP \ + _IO(KBASE_IOCTL_TYPE, 10) + +#define KBASE_IOCTL_HWCNT_CLEAR \ + _IO(KBASE_IOCTL_TYPE, 11) + +/** + * 
struct kbase_ioctl_hwcnt_values - Values to set dummy the dummy counters to. + * @data: Counter samples for the dummy model. + * @size: Size of the counter sample data. + * @padding: Padding. + */ +struct kbase_ioctl_hwcnt_values { + __u64 data; + __u32 size; + __u32 padding; +}; + +#define KBASE_IOCTL_HWCNT_SET \ + _IOW(KBASE_IOCTL_TYPE, 32, struct kbase_ioctl_hwcnt_values) + +/** + * struct kbase_ioctl_disjoint_query - Query the disjoint counter + * @counter: A counter of disjoint events in the kernel + */ +struct kbase_ioctl_disjoint_query { + __u32 counter; +}; + +#define KBASE_IOCTL_DISJOINT_QUERY \ + _IOR(KBASE_IOCTL_TYPE, 12, struct kbase_ioctl_disjoint_query) + +/** + * struct kbase_ioctl_get_ddk_version - Query the kernel version + * @version_buffer: Buffer to receive the kernel version string + * @size: Size of the buffer + * @padding: Padding + * + * The ioctl will return the number of bytes written into version_buffer + * (which includes a NULL byte) or a negative error code + * + * The ioctl request code has to be _IOW because the data in ioctl struct is + * being copied to the kernel, even though the kernel then writes out the + * version info to the buffer specified in the ioctl. + */ +struct kbase_ioctl_get_ddk_version { + __u64 version_buffer; + __u32 size; + __u32 padding; +}; + +#define KBASE_IOCTL_GET_DDK_VERSION \ + _IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version) + +/** + * struct kbase_ioctl_mem_jit_init_10_2 - Initialize the just-in-time memory + * allocator (between kernel driver + * version 10.2--11.4) + * @va_pages: Number of VA pages to reserve for JIT + * + * Note that depending on the VA size of the application and GPU, the value + * specified in @va_pages may be ignored. + * + * New code should use KBASE_IOCTL_MEM_JIT_INIT instead, this is kept for + * backwards compatibility. 
+ */ +struct kbase_ioctl_mem_jit_init_10_2 { + __u64 va_pages; +}; + +#define KBASE_IOCTL_MEM_JIT_INIT_10_2 \ + _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_10_2) + +/** + * struct kbase_ioctl_mem_jit_init_11_5 - Initialize the just-in-time memory + * allocator (between kernel driver + * version 11.5--11.19) + * @va_pages: Number of VA pages to reserve for JIT + * @max_allocations: Maximum number of concurrent allocations + * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%) + * @group_id: Group ID to be used for physical allocations + * @padding: Currently unused, must be zero + * + * Note that depending on the VA size of the application and GPU, the value + * specified in @va_pages may be ignored. + * + * New code should use KBASE_IOCTL_MEM_JIT_INIT instead, this is kept for + * backwards compatibility. + */ +struct kbase_ioctl_mem_jit_init_11_5 { + __u64 va_pages; + __u8 max_allocations; + __u8 trim_level; + __u8 group_id; + __u8 padding[5]; +}; + +#define KBASE_IOCTL_MEM_JIT_INIT_11_5 \ + _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_11_5) + +/** + * struct kbase_ioctl_mem_jit_init - Initialize the just-in-time memory + * allocator + * @va_pages: Number of GPU virtual address pages to reserve for just-in-time + * memory allocations + * @max_allocations: Maximum number of concurrent allocations + * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%) + * @group_id: Group ID to be used for physical allocations + * @padding: Currently unused, must be zero + * @phys_pages: Maximum number of physical pages to allocate just-in-time + * + * Note that depending on the VA size of the application and GPU, the value + * specified in @va_pages may be ignored. 
+ */ +struct kbase_ioctl_mem_jit_init { + __u64 va_pages; + __u8 max_allocations; + __u8 trim_level; + __u8 group_id; + __u8 padding[5]; + __u64 phys_pages; +}; + +#define KBASE_IOCTL_MEM_JIT_INIT \ + _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init) + +/** + * struct kbase_ioctl_mem_sync - Perform cache maintenance on memory + * + * @handle: GPU memory handle (GPU VA) + * @user_addr: The address where it is mapped in user space + * @size: The number of bytes to synchronise + * @type: The direction to synchronise: 0 is sync to memory (clean), + * 1 is sync from memory (invalidate). Use the BASE_SYNCSET_OP_xxx constants. + * @padding: Padding to round up to a multiple of 8 bytes, must be zero + */ +struct kbase_ioctl_mem_sync { + __u64 handle; + __u64 user_addr; + __u64 size; + __u8 type; + __u8 padding[7]; +}; + +#define KBASE_IOCTL_MEM_SYNC \ + _IOW(KBASE_IOCTL_TYPE, 15, struct kbase_ioctl_mem_sync) + +/** + * union kbase_ioctl_mem_find_cpu_offset - Find the offset of a CPU pointer + * + * @in: Input parameters + * @in.gpu_addr: The GPU address of the memory region + * @in.cpu_addr: The CPU address to locate + * @in.size: A size in bytes to validate is contained within the region + * @out: Output parameters + * @out.offset: The offset from the start of the memory region to @cpu_addr + */ +union kbase_ioctl_mem_find_cpu_offset { + struct { + __u64 gpu_addr; + __u64 cpu_addr; + __u64 size; + } in; + struct { + __u64 offset; + } out; +}; + +#define KBASE_IOCTL_MEM_FIND_CPU_OFFSET \ + _IOWR(KBASE_IOCTL_TYPE, 16, union kbase_ioctl_mem_find_cpu_offset) + +/** + * struct kbase_ioctl_get_context_id - Get the kernel context ID + * + * @id: The kernel context ID + */ +struct kbase_ioctl_get_context_id { + __u32 id; +}; + +#define KBASE_IOCTL_GET_CONTEXT_ID \ + _IOR(KBASE_IOCTL_TYPE, 17, struct kbase_ioctl_get_context_id) + +/** + * struct kbase_ioctl_tlstream_acquire - Acquire a tlstream fd + * + * @flags: Flags + * + * The ioctl returns a file descriptor when 
successful + */ +struct kbase_ioctl_tlstream_acquire { + __u32 flags; +}; + +#define KBASE_IOCTL_TLSTREAM_ACQUIRE \ + _IOW(KBASE_IOCTL_TYPE, 18, struct kbase_ioctl_tlstream_acquire) + +#define KBASE_IOCTL_TLSTREAM_FLUSH \ + _IO(KBASE_IOCTL_TYPE, 19) + +/** + * struct kbase_ioctl_mem_commit - Change the amount of memory backing a region + * + * @gpu_addr: The memory region to modify + * @pages: The number of physical pages that should be present + * + * The ioctl may return on the following error codes or 0 for success: + * -ENOMEM: Out of memory + * -EINVAL: Invalid arguments + */ +struct kbase_ioctl_mem_commit { + __u64 gpu_addr; + __u64 pages; +}; + +#define KBASE_IOCTL_MEM_COMMIT \ + _IOW(KBASE_IOCTL_TYPE, 20, struct kbase_ioctl_mem_commit) + +/** + * union kbase_ioctl_mem_alias - Create an alias of memory regions + * @in: Input parameters + * @in.flags: Flags, see BASE_MEM_xxx + * @in.stride: Bytes between start of each memory region + * @in.nents: The number of regions to pack together into the alias + * @in.aliasing_info: Pointer to an array of struct base_mem_aliasing_info + * @out: Output parameters + * @out.flags: Flags, see BASE_MEM_xxx + * @out.gpu_va: Address of the new alias + * @out.va_pages: Size of the new alias + */ +union kbase_ioctl_mem_alias { + struct { + __u64 flags; + __u64 stride; + __u64 nents; + __u64 aliasing_info; + } in; + struct { + __u64 flags; + __u64 gpu_va; + __u64 va_pages; + } out; +}; + +#define KBASE_IOCTL_MEM_ALIAS \ + _IOWR(KBASE_IOCTL_TYPE, 21, union kbase_ioctl_mem_alias) + +/** + * union kbase_ioctl_mem_import - Import memory for use by the GPU + * @in: Input parameters + * @in.flags: Flags, see BASE_MEM_xxx + * @in.phandle: Handle to the external memory + * @in.type: Type of external memory, see base_mem_import_type + * @in.padding: Amount of extra VA pages to append to the imported buffer + * @out: Output parameters + * @out.flags: Flags, see BASE_MEM_xxx + * @out.gpu_va: Address of the new alias + * @out.va_pages: Size 
of the new alias + */ +union kbase_ioctl_mem_import { + struct { + __u64 flags; + __u64 phandle; + __u32 type; + __u32 padding; + } in; + struct { + __u64 flags; + __u64 gpu_va; + __u64 va_pages; + } out; +}; + +#define KBASE_IOCTL_MEM_IMPORT \ + _IOWR(KBASE_IOCTL_TYPE, 22, union kbase_ioctl_mem_import) + +/** + * struct kbase_ioctl_mem_flags_change - Change the flags for a memory region + * @gpu_va: The GPU region to modify + * @flags: The new flags to set + * @mask: Mask of the flags to modify + */ +struct kbase_ioctl_mem_flags_change { + __u64 gpu_va; + __u64 flags; + __u64 mask; +}; + +#define KBASE_IOCTL_MEM_FLAGS_CHANGE \ + _IOW(KBASE_IOCTL_TYPE, 23, struct kbase_ioctl_mem_flags_change) + +/** + * struct kbase_ioctl_stream_create - Create a synchronisation stream + * @name: A name to identify this stream. Must be NULL-terminated. + * + * Note that this is also called a "timeline", but is named stream to avoid + * confusion with other uses of the word. + * + * Unused bytes in @name (after the first NULL byte) must be also be NULL bytes. + * + * The ioctl returns a file descriptor. 
+ */ +struct kbase_ioctl_stream_create { + char name[32]; +}; + +#define KBASE_IOCTL_STREAM_CREATE \ + _IOW(KBASE_IOCTL_TYPE, 24, struct kbase_ioctl_stream_create) + +/** + * struct kbase_ioctl_fence_validate - Validate a fd refers to a fence + * @fd: The file descriptor to validate + */ +struct kbase_ioctl_fence_validate { + int fd; +}; + +#define KBASE_IOCTL_FENCE_VALIDATE \ + _IOW(KBASE_IOCTL_TYPE, 25, struct kbase_ioctl_fence_validate) + +/** + * struct kbase_ioctl_mem_profile_add - Provide profiling information to kernel + * @buffer: Pointer to the information + * @len: Length + * @padding: Padding + * + * The data provided is accessible through a debugfs file + */ +struct kbase_ioctl_mem_profile_add { + __u64 buffer; + __u32 len; + __u32 padding; +}; + +#define KBASE_IOCTL_MEM_PROFILE_ADD \ + _IOW(KBASE_IOCTL_TYPE, 27, struct kbase_ioctl_mem_profile_add) + +/** + * struct kbase_ioctl_sticky_resource_map - Permanently map an external resource + * @count: Number of resources + * @address: Array of __u64 GPU addresses of the external resources to map + */ +struct kbase_ioctl_sticky_resource_map { + __u64 count; + __u64 address; +}; + +#define KBASE_IOCTL_STICKY_RESOURCE_MAP \ + _IOW(KBASE_IOCTL_TYPE, 29, struct kbase_ioctl_sticky_resource_map) + +/** + * struct kbase_ioctl_sticky_resource_map - Unmap a resource mapped which was + * previously permanently mapped + * @count: Number of resources + * @address: Array of __u64 GPU addresses of the external resources to unmap + */ +struct kbase_ioctl_sticky_resource_unmap { + __u64 count; + __u64 address; +}; + +#define KBASE_IOCTL_STICKY_RESOURCE_UNMAP \ + _IOW(KBASE_IOCTL_TYPE, 30, struct kbase_ioctl_sticky_resource_unmap) + +/** + * union kbase_ioctl_mem_find_gpu_start_and_offset - Find the start address of + * the GPU memory region for + * the given gpu address and + * the offset of that address + * into the region + * @in: Input parameters + * @in.gpu_addr: GPU virtual address + * @in.size: Size in bytes within 
the region + * @out: Output parameters + * @out.start: Address of the beginning of the memory region enclosing @gpu_addr + * for the length of @offset bytes + * @out.offset: The offset from the start of the memory region to @gpu_addr + */ +union kbase_ioctl_mem_find_gpu_start_and_offset { + struct { + __u64 gpu_addr; + __u64 size; + } in; + struct { + __u64 start; + __u64 offset; + } out; +}; + +#define KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET \ + _IOWR(KBASE_IOCTL_TYPE, 31, union kbase_ioctl_mem_find_gpu_start_and_offset) + +#define KBASE_IOCTL_CINSTR_GWT_START \ + _IO(KBASE_IOCTL_TYPE, 33) + +#define KBASE_IOCTL_CINSTR_GWT_STOP \ + _IO(KBASE_IOCTL_TYPE, 34) + +/** + * union kbase_ioctl_gwt_dump - Used to collect all GPU write fault addresses. + * @in: Input parameters + * @in.addr_buffer: Address of buffer to hold addresses of gpu modified areas. + * @in.size_buffer: Address of buffer to hold size of modified areas (in pages) + * @in.len: Number of addresses the buffers can hold. + * @in.padding: padding + * @out: Output parameters + * @out.no_of_addr_collected: Number of addresses collected into addr_buffer. + * @out.more_data_available: Status indicating if more addresses are available. + * @out.padding: padding + * + * This structure is used when performing a call to dump GPU write fault + * addresses. 
+ */ +union kbase_ioctl_cinstr_gwt_dump { + struct { + __u64 addr_buffer; + __u64 size_buffer; + __u32 len; + __u32 padding; + + } in; + struct { + __u32 no_of_addr_collected; + __u8 more_data_available; + __u8 padding[27]; + } out; +}; + +#define KBASE_IOCTL_CINSTR_GWT_DUMP \ + _IOWR(KBASE_IOCTL_TYPE, 35, union kbase_ioctl_cinstr_gwt_dump) + +/** + * struct kbase_ioctl_mem_exec_init - Initialise the EXEC_VA memory zone + * + * @va_pages: Number of VA pages to reserve for EXEC_VA + */ +struct kbase_ioctl_mem_exec_init { + __u64 va_pages; +}; + +#define KBASE_IOCTL_MEM_EXEC_INIT \ + _IOW(KBASE_IOCTL_TYPE, 38, struct kbase_ioctl_mem_exec_init) + +/** + * union kbase_ioctl_get_cpu_gpu_timeinfo - Request zero or more types of + * cpu/gpu time (counter values) + * @in: Input parameters + * @in.request_flags: Bit-flags indicating the requested types. + * @in.paddings: Unused, size alignment matching the out. + * @out: Output parameters + * @out.sec: Integer field of the monotonic time, unit in seconds. + * @out.nsec: Fractional sec of the monotonic time, in nano-seconds. + * @out.padding: Unused, for __u64 alignment + * @out.timestamp: System wide timestamp (counter) value. + * @out.cycle_counter: GPU cycle counter value. + */ +union kbase_ioctl_get_cpu_gpu_timeinfo { + struct { + __u32 request_flags; + __u32 paddings[7]; + } in; + struct { + __u64 sec; + __u32 nsec; + __u32 padding; + __u64 timestamp; + __u64 cycle_counter; + } out; +}; + +#define KBASE_IOCTL_GET_CPU_GPU_TIMEINFO \ + _IOWR(KBASE_IOCTL_TYPE, 50, union kbase_ioctl_get_cpu_gpu_timeinfo) + +/** + * struct kbase_ioctl_context_priority_check - Check the max possible priority + * @priority: Input priority & output priority + */ + +struct kbase_ioctl_context_priority_check { + __u8 priority; +}; + +#define KBASE_IOCTL_CONTEXT_PRIORITY_CHECK \ + _IOWR(KBASE_IOCTL_TYPE, 54, struct kbase_ioctl_context_priority_check) + +/** + * struct kbase_ioctl_set_limited_core_count - Set the limited core count. 
+ * + * @max_core_count: Maximum core count + */ +struct kbase_ioctl_set_limited_core_count { + __u8 max_core_count; +}; + +#define KBASE_IOCTL_SET_LIMITED_CORE_COUNT \ + _IOW(KBASE_IOCTL_TYPE, 55, struct kbase_ioctl_set_limited_core_count) + + +/*************** + * test ioctls * + ***************/ +#if MALI_UNIT_TEST +/* These ioctls are purely for test purposes and are not used in the production + * driver, they therefore may change without notice + */ + +#define KBASE_IOCTL_TEST_TYPE (KBASE_IOCTL_TYPE + 1) + + +/** + * struct kbase_ioctl_tlstream_stats - Read tlstream stats for test purposes + * @bytes_collected: number of bytes read by user + * @bytes_generated: number of bytes generated by tracepoints + */ +struct kbase_ioctl_tlstream_stats { + __u32 bytes_collected; + __u32 bytes_generated; +}; + +#define KBASE_IOCTL_TLSTREAM_STATS \ + _IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats) + +#endif /* MALI_UNIT_TEST */ + +/* Customer extension range */ +#define KBASE_IOCTL_EXTRA_TYPE (KBASE_IOCTL_TYPE + 2) + +/* If the integration needs extra ioctl add them there + * like this: + * + * struct my_ioctl_args { + * .... 
+ * } + * + * #define KBASE_IOCTL_MY_IOCTL \ + * _IOWR(KBASE_IOCTL_EXTRA_TYPE, 0, struct my_ioctl_args) + */ + + +/********************************** + * Definitions for GPU properties * + **********************************/ +#define KBASE_GPUPROP_VALUE_SIZE_U8 (0x0) +#define KBASE_GPUPROP_VALUE_SIZE_U16 (0x1) +#define KBASE_GPUPROP_VALUE_SIZE_U32 (0x2) +#define KBASE_GPUPROP_VALUE_SIZE_U64 (0x3) + +#define KBASE_GPUPROP_PRODUCT_ID 1 +#define KBASE_GPUPROP_VERSION_STATUS 2 +#define KBASE_GPUPROP_MINOR_REVISION 3 +#define KBASE_GPUPROP_MAJOR_REVISION 4 +/* 5 previously used for GPU speed */ +#define KBASE_GPUPROP_GPU_FREQ_KHZ_MAX 6 +/* 7 previously used for minimum GPU speed */ +#define KBASE_GPUPROP_LOG2_PROGRAM_COUNTER_SIZE 8 +#define KBASE_GPUPROP_TEXTURE_FEATURES_0 9 +#define KBASE_GPUPROP_TEXTURE_FEATURES_1 10 +#define KBASE_GPUPROP_TEXTURE_FEATURES_2 11 +#define KBASE_GPUPROP_GPU_AVAILABLE_MEMORY_SIZE 12 + +#define KBASE_GPUPROP_L2_LOG2_LINE_SIZE 13 +#define KBASE_GPUPROP_L2_LOG2_CACHE_SIZE 14 +#define KBASE_GPUPROP_L2_NUM_L2_SLICES 15 + +#define KBASE_GPUPROP_TILER_BIN_SIZE_BYTES 16 +#define KBASE_GPUPROP_TILER_MAX_ACTIVE_LEVELS 17 + +#define KBASE_GPUPROP_MAX_THREADS 18 +#define KBASE_GPUPROP_MAX_WORKGROUP_SIZE 19 +#define KBASE_GPUPROP_MAX_BARRIER_SIZE 20 +#define KBASE_GPUPROP_MAX_REGISTERS 21 +#define KBASE_GPUPROP_MAX_TASK_QUEUE 22 +#define KBASE_GPUPROP_MAX_THREAD_GROUP_SPLIT 23 +#define KBASE_GPUPROP_IMPL_TECH 24 + +#define KBASE_GPUPROP_RAW_SHADER_PRESENT 25 +#define KBASE_GPUPROP_RAW_TILER_PRESENT 26 +#define KBASE_GPUPROP_RAW_L2_PRESENT 27 +#define KBASE_GPUPROP_RAW_STACK_PRESENT 28 +#define KBASE_GPUPROP_RAW_L2_FEATURES 29 +#define KBASE_GPUPROP_RAW_CORE_FEATURES 30 +#define KBASE_GPUPROP_RAW_MEM_FEATURES 31 +#define KBASE_GPUPROP_RAW_MMU_FEATURES 32 +#define KBASE_GPUPROP_RAW_AS_PRESENT 33 +#define KBASE_GPUPROP_RAW_JS_PRESENT 34 +#define KBASE_GPUPROP_RAW_JS_FEATURES_0 35 +#define KBASE_GPUPROP_RAW_JS_FEATURES_1 36 +#define 
KBASE_GPUPROP_RAW_JS_FEATURES_2 37 +#define KBASE_GPUPROP_RAW_JS_FEATURES_3 38 +#define KBASE_GPUPROP_RAW_JS_FEATURES_4 39 +#define KBASE_GPUPROP_RAW_JS_FEATURES_5 40 +#define KBASE_GPUPROP_RAW_JS_FEATURES_6 41 +#define KBASE_GPUPROP_RAW_JS_FEATURES_7 42 +#define KBASE_GPUPROP_RAW_JS_FEATURES_8 43 +#define KBASE_GPUPROP_RAW_JS_FEATURES_9 44 +#define KBASE_GPUPROP_RAW_JS_FEATURES_10 45 +#define KBASE_GPUPROP_RAW_JS_FEATURES_11 46 +#define KBASE_GPUPROP_RAW_JS_FEATURES_12 47 +#define KBASE_GPUPROP_RAW_JS_FEATURES_13 48 +#define KBASE_GPUPROP_RAW_JS_FEATURES_14 49 +#define KBASE_GPUPROP_RAW_JS_FEATURES_15 50 +#define KBASE_GPUPROP_RAW_TILER_FEATURES 51 +#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_0 52 +#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_1 53 +#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_2 54 +#define KBASE_GPUPROP_RAW_GPU_ID 55 +#define KBASE_GPUPROP_RAW_THREAD_MAX_THREADS 56 +#define KBASE_GPUPROP_RAW_THREAD_MAX_WORKGROUP_SIZE 57 +#define KBASE_GPUPROP_RAW_THREAD_MAX_BARRIER_SIZE 58 +#define KBASE_GPUPROP_RAW_THREAD_FEATURES 59 +#define KBASE_GPUPROP_RAW_COHERENCY_MODE 60 + +#define KBASE_GPUPROP_COHERENCY_NUM_GROUPS 61 +#define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS 62 +#define KBASE_GPUPROP_COHERENCY_COHERENCY 63 +#define KBASE_GPUPROP_COHERENCY_GROUP_0 64 +#define KBASE_GPUPROP_COHERENCY_GROUP_1 65 +#define KBASE_GPUPROP_COHERENCY_GROUP_2 66 +#define KBASE_GPUPROP_COHERENCY_GROUP_3 67 +#define KBASE_GPUPROP_COHERENCY_GROUP_4 68 +#define KBASE_GPUPROP_COHERENCY_GROUP_5 69 +#define KBASE_GPUPROP_COHERENCY_GROUP_6 70 +#define KBASE_GPUPROP_COHERENCY_GROUP_7 71 +#define KBASE_GPUPROP_COHERENCY_GROUP_8 72 +#define KBASE_GPUPROP_COHERENCY_GROUP_9 73 +#define KBASE_GPUPROP_COHERENCY_GROUP_10 74 +#define KBASE_GPUPROP_COHERENCY_GROUP_11 75 +#define KBASE_GPUPROP_COHERENCY_GROUP_12 76 +#define KBASE_GPUPROP_COHERENCY_GROUP_13 77 +#define KBASE_GPUPROP_COHERENCY_GROUP_14 78 +#define KBASE_GPUPROP_COHERENCY_GROUP_15 79 + +#define KBASE_GPUPROP_TEXTURE_FEATURES_3 80 
+#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_3 81 + +#define KBASE_GPUPROP_NUM_EXEC_ENGINES 82 + +#define KBASE_GPUPROP_RAW_THREAD_TLS_ALLOC 83 +#define KBASE_GPUPROP_TLS_ALLOC 84 +#define KBASE_GPUPROP_RAW_GPU_FEATURES 85 +#ifdef __cpluscplus +} +#endif + +#endif /* _UAPI_KBASE_IOCTL_H_ */ diff --git a/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_kbase_kinstr_jm_reader.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_kbase_kinstr_jm_reader.h new file mode 100644 index 0000000..72e1b9d --- /dev/null +++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_kbase_kinstr_jm_reader.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * mali_kbase_kinstr_jm_reader.h + * Provides an ioctl API to read kernel atom state changes. The flow of the + * API is: + * 1. Obtain the file descriptor with ``KBASE_IOCTL_KINSTR_JM_FD`` + * 2. Determine the buffer structure layout via the above ioctl's returned + * size and version fields in ``struct kbase_kinstr_jm_fd_out`` + * 4. Poll the file descriptor for ``POLLIN`` + * 5. Get data with read() on the fd + * 6. Use the structure version to understand how to read the data from the + * buffer + * 7. Repeat 4-6 + * 8. 
Close the file descriptor + */ + +#ifndef _UAPI_KBASE_KINSTR_JM_READER_H_ +#define _UAPI_KBASE_KINSTR_JM_READER_H_ + +/** + * enum kbase_kinstr_jm_reader_atom_state - Determines the work state of an atom + * @KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE: Signifies that an atom has + * entered a hardware queue + * @KBASE_KINSTR_JM_READER_ATOM_STATE_START: Signifies that work has started + * on an atom + * @KBASE_KINSTR_JM_READER_ATOM_STATE_STOP: Signifies that work has stopped + * on an atom + * @KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE: Signifies that work has + * completed on an atom + * @KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT: The number of state enumerations + * + * We can add new states to the end of this if they do not break the existing + * state machine. Old user mode code can gracefully ignore states they do not + * understand. + * + * If we need to make a breaking change to the state machine, we can do that by + * changing the version reported by KBASE_IOCTL_KINSTR_JM_FD. This will + * mean that old user mode code will fail to understand the new state field in + * the structure and gracefully not use the state change API. + */ +enum kbase_kinstr_jm_reader_atom_state { + KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE, /* 0 */ + KBASE_KINSTR_JM_READER_ATOM_STATE_START, /* 1 */ + KBASE_KINSTR_JM_READER_ATOM_STATE_STOP, /* 2 */ + KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE, /* 3 */ + KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT /* 4: number of states listed above */ +}; + +#endif /* _UAPI_KBASE_KINSTR_JM_READER_H_ */ diff --git a/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_uk.h b/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_uk.h new file mode 100644 index 0000000..fcb6cb8 --- /dev/null +++ b/dvalin/kernel/include/uapi/gpu/arm/midgard/mali_uk.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2010, 2012-2015, 2018, 2020-2021 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/** + * Types and definitions that are common across OSs for both the user + * and kernel side of the User-Kernel interface. + */ + +#ifndef _UAPI_UK_H_ +#define _UAPI_UK_H_ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** + * DOC: uk_api User-Kernel Interface API + * + * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device + * drivers developed as part of the Midgard DDK. Currently that includes the Base driver. + * + * It exposes an OS-independent API to user-side code (UKU) which routes function calls to an OS-independent + * kernel-side API (UKK) via an OS-specific communication mechanism. + * + * This API is internal to the Midgard DDK and is not exposed to any applications. + * + */ + +/** + * enum uk_client_id - These are identifiers for kernel-side drivers + * implementing a UK interface, aka UKK clients. + * @UK_CLIENT_MALI_T600_BASE: Value used to identify the Base driver UK client. + * @UK_CLIENT_COUNT: The number of uk clients supported. This must be + * the last member of the enum + * + * The UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Specify this + * identifier to select a UKK client to the uku_open() function. 
+ * + * When a new UKK client driver is created a new identifier needs to be added to the uk_client_id + * enumeration and the uku_open() implementation for the various OS ports needs to be updated to + * provide a mapping of the identifier to the OS specific device name. + * + */ +enum uk_client_id { + UK_CLIENT_MALI_T600_BASE, + UK_CLIENT_COUNT +}; + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* _UAPI_UK_H_ */ -- 2.20.1